/* Autogenerated file, DO NOT EDIT manually! generated by brw_oa.py
 *
 * Copyright (c) 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <stdint.h>
#include <stdbool.h>

#include "util/hash_table.h"

#include "brw_oa_bdw.h"
#include "brw_context.h"
#include "brw_performance_query.h"


/* Two-argument minimum / maximum.
 *
 * Every use of an argument is parenthesised so that expressions built from
 * operators with lower precedence than the relational operators (e.g.
 * `x & y`, `x | y`, `c ? x : y`) are compared as a whole rather than being
 * re-associated by the macro expansion.
 *
 * Note that each argument may still be evaluated twice, so arguments must
 * not have side effects.
 */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))


/* Render Metrics Basic Gen8 :: GPU Core Clocks
 *
 * RPN equation: GPU_CLOCK 0 READ
 *
 * Returns the accumulator entry located at query->gpu_clock_offset without
 * any further processing. The brw argument is not consulted.
 */
static uint64_t
bdw__render_basic__gpu_core_clocks__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   return accumulator[query->gpu_clock_offset];
}

/* Render Metrics Basic Gen8 :: EU Active
 *
 * RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A slot 7 by the EU count using integer division, multiplies the
 * quotient by 100 and finally divides by the GPU core clock count in
 * floating point. Either division yields 0 when its divisor is zero.
 */
static float
bdw__render_basic__eu_active__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 7];
   const uint64_t per_eu_x100 = (eu_count != 0 ? raw / eu_count : 0) * 100;
   const double clocks = bdw__render_basic__gpu_core_clocks__read(brw, query, accumulator);

   /* Guard the floating point division against an empty clock count. */
   if (clocks == 0)
      return 0;

   return (double) per_eu_x100 / clocks;
}

/* Render Metrics Basic Gen8 :: L3 Misses
 *
 * RPN equation: C 4 READ
 *
 * Returns C slot 4 of the accumulator unmodified. The brw argument is not
 * consulted.
 */
static uint64_t
bdw__render_basic__l3_misses__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   return accumulator[query->c_offset + 4];
}

/* Render Metrics Basic Gen8 :: GTI L3 Throughput
 *
 * RPN equation: $L3Misses 64 UMUL
 *
 * Returns the L3 Misses counter multiplied by 64.
 */
static uint64_t
bdw__render_basic__gti_l3_throughput__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   const uint64_t l3_misses =
      bdw__render_basic__l3_misses__read(brw, query, accumulator);

   return l3_misses * 64;
}

/* Render Metrics Basic Gen8 :: EU Both FPU Pipes Active
 *
 * RPN equation: A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A slot 9 by the EU count using integer division, multiplies the
 * quotient by 100 and finally divides by the GPU core clock count in
 * floating point. Either division yields 0 when its divisor is zero.
 */
static float
bdw__render_basic__eu_fpu_both_active__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 9];
   const uint64_t per_eu_x100 = (eu_count != 0 ? raw / eu_count : 0) * 100;
   const double clocks = bdw__render_basic__gpu_core_clocks__read(brw, query, accumulator);

   /* Guard the floating point division against an empty clock count. */
   if (clocks == 0)
      return 0;

   return (double) per_eu_x100 / clocks;
}

/* Render Metrics Basic Gen8 :: Sampler Cache Misses
 *
 * RPN equation: B 4 READ B 5 READ UADD 8 UMUL
 *
 * Returns the sum of B slots 4 and 5 multiplied by 8. The brw argument is
 * not consulted.
 */
static uint64_t
bdw__render_basic__sampler_l1_misses__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   const uint64_t first = accumulator[query->b_offset + 4];
   const uint64_t second = accumulator[query->b_offset + 5];

   return (first + second) * 8;
}

/* Render Metrics Basic Gen8 :: VS Send Pipe Active
 *
 * RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A slot 12 by the EU count using integer division, multiplies the
 * quotient by 100 and finally divides by the GPU core clock count in
 * floating point. Either division yields 0 when its divisor is zero.
 */
static float
bdw__render_basic__vs_send_active__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 12];
   const uint64_t per_eu_x100 = (eu_count != 0 ? raw / eu_count : 0) * 100;
   const double clocks = bdw__render_basic__gpu_core_clocks__read(brw, query, accumulator);

   /* Guard the floating point division against an empty clock count. */
   if (clocks == 0)
      return 0;

   return (double) per_eu_x100 / clocks;
}

/* Render Metrics Basic Gen8 :: Sampler 1 Bottleneck
 *
 * RPN equation: B 3 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Multiplies B slot 3 by 100 (integer arithmetic) and divides the result
 * by the GPU core clock count in floating point. Returns 0 when the clock
 * count is zero.
 */
static float
bdw__render_basic__sampler1_bottleneck__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->b_offset + 3] * 100;
   const double clocks = bdw__render_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0)
      return 0;

   return (double) scaled / clocks;
}

/* Render Metrics Basic Gen8 :: VS FPU1 Pipe Active
 *
 * RPN equation: A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A slot 11 by the EU count using integer division, multiplies the
 * quotient by 100 and finally divides by the GPU core clock count in
 * floating point. Either division yields 0 when its divisor is zero.
 */
static float
bdw__render_basic__vs_fpu1_active__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 11];
   const uint64_t per_eu_x100 = (eu_count != 0 ? raw / eu_count : 0) * 100;
   const double clocks = bdw__render_basic__gpu_core_clocks__read(brw, query, accumulator);

   /* Guard the floating point division against an empty clock count. */
   if (clocks == 0)
      return 0;

   return (double) per_eu_x100 / clocks;
}

/* Render Metrics Basic Gen8 :: GS Threads Dispatched
 *
 * RPN equation: A 5 READ
 *
 * Returns A slot 5 of the accumulator unmodified. The brw argument is not
 * consulted.
 */
static uint64_t
bdw__render_basic__gs_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   return accumulator[query->a_offset + 5];
}

/* Render Metrics Basic Gen8 :: L3 Sampler Throughput
 *
 * RPN equation: $SamplerL1Misses 64 UMUL
 *
 * Returns the Sampler Cache Misses counter multiplied by 64.
 */
static uint64_t
bdw__render_basic__l3_sampler_throughput__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   const uint64_t sampler_misses =
      bdw__render_basic__sampler_l1_misses__read(brw, query, accumulator);

   return sampler_misses * 64;
}

/* Render Metrics Basic Gen8 :: Early Hi-Depth Test Fails
 *
 * RPN equation: A 22 READ 4 UMUL
 *
 * Returns A slot 22 of the accumulator multiplied by 4. The brw argument
 * is not consulted.
 */
static uint64_t
bdw__render_basic__hi_depth_test_fails__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 22];

   return raw * 4;
}

/* Render Metrics Basic Gen8 :: FS Both FPU Active
 *
 * RPN equation: A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A slot 18 by the EU count using integer division, multiplies the
 * quotient by 100 and finally divides by the GPU core clock count in
 * floating point. Either division yields 0 when its divisor is zero.
 */
static float
bdw__render_basic__ps_eu_both_fpu_active__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 18];
   const uint64_t per_eu_x100 = (eu_count != 0 ? raw / eu_count : 0) * 100;
   const double clocks = bdw__render_basic__gpu_core_clocks__read(brw, query, accumulator);

   /* Guard the floating point division against an empty clock count. */
   if (clocks == 0)
      return 0;

   return (double) per_eu_x100 / clocks;
}

/* Render Metrics Basic Gen8 :: VS Threads Dispatched
 *
 * RPN equation: A 1 READ
 *
 * Returns A slot 1 of the accumulator unmodified. The brw argument is not
 * consulted.
 */
static uint64_t
bdw__render_basic__vs_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   return accumulator[query->a_offset + 1];
}

/* Render Metrics Basic Gen8 :: FS Threads Dispatched
 *
 * RPN equation: A 6 READ
 *
 * Returns A slot 6 of the accumulator unmodified. The brw argument is not
 * consulted.
 */
static uint64_t
bdw__render_basic__ps_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   return accumulator[query->a_offset + 6];
}

/* Render Metrics Basic Gen8 :: Sampler 0 Busy
 *
 * RPN equation: B 0 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Multiplies B slot 0 by 100 (integer arithmetic) and divides the result
 * by the GPU core clock count in floating point. Returns 0 when the clock
 * count is zero.
 */
static float
bdw__render_basic__sampler0_busy__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->b_offset + 0] * 100;
   const double clocks = bdw__render_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0)
      return 0;

   return (double) scaled / clocks;
}

/* Render Metrics Basic Gen8 :: Sampler 1 Busy
 *
 * RPN equation: B 1 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Multiplies B slot 1 by 100 (integer arithmetic) and divides the result
 * by the GPU core clock count in floating point. Returns 0 when the clock
 * count is zero.
 */
static float
bdw__render_basic__sampler1_busy__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->b_offset + 1] * 100;
   const double clocks = bdw__render_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0)
      return 0;

   return (double) scaled / clocks;
}

/* Render Metrics Basic Gen8 :: Samplers Busy
 *
 * RPN equation: $Sampler0Busy $Sampler1Busy FMAX
 *
 * Returns the larger of the Sampler 0 Busy and Sampler 1 Busy counters.
 */
static float
bdw__render_basic__samplers_busy__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   const double sampler0 = bdw__render_basic__sampler0_busy__read(brw, query, accumulator);
   const double sampler1 = bdw__render_basic__sampler1_busy__read(brw, query, accumulator);

   if (sampler0 > sampler1)
      return sampler0;

   return sampler1;
}

/* Render Metrics Basic Gen8 :: GTI Fixed Pipe Throughput
 *
 * RPN equation: B 6 READ B 7 READ UADD 64 UMUL
 *
 * Returns the sum of B slots 6 and 7 multiplied by 64. The brw argument is
 * not consulted.
 */
static uint64_t
bdw__render_basic__gti_vf_throughput__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   const uint64_t first = accumulator[query->b_offset + 6];
   const uint64_t second = accumulator[query->b_offset + 7];

   return (first + second) * 64;
}

/* Render Metrics Basic Gen8 :: Shader Barrier Messages
 *
 * RPN equation: A 35 READ
 *
 * Returns A slot 35 of the accumulator unmodified. The brw argument is not
 * consulted.
 */
static uint64_t
bdw__render_basic__shader_barriers__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   return accumulator[query->a_offset + 35];
}

/* Render Metrics Basic Gen8 :: Sampler 0 Bottleneck
 *
 * RPN equation: B 2 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Multiplies B slot 2 by 100 (integer arithmetic) and divides the result
 * by the GPU core clock count in floating point. Returns 0 when the clock
 * count is zero.
 */
static float
bdw__render_basic__sampler0_bottleneck__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->b_offset + 2] * 100;
   const double clocks = bdw__render_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0)
      return 0;

   return (double) scaled / clocks;
}

/* Render Metrics Basic Gen8 :: Sampler Texels
 *
 * RPN equation: A 28 READ 4 UMUL
 *
 * Returns A slot 28 of the accumulator multiplied by 4. The brw argument
 * is not consulted.
 */
static uint64_t
bdw__render_basic__sampler_texels__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 28];

   return raw * 4;
}

/* Render Metrics Basic Gen8 :: Pixels Failing Tests
 *
 * RPN equation: A 25 READ 4 UMUL
 *
 * Returns A slot 25 of the accumulator multiplied by 4. The brw argument
 * is not consulted.
 */
static uint64_t
bdw__render_basic__pixels_failing_post_ps_tests__read(struct brw_context *brw,
                                                      const struct brw_perf_query_info *query,
                                                      uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 25];

   return raw * 4;
}

/* Render Metrics Basic Gen8 :: GPU Time Elapsed
 *
 * RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV
 *
 * Multiplies the accumulator entry at query->gpu_time_offset by
 * 1000000000 and divides the product by the timestamp frequency using
 * integer arithmetic. Returns 0 when the timestamp frequency is zero.
 */
static uint64_t
bdw__render_basic__gpu_time__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   const uint64_t frequency = brw->perfquery.sys_vars.timestamp_frequency;
   const uint64_t scaled = accumulator[query->gpu_time_offset + 0] * 1000000000;

   if (frequency == 0)
      return 0;

   return scaled / frequency;
}

/* Render Metrics Basic Gen8 :: AVG GPU Core Frequency
 *
 * RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV
 *
 * Multiplies the GPU Core Clocks counter by 1000000000 and divides the
 * product by the GPU Time Elapsed counter using integer arithmetic.
 * Returns 0 when the elapsed time is zero.
 */
static uint64_t
bdw__render_basic__avg_gpu_core_frequency__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   const uint64_t scaled_clocks =
      bdw__render_basic__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t gpu_time =
      bdw__render_basic__gpu_time__read(brw, query, accumulator);

   if (gpu_time == 0)
      return 0;

   return scaled_clocks / gpu_time;
}

/* Render Metrics Basic Gen8 :: AVG GPU Core Frequency (maximum)
 *
 * RPN equation: $GpuMaxFrequency
 *
 * Returns the gt_max_freq system variable stored in the context.
 */
static uint64_t
bdw__render_basic__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   const uint64_t max_frequency = brw->perfquery.sys_vars.gt_max_freq;

   return max_frequency;
}

/* Render Metrics Basic Gen8 :: Sampler Texels Misses
 *
 * RPN equation: A 29 READ 4 UMUL
 *
 * Returns A slot 29 of the accumulator multiplied by 4. The brw argument
 * is not consulted.
 */
static uint64_t
bdw__render_basic__sampler_texel_misses__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 29];

   return raw * 4;
}

/* Render Metrics Basic Gen8 :: CS Threads Dispatched
 *
 * RPN equation: A 4 READ
 *
 * Returns A slot 4 of the accumulator unmodified. The brw argument is not
 * consulted.
 */
static uint64_t
bdw__render_basic__cs_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   return accumulator[query->a_offset + 4];
}

/* Render Metrics Basic Gen8 :: Shader Memory Accesses
 *
 * RPN equation: A 32 READ
 *
 * Returns A slot 32 of the accumulator unmodified. The brw argument is not
 * consulted.
 */
static uint64_t
bdw__render_basic__shader_memory_accesses__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   return accumulator[query->a_offset + 32];
}

/* Render Metrics Basic Gen8 :: L3 Lookup Accesses w/o IC
 *
 * RPN equation: $SamplerL1Misses $ShaderMemoryAccesses UADD
 *
 * Returns the sum of the Sampler Cache Misses and Shader Memory Accesses
 * counters.
 */
static uint64_t
bdw__render_basic__l3_lookups__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   const uint64_t sampler_misses =
      bdw__render_basic__sampler_l1_misses__read(brw, query, accumulator);
   const uint64_t shader_accesses =
      bdw__render_basic__shader_memory_accesses__read(brw, query, accumulator);

   return sampler_misses + shader_accesses;
}

/* Render Metrics Basic Gen8 :: SLM Bytes Read
 *
 * RPN equation: A 30 READ 64 UMUL
 *
 * Returns A slot 30 of the accumulator multiplied by 64. The brw argument
 * is not consulted.
 */
static uint64_t
bdw__render_basic__slm_bytes_read__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 30];

   return raw * 64;
}

/* Render Metrics Basic Gen8 :: GTI Read Throughput
 *
 * RPN equation: C 6 READ 64 UMUL
 *
 * Returns C slot 6 of the accumulator multiplied by 64. The brw argument
 * is not consulted.
 */
static uint64_t
bdw__render_basic__gti_read_throughput__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->c_offset + 6];

   return raw * 64;
}

/* Render Metrics Basic Gen8 :: PS FPU1 Pipe Active
 *
 * RPN equation: A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A slot 16 by the EU count using integer division, multiplies the
 * quotient by 100 and finally divides by the GPU core clock count in
 * floating point. Either division yields 0 when its divisor is zero.
 */
static float
bdw__render_basic__ps_fpu1_active__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 16];
   const uint64_t per_eu_x100 = (eu_count != 0 ? raw / eu_count : 0) * 100;
   const double clocks = bdw__render_basic__gpu_core_clocks__read(brw, query, accumulator);

   /* Guard the floating point division against an empty clock count. */
   if (clocks == 0)
      return 0;

   return (double) per_eu_x100 / clocks;
}

/* Render Metrics Basic Gen8 :: PS Send Pipeline Active
 *
 * RPN equation: A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A slot 17 by the EU count using integer division, multiplies the
 * quotient by 100 and finally divides by the GPU core clock count in
 * floating point. Either division yields 0 when its divisor is zero.
 */
static float
bdw__render_basic__ps_send_active__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 17];
   const uint64_t per_eu_x100 = (eu_count != 0 ? raw / eu_count : 0) * 100;
   const double clocks = bdw__render_basic__gpu_core_clocks__read(brw, query, accumulator);

   /* Guard the floating point division against an empty clock count. */
   if (clocks == 0)
      return 0;

   return (double) per_eu_x100 / clocks;
}

/* Render Metrics Basic Gen8 :: Rasterized Pixels
 *
 * RPN equation: A 21 READ 4 UMUL
 *
 * Returns A slot 21 of the accumulator multiplied by 4. The brw argument
 * is not consulted.
 */
static uint64_t
bdw__render_basic__rasterized_pixels__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 21];

   return raw * 4;
}

/* Render Metrics Basic Gen8 :: GPU Busy
 *
 * RPN equation: A 0 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Multiplies A slot 0 by 100 (integer arithmetic) and divides the result
 * by the GPU core clock count in floating point. Returns 0 when the clock
 * count is zero.
 */
static float
bdw__render_basic__gpu_busy__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->a_offset + 0] * 100;
   const double clocks = bdw__render_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0)
      return 0;

   return (double) scaled / clocks;
}

/* Render Metrics Basic Gen8 :: GTI Depth Throughput
 *
 * RPN equation: C 0 READ C 1 READ UADD 64 UMUL
 *
 * Returns the sum of C slots 0 and 1 multiplied by 64. The brw argument is
 * not consulted.
 */
static uint64_t
bdw__render_basic__gti_depth_throughput__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   const uint64_t first = accumulator[query->c_offset + 0];
   const uint64_t second = accumulator[query->c_offset + 1];

   return (first + second) * 64;
}

/* Render Metrics Basic Gen8 :: VS FPU0 Pipe Active
 *
 * RPN equation: A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A slot 10 by the EU count using integer division, multiplies the
 * quotient by 100 and finally divides by the GPU core clock count in
 * floating point. Either division yields 0 when its divisor is zero.
 */
static float
bdw__render_basic__vs_fpu0_active__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 10];
   const uint64_t per_eu_x100 = (eu_count != 0 ? raw / eu_count : 0) * 100;
   const double clocks = bdw__render_basic__gpu_core_clocks__read(brw, query, accumulator);

   /* Guard the floating point division against an empty clock count. */
   if (clocks == 0)
      return 0;

   return (double) per_eu_x100 / clocks;
}

/* Render Metrics Basic Gen8 :: PS FPU0 Pipe Active
 *
 * RPN equation: A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A slot 15 by the EU count using integer division, multiplies the
 * quotient by 100 and finally divides by the GPU core clock count in
 * floating point. Either division yields 0 when its divisor is zero.
 */
static float
bdw__render_basic__ps_fpu0_active__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 15];
   const uint64_t per_eu_x100 = (eu_count != 0 ? raw / eu_count : 0) * 100;
   const double clocks = bdw__render_basic__gpu_core_clocks__read(brw, query, accumulator);

   /* Guard the floating point division against an empty clock count. */
   if (clocks == 0)
      return 0;

   return (double) per_eu_x100 / clocks;
}

/* Render Metrics Basic Gen8 :: DS Threads Dispatched
 *
 * RPN equation: A 3 READ
 *
 * Returns A slot 3 of the accumulator unmodified. The brw argument is not
 * consulted.
 */
static uint64_t
bdw__render_basic__ds_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   return accumulator[query->a_offset + 3];
}

/* Render Metrics Basic Gen8 :: Samples Written
 *
 * RPN equation: A 26 READ 4 UMUL
 *
 * Returns A slot 26 of the accumulator multiplied by 4. The brw argument
 * is not consulted.
 */
static uint64_t
bdw__render_basic__samples_written__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 26];

   return raw * 4;
}

/* Render Metrics Basic Gen8 :: EU Stall
 *
 * RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A slot 8 by the EU count using integer division, multiplies the
 * quotient by 100 and finally divides by the GPU core clock count in
 * floating point. Either division yields 0 when its divisor is zero.
 */
static float
bdw__render_basic__eu_stall__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 8];
   const uint64_t per_eu_x100 = (eu_count != 0 ? raw / eu_count : 0) * 100;
   const double clocks = bdw__render_basic__gpu_core_clocks__read(brw, query, accumulator);

   /* Guard the floating point division against an empty clock count. */
   if (clocks == 0)
      return 0;

   return (double) per_eu_x100 / clocks;
}

/* Render Metrics Basic Gen8 :: Samples Blended
 *
 * RPN equation: A 27 READ 4 UMUL
 *
 * Returns A slot 27 of the accumulator multiplied by 4. The brw argument
 * is not consulted.
 */
static uint64_t
bdw__render_basic__samples_blended__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 27];

   return raw * 4;
}

/* Render Metrics Basic Gen8 :: Early Depth Test Fails
 *
 * RPN equation: A 23 READ 4 UMUL
 *
 * Returns A slot 23 of the accumulator multiplied by 4. The brw argument
 * is not consulted.
 */
static uint64_t
bdw__render_basic__early_depth_test_fails__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 23];

   return raw * 4;
}

/* Render Metrics Basic Gen8 :: Samplers Bottleneck
 *
 * RPN equation: $Sampler0Bottleneck $Sampler1Bottleneck FMAX
 *
 * Returns the larger of the Sampler 0 Bottleneck and Sampler 1 Bottleneck
 * counters.
 */
static float
bdw__render_basic__sampler_bottleneck__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   const double sampler0 = bdw__render_basic__sampler0_bottleneck__read(brw, query, accumulator);
   const double sampler1 = bdw__render_basic__sampler1_bottleneck__read(brw, query, accumulator);

   if (sampler0 > sampler1)
      return sampler0;

   return sampler1;
}

/* Render Metrics Basic Gen8 :: GTI HDC TLB Lookup Throughput
 *
 * RPN equation: C 5 READ 64 UMUL
 *
 * Returns C slot 5 of the accumulator multiplied by 64. The brw argument
 * is not consulted.
 */
static uint64_t
bdw__render_basic__gti_hdc_lookups_throughput__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->c_offset + 5];

   return raw * 64;
}

/* Render Metrics Basic Gen8 :: GTI RCC Throughput
 *
 * RPN equation: C 2 READ C 3 READ UADD 64 UMUL
 *
 * Returns the sum of C slots 2 and 3 multiplied by 64. The brw argument is
 * not consulted.
 */
static uint64_t
bdw__render_basic__gti_rcc_throughput__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   const uint64_t first = accumulator[query->c_offset + 2];
   const uint64_t second = accumulator[query->c_offset + 3];

   return (first + second) * 64;
}

/* Render Metrics Basic Gen8 :: HS Threads Dispatched
 *
 * RPN equation: A 2 READ
 *
 * Returns A slot 2 of the accumulator unmodified. The brw argument is not
 * consulted.
 */
static uint64_t
bdw__render_basic__hs_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   return accumulator[query->a_offset + 2];
}

/* Render Metrics Basic Gen8 :: GTI Write Throughput
 *
 * RPN equation: C 7 READ 64 UMUL
 *
 * Returns C slot 7 of the accumulator multiplied by 64. The brw argument
 * is not consulted.
 */
static uint64_t
bdw__render_basic__gti_write_throughput__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->c_offset + 7];

   return raw * 64;
}

/* Render Metrics Basic Gen8 :: SLM Bytes Written
 *
 * RPN equation: A 31 READ 64 UMUL
 *
 * Returns A slot 31 of the accumulator multiplied by 64. The brw argument
 * is not consulted.
 */
static uint64_t
bdw__render_basic__slm_bytes_written__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 31];

   return raw * 64;
}

/* Render Metrics Basic Gen8 :: L3 Shader Throughput
 *
 * RPN equation: A 30 READ A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL
 *
 * Adds A slot 30, A slot 31 and the Shader Memory Accesses counter, then
 * multiplies the total by 64.
 */
static uint64_t
bdw__render_basic__l3_shader_throughput__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   const uint64_t slot30 = accumulator[query->a_offset + 30];
   const uint64_t slot31 = accumulator[query->a_offset + 31];
   const uint64_t shader_accesses =
      bdw__render_basic__shader_memory_accesses__read(brw, query, accumulator);

   /* Unsigned addition wraps modulo 2^64, so grouping does not matter. */
   return (slot30 + slot31 + shader_accesses) * 64;
}

/* Render Metrics Basic Gen8 :: Samples Killed in FS
 *
 * RPN equation: A 24 READ 4 UMUL
 *
 * Returns A slot 24 of the accumulator multiplied by 4. The brw argument
 * is not consulted.
 */
static uint64_t
bdw__render_basic__samples_killed_in_ps__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 24];

   return raw * 4;
}

/* Render Metrics Basic Gen8 :: Shader Atomic Memory Accesses
 *
 * RPN equation: A 34 READ
 *
 * Returns A slot 34 of the accumulator unmodified. The brw argument is not
 * consulted.
 */
static uint64_t
bdw__render_basic__shader_atomics__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   return accumulator[query->a_offset + 34];
}

/* Register programming tables for the "Render Metrics Basic Gen8" query.
 * Only fixed-size storage is declared here; the matching n_*_regs counts in
 * bdw_render_basic_query start at zero, and
 * register_render_basic_counter_query() appends the mux entries at runtime.
 */
static struct brw_perf_query_register_prog bdw_render_basic_mux_regs[219];
static struct brw_perf_query_register_prog bdw_render_basic_b_counter_regs[5];
static struct brw_perf_query_register_prog bdw_render_basic_flex_regs[7];

/* Backing storage referenced by bdw_render_basic_query.counters. */
static struct brw_perf_query_counter bdw_render_basic_query_counters[52];

/* Description of the "Render Metrics Basic Gen8" query.
 *
 * Fields not listed in the initializer (for example data_size) are
 * zero-initialized; register_render_basic_counter_query() tests data_size
 * for zero before populating the register tables.
 */
static struct brw_perf_query_info bdw_render_basic_query = {
   .kind = OA_COUNTERS,
   .name = "Render Metrics Basic Gen8",
   .guid = "b541bd57-0e0f-4154-b4c0-5858010a2bf7",
   .counters = bdw_render_basic_query_counters,
   /* NOTE(review): presumably incremented as counters are registered at
    * runtime, like the n_*_regs fields below - confirm. */
   .n_counters = 0,
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,

   /* Accumulation buffer offsets: these are the base indices the *__read()
    * functions above add to when indexing the uint64_t accumulator array.
    * NOTE(review): the gaps (a=2, b=38, c=46) look like 36 A slots followed
    * by 8 B slots, matching the A32+A4/B8/C8 format name - confirm. */
   .gpu_time_offset = 0,
   .gpu_clock_offset = 1,
   .a_offset = 2,
   .b_offset = 38,
   .c_offset = 46,
   .mux_regs = bdw_render_basic_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = bdw_render_basic_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
   .flex_regs = bdw_render_basic_flex_regs,
   .n_flex_regs = 0, /* Determined at runtime */
};

static void
register_render_basic_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &bdw_render_basic_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      if (brw->perfquery.sys_vars.slice_mask & 0x01) {
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x143F000F };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x14110014 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x14310014 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x14BF000F };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x118A0317 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x13837BE0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3B800060 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3D800005 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x005C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x065C8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x085CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x003D8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x183D0800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A3F0023 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x103F0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00584000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08584000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A5A4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x005B4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E5B8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x185B2400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A1D4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C1F0800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1FAA00 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18380001 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00392000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06398000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0839A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A391000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00104000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08104000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00110030 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08110031 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10110000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00134000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16130020 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06308000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08308000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06311800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08311880 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10310000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E334000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16330080 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0ABF1180 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10BF0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0ADA8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A9D8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x109F0002 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0AB94000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038A0380 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058A000E };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8A00A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078A0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098A0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B2820 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258B2550 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198C1000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8D8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85AA80 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185AAA0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D831021 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F83572F };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01835680 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0383002C };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x11830000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x19835400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830001 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05830000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0184C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07848000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0984C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0384C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05844000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B80C137 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C147 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1180C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x17808000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1980C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1380C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x15804000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D801110 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800331 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x43800802 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x51800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45801465 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53801111 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x478014A5 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F800CA5 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800003 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };
      }

      if (brw->perfquery.sys_vars.slice_mask & 0x02) {
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x143F000F };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x14BF000F };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x14910014 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x14B10014 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x118A0317 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x13837BE0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3B800060 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3D800005 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A3F0023 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x103F0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A5A4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A1D4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1F8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A391000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00DC4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06DC8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08DCC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00BD8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18BD0800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0ABF1180 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10BF0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00D84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08D84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0ADA8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00DB4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EDB8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18DB2400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A9D8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C9F0800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E9F2A00 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x109F0002 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00B84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EB84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16B84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18B80001 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00B92000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06B98000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08B9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0AB94000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00904000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08904000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00910030 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08910031 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10910000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00934000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16930020 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06B08000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08B08000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06B11800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08B11880 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10B10000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EB34000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16B30080 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D88B800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038A0380 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058A000E };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8A0080 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078A0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098A0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B2840 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258B26A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x178C2000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198C1100 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018D2000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078D8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8D8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85AA80 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185AAA0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D831021 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F83572F };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01835680 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0383002C };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x11830000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x19835400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830001 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05830000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0184C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07848000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0984C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0384C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05844000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B80C137 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C147 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1180C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x17808000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1980C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1380C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x15804000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D801550 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800331 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x43800802 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x51800400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x458004A1 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53805555 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47800421 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F801421 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800845 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };
      }

      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0x00800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0x00800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };

      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E458, .val = 0x00005004 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E558, .val = 0x00010003 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E658, .val = 0x00012011 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E758, .val = 0x00015014 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E45C, .val = 0x00051050 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E55C, .val = 0x00053052 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E65C, .val = 0x00055054 };


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 16;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 24;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 32;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__sampler_l1_misses__read;
      counter->name = "Sampler Cache Misses";
      counter->desc = "The total number of sampler cache misses in all LODs in all sampler units.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 40;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 48;
      counter->size = sizeof(float);

      if (brw->perfquery.sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__render_basic__sampler1_bottleneck__read;
         counter->name = "Sampler 1 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests.";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 52;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 56;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 64;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 72;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 80;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 88;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 96;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 104;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__render_basic__sampler0_busy__read;
         counter->name = "Sampler 0 Busy";
         counter->desc = "The percentage of time in which Sampler 0 has been processing EU requests.";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 112;
         counter->size = sizeof(float);
      }

      if (brw->perfquery.sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__render_basic__sampler1_busy__read;
         counter->name = "Sampler 1 Busy";
         counter->desc = "The percentage of time in which Sampler 1 has been processing EU requests.";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 116;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__samplers_busy__read;
      counter->name = "Samplers Busy";
      counter->desc = "The percentage of time in which samplers have been processing EU requests.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 120;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gti_vf_throughput__read;
      counter->name = "GTI Fixed Pipe Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 128;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 136;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__render_basic__sampler0_bottleneck__read;
         counter->name = "Sampler 0 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests.";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 144;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 152;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 160;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 168;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = bdw__render_basic__avg_gpu_core_frequency__max(brw);
      counter->offset = 176;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 184;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 192;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 200;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__l3_lookups__read;
      counter->name = "L3 Lookup Accesses w/o IC";
      counter->desc = "The total number of L3 cache lookup accesses w/o IC.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 208;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 216;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 224;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 232;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 236;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 240;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 248;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gti_depth_throughput__read;
      counter->name = "GTI Depth Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between depth caches and GTI.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 256;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 264;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 268;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 272;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 280;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 288;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 296;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 304;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__sampler_bottleneck__read;
      counter->name = "Samplers Bottleneck";
      counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 312;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gti_hdc_lookups_throughput__read;
      counter->name = "GTI HDC TLB Lookup Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 320;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gti_rcc_throughput__read;
      counter->name = "GTI RCC Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between render color caches and GTI.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 328;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 336;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 344;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 352;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 360;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 368;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 376;
      counter->size = sizeof(uint64_t);

      query->data_size = counter->offset + counter->size;
   }

   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Compute Metrics Basic Gen8 :: GPU Core Clocks
 *
 * Returns the accumulator entry located at query->gpu_clock_offset,
 * unmodified.  The brw argument is not consulted by this reader.
 */
static uint64_t
bdw__compute_basic__gpu_core_clocks__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   /* RPN equation: GPU_CLOCK 0 READ */
   return accumulator[query->gpu_clock_offset + 0];
}

/* Compute Metrics Basic Gen8 :: EU Active
 *
 * Divides accumulator entry A7 by the EU count (unsigned integer division),
 * multiplies the quotient by 100 and then divides that, in double precision,
 * by the GPU core clock count.  A zero EU count or a zero clock count yields
 * 0 instead of dividing by zero.
 */
static float
bdw__compute_basic__eu_active__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 7];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t per_eu = 0;
   double result = 0;

   /* Integer division happens first, so the remainder is discarded. */
   if (eu_count != 0)
      per_eu = raw / eu_count;

   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks != 0)
      result = scaled / core_clocks;

   return result;
}

/* Compute Metrics Basic Gen8 :: Untyped Bytes Read
 *
 * Adds accumulator entries B6, B7 and C0 together and multiplies the sum by
 * (n_eu_slices * 64).  All arithmetic is unsigned 64-bit.
 */
static uint64_t
bdw__compute_basic__untyped_bytes_read__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: B 6 READ B 7 READ C 0 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL */
   const uint64_t scale = brw->perfquery.sys_vars.n_eu_slices * 64;
   uint64_t sum = accumulator[query->b_offset + 6];

   sum += accumulator[query->b_offset + 7];
   sum += accumulator[query->c_offset + 0];

   return sum * scale;
}

/* Compute Metrics Basic Gen8 :: EU Both FPU Pipes Active
 *
 * Divides accumulator entry A9 by the EU count (unsigned integer division),
 * multiplies the quotient by 100 and then divides that, in double precision,
 * by the GPU core clock count.  A zero EU count or a zero clock count yields
 * 0 instead of dividing by zero.
 */
static float
bdw__compute_basic__eu_fpu_both_active__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 9];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t per_eu = 0;
   double result = 0;

   /* Integer division happens first, so the remainder is discarded. */
   if (eu_count != 0)
      per_eu = raw / eu_count;

   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks != 0)
      result = scaled / core_clocks;

   return result;
}

/* Compute Metrics Basic Gen8 :: Typed Bytes Written
 *
 * Adds accumulator entries B3, B4 and B5 together and multiplies the sum by
 * (n_eu_slices * 64).  All arithmetic is unsigned 64-bit.
 */
static uint64_t
bdw__compute_basic__typed_bytes_written__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: B 3 READ B 4 READ B 5 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL */
   const uint64_t scale = brw->perfquery.sys_vars.n_eu_slices * 64;
   uint64_t sum = accumulator[query->b_offset + 3];

   sum += accumulator[query->b_offset + 4];
   sum += accumulator[query->b_offset + 5];

   return sum * scale;
}

/* Compute Metrics Basic Gen8 :: EU FPU0 Pipe Active
 *
 * Divides accumulator entry A10 by the EU count (unsigned integer division),
 * multiplies the quotient by 100 and then divides that, in double precision,
 * by the GPU core clock count.  A zero EU count or a zero clock count yields
 * 0 instead of dividing by zero.
 */
static float
bdw__compute_basic__fpu0_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 10];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t per_eu = 0;
   double result = 0;

   /* Integer division happens first, so the remainder is discarded. */
   if (eu_count != 0)
      per_eu = raw / eu_count;

   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks != 0)
      result = scaled / core_clocks;

   return result;
}

/* Compute Metrics Basic Gen8 :: EU FPU1 Pipe Active
 *
 * Divides accumulator entry A11 by the EU count (unsigned integer division),
 * multiplies the quotient by 100 and then divides that, in double precision,
 * by the GPU core clock count.  A zero EU count or a zero clock count yields
 * 0 instead of dividing by zero.
 */
static float
bdw__compute_basic__fpu1_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 11];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t per_eu = 0;
   double result = 0;

   /* Integer division happens first, so the remainder is discarded. */
   if (eu_count != 0)
      per_eu = raw / eu_count;

   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks != 0)
      result = scaled / core_clocks;

   return result;
}

/* Compute Metrics Basic Gen8 :: EU AVG IPC Rate
 *
 * Computes A9 / (A10 + A11 - A9) + 1 from the accumulator entries, where the
 * divisor is formed with unsigned 64-bit arithmetic and the division itself
 * is done in double precision.  When the divisor is zero the quotient is
 * taken as 0, so the function returns 1.  The brw argument is not consulted.
 */
static float
bdw__compute_basic__eu_avg_ipc_rate__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   /* RPN equation: A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD */
   const uint64_t a9 = accumulator[query->a_offset + 9];
   const uint64_t a10 = accumulator[query->a_offset + 10];
   const uint64_t a11 = accumulator[query->a_offset + 11];

   /* Unsigned arithmetic: the subtraction wraps if a10 + a11 < a9. */
   const uint64_t divisor_u64 = (a10 + a11) - a9;

   const double dividend = a9;
   const double divisor = divisor_u64;
   double quotient = 0;

   if (divisor != 0)
      quotient = dividend / divisor;

   const double rate = quotient + 1;

   return rate;
}

/* Compute Metrics Basic Gen8 :: GS Threads Dispatched
 *
 * Returns accumulator entry A5 (index query->a_offset + 5) unmodified.
 * The brw argument is not consulted by this reader.
 */
static uint64_t
bdw__compute_basic__gs_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 5 READ */
   return accumulator[query->a_offset + 5];
}

/* Compute Metrics Basic Gen8 :: Early Hi-Depth Test Fails
 *
 * Returns accumulator entry A22 (index query->a_offset + 22) multiplied by 4
 * using unsigned 64-bit arithmetic.  The brw argument is not consulted.
 */
static uint64_t
bdw__compute_basic__hi_depth_test_fails__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: A 22 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 22];

   return raw * 4;
}

/* Compute Metrics Basic Gen8 :: VS Threads Dispatched
 *
 * Returns accumulator entry A1 (index query->a_offset + 1) unmodified.
 * The brw argument is not consulted by this reader.
 */
static uint64_t
bdw__compute_basic__vs_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 1 READ */
   return accumulator[query->a_offset + 1];
}

/* Compute Metrics Basic Gen8 :: FS Threads Dispatched
 *
 * RPN equation: A 6 READ
 *
 * Returns the accumulated value of A counter 6 unmodified.
 */
static uint64_t
bdw__compute_basic__ps_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[6];
}

/* Compute Metrics Basic Gen8 :: Shader Barrier Messages
 *
 * RPN equation: A 35 READ
 *
 * Returns the accumulated value of A counter 35 unmodified.
 */
static uint64_t
bdw__compute_basic__shader_barriers__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[35];
}

/* Compute Metrics Basic Gen8 :: Sampler Texels
 *
 * RPN equation: A 28 READ 4 UMUL
 *
 * Returns the accumulated value of A counter 28 multiplied by 4
 * (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__compute_basic__sampler_texels__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 28];

   return raw_count * 4;
}

/* Compute Metrics Basic Gen8 :: Pixels Failing Tests
 *
 * RPN equation: A 25 READ 4 UMUL
 *
 * Returns the accumulated value of A counter 25 multiplied by 4
 * (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__compute_basic__pixels_failing_post_ps_tests__read(struct brw_context *brw,
                                                       const struct brw_perf_query_info *query,
                                                       uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 25];

   return raw_count * 4;
}

/* Compute Metrics Basic Gen8 :: GPU Time Elapsed
 *
 * RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV
 *
 * Scales the accumulated GPU_TIME value by 1000000000 and divides it by
 * the timestamp frequency system variable, all in unsigned 64-bit
 * arithmetic.  Returns 0 when the timestamp frequency is 0.
 */
static uint64_t
bdw__compute_basic__gpu_time__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   const uint64_t timestamp_freq = brw->perfquery.sys_vars.timestamp_frequency;
   const uint64_t scaled_time =
      accumulator[query->gpu_time_offset + 0] * 1000000000;

   /* Avoid dividing by zero if the frequency is unknown. */
   if (!timestamp_freq)
      return 0;

   return scaled_time / timestamp_freq;
}

/* Compute Metrics Basic Gen8 :: AVG GPU Core Frequency
 *
 * RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV
 *
 * Multiplies the GpuCoreClocks metric by 1000000000 and divides it by the
 * GpuTime metric (unsigned 64-bit arithmetic).  Returns 0 when GpuTime
 * evaluates to 0.
 */
static uint64_t
bdw__compute_basic__avg_gpu_core_frequency__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   const uint64_t scaled_clocks =
      bdw__compute_basic__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t gpu_time =
      bdw__compute_basic__gpu_time__read(brw, query, accumulator);

   /* Avoid dividing by zero when no GPU time was accumulated. */
   if (gpu_time == 0)
      return 0;

   return scaled_clocks / gpu_time;
}

/* Compute Metrics Basic Gen8 :: AVG GPU Core Frequency (maximum value)
 *
 * RPN equation: $GpuMaxFrequency
 *
 * Returns the gt_max_freq system variable stored in the context.
 */
static uint64_t
bdw__compute_basic__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   const uint64_t max_frequency = brw->perfquery.sys_vars.gt_max_freq;

   return max_frequency;
}

/* Compute Metrics Basic Gen8 :: Sampler Texels Misses
 *
 * RPN equation: A 29 READ 4 UMUL
 *
 * Returns the accumulated value of A counter 29 multiplied by 4
 * (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__compute_basic__sampler_texel_misses__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 29];

   return raw_count * 4;
}

/* Compute Metrics Basic Gen8 :: CS Threads Dispatched
 *
 * RPN equation: A 4 READ
 *
 * Returns the accumulated value of A counter 4 unmodified.
 */
static uint64_t
bdw__compute_basic__cs_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[4];
}

/* Compute Metrics Basic Gen8 :: SLM Bytes Read
 *
 * RPN equation: A 30 READ 64 UMUL
 *
 * Returns the accumulated value of A counter 30 multiplied by 64
 * (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__compute_basic__slm_bytes_read__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 30];

   return raw_count * 64;
}

/* Compute Metrics Basic Gen8 :: GTI Read Throughput
 *
 * RPN equation: C 4 READ 64 UMUL
 *
 * Returns the accumulated value of C counter 4 multiplied by 64
 * (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__compute_basic__gti_read_throughput__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->c_offset + 4];

   return raw_count * 64;
}

/* Compute Metrics Basic Gen8 :: Untyped Writes
 *
 * RPN equation: C 1 READ C 2 READ C 3 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL
 *
 * Sums C counters 1, 2 and 3 and multiplies the total by
 * (n_eu_slices * 64).  Everything is evaluated in unsigned 64-bit
 * arithmetic.
 */
static uint64_t
bdw__compute_basic__untyped_bytes_written__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   const uint64_t *c_counters = accumulator + query->c_offset;
   const uint64_t counter_sum = c_counters[1] + (c_counters[2] + c_counters[3]);
   const uint64_t scale = brw->perfquery.sys_vars.n_eu_slices * 64;

   return counter_sum * scale;
}

/* Compute Metrics Basic Gen8 :: GPU Busy
 *
 * RPN equation: A 0 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Multiplies A counter 0 by 100 (unsigned 64-bit) and divides the result,
 * as a double, by the GpuCoreClocks metric.  Returns 0 when GpuCoreClocks
 * evaluates to 0.
 */
static float
bdw__compute_basic__gpu_busy__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   const uint64_t busy_x100 = accumulator[query->a_offset + 0] * 100;
   const double dividend = busy_x100;
   const double core_clocks =
      bdw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   /* Guard the floating point division against a zero divisor. */
   if (core_clocks)
      result = dividend / core_clocks;

   return result;
}

/* Compute Metrics Basic Gen8 :: Rasterized Pixels
 *
 * RPN equation: A 21 READ 4 UMUL
 *
 * Returns the accumulated value of A counter 21 multiplied by 4
 * (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__compute_basic__rasterized_pixels__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 21];

   return raw_count * 4;
}

/* Compute Metrics Basic Gen8 :: Typed Bytes Read
 *
 * RPN equation: B 0 READ B 1 READ B 2 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL
 *
 * Sums B counters 0, 1 and 2 and multiplies the total by
 * (n_eu_slices * 64).  Everything is evaluated in unsigned 64-bit
 * arithmetic.
 */
static uint64_t
bdw__compute_basic__typed_bytes_read__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   const uint64_t *b_counters = accumulator + query->b_offset;
   const uint64_t counter_sum = b_counters[0] + (b_counters[1] + b_counters[2]);
   const uint64_t scale = brw->perfquery.sys_vars.n_eu_slices * 64;

   return counter_sum * scale;
}

/* Compute Metrics Basic Gen8 :: DS Threads Dispatched
 *
 * RPN equation: A 3 READ
 *
 * Returns the accumulated value of A counter 3 unmodified.
 */
static uint64_t
bdw__compute_basic__ds_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[3];
}

/* Compute Metrics Basic Gen8 :: EU Thread Occupancy
 *
 * RPN equation: A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL  $GpuCoreClocks FDIV
 *
 * Integer stage (uint64_t): A13 * 8, divided by n_eus, divided by
 * eu_threads_count, multiplied by 100.  Either integer division yields 0
 * if its divisor is 0.  The integer result is then converted to double
 * and divided by the GpuCoreClocks metric; a zero GpuCoreClocks yields 0.
 */
static float
bdw__compute_basic__eu_thread_occupancy__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t threads_per_eu = brw->perfquery.sys_vars.eu_threads_count;
   uint64_t value = accumulator[query->a_offset + 13] * 8;

   /* Each integer division falls back to 0 on a zero divisor. */
   if (n_eus)
      value /= n_eus;
   else
      value = 0;

   if (threads_per_eu)
      value /= threads_per_eu;
   else
      value = 0;

   value *= 100;

   const double dividend = value;
   const double core_clocks =
      bdw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (core_clocks)
      result = dividend / core_clocks;

   return result;
}

/* Compute Metrics Basic Gen8 :: EU Stall
 *
 * RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Integer stage (uint64_t): A8 divided by n_eus (0 if n_eus is 0), then
 * multiplied by 100.  The result is converted to double and divided by
 * the GpuCoreClocks metric; a zero GpuCoreClocks yields 0.
 */
static float
bdw__compute_basic__eu_stall__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   uint64_t value = accumulator[query->a_offset + 8];

   /* Integer division falls back to 0 on a zero divisor. */
   if (n_eus)
      value /= n_eus;
   else
      value = 0;

   value *= 100;

   const double dividend = value;
   const double core_clocks =
      bdw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (core_clocks)
      result = dividend / core_clocks;

   return result;
}

/* Compute Metrics Basic Gen8 :: Samples Blended
 *
 * RPN equation: A 27 READ 4 UMUL
 *
 * Returns the accumulated value of A counter 27 multiplied by 4
 * (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__compute_basic__samples_blended__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 27];

   return raw_count * 4;
}

/* Compute Metrics Basic Gen8 :: Early Depth Test Fails
 *
 * RPN equation: A 23 READ 4 UMUL
 *
 * Returns the accumulated value of A counter 23 multiplied by 4
 * (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__compute_basic__early_depth_test_fails__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 23];

   return raw_count * 4;
}

/* Compute Metrics Basic Gen8 :: Shader Memory Accesses
 *
 * RPN equation: A 32 READ
 *
 * Returns the accumulated value of A counter 32 unmodified.
 */
static uint64_t
bdw__compute_basic__shader_memory_accesses__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[32];
}

/* Compute Metrics Basic Gen8 :: HS Threads Dispatched
 *
 * RPN equation: A 2 READ
 *
 * Returns the accumulated value of A counter 2 unmodified.
 */
static uint64_t
bdw__compute_basic__hs_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[2];
}

/* Compute Metrics Basic Gen8 :: GTI Write Throughput
 *
 * RPN equation: C 5 READ 64 UMUL
 *
 * Returns the accumulated value of C counter 5 multiplied by 64
 * (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__compute_basic__gti_write_throughput__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->c_offset + 5];

   return raw_count * 64;
}

/* Compute Metrics Basic Gen8 :: SLM Bytes Written
 *
 * RPN equation: A 31 READ 64 UMUL
 *
 * Returns the accumulated value of A counter 31 multiplied by 64
 * (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__compute_basic__slm_bytes_written__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 31];

   return raw_count * 64;
}

/* Compute Metrics Basic Gen8 :: L3 Shader Throughput
 *
 * RPN equation: A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL
 *
 * Adds A counters 30 and 31 to the ShaderMemoryAccesses metric and
 * multiplies the total by 64 (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__compute_basic__l3_shader_throughput__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   const uint64_t *a_counters = accumulator + query->a_offset;
   const uint64_t memory_accesses =
      bdw__compute_basic__shader_memory_accesses__read(brw, query, accumulator);
   const uint64_t total = a_counters[30] + (a_counters[31] + memory_accesses);

   return total * 64;
}

/* Compute Metrics Basic Gen8 :: Samples Killed in FS
 *
 * RPN equation: A 24 READ 4 UMUL
 *
 * Returns the accumulated value of A counter 24 multiplied by 4
 * (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__compute_basic__samples_killed_in_ps__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 24];

   return raw_count * 4;
}

/* Compute Metrics Basic Gen8 :: Samples Written
 *
 * RPN equation: A 26 READ 4 UMUL
 *
 * Returns the accumulated value of A counter 26 multiplied by 4
 * (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__compute_basic__samples_written__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 26];

   return raw_count * 4;
}

/* Compute Metrics Basic Gen8 :: Shader Atomic Memory Accesses
 *
 * RPN equation: A 34 READ
 *
 * Returns the accumulated value of A counter 34 unmodified.
 */
static uint64_t
bdw__compute_basic__shader_atomics__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[34];
}

/* Compute Metrics Basic Gen8 :: EU Send Pipe Active
 *
 * RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Integer stage (uint64_t): A12 divided by n_eus (0 if n_eus is 0), then
 * multiplied by 100.  The result is converted to double and divided by
 * the GpuCoreClocks metric; a zero GpuCoreClocks yields 0.
 */
static float
bdw__compute_basic__eu_send_active__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   uint64_t value = accumulator[query->a_offset + 12];

   /* Integer division falls back to 0 on a zero divisor. */
   if (n_eus)
      value /= n_eus;
   else
      value = 0;

   value *= 100;

   const double dividend = value;
   const double core_clocks =
      bdw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (core_clocks)
      result = dividend / core_clocks;

   return result;
}

/* Backing storage for the register programming lists of the
 * "Compute Metrics Basic Gen8" query.  The arrays have static storage and
 * therefore start out zero-filled; register_compute_basic_counter_query()
 * appends entries to the mux list while bumping query->n_mux_regs.
 */
static struct brw_perf_query_register_prog bdw_compute_basic_mux_regs[182];
static struct brw_perf_query_register_prog bdw_compute_basic_b_counter_regs[5];
static struct brw_perf_query_register_prog bdw_compute_basic_flex_regs[7];

/* Storage for the counter descriptions referenced by the query below. */
static struct brw_perf_query_counter bdw_compute_basic_query_counters[39];

/* Static description of the "Compute Metrics Basic Gen8" OA query.  All
 * element counts start at 0 and the pointers reference the fixed-size
 * arrays declared above.
 */
static struct brw_perf_query_info bdw_compute_basic_query = {
   .kind = OA_COUNTERS,
   .name = "Compute Metrics Basic Gen8",
   .guid = "35fbc9b2-a891-40a6-a38d-022bb7057552",
   .counters = bdw_compute_basic_query_counters,
   .n_counters = 0, /* presumably filled in during registration -- TODO confirm */
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,

   /* Accumulation buffer offsets: indices into the uint64_t accumulator
    * array that the bdw__compute_basic__*__read() functions add their
    * counter number to (e.g. accumulator[query->a_offset + 5]). */
   .gpu_time_offset = 0,
   .gpu_clock_offset = 1,
   .a_offset = 2,
   .b_offset = 38,
   .c_offset = 46,
   .mux_regs = bdw_compute_basic_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = bdw_compute_basic_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
   .flex_regs = bdw_compute_basic_flex_regs,
   .n_flex_regs = 0, /* Determined at runtime */
};

static void
register_compute_basic_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &bdw_compute_basic_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      if (brw->perfquery.sys_vars.slice_mask & 0x01) {
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x105C00E0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x105800E0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x103800E0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3580001A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3B800060 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3D800005 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x065C2100 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A5C0041 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C5C6600 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x005C6580 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x085C8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E5C8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00580042 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08582080 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C58004C };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E582580 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x005B4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x185B1000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1A5B0104 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C1FA800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1FAA00 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x101F02AA };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08380042 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A382080 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E38404C };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0238404B };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16380000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18381145 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04380000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0039A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06398000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0839A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A39A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C39A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E39A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02392000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8AAAA0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D8A0002 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B02A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258B5550 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x278B0015 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F850A80 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185AAA0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x19835400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830155 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0184C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07848000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0984C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03844000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x17808137 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1980C147 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B80C0E5 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C0E3 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1180C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x13804000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x15800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00000D24, .val = 0x00000000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D801000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800111 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x43800062 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x51800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800062 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47800062 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F801062 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41801084 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };
      }

      if (brw->perfquery.sys_vars.slice_mask & 0x02) {
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10DC00E0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10D800E0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10B800E0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3580001A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3B800060 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3D800005 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06DC2100 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0ADC0041 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CDC6600 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00DC6580 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08DC8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EDC8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00D80042 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08D82080 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CD8004C };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0ED82580 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00DB4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18DB1000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1ADB0104 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C9FA800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E9FAA00 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x109F02AA };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08B80042 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0AB82080 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EB8404C };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02B8404B };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00B84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16B80000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18B81145 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04B80000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00B9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06B98000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08B9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0AB9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CB9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EB9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02B92000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D88F800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F88000F };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B0540 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258BAAA0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x278B002A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x178C2000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198C5500 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8C0015 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078D8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038D2000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F850A80 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185AAA0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x19835400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830155 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0184C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07848000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0984C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03844000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x17808137 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1980C147 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B80C0E5 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C0E3 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1180C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x13804000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x15800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00000D24, .val = 0x00000000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D805000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800555 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x43800062 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x51800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800062 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47800062 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F800062 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };
      }

      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0x00800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0x00800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };

      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E458, .val = 0x00005004 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E558, .val = 0x00000003 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E658, .val = 0x00002001 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E758, .val = 0x00778008 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E45C, .val = 0x00088078 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E55C, .val = 0x00808708 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E65C, .val = 0x00A08908 };


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__untyped_bytes_read__read;
      counter->name = "Untyped Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 16;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 24;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__typed_bytes_written__read;
      counter->name = "Typed Bytes Written";
      counter->desc = "The total number of typed memory bytes written via Data Port.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 32;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_basic__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 40;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_basic__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 44;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_basic__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 2;
      counter->offset = 48;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 56;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 64;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 72;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 80;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 88;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 96;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 104;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = bdw__compute_basic__avg_gpu_core_frequency__max(brw);
      counter->offset = 120;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 128;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 136;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 144;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 152;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__untyped_bytes_written__read;
      counter->name = "Untyped Writes";
      counter->desc = "The total number of untyped memory bytes written via Data Port.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 160;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 168;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 176;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__typed_bytes_read__read;
      counter->name = "Typed Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 184;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 192;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 200;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 204;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 208;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 216;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 224;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 232;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 240;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 248;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 256;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 264;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 272;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 280;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_basic__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 288;
      counter->size = sizeof(float);

      query->data_size = counter->offset + counter->size;
   }

   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Render Metrics for 3D Pipeline Profile :: GPU Core Clocks
 *
 * Returns the accumulator entry located at query->gpu_clock_offset,
 * i.e. the first GPU clock slot. The brw argument is not used here.
 */
static uint64_t
bdw__render_pipe_profile__gpu_core_clocks__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: GPU_CLOCK 0 READ */
   return accumulator[query->gpu_clock_offset + 0];
}

/* Render Metrics for 3D Pipeline Profile :: EU Active
 *
 * Takes A counter 7, divides it (integer division) by the EU count from
 * brw->perfquery.sys_vars.n_eus, multiplies by 100 and finally divides,
 * in floating point, by the GPU core clock count. A zero EU count or a
 * zero clock count each make the corresponding division yield 0.
 */
static float
bdw__render_pipe_profile__eu_active__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   /* RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t a7 = accumulator[query->a_offset + 7];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;

   /* UDIV: guarded unsigned division, 0 when there is no divisor. */
   uint64_t per_eu = 0;
   if (eu_count != 0)
      per_eu = a7 / eu_count;

   /* UMUL: still unsigned 64-bit arithmetic at this point. */
   const uint64_t scaled = per_eu * 100;

   /* FDIV: switch to double for the final ratio. */
   const double numerator = scaled;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);

   double result = 0;
   if (core_clocks != 0)
      result = numerator / core_clocks;

   return result;
}

/* Render Metrics for 3D Pipeline Profile :: VS Bottleneck
 *
 * Multiplies B counter 0 by 100 (unsigned 64-bit) and divides the result,
 * in floating point, by the GPU core clock count. Yields 0 when the clock
 * count is zero.
 */
static float
bdw__render_pipe_profile__vs_bottleneck__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: B 0 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t scaled = accumulator[query->b_offset + 0] * 100;
   const double numerator = scaled;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);

   double result = 0;
   if (core_clocks != 0)
      result = numerator / core_clocks;

   return result;
}

/* Render Metrics for 3D Pipeline Profile :: Hi-Depth Bottleneck
 *
 * Multiplies C counter 6 by 100 (unsigned 64-bit) and divides the result,
 * in floating point, by the GPU core clock count. Yields 0 when the clock
 * count is zero.
 */
static float
bdw__render_pipe_profile__hi_depth_bottleneck__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   /* RPN equation: C 6 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t scaled = accumulator[query->c_offset + 6] * 100;
   const double numerator = scaled;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);

   double result = 0;
   if (core_clocks != 0)
      result = numerator / core_clocks;

   return result;
}

/* Render Metrics for 3D Pipeline Profile :: GS Bottleneck
 *
 * Multiplies B counter 3 by 100 (unsigned 64-bit) and divides the result,
 * in floating point, by the GPU core clock count. Yields 0 when the clock
 * count is zero.
 */
static float
bdw__render_pipe_profile__gs_bottleneck__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: B 3 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t scaled = accumulator[query->b_offset + 3] * 100;
   const double numerator = scaled;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);

   double result = 0;
   if (core_clocks != 0)
      result = numerator / core_clocks;

   return result;
}

/* Render Metrics for 3D Pipeline Profile :: GS Threads Dispatched
 *
 * RPN equation: A 5 READ
 *
 * Returns the accumulated value of A counter 5 unchanged.
 */
static uint64_t
bdw__render_pipe_profile__gs_threads__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   return accumulator[query->a_offset + 5];
}

/* Render Metrics for 3D Pipeline Profile :: Early Hi-Depth Test Fails
 *
 * RPN equation: A 22 READ 4 UMUL
 *
 * Returns the accumulated value of A counter 22 multiplied by 4.
 */
static uint64_t
bdw__render_pipe_profile__hi_depth_test_fails__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 22];

   return raw * 4;
}

/* Render Metrics for 3D Pipeline Profile :: VS Threads Dispatched
 *
 * RPN equation: A 1 READ
 *
 * Returns the accumulated value of A counter 1 unchanged.
 */
static uint64_t
bdw__render_pipe_profile__vs_threads__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   return accumulator[query->a_offset + 1];
}

/* Render Metrics for 3D Pipeline Profile :: FS Threads Dispatched
 *
 * RPN equation: A 6 READ
 *
 * Returns the accumulated value of A counter 6 unchanged.
 */
static uint64_t
bdw__render_pipe_profile__ps_threads__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   return accumulator[query->a_offset + 6];
}

/* Render Metrics for 3D Pipeline Profile :: BC Bottleneck
 *
 * RPN equation: C 7 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Expresses C counter 7 as a percentage of the GPU core clock count.  The
 * result is 0 when the clock count is zero.
 */
static float
bdw__render_pipe_profile__bc_bottleneck__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->c_offset + 7] * 100;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   /* Only divide when there is a non-zero clock count to divide by. */
   if (core_clocks != 0)
      percent = (double) scaled / core_clocks;

   return percent;
}

/* Render Metrics for 3D Pipeline Profile :: HS Stall
 *
 * RPN equation: C 1 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Expresses C counter 1 as a percentage of the GPU core clock count.  The
 * result is 0 when the clock count is zero.
 */
static float
bdw__render_pipe_profile__hs_stall__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->c_offset + 1] * 100;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   /* Only divide when there is a non-zero clock count to divide by. */
   if (core_clocks != 0)
      percent = (double) scaled / core_clocks;

   return percent;
}

/* Render Metrics for 3D Pipeline Profile :: Shader Barrier Messages
 *
 * RPN equation: A 35 READ
 *
 * Returns the accumulated value of A counter 35 unchanged.
 */
static uint64_t
bdw__render_pipe_profile__shader_barriers__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   return accumulator[query->a_offset + 35];
}

/* Render Metrics for 3D Pipeline Profile :: Sampler Texels
 *
 * RPN equation: A 28 READ 4 UMUL
 *
 * Returns the accumulated value of A counter 28 multiplied by 4.
 */
static uint64_t
bdw__render_pipe_profile__sampler_texels__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 28];

   return raw * 4;
}

/* Render Metrics for 3D Pipeline Profile :: Pixels Failing Tests
 *
 * RPN equation: A 25 READ 4 UMUL
 *
 * Returns the accumulated value of A counter 25 multiplied by 4.
 */
static uint64_t
bdw__render_pipe_profile__pixels_failing_post_ps_tests__read(struct brw_context *brw,
                                                             const struct brw_perf_query_info *query,
                                                             uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 25];

   return raw * 4;
}

/* Render Metrics for 3D Pipeline Profile :: GPU Time Elapsed
 *
 * RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV
 *
 * Scales the accumulated GPU_TIME value by 1000000000 / timestamp_frequency
 * (i.e. timestamp ticks to nanoseconds, assuming the frequency is in Hz).
 * Returns 0 when the timestamp frequency is zero.
 */
static uint64_t
bdw__render_pipe_profile__gpu_time__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t scale = 1000000000;
   const uint64_t ticks = accumulator[query->gpu_time_offset + 0];
   const uint64_t freq = brw->perfquery.sys_vars.timestamp_frequency;

   if (freq == 0)
      return 0;

   /* Computing ticks * scale directly wraps modulo 2^64 once ticks exceeds
    * roughly 1.8e10, which silently corrupts the result for long
    * accumulation periods.  Splitting ticks into quotient and remainder
    * yields exactly floor(ticks * scale / freq) without the oversized
    * intermediate: the remainder is below freq, so remainder * scale cannot
    * wrap as long as freq stays below ~1.8e10.
    */
   const uint64_t whole = ticks / freq;
   const uint64_t remainder = ticks % freq;

   return whole * scale + (remainder * scale) / freq;
}

/* Render Metrics for 3D Pipeline Profile :: AVG GPU Core Frequency
 *
 * RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV
 *
 * Divides the GPU core clock count, scaled by 1000000000, by the GPU time
 * metric.  Returns 0 when the GPU time metric is zero.
 *
 * Note: the scaling is a plain uint64_t multiplication, so it wraps modulo
 * 2^64 for sufficiently large clock counts.
 */
static uint64_t
bdw__render_pipe_profile__avg_gpu_core_frequency__read(struct brw_context *brw,
                                                       const struct brw_perf_query_info *query,
                                                       uint64_t *accumulator)
{
   const uint64_t scaled_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t gpu_time =
      bdw__render_pipe_profile__gpu_time__read(brw, query, accumulator);

   if (gpu_time == 0)
      return 0;

   return scaled_clocks / gpu_time;
}

/* Render Metrics for 3D Pipeline Profile :: AVG GPU Core Frequency
 *
 * RPN equation: $GpuMaxFrequency
 *
 * Upper bound for the AVG GPU Core Frequency counter: the gt_max_freq
 * system variable stored in the context.
 */
static uint64_t
bdw__render_pipe_profile__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   const uint64_t max_freq = brw->perfquery.sys_vars.gt_max_freq;

   return max_freq;
}

/* Render Metrics for 3D Pipeline Profile :: Sampler Texels Misses
 *
 * RPN equation: A 29 READ 4 UMUL
 *
 * Returns the accumulated value of A counter 29 multiplied by 4.
 */
static uint64_t
bdw__render_pipe_profile__sampler_texel_misses__read(struct brw_context *brw,
                                                     const struct brw_perf_query_info *query,
                                                     uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 29];

   return raw * 4;
}

/* Render Metrics for 3D Pipeline Profile :: CS Threads Dispatched
 *
 * RPN equation: A 4 READ
 *
 * Returns the accumulated value of A counter 4 unchanged.
 */
static uint64_t
bdw__render_pipe_profile__cs_threads__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   return accumulator[query->a_offset + 4];
}

/* Render Metrics for 3D Pipeline Profile :: VF Bottleneck
 *
 * RPN equation: C 0 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Expresses C counter 0 as a percentage of the GPU core clock count.  The
 * result is 0 when the clock count is zero.
 */
static float
bdw__render_pipe_profile__vf_bottleneck__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->c_offset + 0] * 100;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   /* Only divide when there is a non-zero clock count to divide by. */
   if (core_clocks != 0)
      percent = (double) scaled / core_clocks;

   return percent;
}

/* Render Metrics for 3D Pipeline Profile :: SLM Bytes Read
 *
 * RPN equation: A 30 READ 64 UMUL
 *
 * Returns the accumulated value of A counter 30 multiplied by 64.
 */
static uint64_t
bdw__render_pipe_profile__slm_bytes_read__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 30];

   return raw * 64;
}

/* Render Metrics for 3D Pipeline Profile :: Strip-Fans Bottleneck
 *
 * RPN equation: B 6 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Expresses B counter 6 as a percentage of the GPU core clock count.  The
 * result is 0 when the clock count is zero.
 */
static float
bdw__render_pipe_profile__sf_bottleneck__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->b_offset + 6] * 100;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   /* Only divide when there is a non-zero clock count to divide by. */
   if (core_clocks != 0)
      percent = (double) scaled / core_clocks;

   return percent;
}

/* Render Metrics for 3D Pipeline Profile :: Sampler Accesses
 *
 * RPN equation: A 28 READ
 *
 * Returns the accumulated value of A counter 28 unchanged.
 */
static uint64_t
bdw__render_pipe_profile__sampler_accesses__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   return accumulator[query->a_offset + 28];
}

/* Render Metrics for 3D Pipeline Profile :: SF Stall
 *
 * RPN equation: C 5 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Expresses C counter 5 as a percentage of the GPU core clock count.  The
 * result is 0 when the clock count is zero.
 */
static float
bdw__render_pipe_profile__sf_stall__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->c_offset + 5] * 100;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   /* Only divide when there is a non-zero clock count to divide by. */
   if (core_clocks != 0)
      percent = (double) scaled / core_clocks;

   return percent;
}

/* Render Metrics for 3D Pipeline Profile :: GPU Busy
 *
 * RPN equation: A 0 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Expresses A counter 0 as a percentage of the GPU core clock count.  The
 * result is 0 when the clock count is zero.
 */
static float
bdw__render_pipe_profile__gpu_busy__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->a_offset + 0] * 100;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   /* Only divide when there is a non-zero clock count to divide by. */
   if (core_clocks != 0)
      percent = (double) scaled / core_clocks;

   return percent;
}

/* Render Metrics for 3D Pipeline Profile :: HS Bottleneck
 *
 * RPN equation: B 1 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Expresses B counter 1 as a percentage of the GPU core clock count.  The
 * result is 0 when the clock count is zero.
 */
static float
bdw__render_pipe_profile__hs_bottleneck__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->b_offset + 1] * 100;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   /* Only divide when there is a non-zero clock count to divide by. */
   if (core_clocks != 0)
      percent = (double) scaled / core_clocks;

   return percent;
}

/* Render Metrics for 3D Pipeline Profile :: CL Stall
 *
 * RPN equation: C 4 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Expresses C counter 4 as a percentage of the GPU core clock count.  The
 * result is 0 when the clock count is zero.
 */
static float
bdw__render_pipe_profile__cl_stall__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->c_offset + 4] * 100;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   /* Only divide when there is a non-zero clock count to divide by. */
   if (core_clocks != 0)
      percent = (double) scaled / core_clocks;

   return percent;
}

/* Render Metrics for 3D Pipeline Profile :: SO Bottleneck
 *
 * RPN equation: B 4 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Expresses B counter 4 as a percentage of the GPU core clock count.  The
 * result is 0 when the clock count is zero.
 */
static float
bdw__render_pipe_profile__so_bottleneck__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->b_offset + 4] * 100;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   /* Only divide when there is a non-zero clock count to divide by. */
   if (core_clocks != 0)
      percent = (double) scaled / core_clocks;

   return percent;
}

/* Render Metrics for 3D Pipeline Profile :: Rasterized Pixels
 *
 * RPN equation: A 21 READ 4 UMUL
 *
 * Returns the accumulated value of A counter 21 multiplied by 4.
 */
static uint64_t
bdw__render_pipe_profile__rasterized_pixels__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 21];

   return raw * 4;
}

/* Render Metrics for 3D Pipeline Profile :: DS Threads Dispatched
 *
 * RPN equation: A 3 READ
 *
 * Returns the accumulated value of A counter 3 unchanged.
 */
static uint64_t
bdw__render_pipe_profile__ds_threads__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   return accumulator[query->a_offset + 3];
}

/* Render Metrics for 3D Pipeline Profile :: Samples Written
 *
 * RPN equation: A 26 READ 4 UMUL
 *
 * Returns the accumulated value of A counter 26 multiplied by 4.
 */
static uint64_t
bdw__render_pipe_profile__samples_written__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 26];

   return raw * 4;
}

/* Render Metrics for 3D Pipeline Profile :: DS Bottleneck
 *
 * RPN equation: B 2 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Expresses B counter 2 as a percentage of the GPU core clock count.  The
 * result is 0 when the clock count is zero.
 */
static float
bdw__render_pipe_profile__ds_bottleneck__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->b_offset + 2] * 100;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   /* Only divide when there is a non-zero clock count to divide by. */
   if (core_clocks != 0)
      percent = (double) scaled / core_clocks;

   return percent;
}

/* Render Metrics for 3D Pipeline Profile :: EU Stall
 *
 * RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A counter 8 by the EU count (integer division), scales the
 * quotient by 100 and expresses it relative to the GPU core clock count.
 * A zero EU count or a zero clock count each produce 0 for their division.
 */
static float
bdw__render_pipe_profile__eu_stall__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 8];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   uint64_t per_eu = 0;

   /* The integer division happens before the scaling, as the equation says. */
   if (n_eus != 0)
      per_eu = raw / n_eus;

   const uint64_t scaled = per_eu * 100;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (core_clocks != 0)
      percent = (double) scaled / core_clocks;

   return percent;
}

/* Render Metrics for 3D Pipeline Profile :: Clipper Bottleneck
 *
 * RPN equation: B 5 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Expresses B counter 5 as a percentage of the GPU core clock count.  The
 * result is 0 when the clock count is zero.
 */
static float
bdw__render_pipe_profile__cl_bottleneck__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->b_offset + 5] * 100;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   /* Only divide when there is a non-zero clock count to divide by. */
   if (core_clocks != 0)
      percent = (double) scaled / core_clocks;

   return percent;
}

/* Render Metrics for 3D Pipeline Profile :: DS Stall
 *
 * RPN equation: C 2 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Expresses C counter 2 as a percentage of the GPU core clock count.  The
 * result is 0 when the clock count is zero.
 */
static float
bdw__render_pipe_profile__ds_stall__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->c_offset + 2] * 100;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   /* Only divide when there is a non-zero clock count to divide by. */
   if (core_clocks != 0)
      percent = (double) scaled / core_clocks;

   return percent;
}

/* Render Metrics for 3D Pipeline Profile :: Early Depth Bottleneck
 *
 * RPN equation: B 7 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Expresses B counter 7 as a percentage of the GPU core clock count.  The
 * result is 0 when the clock count is zero.
 */
static float
bdw__render_pipe_profile__early_depth_bottleneck__read(struct brw_context *brw,
                                                       const struct brw_perf_query_info *query,
                                                       uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->b_offset + 7] * 100;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   /* Only divide when there is a non-zero clock count to divide by. */
   if (core_clocks != 0)
      percent = (double) scaled / core_clocks;

   return percent;
}

/* Render Metrics for 3D Pipeline Profile :: Samples Blended
 *
 * RPN equation: A 27 READ 4 UMUL
 *
 * Returns the accumulated value of A counter 27 multiplied by 4.
 */
static uint64_t
bdw__render_pipe_profile__samples_blended__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 27];

   return raw * 4;
}

/* Render Metrics for 3D Pipeline Profile :: Early Depth Test Fails
 *
 * RPN equation: A 23 READ 4 UMUL
 *
 * Returns the accumulated value of A counter 23 multiplied by 4.
 */
static uint64_t
bdw__render_pipe_profile__early_depth_test_fails__read(struct brw_context *brw,
                                                       const struct brw_perf_query_info *query,
                                                       uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 23];

   return raw * 4;
}

/* Render Metrics for 3D Pipeline Profile :: Shader Memory Accesses
 *
 * RPN equation: A 32 READ
 *
 * Returns the accumulated value of A counter 32 unchanged.
 */
static uint64_t
bdw__render_pipe_profile__shader_memory_accesses__read(struct brw_context *brw,
                                                       const struct brw_perf_query_info *query,
                                                       uint64_t *accumulator)
{
   return accumulator[query->a_offset + 32];
}

/* Render Metrics for 3D Pipeline Profile :: HS Threads Dispatched
 *
 * RPN equation: A 2 READ
 *
 * Returns the accumulated value of A counter 2 unchanged.
 */
static uint64_t
bdw__render_pipe_profile__hs_threads__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   return accumulator[query->a_offset + 2];
}

/* Render Metrics for 3D Pipeline Profile :: SLM Bytes Written
 *
 * RPN equation: A 31 READ 64 UMUL
 *
 * Returns the accumulated value of A counter 31 multiplied by 64.
 */
static uint64_t
bdw__render_pipe_profile__slm_bytes_written__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 31];

   return raw * 64;
}

/* Render Metrics for 3D Pipeline Profile :: L3 Shader Throughput
 *
 * RPN equation: A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL
 *
 * Sums A counter 30, A counter 31 and the Shader Memory Accesses metric,
 * then multiplies the total by 64.
 */
static uint64_t
bdw__render_pipe_profile__l3_shader_throughput__read(struct brw_context *brw,
                                                     const struct brw_perf_query_info *query,
                                                     uint64_t *accumulator)
{
   const uint64_t a30 = accumulator[query->a_offset + 30];
   const uint64_t a31 = accumulator[query->a_offset + 31];
   const uint64_t shader_accesses =
      bdw__render_pipe_profile__shader_memory_accesses__read(brw, query, accumulator);

   /* Same nesting as the RPN stack: the inner UADD runs first. */
   return (a30 + (a31 + shader_accesses)) * 64;
}

/* Render Metrics for 3D Pipeline Profile :: Samples Killed in FS
 *
 * RPN equation: A 24 READ 4 UMUL
 *
 * Returns the accumulated value of A counter 24 multiplied by 4.
 */
static uint64_t
bdw__render_pipe_profile__samples_killed_in_ps__read(struct brw_context *brw,
                                                     const struct brw_perf_query_info *query,
                                                     uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 24];

   return raw * 4;
}

/* Render Metrics for 3D Pipeline Profile :: SO Stall
 *
 * RPN equation: C 3 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Expresses C counter 3 as a percentage of the GPU core clock count.  The
 * result is 0 when the clock count is zero.
 */
static float
bdw__render_pipe_profile__so_stall__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->c_offset + 3] * 100;
   const double core_clocks =
      bdw__render_pipe_profile__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   /* Only divide when there is a non-zero clock count to divide by. */
   if (core_clocks != 0)
      percent = (double) scaled / core_clocks;

   return percent;
}

/* Render Metrics for 3D Pipeline Profile :: Shader Atomic Memory Accesses
 *
 * RPN equation: A 34 READ
 *
 * Returns the accumulated value of A counter 34 unchanged.
 */
static uint64_t
bdw__render_pipe_profile__shader_atomics__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   return accumulator[query->a_offset + 34];
}

/* Register programming tables for the "Render Metrics for 3D Pipeline
 * Profile" metric set.  The arrays are declared with a fixed capacity and
 * start out zeroed; register_render_pipe_profile_counter_query() appends
 * the mux entries one at a time, bumping query->n_mux_regs as it goes.
 * NOTE(review): the B-counter and flex tables are presumably filled the same
 * way -- confirm against the remainder of the register function.
 */
static struct brw_perf_query_register_prog bdw_render_pipe_profile_mux_regs[111];
static struct brw_perf_query_register_prog bdw_render_pipe_profile_b_counter_regs[21];
static struct brw_perf_query_register_prog bdw_render_pipe_profile_flex_regs[7];

/* Backing storage for the counter descriptions referenced by the query. */
static struct brw_perf_query_counter bdw_render_pipe_profile_query_counters[44];

/* Static description of the metric set.  All n_* element counts start at
 * zero here and the tables are pointed at the storage declared above.
 */
static struct brw_perf_query_info bdw_render_pipe_profile_query = {
   .kind = OA_COUNTERS,
   .name = "Render Metrics for 3D Pipeline Profile",
   .guid = "233d0544-fff7-4281-8291-e02f222aff72",
   .counters = bdw_render_pipe_profile_query_counters,
   .n_counters = 0,
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,

   /* Accumulation buffer offsets: indices into the uint64_t accumulator
    * array that the *__read() functions add their counter number to.  The
    * spacing (36 A slots, 8 B slots, then C) is consistent with the
    * A32u40_A4u32_B8_C8 format name. */
   .gpu_time_offset = 0,
   .gpu_clock_offset = 1,
   .a_offset = 2,
   .b_offset = 38,
   .c_offset = 46,
   .mux_regs = bdw_render_pipe_profile_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = bdw_render_pipe_profile_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
   .flex_regs = bdw_render_pipe_profile_flex_regs,
   .n_flex_regs = 0, /* Determined at runtime */
};

static void
register_render_pipe_profile_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &bdw_render_pipe_profile_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A1E0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C1F000F };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10176800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1191001F };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B880320 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01890C40 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x118A1C00 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x118D7C00 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x118E0020 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x118F4C00 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x11900000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x13900001 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x065C4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C3D8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06584000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C5B4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x081E0040 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1E0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x021F5400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x001F0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x101F0010 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1F0080 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C384000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06392000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C13C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06164000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06170012 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00170000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01910005 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07880002 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01880C00 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F880000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D880000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05880000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09890032 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078A0800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8A0A00 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198A4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8A2000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D8A0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038A4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B54C0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258BAA55 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x278B0019 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198C0100 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058C4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8D0015 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018D1000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098D8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8DF000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8D3000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038DE000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058D3000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8E0004 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058E000C };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098E0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078E0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038E0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8F0020 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198F0C00 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078F8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098F4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B900980 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03900D80 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01900000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85AA80 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185AAAA };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x19835400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830155 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0184C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0784C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0984C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0384C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0584C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1180C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1780C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1980C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1380C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1580C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00000D24, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D801111 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3D800800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F801011 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x43800443 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x51801111 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800422 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53801111 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47800C60 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F800422 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800021 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };

      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002770, .val = 0x0007FFEA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002774, .val = 0x00007FFC };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002778, .val = 0x0007AFFA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000277C, .val = 0x0000F5FD };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002780, .val = 0x00079FFA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002784, .val = 0x0000F3FB };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002788, .val = 0x0007BF7A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000278C, .val = 0x0000F7E7 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002790, .val = 0x0007FEFA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002794, .val = 0x0000F7CF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002798, .val = 0x00077FFA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000279C, .val = 0x0000EFDF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A0, .val = 0x0006FFFA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A4, .val = 0x0000CFBF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A8, .val = 0x0003FFFA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027AC, .val = 0x00005F7F };

      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E458, .val = 0x00005004 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E558, .val = 0x00015014 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E658, .val = 0x00025024 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E758, .val = 0x00035034 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E45C, .val = 0x00045044 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E55C, .val = 0x00055054 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E65C, .val = 0x00065064 };


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 12;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 16;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 20;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 24;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 32;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 40;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 48;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 56;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 60;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 64;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 72;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 80;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 88;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = bdw__render_pipe_profile__avg_gpu_core_frequency__max(brw);
      counter->offset = 96;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 104;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 120;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 128;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 136;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__sampler_accesses__read;
      counter->name = "Sampler Accesses";
      counter->desc = "The total number of messages send to samplers.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 144;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 152;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 156;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 160;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 164;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 168;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 176;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 184;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 192;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 200;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 204;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 208;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 212;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__early_depth_bottleneck__read;
      counter->name = "Early Depth Bottleneck";
      counter->desc = "The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 216;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 224;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 232;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 240;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 248;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 256;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 264;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 272;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 280;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 288;
      counter->size = sizeof(uint64_t);

      query->data_size = counter->offset + counter->size;
   }

   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Memory Reads Distribution Gen8 :: GPU Core Clocks
 *
 * Returns the accumulator entry located at query->gpu_clock_offset.
 * The brw argument is not used by this counter.
 */
static uint64_t
bdw__memory_reads__gpu_core_clocks__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: GPU_CLOCK 0 READ */
   return accumulator[query->gpu_clock_offset];
}

/* Memory Reads Distribution Gen8 :: EU Active
 *
 * Integer-divides accumulator[a_offset + 7] by the EU count (an EU count
 * of 0 yields 0), scales the quotient by 100 and then divides, in double
 * precision, by the GPU core clocks (a clock count of 0 yields 0).
 * The result is narrowed to float on return.
 */
static float
bdw__memory_reads__eu_active__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   /* RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t a7 = accumulator[query->a_offset + 7];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t per_eu = 0;
   double percent = 0;

   /* UDIV: guarded against a zero divisor. */
   if (eu_count)
      per_eu = a7 / eu_count;

   const double scaled = per_eu * 100;
   const double clocks = bdw__memory_reads__gpu_core_clocks__read(brw, query, accumulator);

   /* FDIV: guarded against a zero divisor. */
   if (clocks)
      percent = scaled / clocks;

   return percent;
}

/* Memory Reads Distribution Gen8 :: GtiL3Bank0Reads
 *
 * Returns accumulator[query->c_offset + 4] unmodified.
 */
static uint64_t
bdw__memory_reads__gti_l3_bank0_reads__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: C 4 READ */
   return accumulator[query->c_offset + 4];
}

/* Memory Reads Distribution Gen8 :: GS Threads Dispatched
 *
 * Returns accumulator[query->a_offset + 5] unmodified.
 */
static uint64_t
bdw__memory_reads__gs_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 5 READ */
   return accumulator[query->a_offset + 5];
}

/* Memory Reads Distribution Gen8 :: GtiRingAccesses
 *
 * Returns accumulator[query->c_offset + 3] multiplied by 2.
 */
static uint64_t
bdw__memory_reads__gti_ring_accesses__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   /* RPN equation: C 3 READ 2 UMUL */
   const uint64_t c3 = accumulator[query->c_offset + 3];

   return c3 * 2;
}

/* Memory Reads Distribution Gen8 :: Early Hi-Depth Test Fails
 *
 * Returns accumulator[query->a_offset + 22] multiplied by 4.
 */
static uint64_t
bdw__memory_reads__hi_depth_test_fails__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: A 22 READ 4 UMUL */
   const uint64_t a22 = accumulator[query->a_offset + 22];

   return a22 * 4;
}

/* Memory Reads Distribution Gen8 :: VS Threads Dispatched
 *
 * Returns accumulator[query->a_offset + 1] unmodified.
 */
static uint64_t
bdw__memory_reads__vs_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 1 READ */
   return accumulator[query->a_offset + 1];
}

/* Memory Reads Distribution Gen8 :: FS Threads Dispatched
 *
 * Returns accumulator[query->a_offset + 6] unmodified.
 */
static uint64_t
bdw__memory_reads__ps_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 6 READ */
   return accumulator[query->a_offset + 6];
}

/* Memory Reads Distribution Gen8 :: GtiL3Bank3Reads
 *
 * Returns accumulator[query->c_offset + 7] unmodified.
 */
static uint64_t
bdw__memory_reads__gti_l3_bank3_reads__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: C 7 READ */
   return accumulator[query->c_offset + 7];
}

/* Memory Reads Distribution Gen8 :: Shader Barrier Messages
 *
 * Returns accumulator[query->a_offset + 35] unmodified.
 */
static uint64_t
bdw__memory_reads__shader_barriers__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: A 35 READ */
   return accumulator[query->a_offset + 35];
}

/* Memory Reads Distribution Gen8 :: GtiRsMemoryReads
 *
 * Returns accumulator[query->b_offset + 2] unmodified.
 */
static uint64_t
bdw__memory_reads__gti_rs_memory_reads__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: B 2 READ */
   return accumulator[query->b_offset + 2];
}

/* Memory Reads Distribution Gen8 :: Sampler Texels
 *
 * Returns accumulator[query->a_offset + 28] multiplied by 4.
 */
static uint64_t
bdw__memory_reads__sampler_texels__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* RPN equation: A 28 READ 4 UMUL */
   const uint64_t a28 = accumulator[query->a_offset + 28];

   return a28 * 4;
}

/* Memory Reads Distribution Gen8 :: Pixels Failing Tests
 *
 * Returns accumulator[query->a_offset + 25] multiplied by 4.
 */
static uint64_t
bdw__memory_reads__pixels_failing_post_ps_tests__read(struct brw_context *brw,
                                                      const struct brw_perf_query_info *query,
                                                      uint64_t *accumulator)
{
   /* RPN equation: A 25 READ 4 UMUL */
   const uint64_t a25 = accumulator[query->a_offset + 25];

   return a25 * 4;
}

/* Memory Reads Distribution Gen8 :: GtiHizMemoryReads
 *
 * Returns accumulator[query->b_offset + 5] unmodified.
 */
static uint64_t
bdw__memory_reads__gti_hiz_memory_reads__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: B 5 READ */
   return accumulator[query->b_offset + 5];
}

/* Memory Reads Distribution Gen8 :: GPU Time Elapsed
 *
 * Multiplies the accumulator entry at query->gpu_time_offset by
 * 1000000000 and integer-divides the product by the timestamp frequency
 * taken from brw->perfquery.sys_vars. A frequency of 0 yields 0.
 */
static uint64_t
bdw__memory_reads__gpu_time__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV */
   const uint64_t scaled = accumulator[query->gpu_time_offset] * 1000000000;
   const uint64_t frequency = brw->perfquery.sys_vars.timestamp_frequency;

   /* UDIV: guarded against a zero divisor. */
   if (!frequency)
      return 0;

   return scaled / frequency;
}

/* Memory Reads Distribution Gen8 :: AVG GPU Core Frequency
 *
 * Multiplies the GPU Core Clocks counter by 1000000000 and integer-divides
 * the product by the GPU Time Elapsed counter. An elapsed time of 0
 * yields 0.
 */
static uint64_t
bdw__memory_reads__avg_gpu_core_frequency__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV */
   const uint64_t scaled_clocks =
      bdw__memory_reads__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t elapsed = bdw__memory_reads__gpu_time__read(brw, query, accumulator);

   /* UDIV: guarded against a zero divisor. */
   if (!elapsed)
      return 0;

   return scaled_clocks / elapsed;
}

/* Memory Reads Distribution Gen8 :: AVG GPU Core Frequency
 *
 * Upper bound for the counter: returns gt_max_freq from
 * brw->perfquery.sys_vars, converted to uint64_t.
 */
static uint64_t
bdw__memory_reads__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   /* RPN equation: $GpuMaxFrequency */
   const uint64_t max_frequency = brw->perfquery.sys_vars.gt_max_freq;

   return max_frequency;
}

/* Memory Reads Distribution Gen8 :: Sampler Texels Misses
 *
 * Returns accumulator[query->a_offset + 29] multiplied by 4.
 */
static uint64_t
bdw__memory_reads__sampler_texel_misses__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: A 29 READ 4 UMUL */
   const uint64_t a29 = accumulator[query->a_offset + 29];

   return a29 * 4;
}

/* Memory Reads Distribution Gen8 :: GtiRccMemoryReads
 *
 * Returns accumulator[query->b_offset + 3] unmodified.
 */
static uint64_t
bdw__memory_reads__gti_rcc_memory_reads__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: B 3 READ */
   return accumulator[query->b_offset + 3];
}

/* Memory Reads Distribution Gen8 :: CS Threads Dispatched
 *
 * Returns accumulator[query->a_offset + 4] unmodified.
 */
static uint64_t
bdw__memory_reads__cs_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 4 READ */
   return accumulator[query->a_offset + 4];
}

/* Memory Reads Distribution Gen8 :: SLM Bytes Read
 *
 * Returns accumulator[query->a_offset + 30] multiplied by 64.
 */
static uint64_t
bdw__memory_reads__slm_bytes_read__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* RPN equation: A 30 READ 64 UMUL */
   const uint64_t a30 = accumulator[query->a_offset + 30];

   return a30 * 64;
}

/* Memory Reads Distribution Gen8 :: GtiL3Bank1Reads
 *
 * Returns accumulator[query->c_offset + 5] unmodified.
 */
static uint64_t
bdw__memory_reads__gti_l3_bank1_reads__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: C 5 READ */
   return accumulator[query->c_offset + 5];
}

/* Memory Reads Distribution Gen8 :: GPU Busy
 *
 * Scales accumulator[query->a_offset + 0] by 100 (unsigned integer
 * multiply) and divides the product, in double precision, by the GPU
 * core clocks. A clock count of 0 yields 0. The result is narrowed to
 * float on return.
 */
static float
bdw__memory_reads__gpu_busy__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: A 0 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t a0_x100 = accumulator[query->a_offset + 0] * 100;
   const double numerator = a0_x100;
   const double clocks = bdw__memory_reads__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   /* FDIV: guarded against a zero divisor. */
   if (clocks)
      percent = numerator / clocks;

   return percent;
}

/* Memory Reads Distribution Gen8 :: GtiCmdStreamerMemoryReads
 *
 * Returns accumulator[query->b_offset + 0] unmodified.
 */
static uint64_t
bdw__memory_reads__gti_cmd_streamer_memory_reads__read(struct brw_context *brw,
                                                       const struct brw_perf_query_info *query,
                                                       uint64_t *accumulator)
{
   /* RPN equation: B 0 READ */
   return accumulator[query->b_offset + 0];
}

/* Memory Reads Distribution Gen8 :: GtiL3Bank2Reads
 *
 * Returns accumulator[query->c_offset + 6] unmodified.
 */
static uint64_t
bdw__memory_reads__gti_l3_bank2_reads__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: C 6 READ */
   return accumulator[query->c_offset + 6];
}

/* Memory Reads Distribution Gen8 :: GtiMemoryReads
 *
 * Returns accumulator[query->c_offset + 0] unmodified.
 */
static uint64_t
bdw__memory_reads__gti_memory_reads__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   /* RPN equation: C 0 READ */
   return accumulator[query->c_offset + 0];
}

/* Memory Reads Distribution Gen8 :: Rasterized Pixels
 *
 * Returns accumulator[query->a_offset + 21] multiplied by 4.
 */
static uint64_t
bdw__memory_reads__rasterized_pixels__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   /* RPN equation: A 21 READ 4 UMUL */
   const uint64_t a21 = accumulator[query->a_offset + 21];

   return a21 * 4;
}

/* Memory Reads Distribution Gen8 :: GtiRczMemoryReads
 *
 * Returns accumulator[query->b_offset + 7] unmodified.
 */
static uint64_t
bdw__memory_reads__gti_rcz_memory_reads__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: B 7 READ */
   return accumulator[query->b_offset + 7];
}

/* Memory Reads Distribution Gen8 :: DS Threads Dispatched
 *
 * Returns accumulator[query->a_offset + 3] unmodified.
 */
static uint64_t
bdw__memory_reads__ds_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 3 READ */
   return accumulator[query->a_offset + 3];
}

/* Memory Reads Distribution Gen8 :: Samples Written
 *
 * Returns accumulator[query->a_offset + 26] multiplied by 4.
 */
static uint64_t
bdw__memory_reads__samples_written__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: A 26 READ 4 UMUL */
   const uint64_t a26 = accumulator[query->a_offset + 26];

   return a26 * 4;
}

/* Memory Reads Distribution Gen8 :: EU Stall
 *
 * Integer-divides accumulator[a_offset + 8] by the EU count (an EU count
 * of 0 yields 0), scales the quotient by 100 and then divides, in double
 * precision, by the GPU core clocks (a clock count of 0 yields 0).
 * The result is narrowed to float on return.
 */
static float
bdw__memory_reads__eu_stall__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t a8 = accumulator[query->a_offset + 8];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t per_eu = 0;
   double percent = 0;

   /* UDIV: guarded against a zero divisor. */
   if (eu_count)
      per_eu = a8 / eu_count;

   const double scaled = per_eu * 100;
   const double clocks = bdw__memory_reads__gpu_core_clocks__read(brw, query, accumulator);

   /* FDIV: guarded against a zero divisor. */
   if (clocks)
      percent = scaled / clocks;

   return percent;
}

/* Memory Reads Distribution Gen8 :: Samples Blended
 *
 * Returns accumulator[query->a_offset + 27] multiplied by 4.
 */
static uint64_t
bdw__memory_reads__samples_blended__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: A 27 READ 4 UMUL */
   const uint64_t a27 = accumulator[query->a_offset + 27];

   return a27 * 4;
}

/* Memory Reads Distribution Gen8 :: Early Depth Test Fails
 *
 * Returns accumulator[query->a_offset + 23] multiplied by 4.
 */
static uint64_t
bdw__memory_reads__early_depth_test_fails__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: A 23 READ 4 UMUL */
   const uint64_t a23 = accumulator[query->a_offset + 23];

   return a23 * 4;
}

/* Memory Reads Distribution Gen8 :: GtiMscMemoryReads
 *
 * Returns accumulator[query->b_offset + 4] unmodified.
 */
static uint64_t
bdw__memory_reads__gti_msc_memory_reads__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: B 4 READ */
   return accumulator[query->b_offset + 4];
}

/* Memory Reads Distribution Gen8 :: GtiVfMemoryReads
 *
 * Returns accumulator[query->b_offset + 1] unmodified.
 */
static uint64_t
bdw__memory_reads__gti_vf_memory_reads__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: B 1 READ */
   return accumulator[query->b_offset + 1];
}

/* Memory Reads Distribution Gen8 :: GtiStcMemoryReads
 *
 * Returns accumulator[query->b_offset + 6] unmodified.
 */
static uint64_t
bdw__memory_reads__gti_stc_memory_reads__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: B 6 READ */
   return accumulator[query->b_offset + 6];
}

/* Memory Reads Distribution Gen8 :: Shader Memory Accesses
 *
 * Returns accumulator[query->a_offset + 32] unmodified.
 */
static uint64_t
bdw__memory_reads__shader_memory_accesses__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: A 32 READ */
   return accumulator[query->a_offset + 32];
}

/* Memory Reads Distribution Gen8 :: HS Threads Dispatched
 *
 * Returns accumulator[query->a_offset + 2] unmodified.
 */
static uint64_t
bdw__memory_reads__hs_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 2 READ */
   return accumulator[query->a_offset + 2];
}

/* Memory Reads Distribution Gen8 :: SLM Bytes Written
 *
 * Returns accumulator[query->a_offset + 31] multiplied by 64.
 */
static uint64_t
bdw__memory_reads__slm_bytes_written__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   /* RPN equation: A 31 READ 64 UMUL */
   const uint64_t a31 = accumulator[query->a_offset + 31];

   return a31 * 64;
}

/* Memory Reads Distribution Gen8 :: L3 Shader Throughput
 *
 * Adds accumulator[a_offset + 30], accumulator[a_offset + 31] and the
 * Shader Memory Accesses counter, then multiplies the sum by 64.
 * All arithmetic is unsigned 64-bit.
 */
static uint64_t
bdw__memory_reads__l3_shader_throughput__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL */
   const uint64_t a30 = accumulator[query->a_offset + 30];
   const uint64_t a31 = accumulator[query->a_offset + 31];
   const uint64_t accesses =
      bdw__memory_reads__shader_memory_accesses__read(brw, query, accumulator);

   return (a30 + (a31 + accesses)) * 64;
}

/* Memory Reads Distribution Gen8 :: Samples Killed in FS
 *
 * Returns accumulator[query->a_offset + 24] multiplied by 4.
 */
static uint64_t
bdw__memory_reads__samples_killed_in_ps__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: A 24 READ 4 UMUL */
   const uint64_t a24 = accumulator[query->a_offset + 24];

   return a24 * 4;
}

/* Memory Reads Distribution Gen8 :: GtiL3Reads
 *
 * Returns the unsigned 64-bit sum of the GtiL3Bank0Reads, GtiL3Bank1Reads,
 * GtiL3Bank2Reads and GtiL3Bank3Reads counters.
 */
static uint64_t
bdw__memory_reads__gti_l3_reads__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: $GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD */
   const uint64_t bank2 = bdw__memory_reads__gti_l3_bank2_reads__read(brw, query, accumulator);
   const uint64_t bank3 = bdw__memory_reads__gti_l3_bank3_reads__read(brw, query, accumulator);
   const uint64_t bank1 = bdw__memory_reads__gti_l3_bank1_reads__read(brw, query, accumulator);
   const uint64_t bank0 = bdw__memory_reads__gti_l3_bank0_reads__read(brw, query, accumulator);

   return bank0 + (bank1 + (bank2 + bank3));
}

/* Memory Reads Distribution Gen8 :: Shader Atomic Memory Accesses
 *
 * Returns accumulator[query->a_offset + 34] unmodified.
 */
static uint64_t
bdw__memory_reads__shader_atomics__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* RPN equation: A 34 READ */
   return accumulator[query->a_offset + 34];
}

/* Static storage for the register programming lists of the
 * "Memory Reads Distribution Gen8" query. register_memory_reads_counter_query()
 * appends to mux_regs while incrementing n_mux_regs; the b-counter and flex
 * lists are presumably filled the same way (their counts also start at 0) --
 * TODO confirm against the rest of the registration function.
 */
static struct brw_perf_query_register_prog bdw_memory_reads_mux_regs[50];
static struct brw_perf_query_register_prog bdw_memory_reads_b_counter_regs[28];
static struct brw_perf_query_register_prog bdw_memory_reads_flex_regs[7];

/* Storage for this query's counter descriptions (n_counters starts at 0), and
 * the query description itself. The array capacities above and below are
 * fixed; NOTE(review): they presumably match the number of entries the
 * registration function appends -- confirm before changing either side.
 */
static struct brw_perf_query_counter bdw_memory_reads_query_counters[42];
static struct brw_perf_query_info bdw_memory_reads_query = {
   .kind = OA_COUNTERS,
   .name = "Memory Reads Distribution Gen8",
   .guid = "2b255d48-2117-4fef-a8f7-f151e1d25a2c",
   .counters = bdw_memory_reads_query_counters,
   .n_counters = 0,
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,

   /* Accumulation buffer offsets... */
   /* These are the base indices that the bdw__memory_reads__*__read()
    * functions add a per-counter index to when reading the uint64_t
    * accumulator array (e.g. accumulator[query->a_offset + 7]).
    */
   .gpu_time_offset = 0,
   .gpu_clock_offset = 1,
   .a_offset = 2,
   .b_offset = 38,
   .c_offset = 46,
   .mux_regs = bdw_memory_reads_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = bdw_memory_reads_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
   .flex_regs = bdw_memory_reads_flex_regs,
   .n_flex_regs = 0, /* Determined at runtime */
};

/* Registers the "Memory Reads Distribution Gen8" metric set with a context.
 *
 * On the first call (detected by query->data_size still being zero) the
 * static bdw_memory_reads_query description is filled in: 50 mux register
 * writes, 28 B-counter register writes, 7 flex register writes and 42
 * counter descriptors are appended, and data_size is derived from the last
 * counter.  On every call the query is inserted into
 * brw->perfquery.oa_metrics_table, keyed by the query's GUID string.
 *
 * brw is also passed to the helper that computes raw_max for the
 * "AVG GPU Core Frequency" counter.
 */
static void
register_memory_reads_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &bdw_memory_reads_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* data_size is only assigned at the end of this block, so a zero value
    * means the one-time initialization has not run yet. */
   if (!query->data_size) {
      /* Mux register programming: 50 (reg, val) pairs appended in order,
       * exactly filling bdw_memory_reads_mux_regs[50].
       * NOTE(review): presumably the write order is significant to the
       * hardware -- confirm against the generator input before reordering. */
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198B0343 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x13845800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x15840018 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3580001A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038B6300 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058B6B62 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078B006A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x118B0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258B0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85A080 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185AAAA };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385000A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01840018 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07844C80 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09840D9A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B840E9C };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D840F9E };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F840010 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x11840000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03848000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0584C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2F8000E5 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x138080E3 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1580C0E1 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x11804000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1780C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1980C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F804000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00000D24, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3D800800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x43800842 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x51800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800842 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47801042 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F800084 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };

      /* B-counter register programming: 28 (reg, val) pairs, exactly
       * filling bdw_memory_reads_b_counter_regs[28]. */
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000274C, .val = 0x86543210 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002748, .val = 0x86543210 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002744, .val = 0x00006667 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000275C, .val = 0x86543210 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002758, .val = 0x86543210 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002754, .val = 0x00006465 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002750, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002770, .val = 0x0007F81A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002774, .val = 0x0000FE00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002778, .val = 0x0007F82A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000277C, .val = 0x0000FE00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002780, .val = 0x0007F872 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002784, .val = 0x0000FE00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002788, .val = 0x0007F8BA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000278C, .val = 0x0000FE00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002790, .val = 0x0007F87A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002794, .val = 0x0000FE00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002798, .val = 0x0007F8EA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000279C, .val = 0x0000FE00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A0, .val = 0x0007F8E2 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A4, .val = 0x0000FE00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A8, .val = 0x0007F8F2 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027AC, .val = 0x0000FE00 };

      /* Flex register programming: 7 (reg, val) pairs, exactly filling
       * bdw_memory_reads_flex_regs[7]. */
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E458, .val = 0x00005004 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E558, .val = 0x00015014 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E658, .val = 0x00025024 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E758, .val = 0x00035034 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E45C, .val = 0x00045044 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E55C, .val = 0x00055054 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E65C, .val = 0x00065064 };


      /* Counter descriptors: 42 entries, exactly filling
       * bdw_memory_reads_query_counters[42].  Offsets advance by 8 per
       * counter, including after float counters whose size is only
       * sizeof(float). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__memory_reads__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_l3_bank0_reads__read;
      counter->name = "GtiL3Bank0Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 16;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 24;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all accesses from GTI to the ring.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 32;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 40;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 48;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 56;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_l3_bank3_reads__read;
      counter->name = "GtiL3Bank3Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 64;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 72;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_rs_memory_reads__read;
      counter->name = "GtiRsMemoryReads";
      counter->desc = "The total number of GTI memory reads from Resource Streamer.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 80;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 88;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 96;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_hiz_memory_reads__read;
      counter->name = "GtiHizMemoryReads";
      counter->desc = "The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 104;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      /* The only counter in this set whose raw_max is computed from brw
       * rather than being a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = bdw__memory_reads__avg_gpu_core_frequency__max(brw);
      counter->offset = 120;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 128;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_rcc_memory_reads__read;
      counter->name = "GtiRccMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Color Cache (Render Color Cache misses).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 136;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 144;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 152;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_l3_bank1_reads__read;
      counter->name = "GtiL3Bank1Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 160;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__memory_reads__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 168;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_cmd_streamer_memory_reads__read;
      counter->name = "GtiCmdStreamerMemoryReads";
      counter->desc = "The total number of GTI memory reads from Command Streamer.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 176;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_l3_bank2_reads__read;
      counter->name = "GtiL3Bank2Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 184;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_memory_reads__read;
      counter->name = "GtiMemoryReads";
      counter->desc = "The total number of GTI memory reads.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 192;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 200;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_rcz_memory_reads__read;
      counter->name = "GtiRczMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 208;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 216;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 224;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__memory_reads__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 232;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 240;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 248;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_msc_memory_reads__read;
      counter->name = "GtiMscMemoryReads";
      counter->desc = "The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 256;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_vf_memory_reads__read;
      counter->name = "GtiVfMemoryReads";
      counter->desc = "The total number of GTI memory reads from Vertex Fetch.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 264;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_stc_memory_reads__read;
      counter->name = "GtiStcMemoryReads";
      counter->desc = "The total number of GTI memory reads from Stencil Cache (Stencil Cache misses).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 272;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 280;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 288;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 296;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 304;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 312;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_l3_reads__read;
      counter->name = "GtiL3Reads";
      counter->desc = "The total number of GTI memory reads from L3 (L3 Cache misses).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 320;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 328;
      counter->size = sizeof(uint64_t);

      /* counter still points at the last descriptor appended above, so the
       * total size is the end of that counter's slot.  Setting a non-zero
       * data_size also marks the query as initialized for later calls. */
      query->data_size = counter->offset + counter->size;
   }

   /* Performed on every call, not just the first: the table belongs to the
    * context passed in, while the query description is shared. */
   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Memory Writes Distribution Gen8 :: GPU Core Clocks
 *
 * Returns the accumulator entry located at the query's GPU clock offset.
 * brw is unused here.
 */
static uint64_t
bdw__memory_writes__gpu_core_clocks__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   /* RPN equation: GPU_CLOCK 0 READ */
   return accumulator[query->gpu_clock_offset + 0];
}

/* Memory Writes Distribution Gen8 :: EU Active
 *
 * Divides A counter 7 by the EU count (integer division), scales the
 * quotient by 100 and divides that by the GPU core clock count in floating
 * point.  A zero EU count or a zero clock count yields 0 for the respective
 * division instead of dividing by zero.
 */
static float
bdw__memory_writes__eu_active__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw_cycles = accumulator[query->a_offset + 7];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t cycles_per_eu = 0;

   if (eu_count) {
      cycles_per_eu = raw_cycles / eu_count;
   }

   /* The x100 scaling is still an integer operation; only the final
    * division is carried out in double precision. */
   const double scaled = cycles_per_eu * 100;
   const double core_clocks =
      bdw__memory_writes__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks) {
      return 0;
   }

   const double ratio = scaled / core_clocks;

   return ratio;
}

/* Memory Writes Distribution Gen8 :: GtiMemoryWrites
 *
 * Returns C counter 0 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__gti_memory_writes__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: C 0 READ */
   const uint64_t *c_counters = accumulator + query->c_offset;

   return c_counters[0];
}

/* Memory Writes Distribution Gen8 :: GS Threads Dispatched
 *
 * Returns A counter 5 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__gs_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 5 READ */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[5];
}

/* Memory Writes Distribution Gen8 :: GtiRingAccesses
 *
 * Returns C counter 3 from the accumulation buffer, multiplied by 2.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__gti_ring_accesses__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: C 3 READ 2 UMUL */
   const uint64_t *c_counters = accumulator + query->c_offset;

   return c_counters[3] * 2;
}

/* Memory Writes Distribution Gen8 :: Early Hi-Depth Test Fails
 *
 * Returns A counter 22 from the accumulation buffer, multiplied by 4.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__hi_depth_test_fails__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: A 22 READ 4 UMUL */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[22] * 4;
}

/* Memory Writes Distribution Gen8 :: VS Threads Dispatched
 *
 * Returns A counter 1 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__vs_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 1 READ */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[1];
}

/* Memory Writes Distribution Gen8 :: FS Threads Dispatched
 *
 * Returns A counter 6 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__ps_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 6 READ */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[6];
}

/* Memory Writes Distribution Gen8 :: GtiMscMemoryWrites
 *
 * Returns B counter 4 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__gti_msc_memory_writes__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: B 4 READ */
   const uint64_t *b_counters = accumulator + query->b_offset;

   return b_counters[4];
}

/* Memory Writes Distribution Gen8 :: Shader Barrier Messages
 *
 * Returns A counter 35 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__shader_barriers__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   /* RPN equation: A 35 READ */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[35];
}

/* Memory Writes Distribution Gen8 :: GtiCmdStreamerMemoryWrites
 *
 * Returns B counter 0 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__gti_cmd_streamer_memory_writes__read(struct brw_context *brw,
                                                         const struct brw_perf_query_info *query,
                                                         uint64_t *accumulator)
{
   /* RPN equation: B 0 READ */
   const uint64_t *b_counters = accumulator + query->b_offset;

   return b_counters[0];
}

/* Memory Writes Distribution Gen8 :: Sampler Texels
 *
 * Returns A counter 28 from the accumulation buffer, multiplied by 4.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__sampler_texels__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: A 28 READ 4 UMUL */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[28] * 4;
}

/* Memory Writes Distribution Gen8 :: Pixels Failing Tests
 *
 * Returns A counter 25 from the accumulation buffer, multiplied by 4.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__pixels_failing_post_ps_tests__read(struct brw_context *brw,
                                                       const struct brw_perf_query_info *query,
                                                       uint64_t *accumulator)
{
   /* RPN equation: A 25 READ 4 UMUL */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[25] * 4;
}

/* Memory Writes Distribution Gen8 :: GtiL3Bank0Writes
 *
 * Returns C counter 4 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__gti_l3_bank0_writes__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: C 4 READ */
   const uint64_t *c_counters = accumulator + query->c_offset;

   return c_counters[4];
}

/* Memory Writes Distribution Gen8 :: GtiL3Bank1Writes
 *
 * Returns C counter 5 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__gti_l3_bank1_writes__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: C 5 READ */
   const uint64_t *c_counters = accumulator + query->c_offset;

   return c_counters[5];
}

/* Memory Writes Distribution Gen8 :: GtiL3Bank2Writes
 *
 * Returns C counter 6 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__gti_l3_bank2_writes__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: C 6 READ */
   const uint64_t *c_counters = accumulator + query->c_offset;

   return c_counters[6];
}

/* Memory Writes Distribution Gen8 :: GtiL3Bank3Writes
 *
 * Returns C counter 7 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__gti_l3_bank3_writes__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: C 7 READ */
   const uint64_t *c_counters = accumulator + query->c_offset;

   return c_counters[7];
}

/* Memory Writes Distribution Gen8 :: GtiL3Writes
 *
 * Sum of the four per-bank GtiL3Bank{0..3}Writes readers.  The additions
 * are unsigned 64-bit, so the order of summation does not affect the
 * result.
 */
static uint64_t
bdw__memory_writes__gti_l3_writes__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* RPN equation: $GtiL3Bank0Writes $GtiL3Bank1Writes $GtiL3Bank2Writes $GtiL3Bank3Writes UADD UADD UADD */
   const uint64_t bank0 =
      bdw__memory_writes__gti_l3_bank0_writes__read(brw, query, accumulator);
   const uint64_t bank1 =
      bdw__memory_writes__gti_l3_bank1_writes__read(brw, query, accumulator);
   const uint64_t bank2 =
      bdw__memory_writes__gti_l3_bank2_writes__read(brw, query, accumulator);
   const uint64_t bank3 =
      bdw__memory_writes__gti_l3_bank3_writes__read(brw, query, accumulator);

   return bank0 + bank1 + bank2 + bank3;
}

/* Memory Writes Distribution Gen8 :: GPU Time Elapsed
 *
 * Converts slot 0 of the GPU_TIME group in the accumulation buffer from
 * timestamp ticks to nanoseconds: ticks * 1e9 / timestamp_frequency,
 * rounded down.  Returns 0 when the timestamp frequency is reported as 0.
 */
static uint64_t
bdw__memory_writes__gpu_time__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   /* RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV */
   const uint64_t ns_per_sec = 1000000000;
   const uint64_t ticks = accumulator[query->gpu_time_offset + 0];
   const uint64_t freq = brw->perfquery.sys_vars.timestamp_frequency;

   if (!freq) {
      return 0;
   }

   /* Computing ticks * 1e9 directly wraps around in 64 bits once ticks
    * exceeds roughly 1.8e10.  Splitting ticks into whole seconds and a
    * sub-second remainder gives exactly the same floor(ticks * 1e9 / freq)
    * whenever the direct product would not have wrapped, while keeping the
    * intermediate product bounded by freq * 1e9.
    */
   const uint64_t whole_seconds = ticks / freq;
   const uint64_t leftover_ticks = ticks % freq;

   return whole_seconds * ns_per_sec + (leftover_ticks * ns_per_sec) / freq;
}

/* Memory Writes Distribution Gen8 :: AVG GPU Core Frequency
 *
 * GPU core clock count scaled by 1e9 and divided (integer division) by the
 * GpuTime reader's value.  Returns 0 when the elapsed time is 0.
 *
 * NOTE(review): the 1e9 scaling is an unsigned 64-bit multiplication and
 * wraps for clock counts above roughly 1.8e10.
 */
static uint64_t
bdw__memory_writes__avg_gpu_core_frequency__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV */
   const uint64_t scaled_clocks =
      bdw__memory_writes__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t elapsed =
      bdw__memory_writes__gpu_time__read(brw, query, accumulator);

   if (elapsed == 0) {
      return 0;
   }

   return scaled_clocks / elapsed;
}

/* Memory Writes Distribution Gen8 :: AVG GPU Core Frequency
 *
 * Upper bound for the counter above: the gt_max_freq system variable
 * stored in the context.
 */
static uint64_t
bdw__memory_writes__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   /* RPN equation: $GpuMaxFrequency */
   const uint64_t max_freq = brw->perfquery.sys_vars.gt_max_freq;

   return max_freq;
}

/* Memory Writes Distribution Gen8 :: Sampler Texels Misses
 *
 * Returns A counter 29 from the accumulation buffer, multiplied by 4.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__sampler_texel_misses__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   /* RPN equation: A 29 READ 4 UMUL */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[29] * 4;
}

/* Memory Writes Distribution Gen8 :: CS Threads Dispatched
 *
 * Returns A counter 4 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__cs_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 4 READ */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[4];
}

/* Memory Writes Distribution Gen8 :: SLM Bytes Read
 *
 * Returns A counter 30 from the accumulation buffer, multiplied by 64.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__slm_bytes_read__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: A 30 READ 64 UMUL */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[30] * 64;
}

/* Memory Writes Distribution Gen8 :: GtiRccMemoryWrites
 *
 * Returns B counter 3 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__gti_rcc_memory_writes__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: B 3 READ */
   const uint64_t *b_counters = accumulator + query->b_offset;

   return b_counters[3];
}

/* Memory Writes Distribution Gen8 :: GtiSoMemoryWrites
 *
 * Returns B counter 2 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__gti_so_memory_writes__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   /* RPN equation: B 2 READ */
   const uint64_t *b_counters = accumulator + query->b_offset;

   return b_counters[2];
}

/* Memory Writes Distribution Gen8 :: GPU Busy
 *
 * A counter 0 scaled by 100 (integer multiply), then divided in floating
 * point by the GPU core clock count.  Returns 0 when the clock count is 0.
 */
static float
bdw__memory_writes__gpu_busy__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   /* RPN equation: A 0 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t busy_x100 = accumulator[query->a_offset + 0] * 100;
   const double numerator = busy_x100;
   const double core_clocks =
      bdw__memory_writes__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks) {
      return 0;
   }

   const double ratio = numerator / core_clocks;

   return ratio;
}

/* Memory Writes Distribution Gen8 :: GtiStcMemoryWrites
 *
 * Returns B counter 6 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__gti_stc_memory_writes__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: B 6 READ */
   const uint64_t *b_counters = accumulator + query->b_offset;

   return b_counters[6];
}

/* Memory Writes Distribution Gen8 :: Rasterized Pixels
 *
 * Returns A counter 21 from the accumulation buffer, multiplied by 4.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__rasterized_pixels__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: A 21 READ 4 UMUL */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[21] * 4;
}

/* Memory Writes Distribution Gen8 :: DS Threads Dispatched
 *
 * Returns A counter 3 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__ds_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 3 READ */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[3];
}

/* Memory Writes Distribution Gen8 :: Samples Written
 *
 * Returns A counter 26 from the accumulation buffer, multiplied by 4.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__samples_written__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   /* RPN equation: A 26 READ 4 UMUL */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[26] * 4;
}

/* Memory Writes Distribution Gen8 :: EU Stall
 *
 * Divides A counter 8 by the EU count (integer division), scales the
 * quotient by 100 and divides that by the GPU core clock count in floating
 * point.  A zero EU count or a zero clock count yields 0 for the respective
 * division instead of dividing by zero.
 */
static float
bdw__memory_writes__eu_stall__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   /* RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw_cycles = accumulator[query->a_offset + 8];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t cycles_per_eu = 0;

   if (eu_count) {
      cycles_per_eu = raw_cycles / eu_count;
   }

   /* The x100 scaling is still an integer operation; only the final
    * division is carried out in double precision. */
   const double scaled = cycles_per_eu * 100;
   const double core_clocks =
      bdw__memory_writes__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks) {
      return 0;
   }

   const double ratio = scaled / core_clocks;

   return ratio;
}

/* Memory Writes Distribution Gen8 :: Samples Blended
 *
 * Returns A counter 27 from the accumulation buffer, multiplied by 4.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__samples_blended__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   /* RPN equation: A 27 READ 4 UMUL */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[27] * 4;
}

/* Memory Writes Distribution Gen8 :: Early Depth Test Fails
 *
 * Returns A counter 23 from the accumulation buffer, multiplied by 4.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__early_depth_test_fails__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 23 READ 4 UMUL */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[23] * 4;
}

/* Memory Writes Distribution Gen8 :: Shader Memory Accesses
 *
 * Returns A counter 32 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__shader_memory_accesses__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 32 READ */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[32];
}

/* Memory Writes Distribution Gen8 :: HS Threads Dispatched
 *
 * Returns A counter 2 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__hs_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 2 READ */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[2];
}

/* Memory Writes Distribution Gen8 :: GtiRczMemoryWrites
 *
 * Returns B counter 7 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__gti_rcz_memory_writes__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: B 7 READ */
   const uint64_t *b_counters = accumulator + query->b_offset;

   return b_counters[7];
}

/* Memory Writes Distribution Gen8 :: SLM Bytes Written
 *
 * Returns A counter 31 from the accumulation buffer, multiplied by 64.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__slm_bytes_written__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: A 31 READ 64 UMUL */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[31] * 64;
}

/* Memory Writes Distribution Gen8 :: L3 Shader Throughput
 *
 * Adds A counter 30, A counter 31 and the ShaderMemoryAccesses reader's
 * value, then multiplies the sum by 64.  All arithmetic is unsigned
 * 64-bit.
 */
static uint64_t
bdw__memory_writes__l3_shader_throughput__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   /* RPN equation: A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL */
   const uint64_t *a_counters = accumulator + query->a_offset;
   const uint64_t shader_accesses =
      bdw__memory_writes__shader_memory_accesses__read(brw, query, accumulator);
   const uint64_t total = a_counters[30] + a_counters[31] + shader_accesses;

   return total * 64;
}

/* Memory Writes Distribution Gen8 :: Samples Killed in FS
 *
 * Returns A counter 24 from the accumulation buffer, multiplied by 4.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__samples_killed_in_ps__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   /* RPN equation: A 24 READ 4 UMUL */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[24] * 4;
}

/* Memory Writes Distribution Gen8 :: GtiHizMemoryWrites
 *
 * Returns B counter 5 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__gti_hiz_memory_writes__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: B 5 READ */
   const uint64_t *b_counters = accumulator + query->b_offset;

   return b_counters[5];
}

/* Memory Writes Distribution Gen8 :: Shader Atomic Memory Accesses
 *
 * Returns A counter 34 from the accumulation buffer.
 * `brw` is not used by this reader.
 */
static uint64_t
bdw__memory_writes__shader_atomics__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: A 34 READ */
   const uint64_t *a_counters = accumulator + query->a_offset;

   return a_counters[34];
}

/* Backing storage for the register programming lists of the "Memory Writes
 * Distribution Gen8" metric set.  The arrays start out zero-initialized;
 * register_memory_writes_counter_query() appends entries to the mux list
 * through query->mux_regs[query->n_mux_regs++] (presumably the B-counter and
 * flex lists are filled the same way -- confirm in that function).
 *
 * NOTE(review): each array length has to be at least the number of entries
 * appended at registration time; verify against the generator output.
 */
static struct brw_perf_query_register_prog bdw_memory_writes_mux_regs[49];
static struct brw_perf_query_register_prog bdw_memory_writes_b_counter_regs[28];
static struct brw_perf_query_register_prog bdw_memory_writes_flex_regs[7];

/* Counter descriptors for this metric set; presumably appended to at
 * registration time via query->counters[query->n_counters++] -- TODO confirm. */
static struct brw_perf_query_counter bdw_memory_writes_query_counters[41];

/* Static description of the metric set.  data_size is not set here and so
 * starts at zero; register_memory_writes_counter_query() tests
 * !query->data_size to decide whether the one-time setup still has to run.
 */
static struct brw_perf_query_info bdw_memory_writes_query = {
   .kind = OA_COUNTERS,
   .name = "Memory Writes Distribution Gen8",
   .guid = "f7fd3220-b466-4a4d-9f98-b0caf3f2394c",
   .counters = bdw_memory_writes_query_counters,
   .n_counters = 0,
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,

   /* Accumulation buffer offsets... */
   /* These are indices into the uint64_t accumulator array used by the
    * reader functions above.  The A group spans 36 slots (2..37) and the B
    * group 8 slots (38..45), which looks consistent with the 32+4 A and 8 B
    * counters suggested by the format name -- TODO confirm. */
   .gpu_time_offset = 0,
   .gpu_clock_offset = 1,
   .a_offset = 2,
   .b_offset = 38,
   .c_offset = 46,
   .mux_regs = bdw_memory_writes_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = bdw_memory_writes_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
   .flex_regs = bdw_memory_writes_flex_regs,
   .n_flex_regs = 0, /* Determined at runtime */
};

/* Registers the "Memory Writes Distribution Gen8" OA metric set.
 *
 * On the first call (detected by query->data_size still being zero) the
 * file-static bdw_memory_writes_query description is populated: the mux,
 * B-counter and flex register programming entries are appended, followed by
 * one description per counter (read callback, name, description, type, data
 * type, raw max, offset and size).  query->data_size is then derived from
 * the last counter's offset and size.
 *
 * On every call the query is inserted into brw->perfquery.oa_metrics_table
 * under the query's GUID string.
 */
static void
register_memory_writes_counter_query(struct brw_context *brw)
{
   /* Alias for the file-static query description filled in below. */
   static struct brw_perf_query_info *query = &bdw_memory_writes_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* data_size is only set at the very end of this block, so a zero value
    * means the one-time initialization has not run yet. */
   if (!query->data_size) {
      /* Mux register programming: 49 entries, matching the size of
       * bdw_memory_writes_mux_regs[].  Entries are appended in order. */
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198B0343 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x13845400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3580001A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3D800805 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038B6300 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058B6B62 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078B006A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x118B0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258B0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85A080 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185AAAA };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x23850002 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01840010 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07844880 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09840992 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B840A94 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D840B96 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x11840000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03848000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0584C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2D800147 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2F8000E5 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x138080E3 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1580C0E1 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x11804000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1780C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1980C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00000D24, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x43800842 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x51800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800842 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47801082 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F800084 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };

      /* B-counter register programming: 28 entries, matching the size of
       * bdw_memory_writes_b_counter_regs[]. */
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000274C, .val = 0x86543210 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002748, .val = 0x86543210 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002744, .val = 0x00006667 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000275C, .val = 0x86543210 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002758, .val = 0x86543210 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002754, .val = 0x00006465 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002750, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002770, .val = 0x0007F81A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002774, .val = 0x0000FE00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002778, .val = 0x0007F82A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000277C, .val = 0x0000FE00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002780, .val = 0x0007F822 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002784, .val = 0x0000FE00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002788, .val = 0x0007F8BA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000278C, .val = 0x0000FE00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002790, .val = 0x0007F87A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002794, .val = 0x0000FE00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002798, .val = 0x0007F8EA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000279C, .val = 0x0000FE00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A0, .val = 0x0007F8E2 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A4, .val = 0x0000FE00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A8, .val = 0x0007F8F2 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027AC, .val = 0x0000FE00 };

      /* Flex register programming: 7 entries, matching the size of
       * bdw_memory_writes_flex_regs[]. */
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E458, .val = 0x00005004 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E558, .val = 0x00015014 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E658, .val = 0x00025024 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E758, .val = 0x00035034 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E45C, .val = 0x00045044 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E55C, .val = 0x00055054 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E65C, .val = 0x00065064 };


      /* Counter descriptions: 41 entries, matching the size of
       * bdw_memory_writes_query_counters[].  Offsets advance in steps of 8
       * for every counter, including the float counters whose size is
       * sizeof(float). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__memory_writes__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_memory_writes__read;
      counter->name = "GtiMemoryWrites";
      counter->desc = "The total number of GTI memory writes.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 16;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 24;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all GTI accesses to the ring.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 32;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 40;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 48;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 56;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_msc_memory_writes__read;
      counter->name = "GtiMscMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 64;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 72;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_cmd_streamer_memory_writes__read;
      counter->name = "GtiCmdStreamerMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Command Streamer.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 80;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 88;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 96;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_l3_bank0_writes__read;
      counter->name = "GtiL3Bank0Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 104;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_l3_bank1_writes__read;
      counter->name = "GtiL3Bank1Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_l3_bank2_writes__read;
      counter->name = "GtiL3Bank2Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 120;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_l3_bank3_writes__read;
      counter->name = "GtiL3Bank3Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 128;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_l3_writes__read;
      counter->name = "GtiL3Writes";
      counter->desc = "The total number of GTI memory writes from L3 (L3 invalidations).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 136;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 144;
      counter->size = sizeof(uint64_t);

      /* Unlike the other counters, raw_max here is computed at registration
       * time from the context rather than being a constant. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = bdw__memory_writes__avg_gpu_core_frequency__max(brw);
      counter->offset = 152;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 160;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 168;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 176;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_rcc_memory_writes__read;
      counter->name = "GtiRccMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 184;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_so_memory_writes__read;
      counter->name = "GtiSoMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stream Output.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 192;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__memory_writes__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 200;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_stc_memory_writes__read;
      counter->name = "GtiStcMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stencil Cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 208;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 216;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 224;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 232;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__memory_writes__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 240;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 248;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 256;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 264;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 272;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_rcz_memory_writes__read;
      counter->name = "GtiRczMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Depth Cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 280;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 288;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 296;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 304;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_hiz_memory_writes__read;
      counter->name = "GtiHizMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Hierarchical Depth Cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 312;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 320;
      counter->size = sizeof(uint64_t);

      /* `counter` still points at the last counter appended above, so this
       * is the end of the final counter's slot.  Setting it also marks the
       * one-time initialization as done. */
      query->data_size = counter->offset + counter->size;
   }

   /* Performed on every call, not only the first: each context's metrics
    * table gets an entry keyed by the GUID string. */
   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Compute Metrics Extended Gen8 :: GPU Core Clocks
 *
 * RPN equation: GPU_CLOCK 0 READ
 *
 * Returns the accumulated value stored at the query's GPU clock offset.
 * `brw` is unused; it is part of the common counter-read signature.
 */
static uint64_t
bdw__compute_extended__gpu_core_clocks__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   return accumulator[query->gpu_clock_offset + 0];
}

/* Compute Metrics Extended Gen8 :: EU Active
 *
 * Takes accumulator slot a_offset + 7, divides it by the EU count using
 * integer division (0 when the EU count is 0), multiplies by 100 and then
 * divides by the GPU core clocks in floating point (0 when the clock count
 * is 0).  The double result is narrowed to float on return.
 */
static float
bdw__compute_extended__eu_active__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   /* RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 7];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const double core_clocks =
      (double)bdw__compute_extended__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t per_eu = 0;
   double percent = 0;

   if (eu_count != 0)
      per_eu = raw / eu_count;

   if (core_clocks != 0)
      percent = (double)(per_eu * 100) / core_clocks;

   return percent;
}

/* Compute Metrics Extended Gen8 :: EU Both FPU Pipes Active
 *
 * Takes accumulator slot a_offset + 9, divides it by the EU count using
 * integer division (0 when the EU count is 0), multiplies by 100 and then
 * divides by the GPU core clocks in floating point (0 when the clock count
 * is 0).  The double result is narrowed to float on return.
 */
static float
bdw__compute_extended__eu_fpu_both_active__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 9];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const double core_clocks =
      (double)bdw__compute_extended__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t per_eu = 0;
   double percent = 0;

   if (eu_count != 0)
      per_eu = raw / eu_count;

   if (core_clocks != 0)
      percent = (double)(per_eu * 100) / core_clocks;

   return percent;
}

/* Compute Metrics Extended Gen8 :: EU FPU0 Pipe Active
 *
 * Takes accumulator slot a_offset + 10, divides it by the EU count using
 * integer division (0 when the EU count is 0), multiplies by 100 and then
 * divides by the GPU core clocks in floating point (0 when the clock count
 * is 0).  The double result is narrowed to float on return.
 */
static float
bdw__compute_extended__fpu0_active__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 10];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const double core_clocks =
      (double)bdw__compute_extended__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t per_eu = 0;
   double percent = 0;

   if (eu_count != 0)
      per_eu = raw / eu_count;

   if (core_clocks != 0)
      percent = (double)(per_eu * 100) / core_clocks;

   return percent;
}

/* Compute Metrics Extended Gen8 :: EU FPU1 Pipe Active
 *
 * Takes accumulator slot a_offset + 11, divides it by the EU count using
 * integer division (0 when the EU count is 0), multiplies by 100 and then
 * divides by the GPU core clocks in floating point (0 when the clock count
 * is 0).  The double result is narrowed to float on return.
 */
static float
bdw__compute_extended__fpu1_active__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 11];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const double core_clocks =
      (double)bdw__compute_extended__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t per_eu = 0;
   double percent = 0;

   if (eu_count != 0)
      per_eu = raw / eu_count;

   if (core_clocks != 0)
      percent = (double)(per_eu * 100) / core_clocks;

   return percent;
}

/* Compute Metrics Extended Gen8 :: EU AVG IPC Rate
 *
 * Computes  A9 / ((A10 + A11) - A9) + 1  where An is accumulator slot
 * a_offset + n.  The sum and difference are evaluated in 64-bit unsigned
 * arithmetic; the division is done in floating point and yields 0 when
 * the divisor is 0 (so the function then returns 1).
 */
static float
bdw__compute_extended__eu_avg_ipc_rate__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD */
   const uint64_t a9 = accumulator[query->a_offset + 9];
   const uint64_t a10 = accumulator[query->a_offset + 10];
   const uint64_t a11 = accumulator[query->a_offset + 11];
   const uint64_t difference = (a10 + a11) - a9;
   const double dividend = (double)a9;
   const double divisor = (double)difference;
   double rate = 0;

   if (divisor != 0)
      rate = dividend / divisor;

   rate = rate + 1;

   return rate;
}

/* Compute Metrics Extended Gen8 :: Typed Writes 0
 *
 * Returns, unmodified, accumulator slot c_offset + 0.  The brw argument
 * is not used by this reader.
 */
static uint64_t
bdw__compute_extended__typed_writes0__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   /* RPN equation: C 0 READ */
   return accumulator[query->c_offset + 0];
}

/* Compute Metrics Extended Gen8 :: EuTypedAtomics0
 *
 * Returns, unmodified, accumulator slot b_offset + 5.  The brw argument
 * is not used by this reader.
 */
static uint64_t
bdw__compute_extended__eu_typed_atomics0__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   /* RPN equation: B 5 READ */
   return accumulator[query->b_offset + 5];
}

/* Compute Metrics Extended Gen8 :: Typed Atomics 0
 *
 * Returns, unmodified, accumulator slot c_offset + 4.  The brw argument
 * is not used by this reader.
 */
static uint64_t
bdw__compute_extended__typed_atomics0__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: C 4 READ */
   return accumulator[query->c_offset + 4];
}

/* Compute Metrics Extended Gen8 :: TypedAtomicsPerCacheLine
 *
 * Floating-point ratio of the EuTypedAtomics0 reader to the
 * TypedAtomics0 reader.  Yields 0 when the TypedAtomics0 value is 0.
 */
static float
bdw__compute_extended__typed_atomics_per_cache_line__read(struct brw_context *brw,
                                                          const struct brw_perf_query_info *query,
                                                          uint64_t *accumulator)
{
   /* RPN equation: $EuTypedAtomics0 $TypedAtomics0 FDIV */
   const double dividend =
      (double)bdw__compute_extended__eu_typed_atomics0__read(brw, query, accumulator);
   const double divisor =
      (double)bdw__compute_extended__typed_atomics0__read(brw, query, accumulator);
   double ratio = 0;

   if (divisor != 0)
      ratio = dividend / divisor;

   return ratio;
}

/* Compute Metrics Extended Gen8 :: EuUntypedReads0
 *
 * Returns, unmodified, accumulator slot b_offset + 0.  The brw argument
 * is not used by this reader.
 */
static uint64_t
bdw__compute_extended__eu_untyped_reads0__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   /* RPN equation: B 0 READ */
   return accumulator[query->b_offset + 0];
}

/* Compute Metrics Extended Gen8 :: Untyped Writes 0
 *
 * Returns, unmodified, accumulator slot c_offset + 1.  The brw argument
 * is not used by this reader.
 */
static uint64_t
bdw__compute_extended__untyped_writes0__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: C 1 READ */
   return accumulator[query->c_offset + 1];
}

/* Compute Metrics Extended Gen8 :: EuUntypedAtomics0
 *
 * Returns, unmodified, accumulator slot b_offset + 4.  The brw argument
 * is not used by this reader.
 */
static uint64_t
bdw__compute_extended__eu_untyped_atomics0__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: B 4 READ */
   return accumulator[query->b_offset + 4];
}

/* Compute Metrics Extended Gen8 :: EuUntypedWrites0
 *
 * Returns, unmodified, accumulator slot b_offset + 1.  The brw argument
 * is not used by this reader.
 */
static uint64_t
bdw__compute_extended__eu_untyped_writes0__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: B 1 READ */
   return accumulator[query->b_offset + 1];
}

/* Compute Metrics Extended Gen8 :: EuA64UntypedWrites0
 *
 * Returns, unmodified, accumulator slot b_offset + 7.  The brw argument
 * is not used by this reader.
 */
static uint64_t
bdw__compute_extended__eu_a64_untyped_writes0__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   /* RPN equation: B 7 READ */
   return accumulator[query->b_offset + 7];
}

/* Compute Metrics Extended Gen8 :: UntypedWritesPerCacheLine
 *
 * Adds the EuUntypedWrites0 and EuA64UntypedWrites0 readers in 64-bit
 * unsigned arithmetic and divides the sum, in floating point, by the
 * UntypedWrites0 reader.  Yields 0 when the UntypedWrites0 value is 0.
 */
static float
bdw__compute_extended__untyped_writes_per_cache_line__read(struct brw_context *brw,
                                                           const struct brw_perf_query_info *query,
                                                           uint64_t *accumulator)
{
   /* RPN equation: $EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV */
   const uint64_t eu_writes =
      bdw__compute_extended__eu_untyped_writes0__read(brw, query, accumulator);
   const uint64_t eu_a64_writes =
      bdw__compute_extended__eu_a64_untyped_writes0__read(brw, query, accumulator);
   const double dividend = (double)(eu_writes + eu_a64_writes);
   const double divisor =
      (double)bdw__compute_extended__untyped_writes0__read(brw, query, accumulator);
   double ratio = 0;

   if (divisor != 0)
      ratio = dividend / divisor;

   return ratio;
}

/* Compute Metrics Extended Gen8 :: Shader Barrier Messages
 *
 * Returns, unmodified, accumulator slot a_offset + 35.  The brw argument
 * is not used by this reader.
 */
static uint64_t
bdw__compute_extended__shader_barriers__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: A 35 READ */
   return accumulator[query->a_offset + 35];
}

/* Compute Metrics Extended Gen8 :: Sampler Texels
 *
 * Returns accumulator slot a_offset + 28 multiplied by 4 (64-bit unsigned
 * arithmetic).  The brw argument is not used by this reader.
 */
static uint64_t
bdw__compute_extended__sampler_texels__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: A 28 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 28];

   return raw * 4;
}

/* Compute Metrics Extended Gen8 :: GPU Time Elapsed
 *
 * Scales the accumulated GPU_TIME value (slot gpu_time_offset) by
 * 1000000000 and divides by the timestamp frequency system variable,
 * i.e. computes floor(raw * 1000000000 / frequency).  Presumably this
 * converts timestamp ticks to nanoseconds (verify against the OA
 * documentation).  Returns 0 when the frequency is 0.
 *
 * The value is split into quotient and remainder before scaling so the
 * intermediate product cannot wrap around 64 bits for large accumulated
 * values; the result is identical to multiplying first whenever that
 * product would have fit.  The remainder term stays exact as long as
 * frequency * 1000000000 fits in 64 bits.
 *
 * NOTE: this file is generated by brw_oa.py; the generator needs the same
 * change for the fix to survive regeneration.
 */
static uint64_t
bdw__compute_extended__gpu_time__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV */
   const uint64_t raw = accumulator[query->gpu_time_offset + 0];
   const uint64_t frequency = brw->perfquery.sys_vars.timestamp_frequency;
   const uint64_t scale = 1000000000;
   uint64_t whole;
   uint64_t remainder;

   if (!frequency)
      return 0;

   /* raw = whole * frequency + remainder, with remainder < frequency */
   whole = raw / frequency;
   remainder = raw % frequency;

   return whole * scale + (remainder * scale) / frequency;
}

/* Compute Metrics Extended Gen8 :: AVG GPU Core Frequency
 *
 * Multiplies the GPU core clocks reader by 1000000000 and divides by the
 * GPU time reader using integer division.  Returns 0 when the GPU time
 * is 0.  The product is evaluated in 64-bit unsigned arithmetic, so it
 * wraps modulo 2^64 for very large clock counts.
 */
static uint64_t
bdw__compute_extended__avg_gpu_core_frequency__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   /* RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV */
   const uint64_t scaled_clocks =
      bdw__compute_extended__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t elapsed =
      bdw__compute_extended__gpu_time__read(brw, query, accumulator);

   if (elapsed == 0)
      return 0;

   return scaled_clocks / elapsed;
}

/* Compute Metrics Extended Gen8 :: AVG GPU Core Frequency (maximum)
 *
 * Returns the gt_max_freq system variable stored in the context's
 * perfquery state as the upper bound for this counter.
 */
static uint64_t
bdw__compute_extended__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   /* RPN equation: $GpuMaxFrequency */
   const uint64_t max_frequency = brw->perfquery.sys_vars.gt_max_freq;

   return max_frequency;
}

/* Compute Metrics Extended Gen8 :: Sampler Texels Misses
 *
 * Returns accumulator slot a_offset + 29 multiplied by 4 (64-bit unsigned
 * arithmetic).  The brw argument is not used by this reader.
 */
static uint64_t
bdw__compute_extended__sampler_texel_misses__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   /* RPN equation: A 29 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 29];

   return raw * 4;
}

/* Compute Metrics Extended Gen8 :: CS Threads Dispatched
 *
 * Returns, unmodified, accumulator slot a_offset + 4.  The brw argument
 * is not used by this reader.
 */
static uint64_t
bdw__compute_extended__cs_threads__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* RPN equation: A 4 READ */
   return accumulator[query->a_offset + 4];
}

/* Compute Metrics Extended Gen8 :: SLM Bytes Read
 *
 * Returns accumulator slot a_offset + 30 multiplied by 64 (64-bit
 * unsigned arithmetic).  The brw argument is not used by this reader.
 */
static uint64_t
bdw__compute_extended__slm_bytes_read__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: A 30 READ 64 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 30];

   return raw * 64;
}

/* Compute Metrics Extended Gen8 :: EuTypedWrites0
 *
 * Returns, unmodified, accumulator slot b_offset + 3.  The brw argument
 * is not used by this reader.
 */
static uint64_t
bdw__compute_extended__eu_typed_writes0__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: B 3 READ */
   return accumulator[query->b_offset + 3];
}

/* Compute Metrics Extended Gen8 :: TypedWritesPerCacheLine
 *
 * Floating-point ratio of the EuTypedWrites0 reader to the TypedWrites0
 * reader.  Yields 0 when the TypedWrites0 value is 0.
 */
static float
bdw__compute_extended__typed_writes_per_cache_line__read(struct brw_context *brw,
                                                         const struct brw_perf_query_info *query,
                                                         uint64_t *accumulator)
{
   /* RPN equation: $EuTypedWrites0 $TypedWrites0 FDIV */
   const double dividend =
      (double)bdw__compute_extended__eu_typed_writes0__read(brw, query, accumulator);
   const double divisor =
      (double)bdw__compute_extended__typed_writes0__read(brw, query, accumulator);
   double ratio = 0;

   if (divisor != 0)
      ratio = dividend / divisor;

   return ratio;
}

/* Compute Metrics Extended Gen8 :: Typed Reads 0
 *
 * Returns, unmodified, accumulator slot c_offset + 2.  The brw argument
 * is not used by this reader.
 */
static uint64_t
bdw__compute_extended__typed_reads0__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   /* RPN equation: C 2 READ */
   return accumulator[query->c_offset + 2];
}

/* Compute Metrics Extended Gen8 :: Untyped Reads 0
 *
 * Returns, unmodified, accumulator slot c_offset + 3.  The brw argument
 * is not used by this reader.
 */
static uint64_t
bdw__compute_extended__untyped_reads0__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: C 3 READ */
   return accumulator[query->c_offset + 3];
}

/* Compute Metrics Extended Gen8 :: EuA64UntypedReads0
 *
 * Returns, unmodified, accumulator slot b_offset + 6.  The brw argument
 * is not used by this reader.
 */
static uint64_t
bdw__compute_extended__eu_a64_untyped_reads0__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   /* RPN equation: B 6 READ */
   return accumulator[query->b_offset + 6];
}

/* Compute Metrics Extended Gen8 :: EU Thread Occupancy
 *
 * Takes accumulator slot a_offset + 13 multiplied by 8, divides it by the
 * EU count and then by the EU threads count (both integer divisions, each
 * yielding 0 when its divisor is 0), multiplies by 100 and finally divides
 * by the GPU core clocks in floating point (0 when the clock count is 0).
 */
static float
bdw__compute_extended__eu_thread_occupancy__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t thread_count = brw->perfquery.sys_vars.eu_threads_count;
   const double core_clocks =
      (double)bdw__compute_extended__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t value = accumulator[query->a_offset + 13] * 8;
   double percent = 0;

   if (eu_count != 0)
      value = value / eu_count;
   else
      value = 0;

   if (thread_count != 0)
      value = value / thread_count;
   else
      value = 0;

   value = value * 100;

   if (core_clocks != 0)
      percent = (double)value / core_clocks;

   return percent;
}

/* Compute Metrics Extended Gen8 :: EU Stall
 *
 * Takes accumulator slot a_offset + 8, divides it by the EU count using
 * integer division (0 when the EU count is 0), multiplies by 100 and then
 * divides by the GPU core clocks in floating point (0 when the clock count
 * is 0).  The double result is narrowed to float on return.
 */
static float
bdw__compute_extended__eu_stall__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 8];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const double core_clocks =
      (double)bdw__compute_extended__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t per_eu = 0;
   double percent = 0;

   if (eu_count != 0)
      per_eu = raw / eu_count;

   if (core_clocks != 0)
      percent = (double)(per_eu * 100) / core_clocks;

   return percent;
}

/* Compute Metrics Extended Gen8 :: EuTypedReads0
 *
 * Returns, unmodified, accumulator slot b_offset + 2.  The brw argument
 * is not used by this reader.
 */
static uint64_t
bdw__compute_extended__eu_typed_reads0__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: B 2 READ */
   return accumulator[query->b_offset + 2];
}

/* Compute Metrics Extended Gen8 :: UntypedReadsPerCacheLine
 *
 * Adds the EuUntypedReads0 and EuA64UntypedReads0 readers in 64-bit
 * unsigned arithmetic and divides the sum, in floating point, by the
 * UntypedReads0 reader.  Yields 0 when the UntypedReads0 value is 0.
 */
static float
bdw__compute_extended__untyped_reads_per_cache_line__read(struct brw_context *brw,
                                                          const struct brw_perf_query_info *query,
                                                          uint64_t *accumulator)
{
   /* RPN equation: $EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV */
   const uint64_t eu_reads =
      bdw__compute_extended__eu_untyped_reads0__read(brw, query, accumulator);
   const uint64_t eu_a64_reads =
      bdw__compute_extended__eu_a64_untyped_reads0__read(brw, query, accumulator);
   const double dividend = (double)(eu_reads + eu_a64_reads);
   const double divisor =
      (double)bdw__compute_extended__untyped_reads0__read(brw, query, accumulator);
   double ratio = 0;

   if (divisor != 0)
      ratio = dividend / divisor;

   return ratio;
}

/* Compute Metrics Extended Gen8 :: Shader Memory Accesses
 *
 * Returns, unmodified, accumulator slot a_offset + 32.  The brw argument
 * is not used by this reader.
 */
static uint64_t
bdw__compute_extended__shader_memory_accesses__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   /* RPN equation: A 32 READ */
   return accumulator[query->a_offset + 32];
}

/* Compute Metrics Extended Gen8 :: TypedReadsPerCacheLine
 *
 * Floating-point ratio of the EuTypedReads0 reader to the TypedReads0
 * reader.  Yields 0 when the TypedReads0 value is 0.
 */
static float
bdw__compute_extended__typed_reads_per_cache_line__read(struct brw_context *brw,
                                                        const struct brw_perf_query_info *query,
                                                        uint64_t *accumulator)
{
   /* RPN equation: $EuTypedReads0 $TypedReads0 FDIV */
   const double dividend =
      (double)bdw__compute_extended__eu_typed_reads0__read(brw, query, accumulator);
   const double divisor =
      (double)bdw__compute_extended__typed_reads0__read(brw, query, accumulator);
   double ratio = 0;

   if (divisor != 0)
      ratio = dividend / divisor;

   return ratio;
}

/* Compute Metrics Extended Gen8 :: SLM Bytes Written
 *
 * Returns accumulator slot a_offset + 31 multiplied by 64 (64-bit
 * unsigned arithmetic).  The brw argument is not used by this reader.
 */
static uint64_t
bdw__compute_extended__slm_bytes_written__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   /* RPN equation: A 31 READ 64 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 31];

   return raw * 64;
}

/* Compute Metrics Extended Gen8 :: L3 Shader Throughput
 *
 * Sums accumulator slots a_offset + 30 and a_offset + 31 with the
 * Shader Memory Accesses reader and multiplies the total by 64, all in
 * 64-bit unsigned arithmetic.
 */
static uint64_t
bdw__compute_extended__l3_shader_throughput__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   /* RPN equation: A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL */
   const uint64_t a30 = accumulator[query->a_offset + 30];
   const uint64_t a31 = accumulator[query->a_offset + 31];
   const uint64_t memory_accesses =
      bdw__compute_extended__shader_memory_accesses__read(brw, query, accumulator);
   const uint64_t total = a30 + (a31 + memory_accesses);

   return total * 64;
}

/* Compute Metrics Extended Gen8 :: Shader Atomic Memory Accesses
 *
 * Returns, unmodified, accumulator slot a_offset + 34.  The brw argument
 * is not used by this reader.
 */
static uint64_t
bdw__compute_extended__shader_atomics__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: A 34 READ */
   return accumulator[query->a_offset + 34];
}

/* Compute Metrics Extended Gen8 :: EU Send Pipe Active
 *
 * Takes accumulator slot a_offset + 12, divides it by the EU count using
 * integer division (0 when the EU count is 0), multiplies by 100 and then
 * divides by the GPU core clocks in floating point (0 when the clock count
 * is 0).  The double result is narrowed to float on return.
 */
static float
bdw__compute_extended__eu_send_active__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 12];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const double core_clocks =
      (double)bdw__compute_extended__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t per_eu = 0;
   double percent = 0;

   if (eu_count != 0)
      per_eu = raw / eu_count;

   if (core_clocks != 0)
      percent = (double)(per_eu * 100) / core_clocks;

   return percent;
}

static struct brw_perf_query_register_prog bdw_compute_extended_mux_regs[648];
static struct brw_perf_query_register_prog bdw_compute_extended_b_counter_regs[21];
static struct brw_perf_query_register_prog bdw_compute_extended_flex_regs[7];

static struct brw_perf_query_counter bdw_compute_extended_query_counters[38];
static struct brw_perf_query_info bdw_compute_extended_query = {
   .kind = OA_COUNTERS,
   .name = "Compute Metrics Extended Gen8",
   .guid = "e99ccaca-821c-4df9-97a7-96bdb7204e43",
   .counters = bdw_compute_extended_query_counters,
   .n_counters = 0,
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,

   /* Accumulation buffer offsets... */
   .gpu_time_offset = 0,
   .gpu_clock_offset = 1,
   .a_offset = 2,
   .b_offset = 38,
   .c_offset = 46,
   .mux_regs = bdw_compute_extended_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = bdw_compute_extended_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
   .flex_regs = bdw_compute_extended_flex_regs,
   .n_flex_regs = 0, /* Determined at runtime */
};

static void
register_compute_extended_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &bdw_compute_extended_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      if (brw->perfquery.sys_vars.subslice_mask & 0x01) {
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x143D0160 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x163D2800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x183D0120 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x105800E0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x005CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x065C8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x085CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A5CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C5CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E5CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x025CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x045CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x003D0011 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x063D0900 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x083D0A13 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A3D0B15 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C3D2317 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x043D21B7 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x103D0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E3D0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1A3D0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E5825C1 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00586100 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0258204C };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06588000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0858C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A58C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C58C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0458C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x005B4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E5B4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x185B5400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1A5B0155 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x025B4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x045B4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x065B4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x085B4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A5B4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C1FA800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1FAA2A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x101F02AA };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18381555 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0039A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06398000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0839A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A39A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C39A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E39A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0239A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0439A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8AAAA0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D8A0002 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B2AA0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258B5551 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x278B0015 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85AA80 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185AAA2 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x19835400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830155 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0184C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07848000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0984C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0384C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0584C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1180C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x17808000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1980C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1380C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1580C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00000D24, .val = 0x00000000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3D800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x43800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x51800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47800420 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F800421 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };
      }

      if (brw->perfquery.sys_vars.subslice_mask & 0x02) {
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x105C00E0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x145B0160 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x165B2800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x185B0120 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E5C25C1 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x005C6100 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x025C204C };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x065C8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x085CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A5CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C5CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x045CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x005B0011 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x065B0900 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x085B0A13 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A5B0B15 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C5B2317 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x045B21B7 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x105B0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E5B0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1A5B0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C1FA800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1FAA2A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x101F02AA };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18381555 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0039A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06398000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0839A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A39A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C39A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E39A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0239A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0439A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8AAAA0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D8A0002 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B2AA0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258B5551 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x278B0015 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85AA80 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185AAA2 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x19835400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830155 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0184C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07848000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0984C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0384C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0584C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1180C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x17808000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1980C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1380C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1580C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00000D24, .val = 0x00000000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3D800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x43800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x51800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47800420 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F800421 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };
      }

      if (brw->perfquery.sys_vars.subslice_mask & 0x04) {
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x103800E0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x143A0160 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x163A2800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x183A0120 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C1FA800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1FAA2A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x101F02AA };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E38A5C1 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0038A100 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0238204C };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16388000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x183802AA };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04380000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06380000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08388000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A388000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0039A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06398000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0839A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A39A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C39A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E39A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0239A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0439A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x003A0011 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x063A0900 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x083A0A13 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A3A0B15 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C3A2317 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x043A21B7 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x103A0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E3A0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1A3A0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8AAAA0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D8A0002 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B2AA0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258B5551 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x278B0015 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85AA80 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185AAA2 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x19835400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830155 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0184C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07848000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0984C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0384C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0584C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1180C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x17808000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1980C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1380C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1580C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00000D24, .val = 0x00000000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3D800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x43800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x51800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47800420 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F800421 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };
      }

      if (brw->perfquery.sys_vars.subslice_mask & 0x08) {
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x14BD0160 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16BD2800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18BD0120 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10D800E0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00DCC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06DC8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08DCC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0ADCC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CDCC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EDCC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02DCC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04DCC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00BD0011 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06BD0900 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08BD0A13 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0ABD0B15 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CBD2317 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04BD21B7 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10BD0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EBD0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1ABD0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0ED825C1 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00D86100 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02D8204C };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06D88000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08D8C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0AD8C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CD8C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04D8C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00DB4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EDB4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18DB5400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1ADB0155 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02DB4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04DB4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06DB4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08DB4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0ADB4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C9FA800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E9FAA2A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x109F02AA };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00B84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EB84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16B84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18B81555 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02B84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04B84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06B84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08B84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0AB84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00B9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06B98000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08B9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0AB9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CB9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EB9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02B9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04B9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D88F800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F88000F };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B5540 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258BAAA2 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x278B002A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x178C2000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198C5500 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8C0015 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078D8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85AA80 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185AAA2 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x19835400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830155 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0184C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07848000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0984C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0384C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0584C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1180C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x17808000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1980C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1380C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1580C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00000D24, .val = 0x00000000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3D800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x43800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x51800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47800420 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F800421 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };
      }

      if (brw->perfquery.sys_vars.subslice_mask & 0x10) {
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10DC00E0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x14DB0160 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16DB2800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18DB0120 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EDC25C1 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00DC6100 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02DC204C };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06DC8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08DCC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0ADCC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CDCC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04DCC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00DB0011 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06DB0900 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08DB0A13 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0ADB0B15 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CDB2317 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04DB21B7 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10DB0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EDB0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1ADB0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C9FA800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E9FAA2A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x109F02AA };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00B84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EB84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16B84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18B81555 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02B84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04B84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06B84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08B84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0AB84000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00B9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06B98000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08B9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0AB9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CB9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EB9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02B9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04B9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D88F800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F88000F };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B5540 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258BAAA2 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x278B002A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x178C2000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198C5500 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8C0015 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078D8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85AA80 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185AAA2 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x19835400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830155 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0184C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07848000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0984C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0384C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0584C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1180C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x17808000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1980C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1380C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1580C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00000D24, .val = 0x00000000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3D800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x43800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x51800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47800420 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F800421 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };
      }

      if (brw->perfquery.sys_vars.subslice_mask & 0x20) {
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10B800E0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x14BA0160 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16BA2800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18BA0120 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C9FA800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E9FAA2A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x109F02AA };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EB8A5C1 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00B8A100 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02B8204C };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16B88000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18B802AA };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04B80000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06B80000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08B88000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0AB88000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00B9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06B98000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08B9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0AB9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CB9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EB9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02B9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04B9A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00BA0011 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06BA0900 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08BA0A13 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0ABA0B15 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CBA2317 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04BA21B7 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10BA0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EBA0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1ABA0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D88F800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F88000F };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B888000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B5540 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258BAAA2 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x278B002A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x178C2000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198C5500 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8C0015 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078D8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058DA000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85AA80 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185AAA2 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x19835400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830155 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0184C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07848000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0984C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0384C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0584C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1180C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x17808000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1980C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1380C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1580C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00000D24, .val = 0x00000000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3D800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x43800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x51800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47800420 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F800421 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };
      }

      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002770, .val = 0x0007FC2A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002774, .val = 0x0000BF00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002778, .val = 0x0007FC6A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000277C, .val = 0x0000BF00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002780, .val = 0x0007FC92 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002784, .val = 0x0000BF00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002788, .val = 0x0007FCA2 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000278C, .val = 0x0000BF00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002790, .val = 0x0007FC32 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002794, .val = 0x0000BF00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002798, .val = 0x0007FC9A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000279C, .val = 0x0000BF00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A0, .val = 0x0007FE6A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A4, .val = 0x0000BF00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A8, .val = 0x0007FE7A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027AC, .val = 0x0000BF00 };

      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E458, .val = 0x00005004 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E558, .val = 0x00000003 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E658, .val = 0x00002001 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E758, .val = 0x00778008 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E45C, .val = 0x00088078 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E55C, .val = 0x00808708 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E65C, .val = 0x00A08908 };


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 12;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 16;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 20;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 2;
      counter->offset = 24;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__typed_writes0__read;
      counter->name = "Typed Writes 0";
      counter->desc = "The subslice 0 typed writes.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 32;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__eu_typed_atomics0__read;
      counter->name = "EuTypedAtomics0";
      counter->desc = "The subslice 0 EU Typed Atomics subslice 0.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 40;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__typed_atomics0__read;
      counter->name = "Typed Atomics 0";
      counter->desc = "The subslice 0 typed atomics.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 48;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__typed_atomics_per_cache_line__read;
      counter->name = "TypedAtomicsPerCacheLine";
      counter->desc = "Ratio of EU typed atomics requests to L3 cache line writes.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 56;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__eu_untyped_reads0__read;
      counter->name = "EuUntypedReads0";
      counter->desc = "The subslice 0 EU Untyped Reads subslice 0.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 64;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__untyped_writes0__read;
      counter->name = "Untyped Writes 0";
      counter->desc = "The subslice 0 untyped writes (including SLM writes).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 72;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__eu_untyped_atomics0__read;
      counter->name = "EuUntypedAtomics0";
      counter->desc = "The subslice 0 EU Untyped Atomics subslice 0.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 80;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__eu_untyped_writes0__read;
      counter->name = "EuUntypedWrites0";
      counter->desc = "The subslice 0 EU Untyped Writes subslice 0.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 88;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__eu_a64_untyped_writes0__read;
      counter->name = "EuA64UntypedWrites0";
      counter->desc = "The subslice 0 EU A64 Untyped Writes subslice 0.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 96;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__untyped_writes_per_cache_line__read;
      counter->name = "UntypedWritesPerCacheLine";
      counter->desc = "Ratio of EU untyped write requests to L3 cache line writes.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 104;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 120;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 128;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = bdw__compute_extended__avg_gpu_core_frequency__max(brw);
      counter->offset = 136;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 144;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 152;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 160;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__eu_typed_writes0__read;
      counter->name = "EuTypedWrites0";
      counter->desc = "The subslice 0 EU Typed Writes subslice 0.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 168;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__typed_writes_per_cache_line__read;
      counter->name = "TypedWritesPerCacheLine";
      counter->desc = "Ratio of EU typed write requests to L3 cache line writes.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 176;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__typed_reads0__read;
      counter->name = "Typed Reads 0";
      counter->desc = "The subslice 0 typed reads.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 184;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__untyped_reads0__read;
      counter->name = "Untyped Reads 0";
      counter->desc = "The subslice 0 untyped reads (including SLM reads).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 192;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__eu_a64_untyped_reads0__read;
      counter->name = "EuA64UntypedReads0";
      counter->desc = "The subslice 0 EU A64 Untyped Reads subslice 0.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 200;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 208;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 212;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__eu_typed_reads0__read;
      counter->name = "EuTypedReads0";
      counter->desc = "The subslice 0 EU Typed Reads subslice 0.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 216;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__untyped_reads_per_cache_line__read;
      counter->name = "UntypedReadsPerCacheLine";
      counter->desc = "Ratio of EU untyped read requests to L3 cache line reads.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 224;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 232;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__typed_reads_per_cache_line__read;
      counter->name = "TypedReadsPerCacheLine";
      counter->desc = "Ratio of EU typed read requests to L3 cache line reads.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 240;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 248;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 256;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 264;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 272;
      counter->size = sizeof(float);

      query->data_size = counter->offset + counter->size;
   }

   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Compute Metrics L3 Cache Gen8 :: GPU Core Clocks
 *
 * RPN equation: GPU_CLOCK 0 READ
 *
 * Returns the accumulator entry at index query->gpu_clock_offset + 0,
 * unmodified.
 */
static uint64_t
bdw__compute_l3_cache__gpu_core_clocks__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   return accumulator[query->gpu_clock_offset + 0];
}

/* Compute Metrics L3 Cache Gen8 :: EU Active
 *
 * RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * The A7 counter is divided by the EU count (integer division), multiplied
 * by 100 and then divided in floating point by the GPU core clocks.
 * A zero EU count or a zero clock count produces 0.
 */
static float
bdw__compute_l3_cache__eu_active__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 7];
   const uint64_t eu_total = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = eu_total ? raw_count / eu_total : 0;
   /* The x100 scaling happens in 64-bit unsigned math before widening. */
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__compute_l3_cache__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   const double result = scaled / core_clocks;
   return result;
}

/* Compute Metrics L3 Cache Gen8 :: L3 Bank 03 Accesses
 *
 * RPN equation: B 3 READ 2 UMUL
 *
 * Returns twice the value of the B3 counter.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank03_accesses__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   const uint64_t b3 = accumulator[query->b_offset + 3];

   return b3 * 2;
}

/* Compute Metrics L3 Cache Gen8 :: L3 Accesses
 *
 * RPN equation: C 0 READ C 1 READ B 2 READ B 3 READ C 2 READ C 3 READ B 6 READ B 7 READ UADD UADD UADD UADD UADD UADD UADD 2 UMUL
 *
 * Sums counters C0..C3 together with B2, B3, B6 and B7 and doubles the
 * total. All arithmetic is unsigned 64-bit, so the grouping of the
 * additions does not affect the result.
 */
static uint64_t
bdw__compute_l3_cache__l3_accesses__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t c_sum = accumulator[query->c_offset + 0] +
                          accumulator[query->c_offset + 1] +
                          accumulator[query->c_offset + 2] +
                          accumulator[query->c_offset + 3];
   const uint64_t b_sum = accumulator[query->b_offset + 2] +
                          accumulator[query->b_offset + 3] +
                          accumulator[query->b_offset + 6] +
                          accumulator[query->b_offset + 7];

   return (c_sum + b_sum) * 2;
}

/* Compute Metrics L3 Cache Gen8 :: EU Both FPU Pipes Active
 *
 * RPN equation: A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * The A9 counter is divided by the EU count (integer division), multiplied
 * by 100 and then divided in floating point by the GPU core clocks.
 * A zero EU count or a zero clock count produces 0.
 */
static float
bdw__compute_l3_cache__eu_fpu_both_active__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 9];
   const uint64_t eu_total = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = eu_total ? raw_count / eu_total : 0;
   /* The x100 scaling happens in 64-bit unsigned math before widening. */
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__compute_l3_cache__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   const double result = scaled / core_clocks;
   return result;
}

/* Compute Metrics L3 Cache Gen8 :: L3 Total Throughput
 *
 * RPN equation: $L3Accesses 64 UMUL
 *
 * Returns the L3 Accesses metric of this set multiplied by 64.
 */
static uint64_t
bdw__compute_l3_cache__l3_total_throughput__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   const uint64_t l3_accesses =
      bdw__compute_l3_cache__l3_accesses__read(brw, query, accumulator);

   return l3_accesses * 64;
}

/* Compute Metrics L3 Cache Gen8 :: EU FPU0 Pipe Active
 *
 * RPN equation: A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * The A10 counter is divided by the EU count (integer division), multiplied
 * by 100 and then divided in floating point by the GPU core clocks.
 * A zero EU count or a zero clock count produces 0.
 */
static float
bdw__compute_l3_cache__fpu0_active__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 10];
   const uint64_t eu_total = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = eu_total ? raw_count / eu_total : 0;
   /* The x100 scaling happens in 64-bit unsigned math before widening. */
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__compute_l3_cache__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   const double result = scaled / core_clocks;
   return result;
}

/* Compute Metrics L3 Cache Gen8 :: EU FPU1 Pipe Active
 *
 * RPN equation: A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * The A11 counter is divided by the EU count (integer division), multiplied
 * by 100 and then divided in floating point by the GPU core clocks.
 * A zero EU count or a zero clock count produces 0.
 */
static float
bdw__compute_l3_cache__fpu1_active__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 11];
   const uint64_t eu_total = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = eu_total ? raw_count / eu_total : 0;
   /* The x100 scaling happens in 64-bit unsigned math before widening. */
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__compute_l3_cache__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   const double result = scaled / core_clocks;
   return result;
}

/* Compute Metrics L3 Cache Gen8 :: EU AVG IPC Rate
 *
 * RPN equation: A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD
 *
 * Computes A9 / (A10 + A11 - A9) + 1. The denominator is evaluated in
 * unsigned 64-bit arithmetic (it wraps if A9 exceeds A10 + A11) before
 * being converted to floating point; a zero denominator makes the quotient
 * 0, so the function then returns 1.
 */
static float
bdw__compute_l3_cache__eu_avg_ipc_rate__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   const uint64_t a9 = accumulator[query->a_offset + 9];
   const uint64_t a10 = accumulator[query->a_offset + 10];
   const uint64_t a11 = accumulator[query->a_offset + 11];
   const uint64_t divisor_raw = (a10 + a11) - a9;
   const double dividend = a9;
   const double divisor = divisor_raw;
   double quotient = 0;

   if (divisor)
      quotient = dividend / divisor;

   const double rate = quotient + 1;
   return rate;
}

/* Compute Metrics L3 Cache Gen8 :: EU FPU0 Binary Instruction
 *
 * RPN equation: A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * The A17 counter is divided by the EU count (integer division), multiplied
 * by 100 and then divided in floating point by the GPU core clocks.
 * A zero EU count or a zero clock count produces 0.
 */
static float
bdw__compute_l3_cache__eu_binary_fpu0_instruction__read(struct brw_context *brw,
                                                        const struct brw_perf_query_info *query,
                                                        uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 17];
   const uint64_t eu_total = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = eu_total ? raw_count / eu_total : 0;
   /* The x100 scaling happens in 64-bit unsigned math before widening. */
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__compute_l3_cache__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   const double result = scaled / core_clocks;
   return result;
}

/* Compute Metrics L3 Cache Gen8 :: GS Threads Dispatched
 *
 * RPN equation: A 5 READ
 *
 * Returns the A5 counter unmodified.
 */
static uint64_t
bdw__compute_l3_cache__gs_threads__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   return accumulator[query->a_offset + 5];
}

/* Compute Metrics L3 Cache Gen8 :: Early Hi-Depth Test Fails
 *
 * RPN equation: A 22 READ 4 UMUL
 *
 * Returns the A22 counter multiplied by 4.
 */
static uint64_t
bdw__compute_l3_cache__hi_depth_test_fails__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   const uint64_t a22 = accumulator[query->a_offset + 22];

   return a22 * 4;
}

/* Compute Metrics L3 Cache Gen8 :: VS Threads Dispatched
 *
 * RPN equation: A 1 READ
 *
 * Returns the A1 counter unmodified.
 */
static uint64_t
bdw__compute_l3_cache__vs_threads__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   return accumulator[query->a_offset + 1];
}

/* Compute Metrics L3 Cache Gen8 :: FS Threads Dispatched
 *
 * RPN equation: A 6 READ
 *
 * Returns the A6 counter unmodified.
 */
static uint64_t
bdw__compute_l3_cache__ps_threads__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   return accumulator[query->a_offset + 6];
}

/* Compute Metrics L3 Cache Gen8 :: EU FPU0 Hybrid Instruction
 *
 * RPN equation: A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * The A13 counter is divided by the EU count (integer division), multiplied
 * by 100 and then divided in floating point by the GPU core clocks.
 * A zero EU count or a zero clock count produces 0.
 */
static float
bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read(struct brw_context *brw,
                                                        const struct brw_perf_query_info *query,
                                                        uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 13];
   const uint64_t eu_total = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = eu_total ? raw_count / eu_total : 0;
   /* The x100 scaling happens in 64-bit unsigned math before widening. */
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__compute_l3_cache__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   const double result = scaled / core_clocks;
   return result;
}

/* Compute Metrics L3 Cache Gen8 :: L3 Misses
 *
 * RPN equation: C 4 READ C 5 READ UADD
 *
 * Returns the sum of the C4 and C5 counters.
 */
static uint64_t
bdw__compute_l3_cache__l3_misses__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   const uint64_t c4 = accumulator[query->c_offset + 4];
   const uint64_t c5 = accumulator[query->c_offset + 5];

   return c4 + c5;
}

/* Compute Metrics L3 Cache Gen8 :: Shader Barrier Messages
 *
 * RPN equation: A 35 READ
 *
 * Returns the A35 counter unmodified.
 */
static uint64_t
bdw__compute_l3_cache__shader_barriers__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   return accumulator[query->a_offset + 35];
}

/* Compute Metrics L3 Cache Gen8 :: L3 Bank 00 Accesses
 *
 * RPN equation: C 0 READ 2 UMUL
 *
 * Returns twice the value of the C0 counter.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank00_accesses__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   const uint64_t c0 = accumulator[query->c_offset + 0];

   return c0 * 2;
}

/* Compute Metrics L3 Cache Gen8 :: EU FPU0 Move Instruction
 *
 * RPN equation: A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * The A19 counter is divided by the EU count (integer division), multiplied
 * by 100 and then divided in floating point by the GPU core clocks.
 * A zero EU count or a zero clock count produces 0.
 */
static float
bdw__compute_l3_cache__eu_move_fpu0_instruction__read(struct brw_context *brw,
                                                      const struct brw_perf_query_info *query,
                                                      uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 19];
   const uint64_t eu_total = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = eu_total ? raw_count / eu_total : 0;
   /* The x100 scaling happens in 64-bit unsigned math before widening. */
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__compute_l3_cache__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   const double result = scaled / core_clocks;
   return result;
}

/* Compute Metrics L3 Cache Gen8 :: Sampler Texels
 *
 * RPN equation: A 28 READ 4 UMUL
 *
 * Returns the A28 counter multiplied by 4.
 */
static uint64_t
bdw__compute_l3_cache__sampler_texels__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   const uint64_t a28 = accumulator[query->a_offset + 28];

   return a28 * 4;
}

/* Compute Metrics L3 Cache Gen8 :: Pixels Failing Tests
 *
 * RPN equation: A 25 READ 4 UMUL
 *
 * Returns the A25 counter multiplied by 4.
 */
static uint64_t
bdw__compute_l3_cache__pixels_failing_post_ps_tests__read(struct brw_context *brw,
                                                          const struct brw_perf_query_info *query,
                                                          uint64_t *accumulator)
{
   const uint64_t a25 = accumulator[query->a_offset + 25];

   return a25 * 4;
}

/* Compute Metrics L3 Cache Gen8 :: L3 Bank 10 Accesses
 *
 * RPN equation: C 2 READ 2 UMUL
 *
 * Returns twice the value of the C2 counter.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank10_accesses__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   const uint64_t c2 = accumulator[query->c_offset + 2];

   return c2 * 2;
}

/* Compute Metrics L3 Cache Gen8 :: EU FPU1 Hybrid Instruction
 *
 * RPN equation: A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * The A14 counter is divided by the EU count (integer division), multiplied
 * by 100 and then divided in floating point by the GPU core clocks.
 * A zero EU count or a zero clock count produces 0.
 */
static float
bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read(struct brw_context *brw,
                                                        const struct brw_perf_query_info *query,
                                                        uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 14];
   const uint64_t eu_total = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = eu_total ? raw_count / eu_total : 0;
   /* The x100 scaling happens in 64-bit unsigned math before widening. */
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__compute_l3_cache__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   const double result = scaled / core_clocks;
   return result;
}

/* Compute Metrics L3 Cache Gen8 :: GPU Time Elapsed
 *
 * RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV
 *
 * Scales the accumulated GPU_TIME value by 1000000000 and divides it by
 * the timestamp frequency, i.e. floor(ticks * 1e9 / frequency). Returns 0
 * when the timestamp frequency is 0.
 *
 * The straightforward "ticks * 1000000000" product wraps around in 64-bit
 * arithmetic once ticks exceeds UINT64_MAX / 1e9 (roughly 1.8e10). To avoid
 * that, the division is split into quotient and remainder:
 *
 *    ticks = q * freq + r   =>
 *    floor(ticks * 1e9 / freq) = q * 1e9 + floor(r * 1e9 / freq)
 *
 * This is exactly equal to the direct formula whenever the direct product
 * does not wrap, and stays correct for larger tick counts as long as
 * r * 1e9 fits in 64 bits (always true for frequencies below ~1.8e10).
 */
static uint64_t
bdw__compute_l3_cache__gpu_time__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   const uint64_t ticks = accumulator[query->gpu_time_offset + 0];
   const uint64_t freq = brw->perfquery.sys_vars.timestamp_frequency;

   /* Guard the division, matching the generated "x ? a / x : 0" idiom. */
   if (freq == 0)
      return 0;

   const uint64_t quotient = ticks / freq;
   const uint64_t remainder = ticks % freq;

   return quotient * 1000000000 + remainder * 1000000000 / freq;
}

/* Compute Metrics L3 Cache Gen8 :: AVG GPU Core Frequency
 *
 * RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV
 *
 * Multiplies the GPU Core Clocks metric by 1000000000 and divides it by
 * the GPU Time Elapsed metric of this set, returning 0 when that time is 0.
 * The multiplication is done in unsigned 64-bit arithmetic and therefore
 * wraps for very large clock counts.
 */
static uint64_t
bdw__compute_l3_cache__avg_gpu_core_frequency__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   const uint64_t scaled_clocks =
      bdw__compute_l3_cache__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t gpu_time =
      bdw__compute_l3_cache__gpu_time__read(brw, query, accumulator);

   if (gpu_time == 0)
      return 0;

   return scaled_clocks / gpu_time;
}

/* Compute Metrics L3 Cache Gen8 :: AVG GPU Core Frequency (maximum value)
 *
 * RPN equation: $GpuMaxFrequency
 *
 * Returns the gt_max_freq system variable stored in the context.
 */
static uint64_t
bdw__compute_l3_cache__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   const uint64_t max_freq = brw->perfquery.sys_vars.gt_max_freq;

   return max_freq;
}

/* Compute Metrics L3 Cache Gen8 :: Sampler Texels Misses
 *
 * RPN equation: A 29 READ 4 UMUL
 *
 * Returns the A29 counter multiplied by 4.
 */
static uint64_t
bdw__compute_l3_cache__sampler_texel_misses__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   const uint64_t a29 = accumulator[query->a_offset + 29];

   return a29 * 4;
}

/* Compute Metrics L3 Cache Gen8 :: CS Threads Dispatched
 *
 * RPN equation: A 4 READ
 *
 * Returns the A4 counter unmodified.
 */
static uint64_t
bdw__compute_l3_cache__cs_threads__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   return accumulator[query->a_offset + 4];
}

/* Compute Metrics L3 Cache Gen8 :: SLM Bytes Read
 *
 * RPN equation: A 30 READ 64 UMUL
 *
 * Returns the A30 counter multiplied by 64.
 */
static uint64_t
bdw__compute_l3_cache__slm_bytes_read__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   const uint64_t a30 = accumulator[query->a_offset + 30];

   return a30 * 64;
}

/* Compute Metrics L3 Cache Gen8 :: L3 Bank 10 IC Accesses
 *
 * RPN equation: B 4 READ B 5 READ UADD 2 UMUL $L3Bank10Accesses UMIN
 *
 * Computes (B4 + B5) * 2 and clamps it from above by the L3 Bank 10
 * Accesses metric, i.e. returns the smaller of the two values.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank10_ic_accesses__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   const uint64_t candidate =
      (accumulator[query->b_offset + 4] + accumulator[query->b_offset + 5]) * 2;
   const uint64_t ceiling =
      bdw__compute_l3_cache__l3_bank10_accesses__read(brw, query, accumulator);

   if (candidate < ceiling)
      return candidate;

   return ceiling;
}

/* Compute Metrics L3 Cache Gen8 :: L3 Bank 10 IC Hits
 *
 * RPN equation: B 5 READ 2 UMUL $L3Bank10IcAccesses UMIN
 *
 * Computes B5 * 2 and clamps it from above by the L3 Bank 10 IC Accesses
 * metric, i.e. returns the smaller of the two values.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank10_ic_hits__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   const uint64_t candidate = accumulator[query->b_offset + 5] * 2;
   const uint64_t ceiling =
      bdw__compute_l3_cache__l3_bank10_ic_accesses__read(brw, query, accumulator);

   if (candidate < ceiling)
      return candidate;

   return ceiling;
}

/* Compute Metrics L3 Cache Gen8 :: GTI Read Throughput
 *
 * RPN equation: C 6 READ 64 UMUL
 *
 * Returns the C6 counter multiplied by 64.
 */
static uint64_t
bdw__compute_l3_cache__gti_read_throughput__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   const uint64_t c6 = accumulator[query->c_offset + 6];

   return c6 * 64;
}

/* Compute Metrics L3 Cache Gen8 :: GTI L3 Throughput
 *
 * RPN equation: C 4 READ C 5 READ UADD 64 UMUL
 *
 * Returns the sum of the C4 and C5 counters multiplied by 64.
 */
static uint64_t
bdw__compute_l3_cache__gti_l3_throughput__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   const uint64_t c4 = accumulator[query->c_offset + 4];
   const uint64_t c5 = accumulator[query->c_offset + 5];

   return (c4 + c5) * 64;
}

/* Compute Metrics L3 Cache Gen8 :: L3 Bank 00 IC Accesses
 *
 * RPN equation: B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN
 *
 * Computes (B0 + B1) * 2 and clamps it from above by the L3 Bank 00
 * Accesses metric, i.e. returns the smaller of the two values.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank00_ic_accesses__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   const uint64_t candidate =
      (accumulator[query->b_offset + 0] + accumulator[query->b_offset + 1]) * 2;
   const uint64_t ceiling =
      bdw__compute_l3_cache__l3_bank00_accesses__read(brw, query, accumulator);

   if (candidate < ceiling)
      return candidate;

   return ceiling;
}

/* Compute Metrics L3 Cache Gen8 :: L3 Bank 00 IC Hits
 *
 * RPN equation: B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN
 *
 * Computes B1 * 2 and clamps it from above by the L3 Bank 00 IC Accesses
 * metric, i.e. returns the smaller of the two values.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank00_ic_hits__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   const uint64_t candidate = accumulator[query->b_offset + 1] * 2;
   const uint64_t ceiling =
      bdw__compute_l3_cache__l3_bank00_ic_accesses__read(brw, query, accumulator);

   if (candidate < ceiling)
      return candidate;

   return ceiling;
}

/* Compute Metrics L3 Cache Gen8 :: L3 Bank 01 Accesses
 *
 * RPN equation: C 1 READ 2 UMUL
 *
 * Returns twice the value of the C1 counter.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank01_accesses__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   const uint64_t c1 = accumulator[query->c_offset + 1];

   return c1 * 2;
}

/* Compute Metrics L3 Cache Gen8 :: GPU Busy
 *
 * RPN equation: A 0 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Multiplies the A0 counter by 100 (in unsigned 64-bit arithmetic) and
 * divides the result in floating point by the GPU core clocks. Returns 0
 * when the clock count is 0.
 */
static float
bdw__compute_l3_cache__gpu_busy__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   const uint64_t a0_x100 = accumulator[query->a_offset + 0] * 100;
   const double scaled = a0_x100;
   const double core_clocks =
      bdw__compute_l3_cache__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   const double result = scaled / core_clocks;
   return result;
}

/* Compute Metrics L3 Cache Gen8 :: EU FPU0 Ternary Instruction
 *
 * RPN equation: A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * The A15 counter is divided by the EU count (integer division), multiplied
 * by 100 and then divided in floating point by the GPU core clocks.
 * A zero EU count or a zero clock count produces 0.
 */
static float
bdw__compute_l3_cache__eu_ternary_fpu0_instruction__read(struct brw_context *brw,
                                                         const struct brw_perf_query_info *query,
                                                         uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 15];
   const uint64_t eu_total = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = eu_total ? raw_count / eu_total : 0;
   /* The x100 scaling happens in 64-bit unsigned math before widening. */
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__compute_l3_cache__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   const double result = scaled / core_clocks;
   return result;
}

/* Compute Metrics L3 Cache Gen8 :: Shader Atomic Memory Accesses
 *
 * RPN equation: A 34 READ
 */
static uint64_t
bdw__compute_l3_cache__shader_atomics__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* The accumulated A34 value is reported unmodified. */
   return accumulator[query->a_offset + 34];
}

/* Compute Metrics L3 Cache Gen8 :: Rasterized Pixels
 *
 * RPN equation: A 21 READ 4 UMUL
 */
static uint64_t
bdw__compute_l3_cache__rasterized_pixels__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   /* Accumulated A21 value, multiplied by four. */
   return accumulator[query->a_offset + 21] * 4;
}

/* Compute Metrics L3 Cache Gen8 :: DS Threads Dispatched
 *
 * RPN equation: A 3 READ
 */
static uint64_t
bdw__compute_l3_cache__ds_threads__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* The accumulated A3 value is reported unmodified. */
   return accumulator[query->a_offset + 3];
}

/* Compute Metrics L3 Cache Gen8 :: Samples Written
 *
 * RPN equation: A 26 READ 4 UMUL
 */
static uint64_t
bdw__compute_l3_cache__samples_written__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* Accumulated A26 value, multiplied by four. */
   return accumulator[query->a_offset + 26] * 4;
}

/* Compute Metrics L3 Cache Gen8 :: EU FPU1 Move Instruction
 *
 * RPN equation: A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * The accumulated A20 value is integer-divided by the EU count, scaled by
 * 100 and finally divided by the GPU core clock count.  Either divisor being
 * zero makes the corresponding quotient 0.
 */
static float
bdw__compute_l3_cache__eu_move_fpu1_instruction__read(struct brw_context *brw,
                                                      const struct brw_perf_query_info *query,
                                                      uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 20];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   /* Unsigned division first, then the x100 scaling, as the equation says. */
   const uint64_t scaled = (eu_count ? raw / eu_count : 0) * 100;
   const double gpu_clocks =
      bdw__compute_l3_cache__gpu_core_clocks__read(brw, query, accumulator);

   if (!gpu_clocks)
      return 0;

   return (double) scaled / gpu_clocks;
}

/* Compute Metrics L3 Cache Gen8 :: EU Stall
 *
 * RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * The accumulated A8 value is integer-divided by the EU count, scaled by 100
 * and finally divided by the GPU core clock count.  Either divisor being
 * zero makes the corresponding quotient 0.
 */
static float
bdw__compute_l3_cache__eu_stall__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 8];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   /* Unsigned division first, then the x100 scaling, as the equation says. */
   const uint64_t scaled = (eu_count ? raw / eu_count : 0) * 100;
   const double gpu_clocks =
      bdw__compute_l3_cache__gpu_core_clocks__read(brw, query, accumulator);

   if (!gpu_clocks)
      return 0;

   return (double) scaled / gpu_clocks;
}

/* Compute Metrics L3 Cache Gen8 :: Samples Blended
 *
 * RPN equation: A 27 READ 4 UMUL
 */
static uint64_t
bdw__compute_l3_cache__samples_blended__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* Accumulated A27 value, multiplied by four. */
   return accumulator[query->a_offset + 27] * 4;
}

/* Compute Metrics L3 Cache Gen8 :: Early Depth Test Fails
 *
 * RPN equation: A 23 READ 4 UMUL
 */
static uint64_t
bdw__compute_l3_cache__early_depth_test_fails__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   /* Accumulated A23 value, multiplied by four. */
   return accumulator[query->a_offset + 23] * 4;
}

/* Compute Metrics L3 Cache Gen8 :: L3 Bank 11 Accesses
 *
 * RPN equation: C 3 READ 2 UMUL
 */
static uint64_t
bdw__compute_l3_cache__l3_bank11_accesses__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* Accumulated C3 value, doubled. */
   return accumulator[query->c_offset + 3] * 2;
}

/* Compute Metrics L3 Cache Gen8 :: L3 Bank 02 Accesses
 *
 * RPN equation: B 2 READ 2 UMUL
 */
static uint64_t
bdw__compute_l3_cache__l3_bank02_accesses__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* Accumulated B2 value, doubled. */
   return accumulator[query->b_offset + 2] * 2;
}

/* Compute Metrics L3 Cache Gen8 :: L3 Bank 13 Accesses
 *
 * RPN equation: B 7 READ 2 UMUL
 */
static uint64_t
bdw__compute_l3_cache__l3_bank13_accesses__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* Accumulated B7 value, doubled. */
   return accumulator[query->b_offset + 7] * 2;
}

/* Compute Metrics L3 Cache Gen8 :: Shader Memory Accesses
 *
 * RPN equation: A 32 READ
 */
static uint64_t
bdw__compute_l3_cache__shader_memory_accesses__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   /* The accumulated A32 value is reported unmodified. */
   return accumulator[query->a_offset + 32];
}

/* Compute Metrics L3 Cache Gen8 :: HS Threads Dispatched
 *
 * RPN equation: A 2 READ
 */
static uint64_t
bdw__compute_l3_cache__hs_threads__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* The accumulated A2 value is reported unmodified. */
   return accumulator[query->a_offset + 2];
}

/* Compute Metrics L3 Cache Gen8 :: GTI Write Throughput
 *
 * RPN equation: C 7 READ 64 UMUL
 */
static uint64_t
bdw__compute_l3_cache__gti_write_throughput__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   /* Accumulated C7 value, multiplied by 64. */
   return accumulator[query->c_offset + 7] * 64;
}

/* Compute Metrics L3 Cache Gen8 :: SLM Bytes Written
 *
 * RPN equation: A 31 READ 64 UMUL
 */
static uint64_t
bdw__compute_l3_cache__slm_bytes_written__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   /* Accumulated A31 value, multiplied by 64. */
   return accumulator[query->a_offset + 31] * 64;
}

/* Compute Metrics L3 Cache Gen8 :: L3 Shader Throughput
 *
 * RPN equation: A 30 READ A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL
 *
 * Sums the accumulated A30 and A31 values with the Shader Memory Accesses
 * counter and multiplies the total by 64.
 */
static uint64_t
bdw__compute_l3_cache__l3_shader_throughput__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   uint64_t total = accumulator[query->a_offset + 30];

   /* Unsigned additions wrap, so the grouping of the three terms does not
    * affect the result.
    */
   total += accumulator[query->a_offset + 31];
   total += bdw__compute_l3_cache__shader_memory_accesses__read(brw, query, accumulator);

   return total * 64;
}

/* Compute Metrics L3 Cache Gen8 :: Samples Killed in FS
 *
 * RPN equation: A 24 READ 4 UMUL
 */
static uint64_t
bdw__compute_l3_cache__samples_killed_in_ps__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   /* Accumulated A24 value, multiplied by four. */
   return accumulator[query->a_offset + 24] * 4;
}

/* Compute Metrics L3 Cache Gen8 :: L3 Bank 12 Accesses
 *
 * RPN equation: B 6 READ 2 UMUL
 */
static uint64_t
bdw__compute_l3_cache__l3_bank12_accesses__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* Accumulated B6 value, doubled. */
   return accumulator[query->b_offset + 6] * 2;
}

/* Compute Metrics L3 Cache Gen8 :: EU FPU1 Binary Instruction
 *
 * RPN equation: A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * The accumulated A18 value is integer-divided by the EU count, scaled by
 * 100 and finally divided by the GPU core clock count.  Either divisor being
 * zero makes the corresponding quotient 0.
 */
static float
bdw__compute_l3_cache__eu_binary_fpu1_instruction__read(struct brw_context *brw,
                                                        const struct brw_perf_query_info *query,
                                                        uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 18];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   /* Unsigned division first, then the x100 scaling, as the equation says. */
   const uint64_t scaled = (eu_count ? raw / eu_count : 0) * 100;
   const double gpu_clocks =
      bdw__compute_l3_cache__gpu_core_clocks__read(brw, query, accumulator);

   if (!gpu_clocks)
      return 0;

   return (double) scaled / gpu_clocks;
}

/* Compute Metrics L3 Cache Gen8 :: EU FPU1 Ternary Instruction
 *
 * RPN equation: A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * The accumulated A16 value is integer-divided by the EU count, scaled by
 * 100 and finally divided by the GPU core clock count.  Either divisor being
 * zero makes the corresponding quotient 0.
 */
static float
bdw__compute_l3_cache__eu_ternary_fpu1_instruction__read(struct brw_context *brw,
                                                         const struct brw_perf_query_info *query,
                                                         uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 16];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   /* Unsigned division first, then the x100 scaling, as the equation says. */
   const uint64_t scaled = (eu_count ? raw / eu_count : 0) * 100;
   const double gpu_clocks =
      bdw__compute_l3_cache__gpu_core_clocks__read(brw, query, accumulator);

   if (!gpu_clocks)
      return 0;

   return (double) scaled / gpu_clocks;
}

/* Compute Metrics L3 Cache Gen8 :: EU Send Pipe Active
 *
 * RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * The accumulated A12 value is integer-divided by the EU count, scaled by
 * 100 and finally divided by the GPU core clock count.  Either divisor being
 * zero makes the corresponding quotient 0.
 */
static float
bdw__compute_l3_cache__eu_send_active__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 12];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   /* Unsigned division first, then the x100 scaling, as the equation says. */
   const uint64_t scaled = (eu_count ? raw / eu_count : 0) * 100;
   const double gpu_clocks =
      bdw__compute_l3_cache__gpu_core_clocks__read(brw, query, accumulator);

   if (!gpu_clocks)
      return 0;

   return (double) scaled / gpu_clocks;
}

/* Backing storage for the register programming lists of the
 * "Compute Metrics L3 Cache Gen8" metric set.  The arrays start out empty;
 * register_compute_l3_cache_counter_query() appends the entries (107 mux,
 * 13 B-counter and 7 flex register writes) and bumps the matching n_*_regs
 * count in the query below for each one.
 */
static struct brw_perf_query_register_prog bdw_compute_l3_cache_mux_regs[107];
static struct brw_perf_query_register_prog bdw_compute_l3_cache_b_counter_regs[13];
static struct brw_perf_query_register_prog bdw_compute_l3_cache_flex_regs[7];

/* Counter descriptions, likewise appended by
 * register_compute_l3_cache_counter_query(); n_counters tracks how many
 * slots are in use.
 */
static struct brw_perf_query_counter bdw_compute_l3_cache_query_counters[58];

/* Query description for the "Compute Metrics L3 Cache Gen8" metric set.
 * Every count starts at zero and is advanced while the registration
 * function fills in the arrays above.
 */
static struct brw_perf_query_info bdw_compute_l3_cache_query = {
   .kind = OA_COUNTERS,
   .name = "Compute Metrics L3 Cache Gen8",
   .guid = "27a364dc-8225-4ecb-b607-d6f1925598d9",
   .counters = bdw_compute_l3_cache_query_counters,
   .n_counters = 0, /* incremented as counters are registered */
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,

   /* Accumulation buffer offsets...
    *
    * The counter read functions index the accumulator relative to these,
    * e.g. accumulator[query->a_offset + i] for "A i READ".
    * NOTE(review): the A/B/C spacing (36 and 8 slots) presumably mirrors the
    * A32 + A4, B8, C8 layout named by oa_format -- confirm.
    */
   .gpu_time_offset = 0,
   .gpu_clock_offset = 1,
   .a_offset = 2,
   .b_offset = 38,
   .c_offset = 46,
   .mux_regs = bdw_compute_l3_cache_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = bdw_compute_l3_cache_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
   .flex_regs = bdw_compute_l3_cache_flex_regs,
   .n_flex_regs = 0, /* Determined at runtime */
};

static void
register_compute_l3_cache_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &bdw_compute_l3_cache_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x143F00B3 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x14BF00B3 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x138303C0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3B800060 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3D800805 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x003F0029 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x063F1400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x083F1225 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E3F1327 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x103F0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x005A4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x065A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x085AC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E5AC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x001D4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x061D8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x081DC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1DC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C1F0800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1F2A00 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x101F0280 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00391000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06394000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08395000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E395000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0ABF1429 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CBF1225 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00BF1380 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02BF0026 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10BF0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0ADAC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CDAC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00DA8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02DA4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A9DC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C9DC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x009D8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x029D4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E9F8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x109F002A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C9FA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0AB95000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CB95000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00B94000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02B91000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D88C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F880003 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03888000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05888000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8A8020 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D8A0002 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B0520 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258BA950 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x278B0016 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198C5400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8C0001 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038C4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058C4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8DA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8DA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018D8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038D2000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85AA80 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185AAA0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03835180 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05834022 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x11830000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x19835400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830155 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07830000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09830000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0184C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07848000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0984C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0384C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05844000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B80C137 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C147 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1180C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x17808000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1980C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1380C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x15804000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00000D24, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D801000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800111 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x43800842 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x51800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47800840 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F800800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x418014A2 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };

      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0x30800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0x30800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002770, .val = 0x0007FFFA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002774, .val = 0x0000FEFE };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002778, .val = 0x0007FFFA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000277C, .val = 0x0000FEFD };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002790, .val = 0x0007FFFA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002794, .val = 0x0000FBEF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002798, .val = 0x0007FFFA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000279C, .val = 0x0000FBDF };

      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E458, .val = 0x00005004 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E558, .val = 0x00000003 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E658, .val = 0x00002001 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E758, .val = 0x00101100 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E45C, .val = 0x00201200 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E55C, .val = 0x00301300 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E65C, .val = 0x00401400 };


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      if (brw->perfquery.sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank03_accesses__read;
         counter->name = "L3 Bank 03 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 03.";
         counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
         counter->raw_max = 0; /* undefined */
         counter->offset = 16;
         counter->size = sizeof(uint64_t);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_accesses__read;
      counter->name = "L3 Accesses";
      counter->desc = "The total number of L3 accesses from all entities.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 24;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 32;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_total_throughput__read;
      counter->name = "L3 Total Throughput";
      counter->desc = "The total number of GPU memory bytes transferred via L3.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 40;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 48;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 52;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 2;
      counter->offset = 56;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_binary_fpu0_instruction__read;
      counter->name = "EU FPU0 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU0.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 60;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 64;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 72;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 80;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 88;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read;
      counter->name = "EU FPU0 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU0.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 96;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 104;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank00_accesses__read;
         counter->name = "L3 Bank 00 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00.";
         counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
         counter->raw_max = 0; /* undefined */
         counter->offset = 120;
         counter->size = sizeof(uint64_t);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_move_fpu0_instruction__read;
      counter->name = "EU FPU0 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU0.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 128;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 136;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 144;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.slice_mask & 0x02) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank10_accesses__read;
         counter->name = "L3 Bank 10 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 10.";
         counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
         counter->raw_max = 0; /* undefined */
         counter->offset = 152;
         counter->size = sizeof(uint64_t);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read;
      counter->name = "EU FPU1 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU1.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 160;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 168;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = bdw__compute_l3_cache__avg_gpu_core_frequency__max(brw);
      counter->offset = 176;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 184;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 192;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 200;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.slice_mask & 0x02) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank10_ic_accesses__read;
         counter->name = "L3 Bank 10 IC Accesses";
         counter->desc = "The total number of accesses to L3 Bank 10 from IC cache.";
         counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
         counter->raw_max = 0; /* undefined */
         counter->offset = 208;
         counter->size = sizeof(uint64_t);
      }

      if (brw->perfquery.sys_vars.slice_mask & 0x02) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank10_ic_hits__read;
         counter->name = "L3 Bank 10 IC Hits";
         counter->desc = "The total number of hits in L3 Bank 10 from IC cache.";
         counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
         counter->raw_max = 0; /* undefined */
         counter->offset = 216;
         counter->size = sizeof(uint64_t);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 224;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 232;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank00_ic_accesses__read;
         counter->name = "L3 Bank 00 IC Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00 from IC cache.";
         counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
         counter->raw_max = 0; /* undefined */
         counter->offset = 240;
         counter->size = sizeof(uint64_t);
      }

      if (brw->perfquery.sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank00_ic_hits__read;
         counter->name = "L3 Bank 00 IC Hits";
         counter->desc = "The total number of hits in L3 Bank 00 from IC cache.";
         counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
         counter->raw_max = 0; /* undefined */
         counter->offset = 248;
         counter->size = sizeof(uint64_t);
      }

      if (brw->perfquery.sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank01_accesses__read;
         counter->name = "L3 Bank 01 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 01.";
         counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
         counter->raw_max = 0; /* undefined */
         counter->offset = 256;
         counter->size = sizeof(uint64_t);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 264;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_ternary_fpu0_instruction__read;
      counter->name = "EU FPU0 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU0.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 268;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 272;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 280;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 288;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 296;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_move_fpu1_instruction__read;
      counter->name = "EU FPU1 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU1.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 304;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 308;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 312;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 320;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.slice_mask & 0x02) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank11_accesses__read;
         counter->name = "L3 Bank 11 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 11.";
         counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
         counter->raw_max = 0; /* undefined */
         counter->offset = 328;
         counter->size = sizeof(uint64_t);
      }

      if (brw->perfquery.sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank02_accesses__read;
         counter->name = "L3 Bank 02 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 02.";
         counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
         counter->raw_max = 0; /* undefined */
         counter->offset = 336;
         counter->size = sizeof(uint64_t);
      }

      if (brw->perfquery.sys_vars.slice_mask & 0x02) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank13_accesses__read;
         counter->name = "L3 Bank 13 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 13.";
         counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
         counter->raw_max = 0; /* undefined */
         counter->offset = 344;
         counter->size = sizeof(uint64_t);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 352;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 360;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 368;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 376;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 384;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 392;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.slice_mask & 0x02) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank12_accesses__read;
         counter->name = "L3 Bank 12 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 12.";
         counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
         counter->raw_max = 0; /* undefined */
         counter->offset = 400;
         counter->size = sizeof(uint64_t);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_binary_fpu1_instruction__read;
      counter->name = "EU FPU1 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU1.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 408;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_ternary_fpu1_instruction__read;
      counter->name = "EU FPU1 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU1.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 412;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 416;
      counter->size = sizeof(float);

      query->data_size = counter->offset + counter->size;
   }

   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Data Port Reads Coalescing Gen8 :: GPU Core Clocks
 *
 * Returns the accumulated value stored at index query->gpu_clock_offset of
 * the accumulator array.  The brw argument is not used by this reader.
 */
static uint64_t
bdw__data_port_reads_coalescing__gpu_core_clocks__read(struct brw_context *brw,
                                                       const struct brw_perf_query_info *query,
                                                       uint64_t *accumulator)
{
   /* RPN equation: GPU_CLOCK 0 READ */
   return accumulator[query->gpu_clock_offset + 0];
}

/* Data Port Reads Coalescing Gen8 :: EU Active
 *
 * Takes accumulated counter A7, divides it by the EU count using integer
 * division, scales the quotient by 100 and finally divides by the GPU core
 * clock count in floating point.  A zero EU count or a zero clock count
 * makes the corresponding division evaluate to 0 instead of dividing by
 * zero.
 */
static float
bdw__data_port_reads_coalescing__eu_active__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t a7 = accumulator[query->a_offset + 7];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   /* The scaling by 100 happens after the (truncating) integer division. */
   const uint64_t scaled = (n_eus != 0 ? a7 / n_eus : 0) * 100;
   const double clocks =
      bdw__data_port_reads_coalescing__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = (double)scaled / clocks;

   return result;
}

/* Data Port Reads Coalescing Gen8 :: EU Both FPU Pipes Active
 *
 * Takes accumulated counter A9, divides it by the EU count using integer
 * division, scales the quotient by 100 and finally divides by the GPU core
 * clock count in floating point.  A zero EU count or a zero clock count
 * makes the corresponding division evaluate to 0 instead of dividing by
 * zero.
 */
static float
bdw__data_port_reads_coalescing__eu_fpu_both_active__read(struct brw_context *brw,
                                                          const struct brw_perf_query_info *query,
                                                          uint64_t *accumulator)
{
   /* RPN equation: A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t a9 = accumulator[query->a_offset + 9];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   /* The scaling by 100 happens after the (truncating) integer division. */
   const uint64_t scaled = (n_eus != 0 ? a9 / n_eus : 0) * 100;
   const double clocks =
      bdw__data_port_reads_coalescing__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = (double)scaled / clocks;

   return result;
}

/* Data Port Reads Coalescing Gen8 :: EU FPU0 Pipe Active
 *
 * Takes accumulated counter A10, divides it by the EU count using integer
 * division, scales the quotient by 100 and finally divides by the GPU core
 * clock count in floating point.  A zero EU count or a zero clock count
 * makes the corresponding division evaluate to 0 instead of dividing by
 * zero.
 */
static float
bdw__data_port_reads_coalescing__fpu0_active__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   /* RPN equation: A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t a10 = accumulator[query->a_offset + 10];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   /* The scaling by 100 happens after the (truncating) integer division. */
   const uint64_t scaled = (n_eus != 0 ? a10 / n_eus : 0) * 100;
   const double clocks =
      bdw__data_port_reads_coalescing__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = (double)scaled / clocks;

   return result;
}

/* Data Port Reads Coalescing Gen8 :: EU FPU1 Pipe Active
 *
 * Takes accumulated counter A11, divides it by the EU count using integer
 * division, scales the quotient by 100 and finally divides by the GPU core
 * clock count in floating point.  A zero EU count or a zero clock count
 * makes the corresponding division evaluate to 0 instead of dividing by
 * zero.
 */
static float
bdw__data_port_reads_coalescing__fpu1_active__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   /* RPN equation: A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t a11 = accumulator[query->a_offset + 11];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   /* The scaling by 100 happens after the (truncating) integer division. */
   const uint64_t scaled = (n_eus != 0 ? a11 / n_eus : 0) * 100;
   const double clocks =
      bdw__data_port_reads_coalescing__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = (double)scaled / clocks;

   return result;
}

/* Data Port Reads Coalescing Gen8 :: EU AVG IPC Rate
 *
 * Computes A9 / (A10 + A11 - A9) + 1 from the accumulated A counters.  The
 * denominator is evaluated in unsigned 64-bit arithmetic before being
 * converted to floating point; when it is zero the quotient is taken as 0,
 * so the function returns 1.
 */
static float
bdw__data_port_reads_coalescing__eu_avg_ipc_rate__read(struct brw_context *brw,
                                                       const struct brw_perf_query_info *query,
                                                       uint64_t *accumulator)
{
   /* RPN equation: A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD */
   const uint64_t a9 = accumulator[query->a_offset + 9];
   const uint64_t a10 = accumulator[query->a_offset + 10];
   const uint64_t a11 = accumulator[query->a_offset + 11];
   const uint64_t divisor = (a10 + a11) - a9;
   const double numerator = a9;
   const double denominator = divisor;
   double rate = 0;

   if (denominator != 0)
      rate = numerator / denominator;

   rate = rate + 1;

   return rate;
}

/* Data Port Reads Coalescing Gen8 :: GS Threads Dispatched
 *
 * RPN equation: A 5 READ
 *
 * Returns the accumulated "A" counter at index 5 unmodified.
 */
static uint64_t
bdw__data_port_reads_coalescing__gs_threads__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   return accumulator[query->a_offset + 5];
}

/* Data Port Reads Coalescing Gen8 :: VS Threads Dispatched
 *
 * RPN equation: A 1 READ
 *
 * Returns the accumulated "A" counter at index 1 unmodified.
 */
static uint64_t
bdw__data_port_reads_coalescing__vs_threads__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   return accumulator[query->a_offset + 1];
}

/* Data Port Reads Coalescing Gen8 :: FS Threads Dispatched
 *
 * RPN equation: A 6 READ
 *
 * Returns the accumulated "A" counter at index 6 unmodified.
 */
static uint64_t
bdw__data_port_reads_coalescing__ps_threads__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   return accumulator[query->a_offset + 6];
}

/* Data Port Reads Coalescing Gen8 :: Shader Barrier Messages
 *
 * RPN equation: A 35 READ
 *
 * Returns the accumulated "A" counter at index 35 unmodified.
 */
static uint64_t
bdw__data_port_reads_coalescing__shader_barriers__read(struct brw_context *brw,
                                                       const struct brw_perf_query_info *query,
                                                       uint64_t *accumulator)
{
   return accumulator[query->a_offset + 35];
}

/* Data Port Reads Coalescing Gen8 :: Sampler Texels
 *
 * RPN equation: A 28 READ 4 UMUL
 *
 * Returns the accumulated "A" counter at index 28 multiplied by 4
 * (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__data_port_reads_coalescing__sampler_texels__read(struct brw_context *brw,
                                                      const struct brw_perf_query_info *query,
                                                      uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 28];

   return raw_count * 4;
}

/* Data Port Reads Coalescing Gen8 :: GPU Time Elapsed
 *
 * RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV
 *
 * Scales the accumulated GPU_TIME value by 1000000000 and divides it by
 * the timestamp frequency using integer division.  A zero frequency
 * yields 0.
 *
 * NOTE(review): the multiplication is done in unsigned 64-bit arithmetic
 * and wraps silently for large accumulated values.
 */
static uint64_t
bdw__data_port_reads_coalescing__gpu_time__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   const uint64_t scaled_ticks = accumulator[query->gpu_time_offset + 0] * 1000000000;
   const uint64_t frequency = brw->perfquery.sys_vars.timestamp_frequency;

   if (!frequency)
      return 0;

   return scaled_ticks / frequency;
}

/* Data Port Reads Coalescing Gen8 :: AVG GPU Core Frequency
 *
 * RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV
 *
 * Multiplies the GPU core clock count by 1000000000 and divides it by the
 * elapsed GPU time using integer division.  A zero elapsed time yields 0.
 *
 * NOTE(review): the product is stored in a uint64_t, so it presumably
 * wraps for large clock counts -- confirm whether that matters for the
 * sampling periods in use.
 */
static uint64_t
bdw__data_port_reads_coalescing__avg_gpu_core_frequency__read(struct brw_context *brw,
                                                              const struct brw_perf_query_info *query,
                                                              uint64_t *accumulator)
{
   const uint64_t scaled_clocks = bdw__data_port_reads_coalescing__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t elapsed = bdw__data_port_reads_coalescing__gpu_time__read(brw, query, accumulator);

   if (!elapsed)
      return 0;

   return scaled_clocks / elapsed;
}

/* Data Port Reads Coalescing Gen8 :: AVG GPU Core Frequency
 *
 * RPN equation: $GpuMaxFrequency
 *
 * Upper bound for the counter: the gt_max_freq system variable stored in
 * the context.
 */
static uint64_t
bdw__data_port_reads_coalescing__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   const uint64_t max_frequency = brw->perfquery.sys_vars.gt_max_freq;

   return max_frequency;
}

/* Data Port Reads Coalescing Gen8 :: Sampler Texels Misses
 *
 * RPN equation: A 29 READ 4 UMUL
 *
 * Returns the accumulated "A" counter at index 29 multiplied by 4
 * (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__data_port_reads_coalescing__sampler_texel_misses__read(struct brw_context *brw,
                                                            const struct brw_perf_query_info *query,
                                                            uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 29];

   return raw_count * 4;
}

/* Data Port Reads Coalescing Gen8 :: CS Threads Dispatched
 *
 * RPN equation: A 4 READ
 *
 * Returns the accumulated "A" counter at index 4 unmodified.
 */
static uint64_t
bdw__data_port_reads_coalescing__cs_threads__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   return accumulator[query->a_offset + 4];
}

/* Data Port Reads Coalescing Gen8 :: SLM Bytes Read
 *
 * RPN equation: A 30 READ 64 UMUL
 *
 * Returns the accumulated "A" counter at index 30 multiplied by 64
 * (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__data_port_reads_coalescing__slm_bytes_read__read(struct brw_context *brw,
                                                      const struct brw_perf_query_info *query,
                                                      uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 30];

   return raw_count * 64;
}

/* Data Port Reads Coalescing Gen8 :: EU to Data Port 0 Reads 64
 *
 * RPN equation: B 5 READ
 *
 * Returns the accumulated "B" counter at index 5 unmodified.
 */
static uint64_t
bdw__data_port_reads_coalescing__eu_hdc0_reads64_b__read(struct brw_context *brw,
                                                         const struct brw_perf_query_info *query,
                                                         uint64_t *accumulator)
{
   return accumulator[query->b_offset + 5];
}

/* Data Port Reads Coalescing Gen8 :: Data Port 0 to L3 Data Reads
 *
 * RPN equation: C 4 READ
 *
 * Returns the accumulated "C" counter at index 4 unmodified.
 */
static uint64_t
bdw__data_port_reads_coalescing__hdc0_l3_data_reads__read(struct brw_context *brw,
                                                          const struct brw_perf_query_info *query,
                                                          uint64_t *accumulator)
{
   return accumulator[query->c_offset + 4];
}

/* Data Port Reads Coalescing Gen8 :: Data Port 0 to L3 Data Writes
 *
 * RPN equation: C 5 READ
 *
 * Returns the accumulated "C" counter at index 5 unmodified.
 */
static uint64_t
bdw__data_port_reads_coalescing__hdc0_l3_data_writes__read(struct brw_context *brw,
                                                           const struct brw_perf_query_info *query,
                                                           uint64_t *accumulator)
{
   return accumulator[query->c_offset + 5];
}

/* Data Port Reads Coalescing Gen8 :: EU to Data Port 0 Reads 128
 *
 * RPN equation: B 6 READ
 *
 * Returns the accumulated "B" counter at index 6 unmodified.
 */
static uint64_t
bdw__data_port_reads_coalescing__eu_hdc0_reads128_b__read(struct brw_context *brw,
                                                          const struct brw_perf_query_info *query,
                                                          uint64_t *accumulator)
{
   return accumulator[query->b_offset + 6];
}

/* Data Port Reads Coalescing Gen8 :: DS Threads Dispatched
 *
 * RPN equation: A 3 READ
 *
 * Returns the accumulated "A" counter at index 3 unmodified.
 */
static uint64_t
bdw__data_port_reads_coalescing__ds_threads__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   return accumulator[query->a_offset + 3];
}

/* Data Port Reads Coalescing Gen8 :: EU Thread Occupancy
 *
 * RPN equation: A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Takes the "A" counter at index 13 times 8, divides it by the EU count
 * and then by the per-EU thread count (both integer divisions), scales by
 * 100 and finally divides by the GPU core clock count in floating point.
 * Any zero divisor makes the corresponding quotient 0.
 */
static float
bdw__data_port_reads_coalescing__eu_thread_occupancy__read(struct brw_context *brw,
                                                           const struct brw_perf_query_info *query,
                                                           uint64_t *accumulator)
{
   uint64_t occupancy = accumulator[query->a_offset + 13] * 8;

   /* Both UDIV steps truncate; a zero divisor collapses the value to 0. */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   occupancy = eu_count ? occupancy / eu_count : 0;

   const uint64_t threads_per_eu = brw->perfquery.sys_vars.eu_threads_count;
   occupancy = threads_per_eu ? occupancy / threads_per_eu : 0;

   occupancy *= 100;

   const double core_clocks = bdw__data_port_reads_coalescing__gpu_core_clocks__read(brw, query, accumulator);
   const double ratio = core_clocks ? (double) occupancy / core_clocks : 0;

   return ratio;
}

/* Data Port Reads Coalescing Gen8 :: All Data Port 0 Writes to L3
 *
 * RPN equation: C 2 READ
 *
 * Returns the accumulated "C" counter at index 2 unmodified.
 */
static uint64_t
bdw__data_port_reads_coalescing__hdc0_l3_writes__read(struct brw_context *brw,
                                                      const struct brw_perf_query_info *query,
                                                      uint64_t *accumulator)
{
   return accumulator[query->c_offset + 2];
}

/* Data Port Reads Coalescing Gen8 :: EU Stall
 *
 * RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides the "A" counter at index 8 by the EU count (integer division),
 * scales the quotient by 100 and divides that by the GPU core clock count
 * in floating point.  A zero EU count or a zero clock count yields 0.
 */
static float
bdw__data_port_reads_coalescing__eu_stall__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 8];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t scaled = 0;

   /* UDIV step: truncating integer division, guarded against zero EUs. */
   if (eu_count)
      scaled = raw_count / eu_count;
   scaled *= 100;

   const double core_clocks = bdw__data_port_reads_coalescing__gpu_core_clocks__read(brw, query, accumulator);
   const double ratio = core_clocks ? (double) scaled / core_clocks : 0;

   return ratio;
}

/* Data Port Reads Coalescing Gen8 :: EU to Data Port 0 Reads 32
 *
 * RPN equation: B 4 READ
 *
 * Returns the accumulated "B" counter at index 4 unmodified.
 */
static uint64_t
bdw__data_port_reads_coalescing__eu_hdc0_reads32_b__read(struct brw_context *brw,
                                                         const struct brw_perf_query_info *query,
                                                         uint64_t *accumulator)
{
   return accumulator[query->b_offset + 4];
}

/* Data Port Reads Coalescing Gen8 :: EU to Data Port 0 Reads 256
 *
 * RPN equation: B 7 READ
 *
 * Returns the accumulated "B" counter at index 7 unmodified.
 */
static uint64_t
bdw__data_port_reads_coalescing__eu_hdc0_reads256_b__read(struct brw_context *brw,
                                                          const struct brw_perf_query_info *query,
                                                          uint64_t *accumulator)
{
   return accumulator[query->b_offset + 7];
}

/* Data Port Reads Coalescing Gen8 :: EuBytesReadPerCacheLine
 *
 * RPN equation: $EuHdc0Reads32B 32 UMUL $EuHdc0Reads64B 64 UMUL $EuHdc0Reads128B 128 UMUL $EuHdc0Reads256B 256 UMUL UADD UADD UADD $Hdc0L3DataReads FDIV
 *
 * Sums the 32/64/128/256 read counters, each weighted by its size, and
 * divides the total by the "Data Port 0 to L3 Data Reads" counter in
 * floating point.  A zero divisor yields 0.
 */
static float
bdw__data_port_reads_coalescing__eu_bytes_read_per_cache_line__read(struct brw_context *brw,
                                                                    const struct brw_perf_query_info *query,
                                                                    uint64_t *accumulator)
{
   /* Unsigned 64-bit running sum; the order of the additions is irrelevant. */
   uint64_t weighted_total = 0;

   weighted_total += bdw__data_port_reads_coalescing__eu_hdc0_reads32_b__read(brw, query, accumulator) * 32;
   weighted_total += bdw__data_port_reads_coalescing__eu_hdc0_reads64_b__read(brw, query, accumulator) * 64;
   weighted_total += bdw__data_port_reads_coalescing__eu_hdc0_reads128_b__read(brw, query, accumulator) * 128;
   weighted_total += bdw__data_port_reads_coalescing__eu_hdc0_reads256_b__read(brw, query, accumulator) * 256;

   const double total = weighted_total;
   const double l3_data_reads = bdw__data_port_reads_coalescing__hdc0_l3_data_reads__read(brw, query, accumulator);
   double per_line = 0;

   if (l3_data_reads)
      per_line = total / l3_data_reads;

   return per_line;
}

/* Data Port Reads Coalescing Gen8 :: EuDataReadsPerCacheLine
 *
 * RPN equation: $EuBytesReadPerCacheLine 64 FDIV
 *
 * Returns the EuBytesReadPerCacheLine value divided by 64, computed in
 * double precision and narrowed to float on return.
 */
static float
bdw__data_port_reads_coalescing__eu_data_reads_per_cache_line__read(struct brw_context *brw,
                                                                    const struct brw_perf_query_info *query,
                                                                    uint64_t *accumulator)
{
   const double bytes_per_line = bdw__data_port_reads_coalescing__eu_bytes_read_per_cache_line__read(brw, query, accumulator);
   /* The divisor is the non-zero constant 64, so no zero guard is needed. */
   const double reads_per_line = bytes_per_line / 64.0;

   return reads_per_line;
}

/* Data Port Reads Coalescing Gen8 :: Shader Memory Accesses
 *
 * RPN equation: A 32 READ
 *
 * Returns the accumulated "A" counter at index 32 unmodified.
 */
static uint64_t
bdw__data_port_reads_coalescing__shader_memory_accesses__read(struct brw_context *brw,
                                                              const struct brw_perf_query_info *query,
                                                              uint64_t *accumulator)
{
   return accumulator[query->a_offset + 32];
}

/* Data Port Reads Coalescing Gen8 :: HS Threads Dispatched
 *
 * RPN equation: A 2 READ
 *
 * Returns the accumulated "A" counter at index 2 unmodified.
 */
static uint64_t
bdw__data_port_reads_coalescing__hs_threads__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   return accumulator[query->a_offset + 2];
}

/* Data Port Reads Coalescing Gen8 :: SLM Bytes Written
 *
 * RPN equation: A 31 READ 64 UMUL
 *
 * Returns the accumulated "A" counter at index 31 multiplied by 64
 * (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__data_port_reads_coalescing__slm_bytes_written__read(struct brw_context *brw,
                                                         const struct brw_perf_query_info *query,
                                                         uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 31];

   return raw_count * 64;
}

/* Data Port Reads Coalescing Gen8 :: L3 Shader Throughput
 *
 * RPN equation: A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL
 *
 * Adds the "A" counters at indices 30 and 31 to the Shader Memory Accesses
 * counter and multiplies the sum by 64 (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__data_port_reads_coalescing__l3_shader_throughput__read(struct brw_context *brw,
                                                            const struct brw_perf_query_info *query,
                                                            uint64_t *accumulator)
{
   const uint64_t a30 = accumulator[query->a_offset + 30];
   const uint64_t a31 = accumulator[query->a_offset + 31];
   const uint64_t memory_accesses = bdw__data_port_reads_coalescing__shader_memory_accesses__read(brw, query, accumulator);
   const uint64_t access_total = a30 + a31 + memory_accesses;

   return access_total * 64;
}

/* Data Port Reads Coalescing Gen8 :: All Data Port 0 Reads from L3
 *
 * RPN equation: C 3 READ C 2 READ USUB
 *
 * Returns the "C" counter at index 3 minus the "C" counter at index 2.
 * The subtraction is unsigned 64-bit, so it wraps if C2 exceeds C3.
 */
static uint64_t
bdw__data_port_reads_coalescing__hdc0_l3_reads__read(struct brw_context *brw,
                                                     const struct brw_perf_query_info *query,
                                                     uint64_t *accumulator)
{
   const uint64_t minuend = accumulator[query->c_offset + 3];
   const uint64_t subtrahend = accumulator[query->c_offset + 2];

   return minuend - subtrahend;
}

/* Data Port Reads Coalescing Gen8 :: Shader Atomic Memory Accesses
 *
 * RPN equation: A 34 READ
 *
 * Returns the accumulated "A" counter at index 34 unmodified.
 */
static uint64_t
bdw__data_port_reads_coalescing__shader_atomics__read(struct brw_context *brw,
                                                      const struct brw_perf_query_info *query,
                                                      uint64_t *accumulator)
{
   return accumulator[query->a_offset + 34];
}

/* Data Port Reads Coalescing Gen8 :: EU Send Pipe Active
 *
 * RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides the "A" counter at index 12 by the EU count (integer division),
 * scales the quotient by 100 and divides that by the GPU core clock count
 * in floating point.  A zero EU count or a zero clock count yields 0.
 */
static float
bdw__data_port_reads_coalescing__eu_send_active__read(struct brw_context *brw,
                                                      const struct brw_perf_query_info *query,
                                                      uint64_t *accumulator)
{
   const uint64_t raw_count = accumulator[query->a_offset + 12];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t scaled = 0;

   /* UDIV step: truncating integer division, guarded against zero EUs. */
   if (eu_count)
      scaled = raw_count / eu_count;
   scaled *= 100;

   const double core_clocks = bdw__data_port_reads_coalescing__gpu_core_clocks__read(brw, query, accumulator);
   const double ratio = core_clocks ? (double) scaled / core_clocks : 0;

   return ratio;
}

/* Backing storage for the register programming lists that
 * bdw_data_port_reads_coalescing_query points at.  The arrays have static
 * storage and so start out zeroed; the mux list is appended to entry by
 * entry in register_data_port_reads_coalescing_counter_query().
 * NOTE(review): the array sizes are presumably the maximum number of
 * entries the generator can emit for this metric set -- confirm against
 * brw_oa.py before changing them. */
static struct brw_perf_query_register_prog bdw_data_port_reads_coalescing_mux_regs[114];
static struct brw_perf_query_register_prog bdw_data_port_reads_coalescing_b_counter_regs[24];
static struct brw_perf_query_register_prog bdw_data_port_reads_coalescing_flex_regs[7];

/* Counter descriptor table referenced by the query's .counters field. */
static struct brw_perf_query_counter bdw_data_port_reads_coalescing_query_counters[35];
static struct brw_perf_query_info bdw_data_port_reads_coalescing_query = {
   .kind = OA_COUNTERS,
   .name = "Data Port Reads Coalescing Gen8",
   .guid = "857fc630-2f09-4804-85f1-084adfadd5ab",
   .counters = bdw_data_port_reads_coalescing_query_counters,
   .n_counters = 0,
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,

   /* Accumulation buffer offsets... */
   .gpu_time_offset = 0,
   .gpu_clock_offset = 1,
   .a_offset = 2,
   .b_offset = 38,
   .c_offset = 46,
   .mux_regs = bdw_data_port_reads_coalescing_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = bdw_data_port_reads_coalescing_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
   .flex_regs = bdw_data_port_reads_coalescing_flex_regs,
   .n_flex_regs = 0, /* Determined at runtime */
};

static void
register_data_port_reads_coalescing_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &bdw_data_port_reads_coalescing_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      if (brw->perfquery.sys_vars.subslice_mask & 0x01) {
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x103D0005 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x163D240B };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1058022F };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x185B5520 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198B0003 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x005CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x065CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x085CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A5CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C5CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E5CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x025C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x045C8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x003D0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x063D00B0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x083D0182 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A3D10A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C3D11A2 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E3D0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x183D0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1A3D0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E582242 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00586700 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0258004F };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0658C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0858C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A58C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C58C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x045B6300 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x105B0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x005B4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E5B4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1A5B0155 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x025B4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A5B0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C5B4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C1FA800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1FAAA0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x101F02AA };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18381555 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0039A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0639A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0839A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A39A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C39A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E39A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02392000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04398000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8AAAA0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D8A0002 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038B6300 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058B0062 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x118B0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B02A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258B5555 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x278B0015 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85AA80 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185AAAA };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x19835400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830155 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0184C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0784C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0984C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0384C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0584C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1180C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1780C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1980C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1380C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1580C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00000D24, .val = 0x00000000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D801000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3D800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800001 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x43800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x51800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47800420 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F800421 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800041 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };
      }

      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000274C, .val = 0xBA98BA98 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002748, .val = 0xBA98BA98 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002744, .val = 0x00003377 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002770, .val = 0x0007FFF2 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002774, .val = 0x00007FF0 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002778, .val = 0x0007FFE2 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000277C, .val = 0x00007FF0 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002780, .val = 0x0007FFC2 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002784, .val = 0x00007FF0 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002788, .val = 0x0007FF82 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000278C, .val = 0x00007FF0 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002790, .val = 0x0007FFFA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002794, .val = 0x0000BFEF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002798, .val = 0x0007FFFA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000279C, .val = 0x0000BFDF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A0, .val = 0x0007FFFA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A4, .val = 0x0000BFBF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A8, .val = 0x0007FFFA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027AC, .val = 0x0000BF7F };

      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E458, .val = 0x00005004 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E558, .val = 0x00000003 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E658, .val = 0x00002001 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E758, .val = 0x00778008 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E45C, .val = 0x00088078 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E55C, .val = 0x00808708 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E65C, .val = 0x00A08908 };


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 12;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 16;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 20;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 2;
      counter->offset = 24;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 32;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 40;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 48;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 56;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 64;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 72;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = bdw__data_port_reads_coalescing__avg_gpu_core_frequency__max(brw);
      counter->offset = 80;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 88;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 96;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 104;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__eu_hdc0_reads64_b__read;
      counter->name = "EU to Data Port 0 Reads 64";
      counter->desc = "The subslice 0 EU data reads from Data Port with 64B per message.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__hdc0_l3_data_reads__read;
      counter->name = "Data Port 0 to L3 Data Reads";
      counter->desc = "The subslice 0 Data Port data and constant reads from L3 cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 120;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__hdc0_l3_data_writes__read;
      counter->name = "Data Port 0 to L3 Data Writes";
      counter->desc = "The subslice 0 Data Port data writes to L3 cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 128;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__eu_hdc0_reads128_b__read;
      counter->name = "EU to Data Port 0 Reads 128";
      counter->desc = "The subslice 0 EU data reads from Data Port with 128B per message.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 136;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 144;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 152;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__hdc0_l3_writes__read;
      counter->name = "All Data Port 0 Writes to L3";
      counter->desc = "The subslice 0 Data Port writes to L3 cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 160;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 168;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__eu_hdc0_reads32_b__read;
      counter->name = "EU to Data Port 0 Reads 32";
      counter->desc = "The subslice 0 EU data reads from Data Port with 32B per message.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 176;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__eu_hdc0_reads256_b__read;
      counter->name = "EU to Data Port 0 Reads 256";
      counter->desc = "The subslice 0 EU data reads from Data Port with 256B per message.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 184;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__eu_bytes_read_per_cache_line__read;
      counter->name = "EuBytesReadPerCacheLine";
      counter->desc = "Average EU bytes read per L3 cache line.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 192;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__eu_data_reads_per_cache_line__read;
      counter->name = "EuDataReadsPerCacheLine";
      counter->desc = "Coalescing ratio of EU read requests to L3 cache lines.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 196;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 200;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 208;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 216;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 224;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__hdc0_l3_reads__read;
      counter->name = "All Data Port 0 Reads from L3";
      counter->desc = "The subslice 0 Data Port reads from L3 cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 232;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 240;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 248;
      counter->size = sizeof(float);

      query->data_size = counter->offset + counter->size;
   }

   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Data Port Writes Coalescing Gen8 :: GPU Core Clocks
 *
 * RPN equation: GPU_CLOCK 0 READ
 *
 * Returns the accumulated value stored at the query's GPU clock slot of the
 * accumulator array, unmodified.
 */
static uint64_t
bdw__data_port_writes_coalescing__gpu_core_clocks__read(struct brw_context *brw,
                                                        const struct brw_perf_query_info *query,
                                                        uint64_t *accumulator)
{
   return accumulator[query->gpu_clock_offset + 0];
}

/* Data Port Writes Coalescing Gen8 :: EU Active
 *
 * RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Raw A counter 7 is divided by the EU count using integer division, scaled
 * by 100, then divided by the GPU core clocks in floating point.  A zero EU
 * count or a zero clock count produces 0 rather than a division by zero.
 */
static float
bdw__data_port_writes_coalescing__eu_active__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const double core_clocks =
      bdw__data_port_writes_coalescing__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t scaled = 0;

   if (n_eus != 0)
      scaled = accumulator[query->a_offset + 7] / n_eus;
   scaled *= 100;

   if (core_clocks == 0.0)
      return 0;

   return (double) scaled / core_clocks;
}

/* Data Port Writes Coalescing Gen8 :: EU to Data Port 0 Writes 128
 *
 * RPN equation: B 6 READ 2 UDIV
 *
 * Returns half (integer division) of raw B counter 6.
 *
 * NOTE(review): the display name above says "128" while the function symbol
 * says 192B -- confirm against the generator's input data.
 */
static uint64_t
bdw__data_port_writes_coalescing__eu_hdc0_writes192_b__read(struct brw_context *brw,
                                                            const struct brw_perf_query_info *query,
                                                            uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->b_offset + 6];

   return raw / 2;
}

/* Data Port Writes Coalescing Gen8 :: EU Both FPU Pipes Active
 *
 * RPN equation: A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Raw A counter 9 is divided by the EU count using integer division, scaled
 * by 100, then divided by the GPU core clocks in floating point.  A zero EU
 * count or a zero clock count produces 0 rather than a division by zero.
 */
static float
bdw__data_port_writes_coalescing__eu_fpu_both_active__read(struct brw_context *brw,
                                                           const struct brw_perf_query_info *query,
                                                           uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const double core_clocks =
      bdw__data_port_writes_coalescing__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t scaled = 0;

   if (n_eus != 0)
      scaled = accumulator[query->a_offset + 9] / n_eus;
   scaled *= 100;

   if (core_clocks == 0.0)
      return 0;

   return (double) scaled / core_clocks;
}

/* Data Port Writes Coalescing Gen8 :: EU FPU0 Pipe Active
 *
 * RPN equation: A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Raw A counter 10 is divided by the EU count using integer division, scaled
 * by 100, then divided by the GPU core clocks in floating point.  A zero EU
 * count or a zero clock count produces 0 rather than a division by zero.
 */
static float
bdw__data_port_writes_coalescing__fpu0_active__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const double core_clocks =
      bdw__data_port_writes_coalescing__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t scaled = 0;

   if (n_eus != 0)
      scaled = accumulator[query->a_offset + 10] / n_eus;
   scaled *= 100;

   if (core_clocks == 0.0)
      return 0;

   return (double) scaled / core_clocks;
}

/* Data Port Writes Coalescing Gen8 :: EU FPU1 Pipe Active
 *
 * RPN equation: A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Raw A counter 11 is divided by the EU count using integer division, scaled
 * by 100, then divided by the GPU core clocks in floating point.  A zero EU
 * count or a zero clock count produces 0 rather than a division by zero.
 */
static float
bdw__data_port_writes_coalescing__fpu1_active__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const double core_clocks =
      bdw__data_port_writes_coalescing__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t scaled = 0;

   if (n_eus != 0)
      scaled = accumulator[query->a_offset + 11] / n_eus;
   scaled *= 100;

   if (core_clocks == 0.0)
      return 0;

   return (double) scaled / core_clocks;
}

/* Data Port Writes Coalescing Gen8 :: EU AVG IPC Rate
 *
 * RPN equation: A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD
 *
 * Computes A9 / ((A10 + A11) - A9) + 1.  The sum and difference are done in
 * unsigned 64-bit arithmetic before conversion to double; when the
 * denominator is zero the quotient is taken as 0, so the result is 1.
 */
static float
bdw__data_port_writes_coalescing__eu_avg_ipc_rate__read(struct brw_context *brw,
                                                        const struct brw_perf_query_info *query,
                                                        uint64_t *accumulator)
{
   const uint64_t *a_counters = accumulator + query->a_offset;
   const uint64_t denom_raw = (a_counters[10] + a_counters[11]) - a_counters[9];
   const double numerator = a_counters[9];
   const double denominator = denom_raw;
   double quotient = 0;

   if (denominator != 0.0)
      quotient = numerator / denominator;

   return quotient + 1;
}

/* Data Port Writes Coalescing Gen8 :: GS Threads Dispatched
 *
 * RPN equation: A 5 READ
 *
 * Returns raw A counter 5 from the accumulator, unmodified.
 */
static uint64_t
bdw__data_port_writes_coalescing__gs_threads__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   return accumulator[query->a_offset + 5];
}

/* Data Port Writes Coalescing Gen8 :: VS Threads Dispatched
 *
 * RPN equation: A 1 READ
 *
 * Returns raw A counter 1 from the accumulator, unmodified.
 */
static uint64_t
bdw__data_port_writes_coalescing__vs_threads__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   return accumulator[query->a_offset + 1];
}

/* Data Port Writes Coalescing Gen8 :: FS Threads Dispatched
 *
 * RPN equation: A 6 READ
 *
 * Returns raw A counter 6 from the accumulator, unmodified.
 */
static uint64_t
bdw__data_port_writes_coalescing__ps_threads__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   return accumulator[query->a_offset + 6];
}

/* Data Port Writes Coalescing Gen8 :: Shader Barrier Messages
 *
 * RPN equation: A 35 READ
 *
 * Returns raw A counter 35 from the accumulator, unmodified.
 */
static uint64_t
bdw__data_port_writes_coalescing__shader_barriers__read(struct brw_context *brw,
                                                        const struct brw_perf_query_info *query,
                                                        uint64_t *accumulator)
{
   return accumulator[query->a_offset + 35];
}

/* Data Port Writes Coalescing Gen8 :: Sampler Texels
 *
 * RPN equation: A 28 READ 4 UMUL
 *
 * Returns raw A counter 28 multiplied by 4 (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__data_port_writes_coalescing__sampler_texels__read(struct brw_context *brw,
                                                       const struct brw_perf_query_info *query,
                                                       uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 28];

   return raw * 4;
}

/* Data Port Writes Coalescing Gen8 :: EU to Data Port 0 Writes 32B
 *
 * RPN equation: B 0 READ
 *
 * Returns raw B counter 0 from the accumulator, unmodified.
 */
static uint64_t
bdw__data_port_writes_coalescing__eu_hdc0_writes32_b__read(struct brw_context *brw,
                                                           const struct brw_perf_query_info *query,
                                                           uint64_t *accumulator)
{
   return accumulator[query->b_offset + 0];
}

/* Data Port Writes Coalescing Gen8 :: GPU Time Elapsed
 *
 * RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV
 *
 * Multiplies the accumulated GPU time value by 10^9 and divides it by the
 * timestamp frequency.  Returns 0 when the frequency is 0.
 *
 * NOTE(review): the multiplication is performed in uint64_t before the
 * division, so it wraps modulo 2^64 for sufficiently large accumulated
 * values.
 */
static uint64_t
bdw__data_port_writes_coalescing__gpu_time__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   const uint64_t frequency = brw->perfquery.sys_vars.timestamp_frequency;
   const uint64_t ticks = accumulator[query->gpu_time_offset + 0];

   if (frequency == 0)
      return 0;

   return (ticks * 1000000000) / frequency;
}

/* Data Port Writes Coalescing Gen8 :: AVG GPU Core Frequency
 *
 * RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV
 *
 * Multiplies the GPU core clocks by 10^9 and divides by the value reported by
 * the GPU Time Elapsed reader of this metric set.  Returns 0 when that
 * elapsed time is 0.  All arithmetic is unsigned 64-bit.
 */
static uint64_t
bdw__data_port_writes_coalescing__avg_gpu_core_frequency__read(struct brw_context *brw,
                                                               const struct brw_perf_query_info *query,
                                                               uint64_t *accumulator)
{
   const uint64_t elapsed =
      bdw__data_port_writes_coalescing__gpu_time__read(brw, query, accumulator);
   const uint64_t scaled_clocks =
      bdw__data_port_writes_coalescing__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;

   if (elapsed == 0)
      return 0;

   return scaled_clocks / elapsed;
}

/* Data Port Writes Coalescing Gen8 :: AVG GPU Core Frequency (maximum)
 *
 * RPN equation: $GpuMaxFrequency
 *
 * Returns the gt_max_freq system variable held in the context.
 */
static uint64_t
bdw__data_port_writes_coalescing__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   const uint64_t max_frequency = brw->perfquery.sys_vars.gt_max_freq;

   return max_frequency;
}

/* Data Port Writes Coalescing Gen8 :: Sampler Texels Misses
 *
 * RPN equation: A 29 READ 4 UMUL
 *
 * Returns raw A counter 29 multiplied by 4 (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__data_port_writes_coalescing__sampler_texel_misses__read(struct brw_context *brw,
                                                             const struct brw_perf_query_info *query,
                                                             uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 29];

   return raw * 4;
}

/* Data Port Writes Coalescing Gen8 :: CS Threads Dispatched
 *
 * RPN equation: A 4 READ
 *
 * Returns raw A counter 4 from the accumulator, unmodified.
 */
static uint64_t
bdw__data_port_writes_coalescing__cs_threads__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   return accumulator[query->a_offset + 4];
}

/* Data Port Writes Coalescing Gen8 :: SLM Bytes Read
 *
 * RPN equation: A 30 READ 64 UMUL
 *
 * Returns raw A counter 30 multiplied by 64 (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__data_port_writes_coalescing__slm_bytes_read__read(struct brw_context *brw,
                                                       const struct brw_perf_query_info *query,
                                                       uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 30];

   return raw * 64;
}

/* Data Port Writes Coalescing Gen8 :: EU to Data Port 0 Writes 256B
 *
 * RPN equation: B 7 READ 2 UDIV
 *
 * Returns half (integer division) of raw B counter 7.
 */
static uint64_t
bdw__data_port_writes_coalescing__eu_hdc0_writes256_b_simd16__read(struct brw_context *brw,
                                                                   const struct brw_perf_query_info *query,
                                                                   uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->b_offset + 7];

   return raw / 2;
}

/* Data Port Writes Coalescing Gen8 :: Data Port 0 to L3 Data Reads
 *
 * RPN equation: C 4 READ
 *
 * Returns raw C counter 4 from the accumulator, unmodified.
 */
static uint64_t
bdw__data_port_writes_coalescing__hdc0_l3_data_reads__read(struct brw_context *brw,
                                                           const struct brw_perf_query_info *query,
                                                           uint64_t *accumulator)
{
   return accumulator[query->c_offset + 4];
}

/* Data Port Writes Coalescing Gen8 :: Data Port 0 to L3 Data Writes
 *
 * RPN equation: C 5 READ
 *
 * Returns raw C counter 5 from the accumulator, unmodified.
 */
static uint64_t
bdw__data_port_writes_coalescing__hdc0_l3_data_writes__read(struct brw_context *brw,
                                                            const struct brw_perf_query_info *query,
                                                            uint64_t *accumulator)
{
   return accumulator[query->c_offset + 5];
}

/* Data Port Writes Coalescing Gen8 :: EU to Data Port 0 Writes 64B
 *
 * RPN equation: B 5 READ
 *
 * Returns raw B counter 5 from the accumulator, unmodified.
 *
 * NOTE(review): the display name above says "64B" while the function symbol
 * says 128B SIMD16 -- confirm against the generator's input data.
 */
static uint64_t
bdw__data_port_writes_coalescing__eu_hdc0_writes128_b_simd16__read(struct brw_context *brw,
                                                                   const struct brw_perf_query_info *query,
                                                                   uint64_t *accumulator)
{
   return accumulator[query->b_offset + 5];
}

/* Data Port Writes Coalescing Gen8 :: EU to Data Port 0 Writes 64B
 *
 * RPN equation: B 2 READ 2 UDIV
 *
 * Returns half (integer division) of raw B counter 2.
 *
 * NOTE(review): the display name above says "64B" while the function symbol
 * says 96B -- confirm against the generator's input data.
 */
static uint64_t
bdw__data_port_writes_coalescing__eu_hdc0_writes96_b__read(struct brw_context *brw,
                                                           const struct brw_perf_query_info *query,
                                                           uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->b_offset + 2];

   return raw / 2;
}

/* Data Port Writes Coalescing Gen8 :: DS Threads Dispatched
 *
 * RPN equation: A 3 READ
 *
 * Returns raw A counter 3 from the accumulator, unmodified.
 */
static uint64_t
bdw__data_port_writes_coalescing__ds_threads__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   return accumulator[query->a_offset + 3];
}

/* Data Port Writes Coalescing Gen8 :: EU Thread Occupancy
 *
 * RPN equation: A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Raw A counter 13 is multiplied by 8, divided by the EU count and then by
 * the EU threads count (both integer divisions), scaled by 100 and finally
 * divided by the GPU core clocks in floating point.  Any zero divisor makes
 * the corresponding quotient 0.
 */
static float
bdw__data_port_writes_coalescing__eu_thread_occupancy__read(struct brw_context *brw,
                                                            const struct brw_perf_query_info *query,
                                                            uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t eu_threads = brw->perfquery.sys_vars.eu_threads_count;
   const double core_clocks =
      bdw__data_port_writes_coalescing__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t value = accumulator[query->a_offset + 13] * 8;

   if (n_eus == 0)
      value = 0;
   else
      value /= n_eus;

   if (eu_threads == 0)
      value = 0;
   else
      value /= eu_threads;

   value *= 100;

   if (core_clocks == 0.0)
      return 0;

   return (double) value / core_clocks;
}

/* Data Port Writes Coalescing Gen8 :: All Data Port 0 Writes to L3
 *
 * RPN equation: C 2 READ
 *
 * Returns raw C counter 2 from the accumulator, unmodified.
 */
static uint64_t
bdw__data_port_writes_coalescing__hdc0_l3_writes__read(struct brw_context *brw,
                                                       const struct brw_perf_query_info *query,
                                                       uint64_t *accumulator)
{
   return accumulator[query->c_offset + 2];
}

/* Data Port Writes Coalescing Gen8 :: EU Stall
 *
 * RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Raw A counter 8 is divided by the EU count using integer division, scaled
 * by 100, then divided by the GPU core clocks in floating point.  A zero EU
 * count or a zero clock count produces 0 rather than a division by zero.
 */
static float
bdw__data_port_writes_coalescing__eu_stall__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const double core_clocks =
      bdw__data_port_writes_coalescing__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t scaled = 0;

   if (n_eus != 0)
      scaled = accumulator[query->a_offset + 8] / n_eus;
   scaled *= 100;

   if (core_clocks == 0.0)
      return 0;

   return (double) scaled / core_clocks;
}

/* Data Port Writes Coalescing Gen8 :: EU to Data Port 0 Writes 64B
 *
 * RPN equation: B 1 READ B 4 READ UADD
 *
 * Returns the sum of raw B counters 1 and 4 (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__data_port_writes_coalescing__eu_hdc0_writes64_b__read(struct brw_context *brw,
                                                           const struct brw_perf_query_info *query,
                                                           uint64_t *accumulator)
{
   const uint64_t *b_counters = accumulator + query->b_offset;

   return b_counters[1] + b_counters[4];
}

/* Data Port Writes Coalescing Gen8 :: EU to Data Port 0 Writes 128
 *
 * RPN equation: B 3 READ 2 UDIV
 *
 * Returns half (integer division) of raw B counter 3.
 */
static uint64_t
bdw__data_port_writes_coalescing__eu_hdc0_writes128_b__read(struct brw_context *brw,
                                                            const struct brw_perf_query_info *query,
                                                            uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->b_offset + 3];

   return raw / 2;
}

/* Data Port Writes Coalescing Gen8 :: EuBytesWrittenPerCacheLine
 *
 * RPN equation: $EuHdc0Writes32B 32 UMUL $EuHdc0Writes64B 64 UMUL $EuHdc0Writes96B 96 UMUL $EuHdc0Writes128B 128 UMUL $EuHdc0Writes128BSimd16 128 UMUL $EuHdc0Writes256BSimd16 256 UMUL $EuHdc0Writes192B 192 UMUL UADD UADD UADD UADD UADD UADD $Hdc0L3DataWrites FDIV
 *
 * Weights each EU-to-data-port write counter of this metric set by the byte
 * size in its symbol name, sums the products in unsigned 64-bit arithmetic
 * and divides the total (as a double) by the Data Port 0 to L3 data writes
 * counter.  Returns 0 when that divisor is 0.
 */
static float
bdw__data_port_writes_coalescing__eu_bytes_written_per_cache_line__read(struct brw_context *brw,
                                                                        const struct brw_perf_query_info *query,
                                                                        uint64_t *accumulator)
{
   const double l3_data_writes =
      bdw__data_port_writes_coalescing__hdc0_l3_data_writes__read(brw, query, accumulator);
   uint64_t total_bytes = 0;

   /* Unsigned addition is modular, so accumulation order is irrelevant. */
   total_bytes += bdw__data_port_writes_coalescing__eu_hdc0_writes32_b__read(brw, query, accumulator) * 32;
   total_bytes += bdw__data_port_writes_coalescing__eu_hdc0_writes64_b__read(brw, query, accumulator) * 64;
   total_bytes += bdw__data_port_writes_coalescing__eu_hdc0_writes96_b__read(brw, query, accumulator) * 96;
   total_bytes += bdw__data_port_writes_coalescing__eu_hdc0_writes128_b__read(brw, query, accumulator) * 128;
   total_bytes += bdw__data_port_writes_coalescing__eu_hdc0_writes128_b_simd16__read(brw, query, accumulator) * 128;
   total_bytes += bdw__data_port_writes_coalescing__eu_hdc0_writes256_b_simd16__read(brw, query, accumulator) * 256;
   total_bytes += bdw__data_port_writes_coalescing__eu_hdc0_writes192_b__read(brw, query, accumulator) * 192;

   if (l3_data_writes == 0.0)
      return 0;

   return (double) total_bytes / l3_data_writes;
}

/* Data Port Writes Coalescing Gen8 :: EuDataWritesPerCacheLine
 *
 * RPN equation: $EuBytesWrittenPerCacheLine 64 FDIV
 *
 * Returns the EuBytesWrittenPerCacheLine value of this metric set divided by
 * 64.  The division is carried out in double precision before the result is
 * narrowed to float.
 */
static float
bdw__data_port_writes_coalescing__eu_data_writes_per_cache_line__read(struct brw_context *brw,
                                                                      const struct brw_perf_query_info *query,
                                                                      uint64_t *accumulator)
{
   const double bytes_per_line =
      bdw__data_port_writes_coalescing__eu_bytes_written_per_cache_line__read(brw, query, accumulator);

   return bytes_per_line / 64.0;
}

/* Data Port Writes Coalescing Gen8 :: Shader Memory Accesses
 *
 * RPN equation: A 32 READ
 *
 * Returns raw A counter 32 from the accumulator, unmodified.
 */
static uint64_t
bdw__data_port_writes_coalescing__shader_memory_accesses__read(struct brw_context *brw,
                                                               const struct brw_perf_query_info *query,
                                                               uint64_t *accumulator)
{
   return accumulator[query->a_offset + 32];
}

/* Data Port Writes Coalescing Gen8 :: HS Threads Dispatched
 *
 * RPN equation: A 2 READ
 *
 * Returns raw A counter 2 from the accumulator, unmodified.
 */
static uint64_t
bdw__data_port_writes_coalescing__hs_threads__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   return accumulator[query->a_offset + 2];
}

/* Data Port Writes Coalescing Gen8 :: SLM Bytes Written
 *
 * RPN equation: A 31 READ 64 UMUL
 *
 * Returns raw A counter 31 multiplied by 64 (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__data_port_writes_coalescing__slm_bytes_written__read(struct brw_context *brw,
                                                          const struct brw_perf_query_info *query,
                                                          uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 31];

   return raw * 64;
}

/* Data Port Writes Coalescing Gen8 :: L3 Shader Throughput
 *
 * RPN equation: A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL
 *
 * Sums raw A counters 30 and 31 with the Shader Memory Accesses value of
 * this metric set and multiplies the total by 64 (unsigned 64-bit
 * arithmetic).
 */
static uint64_t
bdw__data_port_writes_coalescing__l3_shader_throughput__read(struct brw_context *brw,
                                                             const struct brw_perf_query_info *query,
                                                             uint64_t *accumulator)
{
   const uint64_t *a_counters = accumulator + query->a_offset;
   uint64_t total =
      bdw__data_port_writes_coalescing__shader_memory_accesses__read(brw, query, accumulator);

   total += a_counters[31];
   total += a_counters[30];

   return total * 64;
}

/* Data Port Writes Coalescing Gen8 :: All Data Port 0 Reads from L3
 *
 * RPN equation: C 3 READ C 2 READ USUB
 *
 * Returns raw C counter 3 minus raw C counter 2.  The subtraction is
 * unsigned 64-bit, so it wraps if counter 2 exceeds counter 3.
 */
static uint64_t
bdw__data_port_writes_coalescing__hdc0_l3_reads__read(struct brw_context *brw,
                                                      const struct brw_perf_query_info *query,
                                                      uint64_t *accumulator)
{
   const uint64_t *c_counters = accumulator + query->c_offset;

   return c_counters[3] - c_counters[2];
}

/* Data Port Writes Coalescing Gen8 :: Shader Atomic Memory Accesses
 *
 * RPN equation: A 34 READ
 *
 * Returns raw A counter 34 from the accumulator, unmodified.
 */
static uint64_t
bdw__data_port_writes_coalescing__shader_atomics__read(struct brw_context *brw,
                                                       const struct brw_perf_query_info *query,
                                                       uint64_t *accumulator)
{
   return accumulator[query->a_offset + 34];
}

/* Data Port Writes Coalescing Gen8 :: EU Send Pipe Active
 *
 * RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Raw A counter 12 is divided by the EU count using integer division, scaled
 * by 100, then divided by the GPU core clocks in floating point.  A zero EU
 * count or a zero clock count produces 0 rather than a division by zero.
 */
static float
bdw__data_port_writes_coalescing__eu_send_active__read(struct brw_context *brw,
                                                       const struct brw_perf_query_info *query,
                                                       uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const double core_clocks =
      bdw__data_port_writes_coalescing__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t scaled = 0;

   if (n_eus != 0)
      scaled = accumulator[query->a_offset + 12] / n_eus;
   scaled *= 100;

   if (core_clocks == 0.0)
      return 0;

   return (double) scaled / core_clocks;
}

/* Fixed-capacity backing storage for the register programming of the
 * "Data Port Writes Coalescing Gen8" metric set.  The arrays start out
 * zero-initialized; register_data_port_writes_coalescing_counter_query()
 * appends mux entries one at a time through query->n_mux_regs.
 * NOTE(review): the b_counter and flex tables are presumably filled the same
 * way -- confirm that the capacities below cover every appended entry.
 */
static struct brw_perf_query_register_prog bdw_data_port_writes_coalescing_mux_regs[110];
static struct brw_perf_query_register_prog bdw_data_port_writes_coalescing_b_counter_regs[24];
static struct brw_perf_query_register_prog bdw_data_port_writes_coalescing_flex_regs[7];

/* Backing storage for the counter descriptions of this metric set. */
static struct brw_perf_query_counter bdw_data_port_writes_coalescing_query_counters[38];

/* Static description of the metric set.  All element counts start at 0 and
 * the tables above are attached by pointer.
 */
static struct brw_perf_query_info bdw_data_port_writes_coalescing_query = {
   .kind = OA_COUNTERS,
   .name = "Data Port Writes Coalescing Gen8",
   .guid = "343ebc99-4a55-414c-8c17-d8e259cf5e20",
   .counters = bdw_data_port_writes_coalescing_query_counters,
   .n_counters = 0,
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,

   /* Accumulation buffer offsets... */
   /* These are element indices into the uint64_t accumulator array; the
    * *__read functions of this metric set add a counter number to them,
    * e.g. accumulator[query->a_offset + 7].
    */
   .gpu_time_offset = 0,
   .gpu_clock_offset = 1,
   .a_offset = 2,
   .b_offset = 38,
   .c_offset = 46,
   .mux_regs = bdw_data_port_writes_coalescing_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = bdw_data_port_writes_coalescing_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
   .flex_regs = bdw_data_port_writes_coalescing_flex_regs,
   .n_flex_regs = 0, /* Determined at runtime */
};

static void
register_data_port_writes_coalescing_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &bdw_data_port_writes_coalescing_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      if (brw->perfquery.sys_vars.subslice_mask & 0x01) {
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x103D0005 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x143D0120 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x163D2400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1058022F };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x105B0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198B0003 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x005CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x065CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x085CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A5CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E5CC000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x025C4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x045C8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x003D0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x063D0094 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x083D0182 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A3D1814 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E3D0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x183D0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1A3D0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C3D0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E582242 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00586700 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0258004F };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0658C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0858C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A58C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x045B6A80 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x005B4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E5B4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x185B5400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1A5B0141 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x025B4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A5B0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C5B4000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C1FA800 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1FAAA0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x101F0282 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x16384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18381415 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C384000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0039A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0639A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0839A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A39A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E39A000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02392000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04398000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8A82A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D8A0002 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8A8000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038B6300 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058B0062 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x118B0000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B02A0 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258B1555 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x278B0014 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85AA80 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21852AAA };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x23850028 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x19835400 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830141 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D834000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0184C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0784C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0984C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0384C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0584C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1180C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1780C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1980C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1380C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1580C000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00000D24, .val = 0x00000000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D801000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3D800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800001 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x43800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x51800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800000 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47800420 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F800421 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800041 };
         query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };
      }

      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000274C, .val = 0xBA98BA98 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002748, .val = 0xBA98BA98 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002744, .val = 0x00003377 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002770, .val = 0x0007FF72 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002774, .val = 0x0000BFD0 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002778, .val = 0x0007FF62 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000277C, .val = 0x0000BFD0 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002780, .val = 0x0007FF42 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002784, .val = 0x0000BFD0 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002788, .val = 0x0007FF02 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000278C, .val = 0x0000BFD0 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002790, .val = 0x0005FFF2 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002794, .val = 0x0000BFD0 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002798, .val = 0x0005FFE2 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000279C, .val = 0x0000BFD0 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A0, .val = 0x0005FFC2 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A4, .val = 0x0000BFD0 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A8, .val = 0x0005FF82 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027AC, .val = 0x0000BFD0 };

      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E458, .val = 0x00005004 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E558, .val = 0x00000003 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E658, .val = 0x00002001 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E758, .val = 0x00778008 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E45C, .val = 0x00088078 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E55C, .val = 0x00808708 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E65C, .val = 0x00A08908 };


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__eu_hdc0_writes192_b__read;
      counter->name = "EU to Data Port 0 Writes 128";
      counter->desc = "The subslice 0 EU data simd16 writes to Data Port with 192B per message.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 16;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 24;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 28;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 32;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 2;
      counter->offset = 36;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 40;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 48;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 56;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 64;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 72;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__eu_hdc0_writes32_b__read;
      counter->name = "EU to Data Port 0 Writes 32B";
      counter->desc = "The subslice 0 EU data writes to Data Port with 32B per message.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 80;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 88;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = bdw__data_port_writes_coalescing__avg_gpu_core_frequency__max(brw);
      counter->offset = 96;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 104;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 120;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__eu_hdc0_writes256_b_simd16__read;
      counter->name = "EU to Data Port 0 Writes 256B";
      counter->desc = "The subslice 0 EU data simd16 writes to Data Port with 256B per message.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 128;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__hdc0_l3_data_reads__read;
      counter->name = "Data Port 0 to L3 Data Reads";
      counter->desc = "The subslice 0 Data Port data and constant reads from L3 cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 136;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__hdc0_l3_data_writes__read;
      counter->name = "Data Port 0 to L3 Data Writes";
      counter->desc = "The subslice 0 Data Port data writes to L3 cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 144;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__eu_hdc0_writes128_b_simd16__read;
      counter->name = "EU to Data Port 0 Writes 64B";
      counter->desc = "The subslice 0 EU data simd16 writes to Data Port with 128B per message.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 152;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__eu_hdc0_writes96_b__read;
      counter->name = "EU to Data Port 0 Writes 64B";
      counter->desc = "The subslice 0 EU data writes to Data Port with 64B per message.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 160;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 168;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 176;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__hdc0_l3_writes__read;
      counter->name = "All Data Port 0 Writes to L3";
      counter->desc = "The subslice 0 Data Port writes to L3 cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 184;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 192;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__eu_hdc0_writes64_b__read;
      counter->name = "EU to Data Port 0 Writes 64B";
      counter->desc = "The subslice 0 EU data writes to Data Port with 64B per message.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 200;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__eu_hdc0_writes128_b__read;
      counter->name = "EU to Data Port 0 Writes 128";
      counter->desc = "The subslice 0 EU data writes to Data Port with 128B per message.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 208;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__eu_bytes_written_per_cache_line__read;
      counter->name = "EuBytesWrittenPerCacheLine";
      counter->desc = "Average EU bytes written per L3 cache line.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 216;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__eu_data_writes_per_cache_line__read;
      counter->name = "EuDataWritesPerCacheLine";
      counter->desc = "Coalescing ratio of EU write requests to L3 cache lines.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 220;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 224;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 232;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 240;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 248;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__hdc0_l3_reads__read;
      counter->name = "All Data Port 0 Reads from L3";
      counter->desc = "The subslice 0 Data Port reads from L3 cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 256;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 264;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 272;
      counter->size = sizeof(float);

      query->data_size = counter->offset + counter->size;
   }

   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Metric set HDCAndSF :: GPU Core Clocks
 *
 * Returns the accumulated GPU clock value, i.e. the accumulator slot found
 * at the query's gpu_clock_offset.  Other counters of this metric set use
 * this value as their denominator.
 */
static uint64_t
bdw__hdc_and_sf__gpu_core_clocks__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   /* RPN equation: GPU_CLOCK 0 READ */
   return accumulator[query->gpu_clock_offset];
}

/* Metric set HDCAndSF :: EU Active
 *
 * A counter 7, divided (integer division) by the EU count, multiplied by
 * 100 and then divided in floating point by the GPU core clocks.  A zero
 * EU count or zero core clocks yields 0 for the respective division.
 */
static float
bdw__hdc_and_sf__eu_active__read(struct brw_context *brw,
                                 const struct brw_perf_query_info *query,
                                 uint64_t *accumulator)
{
   /* RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 7];
   const uint64_t per_eu = eu_count ? raw / eu_count : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   return scaled / core_clocks;
}

/* Metric set HDCAndSF :: EU Both FPU Pipes Active
 *
 * A counter 9 averaged over the EU count (integer division), scaled by 100
 * and expressed relative to the GPU core clocks.  Either division yields 0
 * when its divisor is zero.
 */
static float
bdw__hdc_and_sf__eu_fpu_both_active__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   /* RPN equation: A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 9];
   const uint64_t per_eu = eu_count ? raw / eu_count : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   return scaled / core_clocks;
}

/* Metric set HDCAndSF :: VS Send Pipe Active
 *
 * A counter 12 averaged over the EU count (integer division), scaled by
 * 100 and expressed relative to the GPU core clocks.  Either division
 * yields 0 when its divisor is zero.
 */
static float
bdw__hdc_and_sf__vs_send_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 12];
   const uint64_t per_eu = eu_count ? raw / eu_count : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   return scaled / core_clocks;
}

/* Metric set HDCAndSF :: VS FPU1 Pipe Active
 *
 * A counter 11 averaged over the EU count (integer division), scaled by
 * 100 and expressed relative to the GPU core clocks.  Either division
 * yields 0 when its divisor is zero.
 */
static float
bdw__hdc_and_sf__vs_fpu1_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 11];
   const uint64_t per_eu = eu_count ? raw / eu_count : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   return scaled / core_clocks;
}

/* Metric set HDCAndSF :: GS Threads Dispatched
 *
 * Returns the accumulated value of A counter 5 unmodified.
 */
static uint64_t
bdw__hdc_and_sf__gs_threads__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: A 5 READ */
   return accumulator[query->a_offset + 5];
}

/* Metric set HDCAndSF :: Early Hi-Depth Test Fails
 *
 * Returns the accumulated value of A counter 22 multiplied by 4.
 */
static uint64_t
bdw__hdc_and_sf__hi_depth_test_fails__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   /* RPN equation: A 22 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 22];

   return raw * 4;
}

/* Metric set HDCAndSF :: FS Both FPU Active
 *
 * A counter 18 averaged over the EU count (integer division), scaled by
 * 100 and expressed relative to the GPU core clocks.  Either division
 * yields 0 when its divisor is zero.
 */
static float
bdw__hdc_and_sf__ps_eu_both_fpu_active__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 18];
   const uint64_t per_eu = eu_count ? raw / eu_count : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   return scaled / core_clocks;
}

/* Metric set HDCAndSF :: VS Threads Dispatched
 *
 * Returns the accumulated value of A counter 1 unmodified.
 */
static uint64_t
bdw__hdc_and_sf__vs_threads__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: A 1 READ */
   return accumulator[query->a_offset + 1];
}

/* Metric set HDCAndSF :: Polygon Data Ready
 *
 * B counter 0 multiplied by 100 and divided (floating point) by the GPU
 * core clocks, i.e. a percentage of core clocks.  Returns 0 when the core
 * clock count is zero.
 */
static float
bdw__hdc_and_sf__poly_data_ready__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   /* RPN equation: B 0 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->b_offset];
   const double scaled = raw * 100;
   const double core_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   return scaled / core_clocks;
}

/* Metric set HDCAndSF :: FS Threads Dispatched
 *
 * Returns the accumulated value of A counter 6 unmodified.
 */
static uint64_t
bdw__hdc_and_sf__ps_threads__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: A 6 READ */
   return accumulator[query->a_offset + 6];
}

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss2)
 *
 * The unsigned difference C1 - C0, multiplied by 100 and divided (floating
 * point) by the GPU core clocks.  Returns 0 when the core clock count is
 * zero.
 *
 * NOTE(review): the "(s0.ss2)" label is identical to the one carried by the
 * non_sampler_shader02 counter of this set, while this symbol is named
 * "shader12" -- confirm which slice/subslice this counter describes.
 */
static float
bdw__hdc_and_sf__non_sampler_shader12_access_stalled_on_l3__read(struct brw_context *brw,
                                                                 const struct brw_perf_query_info *query,
                                                                 uint64_t *accumulator)
{
   /* RPN equation: C 1 READ C 0 READ USUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t minuend = accumulator[query->c_offset + 1];
   const uint64_t subtrahend = accumulator[query->c_offset];
   const uint64_t delta = minuend - subtrahend;
   const double scaled = delta * 100;
   const double core_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   return scaled / core_clocks;
}

/* Metric set HDCAndSF :: Shader Barrier Messages
 *
 * Returns the accumulated value of A counter 35 unmodified.
 */
static uint64_t
bdw__hdc_and_sf__shader_barriers__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   /* RPN equation: A 35 READ */
   return accumulator[query->a_offset + 35];
}

/* Metric set HDCAndSF :: Sampler Texels
 *
 * Returns the accumulated value of A counter 28 multiplied by 4.
 */
static uint64_t
bdw__hdc_and_sf__sampler_texels__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 28 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 28];

   return raw * 4;
}

/* Metric set HDCAndSF :: Pixels Failing Tests
 *
 * Returns the accumulated value of A counter 25 multiplied by 4.
 */
static uint64_t
bdw__hdc_and_sf__pixels_failing_post_ps_tests__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   /* RPN equation: A 25 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 25];

   return raw * 4;
}

/* Metric set HDCAndSF :: GPU Time Elapsed
 *
 * Converts the accumulated GPU_TIME value into ticks * 1000000000 /
 * timestamp_frequency (nanoseconds, presuming the frequency is in Hz).
 * Returns 0 when the timestamp frequency is zero.
 *
 * The straightforward form of the equation multiplies first, and that
 * product wraps modulo 2^64 once the tick count exceeds roughly 1.8e10.
 * The quotient and remainder of ticks / frequency are therefore scaled
 * separately; this yields exactly floor(ticks * 1e9 / frequency) without
 * the oversized intermediate (the remainder term stays in range for any
 * frequency below ~1.8e10).
 */
static uint64_t
bdw__hdc_and_sf__gpu_time__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV */
   const uint64_t ticks = accumulator[query->gpu_time_offset + 0];
   const uint64_t frequency = brw->perfquery.sys_vars.timestamp_frequency;
   uint64_t whole_units;
   uint64_t leftover_ticks;

   if (!frequency)
      return 0;

   whole_units = ticks / frequency;
   leftover_ticks = ticks % frequency;

   return whole_units * 1000000000 + (leftover_ticks * 1000000000) / frequency;
}

/* Metric set HDCAndSF :: AVG GPU Core Frequency
 *
 * GPU core clocks multiplied by 1000000000 and divided (integer division)
 * by the GPU Time Elapsed value of this metric set.  Returns 0 when the
 * elapsed time is zero.
 *
 * NOTE(review): the multiplication is done in uint64_t, so it wraps once
 * the core clock count exceeds roughly 1.8e10.
 */
static uint64_t
bdw__hdc_and_sf__avg_gpu_core_frequency__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV */
   const uint64_t scaled_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t elapsed =
      bdw__hdc_and_sf__gpu_time__read(brw, query, accumulator);

   if (elapsed == 0)
      return 0;

   return scaled_clocks / elapsed;
}

/* Metric set HDCAndSF :: AVG GPU Core Frequency
 *
 * Upper bound for the counter: the gt_max_freq system variable.
 */
static uint64_t
bdw__hdc_and_sf__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   /* RPN equation: $GpuMaxFrequency */
   const uint64_t max_frequency = brw->perfquery.sys_vars.gt_max_freq;

   return max_frequency;
}

/* Metric set HDCAndSF :: Sampler Texels Misses
 *
 * Returns the accumulated value of A counter 29 multiplied by 4.
 */
static uint64_t
bdw__hdc_and_sf__sampler_texel_misses__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: A 29 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 29];

   return raw * 4;
}

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss1)
 *
 * The unsigned difference C5 - C4, multiplied by 100 and divided (floating
 * point) by the GPU core clocks.  Returns 0 when the core clock count is
 * zero.
 */
static float
bdw__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read(struct brw_context *brw,
                                                                 const struct brw_perf_query_info *query,
                                                                 uint64_t *accumulator)
{
   /* RPN equation: C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t minuend = accumulator[query->c_offset + 5];
   const uint64_t subtrahend = accumulator[query->c_offset + 4];
   const uint64_t delta = minuend - subtrahend;
   const double scaled = delta * 100;
   const double core_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   return scaled / core_clocks;
}

/* Metric set HDCAndSF :: CS Threads Dispatched
 *
 * Returns the accumulated value of A counter 4 unmodified.
 */
static uint64_t
bdw__hdc_and_sf__cs_threads__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: A 4 READ */
   return accumulator[query->a_offset + 4];
}

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss2)
 *
 * The unsigned difference C7 - C6, multiplied by 100 and divided (floating
 * point) by the GPU core clocks.  Returns 0 when the core clock count is
 * zero.
 */
static float
bdw__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read(struct brw_context *brw,
                                                                 const struct brw_perf_query_info *query,
                                                                 uint64_t *accumulator)
{
   /* RPN equation: C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t minuend = accumulator[query->c_offset + 7];
   const uint64_t subtrahend = accumulator[query->c_offset + 6];
   const uint64_t delta = minuend - subtrahend;
   const double scaled = delta * 100;
   const double core_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   return scaled / core_clocks;
}

/* Metric set HDCAndSF :: SLM Bytes Read
 *
 * Returns the accumulated value of A counter 30 multiplied by 64.
 */
static uint64_t
bdw__hdc_and_sf__slm_bytes_read__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 30 READ 64 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 30];

   return raw * 64;
}

/* Metric set HDCAndSF :: PS FPU1 Pipe Active
 *
 * A counter 16 averaged over the EU count (integer division), scaled by
 * 100 and expressed relative to the GPU core clocks.  Either division
 * yields 0 when its divisor is zero.
 */
static float
bdw__hdc_and_sf__ps_fpu1_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 16];
   const uint64_t per_eu = eu_count ? raw / eu_count : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   return scaled / core_clocks;
}

/* Metric set HDCAndSF :: PS Send Pipeline Active
 *
 * A counter 17 averaged over the EU count (integer division), scaled by
 * 100 and expressed relative to the GPU core clocks.  Either division
 * yields 0 when its divisor is zero.
 */
static float
bdw__hdc_and_sf__ps_send_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 17];
   const uint64_t per_eu = eu_count ? raw / eu_count : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   return scaled / core_clocks;
}

/* Metric set HDCAndSF :: VS FPU0 Pipe Active
 *
 * A counter 10 averaged over the EU count (integer division), scaled by
 * 100 and expressed relative to the GPU core clocks.  Either division
 * yields 0 when its divisor is zero.
 */
static float
bdw__hdc_and_sf__vs_fpu0_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 10];
   const uint64_t per_eu = eu_count ? raw / eu_count : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   return scaled / core_clocks;
}

/* Metric set HDCAndSF :: GPU Busy
 *
 * A counter 0 multiplied by 100 and divided (floating point) by the GPU
 * core clocks, i.e. a percentage of core clocks.  Returns 0 when the core
 * clock count is zero.
 */
static float
bdw__hdc_and_sf__gpu_busy__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 0 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset];
   const double scaled = raw * 100;
   const double core_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   return scaled / core_clocks;
}

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss0)
 *
 * The unsigned difference B5 - B4, multiplied by 100 and divided (floating
 * point) by the GPU core clocks.  Returns 0 when the core clock count is
 * zero.
 *
 * NOTE(review): the "(s0.ss0)" label is identical to the one carried by the
 * non_sampler_shader00 counter of this set, while this symbol is named
 * "shader10" -- confirm which slice/subslice this counter describes.
 */
static float
bdw__hdc_and_sf__non_sampler_shader10_access_stalled_on_l3__read(struct brw_context *brw,
                                                                 const struct brw_perf_query_info *query,
                                                                 uint64_t *accumulator)
{
   /* RPN equation: B 5 READ B 4 READ USUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t minuend = accumulator[query->b_offset + 5];
   const uint64_t subtrahend = accumulator[query->b_offset + 4];
   const uint64_t delta = minuend - subtrahend;
   const double scaled = delta * 100;
   const double core_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   return scaled / core_clocks;
}

/* Metric set HDCAndSF :: Rasterized Pixels
 *
 * Returns the accumulated value of A counter 21 multiplied by 4.
 */
static uint64_t
bdw__hdc_and_sf__rasterized_pixels__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: A 21 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 21];

   return raw * 4;
}

/* Metric set HDCAndSF :: PS FPU0 Pipe Active
 *
 * A counter 15 averaged over the EU count (integer division), scaled by
 * 100 and expressed relative to the GPU core clocks.  Either division
 * yields 0 when its divisor is zero.
 */
static float
bdw__hdc_and_sf__ps_fpu0_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 15];
   const uint64_t per_eu = eu_count ? raw / eu_count : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   return scaled / core_clocks;
}

/* Metric set HDCAndSF :: DS Threads Dispatched
 *
 * Returns the accumulated value of A counter 3 unmodified.
 */
static uint64_t
bdw__hdc_and_sf__ds_threads__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: A 3 READ */
   return accumulator[query->a_offset + 3];
}

/* Metric set HDCAndSF :: Samples Written
 *
 * Returns the accumulated value of A counter 26 multiplied by 4.
 */
static uint64_t
bdw__hdc_and_sf__samples_written__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   /* RPN equation: A 26 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 26];

   return raw * 4;
}

/* Metric set HDCAndSF :: EU Stall
 *
 * A counter 8 averaged over the EU count (integer division), scaled by 100
 * and expressed relative to the GPU core clocks.  Either division yields 0
 * when its divisor is zero.
 */
static float
bdw__hdc_and_sf__eu_stall__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 8];
   const uint64_t per_eu = eu_count ? raw / eu_count : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   return scaled / core_clocks;
}

/* Metric set HDCAndSF :: Samples Blended
 *
 * Returns the accumulated value of A counter 27 multiplied by 4.
 */
static uint64_t
bdw__hdc_and_sf__samples_blended__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   /* RPN equation: A 27 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 27];

   return raw * 4;
}

/* Metric set HDCAndSF :: Early Depth Test Fails
 *
 * Returns the accumulated value of A counter 23 multiplied by 4.
 */
static uint64_t
bdw__hdc_and_sf__early_depth_test_fails__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: A 23 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 23];

   return raw * 4;
}

/* Metric set HDCAndSF :: Shader Memory Accesses
 *
 * Returns the accumulated value of A counter 32 unmodified.  The L3 Shader
 * Throughput counter of this metric set builds on this value.
 */
static uint64_t
bdw__hdc_and_sf__shader_memory_accesses__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: A 32 READ */
   return accumulator[query->a_offset + 32];
}

/* Metric set HDCAndSF :: HS Threads Dispatched
 *
 * Returns the accumulated value of A counter 2 unmodified.
 */
static uint64_t
bdw__hdc_and_sf__hs_threads__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: A 2 READ */
   return accumulator[query->a_offset + 2];
}

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss1)
 *
 * The unsigned difference B7 - B6, multiplied by 100 and divided (floating
 * point) by the GPU core clocks.  Returns 0 when the core clock count is
 * zero.
 *
 * NOTE(review): the "(s0.ss1)" label is identical to the one carried by the
 * non_sampler_shader01 counter of this set, while this symbol is named
 * "shader11" -- confirm which slice/subslice this counter describes.
 */
static float
bdw__hdc_and_sf__non_sampler_shader11_access_stalled_on_l3__read(struct brw_context *brw,
                                                                 const struct brw_perf_query_info *query,
                                                                 uint64_t *accumulator)
{
   /* RPN equation: B 7 READ B 6 READ USUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t minuend = accumulator[query->b_offset + 7];
   const uint64_t subtrahend = accumulator[query->b_offset + 6];
   const uint64_t delta = minuend - subtrahend;
   const double scaled = delta * 100;
   const double core_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   return scaled / core_clocks;
}

/* Metric set HDCAndSF :: SLM Bytes Written
 *
 * Returns the accumulated value of A counter 31 multiplied by 64.
 */
static uint64_t
bdw__hdc_and_sf__slm_bytes_written__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: A 31 READ 64 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 31];

   return raw * 64;
}

/* Metric set HDCAndSF :: L3 Shader Throughput
 *
 * Sum of A counter 30, A counter 31 and the Shader Memory Accesses value
 * multiplied by 64.
 *
 * NOTE(review): A30 and A31 enter the sum unscaled, whereas the SLM Bytes
 * Read / SLM Bytes Written counters of this set multiply the same raw
 * counters by 64 -- confirm the mixed scaling is intended by the equation.
 */
static uint64_t
bdw__hdc_and_sf__l3_shader_throughput__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD */
   const uint64_t access_term =
      bdw__hdc_and_sf__shader_memory_accesses__read(brw, query, accumulator) * 64;
   const uint64_t a30 = accumulator[query->a_offset + 30];
   const uint64_t a31 = accumulator[query->a_offset + 31];

   return a30 + (a31 + access_term);
}

/* Metric set HDCAndSF :: Samples Killed in FS
 *
 * Returns the accumulated value of A counter 24 multiplied by 4.
 */
static uint64_t
bdw__hdc_and_sf__samples_killed_in_ps__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: A 24 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 24];

   return raw * 4;
}

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss0)
 *
 * The unsigned difference C3 - C2, multiplied by 100 and divided (floating
 * point) by the GPU core clocks.  Returns 0 when the core clock count is
 * zero.
 */
static float
bdw__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read(struct brw_context *brw,
                                                                 const struct brw_perf_query_info *query,
                                                                 uint64_t *accumulator)
{
   /* RPN equation: C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t minuend = accumulator[query->c_offset + 3];
   const uint64_t subtrahend = accumulator[query->c_offset + 2];
   const uint64_t delta = minuend - subtrahend;
   const double scaled = delta * 100;
   const double core_clocks =
      bdw__hdc_and_sf__gpu_core_clocks__read(brw, query, accumulator);

   if (!core_clocks)
      return 0;

   return scaled / core_clocks;
}

/* Metric set HDCAndSF :: Shader Atomic Memory Accesses
 *
 * Returns the accumulated value of A counter 34 unmodified.
 */
static uint64_t
bdw__hdc_and_sf__shader_atomics__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 34 READ */
   return accumulator[query->a_offset + 34];
}

static struct brw_perf_query_register_prog bdw_hdc_and_sf_mux_regs[98];
static struct brw_perf_query_register_prog bdw_hdc_and_sf_b_counter_regs[8];
static struct brw_perf_query_register_prog bdw_hdc_and_sf_flex_regs[7];

static struct brw_perf_query_counter bdw_hdc_and_sf_query_counters[42];
static struct brw_perf_query_info bdw_hdc_and_sf_query = {
   .kind = OA_COUNTERS,
   .name = "Metric set HDCAndSF",
   .guid = "7bdafd88-a4fa-4ed5-bc09-1a977aa5be3e",
   .counters = bdw_hdc_and_sf_query_counters,
   .n_counters = 0,
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,

   /* Accumulation buffer offsets... */
   .gpu_time_offset = 0,
   .gpu_clock_offset = 1,
   .a_offset = 2,
   .b_offset = 38,
   .c_offset = 46,
   .mux_regs = bdw_hdc_and_sf_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = bdw_hdc_and_sf_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
   .flex_regs = bdw_hdc_and_sf_flex_regs,
   .n_flex_regs = 0, /* Determined at runtime */
};

static void
register_hdc_and_sf_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &bdw_hdc_and_sf_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x105C0232 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10580232 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10380232 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10DC0232 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10D80232 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10B80232 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x118E4400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x025C6080 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x045C004B };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x005C8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00582080 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0258004B };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x025B4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x045B4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C1FA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1F00AA };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04386080 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0638404B };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02384000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08384000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A380000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C380000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00398000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0239A000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0439A000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06392000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CDC25C1 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0ADCC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0AD825C1 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18DB4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1ADB0001 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E9F8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x109F02AA };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EB825C1 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x18B80154 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0AB9A000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CB9A000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EB9A000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D88C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F88000F };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258BAA05 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x278B002A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B2A80 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198C5400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8C0015 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098DC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8DA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8DA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8DA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098E05C0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058E0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198F0020 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185AA0A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85AA00 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x19835000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830155 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09848000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01848000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0384C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0584C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07844000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x19808000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x11808000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1380C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1580C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x17804000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x51800040 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x43800400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47800C62 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F801042 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x418014A4 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };

      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002744, .val = 0x00800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0x10800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0x00800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002770, .val = 0x00000002 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002774, .val = 0x0000FFF7 };

      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E458, .val = 0x00005004 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E558, .val = 0x00010003 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E658, .val = 0x00012011 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E758, .val = 0x00015014 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E45C, .val = 0x00051050 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E55C, .val = 0x00053052 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E65C, .val = 0x00055054 };


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 12;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 16;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 20;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 24;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 32;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 40;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 48;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__poly_data_ready__read;
      counter->name = "Polygon Data Ready";
      counter->desc = "The percentage of time in which geometry pipeline output is ready";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 56;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 64;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.subslice_mask & 0x20) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__hdc_and_sf__non_sampler_shader12_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss2)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 72;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 80;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 88;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 96;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 104;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = bdw__hdc_and_sf__avg_gpu_core_frequency__max(brw);
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 120;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss1)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 128;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 136;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss2)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2)";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 144;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 152;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 160;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 164;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 168;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 172;
      counter->size = sizeof(float);

      if (brw->perfquery.sys_vars.subslice_mask & 0x8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__hdc_and_sf__non_sampler_shader10_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss0)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 176;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 184;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 192;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 200;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 208;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 216;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 224;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 232;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 240;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 248;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.subslice_mask & 0x10) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__hdc_and_sf__non_sampler_shader11_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss1)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1)";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 256;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 264;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 272;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 280;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss0)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0)";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 288;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 296;
      counter->size = sizeof(uint64_t);

      query->data_size = counter->offset + counter->size;
   }

   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Metric set L3_1 :: GPU Core Clocks
 *
 * RPN equation: GPU_CLOCK 0 READ
 *
 * Returns the accumulator entry located at the query's gpu_clock_offset.
 * The brw argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__gpu_core_clocks__read(struct brw_context *brw,
                                 const struct brw_perf_query_info *query,
                                 uint64_t *accumulator)
{
   return accumulator[query->gpu_clock_offset];
}

/* Metric set L3_1 :: EU Active
 *
 * RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Entry 7 of the A region is integer-divided by the EU count, multiplied by
 * 100 and finally divided in floating point by the GPU core clocks.  A zero
 * EU count or a zero clock count yields 0 instead of dividing by zero.
 */
static float
bdw__l3_1__eu_active__read(struct brw_context *brw,
                           const struct brw_perf_query_info *query,
                           uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 7];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t per_eu = 0;
   double percent = 0;

   if (eu_count) {
      per_eu = raw / eu_count;
   }

   /* The multiplication is done in uint64_t before widening to double. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_1__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks) {
      percent = scaled / clocks;
   }

   return percent;
}

/* Metric set L3_1 :: Slice1 L3 Bank1 Stalled
 *
 * RPN equation: B 4 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Entry 4 of the B region is multiplied by 100 (unsigned 64-bit) and then
 * divided in floating point by the GPU core clocks; a zero clock count
 * yields 0.
 */
static float
bdw__l3_1__l31_bank1_stalled__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   const double clocks = bdw__l3_1__gpu_core_clocks__read(brw, query, accumulator);
   const double scaled = accumulator[query->b_offset + 4] * 100;
   double percent = 0;

   if (clocks) {
      percent = scaled / clocks;
   }

   return percent;
}

/* Metric set L3_1 :: EU Both FPU Pipes Active
 *
 * RPN equation: A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Entry 9 of the A region is integer-divided by the EU count, multiplied by
 * 100 and finally divided in floating point by the GPU core clocks.  A zero
 * EU count or a zero clock count yields 0 instead of dividing by zero.
 */
static float
bdw__l3_1__eu_fpu_both_active__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 9];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t per_eu = 0;
   double percent = 0;

   if (eu_count) {
      per_eu = raw / eu_count;
   }

   /* The multiplication is done in uint64_t before widening to double. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_1__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks) {
      percent = scaled / clocks;
   }

   return percent;
}

/* Metric set L3_1 :: VS Send Pipe Active
 *
 * RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Entry 12 of the A region is integer-divided by the EU count, multiplied by
 * 100 and finally divided in floating point by the GPU core clocks.  A zero
 * EU count or a zero clock count yields 0 instead of dividing by zero.
 */
static float
bdw__l3_1__vs_send_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 12];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t per_eu = 0;
   double percent = 0;

   if (eu_count) {
      per_eu = raw / eu_count;
   }

   /* The multiplication is done in uint64_t before widening to double. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_1__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks) {
      percent = scaled / clocks;
   }

   return percent;
}

/* Metric set L3_1 :: VS FPU1 Pipe Active
 *
 * RPN equation: A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Entry 11 of the A region is integer-divided by the EU count, multiplied by
 * 100 and finally divided in floating point by the GPU core clocks.  A zero
 * EU count or a zero clock count yields 0 instead of dividing by zero.
 */
static float
bdw__l3_1__vs_fpu1_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 11];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t per_eu = 0;
   double percent = 0;

   if (eu_count) {
      per_eu = raw / eu_count;
   }

   /* The multiplication is done in uint64_t before widening to double. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_1__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks) {
      percent = scaled / clocks;
   }

   return percent;
}

/* Metric set L3_1 :: GS Threads Dispatched
 *
 * RPN equation: A 5 READ
 *
 * Returns entry 5 of the A region of the accumulator unchanged.  The brw
 * argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__gs_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   return accumulator[query->a_offset + 5];
}

/* Metric set L3_1 :: Early Hi-Depth Test Fails
 *
 * RPN equation: A 22 READ 4 UMUL
 *
 * Returns entry 22 of the A region multiplied by 4 (unsigned 64-bit
 * arithmetic).  The brw argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__hi_depth_test_fails__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 22];

   return raw * 4;
}

/* Metric set L3_1 :: FS Both FPU Active
 *
 * RPN equation: A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Entry 18 of the A region is integer-divided by the EU count, multiplied by
 * 100 and finally divided in floating point by the GPU core clocks.  A zero
 * EU count or a zero clock count yields 0 instead of dividing by zero.
 */
static float
bdw__l3_1__ps_eu_both_fpu_active__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 18];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t per_eu = 0;
   double percent = 0;

   if (eu_count) {
      per_eu = raw / eu_count;
   }

   /* The multiplication is done in uint64_t before widening to double. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_1__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks) {
      percent = scaled / clocks;
   }

   return percent;
}

/* Metric set L3_1 :: VS Threads Dispatched
 *
 * RPN equation: A 1 READ
 *
 * Returns entry 1 of the A region of the accumulator unchanged.  The brw
 * argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__vs_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   return accumulator[query->a_offset + 1];
}

/* Metric set L3_1 :: FS Threads Dispatched
 *
 * RPN equation: A 6 READ
 *
 * Returns entry 6 of the A region of the accumulator unchanged.  The brw
 * argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__ps_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   return accumulator[query->a_offset + 6];
}

/* Metric set L3_1 :: Shader Barrier Messages
 *
 * RPN equation: A 35 READ
 *
 * Returns entry 35 of the A region of the accumulator unchanged.  The brw
 * argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__shader_barriers__read(struct brw_context *brw,
                                 const struct brw_perf_query_info *query,
                                 uint64_t *accumulator)
{
   return accumulator[query->a_offset + 35];
}

/* Metric set L3_1 :: Slice1 L3 Bank0 Stalled
 *
 * RPN equation: B 0 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Entry 0 of the B region is multiplied by 100 (unsigned 64-bit) and then
 * divided in floating point by the GPU core clocks; a zero clock count
 * yields 0.
 */
static float
bdw__l3_1__l31_bank0_stalled__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   const double clocks = bdw__l3_1__gpu_core_clocks__read(brw, query, accumulator);
   const double scaled = accumulator[query->b_offset] * 100;
   double percent = 0;

   if (clocks) {
      percent = scaled / clocks;
   }

   return percent;
}

/* Metric set L3_1 :: Sampler Texels
 *
 * RPN equation: A 28 READ 4 UMUL
 *
 * Returns entry 28 of the A region multiplied by 4 (unsigned 64-bit
 * arithmetic).  The brw argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__sampler_texels__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 28];

   return raw * 4;
}

/* Metric set L3_1 :: Pixels Failing Tests
 *
 * RPN equation: A 25 READ 4 UMUL
 *
 * Returns entry 25 of the A region multiplied by 4 (unsigned 64-bit
 * arithmetic).  The brw argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__pixels_failing_post_ps_tests__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 25];

   return raw * 4;
}

/* Metric set L3_1 :: GPU Time Elapsed
 *
 * RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV
 *
 * Scales the accumulator entry at gpu_time_offset by 1000000000 and divides
 * it by the timestamp frequency taken from brw->perfquery.sys_vars.  A zero
 * frequency yields 0 instead of dividing by zero.
 *
 * The straightforward form, ticks * 1000000000 / freq, wraps around in
 * uint64_t once ticks exceeds roughly 1.8e10.  Writing ticks as
 * q * freq + r (with r < freq) gives the same floor value,
 * q * 1000000000 + r * 1000000000 / freq, without forming the large
 * intermediate product.  The remaining product r * 1000000000 can only wrap
 * for frequencies above roughly 1.8e10.
 */
static uint64_t
bdw__l3_1__gpu_time__read(struct brw_context *brw,
                          const struct brw_perf_query_info *query,
                          uint64_t *accumulator)
{
   const uint64_t ticks = accumulator[query->gpu_time_offset + 0];
   const uint64_t freq = brw->perfquery.sys_vars.timestamp_frequency;

   if (!freq) {
      return 0;
   }

   const uint64_t whole = (ticks / freq) * 1000000000;
   const uint64_t fraction = ((ticks % freq) * 1000000000) / freq;

   return whole + fraction;
}

/* Metric set L3_1 :: AVG GPU Core Frequency
 *
 * RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV
 *
 * Multiplies the GPU core clock count by 1000000000 and integer-divides the
 * product by the value of the GPU time reader; a GPU time of zero yields 0.
 *
 * NOTE(review): the product is formed in uint64_t, so it wraps for clock
 * counts above roughly 1.8e10 -- confirm whether such long captures matter.
 */
static uint64_t
bdw__l3_1__avg_gpu_core_frequency__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t scaled_clocks =
      bdw__l3_1__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t gpu_time = bdw__l3_1__gpu_time__read(brw, query, accumulator);

   if (gpu_time == 0) {
      return 0;
   }

   return scaled_clocks / gpu_time;
}

/* Metric set L3_1 :: AVG GPU Core Frequency (maximum value)
 *
 * RPN equation: $GpuMaxFrequency
 *
 * Returns the gt_max_freq system variable stored in brw->perfquery.
 */
static uint64_t
bdw__l3_1__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   const uint64_t max_freq = brw->perfquery.sys_vars.gt_max_freq;

   return max_freq;
}

/* Metric set L3_1 :: Sampler Texels Misses
 *
 * RPN equation: A 29 READ 4 UMUL
 *
 * Returns entry 29 of the A region multiplied by 4 (unsigned 64-bit
 * arithmetic).  The brw argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__sampler_texel_misses__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 29];

   return raw * 4;
}

/* Metric set L3_1 :: CS Threads Dispatched
 *
 * RPN equation: A 4 READ
 *
 * Returns entry 4 of the A region of the accumulator unchanged.  The brw
 * argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__cs_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   return accumulator[query->a_offset + 4];
}

/* Metric set L3_1 :: SLM Bytes Read
 *
 * RPN equation: A 30 READ 64 UMUL
 *
 * Returns entry 30 of the A region multiplied by 64 (unsigned 64-bit
 * arithmetic).  The brw argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__slm_bytes_read__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 30];

   return raw * 64;
}

/* Metric set L3_1 :: PS FPU1 Pipe Active
 *
 * RPN equation: A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Entry 16 of the A region is integer-divided by the EU count, multiplied by
 * 100 and finally divided in floating point by the GPU core clocks.  A zero
 * EU count or a zero clock count yields 0 instead of dividing by zero.
 */
static float
bdw__l3_1__ps_fpu1_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 16];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t per_eu = 0;
   double percent = 0;

   if (eu_count) {
      per_eu = raw / eu_count;
   }

   /* The multiplication is done in uint64_t before widening to double. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_1__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks) {
      percent = scaled / clocks;
   }

   return percent;
}

/* Metric set L3_1 :: PS Send Pipeline Active
 *
 * RPN equation: A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Entry 17 of the A region is integer-divided by the EU count, multiplied by
 * 100 and finally divided in floating point by the GPU core clocks.  A zero
 * EU count or a zero clock count yields 0 instead of dividing by zero.
 */
static float
bdw__l3_1__ps_send_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 17];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t per_eu = 0;
   double percent = 0;

   if (eu_count) {
      per_eu = raw / eu_count;
   }

   /* The multiplication is done in uint64_t before widening to double. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_1__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks) {
      percent = scaled / clocks;
   }

   return percent;
}

/* Metric set L3_1 :: VS FPU0 Pipe Active
 *
 * RPN equation: A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Entry 10 of the A region is integer-divided by the EU count, multiplied by
 * 100 and finally divided in floating point by the GPU core clocks.  A zero
 * EU count or a zero clock count yields 0 instead of dividing by zero.
 */
static float
bdw__l3_1__vs_fpu0_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 10];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t per_eu = 0;
   double percent = 0;

   if (eu_count) {
      per_eu = raw / eu_count;
   }

   /* The multiplication is done in uint64_t before widening to double. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_1__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks) {
      percent = scaled / clocks;
   }

   return percent;
}

/* Metric set L3_1 :: GPU Busy
 *
 * RPN equation: A 0 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Entry 0 of the A region is multiplied by 100 (unsigned 64-bit) and then
 * divided in floating point by the GPU core clocks; a zero clock count
 * yields 0.
 */
static float
bdw__l3_1__gpu_busy__read(struct brw_context *brw,
                          const struct brw_perf_query_info *query,
                          uint64_t *accumulator)
{
   const double clocks = bdw__l3_1__gpu_core_clocks__read(brw, query, accumulator);
   const double scaled = accumulator[query->a_offset] * 100;
   double percent = 0;

   if (clocks) {
      percent = scaled / clocks;
   }

   return percent;
}

/* Metric set L3_1 :: Rasterized Pixels
 *
 * RPN equation: A 21 READ 4 UMUL
 *
 * Returns entry 21 of the A region multiplied by 4 (unsigned 64-bit
 * arithmetic).  The brw argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__rasterized_pixels__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 21];

   return raw * 4;
}

/* Metric set L3_1 :: Slice1 L3 Bank1 Active
 *
 * RPN equation: C 7 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Entry 7 of the C region is multiplied by 100 (unsigned 64-bit) and then
 * divided in floating point by the GPU core clocks; a zero clock count
 * yields 0.
 */
static float
bdw__l3_1__l31_bank1_active__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   const double clocks = bdw__l3_1__gpu_core_clocks__read(brw, query, accumulator);
   const double scaled = accumulator[query->c_offset + 7] * 100;
   double percent = 0;

   if (clocks) {
      percent = scaled / clocks;
   }

   return percent;
}

/* Metric set L3_1 :: PS FPU0 Pipe Active
 *
 * RPN equation: A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Entry 15 of the A region is integer-divided by the EU count, multiplied by
 * 100 and finally divided in floating point by the GPU core clocks.  A zero
 * EU count or a zero clock count yields 0 instead of dividing by zero.
 */
static float
bdw__l3_1__ps_fpu0_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 15];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t per_eu = 0;
   double percent = 0;

   if (eu_count) {
      per_eu = raw / eu_count;
   }

   /* The multiplication is done in uint64_t before widening to double. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_1__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks) {
      percent = scaled / clocks;
   }

   return percent;
}

/* Metric set L3_1 :: DS Threads Dispatched
 *
 * RPN equation: A 3 READ
 *
 * Returns entry 3 of the A region of the accumulator unchanged.  The brw
 * argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__ds_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   return accumulator[query->a_offset + 3];
}

/* Metric set L3_1 :: Samples Written
 *
 * RPN equation: A 26 READ 4 UMUL
 *
 * Returns entry 26 of the A region multiplied by 4 (unsigned 64-bit
 * arithmetic).  The brw argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__samples_written__read(struct brw_context *brw,
                                 const struct brw_perf_query_info *query,
                                 uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 26];

   return raw * 4;
}

/* Metric set L3_1 :: EU Stall
 *
 * RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Entry 8 of the A region is integer-divided by the EU count, multiplied by
 * 100 and finally divided in floating point by the GPU core clocks.  A zero
 * EU count or a zero clock count yields 0 instead of dividing by zero.
 */
static float
bdw__l3_1__eu_stall__read(struct brw_context *brw,
                          const struct brw_perf_query_info *query,
                          uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 8];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   uint64_t per_eu = 0;
   double percent = 0;

   if (eu_count) {
      per_eu = raw / eu_count;
   }

   /* The multiplication is done in uint64_t before widening to double. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_1__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks) {
      percent = scaled / clocks;
   }

   return percent;
}

/* Metric set L3_1 :: Slice1 L3 Bank0 Active
 *
 * RPN equation: C 6 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Entry 6 of the C region is multiplied by 100 (unsigned 64-bit) and then
 * divided in floating point by the GPU core clocks; a zero clock count
 * yields 0.
 */
static float
bdw__l3_1__l31_bank0_active__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   const double clocks = bdw__l3_1__gpu_core_clocks__read(brw, query, accumulator);
   const double scaled = accumulator[query->c_offset + 6] * 100;
   double percent = 0;

   if (clocks) {
      percent = scaled / clocks;
   }

   return percent;
}

/* Metric set L3_1 :: Samples Blended
 *
 * RPN equation: A 27 READ 4 UMUL
 *
 * Returns entry 27 of the A region multiplied by 4 (unsigned 64-bit
 * arithmetic).  The brw argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__samples_blended__read(struct brw_context *brw,
                                 const struct brw_perf_query_info *query,
                                 uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 27];

   return raw * 4;
}

/* Metric set L3_1 :: Early Depth Test Fails
 *
 * RPN equation: A 23 READ 4 UMUL
 *
 * Returns entry 23 of the A region multiplied by 4 (unsigned 64-bit
 * arithmetic).  The brw argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__early_depth_test_fails__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 23];

   return raw * 4;
}

/* Metric set L3_1 :: Shader Memory Accesses
 *
 * RPN equation: A 32 READ
 *
 * Returns entry 32 of the A region of the accumulator unchanged.  The brw
 * argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__shader_memory_accesses__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   return accumulator[query->a_offset + 32];
}

/* Metric set L3_1 :: HS Threads Dispatched
 *
 * RPN equation: A 2 READ
 *
 * Returns entry 2 of the A region of the accumulator unchanged.  The brw
 * argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__hs_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   return accumulator[query->a_offset + 2];
}

/* Metric set L3_1 :: SLM Bytes Written
 *
 * RPN equation: A 31 READ 64 UMUL
 *
 * Returns entry 31 of the A region multiplied by 64 (unsigned 64-bit
 * arithmetic).  The brw argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__slm_bytes_written__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 31];

   return raw * 64;
}

/* Metric set L3_1 :: L3 Shader Throughput
 *
 * RPN equation: A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD
 *
 * Sums entry 30 and entry 31 of the A region with 64 times the value of the
 * shader memory accesses reader, all in unsigned 64-bit arithmetic.
 *
 * NOTE(review): entries 30 and 31 are added unscaled here, whereas the
 * SLM bytes read/written readers of this metric set multiply the same
 * entries by 64.  This follows the RPN equation as given -- confirm that the
 * equation is what is intended.
 */
static uint64_t
bdw__l3_1__l3_shader_throughput__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   const uint64_t *a_counters = &accumulator[query->a_offset];
   const uint64_t access_bytes =
      bdw__l3_1__shader_memory_accesses__read(brw, query, accumulator) * 64;

   return a_counters[30] + (a_counters[31] + access_bytes);
}

/* Metric set L3_1 :: Samples Killed in FS
 *
 * RPN equation: A 24 READ 4 UMUL
 *
 * Returns entry 24 of the A region multiplied by 4 (unsigned 64-bit
 * arithmetic).  The brw argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__samples_killed_in_ps__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 24];

   return raw * 4;
}

/* Metric set L3_1 :: Shader Atomic Memory Accesses
 *
 * RPN equation: A 34 READ
 *
 * Returns entry 34 of the A region of the accumulator unchanged.  The brw
 * argument is not used by this reader.
 */
static uint64_t
bdw__l3_1__shader_atomics__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   return accumulator[query->a_offset + 34];
}

/* Fixed-size backing storage for the L3_1 metric set's register programming
 * lists.  register_l3_1_counter_query() appends to the mux list one entry at
 * a time, bumping n_mux_regs as it goes; the b-counter and flex lists are
 * presumably filled the same way (that part of the function is not verified
 * here).
 */
static struct brw_perf_query_register_prog bdw_l3_1_mux_regs[91];
static struct brw_perf_query_register_prog bdw_l3_1_b_counter_regs[22];
static struct brw_perf_query_register_prog bdw_l3_1_flex_regs[7];

/* Backing storage for the counter descriptions of the L3_1 metric set. */
static struct brw_perf_query_counter bdw_l3_1_query_counters[39];

/* Static description of the "Metric set L3_1" OA query.  All element counts
 * start at zero; the register list counts are marked as determined at
 * runtime.
 */
static struct brw_perf_query_info bdw_l3_1_query = {
   .kind = OA_COUNTERS,
   .name = "Metric set L3_1",
   .guid = "9385ebb2-f34f-4aa5-aec5-7e9cbbea0f0b",
   .counters = bdw_l3_1_query_counters,
   .n_counters = 0,
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,

   /* Accumulation buffer offsets...
    *
    * These are indices into the uint64_t accumulator array that the
    * bdw__l3_1__*__read() functions index with, e.g.
    * accumulator[query->a_offset + N].
    */
   .gpu_time_offset = 0,
   .gpu_clock_offset = 1,
   .a_offset = 2,
   .b_offset = 38,
   .c_offset = 46,
   .mux_regs = bdw_l3_1_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = bdw_l3_1_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
   .flex_regs = bdw_l3_1_flex_regs,
   .n_flex_regs = 0, /* Determined at runtime */
};

/* Registers metric set L3_1 with the given context.
 *
 * On the first call (detected by bdw_l3_1_query.data_size still being zero)
 * this fills the static query descriptor with:
 *   - 91 mux register writes,
 *   - 22 B-counter register writes,
 *   - 7 flex register writes,
 *   - up to 39 counter descriptors (four of them are only added when bit 1
 *     of brw->perfquery.sys_vars.slice_mask is set),
 * and then derives data_size from the last counter registered.
 *
 * On every call the query is inserted into brw->perfquery.oa_metrics_table
 * keyed by its GUID string.
 *
 * The order of the appends below defines the order of the register lists
 * and of the counters array, so statements must not be reordered.
 */
static void
register_l3_1_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &bdw_l3_1_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      /* Mux register programming (appended in order; 91 entries). */
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10BF03DA };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x14BF0001 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x12980340 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x12990340 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CBF1187 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EBF1205 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00BF0500 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02BF042B };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04BF002C };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CDAC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EDAC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00DA8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02DAC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04DA4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04983400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10980000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06990034 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10990000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C9DC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E9DC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x009D8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x029DC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x049D4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x109F02A8 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C9FA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E9F00BA };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CB88000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CB95000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EB95000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00B94000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02B95000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04B91000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06B92000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CBA4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F88000F };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03888000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05888000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07888000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09888000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B888000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D880400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258B800A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x278B002A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B5500 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198C4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8C0015 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038C4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058C4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078C4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098C4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8C4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8C4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8DA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8DA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018D8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038DA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058DA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078D2000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185800A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85AA00 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830154 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01848000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0384C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0584C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07844000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x11808000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1380C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1580C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x17804000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800060 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };

      /* B-counter register programming (22 entries). */
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002744, .val = 0x00800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002770, .val = 0x00100070 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002774, .val = 0x0000FFF1 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002778, .val = 0x00014002 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000277C, .val = 0x0000C3FF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002780, .val = 0x00010002 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002784, .val = 0x0000C7FF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002788, .val = 0x00004002 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000278C, .val = 0x0000D3FF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002790, .val = 0x00100700 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002794, .val = 0x0000FF1F };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002798, .val = 0x00001402 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000279C, .val = 0x0000FC3F };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A0, .val = 0x00001002 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A4, .val = 0x0000FC7F };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A8, .val = 0x00000402 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027AC, .val = 0x0000FD3F };

      /* Flex register programming (7 entries). */
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E458, .val = 0x00005004 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E558, .val = 0x00010003 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E658, .val = 0x00012011 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E758, .val = 0x00015014 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E45C, .val = 0x00051050 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E55C, .val = 0x00053052 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E65C, .val = 0x00055054 };


      /* Counter descriptors.  Each one is appended to query->counters and
       * given a fixed byte offset and size within the result data.  The
       * offsets are constants, so they do not shift when the conditional
       * counters below are left out. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      /* Only registered when bit 1 of the slice mask is set. */
      if (brw->perfquery.sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_1__l31_bank1_stalled__read;
         counter->name = "Slice1 L3 Bank1 Stalled";
         counter->desc = "The percentage of time in which slice1 L3 bank1 is stalled";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 12;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 16;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 20;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 24;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 32;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 40;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 48;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 56;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 64;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 72;
      counter->size = sizeof(uint64_t);

      /* Only registered when bit 1 of the slice mask is set. */
      if (brw->perfquery.sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_1__l31_bank0_stalled__read;
         counter->name = "Slice1 L3 Bank0 Stalled";
         counter->desc = "The percentage of time in which slice1 L3 bank0 is stalled";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 80;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 88;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 96;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 104;
      counter->size = sizeof(uint64_t);

      /* Unlike the other counters here, raw_max comes from a helper call
       * rather than a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = bdw__l3_1__avg_gpu_core_frequency__max(brw);
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 120;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 128;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 136;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 144;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 148;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 152;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 156;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 160;
      counter->size = sizeof(uint64_t);

      /* Only registered when bit 1 of the slice mask is set. */
      if (brw->perfquery.sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_1__l31_bank1_active__read;
         counter->name = "Slice1 L3 Bank1 Active";
         counter->desc = "The percentage of time in which slice1 L3 bank1 is active";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 168;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 172;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 176;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 184;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 192;
      counter->size = sizeof(float);

      /* Only registered when bit 1 of the slice mask is set. */
      if (brw->perfquery.sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_1__l31_bank0_active__read;
         counter->name = "Slice1 L3 Bank0 Active";
         counter->desc = "The percentage of time in which slice1 L3 bank0 is active";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 196;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 200;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 208;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 216;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 224;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 232;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 240;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 248;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 256;
      counter->size = sizeof(uint64_t);

      /* counter still points at the last descriptor added above, which is
       * unconditional, so the total is 256 + sizeof(uint64_t).  A non-zero
       * data_size also marks the query as initialized for later calls. */
      query->data_size = counter->offset + counter->size;
   }

   /* Performed on every call, not only on first-time initialization. */
   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Metric set L3_2 :: GPU Core Clocks
 *
 * RPN equation: GPU_CLOCK 0 READ
 *
 * Returns the accumulator slot at the query's GPU clock offset.
 * The brw argument is not used by this reader.
 */
static uint64_t
bdw__l3_2__gpu_core_clocks__read(struct brw_context *brw,
                                 const struct brw_perf_query_info *query,
                                 uint64_t *accumulator)
{
   return accumulator[query->gpu_clock_offset + 0];
}

/* Metric set L3_2 :: EU Active
 *
 * Splits A-counter slot 7 across the EU count, multiplies by 100 and
 * divides by the GPU core clock count.  The per-EU split is an integer
 * division, so its remainder is dropped before the scaling.  Yields 0 when
 * the EU count or the clock count is zero.
 */
static float
bdw__l3_2__eu_active__read(struct brw_context *brw,
                           const struct brw_perf_query_info *query,
                           uint64_t *accumulator)
{
   /* RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 7];
   const double clocks = bdw__l3_2__gpu_core_clocks__read(brw, query, accumulator);
   double scaled = 0;

   if (eu_count != 0)
      scaled = (raw / eu_count) * 100;

   if (clocks == 0)
      return 0;

   const double percent = scaled / clocks;
   return percent;
}

/* Metric set L3_2 :: Slice0 L3 Bank1 Active
 *
 * C-counter slot 7 multiplied by 100 (in 64-bit unsigned arithmetic) and
 * divided by the GPU core clock count.  Yields 0 when the clock count is
 * zero.
 */
static float
bdw__l3_2__l30_bank1_active__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: C 7 READ 100 UMUL $GpuCoreClocks FDIV */
   const double scaled = accumulator[query->c_offset + 7] * 100;
   const double clocks = bdw__l3_2__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0)
      return 0;

   const double percent = scaled / clocks;
   return percent;
}

/* Metric set L3_2 :: EU Both FPU Pipes Active
 *
 * Splits A-counter slot 9 across the EU count, multiplies by 100 and
 * divides by the GPU core clock count.  The per-EU split is an integer
 * division, so its remainder is dropped before the scaling.  Yields 0 when
 * the EU count or the clock count is zero.
 */
static float
bdw__l3_2__eu_fpu_both_active__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 9];
   const double clocks = bdw__l3_2__gpu_core_clocks__read(brw, query, accumulator);
   double scaled = 0;

   if (eu_count != 0)
      scaled = (raw / eu_count) * 100;

   if (clocks == 0)
      return 0;

   const double percent = scaled / clocks;
   return percent;
}

/* Metric set L3_2 :: VS Send Pipe Active
 *
 * Splits A-counter slot 12 across the EU count, multiplies by 100 and
 * divides by the GPU core clock count.  The per-EU split is an integer
 * division, so its remainder is dropped before the scaling.  Yields 0 when
 * the EU count or the clock count is zero.
 */
static float
bdw__l3_2__vs_send_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 12];
   const double clocks = bdw__l3_2__gpu_core_clocks__read(brw, query, accumulator);
   double scaled = 0;

   if (eu_count != 0)
      scaled = (raw / eu_count) * 100;

   if (clocks == 0)
      return 0;

   const double percent = scaled / clocks;
   return percent;
}

/* Metric set L3_2 :: VS FPU1 Pipe Active
 *
 * Splits A-counter slot 11 across the EU count, multiplies by 100 and
 * divides by the GPU core clock count.  The per-EU split is an integer
 * division, so its remainder is dropped before the scaling.  Yields 0 when
 * the EU count or the clock count is zero.
 */
static float
bdw__l3_2__vs_fpu1_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 11];
   const double clocks = bdw__l3_2__gpu_core_clocks__read(brw, query, accumulator);
   double scaled = 0;

   if (eu_count != 0)
      scaled = (raw / eu_count) * 100;

   if (clocks == 0)
      return 0;

   const double percent = scaled / clocks;
   return percent;
}

/* Metric set L3_2 :: GS Threads Dispatched
 *
 * Returns A-counter slot 5 of the accumulator, unscaled.  `brw` is not
 * consulted.
 */
static uint64_t
bdw__l3_2__gs_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   /* RPN equation: A 5 READ */
   return accumulator[query->a_offset + 5];
}

/* Metric set L3_2 :: Early Hi-Depth Test Fails
 *
 * Returns A-counter slot 22 of the accumulator multiplied by 4 (64-bit
 * unsigned arithmetic).  `brw` is not consulted.
 */
static uint64_t
bdw__l3_2__hi_depth_test_fails__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 22 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 22];

   return raw * 4;
}

/* Metric set L3_2 :: FS Both FPU Active
 *
 * Splits A-counter slot 18 across the EU count, multiplies by 100 and
 * divides by the GPU core clock count.  The per-EU split is an integer
 * division, so its remainder is dropped before the scaling.  Yields 0 when
 * the EU count or the clock count is zero.
 */
static float
bdw__l3_2__ps_eu_both_fpu_active__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   /* RPN equation: A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 18];
   const double clocks = bdw__l3_2__gpu_core_clocks__read(brw, query, accumulator);
   double scaled = 0;

   if (eu_count != 0)
      scaled = (raw / eu_count) * 100;

   if (clocks == 0)
      return 0;

   const double percent = scaled / clocks;
   return percent;
}

/* Metric set L3_2 :: VS Threads Dispatched
 *
 * Returns A-counter slot 1 of the accumulator, unscaled.  `brw` is not
 * consulted.
 */
static uint64_t
bdw__l3_2__vs_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   /* RPN equation: A 1 READ */
   return accumulator[query->a_offset + 1];
}

/* Metric set L3_2 :: Slice0 L3 Bank1 Stalled
 *
 * B-counter slot 4 multiplied by 100 (in 64-bit unsigned arithmetic) and
 * divided by the GPU core clock count.  Yields 0 when the clock count is
 * zero.
 */
static float
bdw__l3_2__l30_bank1_stalled__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   /* RPN equation: B 4 READ 100 UMUL $GpuCoreClocks FDIV */
   const double scaled = accumulator[query->b_offset + 4] * 100;
   const double clocks = bdw__l3_2__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0)
      return 0;

   const double percent = scaled / clocks;
   return percent;
}

/* Metric set L3_2 :: FS Threads Dispatched
 *
 * Returns A-counter slot 6 of the accumulator, unscaled.  `brw` is not
 * consulted.
 */
static uint64_t
bdw__l3_2__ps_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   /* RPN equation: A 6 READ */
   return accumulator[query->a_offset + 6];
}

/* Metric set L3_2 :: Shader Barrier Messages
 *
 * Returns A-counter slot 35 of the accumulator, unscaled.  `brw` is not
 * consulted.
 */
static uint64_t
bdw__l3_2__shader_barriers__read(struct brw_context *brw,
                                 const struct brw_perf_query_info *query,
                                 uint64_t *accumulator)
{
   /* RPN equation: A 35 READ */
   return accumulator[query->a_offset + 35];
}

/* Metric set L3_2 :: Sampler Texels
 *
 * Returns A-counter slot 28 of the accumulator multiplied by 4 (64-bit
 * unsigned arithmetic).  `brw` is not consulted.
 */
static uint64_t
bdw__l3_2__sampler_texels__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 28 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 28];

   return raw * 4;
}

/* Metric set L3_2 :: Pixels Failing Tests
 *
 * Returns A-counter slot 25 of the accumulator multiplied by 4 (64-bit
 * unsigned arithmetic).  `brw` is not consulted.
 */
static uint64_t
bdw__l3_2__pixels_failing_post_ps_tests__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: A 25 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 25];

   return raw * 4;
}

/* Metric set L3_2 :: GPU Time Elapsed
 *
 * Multiplies the accumulator's GPU time slot by 1000000000 and divides by
 * the system timestamp frequency (presumably giving nanoseconds, assuming
 * the frequency is in Hz -- TODO confirm).  The multiplication is carried
 * out in 64-bit unsigned arithmetic before the division, so it wraps for
 * sufficiently large inputs.  Yields 0 when the frequency is zero.
 */
static uint64_t
bdw__l3_2__gpu_time__read(struct brw_context *brw,
                          const struct brw_perf_query_info *query,
                          uint64_t *accumulator)
{
   /* RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV */
   const uint64_t frequency = brw->perfquery.sys_vars.timestamp_frequency;
   const uint64_t scaled = accumulator[query->gpu_time_offset + 0] * 1000000000;

   if (frequency == 0)
      return 0;

   return scaled / frequency;
}

/* Metric set L3_2 :: AVG GPU Core Frequency
 *
 * GPU core clock count multiplied by 1000000000 (64-bit unsigned
 * arithmetic, so it can wrap) and divided by the GPU Time Elapsed value of
 * this metric set.  Yields 0 when the elapsed time is zero.
 */
static uint64_t
bdw__l3_2__avg_gpu_core_frequency__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV */
   const uint64_t scaled_clocks =
      bdw__l3_2__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t elapsed = bdw__l3_2__gpu_time__read(brw, query, accumulator);

   if (elapsed == 0)
      return 0;

   return scaled_clocks / elapsed;
}

/* Metric set L3_2 :: AVG GPU Core Frequency (upper bound)
 *
 * Reports the context's gt_max_freq system variable as the maximum value
 * for the AVG GPU Core Frequency counter.
 */
static uint64_t
bdw__l3_2__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   /* RPN equation: $GpuMaxFrequency */
   const uint64_t max_frequency = brw->perfquery.sys_vars.gt_max_freq;

   return max_frequency;
}

/* Metric set L3_2 :: Sampler Texels Misses
 *
 * Returns A-counter slot 29 of the accumulator multiplied by 4 (64-bit
 * unsigned arithmetic).  `brw` is not consulted.
 */
static uint64_t
bdw__l3_2__sampler_texel_misses__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 29 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 29];

   return raw * 4;
}

/* Metric set L3_2 :: CS Threads Dispatched
 *
 * Returns A-counter slot 4 of the accumulator, unscaled.  `brw` is not
 * consulted.
 */
static uint64_t
bdw__l3_2__cs_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   /* RPN equation: A 4 READ */
   return accumulator[query->a_offset + 4];
}

/* Metric set L3_2 :: SLM Bytes Read
 *
 * Returns A-counter slot 30 of the accumulator multiplied by 64 (64-bit
 * unsigned arithmetic).  `brw` is not consulted.
 */
static uint64_t
bdw__l3_2__slm_bytes_read__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 30 READ 64 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 30];

   return raw * 64;
}

/* Metric set L3_2 :: PS FPU1 Pipe Active
 *
 * Splits A-counter slot 16 across the EU count, multiplies by 100 and
 * divides by the GPU core clock count.  The per-EU split is an integer
 * division, so its remainder is dropped before the scaling.  Yields 0 when
 * the EU count or the clock count is zero.
 */
static float
bdw__l3_2__ps_fpu1_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 16];
   const double clocks = bdw__l3_2__gpu_core_clocks__read(brw, query, accumulator);
   double scaled = 0;

   if (eu_count != 0)
      scaled = (raw / eu_count) * 100;

   if (clocks == 0)
      return 0;

   const double percent = scaled / clocks;
   return percent;
}

/* Metric set L3_2 :: PS Send Pipeline Active
 *
 * Splits A-counter slot 17 across the EU count, multiplies by 100 and
 * divides by the GPU core clock count.  The per-EU split is an integer
 * division, so its remainder is dropped before the scaling.  Yields 0 when
 * the EU count or the clock count is zero.
 */
static float
bdw__l3_2__ps_send_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 17];
   const double clocks = bdw__l3_2__gpu_core_clocks__read(brw, query, accumulator);
   double scaled = 0;

   if (eu_count != 0)
      scaled = (raw / eu_count) * 100;

   if (clocks == 0)
      return 0;

   const double percent = scaled / clocks;
   return percent;
}

/* Metric set L3_2 :: VS FPU0 Pipe Active
 *
 * Splits A-counter slot 10 across the EU count, multiplies by 100 and
 * divides by the GPU core clock count.  The per-EU split is an integer
 * division, so its remainder is dropped before the scaling.  Yields 0 when
 * the EU count or the clock count is zero.
 */
static float
bdw__l3_2__vs_fpu0_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 10];
   const double clocks = bdw__l3_2__gpu_core_clocks__read(brw, query, accumulator);
   double scaled = 0;

   if (eu_count != 0)
      scaled = (raw / eu_count) * 100;

   if (clocks == 0)
      return 0;

   const double percent = scaled / clocks;
   return percent;
}

/* Metric set L3_2 :: GPU Busy
 *
 * A-counter slot 0 multiplied by 100 (in 64-bit unsigned arithmetic) and
 * divided by the GPU core clock count.  Yields 0 when the clock count is
 * zero.
 */
static float
bdw__l3_2__gpu_busy__read(struct brw_context *brw,
                          const struct brw_perf_query_info *query,
                          uint64_t *accumulator)
{
   /* RPN equation: A 0 READ 100 UMUL $GpuCoreClocks FDIV */
   const double scaled = accumulator[query->a_offset + 0] * 100;
   const double clocks = bdw__l3_2__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0)
      return 0;

   const double percent = scaled / clocks;
   return percent;
}

/* Metric set L3_2 :: Slice0 L3 Bank0 Active
 *
 * C-counter slot 6 multiplied by 100 (in 64-bit unsigned arithmetic) and
 * divided by the GPU core clock count.  Yields 0 when the clock count is
 * zero.
 */
static float
bdw__l3_2__l30_bank0_active__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: C 6 READ 100 UMUL $GpuCoreClocks FDIV */
   const double scaled = accumulator[query->c_offset + 6] * 100;
   const double clocks = bdw__l3_2__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0)
      return 0;

   const double percent = scaled / clocks;
   return percent;
}

/* Metric set L3_2 :: Rasterized Pixels
 *
 * Returns A-counter slot 21 of the accumulator multiplied by 4 (64-bit
 * unsigned arithmetic).  `brw` is not consulted.
 */
static uint64_t
bdw__l3_2__rasterized_pixels__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   /* RPN equation: A 21 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 21];

   return raw * 4;
}

/* Metric set L3_2 :: PS FPU0 Pipe Active
 *
 * Splits A-counter slot 15 across the EU count, multiplies by 100 and
 * divides by the GPU core clock count.  The per-EU split is an integer
 * division, so its remainder is dropped before the scaling.  Yields 0 when
 * the EU count or the clock count is zero.
 */
static float
bdw__l3_2__ps_fpu0_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 15];
   const double clocks = bdw__l3_2__gpu_core_clocks__read(brw, query, accumulator);
   double scaled = 0;

   if (eu_count != 0)
      scaled = (raw / eu_count) * 100;

   if (clocks == 0)
      return 0;

   const double percent = scaled / clocks;
   return percent;
}

/* Metric set L3_2 :: Slice0 L3 Bank0 Stalled
 *
 * B-counter slot 0 multiplied by 100 (in 64-bit unsigned arithmetic) and
 * divided by the GPU core clock count.  Yields 0 when the clock count is
 * zero.
 */
static float
bdw__l3_2__l30_bank0_stalled__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   /* RPN equation: B 0 READ 100 UMUL $GpuCoreClocks FDIV */
   const double scaled = accumulator[query->b_offset + 0] * 100;
   const double clocks = bdw__l3_2__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0)
      return 0;

   const double percent = scaled / clocks;
   return percent;
}

/* Metric set L3_2 :: DS Threads Dispatched
 *
 * Returns A-counter slot 3 of the accumulator, unscaled.  `brw` is not
 * consulted.
 */
static uint64_t
bdw__l3_2__ds_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   /* RPN equation: A 3 READ */
   return accumulator[query->a_offset + 3];
}

/* Metric set L3_2 :: Samples Written
 *
 * Returns A-counter slot 26 of the accumulator multiplied by 4 (64-bit
 * unsigned arithmetic).  `brw` is not consulted.
 */
static uint64_t
bdw__l3_2__samples_written__read(struct brw_context *brw,
                                 const struct brw_perf_query_info *query,
                                 uint64_t *accumulator)
{
   /* RPN equation: A 26 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 26];

   return raw * 4;
}

/* Metric set L3_2 :: EU Stall
 *
 * Splits A-counter slot 8 across the EU count, multiplies by 100 and
 * divides by the GPU core clock count.  The per-EU split is an integer
 * division, so its remainder is dropped before the scaling.  Yields 0 when
 * the EU count or the clock count is zero.
 */
static float
bdw__l3_2__eu_stall__read(struct brw_context *brw,
                          const struct brw_perf_query_info *query,
                          uint64_t *accumulator)
{
   /* RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 8];
   const double clocks = bdw__l3_2__gpu_core_clocks__read(brw, query, accumulator);
   double scaled = 0;

   if (eu_count != 0)
      scaled = (raw / eu_count) * 100;

   if (clocks == 0)
      return 0;

   const double percent = scaled / clocks;
   return percent;
}

/* Metric set L3_2 :: Samples Blended
 *
 * Returns A-counter slot 27 of the accumulator multiplied by 4 (64-bit
 * unsigned arithmetic).  `brw` is not consulted.
 */
static uint64_t
bdw__l3_2__samples_blended__read(struct brw_context *brw,
                                 const struct brw_perf_query_info *query,
                                 uint64_t *accumulator)
{
   /* RPN equation: A 27 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 27];

   return raw * 4;
}

/* Metric set L3_2 :: Early Depth Test Fails
 *
 * Returns A-counter slot 23 of the accumulator multiplied by 4 (64-bit
 * unsigned arithmetic).  `brw` is not consulted.
 */
static uint64_t
bdw__l3_2__early_depth_test_fails__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* RPN equation: A 23 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 23];

   return raw * 4;
}

/* Metric set L3_2 :: Shader Memory Accesses
 *
 * Returns A-counter slot 32 of the accumulator, unscaled.  `brw` is not
 * consulted.
 */
static uint64_t
bdw__l3_2__shader_memory_accesses__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* RPN equation: A 32 READ */
   return accumulator[query->a_offset + 32];
}

/* Metric set L3_2 :: HS Threads Dispatched
 *
 * Returns A-counter slot 2 of the accumulator, unscaled.  `brw` is not
 * consulted.
 */
static uint64_t
bdw__l3_2__hs_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   /* RPN equation: A 2 READ */
   return accumulator[query->a_offset + 2];
}

/* Metric set L3_2 :: SLM Bytes Written
 *
 * Returns A-counter slot 31 of the accumulator multiplied by 64 (64-bit
 * unsigned arithmetic).  `brw` is not consulted.
 */
static uint64_t
bdw__l3_2__slm_bytes_written__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   /* RPN equation: A 31 READ 64 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 31];

   return raw * 64;
}

/* Metric set L3_2 :: L3 Shader Throughput
 *
 * Sums A-counter slot 30, A-counter slot 31 and 64 times the Shader Memory
 * Accesses value of this metric set, all in 64-bit unsigned arithmetic.
 *
 * NOTE(review): slots 30 and 31 enter the sum unscaled here, whereas the
 * SLM Bytes Read/Written readers of this metric set multiply those same
 * slots by 64.  The code follows the RPN equation below as written --
 * confirm against the metric definition before changing anything.
 */
static uint64_t
bdw__l3_2__l3_shader_throughput__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD */
   const uint64_t slot30 = accumulator[query->a_offset + 30];
   const uint64_t slot31 = accumulator[query->a_offset + 31];
   const uint64_t access_bytes =
      bdw__l3_2__shader_memory_accesses__read(brw, query, accumulator) * 64;

   return slot30 + (slot31 + access_bytes);
}

/* Metric set L3_2 :: Samples Killed in FS
 *
 * Returns A-counter slot 24 of the accumulator multiplied by 4 (64-bit
 * unsigned arithmetic).  `brw` is not consulted.
 */
static uint64_t
bdw__l3_2__samples_killed_in_ps__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 24 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 24];

   return raw * 4;
}

/* Metric set L3_2 :: Shader Atomic Memory Accesses
 *
 * Returns A-counter slot 34 of the accumulator, unscaled.  `brw` is not
 * consulted.
 */
static uint64_t
bdw__l3_2__shader_atomics__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 34 READ */
   return accumulator[query->a_offset + 34];
}

/* Backing storage for the register programming tables of metric set L3_2.
 * Only the capacities are fixed here; the matching entry counts in
 * bdw_l3_2_query start at 0 and are determined at runtime.
 */
static struct brw_perf_query_register_prog bdw_l3_2_mux_regs[78];
static struct brw_perf_query_register_prog bdw_l3_2_b_counter_regs[22];
static struct brw_perf_query_register_prog bdw_l3_2_flex_regs[7];

/* Backing storage for the counter descriptions of metric set L3_2; the
 * query's n_counters starts at 0.
 */
static struct brw_perf_query_counter bdw_l3_2_query_counters[39];

/* Static description of the "Metric set L3_2" OA query.  It is a file-scope
 * object wired to the storage arrays above.
 */
static struct brw_perf_query_info bdw_l3_2_query = {
   .kind = OA_COUNTERS,
   .name = "Metric set L3_2",
   .guid = "446ae59b-ff2e-41c9-b49e-0184a54bf00a",
   .counters = bdw_l3_2_query_counters,
   .n_counters = 0,
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,

   /* Accumulation buffer offsets...
    *
    * These are indices into the uint64_t accumulator array handed to the
    * bdw__l3_2__*__read functions, which index it as
    * accumulator[<offset> + <counter number>].
    */
   .gpu_time_offset = 0,
   .gpu_clock_offset = 1,
   .a_offset = 2,
   .b_offset = 38,
   .c_offset = 46,
   .mux_regs = bdw_l3_2_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = bdw_l3_2_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
   .flex_regs = bdw_l3_2_flex_regs,
   .n_flex_regs = 0, /* Determined at runtime */
};

static void
register_l3_2_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &bdw_l3_2_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x103F03DA };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x143F0001 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x12180340 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x12190340 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C3F1187 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E3F1205 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x003F0500 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x023F042B };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x043F002C };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C5AC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E5AC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x005A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x025AC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x045A4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04183400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10180000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06190034 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10190000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C1DC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1DC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x001D8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x021DC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x041D4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x101F02A8 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C1FA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1F00BA };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C388000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C395000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E395000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00394000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02395000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04391000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06392000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C3A4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8AA800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D8A0002 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258B4005 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x278B0015 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B2A80 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185800A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85AA00 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830154 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01848000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0384C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0584C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07844000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x11808000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1380C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1580C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x17804000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800060 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };

      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002744, .val = 0x00800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002770, .val = 0x00100070 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002774, .val = 0x0000FFF1 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002778, .val = 0x00014002 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000277C, .val = 0x0000C3FF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002780, .val = 0x00010002 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002784, .val = 0x0000C7FF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002788, .val = 0x00004002 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000278C, .val = 0x0000D3FF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002790, .val = 0x00100700 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002794, .val = 0x0000FF1F };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002798, .val = 0x00001402 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000279C, .val = 0x0000FC3F };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A0, .val = 0x00001002 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A4, .val = 0x0000FC7F };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A8, .val = 0x00000402 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027AC, .val = 0x0000FD3F };

      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E458, .val = 0x00005004 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E558, .val = 0x00010003 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E658, .val = 0x00012011 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E758, .val = 0x00015014 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E45C, .val = 0x00051050 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E55C, .val = 0x00053052 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E65C, .val = 0x00055054 };


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      if (brw->perfquery.sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_2__l30_bank1_active__read;
         counter->name = "Slice0 L3 Bank1 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is active";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 12;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 16;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 20;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 24;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 32;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 40;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 48;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 56;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_2__l30_bank1_stalled__read;
         counter->name = "Slice0 L3 Bank1 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is stalled";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 64;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 72;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 80;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 88;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 96;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 104;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = bdw__l3_2__avg_gpu_core_frequency__max(brw);
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 120;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 128;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 136;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 144;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 148;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 152;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 156;
      counter->size = sizeof(float);

      if (brw->perfquery.sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_2__l30_bank0_active__read;
         counter->name = "Slice0 L3 Bank0 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is active";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 160;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 168;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 176;
      counter->size = sizeof(float);

      if (brw->perfquery.sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_2__l30_bank0_stalled__read;
         counter->name = "Slice0 L3 Bank0 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is stalled";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 180;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 184;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 192;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 200;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 208;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 216;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 224;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 232;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 240;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 248;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 256;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 264;
      counter->size = sizeof(uint64_t);

      query->data_size = counter->offset + counter->size;
   }

   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Metric set L3_3 :: GPU Core Clocks */
static uint64_t
bdw__l3_3__gpu_core_clocks__read(struct brw_context *brw,
                                 const struct brw_perf_query_info *query,
                                 uint64_t *accumulator)
{
   /* RPN equation: GPU_CLOCK 0 READ
    *
    * The metric is the accumulator slot at query->gpu_clock_offset,
    * returned unmodified.  `brw` is not used by this equation.
    */
   return accumulator[query->gpu_clock_offset + 0];
}

/* Metric set L3_3 :: EU Active
 *
 * Takes accumulated A counter 7, divides it (integer division) by the EU
 * count, scales by 100 and divides by the GPU core clock count. The result
 * is 0 when either the EU count or the core clock count is zero.
 */
static float
bdw__l3_3__eu_active__read(struct brw_context *brw,
                           const struct brw_perf_query_info *query,
                           uint64_t *accumulator)
{
   /* RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 7];
   uint64_t per_eu = 0;
   double percent = 0;

   if (n_eus)
      per_eu = raw / n_eus;

   /* The x100 scaling is done in integer arithmetic, then widened. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_3__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks != 0)
      percent = scaled / clocks;

   return percent;
}

/* Metric set L3_3 :: EU Both FPU Pipes Active
 *
 * Takes accumulated A counter 9, divides it (integer division) by the EU
 * count, scales by 100 and divides by the GPU core clock count. The result
 * is 0 when either the EU count or the core clock count is zero.
 */
static float
bdw__l3_3__eu_fpu_both_active__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 9];
   uint64_t per_eu = 0;
   double percent = 0;

   if (n_eus)
      per_eu = raw / n_eus;

   /* The x100 scaling is done in integer arithmetic, then widened. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_3__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks != 0)
      percent = scaled / clocks;

   return percent;
}

/* Metric set L3_3 :: VS Send Pipe Active
 *
 * Takes accumulated A counter 12, divides it (integer division) by the EU
 * count, scales by 100 and divides by the GPU core clock count. The result
 * is 0 when either the EU count or the core clock count is zero.
 */
static float
bdw__l3_3__vs_send_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 12];
   uint64_t per_eu = 0;
   double percent = 0;

   if (n_eus)
      per_eu = raw / n_eus;

   /* The x100 scaling is done in integer arithmetic, then widened. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_3__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks != 0)
      percent = scaled / clocks;

   return percent;
}

/* Metric set L3_3 :: VS FPU1 Pipe Active
 *
 * Takes accumulated A counter 11, divides it (integer division) by the EU
 * count, scales by 100 and divides by the GPU core clock count. The result
 * is 0 when either the EU count or the core clock count is zero.
 */
static float
bdw__l3_3__vs_fpu1_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 11];
   uint64_t per_eu = 0;
   double percent = 0;

   if (n_eus)
      per_eu = raw / n_eus;

   /* The x100 scaling is done in integer arithmetic, then widened. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_3__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks != 0)
      percent = scaled / clocks;

   return percent;
}

/* Metric set L3_3 :: GS Threads Dispatched
 *
 * Returns accumulated A counter 5 unmodified.
 */
static uint64_t
bdw__l3_3__gs_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   /* RPN equation: A 5 READ */
   return accumulator[query->a_offset + 5];
}

/* Metric set L3_3 :: Early Hi-Depth Test Fails
 *
 * Returns accumulated A counter 22 multiplied by 4.
 */
static uint64_t
bdw__l3_3__hi_depth_test_fails__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 22 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 22];

   return raw * 4;
}

/* Metric set L3_3 :: FS Both FPU Active
 *
 * Takes accumulated A counter 18, divides it (integer division) by the EU
 * count, scales by 100 and divides by the GPU core clock count. The result
 * is 0 when either the EU count or the core clock count is zero.
 */
static float
bdw__l3_3__ps_eu_both_fpu_active__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   /* RPN equation: A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 18];
   uint64_t per_eu = 0;
   double percent = 0;

   if (n_eus)
      per_eu = raw / n_eus;

   /* The x100 scaling is done in integer arithmetic, then widened. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_3__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks != 0)
      percent = scaled / clocks;

   return percent;
}

/* Metric set L3_3 :: VS Threads Dispatched
 *
 * Returns accumulated A counter 1 unmodified.
 */
static uint64_t
bdw__l3_3__vs_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   /* RPN equation: A 1 READ */
   return accumulator[query->a_offset + 1];
}

/* Metric set L3_3 :: FS Threads Dispatched
 *
 * Returns accumulated A counter 6 unmodified.
 */
static uint64_t
bdw__l3_3__ps_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   /* RPN equation: A 6 READ */
   return accumulator[query->a_offset + 6];
}

/* Metric set L3_3 :: Slice0 L3 Bank3 Stalled
 *
 * Scales accumulated B counter 0 by 100 and divides it by the GPU core
 * clock count. The result is 0 when the core clock count is zero.
 */
static float
bdw__l3_3__l30_bank3_stalled__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   /* RPN equation: B 0 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t scaled = accumulator[query->b_offset + 0] * 100;
   const double clocks = bdw__l3_3__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0)
      percent = (double) scaled / clocks;

   return percent;
}

/* Metric set L3_3 :: Shader Barrier Messages
 *
 * Returns accumulated A counter 35 unmodified.
 */
static uint64_t
bdw__l3_3__shader_barriers__read(struct brw_context *brw,
                                 const struct brw_perf_query_info *query,
                                 uint64_t *accumulator)
{
   /* RPN equation: A 35 READ */
   return accumulator[query->a_offset + 35];
}

/* Metric set L3_3 :: Sampler Texels
 *
 * Returns accumulated A counter 28 multiplied by 4.
 */
static uint64_t
bdw__l3_3__sampler_texels__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 28 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 28];

   return raw * 4;
}

/* Metric set L3_3 :: Pixels Failing Tests
 *
 * Returns accumulated A counter 25 multiplied by 4.
 */
static uint64_t
bdw__l3_3__pixels_failing_post_ps_tests__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: A 25 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 25];

   return raw * 4;
}

/* Metric set L3_3 :: GPU Time Elapsed
 *
 * Converts the accumulated GPU timestamp tick count into
 * ticks * 1000000000 / timestamp_frequency, truncated towards zero.
 * Returns 0 when the timestamp frequency is reported as zero.
 *
 * NOTE: this file is generated by brw_oa.py; the overflow-safe form below
 * needs to be mirrored in the generator to survive regeneration.
 */
static uint64_t
bdw__l3_3__gpu_time__read(struct brw_context *brw,
                          const struct brw_perf_query_info *query,
                          uint64_t *accumulator)
{
   /* RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV */
   const uint64_t scale = 1000000000;
   const uint64_t ticks = accumulator[query->gpu_time_offset + 0];
   const uint64_t freq = brw->perfquery.sys_vars.timestamp_frequency;

   if (!freq)
      return 0;

   /* Multiplying the raw tick count by 1000000000 wraps a uint64_t once
    * ticks exceeds UINT64_MAX / 1000000000 (about 1.8e10). Splitting into
    * quotient and remainder gives the same truncated result,
    *    floor(ticks * scale / freq) ==
    *       (ticks / freq) * scale + floor((ticks % freq) * scale / freq),
    * while only multiplying values smaller than freq by the scale.
    */
   const uint64_t whole = ticks / freq;
   const uint64_t rem = ticks % freq;

   return whole * scale + (rem * scale) / freq;
}

/* Metric set L3_3 :: AVG GPU Core Frequency
 *
 * Divides (GPU core clocks * 1000000000) by the GPU Time Elapsed metric
 * using unsigned integer arithmetic. Returns 0 when the elapsed time is 0.
 */
static uint64_t
bdw__l3_3__avg_gpu_core_frequency__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV */
   const uint64_t scaled_clocks =
      bdw__l3_3__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t elapsed = bdw__l3_3__gpu_time__read(brw, query, accumulator);

   if (!elapsed)
      return 0;

   return scaled_clocks / elapsed;
}

/* Metric set L3_3 :: AVG GPU Core Frequency (maximum value)
 *
 * Returns the gt_max_freq system variable held in the context's perfquery
 * state.
 */
static uint64_t
bdw__l3_3__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   /* RPN equation: $GpuMaxFrequency */
   const uint64_t max_freq = brw->perfquery.sys_vars.gt_max_freq;

   return max_freq;
}

/* Metric set L3_3 :: Sampler Texels Misses
 *
 * Returns accumulated A counter 29 multiplied by 4.
 */
static uint64_t
bdw__l3_3__sampler_texel_misses__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 29 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 29];

   return raw * 4;
}

/* Metric set L3_3 :: CS Threads Dispatched
 *
 * Returns accumulated A counter 4 unmodified.
 */
static uint64_t
bdw__l3_3__cs_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   /* RPN equation: A 4 READ */
   return accumulator[query->a_offset + 4];
}

/* Metric set L3_3 :: SLM Bytes Read
 *
 * Returns accumulated A counter 30 multiplied by 64.
 */
static uint64_t
bdw__l3_3__slm_bytes_read__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 30 READ 64 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 30];

   return raw * 64;
}

/* Metric set L3_3 :: PS FPU1 Pipe Active
 *
 * Takes accumulated A counter 16, divides it (integer division) by the EU
 * count, scales by 100 and divides by the GPU core clock count. The result
 * is 0 when either the EU count or the core clock count is zero.
 */
static float
bdw__l3_3__ps_fpu1_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 16];
   uint64_t per_eu = 0;
   double percent = 0;

   if (n_eus)
      per_eu = raw / n_eus;

   /* The x100 scaling is done in integer arithmetic, then widened. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_3__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks != 0)
      percent = scaled / clocks;

   return percent;
}

/* Metric set L3_3 :: Slice0 L3 Bank3 Active
 *
 * Scales accumulated C counter 6 by 100 and divides it by the GPU core
 * clock count. The result is 0 when the core clock count is zero.
 */
static float
bdw__l3_3__l30_bank3_active__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: C 6 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t scaled = accumulator[query->c_offset + 6] * 100;
   const double clocks = bdw__l3_3__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0)
      percent = (double) scaled / clocks;

   return percent;
}

/* Metric set L3_3 :: PS Send Pipeline Active
 *
 * Takes accumulated A counter 17, divides it (integer division) by the EU
 * count, scales by 100 and divides by the GPU core clock count. The result
 * is 0 when either the EU count or the core clock count is zero.
 */
static float
bdw__l3_3__ps_send_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 17];
   uint64_t per_eu = 0;
   double percent = 0;

   if (n_eus)
      per_eu = raw / n_eus;

   /* The x100 scaling is done in integer arithmetic, then widened. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_3__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks != 0)
      percent = scaled / clocks;

   return percent;
}

/* Metric set L3_3 :: VS FPU0 Pipe Active
 *
 * Takes accumulated A counter 10, divides it (integer division) by the EU
 * count, scales by 100 and divides by the GPU core clock count. The result
 * is 0 when either the EU count or the core clock count is zero.
 */
static float
bdw__l3_3__vs_fpu0_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 10];
   uint64_t per_eu = 0;
   double percent = 0;

   if (n_eus)
      per_eu = raw / n_eus;

   /* The x100 scaling is done in integer arithmetic, then widened. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_3__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks != 0)
      percent = scaled / clocks;

   return percent;
}

/* Metric set L3_3 :: GPU Busy
 *
 * Scales accumulated A counter 0 by 100 and divides it by the GPU core
 * clock count. The result is 0 when the core clock count is zero.
 */
static float
bdw__l3_3__gpu_busy__read(struct brw_context *brw,
                          const struct brw_perf_query_info *query,
                          uint64_t *accumulator)
{
   /* RPN equation: A 0 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t scaled = accumulator[query->a_offset + 0] * 100;
   const double clocks = bdw__l3_3__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0)
      percent = (double) scaled / clocks;

   return percent;
}

/* Metric set L3_3 :: Slice1 L3 Bank3 Active
 *
 * Scales accumulated C counter 7 by 100 and divides it by the GPU core
 * clock count. The result is 0 when the core clock count is zero.
 */
static float
bdw__l3_3__l31_bank3_active__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: C 7 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t scaled = accumulator[query->c_offset + 7] * 100;
   const double clocks = bdw__l3_3__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0)
      percent = (double) scaled / clocks;

   return percent;
}

/* Metric set L3_3 :: Slice1 L3 Bank3 Stalled
 *
 * Scales accumulated B counter 4 by 100 and divides it by the GPU core
 * clock count. The result is 0 when the core clock count is zero.
 */
static float
bdw__l3_3__l31_bank3_stalled__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   /* RPN equation: B 4 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t scaled = accumulator[query->b_offset + 4] * 100;
   const double clocks = bdw__l3_3__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0)
      percent = (double) scaled / clocks;

   return percent;
}

/* Metric set L3_3 :: Rasterized Pixels
 *
 * Returns accumulated A counter 21 multiplied by 4.
 */
static uint64_t
bdw__l3_3__rasterized_pixels__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   /* RPN equation: A 21 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 21];

   return raw * 4;
}

/* Metric set L3_3 :: PS FPU0 Pipe Active
 *
 * Takes accumulated A counter 15, divides it (integer division) by the EU
 * count, scales by 100 and divides by the GPU core clock count. The result
 * is 0 when either the EU count or the core clock count is zero.
 */
static float
bdw__l3_3__ps_fpu0_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 15];
   uint64_t per_eu = 0;
   double percent = 0;

   if (n_eus)
      per_eu = raw / n_eus;

   /* The x100 scaling is done in integer arithmetic, then widened. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_3__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks != 0)
      percent = scaled / clocks;

   return percent;
}

/* Metric set L3_3 :: DS Threads Dispatched
 *
 * Returns accumulated A counter 3 unmodified.
 */
static uint64_t
bdw__l3_3__ds_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   /* RPN equation: A 3 READ */
   return accumulator[query->a_offset + 3];
}

/* Metric set L3_3 :: Samples Written
 *
 * Returns accumulated A counter 26 multiplied by 4.
 */
static uint64_t
bdw__l3_3__samples_written__read(struct brw_context *brw,
                                 const struct brw_perf_query_info *query,
                                 uint64_t *accumulator)
{
   /* RPN equation: A 26 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 26];

   return raw * 4;
}

/* Metric set L3_3 :: EU Stall
 *
 * Takes accumulated A counter 8, divides it (integer division) by the EU
 * count, scales by 100 and divides by the GPU core clock count. The result
 * is 0 when either the EU count or the core clock count is zero.
 */
static float
bdw__l3_3__eu_stall__read(struct brw_context *brw,
                          const struct brw_perf_query_info *query,
                          uint64_t *accumulator)
{
   /* RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 8];
   uint64_t per_eu = 0;
   double percent = 0;

   if (n_eus)
      per_eu = raw / n_eus;

   /* The x100 scaling is done in integer arithmetic, then widened. */
   const double scaled = per_eu * 100;
   const double clocks = bdw__l3_3__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks != 0)
      percent = scaled / clocks;

   return percent;
}

/* Metric set L3_3 :: Samples Blended
 *
 * Returns accumulated A counter 27 multiplied by 4.
 */
static uint64_t
bdw__l3_3__samples_blended__read(struct brw_context *brw,
                                 const struct brw_perf_query_info *query,
                                 uint64_t *accumulator)
{
   /* RPN equation: A 27 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 27];

   return raw * 4;
}

/* Metric set L3_3 :: Early Depth Test Fails
 *
 * Returns accumulated A counter 23 multiplied by 4.
 */
static uint64_t
bdw__l3_3__early_depth_test_fails__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* RPN equation: A 23 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 23];

   return raw * 4;
}

/* Metric set L3_3 :: Shader Memory Accesses
 *
 * Returns accumulated A counter 32 unmodified.
 */
static uint64_t
bdw__l3_3__shader_memory_accesses__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* RPN equation: A 32 READ */
   return accumulator[query->a_offset + 32];
}

/* Metric set L3_3 :: HS Threads Dispatched
 *
 * Returns accumulated A counter 2 unmodified.
 */
static uint64_t
bdw__l3_3__hs_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   /* RPN equation: A 2 READ */
   return accumulator[query->a_offset + 2];
}

/* Metric set L3_3 :: SLM Bytes Written
 *
 * Returns accumulated A counter 31 multiplied by 64.
 */
static uint64_t
bdw__l3_3__slm_bytes_written__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   /* RPN equation: A 31 READ 64 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 31];

   return raw * 64;
}

/* Metric set L3_3 :: L3 Shader Throughput
 *
 * Returns A counter 30 + A counter 31 + (Shader Memory Accesses * 64),
 * all in unsigned 64-bit arithmetic.
 *
 * NOTE(review): A 30 and A 31 are added unscaled here, whereas the SLM
 * Bytes Read/Written metrics of this set multiply the same counters by 64.
 * This follows the RPN equation as given; confirm against the metric
 * definition before changing it.
 */
static uint64_t
bdw__l3_3__l3_shader_throughput__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD */
   const uint64_t a30 = accumulator[query->a_offset + 30];
   const uint64_t a31 = accumulator[query->a_offset + 31];
   const uint64_t access_bytes =
      bdw__l3_3__shader_memory_accesses__read(brw, query, accumulator) * 64;

   return a30 + (a31 + access_bytes);
}

/* Metric set L3_3 :: Samples Killed in FS
 *
 * Returns accumulated A counter 24 multiplied by 4.
 */
static uint64_t
bdw__l3_3__samples_killed_in_ps__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 24 READ 4 UMUL */
   const uint64_t raw = accumulator[query->a_offset + 24];

   return raw * 4;
}

/* Metric set L3_3 :: Shader Atomic Memory Accesses
 *
 * Returns accumulated A counter 34 unmodified.
 */
static uint64_t
bdw__l3_3__shader_atomics__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   /* RPN equation: A 34 READ */
   return accumulator[query->a_offset + 34];
}

/* Storage for the register programming of metric set L3_3. Only the
 * capacities are fixed here; the matching n_*_regs counts in the query
 * below start at zero and are determined at runtime. */
static struct brw_perf_query_register_prog bdw_l3_3_mux_regs[94];
static struct brw_perf_query_register_prog bdw_l3_3_b_counter_regs[22];
static struct brw_perf_query_register_prog bdw_l3_3_flex_regs[7];

/* Storage for the counter descriptions of metric set L3_3. */
static struct brw_perf_query_counter bdw_l3_3_query_counters[39];

/* Query description for metric set L3_3. */
static struct brw_perf_query_info bdw_l3_3_query = {
   /* Identity */
   .kind = OA_COUNTERS,
   .name = "Metric set L3_3",
   .guid = "84a7956f-1ea4-4d0d-837f-e39a0376e38c",
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,

   /* Counter descriptions */
   .counters = bdw_l3_3_query_counters,
   .n_counters = 0,

   /* Indices into the accumulation buffer used by the read functions */
   .gpu_time_offset = 0,
   .gpu_clock_offset = 1,
   .a_offset = 2,
   .b_offset = 38,
   .c_offset = 46,

   /* Register programming tables and their entry counts */
   .mux_regs = bdw_l3_3_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = bdw_l3_3_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
   .flex_regs = bdw_l3_3_flex_regs,
   .n_flex_regs = 0, /* Determined at runtime */
};

static void
register_l3_3_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &bdw_l3_3_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x121B0340 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x103F0274 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x123F0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x129B0340 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10BF0274 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x12BF0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x041B3400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x101B0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x045C8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A3D4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x003F0080 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x023F0793 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x043F0014 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04588000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x005A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x025AC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x045A4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A5B4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x001D8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x021DC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x041D4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C1FA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1F002A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A384000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00394000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02395000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04399000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x069B0034 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x109B0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06DC4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CBD4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CBF0981 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EBF0A0F };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06D84000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CDAC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EDAC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CDB4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C9DC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E9DC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x109F02A8 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E9F0080 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CB84000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CB95000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EB95000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06B92000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F88000F };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D880400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258B8009 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x278B002A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B2A80 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198C4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8C0015 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8C4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8DA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8DA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078D2000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185800A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85AA00 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830154 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01848000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0384C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0584C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07844000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x11808000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1380C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1580C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x17804000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800C00 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47800C63 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F8014A5 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800045 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };

      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002744, .val = 0x00800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002770, .val = 0x00100070 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002774, .val = 0x0000FFF1 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002778, .val = 0x00014002 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000277C, .val = 0x0000C3FF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002780, .val = 0x00010002 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002784, .val = 0x0000C7FF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002788, .val = 0x00004002 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000278C, .val = 0x0000D3FF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002790, .val = 0x00100700 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002794, .val = 0x0000FF1F };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002798, .val = 0x00001402 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000279C, .val = 0x0000FC3F };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A0, .val = 0x00001002 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A4, .val = 0x0000FC7F };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A8, .val = 0x00000402 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027AC, .val = 0x0000FD3F };

      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E458, .val = 0x00005004 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E558, .val = 0x00010003 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E658, .val = 0x00012011 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E758, .val = 0x00015014 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E45C, .val = 0x00051050 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E55C, .val = 0x00053052 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E65C, .val = 0x00055054 };


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_3__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 12;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_3__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 16;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_3__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 20;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 24;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 32;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_3__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 40;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 48;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 56;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_3__l30_bank3_stalled__read;
         counter->name = "Slice0 L3 Bank3 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank3 is stalled";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 64;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 72;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 80;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 88;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 96;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = bdw__l3_3__avg_gpu_core_frequency__max(brw);
      counter->offset = 104;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 120;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 128;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_3__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 136;
      counter->size = sizeof(float);

      if (brw->perfquery.sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_3__l30_bank3_active__read;
         counter->name = "Slice0 L3 Bank3 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank3 is active";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 140;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_3__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 144;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_3__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 148;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 152;
      counter->size = sizeof(float);

      if (brw->perfquery.sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_3__l31_bank3_active__read;
         counter->name = "Slice1 L3 Bank3 Active";
         counter->desc = "The percentage of time in which slice1 L3 bank3 is active";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 156;
         counter->size = sizeof(float);
      }

      if (brw->perfquery.sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_3__l31_bank3_stalled__read;
         counter->name = "Slice1 L3 Bank3 Stalled";
         counter->desc = "The percentage of time in which slice1 L3 bank3 is stalled";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 160;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 168;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_3__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 176;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 184;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 192;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 200;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 208;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 216;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 224;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 232;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 240;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 248;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 256;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_3__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 264;
      counter->size = sizeof(uint64_t);

      query->data_size = counter->offset + counter->size;
   }

   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Metric set L3_4 :: GPU Core Clocks
 *
 * RPN equation: GPU_CLOCK 0 READ
 *
 * Returns the accumulated value stored at the query's GPU clock offset.
 */
static uint64_t
bdw__l3_4__gpu_core_clocks__read(struct brw_context *brw,
                                 const struct brw_perf_query_info *query,
                                 uint64_t *accumulator)
{
   return accumulator[query->gpu_clock_offset + 0];
}

/* Metric set L3_4 :: EU Active
 *
 * RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A counter 7 is integer-divided by the EU count, multiplied by 100 and then
 * divided (in floating point) by the GPU core clocks.  A zero EU count or a
 * zero clock count yields 0 for that division rather than dividing by zero.
 */
static float
bdw__l3_4__eu_active__read(struct brw_context *brw,
                           const struct brw_perf_query_info *query,
                           uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 7];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus != 0 ? raw / n_eus : 0) * 100;
   const double core_clocks =
      bdw__l3_4__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (core_clocks != 0)
      result = (double)per_eu_x100 / core_clocks;

   return result;
}

/* Metric set L3_4 :: EU Both FPU Pipes Active
 *
 * RPN equation: A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A counter 9 is integer-divided by the EU count, multiplied by 100 and then
 * divided (in floating point) by the GPU core clocks.  A zero EU count or a
 * zero clock count yields 0 for that division rather than dividing by zero.
 */
static float
bdw__l3_4__eu_fpu_both_active__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 9];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus != 0 ? raw / n_eus : 0) * 100;
   const double core_clocks =
      bdw__l3_4__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (core_clocks != 0)
      result = (double)per_eu_x100 / core_clocks;

   return result;
}

/* Metric set L3_4 :: VS Send Pipe Active
 *
 * RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A counter 12 is integer-divided by the EU count, multiplied by 100 and then
 * divided (in floating point) by the GPU core clocks.  A zero EU count or a
 * zero clock count yields 0 for that division rather than dividing by zero.
 */
static float
bdw__l3_4__vs_send_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 12];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus != 0 ? raw / n_eus : 0) * 100;
   const double core_clocks =
      bdw__l3_4__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (core_clocks != 0)
      result = (double)per_eu_x100 / core_clocks;

   return result;
}

/* Metric set L3_4 :: VS FPU1 Pipe Active
 *
 * RPN equation: A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A counter 11 is integer-divided by the EU count, multiplied by 100 and then
 * divided (in floating point) by the GPU core clocks.  A zero EU count or a
 * zero clock count yields 0 for that division rather than dividing by zero.
 */
static float
bdw__l3_4__vs_fpu1_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 11];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus != 0 ? raw / n_eus : 0) * 100;
   const double core_clocks =
      bdw__l3_4__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (core_clocks != 0)
      result = (double)per_eu_x100 / core_clocks;

   return result;
}

/* Metric set L3_4 :: GS Threads Dispatched
 *
 * RPN equation: A 5 READ
 *
 * Returns accumulated A counter 5 unmodified.
 */
static uint64_t
bdw__l3_4__gs_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   return accumulator[query->a_offset + 5];
}

/* Metric set L3_4 :: Early Hi-Depth Test Fails
 *
 * RPN equation: A 22 READ 4 UMUL
 *
 * Returns accumulated A counter 22 multiplied by 4.
 */
static uint64_t
bdw__l3_4__hi_depth_test_fails__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 22];

   return raw * 4;
}

/* Metric set L3_4 :: FS Both FPU Active
 *
 * RPN equation: A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A counter 18 is integer-divided by the EU count, multiplied by 100 and then
 * divided (in floating point) by the GPU core clocks.  A zero EU count or a
 * zero clock count yields 0 for that division rather than dividing by zero.
 */
static float
bdw__l3_4__ps_eu_both_fpu_active__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 18];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus != 0 ? raw / n_eus : 0) * 100;
   const double core_clocks =
      bdw__l3_4__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (core_clocks != 0)
      result = (double)per_eu_x100 / core_clocks;

   return result;
}

/* Metric set L3_4 :: VS Threads Dispatched
 *
 * RPN equation: A 1 READ
 *
 * Returns accumulated A counter 1 unmodified.
 */
static uint64_t
bdw__l3_4__vs_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   return accumulator[query->a_offset + 1];
}

/* Metric set L3_4 :: FS Threads Dispatched
 *
 * RPN equation: A 6 READ
 *
 * Returns accumulated A counter 6 unmodified.
 */
static uint64_t
bdw__l3_4__ps_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   return accumulator[query->a_offset + 6];
}

/* Metric set L3_4 :: Shader Barrier Messages
 *
 * RPN equation: A 35 READ
 *
 * Returns accumulated A counter 35 unmodified.
 */
static uint64_t
bdw__l3_4__shader_barriers__read(struct brw_context *brw,
                                 const struct brw_perf_query_info *query,
                                 uint64_t *accumulator)
{
   return accumulator[query->a_offset + 35];
}

/* Metric set L3_4 :: Sampler Texels
 *
 * RPN equation: A 28 READ 4 UMUL
 *
 * Returns accumulated A counter 28 multiplied by 4.
 */
static uint64_t
bdw__l3_4__sampler_texels__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 28];

   return raw * 4;
}

/* Metric set L3_4 :: Pixels Failing Tests
 *
 * RPN equation: A 25 READ 4 UMUL
 *
 * Returns accumulated A counter 25 multiplied by 4.
 */
static uint64_t
bdw__l3_4__pixels_failing_post_ps_tests__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 25];

   return raw * 4;
}

/* Metric set L3_4 :: GPU Time Elapsed
 *
 * RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV
 *
 * Multiplies the accumulated GPU time value by 1000000000 and integer-divides
 * it by the timestamp frequency system variable.  Returns 0 when that
 * frequency is 0.
 */
static uint64_t
bdw__l3_4__gpu_time__read(struct brw_context *brw,
                          const struct brw_perf_query_info *query,
                          uint64_t *accumulator)
{
   const uint64_t scaled_ticks =
      accumulator[query->gpu_time_offset + 0] * 1000000000;
   const uint64_t timestamp_freq = brw->perfquery.sys_vars.timestamp_frequency;

   if (timestamp_freq == 0)
      return 0;

   return scaled_ticks / timestamp_freq;
}

/* Metric set L3_4 :: AVG GPU Core Frequency
 *
 * RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV
 *
 * Multiplies the GPU core clocks metric by 1000000000 and integer-divides it
 * by the GPU time metric of this set.  Returns 0 when the GPU time is 0.
 */
static uint64_t
bdw__l3_4__avg_gpu_core_frequency__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t scaled_clocks =
      bdw__l3_4__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t gpu_time =
      bdw__l3_4__gpu_time__read(brw, query, accumulator);

   if (gpu_time == 0)
      return 0;

   return scaled_clocks / gpu_time;
}

/* Metric set L3_4 :: AVG GPU Core Frequency (maximum value)
 *
 * RPN equation: $GpuMaxFrequency
 *
 * Returns the gt_max_freq system variable held in the context.
 */
static uint64_t
bdw__l3_4__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   const uint64_t max_freq = brw->perfquery.sys_vars.gt_max_freq;

   return max_freq;
}

/* Metric set L3_4 :: Sampler Texels Misses
 *
 * RPN equation: A 29 READ 4 UMUL
 *
 * Returns accumulated A counter 29 multiplied by 4.
 */
static uint64_t
bdw__l3_4__sampler_texel_misses__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 29];

   return raw * 4;
}

/* Metric set L3_4 :: CS Threads Dispatched
 *
 * RPN equation: A 4 READ
 *
 * Returns accumulated A counter 4 unmodified.
 */
static uint64_t
bdw__l3_4__cs_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   return accumulator[query->a_offset + 4];
}

/* Metric set L3_4 :: SLM Bytes Read
 *
 * RPN equation: A 30 READ 64 UMUL
 *
 * Returns accumulated A counter 30 multiplied by 64.
 */
static uint64_t
bdw__l3_4__slm_bytes_read__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 30];

   return raw * 64;
}

/* Metric set L3_4 :: PS FPU1 Pipe Active
 *
 * RPN equation: A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A counter 16 is integer-divided by the EU count, multiplied by 100 and then
 * divided (in floating point) by the GPU core clocks.  A zero EU count or a
 * zero clock count yields 0 for that division rather than dividing by zero.
 */
static float
bdw__l3_4__ps_fpu1_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 16];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus != 0 ? raw / n_eus : 0) * 100;
   const double core_clocks =
      bdw__l3_4__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (core_clocks != 0)
      result = (double)per_eu_x100 / core_clocks;

   return result;
}

/* Metric set L3_4 :: Slice0 L3 Bank2 Stalled
 *
 * RPN equation: B 0 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Accumulated B counter 0 is multiplied by 100 and divided (in floating
 * point) by the GPU core clocks.  Returns 0 when the clock count is 0.
 */
static float
bdw__l3_4__l30_bank2_stalled__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->b_offset + 0] * 100;
   const double core_clocks =
      bdw__l3_4__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (core_clocks != 0)
      result = (double)scaled / core_clocks;

   return result;
}

/* Metric set L3_4 :: PS Send Pipeline Active
 *
 * RPN equation: A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A counter 17 is integer-divided by the EU count, multiplied by 100 and then
 * divided (in floating point) by the GPU core clocks.  A zero EU count or a
 * zero clock count yields 0 for that division rather than dividing by zero.
 */
static float
bdw__l3_4__ps_send_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 17];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus != 0 ? raw / n_eus : 0) * 100;
   const double core_clocks =
      bdw__l3_4__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (core_clocks != 0)
      result = (double)per_eu_x100 / core_clocks;

   return result;
}

/* Metric set L3_4 :: VS FPU0 Pipe Active
 *
 * RPN equation: A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A counter 10 is integer-divided by the EU count, multiplied by 100 and then
 * divided (in floating point) by the GPU core clocks.  A zero EU count or a
 * zero clock count yields 0 for that division rather than dividing by zero.
 */
static float
bdw__l3_4__vs_fpu0_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 10];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus != 0 ? raw / n_eus : 0) * 100;
   const double core_clocks =
      bdw__l3_4__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (core_clocks != 0)
      result = (double)per_eu_x100 / core_clocks;

   return result;
}

/* Metric set L3_4 :: GPU Busy
 *
 * RPN equation: A 0 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Accumulated A counter 0 is multiplied by 100 and divided (in floating
 * point) by the GPU core clocks.  Returns 0 when the clock count is 0.
 */
static float
bdw__l3_4__gpu_busy__read(struct brw_context *brw,
                          const struct brw_perf_query_info *query,
                          uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->a_offset + 0] * 100;
   const double core_clocks =
      bdw__l3_4__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (core_clocks != 0)
      result = (double)scaled / core_clocks;

   return result;
}

/* Metric set L3_4 :: Slice1 L3 Bank2 Active
 *
 * RPN equation: C 7 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Accumulated C counter 7 is multiplied by 100 and divided (in floating
 * point) by the GPU core clocks.  Returns 0 when the clock count is 0.
 */
static float
bdw__l3_4__l31_bank2_active__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->c_offset + 7] * 100;
   const double core_clocks =
      bdw__l3_4__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (core_clocks != 0)
      result = (double)scaled / core_clocks;

   return result;
}

/* Metric set L3_4 :: Slice0 L3 Bank2 Active
 *
 * RPN equation: C 6 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Accumulated C counter 6 is multiplied by 100 and divided (in floating
 * point) by the GPU core clocks.  Returns 0 when the clock count is 0.
 */
static float
bdw__l3_4__l30_bank2_active__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->c_offset + 6] * 100;
   const double core_clocks =
      bdw__l3_4__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (core_clocks != 0)
      result = (double)scaled / core_clocks;

   return result;
}

/* Metric set L3_4 :: Rasterized Pixels
 *
 * RPN equation: A 21 READ 4 UMUL
 *
 * Returns accumulated A counter 21 multiplied by 4.
 */
static uint64_t
bdw__l3_4__rasterized_pixels__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 21];

   return raw * 4;
}

/* Metric set L3_4 :: Slice1 L3 Bank2 Stalled
 *
 * RPN equation: B 4 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Accumulated B counter 4 is multiplied by 100 and divided (in floating
 * point) by the GPU core clocks.  Returns 0 when the clock count is 0.
 */
static float
bdw__l3_4__l31_bank2_stalled__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->b_offset + 4] * 100;
   const double core_clocks =
      bdw__l3_4__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (core_clocks != 0)
      result = (double)scaled / core_clocks;

   return result;
}

/* Metric set L3_4 :: PS FPU0 Pipe Active
 *
 * RPN equation: A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A counter 15 is integer-divided by the EU count, multiplied by 100 and then
 * divided (in floating point) by the GPU core clocks.  A zero EU count or a
 * zero clock count yields 0 for that division rather than dividing by zero.
 */
static float
bdw__l3_4__ps_fpu0_active__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 15];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus != 0 ? raw / n_eus : 0) * 100;
   const double core_clocks =
      bdw__l3_4__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (core_clocks != 0)
      result = (double)per_eu_x100 / core_clocks;

   return result;
}

/* Metric set L3_4 :: DS Threads Dispatched
 *
 * RPN equation: A 3 READ
 *
 * Returns accumulated A counter 3 unmodified.
 */
static uint64_t
bdw__l3_4__ds_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   return accumulator[query->a_offset + 3];
}

/* Metric set L3_4 :: Samples Written
 *
 * RPN equation: A 26 READ 4 UMUL
 *
 * Returns accumulated A counter 26 multiplied by 4.
 */
static uint64_t
bdw__l3_4__samples_written__read(struct brw_context *brw,
                                 const struct brw_perf_query_info *query,
                                 uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 26];

   return raw * 4;
}

/* Metric set L3_4 :: EU Stall
 *
 * RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A counter 8 is integer-divided by the EU count, multiplied by 100 and then
 * divided (in floating point) by the GPU core clocks.  A zero EU count or a
 * zero clock count yields 0 for that division rather than dividing by zero.
 */
static float
bdw__l3_4__eu_stall__read(struct brw_context *brw,
                          const struct brw_perf_query_info *query,
                          uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 8];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus != 0 ? raw / n_eus : 0) * 100;
   const double core_clocks =
      bdw__l3_4__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (core_clocks != 0)
      result = (double)per_eu_x100 / core_clocks;

   return result;
}

/* Metric set L3_4 :: Samples Blended
 *
 * RPN equation: A 27 READ 4 UMUL
 *
 * Returns accumulated A counter 27 multiplied by 4.
 */
static uint64_t
bdw__l3_4__samples_blended__read(struct brw_context *brw,
                                 const struct brw_perf_query_info *query,
                                 uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 27];

   return raw * 4;
}

/* Metric set L3_4 :: Early Depth Test Fails
 *
 * RPN equation: A 23 READ 4 UMUL
 *
 * Returns accumulated A counter 23 multiplied by 4.
 */
static uint64_t
bdw__l3_4__early_depth_test_fails__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 23];

   return raw * 4;
}

/* Metric set L3_4 :: Shader Memory Accesses
 *
 * RPN equation: A 32 READ
 *
 * Returns accumulated A counter 32 unmodified.
 */
static uint64_t
bdw__l3_4__shader_memory_accesses__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   return accumulator[query->a_offset + 32];
}

/* Metric set L3_4 :: HS Threads Dispatched
 *
 * RPN equation: A 2 READ
 *
 * Returns accumulated A counter 2 unmodified.
 */
static uint64_t
bdw__l3_4__hs_threads__read(struct brw_context *brw,
                            const struct brw_perf_query_info *query,
                            uint64_t *accumulator)
{
   return accumulator[query->a_offset + 2];
}

/* Metric set L3_4 :: SLM Bytes Written
 *
 * RPN equation: A 31 READ 64 UMUL
 *
 * Returns accumulated A counter 31 multiplied by 64.
 */
static uint64_t
bdw__l3_4__slm_bytes_written__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 31];

   return raw * 64;
}

/* Metric set L3_4 :: L3 Shader Throughput
 *
 * RPN equation: A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD
 *
 * Sums accumulated A counter 30, A counter 31 and the Shader Memory Accesses
 * metric multiplied by 64.
 *
 * NOTE(review): A30 and A31 are added unscaled here, whereas the SLM Bytes
 * Read / SLM Bytes Written metrics of this set multiply those same counters
 * by 64 - confirm against the metric definition that this is intended.
 */
static uint64_t
bdw__l3_4__l3_shader_throughput__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   const uint64_t a30 = accumulator[query->a_offset + 30];
   const uint64_t a31 = accumulator[query->a_offset + 31];
   const uint64_t shader_access_x64 =
      bdw__l3_4__shader_memory_accesses__read(brw, query, accumulator) * 64;

   return a30 + (a31 + shader_access_x64);
}

/* Metric set L3_4 :: Samples Killed in FS
 *
 * RPN equation: A 24 READ 4 UMUL
 *
 * Returns accumulated A counter 24 multiplied by 4.
 */
static uint64_t
bdw__l3_4__samples_killed_in_ps__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 24];

   return raw * 4;
}

/* Metric set L3_4 :: Shader Atomic Memory Accesses
 *
 * RPN equation: A 34 READ
 *
 * Returns accumulated A counter 34 unmodified.
 */
static uint64_t
bdw__l3_4__shader_atomics__read(struct brw_context *brw,
                                const struct brw_perf_query_info *query,
                                uint64_t *accumulator)
{
   return accumulator[query->a_offset + 34];
}

/* Register programming tables referenced by the L3_4 query description
 * below.  Their matching n_*_regs counts start at 0 and are determined at
 * runtime. */
static struct brw_perf_query_register_prog bdw_l3_4_mux_regs[93];
static struct brw_perf_query_register_prog bdw_l3_4_b_counter_regs[22];
static struct brw_perf_query_register_prog bdw_l3_4_flex_regs[7];

/* Backing storage for the counter descriptions of metric set L3_4. */
static struct brw_perf_query_counter bdw_l3_4_query_counters[39];

/* Query description for metric set L3_4. */
static struct brw_perf_query_info bdw_l3_4_query = {
   /* Identity */
   .kind = OA_COUNTERS,
   .name = "Metric set L3_4",
   .guid = "92b493d9-df18-4bed-be06-5cac6f2a6f5f",
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,

   /* Counter descriptions; n_counters starts at 0 */
   .counters = bdw_l3_4_query_counters,
   .n_counters = 0,

   /* Accumulation buffer offsets... */
   .gpu_time_offset = 0,
   .gpu_clock_offset = 1,
   .a_offset = 2,
   .b_offset = 38,
   .c_offset = 46,

   /* Register programming; all counts are determined at runtime */
   .mux_regs = bdw_l3_4_mux_regs,
   .n_mux_regs = 0,
   .b_counter_regs = bdw_l3_4_b_counter_regs,
   .n_b_counter_regs = 0,
   .flex_regs = bdw_l3_4_flex_regs,
   .n_flex_regs = 0,
};

static void
register_l3_4_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &bdw_l3_4_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x121A0340 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x103F0017 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x123F0020 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x129A0340 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10BF0017 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x12BF0020 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x041A3400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x101A0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x043B8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A3E0010 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x003F0200 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x023F0113 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x043F0014 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02592000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x005A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x025AC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x045A4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A1C8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x001D8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x021DC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x041D4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A1E8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C1FA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1F001A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x00394000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02395000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04391000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x069A0034 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x109A0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06BB4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0ABE0040 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CBF0984 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EBF0A02 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02D94000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CDAC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EDAC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C9C0400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C9DC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E9DC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C9E0400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x109F02A8 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E9F0040 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0CB95000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0EB95000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F88000F };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D880400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258B8009 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x278B002A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B2A80 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198C4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8C0015 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8C4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8DA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8DA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078D2000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185800A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85AA00 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830154 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01848000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0384C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0584C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07844000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x11808000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1380C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1580C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x17804000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47800842 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F801084 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800044 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };

      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002744, .val = 0x00800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002770, .val = 0x00100070 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002774, .val = 0x0000FFF1 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002778, .val = 0x00014002 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000277C, .val = 0x0000C3FF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002780, .val = 0x00010002 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002784, .val = 0x0000C7FF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002788, .val = 0x00004002 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000278C, .val = 0x0000D3FF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002790, .val = 0x00100700 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002794, .val = 0x0000FF1F };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002798, .val = 0x00001402 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000279C, .val = 0x0000FC3F };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A0, .val = 0x00001002 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A4, .val = 0x0000FC7F };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A8, .val = 0x00000402 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027AC, .val = 0x0000FD3F };

      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E458, .val = 0x00005004 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E558, .val = 0x00010003 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E658, .val = 0x00012011 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E758, .val = 0x00015014 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E45C, .val = 0x00051050 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E55C, .val = 0x00053052 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E65C, .val = 0x00055054 };


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_4__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_4__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 12;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_4__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 16;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_4__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 20;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 24;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 32;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_4__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 40;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 48;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 56;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 64;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 72;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 80;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 88;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = bdw__l3_4__avg_gpu_core_frequency__max(brw);
      counter->offset = 96;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 104;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 120;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_4__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 128;
      counter->size = sizeof(float);

      if (brw->perfquery.sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_4__l30_bank2_stalled__read;
         counter->name = "Slice0 L3 Bank2 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is stalled";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 132;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_4__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 136;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_4__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 140;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_4__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 144;
      counter->size = sizeof(float);

      if (brw->perfquery.sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_4__l31_bank2_active__read;
         counter->name = "Slice1 L3 Bank2 Active";
         counter->desc = "The percentage of time in which slice1 L3 bank2 is active";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 148;
         counter->size = sizeof(float);
      }

      if (brw->perfquery.sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_4__l30_bank2_active__read;
         counter->name = "Slice0 L3 Bank2 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is active";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 152;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 160;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_4__l31_bank2_stalled__read;
         counter->name = "Slice1 L3 Bank2 Stalled";
         counter->desc = "The percentage of time in which slice1 L3 bank2 is stalled";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 168;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_4__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 172;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 176;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 184;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_4__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 192;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 200;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 208;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 216;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 224;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 232;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 240;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 248;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_4__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 256;
      counter->size = sizeof(uint64_t);

      query->data_size = counter->offset + counter->size;
   }

   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Metric set RasterizerAndPixelBackend :: GPU Core Clocks
 *
 * RPN equation: GPU_CLOCK 0 READ
 *
 * Returns the accumulator slot located at query->gpu_clock_offset,
 * unmodified.  `brw` is not consulted.
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(struct brw_context *brw,
                                                         const struct brw_perf_query_info *query,
                                                         uint64_t *accumulator)
{
   return accumulator[query->gpu_clock_offset];
}

/* Metric set RasterizerAndPixelBackend :: EU Active
 *
 * RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A counter 7 by the EU count (integer division), scales the
 * quotient by 100 and divides that by the GPU core clock count in floating
 * point.  A zero EU count or a zero clock count makes the corresponding
 * division evaluate to 0.
 */
static float
bdw__rasterizer_and_pixel_backend__eu_active__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 7];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t per_eu = 0;
   double percent = 0;

   /* UDIV step: integer division, skipped when there are no EUs. */
   if (eu_count != 0) {
      per_eu = raw / eu_count;
   }

   /* FDIV step: skipped when no clocks were counted. */
   if (clocks != 0) {
      percent = (double)(per_eu * 100) / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: EU Both FPU Pipes Active
 *
 * RPN equation: A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A counter 9 by the EU count (integer division), scales the
 * quotient by 100 and divides that by the GPU core clock count in floating
 * point.  A zero EU count or a zero clock count makes the corresponding
 * division evaluate to 0.
 */
static float
bdw__rasterizer_and_pixel_backend__eu_fpu_both_active__read(struct brw_context *brw,
                                                            const struct brw_perf_query_info *query,
                                                            uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 9];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t per_eu = 0;
   double percent = 0;

   /* UDIV step: integer division, skipped when there are no EUs. */
   if (eu_count != 0) {
      per_eu = raw / eu_count;
   }

   /* FDIV step: skipped when no clocks were counted. */
   if (clocks != 0) {
      percent = (double)(per_eu * 100) / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: Slice1 Rasterizer Input Available
 *
 * RPN equation: B 3 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Scales B counter 3 by 100 (unsigned 64-bit multiply) and divides it by
 * the GPU core clock count in floating point.  Yields 0 when the clock
 * count is zero.
 */
static float
bdw__rasterizer_and_pixel_backend__rasterizer1_input_available__read(struct brw_context *brw,
                                                                     const struct brw_perf_query_info *query,
                                                                     uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->b_offset + 3] * 100;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0) {
      percent = (double)scaled / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: VS Send Pipe Active
 *
 * RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A counter 12 by the EU count (integer division), scales the
 * quotient by 100 and divides that by the GPU core clock count in floating
 * point.  A zero EU count or a zero clock count makes the corresponding
 * division evaluate to 0.
 */
static float
bdw__rasterizer_and_pixel_backend__vs_send_active__read(struct brw_context *brw,
                                                        const struct brw_perf_query_info *query,
                                                        uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 12];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t per_eu = 0;
   double percent = 0;

   /* UDIV step: integer division, skipped when there are no EUs. */
   if (eu_count != 0) {
      per_eu = raw / eu_count;
   }

   /* FDIV step: skipped when no clocks were counted. */
   if (clocks != 0) {
      percent = (double)(per_eu * 100) / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: VS FPU1 Pipe Active
 *
 * RPN equation: A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A counter 11 by the EU count (integer division), scales the
 * quotient by 100 and divides that by the GPU core clock count in floating
 * point.  A zero EU count or a zero clock count makes the corresponding
 * division evaluate to 0.
 */
static float
bdw__rasterizer_and_pixel_backend__vs_fpu1_active__read(struct brw_context *brw,
                                                        const struct brw_perf_query_info *query,
                                                        uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 11];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t per_eu = 0;
   double percent = 0;

   /* UDIV step: integer division, skipped when there are no EUs. */
   if (eu_count != 0) {
      per_eu = raw / eu_count;
   }

   /* FDIV step: skipped when no clocks were counted. */
   if (clocks != 0) {
      percent = (double)(per_eu * 100) / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: GS Threads Dispatched
 *
 * RPN equation: A 5 READ
 *
 * Returns A counter 5 from the accumulation buffer, unmodified.
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__gs_threads__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   return accumulator[query->a_offset + 5];
}

/* Metric set RasterizerAndPixelBackend :: Early Hi-Depth Test Fails
 *
 * RPN equation: A 22 READ 4 UMUL
 *
 * Returns A counter 22 multiplied by 4 (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__hi_depth_test_fails__read(struct brw_context *brw,
                                                             const struct brw_perf_query_info *query,
                                                             uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 22];

   return raw * 4;
}

/* Metric set RasterizerAndPixelBackend :: FS Both FPU Active
 *
 * RPN equation: A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A counter 18 by the EU count (integer division), scales the
 * quotient by 100 and divides that by the GPU core clock count in floating
 * point.  A zero EU count or a zero clock count makes the corresponding
 * division evaluate to 0.
 */
static float
bdw__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read(struct brw_context *brw,
                                                               const struct brw_perf_query_info *query,
                                                               uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 18];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t per_eu = 0;
   double percent = 0;

   /* UDIV step: integer division, skipped when there are no EUs. */
   if (eu_count != 0) {
      per_eu = raw / eu_count;
   }

   /* FDIV step: skipped when no clocks were counted. */
   if (clocks != 0) {
      percent = (double)(per_eu * 100) / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: VS Threads Dispatched
 *
 * RPN equation: A 1 READ
 *
 * Returns A counter 1 from the accumulation buffer, unmodified.
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__vs_threads__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   return accumulator[query->a_offset + 1];
}

/* Metric set RasterizerAndPixelBackend :: FS Threads Dispatched
 *
 * RPN equation: A 6 READ
 *
 * Returns A counter 6 from the accumulation buffer, unmodified.
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__ps_threads__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   return accumulator[query->a_offset + 6];
}

/* Metric set RasterizerAndPixelBackend :: Slice0 Pixel Values Ready
 *
 * RPN equation: C 7 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Scales C counter 7 by 100 (unsigned 64-bit multiply) and divides it by
 * the GPU core clock count in floating point.  Yields 0 when the clock
 * count is zero.
 */
static float
bdw__rasterizer_and_pixel_backend__pixel_values0_ready__read(struct brw_context *brw,
                                                             const struct brw_perf_query_info *query,
                                                             uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->c_offset + 7] * 100;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0) {
      percent = (double)scaled / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: Shader Barrier Messages
 *
 * RPN equation: A 35 READ
 *
 * Returns A counter 35 from the accumulation buffer, unmodified.
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__shader_barriers__read(struct brw_context *brw,
                                                         const struct brw_perf_query_info *query,
                                                         uint64_t *accumulator)
{
   return accumulator[query->a_offset + 35];
}

/* Metric set RasterizerAndPixelBackend :: Sampler Texels
 *
 * RPN equation: A 28 READ 4 UMUL
 *
 * Returns A counter 28 multiplied by 4 (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__sampler_texels__read(struct brw_context *brw,
                                                        const struct brw_perf_query_info *query,
                                                        uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 28];

   return raw * 4;
}

/* Metric set RasterizerAndPixelBackend :: Pixels Failing Tests
 *
 * RPN equation: A 25 READ 4 UMUL
 *
 * Returns A counter 25 multiplied by 4 (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read(struct brw_context *brw,
                                                                      const struct brw_perf_query_info *query,
                                                                      uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 25];

   return raw * 4;
}

/* Metric set RasterizerAndPixelBackend :: GPU Time Elapsed
 *
 * RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV
 *
 * Multiplies the GPU_TIME accumulator slot by 1000000000 and divides the
 * product by the timestamp frequency, all in unsigned 64-bit integer
 * arithmetic.  Returns 0 when the timestamp frequency is zero.
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__gpu_time__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   const uint64_t raw_time = accumulator[query->gpu_time_offset];
   const uint64_t frequency = brw->perfquery.sys_vars.timestamp_frequency;

   if (frequency == 0) {
      return 0;
   }

   return (raw_time * 1000000000) / frequency;
}

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency
 *
 * RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV
 *
 * Multiplies the GPU Core Clocks metric by 1000000000 and divides the
 * product by the GPU Time Elapsed metric, using unsigned 64-bit integer
 * arithmetic.  Returns 0 when the elapsed time is zero.
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read(struct brw_context *brw,
                                                                const struct brw_perf_query_info *query,
                                                                uint64_t *accumulator)
{
   const uint64_t scaled_clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t elapsed =
      bdw__rasterizer_and_pixel_backend__gpu_time__read(brw, query, accumulator);

   if (elapsed == 0) {
      return 0;
   }

   return scaled_clocks / elapsed;
}

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency (maximum)
 *
 * RPN equation: $GpuMaxFrequency
 *
 * Returns the gt_max_freq system variable held in the context.
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   const uint64_t max_frequency = brw->perfquery.sys_vars.gt_max_freq;

   return max_frequency;
}

/* Metric set RasterizerAndPixelBackend :: Sampler Texels Misses
 *
 * RPN equation: A 29 READ 4 UMUL
 *
 * Returns A counter 29 multiplied by 4 (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__sampler_texel_misses__read(struct brw_context *brw,
                                                              const struct brw_perf_query_info *query,
                                                              uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 29];

   return raw * 4;
}

/* Metric set RasterizerAndPixelBackend :: CS Threads Dispatched
 *
 * RPN equation: A 4 READ
 *
 * Returns A counter 4 from the accumulation buffer, unmodified.
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__cs_threads__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   return accumulator[query->a_offset + 4];
}

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Input Available
 *
 * RPN equation: B 5 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Scales B counter 5 by 100 (unsigned 64-bit multiply) and divides it by
 * the GPU core clock count in floating point.  Yields 0 when the clock
 * count is zero.
 */
static float
bdw__rasterizer_and_pixel_backend__rasterizer0_input_available__read(struct brw_context *brw,
                                                                     const struct brw_perf_query_info *query,
                                                                     uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->b_offset + 5] * 100;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0) {
      percent = (double)scaled / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: Slice0 Post-EarlyZ Pixel Data Ready
 *
 * RPN equation: B 2 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Scales B counter 2 by 100 (unsigned 64-bit multiply) and divides it by
 * the GPU core clock count in floating point.  Yields 0 when the clock
 * count is zero.
 */
static float
bdw__rasterizer_and_pixel_backend__pixel_data0_ready__read(struct brw_context *brw,
                                                           const struct brw_perf_query_info *query,
                                                           uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->b_offset + 2] * 100;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0) {
      percent = (double)scaled / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Read
 *
 * RPN equation: A 30 READ 64 UMUL
 *
 * Returns A counter 30 multiplied by 64 (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__slm_bytes_read__read(struct brw_context *brw,
                                                        const struct brw_perf_query_info *query,
                                                        uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 30];

   return raw * 64;
}

/* Metric set RasterizerAndPixelBackend :: PS FPU1 Pipe Active
 *
 * RPN equation: A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A counter 16 by the EU count (integer division), scales the
 * quotient by 100 and divides that by the GPU core clock count in floating
 * point.  A zero EU count or a zero clock count makes the corresponding
 * division evaluate to 0.
 */
static float
bdw__rasterizer_and_pixel_backend__ps_fpu1_active__read(struct brw_context *brw,
                                                        const struct brw_perf_query_info *query,
                                                        uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 16];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t per_eu = 0;
   double percent = 0;

   /* UDIV step: integer division, skipped when there are no EUs. */
   if (eu_count != 0) {
      per_eu = raw / eu_count;
   }

   /* FDIV step: skipped when no clocks were counted. */
   if (clocks != 0) {
      percent = (double)(per_eu * 100) / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: PS Send Pipeline Active
 *
 * RPN equation: A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A counter 17 by the EU count (integer division), scales the
 * quotient by 100 and divides that by the GPU core clock count in floating
 * point.  A zero EU count or a zero clock count makes the corresponding
 * division evaluate to 0.
 */
static float
bdw__rasterizer_and_pixel_backend__ps_send_active__read(struct brw_context *brw,
                                                        const struct brw_perf_query_info *query,
                                                        uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 17];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t per_eu = 0;
   double percent = 0;

   /* UDIV step: integer division, skipped when there are no EUs. */
   if (eu_count != 0) {
      per_eu = raw / eu_count;
   }

   /* FDIV step: skipped when no clocks were counted. */
   if (clocks != 0) {
      percent = (double)(per_eu * 100) / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: VS FPU0 Pipe Active
 *
 * RPN equation: A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A counter 10 by the EU count (integer division), scales the
 * quotient by 100 and divides that by the GPU core clock count in floating
 * point.  A zero EU count or a zero clock count makes the corresponding
 * division evaluate to 0.
 */
static float
bdw__rasterizer_and_pixel_backend__vs_fpu0_active__read(struct brw_context *brw,
                                                        const struct brw_perf_query_info *query,
                                                        uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 10];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t per_eu = 0;
   double percent = 0;

   /* UDIV step: integer division, skipped when there are no EUs. */
   if (eu_count != 0) {
      per_eu = raw / eu_count;
   }

   /* FDIV step: skipped when no clocks were counted. */
   if (clocks != 0) {
      percent = (double)(per_eu * 100) / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: GPU Busy
 *
 * RPN equation: A 0 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Scales A counter 0 by 100 (unsigned 64-bit multiply) and divides it by
 * the GPU core clock count in floating point.  Yields 0 when the clock
 * count is zero.
 */
static float
bdw__rasterizer_and_pixel_backend__gpu_busy__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->a_offset] * 100;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0) {
      percent = (double)scaled / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: Slice1 Pixel Values Ready
 *
 * RPN equation: C 5 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Scales C counter 5 by 100 (unsigned 64-bit multiply) and divides it by
 * the GPU core clock count in floating point.  Yields 0 when the clock
 * count is zero.
 */
static float
bdw__rasterizer_and_pixel_backend__pixel_values1_ready__read(struct brw_context *brw,
                                                             const struct brw_perf_query_info *query,
                                                             uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->c_offset + 5] * 100;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0) {
      percent = (double)scaled / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: Slice1 PS Output Available
 *
 * RPN equation: C 4 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Scales C counter 4 by 100 (unsigned 64-bit multiply) and divides it by
 * the GPU core clock count in floating point.  Yields 0 when the clock
 * count is zero.
 */
static float
bdw__rasterizer_and_pixel_backend__ps_output1_available__read(struct brw_context *brw,
                                                              const struct brw_perf_query_info *query,
                                                              uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->c_offset + 4] * 100;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0) {
      percent = (double)scaled / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: Rasterized Pixels
 *
 * RPN equation: A 21 READ 4 UMUL
 *
 * Returns A counter 21 multiplied by 4 (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__rasterized_pixels__read(struct brw_context *brw,
                                                           const struct brw_perf_query_info *query,
                                                           uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 21];

   return raw * 4;
}

/* Metric set RasterizerAndPixelBackend :: PS FPU0 Pipe Active
 *
 * RPN equation: A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A counter 15 by the EU count (integer division), scales the
 * quotient by 100 and divides that by the GPU core clock count in floating
 * point.  A zero EU count or a zero clock count makes the corresponding
 * division evaluate to 0.
 */
static float
bdw__rasterizer_and_pixel_backend__ps_fpu0_active__read(struct brw_context *brw,
                                                        const struct brw_perf_query_info *query,
                                                        uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 15];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t per_eu = 0;
   double percent = 0;

   /* UDIV step: integer division, skipped when there are no EUs. */
   if (eu_count != 0) {
      per_eu = raw / eu_count;
   }

   /* FDIV step: skipped when no clocks were counted. */
   if (clocks != 0) {
      percent = (double)(per_eu * 100) / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: DS Threads Dispatched
 *
 * RPN equation: A 3 READ
 *
 * Returns A counter 3 from the accumulation buffer, unmodified.
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__ds_threads__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   return accumulator[query->a_offset + 3];
}

/* Metric set RasterizerAndPixelBackend :: Samples Written
 *
 * RPN equation: A 26 READ 4 UMUL
 *
 * Returns A counter 26 multiplied by 4 (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__samples_written__read(struct brw_context *brw,
                                                         const struct brw_perf_query_info *query,
                                                         uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 26];

   return raw * 4;
}

/* Metric set RasterizerAndPixelBackend :: EU Stall
 *
 * RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Divides A counter 8 by the EU count (integer division), scales the
 * quotient by 100 and divides that by the GPU core clock count in floating
 * point.  A zero EU count or a zero clock count makes the corresponding
 * division evaluate to 0.
 */
static float
bdw__rasterizer_and_pixel_backend__eu_stall__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 8];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   uint64_t per_eu = 0;
   double percent = 0;

   /* UDIV step: integer division, skipped when there are no EUs. */
   if (eu_count != 0) {
      per_eu = raw / eu_count;
   }

   /* FDIV step: skipped when no clocks were counted. */
   if (clocks != 0) {
      percent = (double)(per_eu * 100) / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Output Ready
 *
 * RPN equation: B 0 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Scales B counter 0 by 100 (unsigned 64-bit multiply) and divides it by
 * the GPU core clock count in floating point.  Yields 0 when the clock
 * count is zero.
 */
static float
bdw__rasterizer_and_pixel_backend__rasterizer0_output_ready__read(struct brw_context *brw,
                                                                  const struct brw_perf_query_info *query,
                                                                  uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->b_offset] * 100;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0) {
      percent = (double)scaled / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: Samples Blended
 *
 * RPN equation: A 27 READ 4 UMUL
 *
 * Returns A counter 27 multiplied by 4 (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__samples_blended__read(struct brw_context *brw,
                                                         const struct brw_perf_query_info *query,
                                                         uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 27];

   return raw * 4;
}

/* Metric set RasterizerAndPixelBackend :: Early Depth Test Fails
 *
 * RPN equation: A 23 READ 4 UMUL
 *
 * Returns A counter 23 multiplied by 4 (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__early_depth_test_fails__read(struct brw_context *brw,
                                                                const struct brw_perf_query_info *query,
                                                                uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 23];

   return raw * 4;
}

/* Metric set RasterizerAndPixelBackend :: Slice1 Rasterizer Output Ready
 *
 * RPN equation: B 4 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Scales B counter 4 by 100 (unsigned 64-bit multiply) and divides it by
 * the GPU core clock count in floating point.  Yields 0 when the clock
 * count is zero.
 */
static float
bdw__rasterizer_and_pixel_backend__rasterizer1_output_ready__read(struct brw_context *brw,
                                                                  const struct brw_perf_query_info *query,
                                                                  uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->b_offset + 4] * 100;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0) {
      percent = (double)scaled / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: Slice1 Post-EarlyZ Pixel Data Ready
 *
 * RPN equation: B 1 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Scales B counter 1 by 100 (unsigned 64-bit multiply) and divides it by
 * the GPU core clock count in floating point.  Yields 0 when the clock
 * count is zero.
 */
static float
bdw__rasterizer_and_pixel_backend__pixel_data1_ready__read(struct brw_context *brw,
                                                           const struct brw_perf_query_info *query,
                                                           uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->b_offset + 1] * 100;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0) {
      percent = (double)scaled / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: Shader Memory Accesses
 *
 * RPN equation: A 32 READ
 *
 * Returns A counter 32 from the accumulation buffer, unmodified.
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__shader_memory_accesses__read(struct brw_context *brw,
                                                                const struct brw_perf_query_info *query,
                                                                uint64_t *accumulator)
{
   return accumulator[query->a_offset + 32];
}

/* Metric set RasterizerAndPixelBackend :: HS Threads Dispatched
 *
 * RPN equation: A 2 READ
 *
 * Returns A counter 2 from the accumulation buffer, unmodified.
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__hs_threads__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   return accumulator[query->a_offset + 2];
}

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Written
 *
 * RPN equation: A 31 READ 64 UMUL
 *
 * Returns A counter 31 multiplied by 64 (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__slm_bytes_written__read(struct brw_context *brw,
                                                           const struct brw_perf_query_info *query,
                                                           uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 31];

   return raw * 64;
}

/* Metric set RasterizerAndPixelBackend :: L3 Shader Throughput
 *
 * RPN equation: A 30 READ  A 31 READ $ShaderMemoryAccesses 64 UMUL UADD UADD
 *
 * Sums three terms in unsigned 64-bit arithmetic: A counter 30, A counter
 * 31, and the Shader Memory Accesses metric multiplied by 64.  Note that
 * only the last term is scaled by 64; the two raw counters are added as-is,
 * exactly as the equation specifies.
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__l3_shader_throughput__read(struct brw_context *brw,
                                                              const struct brw_perf_query_info *query,
                                                              uint64_t *accumulator)
{
   const uint64_t raw_a30 = accumulator[query->a_offset + 30];
   const uint64_t raw_a31 = accumulator[query->a_offset + 31];
   const uint64_t access_bytes =
      bdw__rasterizer_and_pixel_backend__shader_memory_accesses__read(brw, query, accumulator) * 64;

   return raw_a30 + (raw_a31 + access_bytes);
}

/* Metric set RasterizerAndPixelBackend :: Samples Killed in FS
 *
 * RPN equation: A 24 READ 4 UMUL
 *
 * Returns A counter 24 multiplied by 4 (unsigned 64-bit arithmetic).
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__samples_killed_in_ps__read(struct brw_context *brw,
                                                              const struct brw_perf_query_info *query,
                                                              uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 24];

   return raw * 4;
}

/* Metric set RasterizerAndPixelBackend :: Slice0 PS Output Available
 *
 * RPN equation: C 6 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Scales C counter 6 by 100 (unsigned 64-bit multiply) and divides it by
 * the GPU core clock count in floating point.  Yields 0 when the clock
 * count is zero.
 */
static float
bdw__rasterizer_and_pixel_backend__ps_output0_available__read(struct brw_context *brw,
                                                              const struct brw_perf_query_info *query,
                                                              uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->c_offset + 6] * 100;
   const double clocks =
      bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0) {
      percent = (double)scaled / clocks;
   }

   return percent;
}

/* Metric set RasterizerAndPixelBackend :: Shader Atomic Memory Accesses
 *
 * RPN equation: A 34 READ
 *
 * Returns A counter 34 from the accumulation buffer, unmodified.
 */
static uint64_t
bdw__rasterizer_and_pixel_backend__shader_atomics__read(struct brw_context *brw,
                                                        const struct brw_perf_query_info *query,
                                                        uint64_t *accumulator)
{
   return accumulator[query->a_offset + 34];
}

/* Register programming tables for the RasterizerAndPixelBackend metric set.
 *
 * These have static storage (so they start out zeroed) and are filled in by
 * register_rasterizer_and_pixel_backend_counter_query(), which appends
 * entries through the query's n_mux_regs / n_b_counter_regs / n_flex_regs
 * cursors. The sizes match the number of entries appended there exactly
 * (114 mux, 18 B-counter, 7 flex), so adding an entry to the registration
 * code without growing the matching array would write out of bounds.
 */
static struct brw_perf_query_register_prog bdw_rasterizer_and_pixel_backend_mux_regs[114];
static struct brw_perf_query_register_prog bdw_rasterizer_and_pixel_backend_b_counter_regs[18];
static struct brw_perf_query_register_prog bdw_rasterizer_and_pixel_backend_flex_regs[7];

/* Counter descriptors for this metric set. Entries are appended one at a
 * time by register_rasterizer_and_pixel_backend_counter_query() via
 * query->n_counters; the capacity of 45 is presumably the maximum number of
 * counters that function can register (its full body should be checked
 * before changing this size).
 */
static struct brw_perf_query_counter bdw_rasterizer_and_pixel_backend_query_counters[45];

/* Static description of the RasterizerAndPixelBackend OA query.
 *
 * Only the fixed parts are initialized here. All n_* counts start at zero
 * and grow as the registration function appends to the backing arrays.
 * data_size is not listed, so it is implicitly zero; the registration
 * function tests !query->data_size before populating the tables.
 */
static struct brw_perf_query_info bdw_rasterizer_and_pixel_backend_query = {
   .kind = OA_COUNTERS,
   .name = "Metric set RasterizerAndPixelBackend",
   .guid = "14345c35-cc46-40d0-bb04-6ed1fbb43679",
   .counters = bdw_rasterizer_and_pixel_backend_query_counters,
   .n_counters = 0, /* incremented as each counter is registered */
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,

   /* Accumulation buffer offsets: indices into the uint64_t accumulator
    * array handed to the counter read functions, which index it as e.g.
    * accumulator[query->a_offset + N]. The gaps leave 36 slots for A
    * counters (2..37) and 8 for B counters (38..45), which lines up with
    * the A32 + A4 / B8 / C8 groups named in the OA format above. */
   .gpu_time_offset = 0,
   .gpu_clock_offset = 1,
   .a_offset = 2,
   .b_offset = 38,
   .c_offset = 46,
   .mux_regs = bdw_rasterizer_and_pixel_backend_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = bdw_rasterizer_and_pixel_backend_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
   .flex_regs = bdw_rasterizer_and_pixel_backend_flex_regs,
   .n_flex_regs = 0, /* Determined at runtime */
};

static void
register_rasterizer_and_pixel_backend_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &bdw_rasterizer_and_pixel_backend_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x000000A0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x143B000E };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x043C55C0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A1E0280 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C1E0408 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10390000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x12397A1F };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x14BB000E };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04BC5000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A9E0296 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C9E0008 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10B90000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x12B97A1F };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x063B0042 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x103B0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x083C0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0A3E0040 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x043F8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02594000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x045A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C1C0400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x041D8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x081E02C0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1E0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0C1FA800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E1F0260 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x101F0014 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x003905E0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x06390BC0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02390018 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04394000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x04BB0042 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x10BB0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02BC05C0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08BC0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0ABE0004 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02BF8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02D91000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02DA8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x089C8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x029D8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x089E8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E9E0000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0E9FA806 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x109F0142 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x08B90617 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0AB90BE0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x02B94000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D88F000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F88000C };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07888000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09888000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x018A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8A2800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D8A8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x238B52A0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x258B6A95 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x278B0029 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x178C2000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x198C1500 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B8C0014 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x078C4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098C4000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x098DA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B8DA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F8DA000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x038D8000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x058D2000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F85AA80 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2185AAAA };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x2385002A };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x01834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x19835400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B830155 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x03834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x05834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x07834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x09834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D834000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0184C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0784C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0984C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0B84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0D84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0F84C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0384C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x0584C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1180C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1780C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1980C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1B80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1D80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1F80C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1380C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x1580C000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4D800444 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3D800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x4F804000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x43801080 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x51800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x45800084 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x53800044 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x47801080 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x21800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x31800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x3F800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009888, .val = 0x41800840 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };

      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002744, .val = 0x00800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0x30800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002770, .val = 0x00006000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002774, .val = 0x0000F3FF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002778, .val = 0x00001800 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000277C, .val = 0x0000FCFF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002780, .val = 0x00000600 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002784, .val = 0x0000FF3F };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002788, .val = 0x00000180 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000278C, .val = 0x0000FFCF };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002790, .val = 0x00000060 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002794, .val = 0x0000FFF3 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002798, .val = 0x00000018 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000279C, .val = 0x0000FFFC };

      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E458, .val = 0x00005004 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E558, .val = 0x00010003 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E658, .val = 0x00012011 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E758, .val = 0x00015014 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E45C, .val = 0x00051050 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E55C, .val = 0x00053052 };
      query->flex_regs[query->n_flex_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000E65C, .val = 0x00055054 };


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 12;
      counter->size = sizeof(float);

      if (brw->perfquery.sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__rasterizer1_input_available__read;
         counter->name = "Slice1 Rasterizer Input Available";
         counter->desc = "The percentage of time in which slice1 rasterizer input is available";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 16;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 20;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 24;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
 