/* Autogenerated file, DO NOT EDIT manually! generated by brw_oa.py
 *
 * Copyright (c) 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <stdint.h>
#include <stdbool.h>

#include "util/hash_table.h"

#include "brw_oa_hsw.h"
#include "brw_context.h"
#include "brw_performance_query.h"


/* Minimum / maximum of two values.
 *
 * Every use of an argument is parenthesized so that arguments containing
 * operators with lower precedence than '<' / '>' (e.g. '&', '|', '?:') are
 * compared as a whole.  Each argument may still be evaluated twice, so do not
 * pass expressions with side effects.
 */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))


/* Render Metrics Basic Gen7.5 :: GPU Core Clocks
 *
 * Returns the accumulator entry at index c_offset + 2 unchanged.
 */
static uint64_t
hsw__render_basic__gpu_core_clocks__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: C 2 READ */
   return accumulator[query->c_offset + 2];
}

/* Render Metrics Basic Gen7.5 :: EU Active
 *
 * Integer-divides accumulator entry A0 by the EU count, multiplies the
 * quotient by 100 and divides that by $GpuCoreClocks in floating point.
 * A zero EU count or a zero clock count produces 0 instead of a division.
 */
static float
hsw__render_basic__eu_active__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   /* RPN equation: A 0 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = accumulator[query->a_offset + 0];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? counter / eu_count : 0;
   const double scaled = per_eu * 100;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = scaled / clocks;

   return result;
}

/* Render Metrics Basic Gen7.5 :: TES EU Stall
 *
 * Integer-divides accumulator entry A13 by the EU count, multiplies the
 * quotient by 100 and divides that by $GpuCoreClocks in floating point.
 * A zero EU count or a zero clock count produces 0 instead of a division.
 */
static float
hsw__render_basic__ds_eu_stall__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = accumulator[query->a_offset + 13];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? counter / eu_count : 0;
   const double scaled = per_eu * 100;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = scaled / clocks;

   return result;
}

/* Render Metrics Basic Gen7.5 :: Alpha Test Fails
 *
 * Returns the accumulator entry at index a_offset + 37 unchanged.
 */
static uint64_t
hsw__render_basic__alpha_test_fails__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   /* RPN equation: A 37 READ */
   return accumulator[query->a_offset + 37];
}

/* Render Metrics Basic Gen7.5 :: Sampler 1 Bottleneck
 *
 * Multiplies accumulator entry B3 by 100 (integer) and divides the product by
 * $GpuCoreClocks in floating point; a zero clock count produces 0.
 */
static float
hsw__render_basic__sampler1_bottleneck__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: B 3 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t scaled_count = accumulator[query->b_offset + 3] * 100;
   const double scaled = scaled_count;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = scaled / clocks;

   return result;
}

/* Render Metrics Basic Gen7.5 :: TES Threads Dispatched
 *
 * Returns the accumulator entry at index a_offset + 15 unchanged.
 */
static uint64_t
hsw__render_basic__ds_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 15 READ */
   return accumulator[query->a_offset + 15];
}

/* Render Metrics Basic Gen7.5 :: TES AVG Active per Thread
 *
 * Integer-divides accumulator entry A12 by $DsThreads; returns 0 when the
 * thread count is 0.
 */
static uint64_t
hsw__render_basic__ds_eu_active_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 12 READ $DsThreads UDIV */
   const uint64_t total = accumulator[query->a_offset + 12];
   const uint64_t threads = hsw__render_basic__ds_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return total / threads;
}

/* Render Metrics Basic Gen7.5 :: GS Threads Dispatched
 *
 * Returns the accumulator entry at index a_offset + 25 unchanged.
 */
static uint64_t
hsw__render_basic__gs_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 25 READ */
   return accumulator[query->a_offset + 25];
}

/* Render Metrics Basic Gen7.5 :: GS EU Stall
 *
 * Integer-divides accumulator entry A23 by the EU count, multiplies the
 * quotient by 100 and divides that by $GpuCoreClocks in floating point.
 * A zero EU count or a zero clock count produces 0 instead of a division.
 */
static float
hsw__render_basic__gs_eu_stall__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 23 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = accumulator[query->a_offset + 23];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? counter / eu_count : 0;
   const double scaled = per_eu * 100;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = scaled / clocks;

   return result;
}

/* Render Metrics Basic Gen7.5 :: CS EU Active
 *
 * Integer-divides accumulator entry A17 by the EU count, multiplies the
 * quotient by 100 and divides that by $GpuCoreClocks in floating point.
 * A zero EU count or a zero clock count produces 0 instead of a division.
 */
static float
hsw__render_basic__cs_eu_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = accumulator[query->a_offset + 17];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? counter / eu_count : 0;
   const double scaled = per_eu * 100;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = scaled / clocks;

   return result;
}

/* Render Metrics Basic Gen7.5 :: VS EU Active
 *
 * Integer-divides accumulator entry A2 by the EU count, multiplies the
 * quotient by 100 and divides that by $GpuCoreClocks in floating point.
 * A zero EU count or a zero clock count produces 0 instead of a division.
 */
static float
hsw__render_basic__vs_eu_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 2 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = accumulator[query->a_offset + 2];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? counter / eu_count : 0;
   const double scaled = per_eu * 100;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = scaled / clocks;

   return result;
}

/* Render Metrics Basic Gen7.5 :: TCS EU Active
 *
 * Integer-divides accumulator entry A7 by the EU count, multiplies the
 * quotient by 100 and divides that by $GpuCoreClocks in floating point.
 * A zero EU count or a zero clock count produces 0 instead of a division.
 */
static float
hsw__render_basic__hs_eu_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = accumulator[query->a_offset + 7];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? counter / eu_count : 0;
   const double scaled = per_eu * 100;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = scaled / clocks;

   return result;
}

/* Render Metrics Basic Gen7.5 :: TES EU Active
 *
 * Integer-divides accumulator entry A12 by the EU count, multiplies the
 * quotient by 100 and divides that by $GpuCoreClocks in floating point.
 * A zero EU count or a zero clock count produces 0 instead of a division.
 */
static float
hsw__render_basic__ds_eu_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = accumulator[query->a_offset + 12];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? counter / eu_count : 0;
   const double scaled = per_eu * 100;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = scaled / clocks;

   return result;
}

/* Render Metrics Basic Gen7.5 :: GS EU Active
 *
 * Integer-divides accumulator entry A22 by the EU count, multiplies the
 * quotient by 100 and divides that by $GpuCoreClocks in floating point.
 * A zero EU count or a zero clock count produces 0 instead of a division.
 */
static float
hsw__render_basic__gs_eu_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 22 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = accumulator[query->a_offset + 22];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? counter / eu_count : 0;
   const double scaled = per_eu * 100;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = scaled / clocks;

   return result;
}

/* Render Metrics Basic Gen7.5 :: FS EU Active
 *
 * Integer-divides accumulator entry A27 by the EU count, multiplies the
 * quotient by 100 and divides that by $GpuCoreClocks in floating point.
 * A zero EU count or a zero clock count produces 0 instead of a division.
 */
static float
hsw__render_basic__ps_eu_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 27 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = accumulator[query->a_offset + 27];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? counter / eu_count : 0;
   const double scaled = per_eu * 100;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = scaled / clocks;

   return result;
}

/* Render Metrics Basic Gen7.5 :: CS EU Stall
 *
 * Integer-divides accumulator entry A18 by the EU count, multiplies the
 * quotient by 100 and divides that by $GpuCoreClocks in floating point.
 * A zero EU count or a zero clock count produces 0 instead of a division.
 */
static float
hsw__render_basic__cs_eu_stall__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = accumulator[query->a_offset + 18];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? counter / eu_count : 0;
   const double scaled = per_eu * 100;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = scaled / clocks;

   return result;
}

/* Render Metrics Basic Gen7.5 :: EU Stall
 *
 * Integer-divides accumulator entry A1 by the EU count, multiplies the
 * quotient by 100 and divides that by $GpuCoreClocks in floating point.
 * A zero EU count or a zero clock count produces 0 instead of a division.
 */
static float
hsw__render_basic__eu_stall__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: A 1 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = accumulator[query->a_offset + 1];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? counter / eu_count : 0;
   const double scaled = per_eu * 100;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = scaled / clocks;

   return result;
}

/* Render Metrics Basic Gen7.5 :: VS EU Stall
 *
 * Integer-divides accumulator entry A3 by the EU count, multiplies the
 * quotient by 100 and divides that by $GpuCoreClocks in floating point.
 * A zero EU count or a zero clock count produces 0 instead of a division.
 */
static float
hsw__render_basic__vs_eu_stall__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 3 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = accumulator[query->a_offset + 3];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? counter / eu_count : 0;
   const double scaled = per_eu * 100;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = scaled / clocks;

   return result;
}

/* Render Metrics Basic Gen7.5 :: TCS EU Stall
 *
 * Integer-divides accumulator entry A8 by the EU count, multiplies the
 * quotient by 100 and divides that by $GpuCoreClocks in floating point.
 * A zero EU count or a zero clock count produces 0 instead of a division.
 */
static float
hsw__render_basic__hs_eu_stall__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = accumulator[query->a_offset + 8];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? counter / eu_count : 0;
   const double scaled = per_eu * 100;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = scaled / clocks;

   return result;
}

/* Render Metrics Basic Gen7.5 :: FS EU Stall
 *
 * Integer-divides accumulator entry A28 by the EU count, multiplies the
 * quotient by 100 and divides that by $GpuCoreClocks in floating point.
 * A zero EU count or a zero clock count produces 0 instead of a division.
 */
static float
hsw__render_basic__ps_eu_stall__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 28 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = accumulator[query->a_offset + 28];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? counter / eu_count : 0;
   const double scaled = per_eu * 100;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = scaled / clocks;

   return result;
}

/* Render Metrics Basic Gen7.5 :: GPU Time Elapsed
 *
 * Scales the accumulated GPU_TIME value by 1000000000 and divides it by the
 * timestamp frequency, returning 0 when the frequency is 0.
 *
 * The scaling is applied separately to the whole quotient and to the
 * remainder so the intermediate product cannot wrap around 64 bits for large
 * accumulated values.  Whenever raw * 1000000000 fits in 64 bits the result
 * is identical to the plain multiply-then-divide form of the equation.
 */
static uint64_t
hsw__render_basic__gpu_time__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV */
   const uint64_t scale = 1000000000;
   const uint64_t raw = accumulator[query->gpu_time_offset + 0];
   const uint64_t freq = brw->perfquery.sys_vars.timestamp_frequency;

   if (freq == 0)
      return 0;

   /* raw == whole * freq + rest, hence
    *    raw * scale / freq == whole * scale + rest * scale / freq
    * (both rounded down).  rest < freq, so rest * scale stays in range for
    * any frequency below UINT64_MAX / scale (about 1.8e10).
    */
   const uint64_t whole = raw / freq;
   const uint64_t rest = raw % freq;

   return whole * scale + (rest * scale) / freq;
}

/* Render Metrics Basic Gen7.5 :: CS Duration
 *
 * Computes, entirely in unsigned 64-bit integer arithmetic:
 *
 *    share = A17 * A0 / (A2 + A7 + A12 + A17 + A22 + A27)
 *          + A18 * A1 / (A3 + A8 + A13 + A18 + A23 + A28)
 *    result = share * $GpuTime / ($GpuCoreClocks * $EuCoresTotalCount * 1000)
 *
 * Every division whose divisor is 0 evaluates to 0.
 */
static uint64_t
hsw__render_basic__cs_duration__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 17 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 18 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */
   const uint64_t *a = accumulator + query->a_offset;

   /* First term: built from the even-numbered stage counters and A0. */
   const uint64_t active_product = a[17] * a[0];
   const uint64_t active_sum = a[2] + a[7] + a[12] + a[17] + a[22] + a[27];
   const uint64_t active_term = active_sum ? active_product / active_sum : 0;

   /* Second term: built from the matching "+1" counters and A1. */
   const uint64_t stall_product = a[18] * a[1];
   const uint64_t stall_sum = a[3] + a[8] + a[13] + a[18] + a[23] + a[28];
   const uint64_t stall_term = stall_sum ? stall_product / stall_sum : 0;

   const uint64_t numerator =
      (active_term + stall_term) *
      hsw__render_basic__gpu_time__read(brw, query, accumulator);
   const uint64_t eu_clocks =
      hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator) *
      brw->perfquery.sys_vars.n_eus;
   const uint64_t denominator = eu_clocks * 1000;

   if (denominator == 0)
      return 0;

   return numerator / denominator;
}

/* Render Metrics Basic Gen7.5 :: VS Threads Dispatched
 *
 * Returns the accumulator entry at index a_offset + 5 unchanged.
 */
static uint64_t
hsw__render_basic__vs_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 5 READ */
   return accumulator[query->a_offset + 5];
}

/* Render Metrics Basic Gen7.5 :: FS Threads Dispatched
 *
 * Returns the accumulator entry at index a_offset + 30 unchanged.
 */
static uint64_t
hsw__render_basic__ps_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 30 READ */
   return accumulator[query->a_offset + 30];
}

/* Render Metrics Basic Gen7.5 :: Sampler 0 Busy
 *
 * Multiplies accumulator entry B0 by 100 (integer) and divides the product by
 * $GpuCoreClocks in floating point; a zero clock count produces 0.
 */
static float
hsw__render_basic__sampler0_busy__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   /* RPN equation: B 0 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t scaled_count = accumulator[query->b_offset + 0] * 100;
   const double scaled = scaled_count;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = scaled / clocks;

   return result;
}

/* Render Metrics Basic Gen7.5 :: Sampler 1 Busy
 *
 * Multiplies accumulator entry B1 by 100 (integer) and divides the product by
 * $GpuCoreClocks in floating point; a zero clock count produces 0.
 */
static float
hsw__render_basic__sampler1_busy__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   /* RPN equation: B 1 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t scaled_count = accumulator[query->b_offset + 1] * 100;
   const double scaled = scaled_count;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = scaled / clocks;

   return result;
}

/* Render Metrics Basic Gen7.5 :: Samplers Busy
 *
 * Adds accumulator entries B0 and B1 (integer), divides the sum by
 * $GpuCoreClocks in floating point (0 when the clock count is 0), halves the
 * quotient and multiplies it by 100.
 */
static float
hsw__render_basic__samplers_busy__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   /* RPN equation: B 0 READ  B 1 READ UADD $GpuCoreClocks FDIV 2 FDIV 100 FMUL */
   const uint64_t busy_sum =
      accumulator[query->b_offset + 0] + accumulator[query->b_offset + 1];
   const double busy = busy_sum;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double per_clock = 0;

   if (clocks != 0)
      per_clock = busy / clocks;

   /* The divisor is the constant 2, so no zero check is needed here. */
   const double per_sampler = per_clock / 2;
   const double result = per_sampler * 100;

   return result;
}

/* Render Metrics Basic Gen7.5 :: TES Duration
 *
 * Computes, entirely in unsigned 64-bit integer arithmetic:
 *
 *    share = A12 * A0 / (A2 + A7 + A12 + A17 + A22 + A27)
 *          + A13 * A1 / (A3 + A8 + A13 + A18 + A23 + A28)
 *    result = share * $GpuTime / ($GpuCoreClocks * $EuCoresTotalCount * 1000)
 *
 * Every division whose divisor is 0 evaluates to 0.
 */
static uint64_t
hsw__render_basic__ds_duration__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 12 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 13 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */
   const uint64_t *a = accumulator + query->a_offset;

   /* First term: built from the even-numbered stage counters and A0. */
   const uint64_t active_product = a[12] * a[0];
   const uint64_t active_sum = a[2] + a[7] + a[12] + a[17] + a[22] + a[27];
   const uint64_t active_term = active_sum ? active_product / active_sum : 0;

   /* Second term: built from the matching "+1" counters and A1. */
   const uint64_t stall_product = a[13] * a[1];
   const uint64_t stall_sum = a[3] + a[8] + a[13] + a[18] + a[23] + a[28];
   const uint64_t stall_term = stall_sum ? stall_product / stall_sum : 0;

   const uint64_t numerator =
      (active_term + stall_term) *
      hsw__render_basic__gpu_time__read(brw, query, accumulator);
   const uint64_t eu_clocks =
      hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator) *
      brw->perfquery.sys_vars.n_eus;
   const uint64_t denominator = eu_clocks * 1000;

   if (denominator == 0)
      return 0;

   return numerator / denominator;
}

/* Render Metrics Basic Gen7.5 :: GTI Fixed Pipe Throughput
 *
 * Returns accumulator entry C1 multiplied by 64.
 */
static uint64_t
hsw__render_basic__gti_vf_throughput__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   /* RPN equation: C 1 READ 64 UMUL */
   const uint64_t count = accumulator[query->c_offset + 1];

   return count * 64;
}

/* Render Metrics Basic Gen7.5 :: GTI Read Throughput
 *
 * Returns accumulator entry C6 multiplied by 128.
 */
static uint64_t
hsw__render_basic__gti_read_throughput__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: C 6 READ 128 UMUL */
   const uint64_t count = accumulator[query->c_offset + 6];

   return count * 128;
}

/* Render Metrics Basic Gen7.5 :: CS Threads Dispatched
 *
 * Returns the accumulator entry at index a_offset + 20 unchanged.
 */
static uint64_t
hsw__render_basic__cs_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 20 READ */
   return accumulator[query->a_offset + 20];
}

/* Render Metrics Basic Gen7.5 :: CS AVG Active per Thread
 *
 * Integer-divides accumulator entry A17 by $CsThreads; returns 0 when the
 * thread count is 0.
 */
static uint64_t
hsw__render_basic__cs_eu_active_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 17 READ $CsThreads UDIV */
   const uint64_t total = accumulator[query->a_offset + 17];
   const uint64_t threads = hsw__render_basic__cs_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return total / threads;
}

/* Render Metrics Basic Gen7.5 :: Sampler 0 Bottleneck
 *
 * Multiplies accumulator entry B2 by 100 (integer) and divides the product by
 * $GpuCoreClocks in floating point; a zero clock count produces 0.
 */
static float
hsw__render_basic__sampler0_bottleneck__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: B 2 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t scaled_count = accumulator[query->b_offset + 2] * 100;
   const double scaled = scaled_count;
   const double clocks = hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);
   double result = 0;

   if (clocks != 0)
      result = scaled / clocks;

   return result;
}

/* Render Metrics Basic Gen7.5 :: GS AVG Stall per Thread
 *
 * Integer-divides accumulator entry A23 by $GsThreads; returns 0 when the
 * thread count is 0.
 */
static uint64_t
hsw__render_basic__gs_eu_stall_per_thread__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: A 23 READ $GsThreads UDIV */
   const uint64_t total = accumulator[query->a_offset + 23];
   const uint64_t threads = hsw__render_basic__gs_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return total / threads;
}

/* Render Metrics Basic Gen7.5 :: Sampler 0 Texels LOD0
 *
 * Returns accumulator entry B4 multiplied by 4.
 */
static uint64_t
hsw__render_basic__sampler0_texels__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: B 4 READ 4 UMUL */
   const uint64_t count = accumulator[query->b_offset + 4];

   return count * 4;
}

/* Render Metrics Basic Gen7.5 :: Sampler 1 Texels LOD0
 *
 * Returns accumulator entry B5 multiplied by 4.
 */
static uint64_t
hsw__render_basic__sampler1_texels__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: B 5 READ 4 UMUL */
   const uint64_t count = accumulator[query->b_offset + 5];

   return count * 4;
}

/* Render Metrics Basic Gen7.5 :: Sampler Texels LOD0
 *
 * Adds $Sampler0Texels and $Sampler1Texels and multiplies the sum by the
 * slice count taken from brw->perfquery.sys_vars.n_eu_slices.
 */
static uint64_t
hsw__render_basic__sampler_texels__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* RPN equation: $Sampler0Texels $Sampler1Texels UADD $EuSlicesTotalCount UMUL */
   const uint64_t sampler0 = hsw__render_basic__sampler0_texels__read(brw, query, accumulator);
   const uint64_t sampler1 = hsw__render_basic__sampler1_texels__read(brw, query, accumulator);
   const uint64_t both = sampler0 + sampler1;
   const uint64_t result = both * brw->perfquery.sys_vars.n_eu_slices;

   return result;
}

/* Render Metrics Basic Gen7.5 :: GS Duration
 *
 * Computes, entirely in unsigned 64-bit integer arithmetic:
 *
 *    share = A22 * A0 / (A2 + A7 + A12 + A17 + A22 + A27)
 *          + A23 * A1 / (A3 + A8 + A13 + A18 + A23 + A28)
 *    result = share * $GpuTime / ($GpuCoreClocks * $EuCoresTotalCount * 1000)
 *
 * Every division whose divisor is 0 evaluates to 0.
 */
static uint64_t
hsw__render_basic__gs_duration__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 22 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 23 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */
   const uint64_t *a = accumulator + query->a_offset;

   /* First term: built from the even-numbered stage counters and A0. */
   const uint64_t active_product = a[22] * a[0];
   const uint64_t active_sum = a[2] + a[7] + a[12] + a[17] + a[22] + a[27];
   const uint64_t active_term = active_sum ? active_product / active_sum : 0;

   /* Second term: built from the matching "+1" counters and A1. */
   const uint64_t stall_product = a[23] * a[1];
   const uint64_t stall_sum = a[3] + a[8] + a[13] + a[18] + a[23] + a[28];
   const uint64_t stall_term = stall_sum ? stall_product / stall_sum : 0;

   const uint64_t numerator =
      (active_term + stall_term) *
      hsw__render_basic__gpu_time__read(brw, query, accumulator);
   const uint64_t eu_clocks =
      hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator) *
      brw->perfquery.sys_vars.n_eus;
   const uint64_t denominator = eu_clocks * 1000;

   if (denominator == 0)
      return 0;

   return numerator / denominator;
}

/* Render Metrics Basic Gen7.5 :: AVG GPU Core Frequency
 *
 * Multiplies $GpuCoreClocks by 1000000000 and integer-divides the product by
 * $GpuTime; returns 0 when $GpuTime is 0.
 *
 * NOTE(review): the multiplication is done in uint64_t, so it wraps once the
 * clock count exceeds UINT64_MAX / 1000000000 (roughly 1.8e10).
 */
static uint64_t
hsw__render_basic__avg_gpu_core_frequency__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV */
   const uint64_t scaled_clocks =
      hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t gpu_time = hsw__render_basic__gpu_time__read(brw, query, accumulator);

   if (gpu_time == 0)
      return 0;

   return scaled_clocks / gpu_time;
}

/* Render Metrics Basic Gen7.5 :: AVG GPU Core Frequency
 *
 * Upper bound for the metric: the value stored in
 * brw->perfquery.sys_vars.gt_max_freq, returned as uint64_t.
 */
static uint64_t
hsw__render_basic__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   /* RPN equation: $GpuMaxFrequency */
   const uint64_t max_frequency = brw->perfquery.sys_vars.gt_max_freq;

   return max_frequency;
}

/* Render Metrics Basic Gen7.5 :: EU Idle
 *
 * Returns 100 minus the sum of $EuActive and $EuStall.
 */
static float
hsw__render_basic__eu_idle__read(struct brw_context *brw,
                                 const struct brw_perf_query_info *query,
                                 uint64_t *accumulator)
{
   /* RPN equation: 100 $EuActive $EuStall FADD FSUB */
   const float active = hsw__render_basic__eu_active__read(brw, query, accumulator);
   const float stall = hsw__render_basic__eu_stall__read(brw, query, accumulator);

   /* The two float results are added as floats before widening to double. */
   const double busy = active + stall;
   const double idle = 100 - busy;

   return idle;
}

/* Render Metrics Basic Gen7.5 :: GTI Depth Throughput
 *
 * Returns accumulator entry C0 multiplied by 64.
 */
static uint64_t
hsw__render_basic__gti_depth_throughput__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: C 0 READ 64 UMUL */
   const uint64_t count = accumulator[query->c_offset + 0];

   return count * 64;
}

/* Render Metrics Basic Gen7.5 :: GTI Write Throughput
 *
 * Returns accumulator entry C7 multiplied by 64.
 */
static uint64_t
hsw__render_basic__gti_write_throughput__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: C 7 READ 64 UMUL */
   const uint64_t count = accumulator[query->c_offset + 7];

   return count * 64;
}

/* Render Metrics Basic Gen7.5 :: FS AVG Stall per Thread
 *
 * Integer-divides accumulator entry A28 by $PsThreads; returns 0 when the
 * thread count is 0.
 */
static uint64_t
hsw__render_basic__ps_eu_stall_per_thread__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: A 28 READ $PsThreads UDIV */
   const uint64_t total = accumulator[query->a_offset + 28];
   const uint64_t threads = hsw__render_basic__ps_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return total / threads;
}

/* Render Metrics Basic Gen7.5 :: GTI L3 Throughput
 *
 * Accumulator slot C4 multiplied by 64.
 */
static uint64_t
hsw__render_basic__gti_l3_throughput__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   /* RPN equation: C 4 READ 64 UMUL */
   const uint64_t c4 = accumulator[query->c_offset + 4];

   return c4 * 64;
}

/* Render Metrics Basic Gen7.5 :: VS AVG Stall per Thread
 *
 * Integer quotient of accumulator slot A3 by the VS thread count
 * ($VsThreads); yields 0 when no threads were counted.
 */
static uint64_t
hsw__render_basic__vs_eu_stall_per_thread__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: A 3 READ $VsThreads UDIV */
   const uint64_t stall_sum = accumulator[query->a_offset + 3];
   const uint64_t threads =
      hsw__render_basic__vs_threads__read(brw, query, accumulator);

   /* Guard the unsigned division against a zero divisor. */
   if (threads == 0)
      return 0;

   return stall_sum / threads;
}

/* Render Metrics Basic Gen7.5 :: Samples Blended
 *
 * Accumulator slot C5 scaled by (n_eu_slices * 4), where n_eu_slices is the
 * $EuSlicesTotalCount system variable.
 */
static uint64_t
hsw__render_basic__samples_blended__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: C 5 READ $EuSlicesTotalCount 4 UMUL UMUL */
   const uint64_t slice_scale = brw->perfquery.sys_vars.n_eu_slices * 4;

   return accumulator[query->c_offset + 5] * slice_scale;
}

/* Render Metrics Basic Gen7.5 :: GPU Busy
 *
 * (A41 * 100) / GpuCoreClocks. The multiplication is done on unsigned
 * integers, the division in floating point; the result is 0 when the
 * core-clock count is 0.
 */
static float
hsw__render_basic__gpu_busy__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: A 41 READ 100 UMUL $GpuCoreClocks FDIV */
   const double scaled_busy = accumulator[query->a_offset + 41] * 100;
   const double core_clocks =
      hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator);

   /* Avoid dividing by zero when no clocks were recorded. */
   if (core_clocks == 0)
      return 0;

   return scaled_busy / core_clocks;
}

/* Render Metrics Basic Gen7.5 :: FS AVG Active per Thread
 *
 * Integer quotient of accumulator slot A27 by the FS thread count
 * ($PsThreads); yields 0 when no threads were counted.
 */
static uint64_t
hsw__render_basic__ps_eu_active_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 27 READ $PsThreads UDIV */
   const uint64_t active_sum = accumulator[query->a_offset + 27];
   const uint64_t threads =
      hsw__render_basic__ps_threads__read(brw, query, accumulator);

   /* Guard the unsigned division against a zero divisor. */
   if (threads == 0)
      return 0;

   return active_sum / threads;
}

/* Render Metrics Basic Gen7.5 :: Early Depth Test Fails
 *
 * Returns accumulator slot A35 unmodified.
 */
static uint64_t
hsw__render_basic__early_depth_test_fails__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: A 35 READ */
   return accumulator[query->a_offset + 35];
}

/* Render Metrics Basic Gen7.5 :: TCS Duration
 *
 * Evaluates, entirely in unsigned 64-bit integer arithmetic:
 *
 *    ((A7 * A0) / (A2 + A7 + A12 + A17 + A22 + A27) +
 *     (A8 * A1) / (A3 + A8 + A13 + A18 + A23 + A28)) * GpuTime
 *    / (GpuCoreClocks * n_eus * 1000)
 *
 * Every division whose divisor is 0 contributes 0 instead.
 */
static uint64_t
hsw__render_basic__hs_duration__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 7 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 8 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */
   const uint64_t *a = accumulator + query->a_offset;

   /* Divisors of the two ratios. */
   const uint64_t first_total = a[2] + a[7] + a[12] + a[17] + a[22] + a[27];
   const uint64_t second_total = a[3] + a[8] + a[13] + a[18] + a[23] + a[28];

   /* This stage's A7/A8 values, weighted by A0/A1, over the totals. */
   const uint64_t first_ratio = first_total ? (a[7] * a[0]) / first_total : 0;
   const uint64_t second_ratio = second_total ? (a[8] * a[1]) / second_total : 0;

   const uint64_t numerator =
      (first_ratio + second_ratio) *
      hsw__render_basic__gpu_time__read(brw, query, accumulator);

   uint64_t denominator =
      hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator) *
      brw->perfquery.sys_vars.n_eus;
   denominator *= 1000;

   if (denominator == 0)
      return 0;

   return numerator / denominator;
}

/* Render Metrics Basic Gen7.5 :: TES AVG Stall per Thread
 *
 * Integer quotient of accumulator slot A13 by the TES thread count
 * ($DsThreads); yields 0 when no threads were counted.
 */
static uint64_t
hsw__render_basic__ds_eu_stall_per_thread__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: A 13 READ $DsThreads UDIV */
   const uint64_t stall_sum = accumulator[query->a_offset + 13];
   const uint64_t threads =
      hsw__render_basic__ds_threads__read(brw, query, accumulator);

   /* Guard the unsigned division against a zero divisor. */
   if (threads == 0)
      return 0;

   return stall_sum / threads;
}

/* Render Metrics Basic Gen7.5 :: GS AVG Active per Thread
 *
 * Integer quotient of accumulator slot A22 by the GS thread count
 * ($GsThreads); yields 0 when no threads were counted.
 */
static uint64_t
hsw__render_basic__gs_eu_active_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 22 READ $GsThreads UDIV */
   const uint64_t active_sum = accumulator[query->a_offset + 22];
   const uint64_t threads =
      hsw__render_basic__gs_threads__read(brw, query, accumulator);

   /* Guard the unsigned division against a zero divisor. */
   if (threads == 0)
      return 0;

   return active_sum / threads;
}

/* Render Metrics Basic Gen7.5 :: TCS Threads Dispatched
 *
 * Returns accumulator slot A10 unmodified.
 */
static uint64_t
hsw__render_basic__hs_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 10 READ */
   return accumulator[query->a_offset + 10];
}

/* Render Metrics Basic Gen7.5 :: TCS AVG Stall per Thread
 *
 * Integer quotient of accumulator slot A8 by the TCS thread count
 * ($HsThreads); yields 0 when no threads were counted.
 */
static uint64_t
hsw__render_basic__hs_eu_stall_per_thread__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: A 8 READ $HsThreads UDIV */
   const uint64_t stall_sum = accumulator[query->a_offset + 8];
   const uint64_t threads =
      hsw__render_basic__hs_threads__read(brw, query, accumulator);

   /* Guard the unsigned division against a zero divisor. */
   if (threads == 0)
      return 0;

   return stall_sum / threads;
}

/* Render Metrics Basic Gen7.5 :: Samples Killed in FS
 *
 * Returns accumulator slot A36 unmodified.
 */
static uint64_t
hsw__render_basic__samples_killed_in_ps__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: A 36 READ */
   return accumulator[query->a_offset + 36];
}

/* Render Metrics Basic Gen7.5 :: Late Depth Test Fails
 *
 * Accumulator slot A39 minus the Samples Killed in FS counter. The
 * subtraction is unsigned, so the result wraps around if the subtrahend is
 * the larger of the two.
 */
static uint64_t
hsw__render_basic__post_ps_depth_test_fails__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   /* RPN equation: A 39 READ $SamplesKilledInPs USUB */
   const uint64_t a39 = accumulator[query->a_offset + 39];
   const uint64_t killed_in_ps =
      hsw__render_basic__samples_killed_in_ps__read(brw, query, accumulator);

   return a39 - killed_in_ps;
}

/* Render Metrics Basic Gen7.5 :: Sampler Bottleneck
 *
 * The larger of the Sampler 0 Bottleneck and Sampler 1 Bottleneck counter
 * values.
 */
static float
hsw__render_basic__sampler_bottleneck__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: $Sampler0Bottleneck $Sampler1Bottleneck FMAX */
   const double sampler0 =
      hsw__render_basic__sampler0_bottleneck__read(brw, query, accumulator);
   const double sampler1 =
      hsw__render_basic__sampler1_bottleneck__read(brw, query, accumulator);

   if (sampler0 > sampler1)
      return sampler0;

   return sampler1;
}

/* Render Metrics Basic Gen7.5 :: TCS AVG Active per Thread
 *
 * Integer quotient of accumulator slot A7 by the TCS thread count
 * ($HsThreads); yields 0 when no threads were counted.
 */
static uint64_t
hsw__render_basic__hs_eu_active_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 7 READ $HsThreads UDIV */
   const uint64_t active_sum = accumulator[query->a_offset + 7];
   const uint64_t threads =
      hsw__render_basic__hs_threads__read(brw, query, accumulator);

   /* Guard the unsigned division against a zero divisor. */
   if (threads == 0)
      return 0;

   return active_sum / threads;
}

/* Render Metrics Basic Gen7.5 :: FS Duration
 *
 * Evaluates, entirely in unsigned 64-bit integer arithmetic:
 *
 *    ((A27 * A0) / (A2 + A7 + A12 + A17 + A22 + A27) +
 *     (A28 * A1) / (A3 + A8 + A13 + A18 + A23 + A28)) * GpuTime
 *    / (GpuCoreClocks * n_eus * 1000)
 *
 * Every division whose divisor is 0 contributes 0 instead.
 */
static uint64_t
hsw__render_basic__ps_duration__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 27 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 28 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */
   const uint64_t *a = accumulator + query->a_offset;

   /* Divisors of the two ratios. */
   const uint64_t first_total = a[2] + a[7] + a[12] + a[17] + a[22] + a[27];
   const uint64_t second_total = a[3] + a[8] + a[13] + a[18] + a[23] + a[28];

   /* This stage's A27/A28 values, weighted by A0/A1, over the totals. */
   const uint64_t first_ratio = first_total ? (a[27] * a[0]) / first_total : 0;
   const uint64_t second_ratio = second_total ? (a[28] * a[1]) / second_total : 0;

   const uint64_t numerator =
      (first_ratio + second_ratio) *
      hsw__render_basic__gpu_time__read(brw, query, accumulator);

   uint64_t denominator =
      hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator) *
      brw->perfquery.sys_vars.n_eus;
   denominator *= 1000;

   if (denominator == 0)
      return 0;

   return numerator / denominator;
}

/* Render Metrics Basic Gen7.5 :: Early Hi-Depth Test Fails
 *
 * Returns accumulator slot A33 unmodified.
 */
static uint64_t
hsw__render_basic__hi_depth_test_fails__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: A 33 READ */
   return accumulator[query->a_offset + 33];
}

/* Render Metrics Basic Gen7.5 :: CS AVG Stall per Thread
 *
 * Integer quotient of accumulator slot A18 by the CS thread count
 * ($CsThreads); yields 0 when no threads were counted.
 */
static uint64_t
hsw__render_basic__cs_eu_stall_per_thread__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: A 18 READ $CsThreads UDIV */
   const uint64_t stall_sum = accumulator[query->a_offset + 18];
   const uint64_t threads =
      hsw__render_basic__cs_threads__read(brw, query, accumulator);

   /* Guard the unsigned division against a zero divisor. */
   if (threads == 0)
      return 0;

   return stall_sum / threads;
}

/* Render Metrics Basic Gen7.5 :: Late Stencil Test Fails
 *
 * Returns accumulator slot A38 unmodified.
 */
static uint64_t
hsw__render_basic__post_ps_stencil_test_fails__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   /* RPN equation: A 38 READ */
   return accumulator[query->a_offset + 38];
}

/* Render Metrics Basic Gen7.5 :: GTI RCC Throughput
 *
 * Accumulator slot C3 multiplied by 64.
 */
static uint64_t
hsw__render_basic__gti_rcc_throughput__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: C 3 READ 64 UMUL */
   const uint64_t c3 = accumulator[query->c_offset + 3];

   return c3 * 64;
}

/* Render Metrics Basic Gen7.5 :: L3 Sampler Throughput
 *
 * (B6 + B7) * 2 * n_eu_slices * 64, where n_eu_slices is the
 * $EuSlicesTotalCount system variable.
 */
static uint64_t
hsw__render_basic__l3_sampler_throughput__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   /* RPN equation: B 6 READ B 7 READ UADD 2 UMUL $EuSlicesTotalCount UMUL 64 UMUL */
   uint64_t throughput =
      accumulator[query->b_offset + 6] + accumulator[query->b_offset + 7];

   throughput *= 2;
   throughput *= brw->perfquery.sys_vars.n_eu_slices;
   throughput *= 64;

   return throughput;
}

/* Render Metrics Basic Gen7.5 :: VS AVG Active per Thread
 *
 * Integer quotient of accumulator slot A2 by the VS thread count
 * ($VsThreads); yields 0 when no threads were counted.
 */
static uint64_t
hsw__render_basic__vs_eu_active_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 2 READ $VsThreads UDIV */
   const uint64_t active_sum = accumulator[query->a_offset + 2];
   const uint64_t threads =
      hsw__render_basic__vs_threads__read(brw, query, accumulator);

   /* Guard the unsigned division against a zero divisor. */
   if (threads == 0)
      return 0;

   return active_sum / threads;
}

/* Render Metrics Basic Gen7.5 :: VS Duration
 *
 * Evaluates, entirely in unsigned 64-bit integer arithmetic:
 *
 *    ((A2 * A0) / (A2 + A7 + A12 + A17 + A22 + A27) +
 *     (A3 * A1) / (A3 + A8 + A13 + A18 + A23 + A28)) * GpuTime
 *    / (GpuCoreClocks * n_eus * 1000)
 *
 * Every division whose divisor is 0 contributes 0 instead.
 */
static uint64_t
hsw__render_basic__vs_duration__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 2 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 3 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */
   const uint64_t *a = accumulator + query->a_offset;

   /* Divisors of the two ratios. */
   const uint64_t first_total = a[2] + a[7] + a[12] + a[17] + a[22] + a[27];
   const uint64_t second_total = a[3] + a[8] + a[13] + a[18] + a[23] + a[28];

   /* This stage's A2/A3 values, weighted by A0/A1, over the totals. */
   const uint64_t first_ratio = first_total ? (a[2] * a[0]) / first_total : 0;
   const uint64_t second_ratio = second_total ? (a[3] * a[1]) / second_total : 0;

   const uint64_t numerator =
      (first_ratio + second_ratio) *
      hsw__render_basic__gpu_time__read(brw, query, accumulator);

   uint64_t denominator =
      hsw__render_basic__gpu_core_clocks__read(brw, query, accumulator) *
      brw->perfquery.sys_vars.n_eus;
   denominator *= 1000;

   if (denominator == 0)
      return 0;

   return numerator / denominator;
}

/* Render Metrics Basic Gen7.5 :: Samples Written
 *
 * Returns accumulator slot A40 unmodified.
 */
static uint64_t
hsw__render_basic__samples_written__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: A 40 READ */
   return accumulator[query->a_offset + 40];
}

/* Backing storage for the "Render Metrics Basic Gen7.5" query.  The arrays
 * start out empty; register_render_basic_counter_query() appends entries and
 * bumps the matching n_* count in the descriptor below as it goes.
 */
static struct brw_perf_query_register_prog hsw_render_basic_mux_regs[60];
static struct brw_perf_query_register_prog hsw_render_basic_b_counter_regs[4];
static struct brw_perf_query_counter hsw_render_basic_query_counters[67];

static struct brw_perf_query_info hsw_render_basic_query = {
   /* Identity of the metric set. */
   .kind = OA_COUNTERS,
   .name = "Render Metrics Basic Gen7.5",
   .guid = "403d8832-1a27-4aa6-a64e-f5389ce7b212",

   /* OA configuration. */
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A45_B8_C8,

   /* Accumulation buffer offsets: where the GPU time value and the A, B and
    * C counter groups start within the accumulator[] array handed to the
    * read callbacks. */
   .gpu_time_offset = 0,
   .a_offset = 1,
   .b_offset = 46,
   .c_offset = 54,

   /* Counter descriptions. */
   .counters = hsw_render_basic_query_counters,
   .n_counters = 0,

   /* Register programming tables. */
   .mux_regs = hsw_render_basic_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = hsw_render_basic_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
};

static void
register_render_basic_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &hsw_render_basic_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000253A4, .val = 0x01600000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025440, .val = 0x00100000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025128, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002691C, .val = 0x00000800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026AA0, .val = 0x01500000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026B9C, .val = 0x00006000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002791C, .val = 0x00000800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027AA0, .val = 0x01500000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027B9C, .val = 0x00006000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002641C, .val = 0x00000400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025380, .val = 0x00000010 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002538C, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025384, .val = 0x0800AAAA };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025400, .val = 0x00000004 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002540C, .val = 0x06029000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025410, .val = 0x00000002 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025404, .val = 0x5C30FFFF };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025100, .val = 0x00000016 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025110, .val = 0x00000400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025104, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026804, .val = 0x00001211 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026884, .val = 0x00000100 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026900, .val = 0x00000002 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026908, .val = 0x00700000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026904, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026984, .val = 0x00001022 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026A04, .val = 0x00000011 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026A80, .val = 0x00000006 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026A88, .val = 0x00000C02 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026A84, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026B04, .val = 0x00001000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026B80, .val = 0x00000002 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026B8C, .val = 0x00000007 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026B84, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027804, .val = 0x00004844 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027884, .val = 0x00000400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027900, .val = 0x00000002 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027908, .val = 0x0E000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027904, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027984, .val = 0x00004088 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027A04, .val = 0x00000044 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027A80, .val = 0x00000006 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027A88, .val = 0x00018040 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027A84, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027B04, .val = 0x00004000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027B80, .val = 0x00000002 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027B8C, .val = 0x000000E0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027B84, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026104, .val = 0x00002222 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026184, .val = 0x0C006666 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026284, .val = 0x04000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026304, .val = 0x04000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026400, .val = 0x00000002 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026410, .val = 0x000000A0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026404, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025420, .val = 0x04108020 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025424, .val = 0x1284A420 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002541C, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025428, .val = 0x00042049 };

      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0x00800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0x00800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__ds_eu_stall__read;
      counter->name = "TES EU Stall";
      counter->desc = "The percentage of time in which evaluation shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 12;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__alpha_test_fails__read;
      counter->name = "Alpha Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS alpha test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 16;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = hsw__render_basic__sampler1_bottleneck__read;
         counter->name = "Sampler 1 Bottleneck";
         counter->desc = "The percentage of time in which sampler 1 was bottlenecks.";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 24;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__ds_threads__read;
      counter->name = "TES Threads Dispatched";
      counter->desc = "The total number of evaluation shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 32;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__ds_eu_active_per_thread__read;
      counter->name = "TES AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 40;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 48;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__gs_eu_stall__read;
      counter->name = "GS EU Stall";
      counter->desc = "The percentage of time in which geometry shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 56;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__cs_eu_active__read;
      counter->name = "CS EU Active";
      counter->desc = "The percentage of time in which compute shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 60;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__vs_eu_active__read;
      counter->name = "VS EU Active";
      counter->desc = "The percentage of time in which vertex shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 64;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__hs_eu_active__read;
      counter->name = "TCS EU Active";
      counter->desc = "The percentage of time in which control shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 68;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__ds_eu_active__read;
      counter->name = "TES EU Active";
      counter->desc = "The percentage of time in which evaluation shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 72;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__gs_eu_active__read;
      counter->name = "GS EU Active";
      counter->desc = "The percentage of time in which geometry shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 76;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__ps_eu_active__read;
      counter->name = "FS EU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 80;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__cs_eu_stall__read;
      counter->name = "CS EU Stall";
      counter->desc = "The percentage of time in which compute shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 84;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 88;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__vs_eu_stall__read;
      counter->name = "VS EU Stall";
      counter->desc = "The percentage of time in which vertex shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 92;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__hs_eu_stall__read;
      counter->name = "TCS EU Stall";
      counter->desc = "The percentage of time in which control shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 96;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__ps_eu_stall__read;
      counter->name = "FS EU Stall";
      counter->desc = "The percentage of time in which fragment shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 100;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 104;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__cs_duration__read;
      counter->name = "CS Duration";
      counter->desc = "Total Compute Shader GPU duration.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 120;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 128;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = hsw__render_basic__sampler0_busy__read;
         counter->name = "Sampler 0 Busy";
         counter->desc = "The percentage of time in which sampler 0 was busy.";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 136;
         counter->size = sizeof(float);
      }

      if (brw->perfquery.sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = hsw__render_basic__sampler1_busy__read;
         counter->name = "Sampler 1 Busy";
         counter->desc = "The percentage of time in which sampler 1 was busy.";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 140;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__samplers_busy__read;
      counter->name = "Samplers Busy";
      counter->desc = "The percentage of time in which samplers were busy.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 144;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__ds_duration__read;
      counter->name = "TES Duration";
      counter->desc = "Total Evaluation Shader GPU duration.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 152;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gti_vf_throughput__read;
      counter->name = "GTI Fixed Pipe Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 160;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 168;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 176;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__cs_eu_active_per_thread__read;
      counter->name = "CS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 184;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = hsw__render_basic__sampler0_bottleneck__read;
         counter->name = "Sampler 0 Bottleneck";
         counter->desc = "The percentage of time in which sampler 0 was bottlenecks.";
         counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
         counter->raw_max = 100;
         counter->offset = 192;
         counter->size = sizeof(float);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gs_eu_stall_per_thread__read;
      counter->name = "GS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 200;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__render_basic__sampler0_texels__read;
         counter->name = "Sampler 0 Texels LOD0";
         counter->desc = "The total number of texels lookups in LOD0 in sampler 0 unit.";
         counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
         counter->raw_max = 0; /* undefined */
         counter->offset = 208;
         counter->size = sizeof(uint64_t);
      }

      if (brw->perfquery.sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__render_basic__sampler1_texels__read;
         counter->name = "Sampler 1 Texels LOD0";
         counter->desc = "The total number of texels lookups in LOD0 in sampler 1 unit.";
         counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
         counter->raw_max = 0; /* undefined */
         counter->offset = 216;
         counter->size = sizeof(uint64_t);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels LOD0";
      counter->desc = "The total number of texels lookups in LOD0 in all sampler units.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 224;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gs_duration__read;
      counter->name = "GS Duration";
      counter->desc = "Total Geometry Shader GPU duration.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 232;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = hsw__render_basic__avg_gpu_core_frequency__max(brw);
      counter->offset = 240;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__eu_idle__read;
      counter->name = "EU Idle";
      counter->desc = "The percentage of time in which the Execution Units were idle.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 248;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gti_depth_throughput__read;
      counter->name = "GTI Depth Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between depth caches and GTI.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 256;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 264;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__ps_eu_stall_per_thread__read;
      counter->name = "FS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 272;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 280;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__vs_eu_stall_per_thread__read;
      counter->name = "VS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 288;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 296;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has being processing GPU commands.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 304;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__ps_eu_active_per_thread__read;
      counter->name = "FS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 312;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 320;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__hs_duration__read;
      counter->name = "TCS Duration";
      counter->desc = "Total Control Shader GPU duration.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 328;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__ds_eu_stall_per_thread__read;
      counter->name = "TES AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 336;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gs_eu_active_per_thread__read;
      counter->name = "GS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 344;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__hs_threads__read;
      counter->name = "TCS Threads Dispatched";
      counter->desc = "The total number of control shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 352;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__hs_eu_stall_per_thread__read;
      counter->name = "TCS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 360;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 368;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__post_ps_depth_test_fails__read;
      counter->name = "Late Depth Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 376;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__sampler_bottleneck__read;
      counter->name = "Sampler Bottleneck";
      counter->desc = "The percentage of time in which samplers were bottlenecks.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 384;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__hs_eu_active_per_thread__read;
      counter->name = "TCS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 392;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__ps_duration__read;
      counter->name = "FS Duration";
      counter->desc = "Total Fragment Shader GPU duration.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 400;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 408;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__cs_eu_stall_per_thread__read;
      counter->name = "CS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 416;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__post_ps_stencil_test_fails__read;
      counter->name = "Late Stencil Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS stencil test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 424;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gti_rcc_throughput__read;
      counter->name = "GTI RCC Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between render color caches and GTI.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 432;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 440;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__vs_eu_active_per_thread__read;
      counter->name = "VS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 448;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__vs_duration__read;
      counter->name = "VS Duration";
      counter->desc = "Total Vertex Shader GPU duration.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 456;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 464;
      counter->size = sizeof(uint64_t);

      query->data_size = counter->offset + counter->size;
   }

   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Compute Metrics Basic Gen7.5 :: GPU Core Clocks
 *
 * Returns the accumulated value stored two slots past the query's
 * c_offset in the accumulator array.  The brw argument is not used.
 */
static uint64_t
hsw__compute_basic__gpu_core_clocks__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   /* RPN equation: C 2 READ */
   return accumulator[query->c_offset + 2];
}

/* Compute Metrics Basic Gen7.5 :: EU Active
 *
 * Reads A counter 0 from the accumulator, divides it by the EU count using
 * unsigned integer division, multiplies by 100, and finally divides (in
 * floating point) by the GPU core clocks value.  A zero divisor at either
 * step produces 0 for that step instead of dividing.
 */
static float
hsw__compute_basic__eu_active__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 0 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw_count = accumulator[query->a_offset + 0];
   const uint64_t eu_total = brw->perfquery.sys_vars.n_eus;
   /* Integer divide first, then scale, exactly as the equation orders it. */
   const uint64_t scaled = (eu_total != 0 ? raw_count / eu_total : 0) * 100;
   const double numerator = scaled;
   const double core_clocks =
      hsw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);
   double ratio = 0;

   if (core_clocks != 0)
      ratio = numerator / core_clocks;

   return ratio;
}

/* Compute Metrics Basic Gen7.5 :: TES EU Stall
 *
 * Reads A counter 13 from the accumulator, divides it by the EU count using
 * unsigned integer division, multiplies by 100, and finally divides (in
 * floating point) by the GPU core clocks value.  A zero divisor at either
 * step produces 0 for that step instead of dividing.
 */
static float
hsw__compute_basic__ds_eu_stall__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw_count = accumulator[query->a_offset + 13];
   const uint64_t eu_total = brw->perfquery.sys_vars.n_eus;
   /* Integer divide first, then scale, exactly as the equation orders it. */
   const uint64_t scaled = (eu_total != 0 ? raw_count / eu_total : 0) * 100;
   const double numerator = scaled;
   const double core_clocks =
      hsw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);
   double ratio = 0;

   if (core_clocks != 0)
      ratio = numerator / core_clocks;

   return ratio;
}

/* Compute Metrics Basic Gen7.5 :: Typed Bytes Written
 *
 * Sums B counters 6 and 7 and multiplies the sum by (slice count * 64).
 */
static uint64_t
hsw__compute_basic__typed_bytes_written__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: B 6 READ B 7 READ UADD $EuSlicesTotalCount 64 UMUL UMUL */
   const uint64_t *b = &accumulator[query->b_offset];
   const uint64_t sum = b[6] + b[7];
   const uint64_t scale = brw->perfquery.sys_vars.n_eu_slices * 64;

   return sum * scale;
}

/* Compute Metrics Basic Gen7.5 :: Alpha Test Fails
 *
 * Returns accumulator slot (a_offset + 37) unchanged.
 */
static uint64_t
hsw__compute_basic__alpha_test_fails__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   /* RPN equation: A 37 READ */
   return accumulator[query->a_offset + 37];
}

/* Compute Metrics Basic Gen7.5 :: TCS Threads Dispatched
 *
 * Returns accumulator slot (a_offset + 10) unchanged.
 */
static uint64_t
hsw__compute_basic__hs_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 10 READ */
   return accumulator[query->a_offset + 10];
}

/* Compute Metrics Basic Gen7.5 :: TCS AVG Active per Thread
 *
 * Integer quotient of A counter 7 by the value returned from
 * hsw__compute_basic__hs_threads__read(); 0 when that value is zero.
 */
static uint64_t
hsw__compute_basic__hs_eu_active_per_thread__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   /* RPN equation: A 7 READ $HsThreads UDIV */
   const uint64_t numer = accumulator[query->a_offset + 7];
   const uint64_t threads = hsw__compute_basic__hs_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return numer / threads;
}

/* Compute Metrics Basic Gen7.5 :: GS Threads Dispatched
 *
 * Returns accumulator slot (a_offset + 25) unchanged.
 */
static uint64_t
hsw__compute_basic__gs_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 25 READ */
   return accumulator[query->a_offset + 25];
}

/* Compute Metrics Basic Gen7.5 :: GS EU Stall
 *
 * Takes A counter 23, integer-divides it by the EU count, scales by 100 and
 * then divides (floating point) by the GPU core clock count.  A zero divisor
 * at either step produces 0.
 */
static float
hsw__compute_basic__gs_eu_stall__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 23 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 23];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = (double)(per_eu * 100);
   const double clocks = hsw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (!clocks)
      return 0;

   return scaled / clocks;
}

/* Compute Metrics Basic Gen7.5 :: VS Threads Dispatched
 *
 * Returns accumulator slot (a_offset + 5) unchanged.
 */
static uint64_t
hsw__compute_basic__vs_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 5 READ */
   return accumulator[query->a_offset + 5];
}

/* Compute Metrics Basic Gen7.5 :: FS Threads Dispatched
 *
 * Returns accumulator slot (a_offset + 30) unchanged.
 */
static uint64_t
hsw__compute_basic__ps_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 30 READ */
   return accumulator[query->a_offset + 30];
}

/* Compute Metrics Basic Gen7.5 :: CS Threads Dispatched
 *
 * Returns accumulator slot (a_offset + 20) unchanged.
 */
static uint64_t
hsw__compute_basic__cs_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 20 READ */
   return accumulator[query->a_offset + 20];
}

/* Compute Metrics Basic Gen7.5 :: CS AVG Active per Thread
 *
 * Integer quotient of A counter 17 by the value returned from
 * hsw__compute_basic__cs_threads__read(); 0 when that value is zero.
 */
static uint64_t
hsw__compute_basic__cs_eu_active_per_thread__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   /* RPN equation: A 17 READ $CsThreads UDIV */
   const uint64_t numer = accumulator[query->a_offset + 17];
   const uint64_t threads = hsw__compute_basic__cs_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return numer / threads;
}

/* Compute Metrics Basic Gen7.5 :: Untyped Bytes Read
 *
 * Sums B counters 0 and 1 and multiplies the sum by (slice count * 64).
 */
static uint64_t
hsw__compute_basic__untyped_bytes_read__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: B 0 READ B 1 READ UADD $EuSlicesTotalCount 64 UMUL UMUL */
   const uint64_t *b = &accumulator[query->b_offset];
   const uint64_t sum = b[0] + b[1];
   const uint64_t scale = brw->perfquery.sys_vars.n_eu_slices * 64;

   return sum * scale;
}

/* Compute Metrics Basic Gen7.5 :: GS AVG Stall per Thread
 *
 * Integer quotient of A counter 23 by the value returned from
 * hsw__compute_basic__gs_threads__read(); 0 when that value is zero.
 */
static uint64_t
hsw__compute_basic__gs_eu_stall_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 23 READ $GsThreads UDIV */
   const uint64_t numer = accumulator[query->a_offset + 23];
   const uint64_t threads = hsw__compute_basic__gs_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return numer / threads;
}

/* Compute Metrics Basic Gen7.5 :: Typed Atomics
 *
 * Sums C counters 0 and 1 and multiplies the sum by the slice count.
 */
static uint64_t
hsw__compute_basic__typed_atomics__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* RPN equation: C 0 READ C 1 READ UADD $EuSlicesTotalCount UMUL */
   const uint64_t *c = &accumulator[query->c_offset];
   const uint64_t sum = c[0] + c[1];

   return sum * brw->perfquery.sys_vars.n_eu_slices;
}

/* Compute Metrics Basic Gen7.5 :: GPU Time Elapsed
 *
 * Multiplies the GPU_TIME accumulator slot by 1000000000 and integer-divides
 * the product by the timestamp frequency; 0 when the frequency is zero.
 */
static uint64_t
hsw__compute_basic__gpu_time__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   /* RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV */
   const uint64_t ticks = accumulator[query->gpu_time_offset + 0];
   const uint64_t scaled = ticks * 1000000000;
   const uint64_t freq = brw->perfquery.sys_vars.timestamp_frequency;

   if (freq == 0)
      return 0;

   return scaled / freq;
}

/* Compute Metrics Basic Gen7.5 :: AVG GPU Core Frequency
 *
 * Multiplies the GPU core clock count by 1000000000 and integer-divides the
 * product by the value of hsw__compute_basic__gpu_time__read(); 0 when that
 * value is zero.
 */
static uint64_t
hsw__compute_basic__avg_gpu_core_frequency__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV */
   const uint64_t scaled_clocks =
      hsw__compute_basic__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t elapsed = hsw__compute_basic__gpu_time__read(brw, query, accumulator);

   if (elapsed == 0)
      return 0;

   return scaled_clocks / elapsed;
}

/* Compute Metrics Basic Gen7.5 :: AVG GPU Core Frequency
 *
 * Upper bound for the counter: the gt_max_freq system variable.
 */
static uint64_t
hsw__compute_basic__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   /* RPN equation: $GpuMaxFrequency */
   const uint64_t max_freq = brw->perfquery.sys_vars.gt_max_freq;

   return max_freq;
}

/* Compute Metrics Basic Gen7.5 :: SLM Bytes Read
 *
 * Sums C counters 6 and 7 and multiplies the sum by (slice count * 64).
 */
static uint64_t
hsw__compute_basic__slm_bytes_read__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: C 6 READ C 7 READ UADD $EuSlicesTotalCount 64 UMUL UMUL */
   const uint64_t *c = &accumulator[query->c_offset];
   const uint64_t sum = c[6] + c[7];
   const uint64_t scale = brw->perfquery.sys_vars.n_eu_slices * 64;

   return sum * scale;
}

/* Compute Metrics Basic Gen7.5 :: FS AVG Stall per Thread
 *
 * Integer quotient of A counter 28 by the value returned from
 * hsw__compute_basic__ps_threads__read(); 0 when that value is zero.
 */
static uint64_t
hsw__compute_basic__ps_eu_stall_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 28 READ $PsThreads UDIV */
   const uint64_t numer = accumulator[query->a_offset + 28];
   const uint64_t threads = hsw__compute_basic__ps_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return numer / threads;
}

/* Compute Metrics Basic Gen7.5 :: TES EU Active
 *
 * Takes A counter 12, integer-divides it by the EU count, scales by 100 and
 * then divides (floating point) by the GPU core clock count.  A zero divisor
 * at either step produces 0.
 */
static float
hsw__compute_basic__ds_eu_active__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   /* RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 12];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = (double)(per_eu * 100);
   const double clocks = hsw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (!clocks)
      return 0;

   return scaled / clocks;
}

/* Compute Metrics Basic Gen7.5 :: VS AVG Stall per Thread
 *
 * Integer quotient of A counter 3 by the value returned from
 * hsw__compute_basic__vs_threads__read(); 0 when that value is zero.
 */
static uint64_t
hsw__compute_basic__vs_eu_stall_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 3 READ $VsThreads UDIV */
   const uint64_t numer = accumulator[query->a_offset + 3];
   const uint64_t threads = hsw__compute_basic__vs_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return numer / threads;
}

/* Compute Metrics Basic Gen7.5 :: TCS EU Stall
 *
 * Takes A counter 8, integer-divides it by the EU count, scales by 100 and
 * then divides (floating point) by the GPU core clock count.  A zero divisor
 * at either step produces 0.
 */
static float
hsw__compute_basic__hs_eu_stall__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 8];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = (double)(per_eu * 100);
   const double clocks = hsw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (!clocks)
      return 0;

   return scaled / clocks;
}

/* Compute Metrics Basic Gen7.5 :: GPU Busy
 *
 * Scales A counter 41 by 100 and divides (floating point) by the GPU core
 * clock count; 0 when the clock count is zero.
 */
static float
hsw__compute_basic__gpu_busy__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   /* RPN equation: A 41 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 41];
   const double scaled = (double)(raw * 100);
   const double clocks = hsw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (!clocks)
      return 0;

   return scaled / clocks;
}

/* Compute Metrics Basic Gen7.5 :: FS AVG Active per Thread
 *
 * Integer quotient of A counter 27 by the value returned from
 * hsw__compute_basic__ps_threads__read(); 0 when that value is zero.
 */
static uint64_t
hsw__compute_basic__ps_eu_active_per_thread__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   /* RPN equation: A 27 READ $PsThreads UDIV */
   const uint64_t numer = accumulator[query->a_offset + 27];
   const uint64_t threads = hsw__compute_basic__ps_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return numer / threads;
}

/* Compute Metrics Basic Gen7.5 :: Early Depth Test Fails
 *
 * Returns accumulator slot (a_offset + 35) unchanged.
 */
static uint64_t
hsw__compute_basic__early_depth_test_fails__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 35 READ */
   return accumulator[query->a_offset + 35];
}

/* Compute Metrics Basic Gen7.5 :: TES Threads Dispatched
 *
 * Returns accumulator slot (a_offset + 15) unchanged.
 */
static uint64_t
hsw__compute_basic__ds_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 15 READ */
   return accumulator[query->a_offset + 15];
}

/* Compute Metrics Basic Gen7.5 :: TES AVG Active per Thread
 *
 * Integer quotient of A counter 12 by the value returned from
 * hsw__compute_basic__ds_threads__read(); 0 when that value is zero.
 */
static uint64_t
hsw__compute_basic__ds_eu_active_per_thread__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   /* RPN equation: A 12 READ $DsThreads UDIV */
   const uint64_t numer = accumulator[query->a_offset + 12];
   const uint64_t threads = hsw__compute_basic__ds_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return numer / threads;
}

/* Compute Metrics Basic Gen7.5 :: GS EU Active
 *
 * Takes A counter 22, integer-divides it by the EU count, scales by 100 and
 * then divides (floating point) by the GPU core clock count.  A zero divisor
 * at either step produces 0.
 */
static float
hsw__compute_basic__gs_eu_active__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   /* RPN equation: A 22 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 22];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = (double)(per_eu * 100);
   const double clocks = hsw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (!clocks)
      return 0;

   return scaled / clocks;
}

/* Compute Metrics Basic Gen7.5 :: FS EU Active
 *
 * Takes A counter 27, integer-divides it by the EU count, scales by 100 and
 * then divides (floating point) by the GPU core clock count.  A zero divisor
 * at either step produces 0.
 */
static float
hsw__compute_basic__ps_eu_active__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   /* RPN equation: A 27 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 27];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = (double)(per_eu * 100);
   const double clocks = hsw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (!clocks)
      return 0;

   return scaled / clocks;
}

/* Compute Metrics Basic Gen7.5 :: TES AVG Stall per Thread
 *
 * Integer quotient of A counter 13 by the value returned from
 * hsw__compute_basic__ds_threads__read(); 0 when that value is zero.
 */
static uint64_t
hsw__compute_basic__ds_eu_stall_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 13 READ $DsThreads UDIV */
   const uint64_t numer = accumulator[query->a_offset + 13];
   const uint64_t threads = hsw__compute_basic__ds_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return numer / threads;
}

/* Compute Metrics Basic Gen7.5 :: GS AVG Active per Thread
 *
 * Integer quotient of A counter 22 by the value returned from
 * hsw__compute_basic__gs_threads__read(); 0 when that value is zero.
 */
static uint64_t
hsw__compute_basic__gs_eu_active_per_thread__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   /* RPN equation: A 22 READ $GsThreads UDIV */
   const uint64_t numer = accumulator[query->a_offset + 22];
   const uint64_t threads = hsw__compute_basic__gs_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return numer / threads;
}

/* Compute Metrics Basic Gen7.5 :: Untyped Writes
 *
 * Sums B counters 2 and 3 and multiplies the sum by (slice count * 64).
 */
static uint64_t
hsw__compute_basic__untyped_bytes_written__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: B 2 READ B 3 READ UADD $EuSlicesTotalCount 64 UMUL UMUL */
   const uint64_t *b = &accumulator[query->b_offset];
   const uint64_t sum = b[2] + b[3];
   const uint64_t scale = brw->perfquery.sys_vars.n_eu_slices * 64;

   return sum * scale;
}

/* Compute Metrics Basic Gen7.5 :: SLM Bytes Written
 *
 * Sums C counters 4 and 5 and multiplies the sum by (slice count * 64).
 */
static uint64_t
hsw__compute_basic__slm_bytes_written__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: C 4 READ C 5 READ UADD $EuSlicesTotalCount 64 UMUL UMUL */
   const uint64_t *c = &accumulator[query->c_offset];
   const uint64_t sum = c[4] + c[5];
   const uint64_t scale = brw->perfquery.sys_vars.n_eu_slices * 64;

   return sum * scale;
}

/* Compute Metrics Basic Gen7.5 :: TCS AVG Stall per Thread
 *
 * Integer quotient of A counter 8 by the value returned from
 * hsw__compute_basic__hs_threads__read(); 0 when that value is zero.
 */
static uint64_t
hsw__compute_basic__hs_eu_stall_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 8 READ $HsThreads UDIV */
   const uint64_t numer = accumulator[query->a_offset + 8];
   const uint64_t threads = hsw__compute_basic__hs_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return numer / threads;
}

/* Compute Metrics Basic Gen7.5 :: TCS EU Active
 *
 * Takes A counter 7, integer-divides it by the EU count, scales by 100 and
 * then divides (floating point) by the GPU core clock count.  A zero divisor
 * at either step produces 0.
 */
static float
hsw__compute_basic__hs_eu_active__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   /* RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 7];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = (double)(per_eu * 100);
   const double clocks = hsw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (!clocks)
      return 0;

   return scaled / clocks;
}

/* Compute Metrics Basic Gen7.5 :: Samples Killed in FS
 *
 * Returns accumulator slot (a_offset + 36) unchanged.
 */
static uint64_t
hsw__compute_basic__samples_killed_in_ps__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   /* RPN equation: A 36 READ */
   return accumulator[query->a_offset + 36];
}

/* Compute Metrics Basic Gen7.5 :: Late Depth Test Fails
 *
 * A counter 39 minus the value of
 * hsw__compute_basic__samples_killed_in_ps__read() (unsigned subtraction).
 */
static uint64_t
hsw__compute_basic__post_ps_depth_test_fails__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   /* RPN equation: A 39 READ $SamplesKilledInPs USUB */
   const uint64_t minuend = accumulator[query->a_offset + 39];
   const uint64_t subtrahend =
      hsw__compute_basic__samples_killed_in_ps__read(brw, query, accumulator);

   return minuend - subtrahend;
}

/* Compute Metrics Basic Gen7.5 :: FS EU Stall
 *
 * Takes A counter 28, integer-divides it by the EU count, scales by 100 and
 * then divides (floating point) by the GPU core clock count.  A zero divisor
 * at either step produces 0.
 */
static float
hsw__compute_basic__ps_eu_stall__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 28 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 28];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = (double)(per_eu * 100);
   const double clocks = hsw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (!clocks)
      return 0;

   return scaled / clocks;
}

/* Compute Metrics Basic Gen7.5 :: EU Stall
 *
 * Takes A counter 1, integer-divides it by the EU count, scales by 100 and
 * then divides (floating point) by the GPU core clock count.  A zero divisor
 * at either step produces 0.
 */
static float
hsw__compute_basic__eu_stall__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   /* RPN equation: A 1 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 1];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = (double)(per_eu * 100);
   const double clocks = hsw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (!clocks)
      return 0;

   return scaled / clocks;
}

/* Compute Metrics Basic Gen7.5 :: Early Hi-Depth Test Fails
 *
 * Returns accumulator slot (a_offset + 33) unchanged.
 */
static uint64_t
hsw__compute_basic__hi_depth_test_fails__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: A 33 READ */
   return accumulator[query->a_offset + 33];
}

/* Compute Metrics Basic Gen7.5 :: VS EU Active
 *
 * Takes A counter 2, integer-divides it by the EU count, scales by 100 and
 * then divides (floating point) by the GPU core clock count.  A zero divisor
 * at either step produces 0.
 */
static float
hsw__compute_basic__vs_eu_active__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   /* RPN equation: A 2 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 2];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = (double)(per_eu * 100);
   const double clocks = hsw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (!clocks)
      return 0;

   return scaled / clocks;
}

/* Compute Metrics Basic Gen7.5 :: CS EU Active
 *
 * Takes A counter 17, integer-divides it by the EU count, scales by 100 and
 * then divides (floating point) by the GPU core clock count.  A zero divisor
 * at either step produces 0.
 */
static float
hsw__compute_basic__cs_eu_active__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   /* RPN equation: A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 17];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = (double)(per_eu * 100);
   const double clocks = hsw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (!clocks)
      return 0;

   return scaled / clocks;
}

/* Compute Metrics Basic Gen7.5 :: CS AVG Stall per Thread
 *
 * Integer quotient of A counter 18 by the value returned from
 * hsw__compute_basic__cs_threads__read(); 0 when that value is zero.
 */
static uint64_t
hsw__compute_basic__cs_eu_stall_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 18 READ $CsThreads UDIV */
   const uint64_t numer = accumulator[query->a_offset + 18];
   const uint64_t threads = hsw__compute_basic__cs_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return numer / threads;
}

/* Compute Metrics Basic Gen7.5 :: Late Stencil Test Fails
 *
 * Returns accumulator slot (a_offset + 38) unchanged.
 */
static uint64_t
hsw__compute_basic__post_ps_stencil_test_fails__read(struct brw_context *brw,
                                                     const struct brw_perf_query_info *query,
                                                     uint64_t *accumulator)
{
   /* RPN equation: A 38 READ */
   return accumulator[query->a_offset + 38];
}

/* Compute Metrics Basic Gen7.5 :: VS AVG Active per Thread
 *
 * Integer quotient of A counter 2 by the value returned from
 * hsw__compute_basic__vs_threads__read(); 0 when that value is zero.
 */
static uint64_t
hsw__compute_basic__vs_eu_active_per_thread__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   /* RPN equation: A 2 READ $VsThreads UDIV */
   const uint64_t numer = accumulator[query->a_offset + 2];
   const uint64_t threads = hsw__compute_basic__vs_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return numer / threads;
}

/* Compute Metrics Basic Gen7.5 :: CS EU Stall
 *
 * Takes A counter 18, integer-divides it by the EU count, scales by 100 and
 * then divides (floating point) by the GPU core clock count.  A zero divisor
 * at either step produces 0.
 */
static float
hsw__compute_basic__cs_eu_stall__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 18];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = (double)(per_eu * 100);
   const double clocks = hsw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);

   if (!clocks)
      return 0;

   return scaled / clocks;
}

/* Compute Metrics Basic Gen7.5 :: Typed Bytes Read
 *
 * Sums B counters 4 and 5 and multiplies the sum by (slice count * 64).
 */
static uint64_t
hsw__compute_basic__typed_bytes_read__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   /* RPN equation: B 4 READ B 5 READ UADD $EuSlicesTotalCount 64 UMUL UMUL */
   const uint64_t *b = &accumulator[query->b_offset];
   const uint64_t sum = b[4] + b[5];
   const uint64_t scale = brw->perfquery.sys_vars.n_eu_slices * 64;

   return sum * scale;
}

/* Compute Metrics Basic Gen7.5 :: Samples Written
 *
 * Returns accumulator slot (a_offset + 40) unchanged.
 */
static uint64_t
hsw__compute_basic__samples_written__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   /* RPN equation: A 40 READ */
   return accumulator[query->a_offset + 40];
}

/* Compute Metrics Basic Gen7.5 :: VS EU Stall
 *
 * Takes A counter 3, integer-divides it by the EU count, scales by 100 and
 * then divides (floating point) by the GPU core clock count.  A zero divisor
 * at either step produces 0.
 *
 * The equation ends in FDIV, so the result is fractional and is returned as
 * float, matching the other EU stall/active readers in this metric set.
 */
static float
hsw__compute_basic__vs_eu_stall__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 3 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   uint64_t tmp0 = accumulator[query->a_offset + 3];
   uint64_t tmp1 = tmp0;
   uint64_t tmp2 = brw->perfquery.sys_vars.n_eus;
   uint64_t tmp3 = tmp2 ? tmp1 / tmp2 : 0;
   uint64_t tmp4 = tmp3 * 100;
   double tmp5 = tmp4;
   double tmp6 = hsw__compute_basic__gpu_core_clocks__read(brw, query, accumulator);
   double tmp7 = tmp6 ? tmp5 / tmp6 : 0;

   return tmp7;
}

/* Backing storage for the register programming lists of the Compute Metrics
 * Basic query.  The mux list is filled in at runtime by
 * register_compute_basic_counter_query(), which appends entries through
 * query->mux_regs[query->n_mux_regs++]; the b-counter list is presumably
 * filled the same way (its count is also marked "Determined at runtime").
 * The array bounds must therefore cover every entry appended there. */
static struct brw_perf_query_register_prog hsw_compute_basic_mux_regs[33];
static struct brw_perf_query_register_prog hsw_compute_basic_b_counter_regs[17];

/* Backing storage for the query's counter descriptions; n_counters starts at
 * 0 in the initializer below. */
static struct brw_perf_query_counter hsw_compute_basic_query_counters[50];

/* Static description of the "Compute Metrics Basic Gen7.5" OA query.  All
 * element counts start at zero and data_size is left zero-initialized;
 * register_compute_basic_counter_query() uses a zero data_size as its
 * "not yet initialized" check. */
static struct brw_perf_query_info hsw_compute_basic_query = {
   .kind = OA_COUNTERS,
   .name = "Compute Metrics Basic Gen7.5",
   .guid = "39ad14bc-2380-45c4-91eb-fbcb3aa7ae7b",
   .counters = hsw_compute_basic_query_counters,
   .n_counters = 0,
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A45_B8_C8,

   /* Accumulation buffer offsets...
    *
    * These are element indices into the uint64_t accumulator array; the
    * hsw__compute_basic__*__read() functions above index it as
    * accumulator[query->a_offset + N] (and likewise for gpu_time/b/c). */
   .gpu_time_offset = 0,
   .a_offset = 1,
   .b_offset = 46,
   .c_offset = 54,
   .mux_regs = hsw_compute_basic_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = hsw_compute_basic_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
};

/**
 * Registers the "Compute Metrics Basic Gen7.5" OA query with a context.
 *
 * The query description lives in the file-scope hsw_compute_basic_query
 * object. On the first call (recognised by query->data_size still being
 * zero) this appends 33 MUX register writes, 17 B-counter register writes
 * and 50 counter descriptors to it, then sets data_size to the end of the
 * last counter so that later calls skip the initialization.
 *
 * Every call, initializing or not, inserts the query into
 * brw->perfquery.oa_metrics_table keyed by the query's GUID string.
 *
 * \param brw  context whose OA metrics table receives the query; it is also
 *             passed to the helper that computes the raw_max of the
 *             "AVG GPU Core Frequency" counter.
 */
static void
register_compute_basic_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &hsw_compute_basic_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      /* MUX register/value pairs, appended in the order listed. */
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000253A4, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002681C, .val = 0x01F00800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026820, .val = 0x00001000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002781C, .val = 0x01F00800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026520, .val = 0x00000007 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000265A0, .val = 0x00000007 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025380, .val = 0x00000010 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002538C, .val = 0x00300000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025384, .val = 0xAA8AAAAA };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025404, .val = 0xFFFFFFFF };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026800, .val = 0x00004202 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026808, .val = 0x00605817 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002680C, .val = 0x10001005 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026804, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027800, .val = 0x00000102 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027808, .val = 0x0C0701E0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002780C, .val = 0x000200A0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027804, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026484, .val = 0x44000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026704, .val = 0x44000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026500, .val = 0x00000006 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026510, .val = 0x00000001 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026504, .val = 0x88000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026580, .val = 0x00000006 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026590, .val = 0x00000020 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026584, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026104, .val = 0x55822222 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026184, .val = 0xAA866666 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025420, .val = 0x08320C83 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025424, .val = 0x06820C83 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002541C, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025428, .val = 0x00000C03 };

      /* B-counter register/value pairs, appended in the order listed. */
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0x00800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002718, .val = 0xAAAAAAAA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000271C, .val = 0xAAAAAAAA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0x00800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002728, .val = 0xAAAAAAAA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000272C, .val = 0xAAAAAAAA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002744, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002748, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000274C, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002750, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002754, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002758, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000275C, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000236C, .val = 0x00000000 };


      /* Counter descriptors. Each entry records a read callback (uint64 or
       * float flavour, matching data_type), its user-visible name and
       * description, and its offset/size. Offsets increase monotonically;
       * a uint64 counter that follows a lone float starts on the next
       * multiple of 8.
       */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__ds_eu_stall__read;
      counter->name = "TES EU Stall";
      counter->desc = "The percentage of time in which evaluation shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 12;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__typed_bytes_written__read;
      counter->name = "Typed Bytes Written";
      /* Description must agree with the counter name: this is the typed
       * write counter, not the untyped one. */
      counter->desc = "The total number of typed memory bytes written via Data Port.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 16;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__alpha_test_fails__read;
      counter->name = "Alpha Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS alpha test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 24;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__hs_threads__read;
      counter->name = "TCS Threads Dispatched";
      counter->desc = "The total number of control shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 32;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__hs_eu_active_per_thread__read;
      counter->name = "TCS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 40;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 48;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__gs_eu_stall__read;
      counter->name = "GS EU Stall";
      counter->desc = "The percentage of time in which geometry shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 56;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 64;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 72;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 80;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__cs_eu_active_per_thread__read;
      counter->name = "CS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 88;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__untyped_bytes_read__read;
      counter->name = "Untyped Bytes Read";
      /* Description must agree with the counter name: this is the untyped
       * read counter, not the typed one. */
      counter->desc = "The total number of untyped memory bytes read via Data Port.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 96;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__gs_eu_stall_per_thread__read;
      counter->name = "GS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 104;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__typed_atomics__read;
      counter->name = "Typed Atomics";
      counter->desc = "The total number of typed atomics.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 120;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      /* The only counter here whose raw_max is computed from brw rather
       * than being a literal. */
      counter->raw_max = hsw__compute_basic__avg_gpu_core_frequency__max(brw);
      counter->offset = 128;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 136;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__ps_eu_stall_per_thread__read;
      counter->name = "FS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 144;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__ds_eu_active__read;
      counter->name = "TES EU Active";
      counter->desc = "The percentage of time in which evaluation shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 152;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__vs_eu_stall_per_thread__read;
      counter->name = "VS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 160;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__hs_eu_stall__read;
      counter->name = "TCS EU Stall";
      counter->desc = "The percentage of time in which control shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 168;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has being processing GPU commands.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 172;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__ps_eu_active_per_thread__read;
      counter->name = "FS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 176;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 184;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__ds_threads__read;
      counter->name = "TES Threads Dispatched";
      counter->desc = "The total number of evaluation shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 192;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__ds_eu_active_per_thread__read;
      counter->name = "TES AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 200;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__gs_eu_active__read;
      counter->name = "GS EU Active";
      counter->desc = "The percentage of time in which geometry shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 208;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__ps_eu_active__read;
      counter->name = "FS EU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 212;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__ds_eu_stall_per_thread__read;
      counter->name = "TES AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 216;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__gs_eu_active_per_thread__read;
      counter->name = "GS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 224;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__untyped_bytes_written__read;
      counter->name = "Untyped Writes";
      counter->desc = "The total number of untyped memory bytes written via Data Port.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 232;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of byten written into shared local memory.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 240;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__hs_eu_stall_per_thread__read;
      counter->name = "TCS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 248;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__hs_eu_active__read;
      counter->name = "TCS EU Active";
      counter->desc = "The percentage of time in which control shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 256;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 264;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__post_ps_depth_test_fails__read;
      counter->name = "Late Depth Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 272;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__ps_eu_stall__read;
      counter->name = "FS EU Stall";
      counter->desc = "The percentage of time in which fragment shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 280;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 284;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 288;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__vs_eu_active__read;
      counter->name = "VS EU Active";
      counter->desc = "The percentage of time in which vertex shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 296;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__cs_eu_active__read;
      counter->name = "CS EU Active";
      counter->desc = "The percentage of time in which compute shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 300;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__cs_eu_stall_per_thread__read;
      counter->name = "CS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 304;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__post_ps_stencil_test_fails__read;
      counter->name = "Late Stencil Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS stencil test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 312;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__vs_eu_active_per_thread__read;
      counter->name = "VS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 320;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__cs_eu_stall__read;
      counter->name = "CS EU Stall";
      counter->desc = "The percentage of time in which compute shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 328;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__typed_bytes_read__read;
      counter->name = "Typed Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port.";
      counter->type = GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* unsupported (varies over time) */
      counter->offset = 336;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 344;
      counter->size = sizeof(uint64_t);

      /* NOTE(review): unlike the other "EU Stall" percentages, this one is
       * exposed as uint64; that matches the return type of its read
       * function, so the two are kept in step here. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__vs_eu_stall__read;
      counter->name = "VS EU Stall";
      counter->desc = "The percentage of time in which vertex shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 352;
      counter->size = sizeof(uint64_t);

      /* Total size is the end of the last counter registered above; a
       * non-zero value also marks the query as initialized. */
      query->data_size = counter->offset + counter->size;
   }

   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Compute Metrics Extended Gen7.5 :: EuUntypedWrites0 */
static uint64_t
hsw__compute_extended__eu_untyped_writes0__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: B 1 READ
    *
    * Returned unmodified from the accumulation buffer, one slot past the
    * query's B-counter base.
    */
   return accumulator[query->b_offset + 1];
}

/* Compute Metrics Extended Gen7.5 :: Untyped Writes 0
 *
 * Returns the accumulated value of C counter 1, i.e. the accumulator entry
 * at index query->c_offset + 1.  brw is not used by this reader.
 */
static uint64_t
hsw__compute_extended__untyped_writes0__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: C 1 READ */
   return accumulator[query->c_offset + 1];
}

/* Compute Metrics Extended Gen7.5 :: UntypedWritesPerCacheLine
 *
 * Divides the EuUntypedWrites0 counter by the UntypedWrites0 counter in
 * double precision and returns the quotient narrowed to float.  A zero
 * divisor yields 0 instead of a division by zero.
 */
static float
hsw__compute_extended__untyped_writes_per_cache_line__read(struct brw_context *brw,
                                                           const struct brw_perf_query_info *query,
                                                           uint64_t *accumulator)
{
   /* RPN equation: $EuUntypedWrites0 $UntypedWrites0 FDIV */
   const double eu_writes =
      hsw__compute_extended__eu_untyped_writes0__read(brw, query, accumulator);
   const double line_writes =
      hsw__compute_extended__untyped_writes0__read(brw, query, accumulator);
   double ratio = 0;

   if (line_writes != 0)
      ratio = eu_writes / line_writes;

   return ratio;
}

/* Compute Metrics Extended Gen7.5 :: Typed Reads 0
 *
 * Returns the accumulated value of C counter 2, i.e. the accumulator entry
 * at index query->c_offset + 2.  brw is not used by this reader.
 */
static uint64_t
hsw__compute_extended__typed_reads0__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   /* RPN equation: C 2 READ */
   return accumulator[query->c_offset + 2];
}

/* Compute Metrics Extended Gen7.5 :: Typed Writes 0
 *
 * Returns the accumulated value of C counter 0, i.e. the accumulator entry
 * at index query->c_offset + 0.  brw is not used by this reader.
 */
static uint64_t
hsw__compute_extended__typed_writes0__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   /* RPN equation: C 0 READ */
   return accumulator[query->c_offset + 0];
}

/* Compute Metrics Extended Gen7.5 :: EuTypedAtomics0
 *
 * Returns the accumulated value of B counter 5, i.e. the accumulator entry
 * at index query->b_offset + 5.  brw is not used by this reader.
 */
static uint64_t
hsw__compute_extended__eu_typed_atomics0__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   /* RPN equation: B 5 READ */
   return accumulator[query->b_offset + 5];
}

/* Compute Metrics Extended Gen7.5 :: Typed Atomics 0
 *
 * Returns the accumulated value of C counter 4, i.e. the accumulator entry
 * at index query->c_offset + 4.  brw is not used by this reader.
 */
static uint64_t
hsw__compute_extended__typed_atomics0__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: C 4 READ */
   return accumulator[query->c_offset + 4];
}

/* Compute Metrics Extended Gen7.5 :: TypedAtomicsPerCacheLine
 *
 * Divides the EuTypedAtomics0 counter by the TypedAtomics0 counter in
 * double precision and returns the quotient narrowed to float.  A zero
 * divisor yields 0 instead of a division by zero.
 */
static float
hsw__compute_extended__typed_atomics_per_cache_line__read(struct brw_context *brw,
                                                          const struct brw_perf_query_info *query,
                                                          uint64_t *accumulator)
{
   /* RPN equation: $EuTypedAtomics0 $TypedAtomics0 FDIV */
   const double eu_atomics =
      hsw__compute_extended__eu_typed_atomics0__read(brw, query, accumulator);
   const double line_atomics =
      hsw__compute_extended__typed_atomics0__read(brw, query, accumulator);
   double ratio = 0;

   if (line_atomics != 0)
      ratio = eu_atomics / line_atomics;

   return ratio;
}

/* Compute Metrics Extended Gen7.5 :: EuUntypedReads0
 *
 * Returns the accumulated value of B counter 0, i.e. the accumulator entry
 * at index query->b_offset + 0.  brw is not used by this reader.
 */
static uint64_t
hsw__compute_extended__eu_untyped_reads0__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   /* RPN equation: B 0 READ */
   return accumulator[query->b_offset + 0];
}

/* Compute Metrics Extended Gen7.5 :: EuUntypedAtomics0
 *
 * Returns the accumulated value of B counter 4, i.e. the accumulator entry
 * at index query->b_offset + 4.  brw is not used by this reader.
 */
static uint64_t
hsw__compute_extended__eu_untyped_atomics0__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: B 4 READ */
   return accumulator[query->b_offset + 4];
}

/* Compute Metrics Extended Gen7.5 :: GPU Core Clocks
 *
 * Returns the accumulated value of B counter 7, i.e. the accumulator entry
 * at index query->b_offset + 7.  brw is not used by this reader.
 */
static uint64_t
hsw__compute_extended__gpu_clocks__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* RPN equation: B 7 READ */
   return accumulator[query->b_offset + 7];
}

/* Compute Metrics Extended Gen7.5 :: CS Threads Dispatched
 *
 * Returns the accumulated value of A counter 20, i.e. the accumulator entry
 * at index query->a_offset + 20.  brw is not used by this reader.
 */
static uint64_t
hsw__compute_extended__cs_threads__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* RPN equation: A 20 READ */
   return accumulator[query->a_offset + 20];
}

/* Compute Metrics Extended Gen7.5 :: EuTypedWrites0
 *
 * Returns the accumulated value of B counter 3, i.e. the accumulator entry
 * at index query->b_offset + 3.  brw is not used by this reader.
 */
static uint64_t
hsw__compute_extended__eu_typed_writes0__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: B 3 READ */
   return accumulator[query->b_offset + 3];
}

/* Compute Metrics Extended Gen7.5 :: TypedWritesPerCacheLine
 *
 * Divides the EuTypedWrites0 counter by the TypedWrites0 counter in double
 * precision and returns the quotient narrowed to float.  A zero divisor
 * yields 0 instead of a division by zero.
 */
static float
hsw__compute_extended__typed_writes_per_cache_line__read(struct brw_context *brw,
                                                         const struct brw_perf_query_info *query,
                                                         uint64_t *accumulator)
{
   /* RPN equation: $EuTypedWrites0 $TypedWrites0 FDIV */
   const double eu_writes =
      hsw__compute_extended__eu_typed_writes0__read(brw, query, accumulator);
   const double line_writes =
      hsw__compute_extended__typed_writes0__read(brw, query, accumulator);
   double ratio = 0;

   if (line_writes != 0)
      ratio = eu_writes / line_writes;

   return ratio;
}

/* Compute Metrics Extended Gen7.5 :: EuTypedReads0
 *
 * Returns the accumulated value of B counter 2, i.e. the accumulator entry
 * at index query->b_offset + 2.  brw is not used by this reader.
 */
static uint64_t
hsw__compute_extended__eu_typed_reads0__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: B 2 READ */
   return accumulator[query->b_offset + 2];
}

/* Compute Metrics Extended Gen7.5 :: EuUrbAtomics0
 *
 * Returns the accumulated value of B counter 6, i.e. the accumulator entry
 * at index query->b_offset + 6.  brw is not used by this reader.
 */
static uint64_t
hsw__compute_extended__eu_urb_atomics0__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: B 6 READ */
   return accumulator[query->b_offset + 6];
}

/* Compute Metrics Extended Gen7.5 :: Untyped Reads 0
 *
 * Returns the accumulated value of C counter 3, i.e. the accumulator entry
 * at index query->c_offset + 3.  brw is not used by this reader.
 */
static uint64_t
hsw__compute_extended__untyped_reads0__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: C 3 READ */
   return accumulator[query->c_offset + 3];
}

/* Compute Metrics Extended Gen7.5 :: UntypedReadsPerCacheLine
 *
 * Divides the EuUntypedReads0 counter by the UntypedReads0 counter in
 * double precision and returns the quotient narrowed to float.  A zero
 * divisor yields 0 instead of a division by zero.
 */
static float
hsw__compute_extended__untyped_reads_per_cache_line__read(struct brw_context *brw,
                                                          const struct brw_perf_query_info *query,
                                                          uint64_t *accumulator)
{
   /* RPN equation: $EuUntypedReads0 $UntypedReads0 FDIV */
   const double eu_reads =
      hsw__compute_extended__eu_untyped_reads0__read(brw, query, accumulator);
   const double line_reads =
      hsw__compute_extended__untyped_reads0__read(brw, query, accumulator);
   double ratio = 0;

   if (line_reads != 0)
      ratio = eu_reads / line_reads;

   return ratio;
}

/* Compute Metrics Extended Gen7.5 :: GPU Time Elapsed
 *
 * Scales the accumulated GPU_TIME value by 1000000000 and divides it by
 * brw->perfquery.sys_vars.timestamp_frequency, all in unsigned 64-bit
 * integer arithmetic (the multiplication wraps on overflow and the
 * division truncates).  Returns 0 when the timestamp frequency is zero.
 */
static uint64_t
hsw__compute_extended__gpu_time__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV */
   const uint64_t elapsed = accumulator[query->gpu_time_offset + 0];
   const uint64_t scaled = elapsed * 1000000000;
   const uint64_t frequency = brw->perfquery.sys_vars.timestamp_frequency;

   if (!frequency)
      return 0;

   return scaled / frequency;
}

/* Compute Metrics Extended Gen7.5 :: TypedReadsPerCacheLine
 *
 * Divides the EuTypedReads0 counter by the TypedReads0 counter in double
 * precision and returns the quotient narrowed to float.  A zero divisor
 * yields 0 instead of a division by zero.
 */
static float
hsw__compute_extended__typed_reads_per_cache_line__read(struct brw_context *brw,
                                                        const struct brw_perf_query_info *query,
                                                        uint64_t *accumulator)
{
   /* RPN equation: $EuTypedReads0 $TypedReads0 FDIV */
   const double eu_reads =
      hsw__compute_extended__eu_typed_reads0__read(brw, query, accumulator);
   const double line_reads =
      hsw__compute_extended__typed_reads0__read(brw, query, accumulator);
   double ratio = 0;

   if (line_reads != 0)
      ratio = eu_reads / line_reads;

   return ratio;
}

/* Backing storage for the "Compute Metrics Extended Gen7.5" query.
 *
 * The arrays start zero-initialized and are filled in by
 * register_compute_extended_counter_query(), which appends exactly 16 MUX
 * entries, 20 B-counter entries and 20 counter descriptors; the array sizes
 * here must stay in step with those counts.
 */
static struct brw_perf_query_register_prog hsw_compute_extended_mux_regs[16];
static struct brw_perf_query_register_prog hsw_compute_extended_b_counter_regs[20];

static struct brw_perf_query_counter hsw_compute_extended_query_counters[20];

/* Query descriptor.  The n_* fields start at 0 and are incremented as
 * entries are appended during registration; data_size is left implicitly
 * zero, which the registration function uses as its "not yet initialized"
 * marker.
 */
static struct brw_perf_query_info hsw_compute_extended_query = {
   .kind = OA_COUNTERS,
   .name = "Compute Metrics Extended Gen7.5",
   .guid = "3865be28-6982-49fe-9494-e4d1b4795413",
   .counters = hsw_compute_extended_query_counters,
   .n_counters = 0,
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A45_B8_C8,

   /* Accumulation buffer offsets...
    *
    * Indices (in uint64_t elements) into the accumulator array passed to the
    * counter read functions: one GPU_TIME slot, then the A counters starting
    * at 1, the B counters at 46 and the C counters at 54.  The spacing
    * (45, 8) is consistent with the A45_B8_C8 format name above.
    */
   .gpu_time_offset = 0,
   .a_offset = 1,
   .b_offset = 46,
   .c_offset = 54,
   .mux_regs = hsw_compute_extended_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = hsw_compute_extended_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
};

/* Registers the "Compute Metrics Extended Gen7.5" OA query with brw.
 *
 * On the first call (detected by query->data_size still being zero) this
 * fills in the shared, file-static query description:
 *   - 16 MUX register (reg, val) entries,
 *   - 20 B-counter register (reg, val) entries,
 *   - 20 counter descriptors (read callback, name, description, type,
 *     data type, raw_max, offset and size),
 * and finally sets query->data_size to the end of the last counter.
 *
 * On every call the query is inserted into brw->perfquery.oa_metrics_table
 * keyed by its GUID string.
 */
static void
register_compute_extended_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &hsw_compute_extended_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      /* MUX configuration: (register, value) pairs appended in this order. */
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002681C, .val = 0x3EB00800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026820, .val = 0x00900000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025384, .val = 0x02AAAAAA };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025404, .val = 0x03FFFFFF };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026800, .val = 0x00142284 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026808, .val = 0x0E629062 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002680C, .val = 0x3F6F55CB };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026810, .val = 0x00000014 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026804, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026104, .val = 0x02AAAAAA };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026184, .val = 0x02AAAAAA };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025420, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025424, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002541C, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025428, .val = 0x00000000 };

      /* B-counter configuration: (register, value) pairs appended in this
       * order. */
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002770, .val = 0x0007FE2A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002774, .val = 0x0000FF00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002778, .val = 0x0007FE6A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000277C, .val = 0x0000FF00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002780, .val = 0x0007FE92 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002784, .val = 0x0000FF00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002788, .val = 0x0007FEA2 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000278C, .val = 0x0000FF00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002790, .val = 0x0007FE32 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002794, .val = 0x0000FF00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002798, .val = 0x0007FE9A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000279C, .val = 0x0000FF00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A0, .val = 0x0007FF23 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A4, .val = 0x0000FF00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A8, .val = 0x0007FFF3 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027AC, .val = 0x0000FFFE };


      /* Counter descriptors.  Each one takes the next free slot in
       * query->counters.  Offsets advance by 8 for every counter, including
       * the float ones whose size is only sizeof(float). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__eu_untyped_writes0__read;
      counter->name = "EuUntypedWrites0";
      counter->desc = "The subslice 0 EU Untyped Writes subslice 0.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__untyped_writes0__read;
      counter->name = "Untyped Writes 0";
      counter->desc = "The subslice 0 untyped writes (including SLM writes).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 8;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_extended__untyped_writes_per_cache_line__read;
      counter->name = "UntypedWritesPerCacheLine";
      counter->desc = "The ratio of EU untyped write requests to L3 cache line writes.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 16;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__typed_reads0__read;
      counter->name = "Typed Reads 0";
      counter->desc = "The subslice 0 typed reads.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 24;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__typed_writes0__read;
      counter->name = "Typed Writes 0";
      counter->desc = "The subslice 0 typed writes.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 32;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__eu_typed_atomics0__read;
      counter->name = "EuTypedAtomics0";
      counter->desc = "The subslice 0 EU Typed Atomics subslice 0.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 40;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__typed_atomics0__read;
      counter->name = "Typed Atomics 0";
      counter->desc = "The subslice 0 typed atomics.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 48;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_extended__typed_atomics_per_cache_line__read;
      counter->name = "TypedAtomicsPerCacheLine";
      counter->desc = "The ratio of EU typed atomics requests to L3 cache line writes.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 56;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__eu_untyped_reads0__read;
      counter->name = "EuUntypedReads0";
      counter->desc = "The subslice 0 EU Untyped Reads subslice 0.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 64;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__eu_untyped_atomics0__read;
      counter->name = "EuUntypedAtomics0";
      counter->desc = "The subslice 0 EU Untyped Atomics subslice 0.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 72;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__gpu_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 80;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 88;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__eu_typed_writes0__read;
      counter->name = "EuTypedWrites0";
      counter->desc = "The subslice 0 EU Typed Writes subslice 0.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 96;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_extended__typed_writes_per_cache_line__read;
      counter->name = "TypedWritesPerCacheLine";
      counter->desc = "The ratio of EU typed write requests to L3 cache line writes.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 104;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__eu_typed_reads0__read;
      counter->name = "EuTypedReads0";
      counter->desc = "The subslice 0 EU Typed Reads subslice 0.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__eu_urb_atomics0__read;
      counter->name = "EuUrbAtomics0";
      counter->desc = "The subslice 0 EU URB Atomics subslice 0.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 120;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__untyped_reads0__read;
      counter->name = "Untyped Reads 0";
      counter->desc = "The subslice 0 untyped reads (including SLM reads).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 128;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_extended__untyped_reads_per_cache_line__read;
      counter->name = "UntypedReadsPerCacheLine";
      counter->desc = "The ratio of EU untyped read requests to L3 cache line reads.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 136;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 144;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_extended__typed_reads_per_cache_line__read;
      counter->name = "TypedReadsPerCacheLine";
      counter->desc = "The ratio of EU typed read requests to L3 cache line reads.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 152;
      counter->size = sizeof(float);

      /* Total size is the end of the last counter registered above.  The
       * resulting non-zero value also makes later calls skip this block. */
      query->data_size = counter->offset + counter->size;
   }

   /* Performed on every call, not just the first one. */
   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Memory Reads Distribution Gen7.5 :: GPU Core Clocks
 *
 * Returns the accumulated value of C counter 7, i.e. the accumulator entry
 * at index query->c_offset + 7.  brw is not used by this reader.
 */
static uint64_t
hsw__memory_reads__gpu_core_clocks__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: C 7 READ */
   return accumulator[query->c_offset + 7];
}

/* Memory Reads Distribution Gen7.5 :: EU Active
 *
 * Computes (A0 / n_eus) * 100 / GpuCoreClocks.  The division by the EU
 * count (brw->perfquery.sys_vars.n_eus) and the scaling by 100 are done in
 * unsigned 64-bit integer arithmetic, so that quotient is truncated before
 * scaling; only the final division by the core clock count is done in
 * double precision.  A zero EU count or a zero clock count yields 0 for the
 * respective division.  The result is narrowed to float on return.
 */
static float
hsw__memory_reads__eu_active__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   /* RPN equation: A 0 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 0];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = eu_count ? raw / eu_count : 0;
   const uint64_t scaled = per_eu * 100;
   const double numerator = scaled;
   const double clocks =
      hsw__memory_reads__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0)
      percent = numerator / clocks;

   return percent;
}

/* Memory Reads Distribution Gen7.5 :: TES EU Stall
 *
 * Computes (A13 / n_eus) * 100 / GpuCoreClocks.  The division by the EU
 * count (brw->perfquery.sys_vars.n_eus) and the scaling by 100 are done in
 * unsigned 64-bit integer arithmetic, so that quotient is truncated before
 * scaling; only the final division by the core clock count is done in
 * double precision.  A zero EU count or a zero clock count yields 0 for the
 * respective division.  The result is narrowed to float on return.
 */
static float
hsw__memory_reads__ds_eu_stall__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 13];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = eu_count ? raw / eu_count : 0;
   const uint64_t scaled = per_eu * 100;
   const double numerator = scaled;
   const double clocks =
      hsw__memory_reads__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0)
      percent = numerator / clocks;

   return percent;
}

/* Memory Reads Distribution Gen7.5 :: Alpha Test Fails
 *
 * Returns the accumulated value of A counter 37, i.e. the accumulator entry
 * at index query->a_offset + 37.  brw is not used by this reader.
 */
static uint64_t
hsw__memory_reads__alpha_test_fails__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   /* RPN equation: A 37 READ */
   return accumulator[query->a_offset + 37];
}

/* Memory Reads Distribution Gen7.5 :: TES Threads Dispatched
 *
 * Returns the accumulated value of A counter 15, i.e. the accumulator entry
 * at index query->a_offset + 15.  brw is not used by this reader.
 */
static uint64_t
hsw__memory_reads__ds_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 15 READ */
   return accumulator[query->a_offset + 15];
}

/* Memory Reads Distribution Gen7.5 :: TES AVG Active per Thread
 *
 * Divides A counter 12 by the DsThreads counter using unsigned 64-bit
 * integer division (the quotient is truncated).  Returns 0 when the thread
 * count is zero.
 */
static uint64_t
hsw__memory_reads__ds_eu_active_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 12 READ $DsThreads UDIV */
   const uint64_t active_total = accumulator[query->a_offset + 12];
   const uint64_t thread_count =
      hsw__memory_reads__ds_threads__read(brw, query, accumulator);

   if (thread_count == 0)
      return 0;

   return active_total / thread_count;
}

/* Memory Reads Distribution Gen7.5 :: GS Threads Dispatched
 *
 * Returns the accumulated value of A counter 25, i.e. the accumulator entry
 * at index query->a_offset + 25.  brw is not used by this reader.
 */
static uint64_t
hsw__memory_reads__gs_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 25 READ */
   return accumulator[query->a_offset + 25];
}

/* Memory Reads Distribution Gen7.5 :: GS EU Stall
 *
 * Computes (A23 / n_eus) * 100 / GpuCoreClocks.  The division by the EU
 * count (brw->perfquery.sys_vars.n_eus) and the scaling by 100 are done in
 * unsigned 64-bit integer arithmetic, so that quotient is truncated before
 * scaling; only the final division by the core clock count is done in
 * double precision.  A zero EU count or a zero clock count yields 0 for the
 * respective division.  The result is narrowed to float on return.
 */
static float
hsw__memory_reads__gs_eu_stall__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 23 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 23];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = eu_count ? raw / eu_count : 0;
   const uint64_t scaled = per_eu * 100;
   const double numerator = scaled;
   const double clocks =
      hsw__memory_reads__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (clocks != 0)
      percent = numerator / clocks;

   return percent;
}

/* Memory Reads Distribution Gen7.5 :: VS Threads Dispatched
 *
 * Returns the accumulated value of A counter 5, i.e. the accumulator entry
 * at index query->a_offset + 5.  brw is not used by this reader.
 */
static uint64_t
hsw__memory_reads__vs_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 5 READ */
   return accumulator[query->a_offset + 5];
}

/* Memory Reads Distribution Gen7.5 :: LLC GPU Read Accesses
 *
 * Returns the accumulated value of C counter 6, i.e. the accumulator entry
 * at index query->c_offset + 6.  brw is not used by this reader.
 */
static uint64_t
hsw__memory_reads__llc_read_accesses__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   /* RPN equation: C 6 READ */
   return accumulator[query->c_offset + 6];
}

/* Memory Reads Distribution Gen7.5 :: FS Threads Dispatched
 *
 * Returns the accumulated value of A counter 30, i.e. the accumulator entry
 * at index query->a_offset + 30.  brw is not used by this reader.
 */
static uint64_t
hsw__memory_reads__ps_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 30 READ */
   return accumulator[query->a_offset + 30];
}

/* Memory Reads Distribution Gen7.5 :: GtiMemoryReads
 *
 * Returns the accumulated value of C counter 5, i.e. the accumulator entry
 * at index query->c_offset + 5.  brw is not used by this reader.
 */
static uint64_t
hsw__memory_reads__gti_memory_reads__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   /* RPN equation: C 5 READ */
   return accumulator[query->c_offset + 5];
}

/* Memory Reads Distribution Gen7.5 :: CS Threads Dispatched
 *
 * Returns the accumulated value of A counter 20, i.e. the accumulator entry
 * at index query->a_offset + 20.  brw is not used by this reader.
 */
static uint64_t
hsw__memory_reads__cs_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 20 READ */
   return accumulator[query->a_offset + 20];
}

/* Memory Reads Distribution Gen7.5 :: CS AVG Active per Thread
 *
 * Divides A counter 17 by the CsThreads counter using unsigned 64-bit
 * integer division (the quotient is truncated).  Returns 0 when the thread
 * count is zero.
 */
static uint64_t
hsw__memory_reads__cs_eu_active_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 17 READ $CsThreads UDIV */
   const uint64_t active_total = accumulator[query->a_offset + 17];
   const uint64_t thread_count =
      hsw__memory_reads__cs_threads__read(brw, query, accumulator);

   if (thread_count == 0)
      return 0;

   return active_total / thread_count;
}

/* Memory Reads Distribution Gen7.5 :: GtiRczMemoryReads
 *
 * Returns the raw value of B counter 7 from the accumulation buffer.
 */
static uint64_t
hsw__memory_reads__gti_rcz_memory_reads__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: B 7 READ */
   return accumulator[query->b_offset + 7];
}

/* Memory Reads Distribution Gen7.5 :: GS AVG Stall per Thread
 *
 * Divides A counter 23 by the GS thread count; yields 0 when the thread
 * count is zero.
 */
static uint64_t
hsw__memory_reads__gs_eu_stall_per_thread__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: A 23 READ $GsThreads UDIV */
   const uint64_t dividend = accumulator[query->a_offset + 23];
   const uint64_t threads = hsw__memory_reads__gs_threads__read(brw, query, accumulator);

   /* UDIV: a zero divisor produces 0 instead of a division. */
   if (threads == 0)
      return 0;

   return dividend / threads;
}

/* Memory Reads Distribution Gen7.5 :: GPU Time Elapsed
 *
 * Scales the accumulated GPU_TIME value by 1000000000 and divides it by the
 * timestamp frequency system variable; yields 0 when that frequency is zero.
 */
static uint64_t
hsw__memory_reads__gpu_time__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV */
   const uint64_t scaled_ticks = accumulator[query->gpu_time_offset + 0] * 1000000000;
   const uint64_t frequency = brw->perfquery.sys_vars.timestamp_frequency;

   /* UDIV: a zero divisor produces 0 instead of a division. */
   if (frequency == 0)
      return 0;

   return scaled_ticks / frequency;
}

/* Memory Reads Distribution Gen7.5 :: AVG GPU Core Frequency
 *
 * Multiplies the GPU core clock count by 1000000000 and divides it by the
 * GPU Time Elapsed value; yields 0 when the elapsed time is zero.
 */
static uint64_t
hsw__memory_reads__avg_gpu_core_frequency__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV */
   const uint64_t scaled_clocks =
      hsw__memory_reads__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t elapsed = hsw__memory_reads__gpu_time__read(brw, query, accumulator);

   /* UDIV: a zero divisor produces 0 instead of a division. */
   if (elapsed == 0)
      return 0;

   return scaled_clocks / elapsed;
}

/* Memory Reads Distribution Gen7.5 :: AVG GPU Core Frequency
 *
 * Upper bound for the counter: the gt_max_freq system variable.
 */
static uint64_t
hsw__memory_reads__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   /* RPN equation: $GpuMaxFrequency */
   const uint64_t max_frequency = brw->perfquery.sys_vars.gt_max_freq;

   return max_frequency;
}

/* Memory Reads Distribution Gen7.5 :: GtiRccMemoryReads
 *
 * Returns the raw value of B counter 3 from the accumulation buffer.
 */
static uint64_t
hsw__memory_reads__gti_rcc_memory_reads__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: B 3 READ */
   return accumulator[query->b_offset + 3];
}

/* Memory Reads Distribution Gen7.5 :: FS AVG Stall per Thread
 *
 * Divides A counter 28 by the FS (PS) thread count; yields 0 when the thread
 * count is zero.
 */
static uint64_t
hsw__memory_reads__ps_eu_stall_per_thread__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: A 28 READ $PsThreads UDIV */
   const uint64_t dividend = accumulator[query->a_offset + 28];
   const uint64_t threads = hsw__memory_reads__ps_threads__read(brw, query, accumulator);

   /* UDIV: a zero divisor produces 0 instead of a division. */
   if (threads == 0)
      return 0;

   return dividend / threads;
}

/* Memory Reads Distribution Gen7.5 :: TES EU Active
 *
 * Takes A counter 12, integer-divides it by the EU count, multiplies by 100
 * and divides (in floating point) by the GPU core clock count.  A zero EU
 * count or a zero clock count contributes 0 instead of dividing.
 */
static float
hsw__memory_reads__ds_eu_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 12];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? (raw / eu_count) : 0;
   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)hsw__memory_reads__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Memory Reads Distribution Gen7.5 :: VS AVG Stall per Thread
 *
 * Divides A counter 3 by the VS thread count; yields 0 when the thread
 * count is zero.
 */
static uint64_t
hsw__memory_reads__vs_eu_stall_per_thread__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: A 3 READ $VsThreads UDIV */
   const uint64_t dividend = accumulator[query->a_offset + 3];
   const uint64_t threads = hsw__memory_reads__vs_threads__read(brw, query, accumulator);

   /* UDIV: a zero divisor produces 0 instead of a division. */
   if (threads == 0)
      return 0;

   return dividend / threads;
}

/* Memory Reads Distribution Gen7.5 :: TCS EU Stall
 *
 * Takes A counter 8, integer-divides it by the EU count, multiplies by 100
 * and divides (in floating point) by the GPU core clock count.  A zero EU
 * count or a zero clock count contributes 0 instead of dividing.
 */
static float
hsw__memory_reads__hs_eu_stall__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 8];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? (raw / eu_count) : 0;
   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)hsw__memory_reads__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Memory Reads Distribution Gen7.5 :: GPU Busy
 *
 * Multiplies A counter 41 by 100 and divides (in floating point) by the GPU
 * core clock count; yields 0 when the clock count is zero.
 */
static float
hsw__memory_reads__gpu_busy__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: A 41 READ 100 UMUL $GpuCoreClocks FDIV */
   const double scaled = (double)(accumulator[query->a_offset + 41] * 100);
   const double core_clocks =
      (double)hsw__memory_reads__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Memory Reads Distribution Gen7.5 :: FS AVG Active per Thread
 *
 * Divides A counter 27 by the FS (PS) thread count; yields 0 when the thread
 * count is zero.
 */
static uint64_t
hsw__memory_reads__ps_eu_active_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 27 READ $PsThreads UDIV */
   const uint64_t dividend = accumulator[query->a_offset + 27];
   const uint64_t threads = hsw__memory_reads__ps_threads__read(brw, query, accumulator);

   /* UDIV: a zero divisor produces 0 instead of a division. */
   if (threads == 0)
      return 0;

   return dividend / threads;
}

/* Memory Reads Distribution Gen7.5 :: GtiCmdStreamerMemoryReads
 *
 * Returns the raw value of B counter 0 from the accumulation buffer.
 */
static uint64_t
hsw__memory_reads__gti_cmd_streamer_memory_reads__read(struct brw_context *brw,
                                                       const struct brw_perf_query_info *query,
                                                       uint64_t *accumulator)
{
   /* RPN equation: B 0 READ */
   return accumulator[query->b_offset + 0];
}

/* Memory Reads Distribution Gen7.5 :: Early Depth Test Fails
 *
 * Returns the raw value of A counter 35 from the accumulation buffer.
 */
static uint64_t
hsw__memory_reads__early_depth_test_fails__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: A 35 READ */
   return accumulator[query->a_offset + 35];
}

/* Memory Reads Distribution Gen7.5 :: GS EU Active
 *
 * Takes A counter 22, integer-divides it by the EU count, multiplies by 100
 * and divides (in floating point) by the GPU core clock count.  A zero EU
 * count or a zero clock count contributes 0 instead of dividing.
 */
static float
hsw__memory_reads__gs_eu_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 22 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 22];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? (raw / eu_count) : 0;
   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)hsw__memory_reads__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Memory Reads Distribution Gen7.5 :: FS EU Active
 *
 * Takes A counter 27, integer-divides it by the EU count, multiplies by 100
 * and divides (in floating point) by the GPU core clock count.  A zero EU
 * count or a zero clock count contributes 0 instead of dividing.
 */
static float
hsw__memory_reads__ps_eu_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 27 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 27];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? (raw / eu_count) : 0;
   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)hsw__memory_reads__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Memory Reads Distribution Gen7.5 :: TES AVG Stall per Thread
 *
 * Divides A counter 13 by the TES (DS) thread count; yields 0 when the
 * thread count is zero.
 */
static uint64_t
hsw__memory_reads__ds_eu_stall_per_thread__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: A 13 READ $DsThreads UDIV */
   const uint64_t dividend = accumulator[query->a_offset + 13];
   const uint64_t threads = hsw__memory_reads__ds_threads__read(brw, query, accumulator);

   /* UDIV: a zero divisor produces 0 instead of a division. */
   if (threads == 0)
      return 0;

   return dividend / threads;
}

/* Memory Reads Distribution Gen7.5 :: GS AVG Active per Thread
 *
 * Divides A counter 22 by the GS thread count; yields 0 when the thread
 * count is zero.
 */
static uint64_t
hsw__memory_reads__gs_eu_active_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 22 READ $GsThreads UDIV */
   const uint64_t dividend = accumulator[query->a_offset + 22];
   const uint64_t threads = hsw__memory_reads__gs_threads__read(brw, query, accumulator);

   /* UDIV: a zero divisor produces 0 instead of a division. */
   if (threads == 0)
      return 0;

   return dividend / threads;
}

/* Memory Reads Distribution Gen7.5 :: GtiHiDepthMemoryReads
 *
 * Returns the raw value of B counter 5 from the accumulation buffer.
 */
static uint64_t
hsw__memory_reads__gti_hi_depth_memory_reads__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   /* RPN equation: B 5 READ */
   return accumulator[query->b_offset + 5];
}

/* Memory Reads Distribution Gen7.5 :: TCS Threads Dispatched
 *
 * Returns the raw value of A counter 10 from the accumulation buffer.
 */
static uint64_t
hsw__memory_reads__hs_threads__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   /* RPN equation: A 10 READ */
   return accumulator[query->a_offset + 10];
}

/* Memory Reads Distribution Gen7.5 :: TCS AVG Stall per Thread
 *
 * Divides A counter 8 by the TCS (HS) thread count; yields 0 when the thread
 * count is zero.
 */
static uint64_t
hsw__memory_reads__hs_eu_stall_per_thread__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: A 8 READ $HsThreads UDIV */
   const uint64_t dividend = accumulator[query->a_offset + 8];
   const uint64_t threads = hsw__memory_reads__hs_threads__read(brw, query, accumulator);

   /* UDIV: a zero divisor produces 0 instead of a division. */
   if (threads == 0)
      return 0;

   return dividend / threads;
}

/* Memory Reads Distribution Gen7.5 :: TCS EU Active
 *
 * Takes A counter 7, integer-divides it by the EU count, multiplies by 100
 * and divides (in floating point) by the GPU core clock count.  A zero EU
 * count or a zero clock count contributes 0 instead of dividing.
 */
static float
hsw__memory_reads__hs_eu_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 7];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? (raw / eu_count) : 0;
   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)hsw__memory_reads__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Memory Reads Distribution Gen7.5 :: Samples Killed in FS
 *
 * Returns the raw value of A counter 36 from the accumulation buffer.
 */
static uint64_t
hsw__memory_reads__samples_killed_in_ps__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: A 36 READ */
   return accumulator[query->a_offset + 36];
}

/* Memory Reads Distribution Gen7.5 :: Late Depth Test Fails
 *
 * Subtracts the "Samples Killed in FS" value from A counter 39 using
 * unsigned 64-bit arithmetic.
 */
static uint64_t
hsw__memory_reads__post_ps_depth_test_fails__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   /* RPN equation: A 39 READ $SamplesKilledInPs USUB */
   const uint64_t minuend = accumulator[query->a_offset + 39];
   const uint64_t killed_in_ps =
      hsw__memory_reads__samples_killed_in_ps__read(brw, query, accumulator);

   return minuend - killed_in_ps;
}

/* Memory Reads Distribution Gen7.5 :: TCS AVG Active per Thread
 *
 * Divides A counter 7 by the TCS (HS) thread count; yields 0 when the thread
 * count is zero.
 */
static uint64_t
hsw__memory_reads__hs_eu_active_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 7 READ $HsThreads UDIV */
   const uint64_t dividend = accumulator[query->a_offset + 7];
   const uint64_t threads = hsw__memory_reads__hs_threads__read(brw, query, accumulator);

   /* UDIV: a zero divisor produces 0 instead of a division. */
   if (threads == 0)
      return 0;

   return dividend / threads;
}

/* Memory Reads Distribution Gen7.5 :: FS EU Stall
 *
 * Takes A counter 28, integer-divides it by the EU count, multiplies by 100
 * and divides (in floating point) by the GPU core clock count.  A zero EU
 * count or a zero clock count contributes 0 instead of dividing.
 */
static float
hsw__memory_reads__ps_eu_stall__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 28 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 28];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? (raw / eu_count) : 0;
   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)hsw__memory_reads__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Memory Reads Distribution Gen7.5 :: EU Stall
 *
 * Takes A counter 1, integer-divides it by the EU count, multiplies by 100
 * and divides (in floating point) by the GPU core clock count.  A zero EU
 * count or a zero clock count contributes 0 instead of dividing.
 */
static float
hsw__memory_reads__eu_stall__read(struct brw_context *brw,
                                  const struct brw_perf_query_info *query,
                                  uint64_t *accumulator)
{
   /* RPN equation: A 1 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 1];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? (raw / eu_count) : 0;
   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)hsw__memory_reads__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Memory Reads Distribution Gen7.5 :: GtiMscMemoryReads
 *
 * Returns the raw value of B counter 4 from the accumulation buffer.
 */
static uint64_t
hsw__memory_reads__gti_msc_memory_reads__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: B 4 READ */
   return accumulator[query->b_offset + 4];
}

/* Memory Reads Distribution Gen7.5 :: Early Hi-Depth Test Fails
 *
 * Returns the raw value of A counter 33 from the accumulation buffer.
 */
static uint64_t
hsw__memory_reads__hi_depth_test_fails__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: A 33 READ */
   return accumulator[query->a_offset + 33];
}

/* Memory Reads Distribution Gen7.5 :: VS EU Active
 *
 * Takes A counter 2, integer-divides it by the EU count, multiplies by 100
 * and divides (in floating point) by the GPU core clock count.  A zero EU
 * count or a zero clock count contributes 0 instead of dividing.
 */
static float
hsw__memory_reads__vs_eu_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 2 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 2];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? (raw / eu_count) : 0;
   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)hsw__memory_reads__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Memory Reads Distribution Gen7.5 :: CS EU Active
 *
 * Takes A counter 17, integer-divides it by the EU count, multiplies by 100
 * and divides (in floating point) by the GPU core clock count.  A zero EU
 * count or a zero clock count contributes 0 instead of dividing.
 */
static float
hsw__memory_reads__cs_eu_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 17];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? (raw / eu_count) : 0;
   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)hsw__memory_reads__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Memory Reads Distribution Gen7.5 :: CS AVG Stall per Thread
 *
 * Divides A counter 18 by the CS thread count; yields 0 when the thread
 * count is zero.
 */
static uint64_t
hsw__memory_reads__cs_eu_stall_per_thread__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: A 18 READ $CsThreads UDIV */
   const uint64_t dividend = accumulator[query->a_offset + 18];
   const uint64_t threads = hsw__memory_reads__cs_threads__read(brw, query, accumulator);

   /* UDIV: a zero divisor produces 0 instead of a division. */
   if (threads == 0)
      return 0;

   return dividend / threads;
}

/* Memory Reads Distribution Gen7.5 :: GtiVfMemoryReads
 *
 * Returns the raw value of B counter 1 from the accumulation buffer.
 */
static uint64_t
hsw__memory_reads__gti_vf_memory_reads__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: B 1 READ */
   return accumulator[query->b_offset + 1];
}

/* Memory Reads Distribution Gen7.5 :: Late Stencil Test Fails
 *
 * Returns the raw value of A counter 38 from the accumulation buffer.
 */
static uint64_t
hsw__memory_reads__post_ps_stencil_test_fails__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   /* RPN equation: A 38 READ */
   return accumulator[query->a_offset + 38];
}

/* Memory Reads Distribution Gen7.5 :: GtiStcMemoryReads
 *
 * Returns the raw value of B counter 6 from the accumulation buffer.
 */
static uint64_t
hsw__memory_reads__gti_stc_memory_reads__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   /* RPN equation: B 6 READ */
   return accumulator[query->b_offset + 6];
}

/* Memory Reads Distribution Gen7.5 :: VS AVG Active per Thread
 *
 * Divides A counter 2 by the VS thread count; yields 0 when the thread
 * count is zero.
 */
static uint64_t
hsw__memory_reads__vs_eu_active_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 2 READ $VsThreads UDIV */
   const uint64_t dividend = accumulator[query->a_offset + 2];
   const uint64_t threads = hsw__memory_reads__vs_threads__read(brw, query, accumulator);

   /* UDIV: a zero divisor produces 0 instead of a division. */
   if (threads == 0)
      return 0;

   return dividend / threads;
}

/* Memory Reads Distribution Gen7.5 :: GtiRsMemoryReads
 *
 * Returns the raw value of B counter 2 from the accumulation buffer.
 */
static uint64_t
hsw__memory_reads__gti_rs_memory_reads__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   /* RPN equation: B 2 READ */
   return accumulator[query->b_offset + 2];
}

/* Memory Reads Distribution Gen7.5 :: CS EU Stall
 *
 * Takes A counter 18, integer-divides it by the EU count, multiplies by 100
 * and divides (in floating point) by the GPU core clock count.  A zero EU
 * count or a zero clock count contributes 0 instead of dividing.
 */
static float
hsw__memory_reads__cs_eu_stall__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 18];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? (raw / eu_count) : 0;
   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)hsw__memory_reads__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Memory Reads Distribution Gen7.5 :: Samples Written
 *
 * Returns the raw value of A counter 40 from the accumulation buffer.
 */
static uint64_t
hsw__memory_reads__samples_written__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   /* RPN equation: A 40 READ */
   return accumulator[query->a_offset + 40];
}

/* Memory Reads Distribution Gen7.5 :: GtiL3Reads
 *
 * Returns the raw value of C counter 4 from the accumulation buffer.
 */
static uint64_t
hsw__memory_reads__gti_l3_reads__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   /* RPN equation: C 4 READ */
   return accumulator[query->c_offset + 4];
}

/* Memory Reads Distribution Gen7.5 :: VS EU Stall
 *
 * Takes A counter 3, integer-divides it by the EU count, multiplies by 100
 * and divides (in floating point) by the GPU core clock count.  A zero EU
 * count or a zero clock count contributes 0 instead of dividing.
 */
static float
hsw__memory_reads__vs_eu_stall__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   /* RPN equation: A 3 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = accumulator[query->a_offset + 3];
   const uint64_t eu_count = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = (eu_count != 0) ? (raw / eu_count) : 0;
   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)hsw__memory_reads__gpu_core_clocks__read(brw, query, accumulator);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Backing storage for the register programming lists referenced by
 * hsw_memory_reads_query below.  The capacities are fixed at 19 mux entries
 * and 28 B-counter entries; the matching n_mux_regs / n_b_counter_regs fields
 * start at 0. */
static struct brw_perf_query_register_prog hsw_memory_reads_mux_regs[19];
static struct brw_perf_query_register_prog hsw_memory_reads_b_counter_regs[28];

/* Backing storage for up to 54 counter descriptions of this query. */
static struct brw_perf_query_counter hsw_memory_reads_query_counters[54];
/* Description of the "Memory Reads Distribution Gen7.5" OA metric set.  The
 * fields initialized to 0 here are expected to be filled in later
 * (NOTE(review): n_counters is presumably incremented while counters are
 * registered -- confirm against the registration function). */
static struct brw_perf_query_info hsw_memory_reads_query = {
   .kind = OA_COUNTERS,
   .name = "Memory Reads Distribution Gen7.5",
   .guid = "bb5ed49b-2497-4095-94f6-26ba294db88a",
   .counters = hsw_memory_reads_query_counters,
   .n_counters = 0,
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A45_B8_C8,

   /* Accumulation buffer offsets...
    *
    * Slot 0 holds the GPU time, the A block starts at 1, the B block at
    * 46 (= 1 + 45) and the C block at 54 (= 46 + 8), which is consistent
    * with the A45_B8_C8 format name. */
   .gpu_time_offset = 0,
   .a_offset = 1,
   .b_offset = 46,
   .c_offset = 54,
   .mux_regs = hsw_memory_reads_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = hsw_memory_reads_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
};

/* Registers the "Memory Reads Distribution Gen7.5" query with a context.
 *
 * On the first call (detected by query->data_size still being zero) this
 * fills in the file-static hsw_memory_reads_query descriptor: it appends the
 * mux and B-counter register programming, describes every counter (read
 * callback, name, description, GL type/data type, raw maximum and the byte
 * offset/size of its value), and finally computes data_size.  On every call
 * the descriptor is inserted into brw->perfquery.oa_metrics_table, keyed by
 * the query's GUID string.
 */
static void
register_memory_reads_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &hsw_memory_reads_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      /* Mux register programming: 19 register/value pairs, appended in
       * order (fills hsw_memory_reads_mux_regs exactly). */
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000253A4, .val = 0x34300000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025440, .val = 0x2D800000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025444, .val = 0x00000008 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025128, .val = 0x0E600000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025380, .val = 0x00000450 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025390, .val = 0x00052C43 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025384, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025400, .val = 0x00006144 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025408, .val = 0x0A418820 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002540C, .val = 0x000820E6 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025404, .val = 0xFF500000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025100, .val = 0x000005D6 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002510C, .val = 0x0EF00000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025104, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025420, .val = 0x02108421 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025424, .val = 0x00008421 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002541C, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025428, .val = 0x00000000 };

      /* B-counter register programming: 28 register/value pairs, appended
       * in order (fills hsw_memory_reads_b_counter_regs exactly). */
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000274C, .val = 0x76543298 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002748, .val = 0x98989898 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002744, .val = 0x000000E4 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000275C, .val = 0x98A98A98 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002758, .val = 0x88888888 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002754, .val = 0x000C5500 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002750, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002770, .val = 0x0007F81A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002774, .val = 0x0000FC00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002778, .val = 0x0007F82A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000277C, .val = 0x0000FC00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002780, .val = 0x0007F872 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002784, .val = 0x0000FC00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002788, .val = 0x0007F8BA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000278C, .val = 0x0000FC00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002790, .val = 0x0007F87A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002794, .val = 0x0000FC00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002798, .val = 0x0007F8EA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000279C, .val = 0x0000FC00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A0, .val = 0x0007F8E2 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A4, .val = 0x0000FC00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A8, .val = 0x0007F8F2 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027AC, .val = 0x0000FC00 };


      /* Counter descriptions: 54 entries, appended in order (fills
       * hsw_memory_reads_query_counters exactly).  `offset`/`size` give the
       * byte position of each value; every uint64_t counter below sits at a
       * multiple of 8, so a float that precedes one may be followed by 4
       * unused bytes (e.g. offset 48 -> 56). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__ds_eu_stall__read;
      counter->name = "TES EU Stall";
      counter->desc = "The percentage of time in which evaluation shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 12;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__alpha_test_fails__read;
      counter->name = "Alpha Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS alpha test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 16;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__ds_threads__read;
      counter->name = "TES Threads Dispatched";
      counter->desc = "The total number of evaluation shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 24;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__ds_eu_active_per_thread__read;
      counter->name = "TES AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 32;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 40;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__gs_eu_stall__read;
      counter->name = "GS EU Stall";
      counter->desc = "The percentage of time in which geometry shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 48;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 56;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__llc_read_accesses__read;
      counter->name = "LLC GPU Read Accesses";
      counter->desc = "The total number of LLC cache lookups for reads done from the GPU.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 64;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 72;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_memory_reads__read;
      counter->name = "GtiMemoryReads";
      counter->desc = "The total number of GTI memory reads (64B each).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 80;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 88;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__cs_eu_active_per_thread__read;
      counter->name = "CS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 96;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_rcz_memory_reads__read;
      counter->name = "GtiRczMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Depth Cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 104;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gs_eu_stall_per_thread__read;
      counter->name = "GS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 120;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      /* The only counter here whose raw_max is computed from the context
       * rather than being a constant. */
      counter->raw_max = hsw__memory_reads__avg_gpu_core_frequency__max(brw);
      counter->offset = 128;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_rcc_memory_reads__read;
      counter->name = "GtiRccMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Color Cache (Render Color Cache misses).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 136;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__ps_eu_stall_per_thread__read;
      counter->name = "FS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 144;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__ds_eu_active__read;
      counter->name = "TES EU Active";
      counter->desc = "The percentage of time in which evaluation shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 152;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__vs_eu_stall_per_thread__read;
      counter->name = "VS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 160;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__hs_eu_stall__read;
      counter->name = "TCS EU Stall";
      counter->desc = "The percentage of time in which control shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 168;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has being processing GPU commands.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 172;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__ps_eu_active_per_thread__read;
      counter->name = "FS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 176;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_cmd_streamer_memory_reads__read;
      counter->name = "GtiCmdStreamerMemoryReads";
      counter->desc = "The total number of GTI memory reads from Command Streamer.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 184;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 192;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__gs_eu_active__read;
      counter->name = "GS EU Active";
      counter->desc = "The percentage of time in which geometry shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 200;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__ps_eu_active__read;
      counter->name = "FS EU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 204;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__ds_eu_stall_per_thread__read;
      counter->name = "TES AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 208;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gs_eu_active_per_thread__read;
      counter->name = "GS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 216;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_hi_depth_memory_reads__read;
      counter->name = "GtiHiDepthMemoryReads";
      counter->desc = "The total number of GTI memory reads from Hierarchical Depth Cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 224;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__hs_threads__read;
      counter->name = "TCS Threads Dispatched";
      counter->desc = "The total number of control shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 232;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__hs_eu_stall_per_thread__read;
      counter->name = "TCS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 240;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__hs_eu_active__read;
      counter->name = "TCS EU Active";
      counter->desc = "The percentage of time in which control shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 248;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 256;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__post_ps_depth_test_fails__read;
      counter->name = "Late Depth Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 264;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__hs_eu_active_per_thread__read;
      counter->name = "TCS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 272;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__ps_eu_stall__read;
      counter->name = "FS EU Stall";
      counter->desc = "The percentage of time in which fragment shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 280;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 284;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_msc_memory_reads__read;
      counter->name = "GtiMscMemoryReads";
      counter->desc = "The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 288;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 296;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__vs_eu_active__read;
      counter->name = "VS EU Active";
      counter->desc = "The percentage of time in which vertex shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 304;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__cs_eu_active__read;
      counter->name = "CS EU Active";
      counter->desc = "The percentage of time in which compute shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 308;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__cs_eu_stall_per_thread__read;
      counter->name = "CS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 312;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_vf_memory_reads__read;
      counter->name = "GtiVfMemoryReads";
      counter->desc = "The total number of GTI memory reads from Vertex Fetch.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 320;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__post_ps_stencil_test_fails__read;
      counter->name = "Late Stencil Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS stencil test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 328;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_stc_memory_reads__read;
      counter->name = "GtiStcMemoryReads";
      counter->desc = "The total number of GTI memory reads from Stencil Cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 336;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__vs_eu_active_per_thread__read;
      counter->name = "VS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 344;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_rs_memory_reads__read;
      counter->name = "GtiRsMemoryReads";
      counter->desc = "The total number of GTI memory reads from Resource Streamer.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 352;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__cs_eu_stall__read;
      counter->name = "CS EU Stall";
      counter->desc = "The percentage of time in which compute shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 360;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 368;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_l3_reads__read;
      counter->name = "GtiL3Reads";
      counter->desc = "The total number of GTI memory reads from L3 (L3 misses).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 376;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__vs_eu_stall__read;
      counter->name = "VS EU Stall";
      counter->desc = "The percentage of time in which vertex shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 384;
      counter->size = sizeof(float);

      /* `counter` still points at the last counter registered above, which
       * also has the highest offset, so this is the total size in bytes.
       * Setting it non-zero is what stops this block from running again. */
      query->data_size = counter->offset + counter->size;
   }

   /* Done on every call, not just the first: publish the (shared) query in
    * this context's table under its GUID. */
   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Memory Writes Distribution Gen7.5 :: GPU Core Clocks
 *
 * Returns the accumulated value stored seven slots past the query's
 * C-counter base offset.  `brw` is not used by this reader.
 */
static uint64_t
hsw__memory_writes__gpu_core_clocks__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   /* RPN equation: C 7 READ */
   return accumulator[query->c_offset + 7];
}

/* Memory Writes Distribution Gen7.5 :: EU Active
 *
 * RPN equation: A 0 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A-counter slot 0 is integer-divided by the EU count, multiplied by 100
 * (still in uint64_t), and then divided in floating point by the value from
 * hsw__memory_writes__gpu_core_clocks__read().  A zero EU count or a zero
 * clock value produces 0 instead of a division by zero.
 */
static float
hsw__memory_writes__eu_active__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 0];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const uint64_t scaled = per_eu * 100;
   const double clocks =
      hsw__memory_writes__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0.0)
      return 0.0f;

   return (double) scaled / clocks;
}

/* Memory Writes Distribution Gen7.5 :: TES EU Stall
 *
 * RPN equation: A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A-counter slot 13 is integer-divided by the EU count, scaled by 100 and
 * finally divided in floating point by the GPU core clocks value.  Either
 * divisor being zero yields 0.
 */
static float
hsw__memory_writes__ds_eu_stall__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 13];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const uint64_t scaled = per_eu * 100;
   const double clocks =
      hsw__memory_writes__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0.0)
      return 0.0f;

   return (double) scaled / clocks;
}

/* Memory Writes Distribution Gen7.5 :: Alpha Test Fails
 *
 * RPN equation: A 37 READ
 *
 * Returns slot 37 of the A-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__alpha_test_fails__read(struct brw_context *brw,
                                           const struct brw_perf_query_info *query,
                                           uint64_t *accumulator)
{
   return accumulator[query->a_offset + 37];
}

/* Memory Writes Distribution Gen7.5 :: TES Threads Dispatched
 *
 * RPN equation: A 15 READ
 *
 * Returns slot 15 of the A-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__ds_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   return accumulator[query->a_offset + 15];
}

/* Memory Writes Distribution Gen7.5 :: TES AVG Active per Thread
 *
 * RPN equation: A 12 READ $DsThreads UDIV
 *
 * Integer-divides A-counter slot 12 by the result of
 * hsw__memory_writes__ds_threads__read(); returns 0 when that result is 0.
 */
static uint64_t
hsw__memory_writes__ds_eu_active_per_thread__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   const uint64_t total = accumulator[query->a_offset + 12];
   const uint64_t n_threads =
      hsw__memory_writes__ds_threads__read(brw, query, accumulator);

   if (n_threads == 0)
      return 0;

   return total / n_threads;
}

/* Memory Writes Distribution Gen7.5 :: GS Threads Dispatched
 *
 * RPN equation: A 25 READ
 *
 * Returns slot 25 of the A-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__gs_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   return accumulator[query->a_offset + 25];
}

/* Memory Writes Distribution Gen7.5 :: GS EU Stall
 *
 * RPN equation: A 23 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A-counter slot 23 is integer-divided by the EU count, scaled by 100 and
 * finally divided in floating point by the GPU core clocks value.  Either
 * divisor being zero yields 0.
 */
static float
hsw__memory_writes__gs_eu_stall__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 23];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const uint64_t scaled = per_eu * 100;
   const double clocks =
      hsw__memory_writes__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0.0)
      return 0.0f;

   return (double) scaled / clocks;
}

/* Memory Writes Distribution Gen7.5 :: VS Threads Dispatched
 *
 * RPN equation: A 5 READ
 *
 * Returns slot 5 of the A-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__vs_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   return accumulator[query->a_offset + 5];
}

/* Memory Writes Distribution Gen7.5 :: FS Threads Dispatched
 *
 * RPN equation: A 30 READ
 *
 * Returns slot 30 of the A-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__ps_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   return accumulator[query->a_offset + 30];
}

/* Memory Writes Distribution Gen7.5 :: GtiMscMemoryWrites
 *
 * RPN equation: B 4 READ
 *
 * Returns slot 4 of the B-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__gti_msc_memory_writes__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   return accumulator[query->b_offset + 4];
}

/* Memory Writes Distribution Gen7.5 :: CS Threads Dispatched
 *
 * RPN equation: A 20 READ
 *
 * Returns slot 20 of the A-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__cs_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   return accumulator[query->a_offset + 20];
}

/* Memory Writes Distribution Gen7.5 :: CS AVG Active per Thread
 *
 * RPN equation: A 17 READ $CsThreads UDIV
 *
 * Integer-divides A-counter slot 17 by the result of
 * hsw__memory_writes__cs_threads__read(); returns 0 when that result is 0.
 */
static uint64_t
hsw__memory_writes__cs_eu_active_per_thread__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   const uint64_t total = accumulator[query->a_offset + 17];
   const uint64_t n_threads =
      hsw__memory_writes__cs_threads__read(brw, query, accumulator);

   if (n_threads == 0)
      return 0;

   return total / n_threads;
}

/* Memory Writes Distribution Gen7.5 :: GtiCmdStreamerMemoryWrites
 *
 * RPN equation: B 0 READ
 *
 * Returns slot 0 of the B-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__gti_cmd_streamer_memory_writes__read(struct brw_context *brw,
                                                         const struct brw_perf_query_info *query,
                                                         uint64_t *accumulator)
{
   return accumulator[query->b_offset + 0];
}

/* Memory Writes Distribution Gen7.5 :: GS AVG Stall per Thread
 *
 * RPN equation: A 23 READ $GsThreads UDIV
 *
 * Integer-divides A-counter slot 23 by the result of
 * hsw__memory_writes__gs_threads__read(); returns 0 when that result is 0.
 */
static uint64_t
hsw__memory_writes__gs_eu_stall_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   const uint64_t total = accumulator[query->a_offset + 23];
   const uint64_t n_threads =
      hsw__memory_writes__gs_threads__read(brw, query, accumulator);

   if (n_threads == 0)
      return 0;

   return total / n_threads;
}

/* Memory Writes Distribution Gen7.5 :: GtiL3Writes
 *
 * RPN equation: C 4 READ
 *
 * Returns slot 4 of the C-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__gti_l3_writes__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   return accumulator[query->c_offset + 4];
}

/* Memory Writes Distribution Gen7.5 :: GtiHizMemoryWrites
 *
 * RPN equation: B 5 READ
 *
 * Returns slot 5 of the B-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__gti_hiz_memory_writes__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   return accumulator[query->b_offset + 5];
}

/* Memory Writes Distribution Gen7.5 :: GPU Time Elapsed
 *
 * RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV
 *
 * Multiplies the accumulator entry at gpu_time_offset by 1000000000 and
 * integer-divides the product by the timestamp frequency; a zero frequency
 * yields 0.  The multiplication is done in uint64_t before the division, so
 * it wraps if the product does not fit in 64 bits.
 */
static uint64_t
hsw__memory_writes__gpu_time__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   const uint64_t scaled_ticks =
      accumulator[query->gpu_time_offset + 0] * 1000000000;
   const uint64_t frequency = brw->perfquery.sys_vars.timestamp_frequency;

   if (frequency == 0)
      return 0;

   return scaled_ticks / frequency;
}

/* Memory Writes Distribution Gen7.5 :: AVG GPU Core Frequency
 *
 * RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV
 *
 * Multiplies the GPU core clocks value by 1000000000 (in uint64_t) and
 * integer-divides by the value of hsw__memory_writes__gpu_time__read();
 * returns 0 when that divisor is 0.
 */
static uint64_t
hsw__memory_writes__avg_gpu_core_frequency__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   const uint64_t scaled_clocks =
      hsw__memory_writes__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t gpu_time =
      hsw__memory_writes__gpu_time__read(brw, query, accumulator);

   if (gpu_time == 0)
      return 0;

   return scaled_clocks / gpu_time;
}

/* Memory Writes Distribution Gen7.5 :: AVG GPU Core Frequency (maximum)
 *
 * RPN equation: $GpuMaxFrequency
 *
 * Returns the gt_max_freq system variable recorded in the context.
 */
static uint64_t
hsw__memory_writes__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   const uint64_t max_frequency = brw->perfquery.sys_vars.gt_max_freq;

   return max_frequency;
}

/* Memory Writes Distribution Gen7.5 :: FS AVG Stall per Thread
 *
 * RPN equation: A 28 READ $PsThreads UDIV
 *
 * Integer-divides A-counter slot 28 by the result of
 * hsw__memory_writes__ps_threads__read(); returns 0 when that result is 0.
 */
static uint64_t
hsw__memory_writes__ps_eu_stall_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   const uint64_t total = accumulator[query->a_offset + 28];
   const uint64_t n_threads =
      hsw__memory_writes__ps_threads__read(brw, query, accumulator);

   if (n_threads == 0)
      return 0;

   return total / n_threads;
}

/* Memory Writes Distribution Gen7.5 :: GtiRccMemoryWrites
 *
 * RPN equation: B 3 READ
 *
 * Returns slot 3 of the B-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__gti_rcc_memory_writes__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   return accumulator[query->b_offset + 3];
}

/* Memory Writes Distribution Gen7.5 :: TES EU Active
 *
 * RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A-counter slot 12 is integer-divided by the EU count, scaled by 100 and
 * finally divided in floating point by the GPU core clocks value.  Either
 * divisor being zero yields 0.
 */
static float
hsw__memory_writes__ds_eu_active__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 12];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const uint64_t scaled = per_eu * 100;
   const double clocks =
      hsw__memory_writes__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0.0)
      return 0.0f;

   return (double) scaled / clocks;
}

/* Memory Writes Distribution Gen7.5 :: VS AVG Stall per Thread
 *
 * RPN equation: A 3 READ $VsThreads UDIV
 *
 * Integer-divides A-counter slot 3 by the result of
 * hsw__memory_writes__vs_threads__read(); returns 0 when that result is 0.
 */
static uint64_t
hsw__memory_writes__vs_eu_stall_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   const uint64_t total = accumulator[query->a_offset + 3];
   const uint64_t n_threads =
      hsw__memory_writes__vs_threads__read(brw, query, accumulator);

   if (n_threads == 0)
      return 0;

   return total / n_threads;
}

/* Memory Writes Distribution Gen7.5 :: TCS EU Stall
 *
 * RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A-counter slot 8 is integer-divided by the EU count, scaled by 100 and
 * finally divided in floating point by the GPU core clocks value.  Either
 * divisor being zero yields 0.
 */
static float
hsw__memory_writes__hs_eu_stall__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 8];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const uint64_t scaled = per_eu * 100;
   const double clocks =
      hsw__memory_writes__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0.0)
      return 0.0f;

   return (double) scaled / clocks;
}

/* Memory Writes Distribution Gen7.5 :: GPU Busy
 *
 * RPN equation: A 41 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * A-counter slot 41 is multiplied by 100 in uint64_t and then divided in
 * floating point by the GPU core clocks value; a zero clock value yields 0.
 */
static float
hsw__memory_writes__gpu_busy__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   const uint64_t scaled = accumulator[query->a_offset + 41] * 100;
   const double clocks =
      hsw__memory_writes__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0.0)
      return 0.0f;

   return (double) scaled / clocks;
}

/* Memory Writes Distribution Gen7.5 :: FS AVG Active per Thread
 *
 * RPN equation: A 27 READ $PsThreads UDIV
 *
 * Integer-divides A-counter slot 27 by the result of
 * hsw__memory_writes__ps_threads__read(); returns 0 when that result is 0.
 */
static uint64_t
hsw__memory_writes__ps_eu_active_per_thread__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   const uint64_t total = accumulator[query->a_offset + 27];
   const uint64_t n_threads =
      hsw__memory_writes__ps_threads__read(brw, query, accumulator);

   if (n_threads == 0)
      return 0;

   return total / n_threads;
}

/* Memory Writes Distribution Gen7.5 :: Early Depth Test Fails
 *
 * RPN equation: A 35 READ
 *
 * Returns slot 35 of the A-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__early_depth_test_fails__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   return accumulator[query->a_offset + 35];
}

/* Memory Writes Distribution Gen7.5 :: LLC GPU Write Accesses
 *
 * RPN equation: C 6 READ 2 UMUL
 *
 * Returns twice the value in slot 6 of the C-counter region of the
 * accumulator (uint64_t arithmetic).  `brw` is not used by this reader.
 */
static uint64_t
hsw__memory_writes__llc_wr_accesses__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   const uint64_t c6 = accumulator[query->c_offset + 6];

   return c6 * 2;
}

/* Memory Writes Distribution Gen7.5 :: GS EU Active
 *
 * RPN equation: A 22 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A-counter slot 22 is integer-divided by the EU count, scaled by 100 and
 * finally divided in floating point by the GPU core clocks value.  Either
 * divisor being zero yields 0.
 */
static float
hsw__memory_writes__gs_eu_active__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 22];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const uint64_t scaled = per_eu * 100;
   const double clocks =
      hsw__memory_writes__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0.0)
      return 0.0f;

   return (double) scaled / clocks;
}

/* Memory Writes Distribution Gen7.5 :: FS EU Active
 *
 * RPN equation: A 27 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A-counter slot 27 is integer-divided by the EU count, scaled by 100 and
 * finally divided in floating point by the GPU core clocks value.  Either
 * divisor being zero yields 0.
 */
static float
hsw__memory_writes__ps_eu_active__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 27];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const uint64_t scaled = per_eu * 100;
   const double clocks =
      hsw__memory_writes__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0.0)
      return 0.0f;

   return (double) scaled / clocks;
}

/* Memory Writes Distribution Gen7.5 :: GtiStcMemoryWrites
 *
 * RPN equation: B 6 READ
 *
 * Returns slot 6 of the B-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__gti_stc_memory_writes__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   return accumulator[query->b_offset + 6];
}

/* Memory Writes Distribution Gen7.5 :: TES AVG Stall per Thread
 *
 * RPN equation: A 13 READ $DsThreads UDIV
 *
 * Integer-divides A-counter slot 13 by the result of
 * hsw__memory_writes__ds_threads__read(); returns 0 when that result is 0.
 */
static uint64_t
hsw__memory_writes__ds_eu_stall_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   const uint64_t total = accumulator[query->a_offset + 13];
   const uint64_t n_threads =
      hsw__memory_writes__ds_threads__read(brw, query, accumulator);

   if (n_threads == 0)
      return 0;

   return total / n_threads;
}

/* Memory Writes Distribution Gen7.5 :: GS AVG Active per Thread
 *
 * RPN equation: A 22 READ $GsThreads UDIV
 *
 * Integer-divides A-counter slot 22 by the result of
 * hsw__memory_writes__gs_threads__read(); returns 0 when that result is 0.
 */
static uint64_t
hsw__memory_writes__gs_eu_active_per_thread__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   const uint64_t total = accumulator[query->a_offset + 22];
   const uint64_t n_threads =
      hsw__memory_writes__gs_threads__read(brw, query, accumulator);

   if (n_threads == 0)
      return 0;

   return total / n_threads;
}

/* Memory Writes Distribution Gen7.5 :: TCS Threads Dispatched
 *
 * RPN equation: A 10 READ
 *
 * Returns slot 10 of the A-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__hs_threads__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   return accumulator[query->a_offset + 10];
}

/* Memory Writes Distribution Gen7.5 :: TCS AVG Stall per Thread
 *
 * RPN equation: A 8 READ $HsThreads UDIV
 *
 * Integer-divides A-counter slot 8 by the result of
 * hsw__memory_writes__hs_threads__read(); returns 0 when that result is 0.
 */
static uint64_t
hsw__memory_writes__hs_eu_stall_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   const uint64_t total = accumulator[query->a_offset + 8];
   const uint64_t n_threads =
      hsw__memory_writes__hs_threads__read(brw, query, accumulator);

   if (n_threads == 0)
      return 0;

   return total / n_threads;
}

/* Memory Writes Distribution Gen7.5 :: TCS EU Active
 *
 * RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A-counter slot 7 is integer-divided by the EU count, scaled by 100 and
 * finally divided in floating point by the GPU core clocks value.  Either
 * divisor being zero yields 0.
 */
static float
hsw__memory_writes__hs_eu_active__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 7];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const uint64_t scaled = per_eu * 100;
   const double clocks =
      hsw__memory_writes__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0.0)
      return 0.0f;

   return (double) scaled / clocks;
}

/* Memory Writes Distribution Gen7.5 :: Samples Killed in FS
 *
 * RPN equation: A 36 READ
 *
 * Returns slot 36 of the A-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__samples_killed_in_ps__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   return accumulator[query->a_offset + 36];
}

/* Memory Writes Distribution Gen7.5 :: Late Depth Test Fails
 *
 * RPN equation: A 39 READ $SamplesKilledInPs USUB
 *
 * Subtracts the result of hsw__memory_writes__samples_killed_in_ps__read()
 * from A-counter slot 39.  The subtraction is unsigned 64-bit, so it wraps
 * if the subtrahend is the larger value.
 */
static uint64_t
hsw__memory_writes__post_ps_depth_test_fails__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   const uint64_t a39 = accumulator[query->a_offset + 39];
   const uint64_t killed_in_ps =
      hsw__memory_writes__samples_killed_in_ps__read(brw, query, accumulator);

   return a39 - killed_in_ps;
}

/* Memory Writes Distribution Gen7.5 :: TCS AVG Active per Thread
 *
 * RPN equation: A 7 READ $HsThreads UDIV
 *
 * Integer-divides A-counter slot 7 by the result of
 * hsw__memory_writes__hs_threads__read(); returns 0 when that result is 0.
 */
static uint64_t
hsw__memory_writes__hs_eu_active_per_thread__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   const uint64_t total = accumulator[query->a_offset + 7];
   const uint64_t n_threads =
      hsw__memory_writes__hs_threads__read(brw, query, accumulator);

   if (n_threads == 0)
      return 0;

   return total / n_threads;
}

/* Memory Writes Distribution Gen7.5 :: FS EU Stall
 *
 * RPN equation: A 28 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A-counter slot 28 is integer-divided by the EU count, scaled by 100 and
 * finally divided in floating point by the GPU core clocks value.  Either
 * divisor being zero yields 0.
 */
static float
hsw__memory_writes__ps_eu_stall__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 28];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const uint64_t scaled = per_eu * 100;
   const double clocks =
      hsw__memory_writes__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0.0)
      return 0.0f;

   return (double) scaled / clocks;
}

/* Memory Writes Distribution Gen7.5 :: EU Stall
 *
 * RPN equation: A 1 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A-counter slot 1 is integer-divided by the EU count, scaled by 100 and
 * finally divided in floating point by the GPU core clocks value.  Either
 * divisor being zero yields 0.
 */
static float
hsw__memory_writes__eu_stall__read(struct brw_context *brw,
                                   const struct brw_perf_query_info *query,
                                   uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 1];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const uint64_t scaled = per_eu * 100;
   const double clocks =
      hsw__memory_writes__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0.0)
      return 0.0f;

   return (double) scaled / clocks;
}

/* Memory Writes Distribution Gen7.5 :: Early Hi-Depth Test Fails
 *
 * RPN equation: A 33 READ
 *
 * Returns slot 33 of the A-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__hi_depth_test_fails__read(struct brw_context *brw,
                                              const struct brw_perf_query_info *query,
                                              uint64_t *accumulator)
{
   return accumulator[query->a_offset + 33];
}

/* Memory Writes Distribution Gen7.5 :: VS EU Active
 *
 * RPN equation: A 2 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A-counter slot 2 is integer-divided by the EU count, scaled by 100 and
 * finally divided in floating point by the GPU core clocks value.  Either
 * divisor being zero yields 0.
 */
static float
hsw__memory_writes__vs_eu_active__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 2];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const uint64_t scaled = per_eu * 100;
   const double clocks =
      hsw__memory_writes__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0.0)
      return 0.0f;

   return (double) scaled / clocks;
}

/* Memory Writes Distribution Gen7.5 :: CS EU Active
 *
 * RPN equation: A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A-counter slot 17 is integer-divided by the EU count, scaled by 100 and
 * finally divided in floating point by the GPU core clocks value.  Either
 * divisor being zero yields 0.
 */
static float
hsw__memory_writes__cs_eu_active__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 17];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const uint64_t scaled = per_eu * 100;
   const double clocks =
      hsw__memory_writes__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0.0)
      return 0.0f;

   return (double) scaled / clocks;
}

/* Memory Writes Distribution Gen7.5 :: CS AVG Stall per Thread
 *
 * RPN equation: A 18 READ $CsThreads UDIV
 *
 * Integer-divides A-counter slot 18 by the result of
 * hsw__memory_writes__cs_threads__read(); returns 0 when that result is 0.
 */
static uint64_t
hsw__memory_writes__cs_eu_stall_per_thread__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   const uint64_t total = accumulator[query->a_offset + 18];
   const uint64_t n_threads =
      hsw__memory_writes__cs_threads__read(brw, query, accumulator);

   if (n_threads == 0)
      return 0;

   return total / n_threads;
}

/* Memory Writes Distribution Gen7.5 :: Late Stencil Test Fails
 *
 * RPN equation: A 38 READ
 *
 * Returns slot 38 of the A-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__post_ps_stencil_test_fails__read(struct brw_context *brw,
                                                     const struct brw_perf_query_info *query,
                                                     uint64_t *accumulator)
{
   return accumulator[query->a_offset + 38];
}

/* Memory Writes Distribution Gen7.5 :: GtiSoMemoryWrites
 *
 * RPN equation: B 2 READ
 *
 * Returns slot 2 of the B-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__gti_so_memory_writes__read(struct brw_context *brw,
                                               const struct brw_perf_query_info *query,
                                               uint64_t *accumulator)
{
   return accumulator[query->b_offset + 2];
}

/* Memory Writes Distribution Gen7.5 :: VS AVG Active per Thread
 *
 * RPN equation: A 2 READ $VsThreads UDIV
 *
 * Integer-divides A-counter slot 2 by the result of
 * hsw__memory_writes__vs_threads__read(); returns 0 when that result is 0.
 */
static uint64_t
hsw__memory_writes__vs_eu_active_per_thread__read(struct brw_context *brw,
                                                  const struct brw_perf_query_info *query,
                                                  uint64_t *accumulator)
{
   const uint64_t total = accumulator[query->a_offset + 2];
   const uint64_t n_threads =
      hsw__memory_writes__vs_threads__read(brw, query, accumulator);

   if (n_threads == 0)
      return 0;

   return total / n_threads;
}

/* Memory Writes Distribution Gen7.5 :: GtiRczMemoryWrites
 *
 * RPN equation: B 7 READ
 *
 * Returns slot 7 of the B-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__gti_rcz_memory_writes__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   return accumulator[query->b_offset + 7];
}

/* Memory Writes Distribution Gen7.5 :: CS EU Stall
 *
 * RPN equation: A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A-counter slot 18 is integer-divided by the EU count, scaled by 100 and
 * finally divided in floating point by the GPU core clocks value.  Either
 * divisor being zero yields 0.
 */
static float
hsw__memory_writes__cs_eu_stall__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 18];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const uint64_t scaled = per_eu * 100;
   const double clocks =
      hsw__memory_writes__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0.0)
      return 0.0f;

   return (double) scaled / clocks;
}

/* Memory Writes Distribution Gen7.5 :: Samples Written
 *
 * RPN equation: A 40 READ
 *
 * Returns slot 40 of the A-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__samples_written__read(struct brw_context *brw,
                                          const struct brw_perf_query_info *query,
                                          uint64_t *accumulator)
{
   return accumulator[query->a_offset + 40];
}

/* Memory Writes Distribution Gen7.5 :: GtiMemoryWrites
 *
 * RPN equation: C 5 READ
 *
 * Returns slot 5 of the C-counter region of the accumulator.  `brw` is not
 * used by this reader.
 */
static uint64_t
hsw__memory_writes__gti_memory_writes__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   return accumulator[query->c_offset + 5];
}

/* Memory Writes Distribution Gen7.5 :: VS EU Stall
 *
 * RPN equation: A 3 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * A-counter slot 3 is integer-divided by the EU count, scaled by 100 and
 * finally divided in floating point by the GPU core clocks value.  Either
 * divisor being zero yields 0.
 */
static float
hsw__memory_writes__vs_eu_stall__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   const uint64_t raw = accumulator[query->a_offset + 3];
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const uint64_t scaled = per_eu * 100;
   const double clocks =
      hsw__memory_writes__gpu_core_clocks__read(brw, query, accumulator);

   if (clocks == 0.0)
      return 0.0f;

   return (double) scaled / clocks;
}

/* Backing storage for the register programming lists referenced by
 * hsw_memory_writes_query below.  The mux list is filled in by
 * register_memory_writes_counter_query(), which appends entries one at a
 * time and bumps n_mux_regs for each; the B-counter list is presumably
 * filled the same way (that part of the function is not shown here).
 */
static struct brw_perf_query_register_prog hsw_memory_writes_mux_regs[19];
static struct brw_perf_query_register_prog hsw_memory_writes_b_counter_regs[28];

/* Counter descriptor storage and the query descriptor for the
 * "Memory Writes Distribution Gen7.5" metric set.  All element counts start
 * at zero.  data_size is not set here, so it starts out zero;
 * register_memory_writes_counter_query() treats a zero data_size as
 * "not yet initialized".
 */
static struct brw_perf_query_counter hsw_memory_writes_query_counters[53];
static struct brw_perf_query_info hsw_memory_writes_query = {
   .kind = OA_COUNTERS,
   .name = "Memory Writes Distribution Gen7.5",
   .guid = "3358d639-9b5f-45ab-976d-9b08cbfc6240",
   .counters = hsw_memory_writes_query_counters,
   .n_counters = 0,
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A45_B8_C8,

   /* Accumulation buffer offsets: base indices into the uint64_t accumulator
    * array that the hsw__memory_writes__*__read() functions add their slot
    * number to.  The values are consistent with the A45_B8_C8 format name:
    * one GPU time slot, then 45 A slots, then 8 B slots, then the C slots.
    */
   .gpu_time_offset = 0,
   .a_offset = 1,
   .b_offset = 46,
   .c_offset = 54,
   .mux_regs = hsw_memory_writes_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = hsw_memory_writes_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
};

/* Registers the "Memory Writes Distribution Gen7.5" metric set with brw.
 *
 * The first call (recognised by query->data_size still being zero) fills in
 * the file-static hsw_memory_writes_query description: 19 mux_regs entries,
 * 28 b_counter_regs entries and 53 counter descriptors, and finally stores
 * the total result size in query->data_size.  Later calls skip all of that.
 *
 * Every call, first or not, inserts the query into
 * brw->perfquery.oa_metrics_table keyed by the query's GUID string.
 */
static void
register_memory_writes_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &hsw_memory_writes_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      /* Register/value pairs appended to query->mux_regs; the 19 entries
       * exactly fill hsw_memory_writes_mux_regs[19]. */
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000253A4, .val = 0x34300000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025440, .val = 0x01500000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025444, .val = 0x00000120 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025128, .val = 0x0C200000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025380, .val = 0x00000450 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025390, .val = 0x00052C43 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025384, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025400, .val = 0x00007184 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025408, .val = 0x0A418820 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002540C, .val = 0x000820E6 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025404, .val = 0xFF500000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025100, .val = 0x000005D6 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002510C, .val = 0x1E700000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025104, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025420, .val = 0x02108421 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025424, .val = 0x00008421 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002541C, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025428, .val = 0x00000000 };

      /* Register/value pairs appended to query->b_counter_regs; the 28
       * entries exactly fill hsw_memory_writes_b_counter_regs[28]. */
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0xF0800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000274C, .val = 0x76543298 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002748, .val = 0x98989898 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002744, .val = 0x000000E4 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000275C, .val = 0xBABABABA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002758, .val = 0x88888888 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002754, .val = 0x000C5500 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002750, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002770, .val = 0x0007F81A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002774, .val = 0x0000FC00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002778, .val = 0x0007F82A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000277C, .val = 0x0000FC00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002780, .val = 0x0007F822 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002784, .val = 0x0000FC00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002788, .val = 0x0007F8BA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000278C, .val = 0x0000FC00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002790, .val = 0x0007F87A };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002794, .val = 0x0000FC00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002798, .val = 0x0007F8EA };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0000279C, .val = 0x0000FC00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A0, .val = 0x0007F8E2 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A4, .val = 0x0000FC00 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027A8, .val = 0x0007F8F2 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000027AC, .val = 0x0000FC00 };


      /* Counter descriptors (53 in total, exactly filling
       * hsw_memory_writes_query_counters[53]).  Each descriptor records its
       * read callback, user-visible name/description, GL type enums, an
       * optional maximum, and the byte offset/size of its slot in the
       * result layout.  Where a 4-byte float slot is followed by a
       * uint64_t slot, the offsets below skip 4 bytes so that every
       * uint64_t slot starts on a multiple of 8. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__ds_eu_stall__read;
      counter->name = "TES EU Stall";
      counter->desc = "The percentage of time in which evaluation shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 12;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__alpha_test_fails__read;
      counter->name = "Alpha Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS alpha test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 16;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__ds_threads__read;
      counter->name = "TES Threads Dispatched";
      counter->desc = "The total number of evaluation shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 24;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__ds_eu_active_per_thread__read;
      counter->name = "TES AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 32;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 40;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__gs_eu_stall__read;
      counter->name = "GS EU Stall";
      counter->desc = "The percentage of time in which geometry shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 48;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 56;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 64;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gti_msc_memory_writes__read;
      counter->name = "GtiMscMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 72;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 80;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__cs_eu_active_per_thread__read;
      counter->name = "CS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 88;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gti_cmd_streamer_memory_writes__read;
      counter->name = "GtiCmdStreamerMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Command Streamer.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 96;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gs_eu_stall_per_thread__read;
      counter->name = "GS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 104;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gti_l3_writes__read;
      counter->name = "GtiL3Writes";
      counter->desc = "The total number of GTI memory writes from L3 (L3 invalidations).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gti_hiz_memory_writes__read;
      counter->name = "GtiHizMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Hierarchical Depth Cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 120;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 128;
      counter->size = sizeof(uint64_t);

      /* The only descriptor whose raw_max is computed from brw at
       * registration time instead of being a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = hsw__memory_writes__avg_gpu_core_frequency__max(brw);
      counter->offset = 136;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__ps_eu_stall_per_thread__read;
      counter->name = "FS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 144;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gti_rcc_memory_writes__read;
      counter->name = "GtiRccMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 152;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__ds_eu_active__read;
      counter->name = "TES EU Active";
      counter->desc = "The percentage of time in which evaluation shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 160;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__vs_eu_stall_per_thread__read;
      counter->name = "VS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 168;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__hs_eu_stall__read;
      counter->name = "TCS EU Stall";
      counter->desc = "The percentage of time in which control shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 176;
      counter->size = sizeof(float);

      /* NOTE(review): "has being" in the description below reads like a
       * typo for "has been".  The string is stored in counter->desc, so it
       * is left untouched here; if confirmed, fix it in the generator's
       * input rather than in this generated file. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has being processing GPU commands.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 180;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__ps_eu_active_per_thread__read;
      counter->name = "FS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 184;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 192;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__llc_wr_accesses__read;
      counter->name = "LLC GPU Write Accesses";
      counter->desc = "The total number of LLC cache lookups for write done from the GPU (32B writes).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 200;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__gs_eu_active__read;
      counter->name = "GS EU Active";
      counter->desc = "The percentage of time in which geometry shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 208;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__ps_eu_active__read;
      counter->name = "FS EU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 212;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gti_stc_memory_writes__read;
      counter->name = "GtiStcMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stencil Cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 216;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__ds_eu_stall_per_thread__read;
      counter->name = "TES AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 224;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gs_eu_active_per_thread__read;
      counter->name = "GS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 232;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__hs_threads__read;
      counter->name = "TCS Threads Dispatched";
      counter->desc = "The total number of control shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 240;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__hs_eu_stall_per_thread__read;
      counter->name = "TCS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 248;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__hs_eu_active__read;
      counter->name = "TCS EU Active";
      counter->desc = "The percentage of time in which control shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 256;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 264;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__post_ps_depth_test_fails__read;
      counter->name = "Late Depth Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 272;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__hs_eu_active_per_thread__read;
      counter->name = "TCS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 280;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__ps_eu_stall__read;
      counter->name = "FS EU Stall";
      counter->desc = "The percentage of time in which fragment shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 288;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 292;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 296;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__vs_eu_active__read;
      counter->name = "VS EU Active";
      counter->desc = "The percentage of time in which vertex shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 304;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__cs_eu_active__read;
      counter->name = "CS EU Active";
      counter->desc = "The percentage of time in which compute shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 308;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__cs_eu_stall_per_thread__read;
      counter->name = "CS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 312;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__post_ps_stencil_test_fails__read;
      counter->name = "Late Stencil Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS stencil test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 320;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gti_so_memory_writes__read;
      counter->name = "GtiSoMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stream Output.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 328;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__vs_eu_active_per_thread__read;
      counter->name = "VS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 336;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gti_rcz_memory_writes__read;
      counter->name = "GtiRczMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Depth Cache.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 344;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__cs_eu_stall__read;
      counter->name = "CS EU Stall";
      counter->desc = "The percentage of time in which compute shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 352;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 360;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gti_memory_writes__read;
      counter->name = "GtiMemoryWrites";
      counter->desc = "The total number of GTI memory writes (64B each).";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 368;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__vs_eu_stall__read;
      counter->name = "VS EU Stall";
      counter->desc = "The percentage of time in which vertex shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 376;
      counter->size = sizeof(float);

      /* counter still points at the last descriptor, so this is the end of
       * the final slot (376 + sizeof(float)).  A non-zero data_size is also
       * what makes later calls skip this whole initialisation. */
      query->data_size = counter->offset + counter->size;
   }

   /* Done on every call: publish the query under its GUID string. */
   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

/* Metric set SamplerBalance :: GPU Core Clocks
 *
 * Returns, unmodified, the accumulated value stored seven slots past
 * query->b_offset in the accumulator.  brw is not consulted.
 */
static uint64_t
hsw__sampler_balance__gpu_core_clocks__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: B 7 READ */
   return accumulator[query->b_offset + 7];
}

/* SamplerBalance metric set: EU Active.
 *
 * RPN equation: A 0 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Counter A0 is divided by the EU count (integer division), multiplied by
 * 100 and finally divided by the GPU core clock count in floating point.
 * A zero EU count or a zero clock count produces 0 rather than a division
 * by zero.
 */
static float
hsw__sampler_balance__eu_active__read(struct brw_context *brw,
                                      const struct brw_perf_query_info *query,
                                      uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 0];
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks == 0)
      return 0;

   return scaled / core_clocks;
}

/* SamplerBalance metric set: TES EU Stall.
 *
 * RPN equation: A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Counter A13 is divided by the EU count (integer division), multiplied by
 * 100 and finally divided by the GPU core clock count in floating point.
 * A zero EU count or a zero clock count produces 0 rather than a division
 * by zero.
 */
static float
hsw__sampler_balance__ds_eu_stall__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 13];
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks == 0)
      return 0;

   return scaled / core_clocks;
}

/* SamplerBalance metric set: Sampler L2 cache misses (ss3).
 *
 * RPN equation: C 1 READ C 0 READ UADD
 *
 * Returns the sum of accumulator slots C1 and C0.
 */
static uint64_t
hsw__sampler_balance__sampler3_l2_cache_misses__read(struct brw_context *brw,
                                                     const struct brw_perf_query_info *query,
                                                     uint64_t *accumulator)
{
   const uint64_t c1 = accumulator[query->c_offset + 1];
   const uint64_t c0 = accumulator[query->c_offset + 0];

   return c1 + c0;
}

/* SamplerBalance metric set: Alpha Test Fails.
 *
 * RPN equation: A 37 READ
 *
 * Returns the accumulator slot located 37 entries past query->a_offset,
 * unmodified.
 */
static uint64_t
hsw__sampler_balance__alpha_test_fails__read(struct brw_context *brw,
                                             const struct brw_perf_query_info *query,
                                             uint64_t *accumulator)
{
   return accumulator[query->a_offset + 37];
}

/* SamplerBalance metric set: TES Threads Dispatched.
 *
 * RPN equation: A 15 READ
 *
 * Returns the accumulator slot located 15 entries past query->a_offset,
 * unmodified.
 */
static uint64_t
hsw__sampler_balance__ds_threads__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   return accumulator[query->a_offset + 15];
}

/* SamplerBalance metric set: TES AVG Active per Thread.
 *
 * RPN equation: A 12 READ $DsThreads UDIV
 *
 * Integer quotient of counter A12 by the TES thread count; returns 0 when
 * no TES threads were counted.
 */
static uint64_t
hsw__sampler_balance__ds_eu_active_per_thread__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   const uint64_t threads =
      hsw__sampler_balance__ds_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return accumulator[query->a_offset + 12] / threads;
}

/* SamplerBalance metric set: GS Threads Dispatched.
 *
 * RPN equation: A 25 READ
 *
 * Returns the accumulator slot located 25 entries past query->a_offset,
 * unmodified.
 */
static uint64_t
hsw__sampler_balance__gs_threads__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   return accumulator[query->a_offset + 25];
}

/* SamplerBalance metric set: GS EU Stall.
 *
 * RPN equation: A 23 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Counter A23 is divided by the EU count (integer division), multiplied by
 * 100 and finally divided by the GPU core clock count in floating point.
 * A zero EU count or a zero clock count produces 0 rather than a division
 * by zero.
 */
static float
hsw__sampler_balance__gs_eu_stall__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 23];
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks == 0)
      return 0;

   return scaled / core_clocks;
}

/* SamplerBalance metric set: CS EU Active.
 *
 * RPN equation: A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Counter A17 is divided by the EU count (integer division), multiplied by
 * 100 and finally divided by the GPU core clock count in floating point.
 * A zero EU count or a zero clock count produces 0 rather than a division
 * by zero.
 */
static float
hsw__sampler_balance__cs_eu_active__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 17];
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks == 0)
      return 0;

   return scaled / core_clocks;
}

/* SamplerBalance metric set: VS EU Active.
 *
 * RPN equation: A 2 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Counter A2 is divided by the EU count (integer division), multiplied by
 * 100 and finally divided by the GPU core clock count in floating point.
 * A zero EU count or a zero clock count produces 0 rather than a division
 * by zero.
 */
static float
hsw__sampler_balance__vs_eu_active__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 2];
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks == 0)
      return 0;

   return scaled / core_clocks;
}

/* SamplerBalance metric set: TCS EU Active.
 *
 * RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Counter A7 is divided by the EU count (integer division), multiplied by
 * 100 and finally divided by the GPU core clock count in floating point.
 * A zero EU count or a zero clock count produces 0 rather than a division
 * by zero.
 */
static float
hsw__sampler_balance__hs_eu_active__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 7];
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks == 0)
      return 0;

   return scaled / core_clocks;
}

/* SamplerBalance metric set: TES EU Active.
 *
 * RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Counter A12 is divided by the EU count (integer division), multiplied by
 * 100 and finally divided by the GPU core clock count in floating point.
 * A zero EU count or a zero clock count produces 0 rather than a division
 * by zero.
 */
static float
hsw__sampler_balance__ds_eu_active__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 12];
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks == 0)
      return 0;

   return scaled / core_clocks;
}

/* SamplerBalance metric set: GS EU Active.
 *
 * RPN equation: A 22 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Counter A22 is divided by the EU count (integer division), multiplied by
 * 100 and finally divided by the GPU core clock count in floating point.
 * A zero EU count or a zero clock count produces 0 rather than a division
 * by zero.
 */
static float
hsw__sampler_balance__gs_eu_active__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 22];
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks == 0)
      return 0;

   return scaled / core_clocks;
}

/* SamplerBalance metric set: FS EU Active.
 *
 * RPN equation: A 27 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Counter A27 is divided by the EU count (integer division), multiplied by
 * 100 and finally divided by the GPU core clock count in floating point.
 * A zero EU count or a zero clock count produces 0 rather than a division
 * by zero.
 */
static float
hsw__sampler_balance__ps_eu_active__read(struct brw_context *brw,
                                         const struct brw_perf_query_info *query,
                                         uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 27];
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks == 0)
      return 0;

   return scaled / core_clocks;
}

/* SamplerBalance metric set: CS EU Stall.
 *
 * RPN equation: A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Counter A18 is divided by the EU count (integer division), multiplied by
 * 100 and finally divided by the GPU core clock count in floating point.
 * A zero EU count or a zero clock count produces 0 rather than a division
 * by zero.
 */
static float
hsw__sampler_balance__cs_eu_stall__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 18];
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks == 0)
      return 0;

   return scaled / core_clocks;
}

/* SamplerBalance metric set: EU Stall.
 *
 * RPN equation: A 1 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Counter A1 is divided by the EU count (integer division), multiplied by
 * 100 and finally divided by the GPU core clock count in floating point.
 * A zero EU count or a zero clock count produces 0 rather than a division
 * by zero.
 */
static float
hsw__sampler_balance__eu_stall__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 1];
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks == 0)
      return 0;

   return scaled / core_clocks;
}

/* SamplerBalance metric set: VS EU Stall.
 *
 * RPN equation: A 3 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Counter A3 is divided by the EU count (integer division), multiplied by
 * 100 and finally divided by the GPU core clock count in floating point.
 * A zero EU count or a zero clock count produces 0 rather than a division
 * by zero.
 */
static float
hsw__sampler_balance__vs_eu_stall__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 3];
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks == 0)
      return 0;

   return scaled / core_clocks;
}

/* SamplerBalance metric set: TCS EU Stall.
 *
 * RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Counter A8 is divided by the EU count (integer division), multiplied by
 * 100 and finally divided by the GPU core clock count in floating point.
 * A zero EU count or a zero clock count produces 0 rather than a division
 * by zero.
 */
static float
hsw__sampler_balance__hs_eu_stall__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 8];
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks == 0)
      return 0;

   return scaled / core_clocks;
}

/* SamplerBalance metric set: FS EU Stall.
 *
 * RPN equation: A 28 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Counter A28 is divided by the EU count (integer division), multiplied by
 * 100 and finally divided by the GPU core clock count in floating point.
 * A zero EU count or a zero clock count produces 0 rather than a division
 * by zero.
 */
static float
hsw__sampler_balance__ps_eu_stall__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t n_eus = brw->perfquery.sys_vars.n_eus;
   const uint64_t raw = accumulator[query->a_offset + 28];
   const uint64_t per_eu = n_eus ? raw / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks == 0)
      return 0;

   return scaled / core_clocks;
}

/* SamplerBalance metric set: GPU Time Elapsed.
 *
 * RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV
 *
 * The accumulated GPU_TIME slot is multiplied by 1000000000 and divided by
 * the timestamp frequency using 64-bit unsigned arithmetic.  A zero
 * frequency yields 0.
 */
static uint64_t
hsw__sampler_balance__gpu_time__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   const uint64_t frequency = brw->perfquery.sys_vars.timestamp_frequency;
   const uint64_t scaled_ticks =
      accumulator[query->gpu_time_offset + 0] * 1000000000;

   if (frequency == 0)
      return 0;

   return scaled_ticks / frequency;
}

/* SamplerBalance metric set: CS Duration.
 *
 * Infix form of the RPN equation (all operations are 64-bit unsigned):
 *
 *    active = A17 * A0 / (A2 + A7 + A12 + A17 + A22 + A27)
 *    stall  = A18 * A1 / (A3 + A8 + A13 + A18 + A23 + A28)
 *    result = (active + stall) * $GpuTime /
 *             ($GpuCoreClocks * $EuCoresTotalCount * 1000)
 *
 * A17/A18 are the slots read by the CS EU Active/Stall counters, A0/A1 by
 * the overall EU Active/Stall counters, and the two sums cover the
 * per-stage active and stall slots.  Any quotient whose divisor is zero
 * evaluates to 0.
 */
static uint64_t
hsw__sampler_balance__cs_duration__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t *a = accumulator + query->a_offset;

   const uint64_t active_sum = a[2] + a[7] + a[12] + a[17] + a[22] + a[27];
   const uint64_t stall_sum = a[3] + a[8] + a[13] + a[18] + a[23] + a[28];

   const uint64_t active_share = active_sum ? (a[17] * a[0]) / active_sum : 0;
   const uint64_t stall_share = stall_sum ? (a[18] * a[1]) / stall_sum : 0;

   const uint64_t numerator = (active_share + stall_share) *
      hsw__sampler_balance__gpu_time__read(brw, query, accumulator);
   const uint64_t denominator =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator) *
      brw->perfquery.sys_vars.n_eus * 1000;

   if (denominator == 0)
      return 0;

   return numerator / denominator;
}

/* SamplerBalance metric set: VS Threads Dispatched.
 *
 * RPN equation: A 5 READ
 *
 * Returns the accumulator slot located 5 entries past query->a_offset,
 * unmodified.
 */
static uint64_t
hsw__sampler_balance__vs_threads__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   return accumulator[query->a_offset + 5];
}

/* SamplerBalance metric set: FS Threads Dispatched.
 *
 * RPN equation: A 30 READ
 *
 * Returns the accumulator slot located 30 entries past query->a_offset,
 * unmodified.
 */
static uint64_t
hsw__sampler_balance__ps_threads__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   return accumulator[query->a_offset + 30];
}

/* SamplerBalance metric set: TES Duration.
 *
 * Infix form of the RPN equation (all operations are 64-bit unsigned):
 *
 *    active = A12 * A0 / (A2 + A7 + A12 + A17 + A22 + A27)
 *    stall  = A13 * A1 / (A3 + A8 + A13 + A18 + A23 + A28)
 *    result = (active + stall) * $GpuTime /
 *             ($GpuCoreClocks * $EuCoresTotalCount * 1000)
 *
 * A12/A13 are the slots read by the TES EU Active/Stall counters, A0/A1 by
 * the overall EU Active/Stall counters, and the two sums cover the
 * per-stage active and stall slots.  Any quotient whose divisor is zero
 * evaluates to 0.
 */
static uint64_t
hsw__sampler_balance__ds_duration__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t *a = accumulator + query->a_offset;

   const uint64_t active_sum = a[2] + a[7] + a[12] + a[17] + a[22] + a[27];
   const uint64_t stall_sum = a[3] + a[8] + a[13] + a[18] + a[23] + a[28];

   const uint64_t active_share = active_sum ? (a[12] * a[0]) / active_sum : 0;
   const uint64_t stall_share = stall_sum ? (a[13] * a[1]) / stall_sum : 0;

   const uint64_t numerator = (active_share + stall_share) *
      hsw__sampler_balance__gpu_time__read(brw, query, accumulator);
   const uint64_t denominator =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator) *
      brw->perfquery.sys_vars.n_eus * 1000;

   if (denominator == 0)
      return 0;

   return numerator / denominator;
}

/* SamplerBalance metric set: GS Duration.
 *
 * Infix form of the RPN equation (all operations are 64-bit unsigned):
 *
 *    active = A22 * A0 / (A2 + A7 + A12 + A17 + A22 + A27)
 *    stall  = A23 * A1 / (A3 + A8 + A13 + A18 + A23 + A28)
 *    result = (active + stall) * $GpuTime /
 *             ($GpuCoreClocks * $EuCoresTotalCount * 1000)
 *
 * A22/A23 are the slots read by the GS EU Active/Stall counters, A0/A1 by
 * the overall EU Active/Stall counters, and the two sums cover the
 * per-stage active and stall slots.  Any quotient whose divisor is zero
 * evaluates to 0.
 */
static uint64_t
hsw__sampler_balance__gs_duration__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t *a = accumulator + query->a_offset;

   const uint64_t active_sum = a[2] + a[7] + a[12] + a[17] + a[22] + a[27];
   const uint64_t stall_sum = a[3] + a[8] + a[13] + a[18] + a[23] + a[28];

   const uint64_t active_share = active_sum ? (a[22] * a[0]) / active_sum : 0;
   const uint64_t stall_share = stall_sum ? (a[23] * a[1]) / stall_sum : 0;

   const uint64_t numerator = (active_share + stall_share) *
      hsw__sampler_balance__gpu_time__read(brw, query, accumulator);
   const uint64_t denominator =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator) *
      brw->perfquery.sys_vars.n_eus * 1000;

   if (denominator == 0)
      return 0;

   return numerator / denominator;
}

/* SamplerBalance metric set: CS Threads Dispatched.
 *
 * RPN equation: A 20 READ
 *
 * Returns the accumulator slot located 20 entries past query->a_offset,
 * unmodified.
 */
static uint64_t
hsw__sampler_balance__cs_threads__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   return accumulator[query->a_offset + 20];
}

/* SamplerBalance metric set: CS AVG Active per Thread.
 *
 * RPN equation: A 17 READ $CsThreads UDIV
 *
 * Integer quotient of counter A17 by the CS thread count; returns 0 when
 * no CS threads were counted.
 */
static uint64_t
hsw__sampler_balance__cs_eu_active_per_thread__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   const uint64_t threads =
      hsw__sampler_balance__cs_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return accumulator[query->a_offset + 17] / threads;
}

/* SamplerBalance metric set: Sampler L2 cache misses.
 *
 * RPN equation: C 7 READ C 6 READ UADD C 5 READ UADD C 4 READ UADD
 *               C 3 READ UADD C 2 READ UADD C 1 READ UADD C 0 READ UADD
 *
 * Returns the sum of accumulator slots C7 down to C0.
 */
static uint64_t
hsw__sampler_balance__sampler_l2_cache_misses__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   uint64_t total = 0;

   /* Walk from C7 down to C0, the order in which the equation lists them. */
   for (int slot = 7; slot >= 0; slot--)
      total += accumulator[query->c_offset + slot];

   return total;
}

/* SamplerBalance metric set: GS AVG Stall per Thread.
 *
 * RPN equation: A 23 READ $GsThreads UDIV
 *
 * Integer quotient of counter A23 by the GS thread count; returns 0 when
 * no GS threads were counted.
 */
static uint64_t
hsw__sampler_balance__gs_eu_stall_per_thread__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   const uint64_t threads =
      hsw__sampler_balance__gs_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return accumulator[query->a_offset + 23] / threads;
}

/* SamplerBalance metric set: Sampler L2 cache misses (ss2).
 *
 * RPN equation: C 3 READ C 2 READ UADD
 *
 * Returns the sum of accumulator slots C3 and C2.
 */
static uint64_t
hsw__sampler_balance__sampler2_l2_cache_misses__read(struct brw_context *brw,
                                                     const struct brw_perf_query_info *query,
                                                     uint64_t *accumulator)
{
   const uint64_t c3 = accumulator[query->c_offset + 3];
   const uint64_t c2 = accumulator[query->c_offset + 2];

   return c3 + c2;
}

/* SamplerBalance metric set: AVG GPU Core Frequency.
 *
 * RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV
 *
 * The GPU core clock count is multiplied by 1000000000 and divided by the
 * GPU Time Elapsed value (64-bit unsigned arithmetic).  Returns 0 when the
 * elapsed time is zero.
 */
static uint64_t
hsw__sampler_balance__avg_gpu_core_frequency__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   const uint64_t scaled_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator) * 1000000000;
   const uint64_t elapsed =
      hsw__sampler_balance__gpu_time__read(brw, query, accumulator);

   if (elapsed == 0)
      return 0;

   return scaled_clocks / elapsed;
}

/* SamplerBalance metric set: AVG GPU Core Frequency (maximum value).
 *
 * RPN equation: $GpuMaxFrequency
 *
 * Reports the gt_max_freq system variable held in brw->perfquery.
 */
static uint64_t
hsw__sampler_balance__avg_gpu_core_frequency__max(struct brw_context *brw)
{
   const uint64_t max_frequency = brw->perfquery.sys_vars.gt_max_freq;

   return max_frequency;
}

/* SamplerBalance metric set: EU Idle.
 *
 * RPN equation: 100 $EuActive $EuStall FADD FSUB
 *
 * Computes 100 minus the sum of the EU Active and EU Stall values of this
 * metric set.
 */
static float
hsw__sampler_balance__eu_idle__read(struct brw_context *brw,
                                    const struct brw_perf_query_info *query,
                                    uint64_t *accumulator)
{
   const float active =
      hsw__sampler_balance__eu_active__read(brw, query, accumulator);
   const float stall =
      hsw__sampler_balance__eu_stall__read(brw, query, accumulator);
   const double occupied = active + stall;

   return 100 - occupied;
}

/* SamplerBalance metric set: FS AVG Stall per Thread.
 *
 * RPN equation: A 28 READ $PsThreads UDIV
 *
 * Integer quotient of counter A28 by the FS thread count; returns 0 when
 * no FS threads were counted.
 */
static uint64_t
hsw__sampler_balance__ps_eu_stall_per_thread__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   const uint64_t threads =
      hsw__sampler_balance__ps_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return accumulator[query->a_offset + 28] / threads;
}

/* SamplerBalance metric set: VS AVG Stall per Thread.
 *
 * RPN equation: A 3 READ $VsThreads UDIV
 *
 * Integer quotient of counter A3 by the VS thread count; returns 0 when
 * no VS threads were counted.
 */
static uint64_t
hsw__sampler_balance__vs_eu_stall_per_thread__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   const uint64_t threads =
      hsw__sampler_balance__vs_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return accumulator[query->a_offset + 3] / threads;
}

/* SamplerBalance metric set: GPU Busy.
 *
 * RPN equation: A 41 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Counter A41 is multiplied by 100 as a 64-bit unsigned integer and then
 * divided by the GPU core clock count in floating point.  A zero clock
 * count yields 0.
 */
static float
hsw__sampler_balance__gpu_busy__read(struct brw_context *brw,
                                     const struct brw_perf_query_info *query,
                                     uint64_t *accumulator)
{
   const double scaled = accumulator[query->a_offset + 41] * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator);

   if (core_clocks == 0)
      return 0;

   return scaled / core_clocks;
}

/* SamplerBalance metric set: FS AVG Active per Thread.
 *
 * RPN equation: A 27 READ $PsThreads UDIV
 *
 * Integer quotient of counter A27 by the FS thread count; returns 0 when
 * no FS threads were counted.
 */
static uint64_t
hsw__sampler_balance__ps_eu_active_per_thread__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   const uint64_t threads =
      hsw__sampler_balance__ps_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return accumulator[query->a_offset + 27] / threads;
}

/* SamplerBalance metric set: Early Depth Test Fails.
 *
 * RPN equation: A 35 READ
 *
 * Returns the accumulator slot located 35 entries past query->a_offset,
 * unmodified.
 */
static uint64_t
hsw__sampler_balance__early_depth_test_fails__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   return accumulator[query->a_offset + 35];
}

/* SamplerBalance metric set: TCS Duration.
 *
 * Infix form of the RPN equation (all operations are 64-bit unsigned):
 *
 *    active = A7 * A0 / (A2 + A7 + A12 + A17 + A22 + A27)
 *    stall  = A8 * A1 / (A3 + A8 + A13 + A18 + A23 + A28)
 *    result = (active + stall) * $GpuTime /
 *             ($GpuCoreClocks * $EuCoresTotalCount * 1000)
 *
 * A7/A8 are the slots read by the TCS EU Active/Stall counters, A0/A1 by
 * the overall EU Active/Stall counters, and the two sums cover the
 * per-stage active and stall slots.  Any quotient whose divisor is zero
 * evaluates to 0.
 */
static uint64_t
hsw__sampler_balance__hs_duration__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   const uint64_t *a = accumulator + query->a_offset;

   const uint64_t active_sum = a[2] + a[7] + a[12] + a[17] + a[22] + a[27];
   const uint64_t stall_sum = a[3] + a[8] + a[13] + a[18] + a[23] + a[28];

   const uint64_t active_share = active_sum ? (a[7] * a[0]) / active_sum : 0;
   const uint64_t stall_share = stall_sum ? (a[8] * a[1]) / stall_sum : 0;

   const uint64_t numerator = (active_share + stall_share) *
      hsw__sampler_balance__gpu_time__read(brw, query, accumulator);
   const uint64_t denominator =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator) *
      brw->perfquery.sys_vars.n_eus * 1000;

   if (denominator == 0)
      return 0;

   return numerator / denominator;
}

/* SamplerBalance metric set: TES AVG Stall per Thread.
 *
 * RPN equation: A 13 READ $DsThreads UDIV
 *
 * Integer quotient of counter A13 by the TES thread count; returns 0 when
 * no TES threads were counted.
 */
static uint64_t
hsw__sampler_balance__ds_eu_stall_per_thread__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   const uint64_t threads =
      hsw__sampler_balance__ds_threads__read(brw, query, accumulator);

   if (threads == 0)
      return 0;

   return accumulator[query->a_offset + 13] / threads;
}

/* Metric set SamplerBalance :: GS AVG Active per Thread
 *
 * Returns accumulated counter A22 divided by the value reported by
 * hsw__sampler_balance__gs_threads__read(), or 0 when that divisor is 0.
 */
static uint64_t
hsw__sampler_balance__gs_eu_active_per_thread__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   /* RPN equation: A 22 READ $GsThreads UDIV */
   const uint64_t dividend = accumulator[query->a_offset + 22];
   const uint64_t divisor =
      hsw__sampler_balance__gs_threads__read(brw, query, accumulator);

   /* UDIV is guarded: a zero divisor yields 0 instead of trapping. */
   if (divisor == 0)
      return 0;

   return dividend / divisor;
}

/* Metric set SamplerBalance :: TCS Threads Dispatched
 *
 * Returns accumulated counter A10 unmodified.  'brw' is not used here.
 */
static uint64_t
hsw__sampler_balance__hs_threads__read(struct brw_context *brw,
                                       const struct brw_perf_query_info *query,
                                       uint64_t *accumulator)
{
   /* RPN equation: A 10 READ */
   return accumulator[query->a_offset + 10];
}

/* Metric set SamplerBalance :: TCS AVG Stall per Thread
 *
 * Returns accumulated counter A8 divided by the value reported by
 * hsw__sampler_balance__hs_threads__read(), or 0 when that divisor is 0.
 */
static uint64_t
hsw__sampler_balance__hs_eu_stall_per_thread__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   /* RPN equation: A 8 READ $HsThreads UDIV */
   const uint64_t dividend = accumulator[query->a_offset + 8];
   const uint64_t divisor =
      hsw__sampler_balance__hs_threads__read(brw, query, accumulator);

   /* UDIV is guarded: a zero divisor yields 0 instead of trapping. */
   if (divisor == 0)
      return 0;

   return dividend / divisor;
}

/* Metric set SamplerBalance :: Samples Killed in FS
 *
 * Returns accumulated counter A36 unmodified.  'brw' is not used here.
 */
static uint64_t
hsw__sampler_balance__samples_killed_in_ps__read(struct brw_context *brw,
                                                 const struct brw_perf_query_info *query,
                                                 uint64_t *accumulator)
{
   /* RPN equation: A 36 READ */
   return accumulator[query->a_offset + 36];
}

/* Metric set SamplerBalance :: Late Depth Test Fails
 *
 * Returns accumulated counter A39 minus the value reported by
 * hsw__sampler_balance__samples_killed_in_ps__read().
 *
 * NOTE: the subtraction is plain uint64_t arithmetic, so the result wraps
 * around if the subtrahend is larger than A39.
 */
static uint64_t
hsw__sampler_balance__post_ps_depth_test_fails__read(struct brw_context *brw,
                                                     const struct brw_perf_query_info *query,
                                                     uint64_t *accumulator)
{
   /* RPN equation: A 39 READ $SamplesKilledInPs USUB */
   const uint64_t minuend = accumulator[query->a_offset + 39];
   const uint64_t subtrahend =
      hsw__sampler_balance__samples_killed_in_ps__read(brw, query, accumulator);

   return minuend - subtrahend;
}

/* Metric set SamplerBalance :: TCS AVG Active per Thread
 *
 * Returns accumulated counter A7 divided by the value reported by
 * hsw__sampler_balance__hs_threads__read(), or 0 when that divisor is 0.
 */
static uint64_t
hsw__sampler_balance__hs_eu_active_per_thread__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   /* RPN equation: A 7 READ $HsThreads UDIV */
   const uint64_t dividend = accumulator[query->a_offset + 7];
   const uint64_t divisor =
      hsw__sampler_balance__hs_threads__read(brw, query, accumulator);

   /* UDIV is guarded: a zero divisor yields 0 instead of trapping. */
   if (divisor == 0)
      return 0;

   return dividend / divisor;
}

/* Metric set SamplerBalance :: FS Duration
 *
 * Evaluates, in uint64_t arithmetic with every division guarded against a
 * zero divisor (a zero divisor yields 0):
 *
 *    share  = (A27 * A0) / (A2 + A7 + A12 + A17 + A22 + A27)
 *           + (A28 * A1) / (A3 + A8 + A13 + A18 + A23 + A28)
 *    result = (share * GpuTime) / (GpuCoreClocks * EuCoresTotalCount * 1000)
 *
 * where GpuTime and GpuCoreClocks come from the sibling SamplerBalance
 * readers and EuCoresTotalCount is brw->perfquery.sys_vars.n_eus.
 */
static uint64_t
hsw__sampler_balance__ps_duration__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* RPN equation: A 27 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 28 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */

   /* First quotient: A27 * A0 over the sum A2 + A7 + A12 + A17 + A22 + A27. */
   const uint64_t first_product =
      accumulator[query->a_offset + 27] * accumulator[query->a_offset + 0];
   const uint64_t first_sum =
      accumulator[query->a_offset + 2] +
      accumulator[query->a_offset + 7] +
      accumulator[query->a_offset + 12] +
      accumulator[query->a_offset + 17] +
      accumulator[query->a_offset + 22] +
      accumulator[query->a_offset + 27];
   uint64_t first_quotient = 0;
   if (first_sum != 0)
      first_quotient = first_product / first_sum;

   /* Second quotient: A28 * A1 over the sum A3 + A8 + A13 + A18 + A23 + A28. */
   const uint64_t second_product =
      accumulator[query->a_offset + 28] * accumulator[query->a_offset + 1];
   const uint64_t second_sum =
      accumulator[query->a_offset + 3] +
      accumulator[query->a_offset + 8] +
      accumulator[query->a_offset + 13] +
      accumulator[query->a_offset + 18] +
      accumulator[query->a_offset + 23] +
      accumulator[query->a_offset + 28];
   uint64_t second_quotient = 0;
   if (second_sum != 0)
      second_quotient = second_product / second_sum;

   /* Scale the combined quotients by GPU time ... */
   const uint64_t scaled =
      (first_quotient + second_quotient) *
      hsw__sampler_balance__gpu_time__read(brw, query, accumulator);

   /* ... and normalise by core clocks * EU count * 1000. */
   const uint64_t clocks_times_eus =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator) *
      brw->perfquery.sys_vars.n_eus;
   const uint64_t normaliser = clocks_times_eus * 1000;

   if (normaliser == 0)
      return 0;

   return scaled / normaliser;
}

/* Metric set SamplerBalance :: Sampler L2 cache misses (ss1)
 *
 * Returns the sum of accumulated counters C5 and C4.  'brw' is not used here.
 */
static uint64_t
hsw__sampler_balance__sampler1_l2_cache_misses__read(struct brw_context *brw,
                                                     const struct brw_perf_query_info *query,
                                                     uint64_t *accumulator)
{
   /* RPN equation: C 5 READ C 4 READ UADD */
   const uint64_t c5 = accumulator[query->c_offset + 5];
   const uint64_t c4 = accumulator[query->c_offset + 4];

   return c5 + c4;
}

/* Metric set SamplerBalance :: Early Hi-Depth Test Fails
 *
 * Returns accumulated counter A33 unmodified.  'brw' is not used here.
 */
static uint64_t
hsw__sampler_balance__hi_depth_test_fails__read(struct brw_context *brw,
                                                const struct brw_perf_query_info *query,
                                                uint64_t *accumulator)
{
   /* RPN equation: A 33 READ */
   return accumulator[query->a_offset + 33];
}

/* Metric set SamplerBalance :: CS AVG Stall per Thread
 *
 * Returns accumulated counter A18 divided by the value reported by
 * hsw__sampler_balance__cs_threads__read(), or 0 when that divisor is 0.
 */
static uint64_t
hsw__sampler_balance__cs_eu_stall_per_thread__read(struct brw_context *brw,
                                                   const struct brw_perf_query_info *query,
                                                   uint64_t *accumulator)
{
   /* RPN equation: A 18 READ $CsThreads UDIV */
   const uint64_t dividend = accumulator[query->a_offset + 18];
   const uint64_t divisor =
      hsw__sampler_balance__cs_threads__read(brw, query, accumulator);

   /* UDIV is guarded: a zero divisor yields 0 instead of trapping. */
   if (divisor == 0)
      return 0;

   return dividend / divisor;
}

/* Metric set SamplerBalance :: Late Stencil Test Fails
 *
 * Returns accumulated counter A38 unmodified.  'brw' is not used here.
 */
static uint64_t
hsw__sampler_balance__post_ps_stencil_test_fails__read(struct brw_context *brw,
                                                       const struct brw_perf_query_info *query,
                                                       uint64_t *accumulator)
{
   /* RPN equation: A 38 READ */
   return accumulator[query->a_offset + 38];
}

/* Metric set SamplerBalance :: Sampler L2 cache misses (ss0)
 *
 * Returns the sum of accumulated counters C7 and C6.  'brw' is not used here.
 */
static uint64_t
hsw__sampler_balance__sampler0_l2_cache_misses__read(struct brw_context *brw,
                                                     const struct brw_perf_query_info *query,
                                                     uint64_t *accumulator)
{
   /* RPN equation: C 7 READ C 6 READ UADD */
   const uint64_t c7 = accumulator[query->c_offset + 7];
   const uint64_t c6 = accumulator[query->c_offset + 6];

   return c7 + c6;
}

/* Metric set SamplerBalance :: VS AVG Active per Thread
 *
 * Returns accumulated counter A2 divided by the value reported by
 * hsw__sampler_balance__vs_threads__read(), or 0 when that divisor is 0.
 */
static uint64_t
hsw__sampler_balance__vs_eu_active_per_thread__read(struct brw_context *brw,
                                                    const struct brw_perf_query_info *query,
                                                    uint64_t *accumulator)
{
   /* RPN equation: A 2 READ $VsThreads UDIV */
   const uint64_t dividend = accumulator[query->a_offset + 2];
   const uint64_t divisor =
      hsw__sampler_balance__vs_threads__read(brw, query, accumulator);

   /* UDIV is guarded: a zero divisor yields 0 instead of trapping. */
   if (divisor == 0)
      return 0;

   return dividend / divisor;
}

/* Metric set SamplerBalance :: VS Duration
 *
 * Evaluates, in uint64_t arithmetic with every division guarded against a
 * zero divisor (a zero divisor yields 0):
 *
 *    share  = (A2 * A0) / (A2 + A7 + A12 + A17 + A22 + A27)
 *           + (A3 * A1) / (A3 + A8 + A13 + A18 + A23 + A28)
 *    result = (share * GpuTime) / (GpuCoreClocks * EuCoresTotalCount * 1000)
 *
 * where GpuTime and GpuCoreClocks come from the sibling SamplerBalance
 * readers and EuCoresTotalCount is brw->perfquery.sys_vars.n_eus.
 */
static uint64_t
hsw__sampler_balance__vs_duration__read(struct brw_context *brw,
                                        const struct brw_perf_query_info *query,
                                        uint64_t *accumulator)
{
   /* RPN equation: A 2 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 3 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */

   /* First quotient: A2 * A0 over the sum A2 + A7 + A12 + A17 + A22 + A27. */
   const uint64_t first_product =
      accumulator[query->a_offset + 2] * accumulator[query->a_offset + 0];
   const uint64_t first_sum =
      accumulator[query->a_offset + 2] +
      accumulator[query->a_offset + 7] +
      accumulator[query->a_offset + 12] +
      accumulator[query->a_offset + 17] +
      accumulator[query->a_offset + 22] +
      accumulator[query->a_offset + 27];
   uint64_t first_quotient = 0;
   if (first_sum != 0)
      first_quotient = first_product / first_sum;

   /* Second quotient: A3 * A1 over the sum A3 + A8 + A13 + A18 + A23 + A28. */
   const uint64_t second_product =
      accumulator[query->a_offset + 3] * accumulator[query->a_offset + 1];
   const uint64_t second_sum =
      accumulator[query->a_offset + 3] +
      accumulator[query->a_offset + 8] +
      accumulator[query->a_offset + 13] +
      accumulator[query->a_offset + 18] +
      accumulator[query->a_offset + 23] +
      accumulator[query->a_offset + 28];
   uint64_t second_quotient = 0;
   if (second_sum != 0)
      second_quotient = second_product / second_sum;

   /* Scale the combined quotients by GPU time ... */
   const uint64_t scaled =
      (first_quotient + second_quotient) *
      hsw__sampler_balance__gpu_time__read(brw, query, accumulator);

   /* ... and normalise by core clocks * EU count * 1000. */
   const uint64_t clocks_times_eus =
      hsw__sampler_balance__gpu_core_clocks__read(brw, query, accumulator) *
      brw->perfquery.sys_vars.n_eus;
   const uint64_t normaliser = clocks_times_eus * 1000;

   if (normaliser == 0)
      return 0;

   return scaled / normaliser;
}

/* Metric set SamplerBalance :: Samples Written
 *
 * Returns accumulated counter A40 unmodified.  'brw' is not used here.
 */
static uint64_t
hsw__sampler_balance__samples_written__read(struct brw_context *brw,
                                            const struct brw_perf_query_info *query,
                                            uint64_t *accumulator)
{
   /* RPN equation: A 40 READ */
   return accumulator[query->a_offset + 40];
}

/* Backing storage for the SamplerBalance register programming lists.  Both
 * arrays start out empty; register_sampler_balance_counter_query() appends
 * entries to them through query->mux_regs / query->b_counter_regs. */
static struct brw_perf_query_register_prog hsw_sampler_balance_mux_regs[41];
static struct brw_perf_query_register_prog hsw_sampler_balance_b_counter_regs[6];

/* Backing storage for the counter descriptions of the SamplerBalance query;
 * filled in by register_sampler_balance_counter_query(). */
static struct brw_perf_query_counter hsw_sampler_balance_query_counters[55];

/* Description of the "Metric set SamplerBalance" OA query.  The n_* fields
 * are zero here and are bumped as entries get appended at registration time. */
static struct brw_perf_query_info hsw_sampler_balance_query = {
   .kind = OA_COUNTERS,
   .name = "Metric set SamplerBalance",
   .guid = "bc274488-b4b6-40c7-90da-b77d7ad16189",
   .counters = hsw_sampler_balance_query_counters,
   .n_counters = 0, /* incremented once per counter registered */
   .oa_metrics_set_id = 0, /* determined at runtime, via sysfs */
   .oa_format = I915_OA_FORMAT_A45_B8_C8,

   /* Accumulation buffer offsets...
    *
    * These are the bases the *__read() functions add their A/C counter
    * index to when indexing the accumulator.  The values are consistent
    * with 1 GPU-time slot followed by 45 A, 8 B and 8 C counters, as the
    * format name suggests (1 + 45 = 46, 46 + 8 = 54) -- presumably that is
    * where they come from; confirm against the accumulation code. */
   .gpu_time_offset = 0,
   .a_offset = 1,
   .b_offset = 46,
   .c_offset = 54,
   .mux_regs = hsw_sampler_balance_mux_regs,
   .n_mux_regs = 0, /* Determined at runtime */
   .b_counter_regs = hsw_sampler_balance_b_counter_regs,
   .n_b_counter_regs = 0, /* Determined at runtime */
};

static void
register_sampler_balance_counter_query(struct brw_context *brw)
{
   static struct brw_perf_query_info *query = &hsw_sampler_balance_query;
   struct brw_perf_query_counter *counter;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00009840, .val = 0x00000080 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002EB9C, .val = 0x01906400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002FB9C, .val = 0x01906400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x000253A4, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026B9C, .val = 0x01906400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027B9C, .val = 0x01906400 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027104, .val = 0x00A00000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027184, .val = 0x00A50000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002E804, .val = 0x00500000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002E984, .val = 0x00500000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002EB04, .val = 0x00500000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002EB80, .val = 0x00000084 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002EB8C, .val = 0x14200000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002EB84, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002F804, .val = 0x00050000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002F984, .val = 0x00050000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002FB04, .val = 0x00050000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002FB80, .val = 0x00000084 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002FB8C, .val = 0x00050800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002FB84, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025380, .val = 0x00000010 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002538C, .val = 0x000000C0 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025384, .val = 0xAA550000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025404, .val = 0xFFFFC000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026804, .val = 0x50000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026984, .val = 0x50000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026B04, .val = 0x50000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026B80, .val = 0x00000084 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026B90, .val = 0x00050800 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026B84, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027804, .val = 0x05000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027984, .val = 0x05000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027B04, .val = 0x05000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027B80, .val = 0x00000084 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027B90, .val = 0x00000142 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00027B84, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026104, .val = 0xA0000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00026184, .val = 0xA5000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025424, .val = 0x00008620 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x0002541C, .val = 0x00000000 };
      query->mux_regs[query->n_mux_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00025428, .val = 0x0004A54A };

      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002740, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002744, .val = 0x00800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002710, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002714, .val = 0x00800000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002720, .val = 0x00000000 };
      query->b_counter_regs[query->n_b_counter_regs++] = (struct brw_perf_query_register_prog) { .reg = 0x00002724, .val = 0x00800000 };


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 0;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 8;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__ds_eu_stall__read;
      counter->name = "TES EU Stall";
      counter->desc = "The percentage of time in which evaluation shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 12;
      counter->size = sizeof(float);

      if (brw->perfquery.sys_vars.subslice_mask & 0x8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__sampler_balance__sampler3_l2_cache_misses__read;
         counter->name = "Sampler L2 cache misses (ss3)";
         counter->desc = "Number of sampler L2 cache misses (ss3)";
         counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
         counter->raw_max = 0; /* undefined */
         counter->offset = 16;
         counter->size = sizeof(uint64_t);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__alpha_test_fails__read;
      counter->name = "Alpha Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS alpha test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 24;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__ds_threads__read;
      counter->name = "TES Threads Dispatched";
      counter->desc = "The total number of evaluation shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 32;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__ds_eu_active_per_thread__read;
      counter->name = "TES AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 40;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 48;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__gs_eu_stall__read;
      counter->name = "GS EU Stall";
      counter->desc = "The percentage of time in which geometry shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 56;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__cs_eu_active__read;
      counter->name = "CS EU Active";
      counter->desc = "The percentage of time in which compute shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 60;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__vs_eu_active__read;
      counter->name = "VS EU Active";
      counter->desc = "The percentage of time in which vertex shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 64;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__hs_eu_active__read;
      counter->name = "TCS EU Active";
      counter->desc = "The percentage of time in which control shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 68;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__ds_eu_active__read;
      counter->name = "TES EU Active";
      counter->desc = "The percentage of time in which evaluation shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 72;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__gs_eu_active__read;
      counter->name = "GS EU Active";
      counter->desc = "The percentage of time in which geometry shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 76;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__ps_eu_active__read;
      counter->name = "FS EU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 80;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__cs_eu_stall__read;
      counter->name = "CS EU Stall";
      counter->desc = "The percentage of time in which compute shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 84;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 88;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__vs_eu_stall__read;
      counter->name = "VS EU Stall";
      counter->desc = "The percentage of time in which vertex shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 92;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__hs_eu_stall__read;
      counter->name = "TCS EU Stall";
      counter->desc = "The percentage of time in which control shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 96;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__ps_eu_stall__read;
      counter->name = "FS EU Stall";
      counter->desc = "The percentage of time in which fragment shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 100;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 104;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__cs_duration__read;
      counter->name = "CS Duration";
      counter->desc = "Total Compute Shader GPU duration.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 112;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 120;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 128;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__ds_duration__read;
      counter->name = "TES Duration";
      counter->desc = "Total Evaluation Shader GPU duration.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 136;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__gs_duration__read;
      counter->name = "GS Duration";
      counter->desc = "Total Geometry Shader GPU duration.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 144;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 152;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__cs_eu_active_per_thread__read;
      counter->name = "CS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 160;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__sampler_l2_cache_misses__read;
      counter->name = "Sampler L2 cache misses";
      counter->desc = "Number of sampler L2 cache misses";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 168;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__gs_eu_stall_per_thread__read;
      counter->name = "GS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 176;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__sampler_balance__sampler2_l2_cache_misses__read;
         counter->name = "Sampler L2 cache misses (ss2)";
         counter->desc = "Number of sampler L2 cache misses (ss2)";
         counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
         counter->raw_max = 0; /* undefined */
         counter->offset = 184;
         counter->size = sizeof(uint64_t);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = hsw__sampler_balance__avg_gpu_core_frequency__max(brw);
      counter->offset = 192;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__eu_idle__read;
      counter->name = "EU Idle";
      counter->desc = "The percentage of time in which the Execution Units were idle.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 200;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__ps_eu_stall_per_thread__read;
      counter->name = "FS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 208;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__vs_eu_stall_per_thread__read;
      counter->name = "VS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 216;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has being processing GPU commands.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
      counter->raw_max = 100;
      counter->offset = 224;
      counter->size = sizeof(float);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__ps_eu_active_per_thread__read;
      counter->name = "FS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 232;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 240;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__hs_duration__read;
      counter->name = "TCS Duration";
      counter->desc = "Total Control Shader GPU duration.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 248;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__ds_eu_stall_per_thread__read;
      counter->name = "TES AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 256;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__gs_eu_active_per_thread__read;
      counter->name = "GS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 264;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__hs_threads__read;
      counter->name = "TCS Threads Dispatched";
      counter->desc = "The total number of control shader hardware threads dispatched.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 272;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__hs_eu_stall_per_thread__read;
      counter->name = "TCS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 280;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 288;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__post_ps_depth_test_fails__read;
      counter->name = "Late Depth Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 296;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__hs_eu_active_per_thread__read;
      counter->name = "TCS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 304;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__ps_duration__read;
      counter->name = "FS Duration";
      counter->desc = "Total Fragment Shader GPU duration.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 312;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__sampler_balance__sampler1_l2_cache_misses__read;
         counter->name = "Sampler L2 cache misses (ss1)";
         counter->desc = "Number of sampler L2 cache misses (ss1)";
         counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
         counter->raw_max = 0; /* undefined */
         counter->offset = 320;
         counter->size = sizeof(uint64_t);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 328;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__cs_eu_stall_per_thread__read;
      counter->name = "CS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 336;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__post_ps_stencil_test_fails__read;
      counter->name = "Late Stencil Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS stencil test.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 344;
      counter->size = sizeof(uint64_t);

      if (brw->perfquery.sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__sampler_balance__sampler0_l2_cache_misses__read;
         counter->name = "Sampler L2 cache misses (ss0)";
         counter->desc = "Number of sampler L2 cache misses (ss0)";
         counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
         counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
         counter->raw_max = 0; /* undefined */
         counter->offset = 352;
         counter->size = sizeof(uint64_t);
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__vs_eu_active_per_thread__read;
      counter->name = "VS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 100;
      counter->offset = 360;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__vs_duration__read;
      counter->name = "VS Duration";
      counter->desc = "Total Vertex Shader GPU duration.";
      counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 368;
      counter->size = sizeof(uint64_t);

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets.";
      counter->type = GL_PERFQUERY_COUNTER_EVENT_INTEL;
      counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
      counter->raw_max = 0; /* undefined */
      counter->offset = 376;
      counter->size = sizeof(uint64_t);

      query->data_size = counter->offset + counter->size;
   }

   _mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);
}

void
brw_oa_register_queries_hsw(struct brw_context *brw)
{
   register_render_basic_counter_query(brw);
   register_compute_basic_counter_query(brw);
   register_compute_extended_counter_query(brw);
   register_memory_reads_counter_query(brw);
   register_memory_writes_counter_query(brw);
   register_sampler_balance_counter_query(brw);
}
