untrusted comment: verify with openbsd-70-base.pub RWR3KL+gSr4QZzqmC8EqtqwQ9tL/A1wUClUK2zUvYksjbzy9TfzOM/EhopqFxlIO+RbOod07faFLShQqv6Epn9YmzUsqnuj5QQA= OpenBSD 7.0 errata 012, February 2, 2022: Userspace controlled code on GPU can access kernel memory on Intel gen 8 and later GPUs. Apply by doing: signify -Vep /etc/signify/openbsd-70-base.pub -x 012_gpuflush.patch.sig \ -m - | (cd /usr/src && patch -p0) And then rebuild and install a new kernel: KK=`sysctl -n kern.osversion | cut -d# -f1` cd /usr/src/sys/arch/`machine`/compile/$KK make obj make config make make install Index: sys/dev/pci/drm/i915/i915_reg.h =================================================================== RCS file: /cvs/src/sys/dev/pci/drm/i915/i915_reg.h,v retrieving revision 1.24 diff -u -p -r1.24 i915_reg.h --- sys/dev/pci/drm/i915/i915_reg.h 12 Aug 2021 13:06:13 -0000 1.24 +++ sys/dev/pci/drm/i915/i915_reg.h 30 Jan 2022 00:43:46 -0000 @@ -2639,6 +2639,12 @@ static inline bool i915_mmio_reg_valid(i #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1 << 28) #define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1 << 24) +#define GEN8_RTCR _MMIO(0x4260) +#define GEN8_M1TCR _MMIO(0x4264) +#define GEN8_M2TCR _MMIO(0x4268) +#define GEN8_BTCR _MMIO(0x426c) +#define GEN8_VTCR _MMIO(0x4270) + #if 0 #define PRB0_TAIL _MMIO(0x2030) #define PRB0_HEAD _MMIO(0x2034) @@ -2727,6 +2733,11 @@ static inline bool i915_mmio_reg_valid(i #define GEN12_FAULT_TLB_DATA1 _MMIO(0xcebc) #define FAULT_VA_HIGH_BITS (0xf << 0) #define FAULT_GTT_SEL (1 << 4) + +#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8) +#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc) +#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0) +#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4) #define GEN12_AUX_ERR_DBG _MMIO(0x43f4) Index: sys/dev/pci/drm/i915/i915_vma.c =================================================================== RCS file: /cvs/src/sys/dev/pci/drm/i915/i915_vma.c,v retrieving revision 1.6 diff -u -p -r1.6 i915_vma.c --- sys/dev/pci/drm/i915/i915_vma.c 7 Jul 2021 
02:38:33 -0000 1.6 +++ sys/dev/pci/drm/i915/i915_vma.c 30 Jan 2022 00:43:46 -0000 @@ -455,6 +455,9 @@ int i915_vma_bind(struct i915_vma *vma, vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags); } + if (vma->obj) + set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags); + atomic_or(bind_flags, &vma->flags); return 0; } Index: sys/dev/pci/drm/i915/intel_uncore.c =================================================================== RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_uncore.c,v retrieving revision 1.9 diff -u -p -r1.9 intel_uncore.c --- sys/dev/pci/drm/i915/intel_uncore.c 7 Jul 2021 02:38:34 -0000 1.9 +++ sys/dev/pci/drm/i915/intel_uncore.c 30 Jan 2022 00:43:46 -0000 @@ -727,7 +727,8 @@ void intel_uncore_forcewake_get__locked( } static void __intel_uncore_forcewake_put(struct intel_uncore *uncore, - enum forcewake_domains fw_domains) + enum forcewake_domains fw_domains, + bool delayed) { struct intel_uncore_forcewake_domain *domain; unsigned int tmp; @@ -742,7 +743,11 @@ static void __intel_uncore_forcewake_put continue; } - uncore->funcs.force_wake_put(uncore, domain->mask); + if (delayed && + !(domain->uncore->fw_domains_timer & domain->mask)) + fw_domain_arm_timer(domain); + else + uncore->funcs.force_wake_put(uncore, domain->mask); } } @@ -763,7 +768,20 @@ void intel_uncore_forcewake_put(struct i return; spin_lock_irqsave(&uncore->lock, irqflags); - __intel_uncore_forcewake_put(uncore, fw_domains); + __intel_uncore_forcewake_put(uncore, fw_domains, false); + spin_unlock_irqrestore(&uncore->lock, irqflags); +} + +void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore, + enum forcewake_domains fw_domains) +{ + unsigned long irqflags; + + if (!uncore->funcs.force_wake_put) + return; + + spin_lock_irqsave(&uncore->lock, irqflags); + __intel_uncore_forcewake_put(uncore, fw_domains, true); spin_unlock_irqrestore(&uncore->lock, irqflags); } @@ -805,7 +823,7 @@ void intel_uncore_forcewake_put__locked( if (!uncore->funcs.force_wake_put) return; 
- __intel_uncore_forcewake_put(uncore, fw_domains); + __intel_uncore_forcewake_put(uncore, fw_domains, false); } void assert_forcewakes_inactive(struct intel_uncore *uncore) Index: sys/dev/pci/drm/i915/intel_uncore.h =================================================================== RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_uncore.h,v retrieving revision 1.3 diff -u -p -r1.3 intel_uncore.h --- sys/dev/pci/drm/i915/intel_uncore.h 7 Jul 2021 02:38:34 -0000 1.3 +++ sys/dev/pci/drm/i915/intel_uncore.h 30 Jan 2022 00:43:46 -0000 @@ -211,6 +211,8 @@ void intel_uncore_forcewake_get(struct i enum forcewake_domains domains); void intel_uncore_forcewake_put(struct intel_uncore *uncore, enum forcewake_domains domains); +void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore, + enum forcewake_domains domains); void intel_uncore_forcewake_flush(struct intel_uncore *uncore, enum forcewake_domains fw_domains); Index: sys/dev/pci/drm/i915/gem/i915_gem_object_types.h =================================================================== RCS file: /cvs/src/sys/dev/pci/drm/i915/gem/i915_gem_object_types.h,v retrieving revision 1.2 diff -u -p -r1.2 i915_gem_object_types.h --- sys/dev/pci/drm/i915/gem/i915_gem_object_types.h 7 Jul 2021 02:38:34 -0000 1.2 +++ sys/dev/pci/drm/i915/gem/i915_gem_object_types.h 30 Jan 2022 00:43:46 -0000 @@ -159,6 +159,7 @@ struct drm_i915_gem_object { #define I915_BO_ALLOC_VOLATILE BIT(1) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_VOLATILE) #define I915_BO_READONLY BIT(2) +#define I915_BO_WAS_BOUND_BIT 3 /* * Is the object to be mapped as read-only to the GPU Index: sys/dev/pci/drm/i915/gem/i915_gem_pages.c =================================================================== RCS file: /cvs/src/sys/dev/pci/drm/i915/gem/i915_gem_pages.c,v retrieving revision 1.3 diff -u -p -r1.3 i915_gem_pages.c --- sys/dev/pci/drm/i915/gem/i915_gem_pages.c 7 Jul 2021 02:38:34 -0000 1.3 +++ sys/dev/pci/drm/i915/gem/i915_gem_pages.c 30 
Jan 2022 00:43:47 -0000 @@ -10,6 +10,8 @@ #include "i915_gem_lmem.h" #include "i915_gem_mman.h" +#include "gt/intel_gt.h" + void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, unsigned int sg_page_sizes) @@ -185,6 +187,14 @@ __i915_gem_object_unset_pages(struct drm __i915_gem_object_reset_page_iter(obj); obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; + + if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + intel_wakeref_t wakeref; + + with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref) + intel_gt_invalidate_tlbs(&i915->gt); + } return pages; } Index: sys/dev/pci/drm/i915/gt/intel_gt.c =================================================================== RCS file: /cvs/src/sys/dev/pci/drm/i915/gt/intel_gt.c,v retrieving revision 1.3 diff -u -p -r1.3 intel_gt.c --- sys/dev/pci/drm/i915/gt/intel_gt.c 7 Jul 2021 02:38:35 -0000 1.3 +++ sys/dev/pci/drm/i915/gt/intel_gt.c 30 Jan 2022 00:43:47 -0000 @@ -26,6 +26,8 @@ void intel_gt_init_early(struct intel_gt mtx_init(&gt->irq_lock, IPL_TTY); + rw_init(&gt->tlb_invalidate_lock, "itlbinv"); + INIT_LIST_HEAD(&gt->closed_vma); mtx_init(&gt->closed_lock, IPL_TTY); @@ -664,4 +666,103 @@ void intel_gt_info_print(const struct in drm_printf(p, "available engines: %x\n", info->engine_mask); intel_sseu_dump(&info->sseu, p); +} + +struct reg_and_bit { + i915_reg_t reg; + u32 bit; +}; + +static struct reg_and_bit +get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, + const i915_reg_t *regs, const unsigned int num) +{ + const unsigned int class = engine->class; + struct reg_and_bit rb = { }; + + if (drm_WARN_ON_ONCE(&engine->i915->drm, + class >= num || !regs[class].reg)) + return rb; + + rb.reg = regs[class]; + if (gen8 && class == VIDEO_DECODE_CLASS) + rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */ + else + rb.bit = engine->instance; + + rb.bit = BIT(rb.bit); + + return rb; +} + +void
intel_gt_invalidate_tlbs(struct intel_gt *gt) +{ + static const i915_reg_t gen8_regs[] = { + [RENDER_CLASS] = GEN8_RTCR, + [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */ + [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR, + [COPY_ENGINE_CLASS] = GEN8_BTCR, + }; + static const i915_reg_t gen12_regs[] = { + [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR, + [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR, + [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR, + [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR, + }; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_engine_cs *engine; + enum intel_engine_id id; + const i915_reg_t *regs; + unsigned int num = 0; + + if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) + return; + + if (INTEL_GEN(i915) == 12) { + regs = gen12_regs; + num = ARRAY_SIZE(gen12_regs); + } else if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) <= 11) { + regs = gen8_regs; + num = ARRAY_SIZE(gen8_regs); + } else if (INTEL_GEN(i915) < 8) { + return; + } + + if (drm_WARN_ONCE(&i915->drm, !num, + "Platform does not implement TLB invalidation!")) + return; + + GEM_TRACE("\n"); + + assert_rpm_wakelock_held(&i915->runtime_pm); + + mutex_lock(&gt->tlb_invalidate_lock); + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + for_each_engine(engine, gt, id) { + /* + * HW architecture suggest typical invalidation time at 40us, + * with pessimistic cases up to 100us and a recommendation to + * cap at 1ms. We go a bit higher just in case.
+ */ + const unsigned int timeout_us = 100; + const unsigned int timeout_ms = 4; + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + if (__intel_wait_for_register_fw(uncore, + rb.reg, rb.bit, 0, + timeout_us, timeout_ms, + NULL)) + DRM_ERROR_RATELIMITED("%s TLB invalidation did not complete in %ums!\n", + engine->name, timeout_ms); + } + + intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); + mutex_unlock(&gt->tlb_invalidate_lock); } Index: sys/dev/pci/drm/i915/gt/intel_gt.h =================================================================== RCS file: /cvs/src/sys/dev/pci/drm/i915/gt/intel_gt.h,v retrieving revision 1.2 diff -u -p -r1.2 intel_gt.h --- sys/dev/pci/drm/i915/gt/intel_gt.h 7 Jul 2021 02:38:35 -0000 1.2 +++ sys/dev/pci/drm/i915/gt/intel_gt.h 30 Jan 2022 00:43:47 -0000 @@ -77,4 +77,6 @@ static inline bool intel_gt_is_wedged(co void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p); +void intel_gt_invalidate_tlbs(struct intel_gt *gt); + #endif /* __INTEL_GT_H__ */ Index: sys/dev/pci/drm/i915/gt/intel_gt_types.h =================================================================== RCS file: /cvs/src/sys/dev/pci/drm/i915/gt/intel_gt_types.h,v retrieving revision 1.2 diff -u -p -r1.2 intel_gt_types.h --- sys/dev/pci/drm/i915/gt/intel_gt_types.h 7 Jul 2021 02:38:35 -0000 1.2 +++ sys/dev/pci/drm/i915/gt/intel_gt_types.h 30 Jan 2022 00:43:47 -0000 @@ -36,6 +36,8 @@ struct intel_gt { struct intel_uc uc; + struct rwlock tlb_invalidate_lock; + struct intel_gt_timelines { spinlock_t lock; /* protects active_list */ struct list_head active_list;