linux/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
Linus Torvalds b08494a8f7 Merge tag 'drm-next-2025-05-28' of https://gitlab.freedesktop.org/drm/kernel
Pull drm updates from Dave Airlie:
 "As part of building up nova-core/nova-drm pieces we've brought in some
  rust abstractions through this tree, aux bus being the main one, with
  devres changes also in the driver-core tree. Along with the drm core
  abstractions and enough nova-core/nova-drm to use them. This is still
  all stub work under construction, to build the nova driver upstream.

  The other big NVIDIA-related change is that nouveau adds support for
  Hopper/Blackwell GPUs; this required a new GSP firmware update to
  570.144, and a bunch of rework in order to support multiple fw
  interfaces.

  There is also the introduction of an asahi uapi header file as a
  precursor to getting the real driver in later, in order to unblock
  userspace mesa packages while the driver is trapped behind rust
  enablement.

  Otherwise it's the usual mixture of stuff all over, amdgpu, i915/xe,
  and msm being the main ones, and some changes to vsprintf.

  new drivers:
   - bring in the asahi uapi header standalone
   - nova-drm: stub driver

  rust dependencies (for nova-core):
   - auxiliary
       - bus abstractions
       - driver registration
       - sample driver
   - devres changes from driver-core
   - revocable changes

  core:
   - add Apple fourcc modifiers
   - add virtio capset definitions
   - extend EXPORT_SYNC_FILE for timeline syncobjs
   - convert to devm_platform_ioremap_resource
   - refactor shmem helper page pinning
   - DP powerup/down link helpers
   - extended %p4cc in vsprintf.c to support fourcc prints
   - change vsprintf %p4cn to %p4chR, remove %p4cn
   - Add drm_file_err function
   - IN_FORMATS_ASYNC property
   - move sitronix from tiny to their own subdir

  rust:
   - add drm core infrastructure rust abstractions
     (device/driver, ioctl, file, gem)

  dma-buf:
   - adjust sg handling to not cache map on attach
   - allow setting dma-device for import
   - Add a helper to sort and deduplicate dma_fence arrays

  docs:
   - updated drm scheduler docs
   - fbdev todo update
   - fb rendering
   - actual brightness

  ttm:
   - fix delayed destroy resv object

  bridge:
   - add kunit tests
   - convert tc358775 to atomic
   - convert drivers to devm_drm_bridge_alloc
   - convert rk3066_hdmi to bridge driver

  scheduler:
   - add kunit tests

  panel:
   - refcount panels to improve lifetime handling
   - Powertip PH128800T004-ZZA01
   - NLT NL13676BC25-03F, Tianma TM070JDHG34-00
   - Himax HX8279/HX8279-D DDIC
   - Visionox G2647FB105
   - Sitronix ST7571
   - ZOTAC rotation quirk

  vkms:
   - allow attaching more displays

  i915:
   - xe3lpd display updates
   - vrr refactor
   - intel_display struct conversions
   - xe2hpd memory type identification
   - add link rate/count to i915_display_info
   - cleanup VGA plane handling
   - refactor HDCP GSC
   - fix SLPC wait boosting reference counting
   - add 20ms delay to engine reset
   - fix fence release on early probe errors

  xe:
   - SRIOV updates
   - BMG PCI ID update
   - support separate firmware for each GT
   - SVM fix, prelim SVM multi-device work
   - export fan speed
   - temp disable d3cold on BMG
   - backup VRAM in PM notifier instead of suspend/freeze
   - update xe_ttm_access_memory to use GPU for non-visible access
   - fix guc_info debugfs for VFs
   - use copy_from_user instead of __copy_from_user
   - append PCIe gen5 limitations to xe_firmware document

  amdgpu:
   - DSC cleanup
   - DC Scaling updates
   - Fused I2C-over-AUX updates
   - DMUB updates
   - Use drm_file_err in amdgpu
   - Enforce isolation updates
   - Use new dma_fence helpers
   - USERQ fixes
   - Documentation updates
   - SR-IOV updates
   - RAS updates
   - PSP 12 cleanups
   - GC 9.5 updates
   - SMU 13.x updates
   - VCN / JPEG SR-IOV updates

  amdkfd:
   - Update error messages for SDMA
   - Userptr updates
   - XNACK fixes

  radeon:
   - CIK doorbell cleanup

  nouveau:
   - add support for NVIDIA r570 GSP firmware
   - enable Hopper/Blackwell support

  nova-core:
   - fix task list
   - register definition infrastructure
   - move firmware into own rust module
   - register auxiliary device for nova-drm

  nova-drm:
   - initial driver skeleton

  msm:
   - GPU:
       - ACD (adaptive clock distribution) for X1-85
       - drop fictional address_space_size
       - improve GMU HFI response time out robustness
       - fix crash when throttling during boot
   - DPU:
       - use single CTL path for flushing on DPU 5.x+
       - improve SSPP allocation code for better sharing
       - Enabled SmartDMA on SM8150, SC8180X, SC8280XP, SM8550
       - Added SAR2130P support
       - Disabled DSC support on MSM8937, MSM8917, MSM8953, SDM660
   - DP:
       - switch to new audio helpers
       - better LTTPR handling
   - DSI:
       - Added support for SA8775P
       - Added SAR2130P support
   - HDMI:
       - Switched to use new helpers for ACR data
       - Fixed old standing issue of HPD not working in some cases

  amdxdna:
   - add dma-buf support
   - allow empty command submits

  renesas:
   - add dma-buf support
   - add zpos, alpha, blend support

  panthor:
   - fail properly for NO_MMAP bos
   - add SET_LABEL ioctl
   - debugfs BO dumping support

  imagination:
   - update DT bindings
   - support TI AM68 GPU

  hibmc:
   - improve interrupt handling and HPD support

  virtio:
   - add panic handler support

  rockchip:
   - add RK3588 support
   - add DP AUX bus panel support

  ivpu:
   - add heartbeat based hangcheck

  mediatek:
   - prepares support for MT8195/99 HDMIv2/DDCv2

  anx7625:
   - improve HPD

  tegra:
   - speed up firmware loading

* tag 'drm-next-2025-05-28' of https://gitlab.freedesktop.org/drm/kernel: (1627 commits)
  drm/nouveau/tegra: Fix error pointer vs NULL return in nvkm_device_tegra_resource_addr()
  drm/xe: Default auto_link_downgrade status to false
  drm/xe/guc: Make creation of SLPC debugfs files conditional
  drm/i915/display: Add check for alloc_ordered_workqueue() and alloc_workqueue()
  drm/i915/dp_mst: Work around Thunderbolt sink disconnect after SINK_COUNT_ESI read
  drm/i915/ptl: Use everywhere the correct DDI port clock select mask
  drm/nouveau/kms: add support for GB20x
  drm/dp: add option to disable zero sized address only transactions.
  drm/nouveau: add support for GB20x
  drm/nouveau/gsp: add hal for fifo.chan.doorbell_handle
  drm/nouveau: add support for GB10x
  drm/nouveau/gf100-: track chan progress with non-WFI semaphore release
  drm/nouveau/nv50-: separate CHANNEL_GPFIFO handling out from CHANNEL_DMA
  drm/nouveau: add helper functions for allocating pinned/cpu-mapped bos
  drm/nouveau: add support for GH100
  drm/nouveau: improve handling of 64-bit BARs
  drm/nouveau/gv100-: switch to volta semaphore methods
  drm/nouveau/gsp: support deeper page tables in COPY_SERVER_RESERVED_PDES
  drm/nouveau/gsp: init client VMMs with NV0080_CTRL_DMA_SET_PAGE_DIRECTORY
  drm/nouveau/gsp: fetch level shift and PDE from BAR2 VMM
  ...
2025-05-28 09:46:39 -07:00


// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014-2016 Intel Corporation
 */

#include <linux/pagevec.h>
#include <linux/shmem_fs.h>
#include <linux/swap.h>

#include <drm/drm_cache.h>

#include "gem/i915_gem_region.h"
#include "i915_drv.h"
#include "i915_gem_object.h"
#include "i915_gem_tiling.h"
#include "i915_gemfs.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"

/*
 * Move folios to appropriate lru and release the batch, decrementing the
 * ref count of those folios.
 */
static void check_release_folio_batch(struct folio_batch *fbatch)
{
	check_move_unevictable_folios(fbatch);
	__folio_batch_release(fbatch);

	cond_resched();
}

void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
			 bool dirty, bool backup)
{
	struct sgt_iter sgt_iter;
	struct folio_batch fbatch;
	struct folio *last = NULL;
	struct page *page;

	mapping_clear_unevictable(mapping);

	folio_batch_init(&fbatch);
	for_each_sgt_page(page, sgt_iter, st) {
		struct folio *folio = page_folio(page);

		if (folio == last)
			continue;
		last = folio;
		if (dirty)
			folio_mark_dirty(folio);
		if (backup)
			folio_mark_accessed(folio);

		if (!folio_batch_add(&fbatch, folio))
			check_release_folio_batch(&fbatch);
	}
	if (fbatch.nr)
		check_release_folio_batch(&fbatch);

	sg_free_table(st);
}

int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
			 size_t size, struct intel_memory_region *mr,
			 struct address_space *mapping,
			 unsigned int max_segment)
{
	unsigned int page_count; /* restricted by sg_alloc_table */
	unsigned long i;
	struct scatterlist *sg;
	unsigned long next_pfn = 0;	/* suppress gcc warning */
	gfp_t noreclaim;
	int ret;

	if (overflows_type(size / PAGE_SIZE, page_count))
		return -E2BIG;

	page_count = size / PAGE_SIZE;
	/*
	 * If there's no chance of allocating enough pages for the whole
	 * object, bail early.
	 */
	if (size > resource_size(&mr->region))
		return -ENOMEM;

	if (sg_alloc_table(st, page_count, GFP_KERNEL | __GFP_NOWARN))
		return -ENOMEM;

	/*
	 * Get the list of pages out of our struct file. They'll be pinned
	 * at this point until we release them.
	 *
	 * Fail silently without starting the shrinker
	 */
	mapping_set_unevictable(mapping);
	noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;

	sg = st->sgl;
	st->nents = 0;
	for (i = 0; i < page_count; i++) {
		struct folio *folio;
		unsigned long nr_pages;
		const unsigned int shrink[] = {
			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
			0,
		}, *s = shrink;
		gfp_t gfp = noreclaim;

		do {
			cond_resched();
			folio = shmem_read_folio_gfp(mapping, i, gfp);
			if (!IS_ERR(folio))
				break;

			if (!*s) {
				ret = PTR_ERR(folio);
				goto err_sg;
			}

			i915_gem_shrink(NULL, i915, 2 * page_count, NULL, *s++);

			/*
			 * We've tried hard to allocate the memory by reaping
			 * our own buffer, now let the real VM do its job and
			 * go down in flames if truly OOM.
			 *
			 * However, since graphics tend to be disposable,
			 * defer the oom here by reporting the ENOMEM back
			 * to userspace.
			 */
			if (!*s) {
				/* reclaim and warn, but no oom */
				gfp = mapping_gfp_mask(mapping);

				/*
				 * Our bo are always dirty and so we require
				 * kswapd to reclaim our pages (direct reclaim
				 * does not effectively begin pageout of our
				 * buffers on its own). However, direct reclaim
				 * only waits for kswapd when under allocation
				 * congestion. So as a result __GFP_RECLAIM is
				 * unreliable and fails to actually reclaim our
				 * dirty pages -- unless you try over and over
				 * again with !__GFP_NORETRY. However, we still
				 * want to fail this allocation rather than
				 * trigger the out-of-memory killer and for
				 * this we want __GFP_RETRY_MAYFAIL.
				 */
				gfp |= __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
			}
		} while (1);

		nr_pages = min_t(unsigned long,
				 folio_nr_pages(folio), page_count - i);
		if (!i ||
		    sg->length >= max_segment ||
		    folio_pfn(folio) != next_pfn) {
			if (i)
				sg = sg_next(sg);

			st->nents++;
			sg_set_folio(sg, folio, nr_pages * PAGE_SIZE, 0);
		} else {
			/* XXX: could overflow? */
			sg->length += nr_pages * PAGE_SIZE;
		}
		next_pfn = folio_pfn(folio) + nr_pages;
		i += nr_pages - 1;

		/* Check that the i965g/gm workaround works. */
		GEM_BUG_ON(gfp & __GFP_DMA32 && next_pfn >= 0x00100000UL);
	}
	if (sg) /* loop terminated early; short sg table */
		sg_mark_end(sg);

	/* Trim unused sg entries to avoid wasting memory. */
	i915_sg_trim(st);

	return 0;
err_sg:
	sg_mark_end(sg);
	if (sg != st->sgl) {
		shmem_sg_free_table(st, mapping, false, false);
	} else {
		mapping_clear_unevictable(mapping);
		sg_free_table(st);
	}

	/*
	 * shmemfs first checks if there is enough memory to allocate the page
	 * and reports ENOSPC should there be insufficient, along with the usual
	 * ENOMEM for a genuine allocation failure.
	 *
	 * We use ENOSPC in our driver to mean that we have run out of aperture
	 * space and so want to translate the error from shmemfs back to our
	 * usual understanding of ENOMEM.
	 */
	if (ret == -ENOSPC)
		ret = -ENOMEM;

	return ret;
}

static int shmem_get_pages(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct intel_memory_region *mem = obj->mm.region;
	struct address_space *mapping = obj->base.filp->f_mapping;
	unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
	struct sg_table *st;
	int ret;

	/*
	 * Assert that the object is not currently in any GPU domain. As it
	 * wasn't in the GTT, there shouldn't be any way it could have been in
	 * a GPU cache
	 */
	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);

rebuild_st:
	st = kmalloc(sizeof(*st), GFP_KERNEL | __GFP_NOWARN);
	if (!st)
		return -ENOMEM;

	ret = shmem_sg_alloc_table(i915, st, obj->base.size, mem, mapping,
				   max_segment);
	if (ret)
		goto err_st;

	ret = i915_gem_gtt_prepare_pages(obj, st);
	if (ret) {
		/*
		 * DMA remapping failed? One possible cause is that
		 * it could not reserve enough large entries, asking
		 * for PAGE_SIZE chunks instead may be helpful.
		 */
		if (max_segment > PAGE_SIZE) {
			shmem_sg_free_table(st, mapping, false, false);
			kfree(st);

			max_segment = PAGE_SIZE;
			goto rebuild_st;
		} else {
			dev_warn(i915->drm.dev,
				 "Failed to DMA remap %zu pages\n",
				 obj->base.size >> PAGE_SHIFT);
			goto err_pages;
		}
	}

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_do_bit_17_swizzle(obj, st);

	if (i915_gem_object_can_bypass_llc(obj))
		obj->cache_dirty = true;

	__i915_gem_object_set_pages(obj, st);

	return 0;

err_pages:
	shmem_sg_free_table(st, mapping, false, false);
	/*
	 * shmemfs first checks if there is enough memory to allocate the page
	 * and reports ENOSPC should there be insufficient, along with the usual
	 * ENOMEM for a genuine allocation failure.
	 *
	 * We use ENOSPC in our driver to mean that we have run out of aperture
	 * space and so want to translate the error from shmemfs back to our
	 * usual understanding of ENOMEM.
	 */
err_st:
	if (ret == -ENOSPC)
		ret = -ENOMEM;

	kfree(st);

	return ret;
}

static int
shmem_truncate(struct drm_i915_gem_object *obj)
{
	/*
	 * Our goal here is to return as much of the memory as
	 * is possible back to the system as we are called from OOM.
	 * To do this we must instruct the shmfs to drop all of its
	 * backing pages, *now*.
	 */
	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
	obj->mm.madv = __I915_MADV_PURGED;
	obj->mm.pages = ERR_PTR(-EFAULT);

	return 0;
}

void __shmem_writeback(size_t size, struct address_space *mapping)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
		.nr_to_write = SWAP_CLUSTER_MAX,
		.range_start = 0,
		.range_end = LLONG_MAX,
		.for_reclaim = 1,
	};
	struct folio *folio = NULL;
	int error = 0;

	/*
	 * Leave mmappings intact (GTT will have been revoked on unbinding,
	 * leaving only CPU mmappings around) and add those folios to the LRU
	 * instead of invoking writeback so they are aged and paged out
	 * as normal.
	 */
	while ((folio = writeback_iter(mapping, &wbc, folio, &error))) {
		if (folio_mapped(folio))
			folio_redirty_for_writepage(&wbc, folio);
		else
			error = shmem_writeout(folio, &wbc);
	}
}

static void
shmem_writeback(struct drm_i915_gem_object *obj)
{
	__shmem_writeback(obj->base.size, obj->base.filp->f_mapping);
}

static int shmem_shrink(struct drm_i915_gem_object *obj, unsigned int flags)
{
	switch (obj->mm.madv) {
	case I915_MADV_DONTNEED:
		return i915_gem_object_truncate(obj);
	case __I915_MADV_PURGED:
		return 0;
	}

	if (flags & I915_GEM_OBJECT_SHRINK_WRITEBACK)
		shmem_writeback(obj);

	return 0;
}

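/*
 * Common teardown before shmem pages are released: objects marked DONTNEED
 * are no longer considered dirty, caches are flushed when the contents may
 * not be coherent with the CPU, and the object returns to the CPU write
 * domain.
 */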
void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
				struct sg_table *pages,
				bool needs_clflush)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);

	if (obj->mm.madv == I915_MADV_DONTNEED)
		obj->mm.dirty = false;

	if (needs_clflush &&
	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
		drm_clflush_sg(pages);

	__start_cpu_write(obj);
	/*
	 * On non-LLC igfx platforms, force the flush-on-acquire if this is ever
	 * swapped-in. Our async flush path is not trustworthy enough yet (and
	 * happens in the wrong order), and with some tricks it's conceivable
	 * for userspace to change the cache-level to I915_CACHE_NONE after the
	 * pages are swapped-in, and since execbuf binds the object before doing
	 * the async flush, we have a race window.
	 */
	if (!HAS_LLC(i915) && !IS_DGFX(i915))
		obj->cache_dirty = true;
}

void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages)
{
	__i915_gem_object_release_shmem(obj, pages, true);

	i915_gem_gtt_finish_pages(obj, pages);

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj, pages);

	shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping,
			    obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
	kfree(pages);
	obj->mm.dirty = false;
}

static void
shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages)
{
	if (likely(i915_gem_object_has_struct_page(obj)))
		i915_gem_object_put_pages_shmem(obj, pages);
	else
		i915_gem_object_put_pages_phys(obj, pages);
}

static int
shmem_pwrite(struct drm_i915_gem_object *obj,
	     const struct drm_i915_gem_pwrite *arg)
{
	struct address_space *mapping = obj->base.filp->f_mapping;
	const struct address_space_operations *aops = mapping->a_ops;
	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
	u64 remain;
	loff_t pos;
	unsigned int pg;

	/* Caller already validated user args */
	GEM_BUG_ON(!access_ok(user_data, arg->size));

	if (!i915_gem_object_has_struct_page(obj))
		return i915_gem_object_pwrite_phys(obj, arg);

	/*
	 * Before we instantiate/pin the backing store for our use, we
	 * can prepopulate the shmemfs filp efficiently using a write into
	 * the pagecache. We avoid the penalty of instantiating all the
	 * pages, important if the user is just writing to a few and never
	 * uses the object on the GPU, and using a direct write into shmemfs
	 * allows it to avoid the cost of retrieving a page (either swapin
	 * or clearing-before-use) before it is overwritten.
	 */
	if (i915_gem_object_has_pages(obj))
		return -ENODEV;

	if (obj->mm.madv != I915_MADV_WILLNEED)
		return -EFAULT;

	/*
	 * Before the pages are instantiated the object is treated as being
	 * in the CPU domain. The pages will be clflushed as required before
	 * use, and we can freely write into the pages directly. If userspace
	 * races pwrite with any other operation, corruption will ensue -
	 * that is userspace's prerogative!
	 */
	remain = arg->size;
	pos = arg->offset;
	pg = offset_in_page(pos);

	do {
		unsigned int len, unwritten;
		struct folio *folio;
		void *data, *vaddr;
		int err;
		char __maybe_unused c;

		len = PAGE_SIZE - pg;
		if (len > remain)
			len = remain;

		/* Prefault the user page to reduce potential recursion */
		err = __get_user(c, user_data);
		if (err)
			return err;

		err = __get_user(c, user_data + len - 1);
		if (err)
			return err;

		err = aops->write_begin(obj->base.filp, mapping, pos, len,
					&folio, &data);
		if (err < 0)
			return err;

		vaddr = kmap_local_folio(folio, offset_in_folio(folio, pos));
		pagefault_disable();
		unwritten = __copy_from_user_inatomic(vaddr, user_data, len);
		pagefault_enable();
		kunmap_local(vaddr);

		err = aops->write_end(obj->base.filp, mapping, pos, len,
				      len - unwritten, folio, data);
		if (err < 0)
			return err;

		/* We don't handle -EFAULT, leave it to the caller to check */
		if (unwritten)
			return -ENODEV;

		remain -= len;
		user_data += len;
		pos += len;
		pg = 0;
	} while (remain);

	return 0;
}

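/*
 * Only objects that have been converted to phys backing are read here;
 * returning -ENODEV tells the caller to fall back to the regular
 * page-based shmem pread path.
 */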
static int
shmem_pread(struct drm_i915_gem_object *obj,
	    const struct drm_i915_gem_pread *arg)
{
	if (!i915_gem_object_has_struct_page(obj))
		return i915_gem_object_pread_phys(obj, arg);

	return -ENODEV;
}

static void shmem_release(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_has_struct_page(obj))
		i915_gem_object_release_memory_region(obj);

	fput(obj->base.filp);
}

const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
	.name = "i915_gem_object_shmem",
	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,

	.get_pages = shmem_get_pages,
	.put_pages = shmem_put_pages,
	.truncate = shmem_truncate,
	.shrink = shmem_shrink,

	.pwrite = shmem_pwrite,
	.pread = shmem_pread,

	.release = shmem_release,
};

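/*
 * Back the GEM object with a shmemfs file: use the driver's private gemfs
 * mount when it was set up (for transparent huge page support), otherwise
 * fall back to the kernel's default shmem mount.
 */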
static int __create_shmem(struct drm_i915_private *i915,
			  struct drm_gem_object *obj,
			  resource_size_t size)
{
	unsigned long flags = VM_NORESERVE;
	struct file *filp;

	drm_gem_private_object_init(&i915->drm, obj, size);

	/* XXX: The __shmem_file_setup() function returns -EINVAL if size is
	 * greater than MAX_LFS_FILESIZE.
	 * To report the same error as other code that returns -E2BIG when the
	 * size is too large, return -E2BIG when the size is larger than what
	 * can be handled.
	 * If BITS_PER_LONG is 32, size > MAX_LFS_FILESIZE is always false,
	 * so we only need to check when BITS_PER_LONG is 64.
	 * If BITS_PER_LONG is 32, the E2BIG check is done by
	 * i915_gem_object_size_2big(), which is called before the
	 * init_object() callback.
	 */
	if (BITS_PER_LONG == 64 && size > MAX_LFS_FILESIZE)
		return -E2BIG;

	if (i915->mm.gemfs)
		filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
						 flags);
	else
		filp = shmem_file_setup("i915", size, flags);
	if (IS_ERR(filp))
		return PTR_ERR(filp);

	obj->filp = filp;
	return 0;
}

static int shmem_object_init(struct intel_memory_region *mem,
			     struct drm_i915_gem_object *obj,
			     resource_size_t offset,
			     resource_size_t size,
			     resource_size_t page_size,
			     unsigned int flags)
{
	static struct lock_class_key lock_class;
	struct drm_i915_private *i915 = mem->i915;
	struct address_space *mapping;
	unsigned int cache_level;
	gfp_t mask;
	int ret;

	ret = __create_shmem(i915, &obj->base, size);
	if (ret)
		return ret;

	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
	if (IS_I965GM(i915) || IS_I965G(i915)) {
		/* 965gm cannot relocate objects above 4GiB. */
		mask &= ~__GFP_HIGHMEM;
		mask |= __GFP_DMA32;
	}

	mapping = obj->base.filp->f_mapping;
	mapping_set_gfp_mask(mapping, mask);
	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));

	i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, flags);
	obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
	obj->write_domain = I915_GEM_DOMAIN_CPU;
	obj->read_domains = I915_GEM_DOMAIN_CPU;

	/*
	 * MTL doesn't snoop CPU cache by default for GPU access (namely
	 * 1-way coherency). However some UMDs are currently depending on
	 * that. Make 1-way coherent the default setting for MTL. A follow-up
	 * patch will extend the GEM_CREATE uAPI to allow UMDs to specify the
	 * caching mode at BO creation time.
	 */
	if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)))
		/* On some devices, we can have the GPU use the LLC (the CPU
		 * cache) for about a 10% performance improvement
		 * compared to uncached. Graphics requests other than
		 * display scanout are coherent with the CPU in
		 * accessing this cache. This means in this mode we
		 * don't need to clflush on the CPU side, and on the
		 * GPU side we only need to flush internal caches to
		 * get data visible to the CPU.
		 *
		 * However, we maintain the display planes as UC, and so
		 * need to rebind when first used as such.
		 */
		cache_level = I915_CACHE_LLC;
	else
		cache_level = I915_CACHE_NONE;

	i915_gem_object_set_cache_coherency(obj, cache_level);

	i915_gem_object_init_memory_region(obj, mem);

	return 0;
}

struct drm_i915_gem_object *
i915_gem_object_create_shmem(struct drm_i915_private *i915,
			     resource_size_t size)
{
	return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM],
					     size, 0, 0);
}

/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
				       const void *data, resource_size_t size)
{
	struct drm_i915_gem_object *obj;
	struct file *file;
	const struct address_space_operations *aops;
	loff_t pos;
	int err;

	GEM_WARN_ON(IS_DGFX(i915));
	obj = i915_gem_object_create_shmem(i915, round_up(size, PAGE_SIZE));
	if (IS_ERR(obj))
		return obj;

	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);

	file = obj->base.filp;
	aops = file->f_mapping->a_ops;
	pos = 0;
	do {
		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
		struct folio *folio;
		void *fsdata;

		err = aops->write_begin(file, file->f_mapping, pos, len,
					&folio, &fsdata);
		if (err < 0)
			goto fail;

		memcpy_to_folio(folio, offset_in_folio(folio, pos), data, len);

		err = aops->write_end(file, file->f_mapping, pos, len, len,
				      folio, fsdata);
		if (err < 0)
			goto fail;

		size -= len;
		data += len;
		pos += len;
	} while (size);

	return obj;

fail:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int init_shmem(struct intel_memory_region *mem)
{
	i915_gemfs_init(mem->i915);
	intel_memory_region_set_name(mem, "system");

	return 0; /* We fall back to the kernel mnt if gemfs init fails. */
}

static int release_shmem(struct intel_memory_region *mem)
{
	i915_gemfs_fini(mem->i915);
	return 0;
}

static const struct intel_memory_region_ops shmem_region_ops = {
	.init = init_shmem,
	.release = release_shmem,
	.init_object = shmem_object_init,
};

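/*
 * Register system memory (shmem) as an intel_memory_region, sized to the
 * total amount of RAM in the machine.
 */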
struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915,
						 u16 type, u16 instance)
{
	return intel_memory_region_create(i915, 0,
					  totalram_pages() << PAGE_SHIFT,
					  PAGE_SIZE, 0, 0,
					  type, instance,
					  &shmem_region_ops);
}

bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj)
{
	return obj->ops == &i915_gem_shmem_ops;
}