Files
linux/drivers/gpu/drm/i915/gt/selftest_migrate.c
Linus Torvalds bf4afc53b7 Convert 'alloc_obj' family to use the new default GFP_KERNEL argument
This was done entirely with mindless brute force, using

    git grep -l '\<k[vmz]*alloc_objs*(.*, GFP_KERNEL)' |
        xargs sed -i 's/\(alloc_objs*(.*\), GFP_KERNEL)/\1)/'

to convert the new alloc_obj() users that had a simple GFP_KERNEL
argument to just drop that argument.

Note that due to the extreme simplicity of the scripting, any slightly
more complex cases spread over multiple lines would not be triggered:
they definitely exist, but this covers the vast bulk of the cases, and
the resulting diff is also then easier to check automatically.

For the same reason the 'flex' versions will be done as a separate
conversion.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2026-02-21 17:09:51 -08:00

1040 lines
23 KiB
C

// SPDX-License-Identifier: MIT
/*
* Copyright © 2020 Intel Corporation
*/
#include <linux/sort.h>
#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "selftests/igt_spinner.h"
#include "selftests/i915_random.h"
static const unsigned int sizes[] = {
SZ_4K,
SZ_64K,
SZ_2M,
CHUNK_SZ - SZ_4K,
CHUNK_SZ,
CHUNK_SZ + SZ_4K,
SZ_64M,
};
static struct drm_i915_gem_object *
create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
{
struct drm_i915_gem_object *obj;
obj = i915_gem_object_create_lmem(i915, size, 0);
if (!IS_ERR(obj))
return obj;
return i915_gem_object_create_internal(i915, size);
}
static int copy(struct intel_migrate *migrate,
int (*fn)(struct intel_migrate *migrate,
struct i915_gem_ww_ctx *ww,
struct drm_i915_gem_object *src,
struct drm_i915_gem_object *dst,
struct i915_request **out),
u32 sz, struct rnd_state *prng)
{
struct drm_i915_private *i915 = migrate->context->engine->i915;
struct drm_i915_gem_object *src, *dst;
struct i915_request *rq;
struct i915_gem_ww_ctx ww;
u32 *vaddr;
int err = 0;
int i;
src = create_lmem_or_internal(i915, sz);
if (IS_ERR(src))
return 0;
sz = src->base.size;
dst = i915_gem_object_create_internal(i915, sz);
if (IS_ERR(dst))
goto err_free_src;
for_i915_gem_ww(&ww, err, true) {
err = i915_gem_object_lock(src, &ww);
if (err)
continue;
err = i915_gem_object_lock(dst, &ww);
if (err)
continue;
vaddr = i915_gem_object_pin_map(src, I915_MAP_WC);
if (IS_ERR(vaddr)) {
err = PTR_ERR(vaddr);
continue;
}
for (i = 0; i < sz / sizeof(u32); i++)
vaddr[i] = i;
i915_gem_object_flush_map(src);
vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC);
if (IS_ERR(vaddr)) {
err = PTR_ERR(vaddr);
goto unpin_src;
}
for (i = 0; i < sz / sizeof(u32); i++)
vaddr[i] = ~i;
i915_gem_object_flush_map(dst);
err = fn(migrate, &ww, src, dst, &rq);
if (!err)
continue;
if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
pr_err("%ps failed, size: %u\n", fn, sz);
if (rq) {
i915_request_wait(rq, 0, HZ);
i915_request_put(rq);
}
i915_gem_object_unpin_map(dst);
unpin_src:
i915_gem_object_unpin_map(src);
}
if (err)
goto err_out;
if (rq) {
if (i915_request_wait(rq, 0, HZ) < 0) {
pr_err("%ps timed out, size: %u\n", fn, sz);
err = -ETIME;
}
i915_request_put(rq);
}
for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);
if (vaddr[x] != x) {
pr_err("%ps failed, size: %u, offset: %zu\n",
fn, sz, x * sizeof(u32));
igt_hexdump(vaddr + i * 1024, 4096);
err = -EINVAL;
}
}
i915_gem_object_unpin_map(dst);
i915_gem_object_unpin_map(src);
err_out:
i915_gem_object_put(dst);
err_free_src:
i915_gem_object_put(src);
return err;
}
static int intel_context_copy_ccs(struct intel_context *ce,
const struct i915_deps *deps,
struct scatterlist *sg,
unsigned int pat_index,
bool write_to_ccs,
struct i915_request **out)
{
u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS;
u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS;
struct sgt_dma it = sg_sgt(sg);
struct i915_request *rq;
u32 offset;
int err;
GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
*out = NULL;
GEM_BUG_ON(ce->ring->size < SZ_64K);
offset = 0;
if (HAS_64K_PAGES(ce->engine->i915))
offset = CHUNK_SZ;
do {
int len;
rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_ce;
}
if (deps) {
err = i915_request_await_deps(rq, deps);
if (err)
goto out_rq;
if (rq->engine->emit_init_breadcrumb) {
err = rq->engine->emit_init_breadcrumb(rq);
if (err)
goto out_rq;
}
deps = NULL;
}
/* The PTE updates + clear must not be interrupted. */
err = emit_no_arbitration(rq);
if (err)
goto out_rq;
len = emit_pte(rq, &it, pat_index, true, offset, CHUNK_SZ);
if (len <= 0) {
err = len;
goto out_rq;
}
err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
if (err)
goto out_rq;
err = emit_copy_ccs(rq, offset, dst_access,
offset, src_access, len);
if (err)
goto out_rq;
err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
/* Arbitration is re-enabled between requests. */
out_rq:
if (*out)
i915_request_put(*out);
*out = i915_request_get(rq);
i915_request_add(rq);
if (err || !it.sg || !sg_dma_len(it.sg))
break;
cond_resched();
} while (1);
out_ce:
return err;
}
static int
intel_migrate_ccs_copy(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
const struct i915_deps *deps,
struct scatterlist *sg,
unsigned int pat_index,
bool write_to_ccs,
struct i915_request **out)
{
struct intel_context *ce;
int err;
*out = NULL;
if (!m->context)
return -ENODEV;
ce = intel_migrate_create_context(m);
if (IS_ERR(ce))
ce = intel_context_get(m->context);
GEM_BUG_ON(IS_ERR(ce));
err = intel_context_pin_ww(ce, ww);
if (err)
goto out;
err = intel_context_copy_ccs(ce, deps, sg, pat_index,
write_to_ccs, out);
intel_context_unpin(ce);
out:
intel_context_put(ce);
return err;
}
static int clear(struct intel_migrate *migrate,
int (*fn)(struct intel_migrate *migrate,
struct i915_gem_ww_ctx *ww,
struct drm_i915_gem_object *obj,
u32 value,
struct i915_request **out),
u32 sz, struct rnd_state *prng)
{
struct drm_i915_private *i915 = migrate->context->engine->i915;
struct drm_i915_gem_object *obj;
struct i915_request *rq = NULL;
struct i915_gem_ww_ctx ww;
u32 *vaddr, val = 0;
bool ccs_cap = false;
int err = 0;
int i;
obj = create_lmem_or_internal(i915, sz);
if (IS_ERR(obj))
return 0;
/* Consider the rounded up memory too */
sz = obj->base.size;
if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj))
ccs_cap = true;
for_i915_gem_ww(&ww, err, true) {
int ccs_bytes, ccs_bytes_per_chunk;
err = i915_gem_object_lock(obj, &ww);
if (err)
continue;
vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
if (IS_ERR(vaddr)) {
err = PTR_ERR(vaddr);
continue;
}
for (i = 0; i < sz / sizeof(u32); i++)
vaddr[i] = ~i;
i915_gem_object_flush_map(obj);
if (ccs_cap && !val) {
/* Write the obj data into ccs surface */
err = intel_migrate_ccs_copy(migrate, &ww, NULL,
obj->mm.pages->sgl,
obj->pat_index,
true, &rq);
if (rq && !err) {
if (i915_request_wait(rq, 0, HZ) < 0) {
pr_err("%ps timed out, size: %u\n",
fn, sz);
err = -ETIME;
}
i915_request_put(rq);
rq = NULL;
}
if (err)
continue;
}
err = fn(migrate, &ww, obj, val, &rq);
if (rq && !err) {
if (i915_request_wait(rq, 0, HZ) < 0) {
pr_err("%ps timed out, size: %u\n", fn, sz);
err = -ETIME;
}
i915_request_put(rq);
rq = NULL;
}
if (err)
continue;
i915_gem_object_flush_map(obj);
/* Verify the set/clear of the obj mem */
for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
int x = i * 1024 +
i915_prandom_u32_max_state(1024, prng);
if (vaddr[x] != val) {
pr_err("%ps failed, (%u != %u), offset: %zu\n",
fn, vaddr[x], val, x * sizeof(u32));
igt_hexdump(vaddr + i * 1024, 4096);
err = -EINVAL;
}
}
if (err)
continue;
if (ccs_cap && !val) {
for (i = 0; i < sz / sizeof(u32); i++)
vaddr[i] = ~i;
i915_gem_object_flush_map(obj);
err = intel_migrate_ccs_copy(migrate, &ww, NULL,
obj->mm.pages->sgl,
obj->pat_index,
false, &rq);
if (rq && !err) {
if (i915_request_wait(rq, 0, HZ) < 0) {
pr_err("%ps timed out, size: %u\n",
fn, sz);
err = -ETIME;
}
i915_request_put(rq);
rq = NULL;
}
if (err)
continue;
ccs_bytes = GET_CCS_BYTES(i915, sz);
ccs_bytes_per_chunk = GET_CCS_BYTES(i915, CHUNK_SZ);
i915_gem_object_flush_map(obj);
for (i = 0; !err && i < DIV_ROUND_UP(ccs_bytes, PAGE_SIZE); i++) {
int offset = ((i * PAGE_SIZE) /
ccs_bytes_per_chunk) * CHUNK_SZ / sizeof(u32);
int ccs_bytes_left = (ccs_bytes - i * PAGE_SIZE) / sizeof(u32);
int x = i915_prandom_u32_max_state(min_t(int, 1024,
ccs_bytes_left), prng);
if (vaddr[offset + x]) {
pr_err("%ps ccs clearing failed, offset: %ld/%d\n",
fn, i * PAGE_SIZE + x * sizeof(u32), ccs_bytes);
igt_hexdump(vaddr + offset,
min_t(int, 4096,
ccs_bytes_left * sizeof(u32)));
err = -EINVAL;
}
}
if (err)
continue;
}
i915_gem_object_unpin_map(obj);
}
if (err) {
if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
pr_err("%ps failed, size: %u\n", fn, sz);
if (rq && err != -EINVAL) {
i915_request_wait(rq, 0, HZ);
i915_request_put(rq);
}
i915_gem_object_unpin_map(obj);
}
i915_gem_object_put(obj);
return err;
}
static int __migrate_copy(struct intel_migrate *migrate,
struct i915_gem_ww_ctx *ww,
struct drm_i915_gem_object *src,
struct drm_i915_gem_object *dst,
struct i915_request **out)
{
return intel_migrate_copy(migrate, ww, NULL,
src->mm.pages->sgl, src->pat_index,
i915_gem_object_is_lmem(src),
dst->mm.pages->sgl, dst->pat_index,
i915_gem_object_is_lmem(dst),
out);
}
static int __global_copy(struct intel_migrate *migrate,
struct i915_gem_ww_ctx *ww,
struct drm_i915_gem_object *src,
struct drm_i915_gem_object *dst,
struct i915_request **out)
{
return intel_context_migrate_copy(migrate->context, NULL,
src->mm.pages->sgl, src->pat_index,
i915_gem_object_is_lmem(src),
dst->mm.pages->sgl, dst->pat_index,
i915_gem_object_is_lmem(dst),
out);
}
static int
migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
return copy(migrate, __migrate_copy, sz, prng);
}
static int
global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
return copy(migrate, __global_copy, sz, prng);
}
static int __migrate_clear(struct intel_migrate *migrate,
struct i915_gem_ww_ctx *ww,
struct drm_i915_gem_object *obj,
u32 value,
struct i915_request **out)
{
return intel_migrate_clear(migrate, ww, NULL,
obj->mm.pages->sgl,
obj->pat_index,
i915_gem_object_is_lmem(obj),
value, out);
}
static int __global_clear(struct intel_migrate *migrate,
struct i915_gem_ww_ctx *ww,
struct drm_i915_gem_object *obj,
u32 value,
struct i915_request **out)
{
return intel_context_migrate_clear(migrate->context, NULL,
obj->mm.pages->sgl,
obj->pat_index,
i915_gem_object_is_lmem(obj),
value, out);
}
static int
migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
return clear(migrate, __migrate_clear, sz, prng);
}
static int
global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
return clear(migrate, __global_clear, sz, prng);
}
static int live_migrate_copy(void *arg)
{
struct intel_gt *gt = arg;
struct intel_migrate *migrate = &gt->migrate;
struct drm_i915_private *i915 = migrate->context->engine->i915;
I915_RND_STATE(prng);
int i;
for (i = 0; i < ARRAY_SIZE(sizes); i++) {
int err;
err = migrate_copy(migrate, sizes[i], &prng);
if (err == 0)
err = global_copy(migrate, sizes[i], &prng);
i915_gem_drain_freed_objects(i915);
if (err)
return err;
}
return 0;
}
static int live_migrate_clear(void *arg)
{
struct intel_gt *gt = arg;
struct intel_migrate *migrate = &gt->migrate;
struct drm_i915_private *i915 = migrate->context->engine->i915;
I915_RND_STATE(prng);
int i;
for (i = 0; i < ARRAY_SIZE(sizes); i++) {
int err;
err = migrate_clear(migrate, sizes[i], &prng);
if (err == 0)
err = global_clear(migrate, sizes[i], &prng);
i915_gem_drain_freed_objects(i915);
if (err)
return err;
}
return 0;
}
struct spinner_timer {
struct timer_list timer;
struct igt_spinner spin;
};
static void spinner_kill(struct timer_list *timer)
{
struct spinner_timer *st = timer_container_of(st, timer, timer);
igt_spinner_end(&st->spin);
pr_info("%s\n", __func__);
}
static int live_emit_pte_full_ring(void *arg)
{
struct intel_gt *gt = arg;
struct intel_migrate *migrate = &gt->migrate;
struct drm_i915_private *i915 = migrate->context->engine->i915;
struct drm_i915_gem_object *obj;
struct intel_context *ce;
struct i915_request *rq, *prev;
struct spinner_timer st;
struct sgt_dma it;
int len, sz, err;
u32 *cs;
/*
* Simple regression test to check that we don't trample the
* rq->reserved_space when returning from emit_pte(), if the ring is
* nearly full.
*/
if (igt_spinner_init(&st.spin, to_gt(i915)))
return -ENOMEM;
obj = i915_gem_object_create_internal(i915, 2 * PAGE_SIZE);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto out_spinner;
}
err = i915_gem_object_pin_pages_unlocked(obj);
if (err)
goto out_obj;
ce = intel_migrate_create_context(migrate);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out_obj;
}
ce->ring_size = SZ_4K; /* Not too big */
err = intel_context_pin(ce);
if (err)
goto out_put;
rq = igt_spinner_create_request(&st.spin, ce, MI_ARB_CHECK);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_unpin;
}
i915_request_add(rq);
if (!igt_wait_for_spinner(&st.spin, rq)) {
err = -EIO;
goto out_unpin;
}
/*
* Fill the rest of the ring leaving I915_EMIT_PTE_NUM_DWORDS +
* ring->reserved_space at the end. To actually emit the PTEs we require
* slightly more than I915_EMIT_PTE_NUM_DWORDS, since our object size is
* greater than PAGE_SIZE. The correct behaviour is to wait for more
* ring space in emit_pte(), otherwise we trample on the reserved_space
* resulting in crashes when later submitting the rq.
*/
prev = NULL;
do {
if (prev)
i915_request_add(rq);
rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_unpin;
}
sz = (rq->ring->space - rq->reserved_space) / sizeof(u32) -
I915_EMIT_PTE_NUM_DWORDS;
sz = min_t(u32, sz, (SZ_1K - rq->reserved_space) / sizeof(u32) -
I915_EMIT_PTE_NUM_DWORDS);
cs = intel_ring_begin(rq, sz);
if (IS_ERR(cs)) {
err = PTR_ERR(cs);
goto out_rq;
}
memset32(cs, MI_NOOP, sz);
cs += sz;
intel_ring_advance(rq, cs);
pr_info("%s emit=%u sz=%d\n", __func__, rq->ring->emit, sz);
prev = rq;
} while (rq->ring->space > (rq->reserved_space +
I915_EMIT_PTE_NUM_DWORDS * sizeof(u32)));
timer_setup_on_stack(&st.timer, spinner_kill, 0);
mod_timer(&st.timer, jiffies + 2 * HZ);
/*
* This should wait for the spinner to be killed, otherwise we should go
* down in flames when doing i915_request_add().
*/
pr_info("%s emite_pte ring space=%u\n", __func__, rq->ring->space);
it = sg_sgt(obj->mm.pages->sgl);
len = emit_pte(rq, &it, obj->pat_index, false, 0, CHUNK_SZ);
if (!len) {
err = -EINVAL;
goto out_rq;
}
if (len < 0) {
err = len;
goto out_rq;
}
out_rq:
i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? */
timer_delete_sync(&st.timer);
timer_destroy_on_stack(&st.timer);
out_unpin:
intel_context_unpin(ce);
out_put:
intel_context_put(ce);
out_obj:
i915_gem_object_put(obj);
out_spinner:
igt_spinner_fini(&st.spin);
return err;
}
struct threaded_migrate {
struct intel_migrate *migrate;
struct task_struct *tsk;
struct rnd_state prng;
};
static int threaded_migrate(struct intel_migrate *migrate,
int (*fn)(void *arg),
unsigned int flags)
{
const unsigned int n_cpus = num_online_cpus() + 1;
struct threaded_migrate *thread;
I915_RND_STATE(prng);
unsigned int i;
int err = 0;
thread = kzalloc_objs(*thread, n_cpus);
if (!thread)
return 0;
for (i = 0; i < n_cpus; ++i) {
struct task_struct *tsk;
thread[i].migrate = migrate;
thread[i].prng =
I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));
tsk = kthread_run(fn, &thread[i], "igt-%d", i);
if (IS_ERR(tsk)) {
err = PTR_ERR(tsk);
break;
}
get_task_struct(tsk);
thread[i].tsk = tsk;
}
/*
* Start all threads before we kthread_stop().
* In CHV / BXT+VTD environments, where VMA pinning is committed
* asynchronously, empirically determined 100ms delay is needed
* to avoid stopping threads that may still wait for completion of
* intel_ggtt_bind_vma and fail with -ERESTARTSYS when interrupted.
*/
msleep((intel_vm_no_concurrent_access_wa(migrate->context->vm->i915) ? 100 : 10) * n_cpus);
for (i = 0; i < n_cpus; ++i) {
struct task_struct *tsk = thread[i].tsk;
int status;
if (IS_ERR_OR_NULL(tsk))
continue;
status = kthread_stop_put(tsk);
if (status && !err)
err = status;
}
kfree(thread);
return err;
}
static int __thread_migrate_copy(void *arg)
{
struct threaded_migrate *tm = arg;
return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}
static int thread_migrate_copy(void *arg)
{
struct intel_gt *gt = arg;
struct intel_migrate *migrate = &gt->migrate;
return threaded_migrate(migrate, __thread_migrate_copy, 0);
}
static int __thread_global_copy(void *arg)
{
struct threaded_migrate *tm = arg;
return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}
static int thread_global_copy(void *arg)
{
struct intel_gt *gt = arg;
struct intel_migrate *migrate = &gt->migrate;
return threaded_migrate(migrate, __thread_global_copy, 0);
}
static int __thread_migrate_clear(void *arg)
{
struct threaded_migrate *tm = arg;
return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}
static int __thread_global_clear(void *arg)
{
struct threaded_migrate *tm = arg;
return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}
static int thread_migrate_clear(void *arg)
{
struct intel_gt *gt = arg;
struct intel_migrate *migrate = &gt->migrate;
return threaded_migrate(migrate, __thread_migrate_clear, 0);
}
static int thread_global_clear(void *arg)
{
struct intel_gt *gt = arg;
struct intel_migrate *migrate = &gt->migrate;
return threaded_migrate(migrate, __thread_global_clear, 0);
}
int intel_migrate_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(live_migrate_copy),
SUBTEST(live_migrate_clear),
SUBTEST(live_emit_pte_full_ring),
SUBTEST(thread_migrate_copy),
SUBTEST(thread_migrate_clear),
SUBTEST(thread_global_copy),
SUBTEST(thread_global_clear),
};
struct intel_gt *gt = to_gt(i915);
if (!gt->migrate.context)
return 0;
return intel_gt_live_subtests(tests, gt);
}
static struct drm_i915_gem_object *
create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
{
struct drm_i915_gem_object *obj = NULL;
int err;
if (try_lmem)
obj = i915_gem_object_create_lmem(gt->i915, sz, 0);
if (IS_ERR_OR_NULL(obj)) {
obj = i915_gem_object_create_internal(gt->i915, sz);
if (IS_ERR(obj))
return obj;
}
i915_gem_object_trylock(obj, NULL);
err = i915_gem_object_pin_pages(obj);
if (err) {
i915_gem_object_unlock(obj);
i915_gem_object_put(obj);
return ERR_PTR(err);
}
return obj;
}
static int wrap_ktime_compare(const void *A, const void *B)
{
const ktime_t *a = A, *b = B;
return ktime_compare(*a, *b);
}
static int __perf_clear_blt(struct intel_context *ce,
struct scatterlist *sg,
unsigned int pat_index,
bool is_lmem,
size_t sz)
{
ktime_t t[5];
int pass;
int err = 0;
for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
struct i915_request *rq;
ktime_t t0, t1;
t0 = ktime_get();
err = intel_context_migrate_clear(ce, NULL, sg, pat_index,
is_lmem, 0, &rq);
if (rq) {
if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
err = -EIO;
i915_request_put(rq);
}
if (err)
break;
t1 = ktime_get();
t[pass] = ktime_sub(t1, t0);
}
if (err)
return err;
sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
pr_info("%s: %zd KiB fill: %lld MiB/s\n",
ce->engine->name, sz >> 10,
div64_u64(mul_u32_u32(4 * sz,
1000 * 1000 * 1000),
t[1] + 2 * t[2] + t[3]) >> 20);
return 0;
}
static int perf_clear_blt(void *arg)
{
struct intel_gt *gt = arg;
static const unsigned long sizes[] = {
SZ_4K,
SZ_64K,
SZ_2M,
SZ_64M
};
int i;
for (i = 0; i < ARRAY_SIZE(sizes); i++) {
struct drm_i915_gem_object *dst;
int err;
dst = create_init_lmem_internal(gt, sizes[i], true);
if (IS_ERR(dst))
return PTR_ERR(dst);
err = __perf_clear_blt(gt->migrate.context,
dst->mm.pages->sgl,
i915_gem_get_pat_index(gt->i915,
I915_CACHE_NONE),
i915_gem_object_is_lmem(dst),
sizes[i]);
i915_gem_object_unlock(dst);
i915_gem_object_put(dst);
if (err)
return err;
}
return 0;
}
static int __perf_copy_blt(struct intel_context *ce,
struct scatterlist *src,
unsigned int src_pat_index,
bool src_is_lmem,
struct scatterlist *dst,
unsigned int dst_pat_index,
bool dst_is_lmem,
size_t sz)
{
ktime_t t[5];
int pass;
int err = 0;
for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
struct i915_request *rq;
ktime_t t0, t1;
t0 = ktime_get();
err = intel_context_migrate_copy(ce, NULL,
src, src_pat_index,
src_is_lmem,
dst, dst_pat_index,
dst_is_lmem,
&rq);
if (rq) {
if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
err = -EIO;
i915_request_put(rq);
}
if (err)
break;
t1 = ktime_get();
t[pass] = ktime_sub(t1, t0);
}
if (err)
return err;
sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
pr_info("%s: %zd KiB copy: %lld MiB/s\n",
ce->engine->name, sz >> 10,
div64_u64(mul_u32_u32(4 * sz,
1000 * 1000 * 1000),
t[1] + 2 * t[2] + t[3]) >> 20);
return 0;
}
static int perf_copy_blt(void *arg)
{
struct intel_gt *gt = arg;
static const unsigned long sizes[] = {
SZ_4K,
SZ_64K,
SZ_2M,
SZ_64M
};
int i;
for (i = 0; i < ARRAY_SIZE(sizes); i++) {
struct drm_i915_gem_object *src, *dst;
size_t sz;
int err;
src = create_init_lmem_internal(gt, sizes[i], true);
if (IS_ERR(src))
return PTR_ERR(src);
sz = src->base.size;
dst = create_init_lmem_internal(gt, sz, false);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto err_src;
}
err = __perf_copy_blt(gt->migrate.context,
src->mm.pages->sgl,
i915_gem_get_pat_index(gt->i915,
I915_CACHE_NONE),
i915_gem_object_is_lmem(src),
dst->mm.pages->sgl,
i915_gem_get_pat_index(gt->i915,
I915_CACHE_NONE),
i915_gem_object_is_lmem(dst),
sz);
i915_gem_object_unlock(dst);
i915_gem_object_put(dst);
err_src:
i915_gem_object_unlock(src);
i915_gem_object_put(src);
if (err)
return err;
}
return 0;
}
int intel_migrate_perf_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(perf_clear_blt),
SUBTEST(perf_copy_blt),
};
struct intel_gt *gt = to_gt(i915);
if (intel_gt_is_wedged(gt))
return 0;
if (!gt->migrate.context)
return 0;
return intel_gt_live_subtests(tests, gt);
}