Merge tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux

Pull CRC updates from Eric Biggers:

 - Several improvements related to crc_kunit, aligning it with standard
   KUnit conventions and making it easier for developers and CI systems
   to run this test suite

 - Add an arm64-optimized implementation of CRC64-NVME

 - Remove unused code for big-endian arm64

* tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux:
  lib/crc: arm64: Simplify intrinsics implementation
  lib/crc: arm64: Use existing macros for kernel-mode FPU cflags
  lib/crc: arm64: Drop unnecessary chunking logic from crc64
  lib/crc: arm64: Assume a little-endian kernel
  lib/crc: arm64: add NEON accelerated CRC64-NVMe implementation
  lib/crc: arm64: Drop check for CONFIG_KERNEL_MODE_NEON
  crypto: crc32c - Remove another outdated comment
  crypto: crc32c - Remove more outdated usage information
  kunit: configs: Enable all CRC tests in all_tests.config
  lib/crc: tests: Add a .kunitconfig file
  lib/crc: tests: Add CRC_ENABLE_ALL_FOR_KUNIT
  lib/crc: tests: Make crc_kunit test only the enabled CRC variants
Linus Torvalds
2026-04-13 17:36:04 -07:00
10 changed files with 174 additions and 67 deletions


@@ -1,8 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * Cryptographic API.
- *
- * CRC32C chksum
+ * crypto_shash support for CRC-32C
  *
  *@Article{castagnoli-crc,
  * author = { Guy Castagnoli and Stefan Braeuer and Martin Herrman},
@@ -15,16 +13,6 @@
  * pages = {},
  * month = {June},
  *}
- * Used by the iSCSI driver, possibly others, and derived from
- * the iscsi-crc.c module of the linux-iscsi driver at
- * http://linux-iscsi.sourceforge.net.
- *
- * Following the example of lib/crc32, this function is intended to be
- * flexible and useful for all users. Modules that currently have their
- * own crc32c, but hopefully may be able to use this one are:
- *  net/sctp (please add all your doco to here if you change to
- *            use this one!)
- *  <endoflist>
  *
  * Copyright (c) 2004 Cisco Systems, Inc.
  * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
@@ -49,11 +37,6 @@ struct chksum_desc_ctx {
 	u32 crc;
 };

-/*
- * Steps through buffer one byte at a time, calculates reflected
- * crc using table.
- */
-
 static int chksum_init(struct shash_desc *desc)
 {
 	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);

lib/crc/.kunitconfig (new file)

@@ -0,0 +1,3 @@
+CONFIG_KUNIT=y
+CONFIG_CRC_ENABLE_ALL_FOR_KUNIT=y
+CONFIG_CRC_KUNIT_TEST=y
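
With this fragment in place, the suite slots into the standard KUnit
tooling; a typical invocation (assuming the usual in-tree kunit.py
workflow) is:

    ./tools/testing/kunit/kunit.py run --kunitconfig=lib/crc/.kunitconfig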


@@ -48,7 +48,7 @@ config CRC_T10DIF_ARCH
 	bool
 	depends on CRC_T10DIF && CRC_OPTIMIZATIONS
 	default y if ARM && KERNEL_MODE_NEON
-	default y if ARM64 && KERNEL_MODE_NEON
+	default y if ARM64
 	default y if PPC64 && ALTIVEC
 	default y if RISCV && RISCV_ISA_ZBC
 	default y if X86
@@ -82,6 +82,7 @@ config CRC64
 config CRC64_ARCH
 	bool
 	depends on CRC64 && CRC_OPTIMIZATIONS
+	default y if ARM64
 	default y if RISCV && RISCV_ISA_ZBC && 64BIT
 	default y if X86_64
@@ -99,19 +100,28 @@ config CRC_OPTIMIZATIONS
 config CRC_KUNIT_TEST
 	tristate "KUnit tests for CRC functions" if !KUNIT_ALL_TESTS
-	depends on KUNIT
+	depends on KUNIT && (CRC7 || CRC16 || CRC_T10DIF || CRC32 || CRC64)
 	default KUNIT_ALL_TESTS
-	select CRC7
-	select CRC16
-	select CRC_T10DIF
-	select CRC32
-	select CRC64
 	help
 	  Unit tests for the CRC library functions.

 	  This is intended to help people writing architecture-specific
 	  optimized versions.  If unsure, say N.

+config CRC_ENABLE_ALL_FOR_KUNIT
+	tristate "Enable all CRC functions for KUnit test"
+	depends on KUNIT
+	select CRC7
+	select CRC16
+	select CRC_T10DIF
+	select CRC32
+	select CRC64
+	help
+	  Enable all CRC functions that have test code in CRC_KUNIT_TEST.
+	  Enable this only if you'd like the CRC KUnit test suite to test all
+	  the CRC variants, even ones that wouldn't otherwise need to be built.
+
 config CRC_BENCHMARK
 	bool "Benchmark for the CRC functions"
 	depends on CRC_KUNIT_TEST


@@ -38,9 +38,14 @@ obj-$(CONFIG_CRC64) += crc64.o
 crc64-y := crc64-main.o
 ifeq ($(CONFIG_CRC64_ARCH),y)
 CFLAGS_crc64-main.o += -I$(src)/$(SRCARCH)
+CFLAGS_REMOVE_arm64/crc64-neon-inner.o += $(CC_FLAGS_NO_FPU)
+CFLAGS_arm64/crc64-neon-inner.o += $(CC_FLAGS_FPU) -march=armv8-a+crypto
+crc64-$(CONFIG_ARM64) += arm64/crc64-neon-inner.o
 crc64-$(CONFIG_RISCV) += riscv/crc64_lsb.o riscv/crc64_msb.o
 crc64-$(CONFIG_X86) += x86/crc64-pclmul.o
-endif
+endif # CONFIG_CRC64_ARCH

 obj-y += tests/


@@ -181,13 +181,13 @@ SYM_FUNC_END(__pmull_p8_16x64)
 	pmull16x64_\p	fold_consts, \reg1, v8
-CPU_LE(	rev64	v11.16b, v11.16b	)
-CPU_LE(	rev64	v12.16b, v12.16b	)
+	rev64	v11.16b, v11.16b
+	rev64	v12.16b, v12.16b
 	pmull16x64_\p	fold_consts, \reg2, v9
-CPU_LE(	ext	v11.16b, v11.16b, v11.16b, #8	)
-CPU_LE(	ext	v12.16b, v12.16b, v12.16b, #8	)
+	ext	v11.16b, v11.16b, v11.16b, #8
+	ext	v12.16b, v12.16b, v12.16b, #8
 	eor	\reg1\().16b, \reg1\().16b, v8.16b
 	eor	\reg2\().16b, \reg2\().16b, v9.16b
@@ -220,22 +220,22 @@ CPU_LE(	ext	v12.16b, v12.16b, v12.16b, #8	)
 	ldp	q4, q5, [buf, #0x40]
 	ldp	q6, q7, [buf, #0x60]
 	add	buf, buf, #0x80
-CPU_LE(	rev64	v0.16b, v0.16b	)
-CPU_LE(	rev64	v1.16b, v1.16b	)
-CPU_LE(	rev64	v2.16b, v2.16b	)
-CPU_LE(	rev64	v3.16b, v3.16b	)
-CPU_LE(	rev64	v4.16b, v4.16b	)
-CPU_LE(	rev64	v5.16b, v5.16b	)
-CPU_LE(	rev64	v6.16b, v6.16b	)
-CPU_LE(	rev64	v7.16b, v7.16b	)
-CPU_LE(	ext	v0.16b, v0.16b, v0.16b, #8	)
-CPU_LE(	ext	v1.16b, v1.16b, v1.16b, #8	)
-CPU_LE(	ext	v2.16b, v2.16b, v2.16b, #8	)
-CPU_LE(	ext	v3.16b, v3.16b, v3.16b, #8	)
-CPU_LE(	ext	v4.16b, v4.16b, v4.16b, #8	)
-CPU_LE(	ext	v5.16b, v5.16b, v5.16b, #8	)
-CPU_LE(	ext	v6.16b, v6.16b, v6.16b, #8	)
-CPU_LE(	ext	v7.16b, v7.16b, v7.16b, #8	)
+	rev64	v0.16b, v0.16b
+	rev64	v1.16b, v1.16b
+	rev64	v2.16b, v2.16b
+	rev64	v3.16b, v3.16b
+	rev64	v4.16b, v4.16b
+	rev64	v5.16b, v5.16b
+	rev64	v6.16b, v6.16b
+	rev64	v7.16b, v7.16b
+	ext	v0.16b, v0.16b, v0.16b, #8
+	ext	v1.16b, v1.16b, v1.16b, #8
+	ext	v2.16b, v2.16b, v2.16b, #8
+	ext	v3.16b, v3.16b, v3.16b, #8
+	ext	v4.16b, v4.16b, v4.16b, #8
+	ext	v5.16b, v5.16b, v5.16b, #8
+	ext	v6.16b, v6.16b, v6.16b, #8
+	ext	v7.16b, v7.16b, v7.16b, #8

 	// XOR the first 16 data *bits* with the initial CRC value.
 	movi	v8.16b, #0
@@ -288,8 +288,8 @@ CPU_LE(	ext	v7.16b, v7.16b, v7.16b, #8	)
 	pmull16x64_\p	fold_consts, v7, v8
 	eor	v7.16b, v7.16b, v8.16b
 	ldr	q0, [buf], #16
-CPU_LE(	rev64	v0.16b, v0.16b	)
-CPU_LE(	ext	v0.16b, v0.16b, v0.16b, #8	)
+	rev64	v0.16b, v0.16b
+	ext	v0.16b, v0.16b, v0.16b, #8
 	eor	v7.16b, v7.16b, v0.16b
 	subs	len, len, #16
 	b.ge	.Lfold_16_bytes_loop_\@
@@ -310,8 +310,8 @@ CPU_LE(	ext	v0.16b, v0.16b, v0.16b, #8	)
 	// v0 = last 16 original data bytes
 	add	buf, buf, len
 	ldr	q0, [buf, #-16]
-CPU_LE(	rev64	v0.16b, v0.16b	)
-CPU_LE(	ext	v0.16b, v0.16b, v0.16b, #8	)
+	rev64	v0.16b, v0.16b
+	ext	v0.16b, v0.16b, v0.16b, #8

 	// v1 = high order part of second chunk: v7 left-shifted by 'len' bytes.
 	adr_l	x4, .Lbyteshift_table + 16
@@ -344,8 +344,8 @@ CPU_LE(	ext	v0.16b, v0.16b, v0.16b, #8	)
 	// Load the first 16 data bytes.
 	ldr	q7, [buf], #0x10
-CPU_LE(	rev64	v7.16b, v7.16b	)
-CPU_LE(	ext	v7.16b, v7.16b, v7.16b, #8	)
+	rev64	v7.16b, v7.16b
+	ext	v7.16b, v7.16b, v7.16b, #8

 	// XOR the first 16 data *bits* with the initial CRC value.
 	movi	v0.16b, #0
@@ -382,8 +382,8 @@ SYM_FUNC_START(crc_t10dif_pmull_p8)
 	crc_t10dif_pmull	p8
-CPU_LE(	rev64	v7.16b, v7.16b	)
-CPU_LE(	ext	v7.16b, v7.16b, v7.16b, #8	)
+	rev64	v7.16b, v7.16b
+	ext	v7.16b, v7.16b, v7.16b, #8
 	str	q7, [x3]
 	frame_pop
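
For context on the conversions in these hunks (and in the macro changes
below): CPU_LE() and CPU_BE() emit their argument only on little-endian
and big-endian kernels respectively, roughly as defined in
arch/arm64/include/asm/assembler.h:

    #ifdef CONFIG_CPU_BIG_ENDIAN
    #define CPU_LE(code...)
    #define CPU_BE(code...)	code
    #else
    #define CPU_LE(code...)	code
    #define CPU_BE(code...)
    #endif

With big-endian arm64 kernels no longer supported, every CPU_LE(insn)
reduces to the bare instruction and every CPU_BE(insn) line disappears,
which is exactly the rewrite applied here.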


@@ -29,24 +29,19 @@
 	.endm

 	.macro	hwordle, reg
-CPU_BE(	rev16	\reg, \reg	)
 	.endm

 	.macro	hwordbe, reg
-CPU_LE(	rev	\reg, \reg	)
+	rev	\reg, \reg
 	rbit	\reg, \reg
-CPU_BE(	lsr	\reg, \reg, #16	)
 	.endm

 	.macro	le, regs:vararg
 	.irp	r, \regs
-CPU_BE(	rev	\r, \r	)
 	.endr
 	.endm

 	.macro	be, regs:vararg
 	.irp	r, \regs
-CPU_LE(	rev	\r, \r	)
+	rev	\r, \r
 	.endr
 	.irp	r, \regs
 	rbit	\r, \r


@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Accelerated CRC64 (NVMe) using ARM NEON C intrinsics
+ */
+
+#include <linux/types.h>
+#include <asm/neon-intrinsics.h>
+
+u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len);
+
+/* x^191 mod G, x^127 mod G */
+static const u64 fold_consts_val[2] = { 0xeadc41fd2ba3d420ULL,
+					0x21e9761e252621acULL };
+
+/* floor(x^127 / G), (G - x^64) / x */
+static const u64 bconsts_val[2] = { 0x27ecfa329aef9f77ULL,
+				    0x34d926535897936aULL };
+
+static inline uint64x2_t pmull64(uint64x2_t a, uint64x2_t b)
+{
+	return vreinterpretq_u64_p128(vmull_p64(vgetq_lane_u64(a, 0),
+						vgetq_lane_u64(b, 0)));
+}
+
+static inline uint64x2_t pmull64_high(uint64x2_t a, uint64x2_t b)
+{
+	poly64x2_t l = vreinterpretq_p64_u64(a);
+	poly64x2_t m = vreinterpretq_p64_u64(b);
+
+	return vreinterpretq_u64_p128(vmull_high_p64(l, m));
+}
+
+static inline uint64x2_t pmull64_hi_lo(uint64x2_t a, uint64x2_t b)
+{
+	return vreinterpretq_u64_p128(vmull_p64(vgetq_lane_u64(a, 1),
+						vgetq_lane_u64(b, 0)));
+}
+
+u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len)
+{
+	uint64x2_t fold_consts = vld1q_u64(fold_consts_val);
+	uint64x2_t v0 = { crc, 0 };
+	uint64x2_t zero = { };
+
+	for (;;) {
+		v0 ^= vreinterpretq_u64_u8(vld1q_u8(p));
+		p += 16;
+		len -= 16;
+		if (len < 16)
+			break;
+		v0 = pmull64(fold_consts, v0) ^ pmull64_high(fold_consts, v0);
+	}

+	/* Multiply the 128-bit value by x^64 and reduce it back to 128 bits. */
+	v0 = vextq_u64(v0, zero, 1) ^ pmull64_hi_lo(fold_consts, v0);
+
+	/* Final Barrett reduction */
+	uint64x2_t bconsts = vld1q_u64(bconsts_val);
+	uint64x2_t final = pmull64(bconsts, v0);
+
+	v0 ^= vextq_u64(zero, final, 1) ^ pmull64_hi_lo(bconsts, final);
+	return vgetq_lane_u64(v0, 1);
+}
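
All of the pmull64*() helpers above wrap the PMULL instruction, which
multiplies two 64-bit polynomials over GF(2). As a reference point, a
stand-alone model of that primitive (an illustrative sketch, not part of
the patch; assumes a compiler providing unsigned __int128) looks like:

    #include <stdint.h>

    /*
     * Carry-less multiply: each set bit i of a contributes b << i, and
     * the partial products combine with XOR rather than addition,
     * yielding the 127-bit product of two 64-bit GF(2) polynomials.
     */
    static inline unsigned __int128 clmul64(uint64_t a, uint64_t b)
    {
    	unsigned __int128 r = 0;

    	for (int i = 0; i < 64; i++)
    		if ((a >> i) & 1)
    			r ^= (unsigned __int128)b << i;
    	return r;
    }

The main loop then uses the constants above (x^191 mod G and x^127 mod G)
to fold the 128-bit accumulator over each new 16-byte block, and the tail
performs a Barrett reduction to bring the result back down to 64 bits.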

lib/crc/arm64/crc64.h (new file)

@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * CRC64 using ARM64 PMULL instructions
+ */
+
+#include <linux/cpufeature.h>
+#include <asm/simd.h>
+#include <linux/minmax.h>
+#include <linux/sizes.h>
+
+u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len);
+
+#define crc64_be_arch crc64_be_generic
+
+static inline u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len)
+{
+	if (len >= 128 && cpu_have_named_feature(PMULL) &&
+	    likely(may_use_simd())) {
+		size_t chunk = len & ~15;
+
+		scoped_ksimd()
+			crc = crc64_nvme_arm64_c(crc, p, chunk);
+		p += chunk;
+		len &= 15;
+	}
+	return crc64_nvme_generic(crc, p, len);
+}
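
Callers do not use crc64_nvme_arch() directly; they go through the
crc64_nvme() library wrapper, which also applies the initial and final
bitwise inversions required by the NVMe spec. A minimal caller sketch
(assuming the wrapper declared in include/linux/crc64.h; the helper name
is hypothetical):

    #include <linux/crc64.h>

    /* One-shot CRC64-NVME of a buffer; start from 0 since the wrapper
     * handles the spec's inversions internally. */
    static u64 my_buf_crc64(const void *buf, size_t len)
    {
    	return crc64_nvme(0, buf, len);
    }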


@@ -268,8 +268,7 @@ crc_benchmark(struct kunit *test,
 	}
 }

-/* crc7_be */
-
+#if IS_REACHABLE(CONFIG_CRC7)
 static u64 crc7_be_wrapper(u64 crc, const u8 *p, size_t len)
 {
 	/*
@@ -294,9 +293,9 @@ static void crc7_be_benchmark(struct kunit *test)
 {
 	crc_benchmark(test, crc7_be_wrapper);
 }
+#endif /* CONFIG_CRC7 */

-/* crc16 */
-
+#if IS_REACHABLE(CONFIG_CRC16)
 static u64 crc16_wrapper(u64 crc, const u8 *p, size_t len)
 {
 	return crc16(crc, p, len);
@@ -318,9 +317,9 @@ static void crc16_benchmark(struct kunit *test)
 {
 	crc_benchmark(test, crc16_wrapper);
 }
+#endif /* CONFIG_CRC16 */

-/* crc_t10dif */
-
+#if IS_REACHABLE(CONFIG_CRC_T10DIF)
 static u64 crc_t10dif_wrapper(u64 crc, const u8 *p, size_t len)
 {
 	return crc_t10dif_update(crc, p, len);
@@ -342,6 +341,9 @@ static void crc_t10dif_benchmark(struct kunit *test)
 {
 	crc_benchmark(test, crc_t10dif_wrapper);
 }
+#endif /* CONFIG_CRC_T10DIF */

+#if IS_REACHABLE(CONFIG_CRC32)
 /* crc32_le */
@@ -414,6 +416,9 @@ static void crc32c_benchmark(struct kunit *test)
 {
 	crc_benchmark(test, crc32c_wrapper);
 }
+#endif /* CONFIG_CRC32 */

+#if IS_REACHABLE(CONFIG_CRC64)
 /* crc64_be */
@@ -463,24 +468,35 @@ static void crc64_nvme_benchmark(struct kunit *test)
 {
 	crc_benchmark(test, crc64_nvme_wrapper);
 }
+#endif /* CONFIG_CRC64 */

 static struct kunit_case crc_test_cases[] = {
+#if IS_REACHABLE(CONFIG_CRC7)
 	KUNIT_CASE(crc7_be_test),
 	KUNIT_CASE(crc7_be_benchmark),
+#endif
+#if IS_REACHABLE(CONFIG_CRC16)
 	KUNIT_CASE(crc16_test),
 	KUNIT_CASE(crc16_benchmark),
+#endif
+#if IS_REACHABLE(CONFIG_CRC_T10DIF)
 	KUNIT_CASE(crc_t10dif_test),
 	KUNIT_CASE(crc_t10dif_benchmark),
+#endif
+#if IS_REACHABLE(CONFIG_CRC32)
 	KUNIT_CASE(crc32_le_test),
 	KUNIT_CASE(crc32_le_benchmark),
 	KUNIT_CASE(crc32_be_test),
 	KUNIT_CASE(crc32_be_benchmark),
 	KUNIT_CASE(crc32c_test),
 	KUNIT_CASE(crc32c_benchmark),
+#endif
+#if IS_REACHABLE(CONFIG_CRC64)
 	KUNIT_CASE(crc64_be_test),
 	KUNIT_CASE(crc64_be_benchmark),
 	KUNIT_CASE(crc64_nvme_test),
 	KUNIT_CASE(crc64_nvme_benchmark),
+#endif
 	{},
 };
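
The guards use IS_REACHABLE() rather than IS_ENABLED() because a CRC
implementation built as a module is only linkable from crc_kunit if the
test is itself modular; paraphrasing the definition in
include/linux/kconfig.h:

    /*
     * True if the option is built-in, or if it is a module and the
     * code containing this check is being built as a module too.
     */
    #define IS_REACHABLE(option) __or(IS_BUILTIN(option), \
    			__and(IS_MODULE(option), __is_defined(MODULE)))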


@@ -48,6 +48,8 @@ CONFIG_CRYPTO_LIB_ENABLE_ALL_FOR_KUNIT=y
 CONFIG_PRIME_NUMBERS=y

+CONFIG_CRC_ENABLE_ALL_FOR_KUNIT=y
+
 CONFIG_SECURITY=y
 CONFIG_SECURITY_APPARMOR=y
 CONFIG_SECURITY_LANDLOCK=y