mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
Merge tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull CRC updates from Eric Biggers: - Several improvements related to crc_kunit, to align with the standard KUnit conventions and make it easier for developers and CI systems to run this test suite - Add an arm64-optimized implementation of CRC64-NVME - Remove unused code for big endian arm64 * tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux: lib/crc: arm64: Simplify intrinsics implementation lib/crc: arm64: Use existing macros for kernel-mode FPU cflags lib/crc: arm64: Drop unnecessary chunking logic from crc64 lib/crc: arm64: Assume a little-endian kernel lib/crc: arm64: add NEON accelerated CRC64-NVMe implementation lib/crc: arm64: Drop check for CONFIG_KERNEL_MODE_NEON crypto: crc32c - Remove another outdated comment crypto: crc32c - Remove more outdated usage information kunit: configs: Enable all CRC tests in all_tests.config lib/crc: tests: Add a .kunitconfig file lib/crc: tests: Add CRC_ENABLE_ALL_FOR_KUNIT lib/crc: tests: Make crc_kunit test only the enabled CRC variants
This commit is contained in:
@@ -1,8 +1,6 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* Cryptographic API.
|
||||
*
|
||||
* CRC32C chksum
|
||||
* crypto_shash support for CRC-32C
|
||||
*
|
||||
*@Article{castagnoli-crc,
|
||||
* author = { Guy Castagnoli and Stefan Braeuer and Martin Herrman},
|
||||
@@ -15,16 +13,6 @@
|
||||
* pages = {},
|
||||
* month = {June},
|
||||
*}
|
||||
* Used by the iSCSI driver, possibly others, and derived from
|
||||
* the iscsi-crc.c module of the linux-iscsi driver at
|
||||
* http://linux-iscsi.sourceforge.net.
|
||||
*
|
||||
* Following the example of lib/crc32, this function is intended to be
|
||||
* flexible and useful for all users. Modules that currently have their
|
||||
* own crc32c, but hopefully may be able to use this one are:
|
||||
* net/sctp (please add all your doco to here if you change to
|
||||
* use this one!)
|
||||
* <endoflist>
|
||||
*
|
||||
* Copyright (c) 2004 Cisco Systems, Inc.
|
||||
* Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
|
||||
@@ -49,11 +37,6 @@ struct chksum_desc_ctx {
|
||||
u32 crc;
|
||||
};
|
||||
|
||||
/*
|
||||
* Steps through buffer one byte at a time, calculates reflected
|
||||
* crc using table.
|
||||
*/
|
||||
|
||||
static int chksum_init(struct shash_desc *desc)
|
||||
{
|
||||
struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
|
||||
|
||||
3
lib/crc/.kunitconfig
Normal file
3
lib/crc/.kunitconfig
Normal file
@@ -0,0 +1,3 @@
|
||||
CONFIG_KUNIT=y
|
||||
CONFIG_CRC_ENABLE_ALL_FOR_KUNIT=y
|
||||
CONFIG_CRC_KUNIT_TEST=y
|
||||
@@ -48,7 +48,7 @@ config CRC_T10DIF_ARCH
|
||||
bool
|
||||
depends on CRC_T10DIF && CRC_OPTIMIZATIONS
|
||||
default y if ARM && KERNEL_MODE_NEON
|
||||
default y if ARM64 && KERNEL_MODE_NEON
|
||||
default y if ARM64
|
||||
default y if PPC64 && ALTIVEC
|
||||
default y if RISCV && RISCV_ISA_ZBC
|
||||
default y if X86
|
||||
@@ -82,6 +82,7 @@ config CRC64
|
||||
config CRC64_ARCH
|
||||
bool
|
||||
depends on CRC64 && CRC_OPTIMIZATIONS
|
||||
default y if ARM64
|
||||
default y if RISCV && RISCV_ISA_ZBC && 64BIT
|
||||
default y if X86_64
|
||||
|
||||
@@ -99,19 +100,28 @@ config CRC_OPTIMIZATIONS
|
||||
|
||||
config CRC_KUNIT_TEST
|
||||
tristate "KUnit tests for CRC functions" if !KUNIT_ALL_TESTS
|
||||
depends on KUNIT
|
||||
depends on KUNIT && (CRC7 || CRC16 || CRC_T10DIF || CRC32 || CRC64)
|
||||
default KUNIT_ALL_TESTS
|
||||
select CRC7
|
||||
select CRC16
|
||||
select CRC_T10DIF
|
||||
select CRC32
|
||||
select CRC64
|
||||
help
|
||||
Unit tests for the CRC library functions.
|
||||
|
||||
This is intended to help people writing architecture-specific
|
||||
optimized versions. If unsure, say N.
|
||||
|
||||
config CRC_ENABLE_ALL_FOR_KUNIT
|
||||
tristate "Enable all CRC functions for KUnit test"
|
||||
depends on KUNIT
|
||||
select CRC7
|
||||
select CRC16
|
||||
select CRC_T10DIF
|
||||
select CRC32
|
||||
select CRC64
|
||||
help
|
||||
Enable all CRC functions that have test code in CRC_KUNIT_TEST.
|
||||
|
||||
Enable this only if you'd like the CRC KUnit test suite to test all
|
||||
the CRC variants, even ones that wouldn't otherwise need to be built.
|
||||
|
||||
config CRC_BENCHMARK
|
||||
bool "Benchmark for the CRC functions"
|
||||
depends on CRC_KUNIT_TEST
|
||||
|
||||
@@ -38,9 +38,14 @@ obj-$(CONFIG_CRC64) += crc64.o
|
||||
crc64-y := crc64-main.o
|
||||
ifeq ($(CONFIG_CRC64_ARCH),y)
|
||||
CFLAGS_crc64-main.o += -I$(src)/$(SRCARCH)
|
||||
|
||||
CFLAGS_REMOVE_arm64/crc64-neon-inner.o += $(CC_FLAGS_NO_FPU)
|
||||
CFLAGS_arm64/crc64-neon-inner.o += $(CC_FLAGS_FPU) -march=armv8-a+crypto
|
||||
crc64-$(CONFIG_ARM64) += arm64/crc64-neon-inner.o
|
||||
|
||||
crc64-$(CONFIG_RISCV) += riscv/crc64_lsb.o riscv/crc64_msb.o
|
||||
crc64-$(CONFIG_X86) += x86/crc64-pclmul.o
|
||||
endif
|
||||
endif # CONFIG_CRC64_ARCH
|
||||
|
||||
obj-y += tests/
|
||||
|
||||
|
||||
@@ -181,13 +181,13 @@ SYM_FUNC_END(__pmull_p8_16x64)
|
||||
|
||||
pmull16x64_\p fold_consts, \reg1, v8
|
||||
|
||||
CPU_LE( rev64 v11.16b, v11.16b )
|
||||
CPU_LE( rev64 v12.16b, v12.16b )
|
||||
rev64 v11.16b, v11.16b
|
||||
rev64 v12.16b, v12.16b
|
||||
|
||||
pmull16x64_\p fold_consts, \reg2, v9
|
||||
|
||||
CPU_LE( ext v11.16b, v11.16b, v11.16b, #8 )
|
||||
CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
|
||||
ext v11.16b, v11.16b, v11.16b, #8
|
||||
ext v12.16b, v12.16b, v12.16b, #8
|
||||
|
||||
eor \reg1\().16b, \reg1\().16b, v8.16b
|
||||
eor \reg2\().16b, \reg2\().16b, v9.16b
|
||||
@@ -220,22 +220,22 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
|
||||
ldp q4, q5, [buf, #0x40]
|
||||
ldp q6, q7, [buf, #0x60]
|
||||
add buf, buf, #0x80
|
||||
CPU_LE( rev64 v0.16b, v0.16b )
|
||||
CPU_LE( rev64 v1.16b, v1.16b )
|
||||
CPU_LE( rev64 v2.16b, v2.16b )
|
||||
CPU_LE( rev64 v3.16b, v3.16b )
|
||||
CPU_LE( rev64 v4.16b, v4.16b )
|
||||
CPU_LE( rev64 v5.16b, v5.16b )
|
||||
CPU_LE( rev64 v6.16b, v6.16b )
|
||||
CPU_LE( rev64 v7.16b, v7.16b )
|
||||
CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
|
||||
CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 )
|
||||
CPU_LE( ext v2.16b, v2.16b, v2.16b, #8 )
|
||||
CPU_LE( ext v3.16b, v3.16b, v3.16b, #8 )
|
||||
CPU_LE( ext v4.16b, v4.16b, v4.16b, #8 )
|
||||
CPU_LE( ext v5.16b, v5.16b, v5.16b, #8 )
|
||||
CPU_LE( ext v6.16b, v6.16b, v6.16b, #8 )
|
||||
CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
|
||||
rev64 v0.16b, v0.16b
|
||||
rev64 v1.16b, v1.16b
|
||||
rev64 v2.16b, v2.16b
|
||||
rev64 v3.16b, v3.16b
|
||||
rev64 v4.16b, v4.16b
|
||||
rev64 v5.16b, v5.16b
|
||||
rev64 v6.16b, v6.16b
|
||||
rev64 v7.16b, v7.16b
|
||||
ext v0.16b, v0.16b, v0.16b, #8
|
||||
ext v1.16b, v1.16b, v1.16b, #8
|
||||
ext v2.16b, v2.16b, v2.16b, #8
|
||||
ext v3.16b, v3.16b, v3.16b, #8
|
||||
ext v4.16b, v4.16b, v4.16b, #8
|
||||
ext v5.16b, v5.16b, v5.16b, #8
|
||||
ext v6.16b, v6.16b, v6.16b, #8
|
||||
ext v7.16b, v7.16b, v7.16b, #8
|
||||
|
||||
// XOR the first 16 data *bits* with the initial CRC value.
|
||||
movi v8.16b, #0
|
||||
@@ -288,8 +288,8 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
|
||||
pmull16x64_\p fold_consts, v7, v8
|
||||
eor v7.16b, v7.16b, v8.16b
|
||||
ldr q0, [buf], #16
|
||||
CPU_LE( rev64 v0.16b, v0.16b )
|
||||
CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
|
||||
rev64 v0.16b, v0.16b
|
||||
ext v0.16b, v0.16b, v0.16b, #8
|
||||
eor v7.16b, v7.16b, v0.16b
|
||||
subs len, len, #16
|
||||
b.ge .Lfold_16_bytes_loop_\@
|
||||
@@ -310,8 +310,8 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
|
||||
// v0 = last 16 original data bytes
|
||||
add buf, buf, len
|
||||
ldr q0, [buf, #-16]
|
||||
CPU_LE( rev64 v0.16b, v0.16b )
|
||||
CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
|
||||
rev64 v0.16b, v0.16b
|
||||
ext v0.16b, v0.16b, v0.16b, #8
|
||||
|
||||
// v1 = high order part of second chunk: v7 left-shifted by 'len' bytes.
|
||||
adr_l x4, .Lbyteshift_table + 16
|
||||
@@ -344,8 +344,8 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
|
||||
|
||||
// Load the first 16 data bytes.
|
||||
ldr q7, [buf], #0x10
|
||||
CPU_LE( rev64 v7.16b, v7.16b )
|
||||
CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
|
||||
rev64 v7.16b, v7.16b
|
||||
ext v7.16b, v7.16b, v7.16b, #8
|
||||
|
||||
// XOR the first 16 data *bits* with the initial CRC value.
|
||||
movi v0.16b, #0
|
||||
@@ -382,8 +382,8 @@ SYM_FUNC_START(crc_t10dif_pmull_p8)
|
||||
|
||||
crc_t10dif_pmull p8
|
||||
|
||||
CPU_LE( rev64 v7.16b, v7.16b )
|
||||
CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
|
||||
rev64 v7.16b, v7.16b
|
||||
ext v7.16b, v7.16b, v7.16b, #8
|
||||
str q7, [x3]
|
||||
|
||||
frame_pop
|
||||
|
||||
@@ -29,24 +29,19 @@
|
||||
.endm
|
||||
|
||||
.macro hwordle, reg
|
||||
CPU_BE( rev16 \reg, \reg )
|
||||
.endm
|
||||
|
||||
.macro hwordbe, reg
|
||||
CPU_LE( rev \reg, \reg )
|
||||
rev \reg, \reg
|
||||
rbit \reg, \reg
|
||||
CPU_BE( lsr \reg, \reg, #16 )
|
||||
.endm
|
||||
|
||||
.macro le, regs:vararg
|
||||
.irp r, \regs
|
||||
CPU_BE( rev \r, \r )
|
||||
.endr
|
||||
.endm
|
||||
|
||||
.macro be, regs:vararg
|
||||
.irp r, \regs
|
||||
CPU_LE( rev \r, \r )
|
||||
rev \r, \r
|
||||
.endr
|
||||
.irp r, \regs
|
||||
rbit \r, \r
|
||||
|
||||
65
lib/crc/arm64/crc64-neon-inner.c
Normal file
65
lib/crc/arm64/crc64-neon-inner.c
Normal file
@@ -0,0 +1,65 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Accelerated CRC64 (NVMe) using ARM NEON C intrinsics
|
||||
*/
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <asm/neon-intrinsics.h>
|
||||
|
||||
u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len);
|
||||
|
||||
/* x^191 mod G, x^127 mod G */
|
||||
static const u64 fold_consts_val[2] = { 0xeadc41fd2ba3d420ULL,
|
||||
0x21e9761e252621acULL };
|
||||
/* floor(x^127 / G), (G - x^64) / x */
|
||||
static const u64 bconsts_val[2] = { 0x27ecfa329aef9f77ULL,
|
||||
0x34d926535897936aULL };
|
||||
|
||||
static inline uint64x2_t pmull64(uint64x2_t a, uint64x2_t b)
|
||||
{
|
||||
return vreinterpretq_u64_p128(vmull_p64(vgetq_lane_u64(a, 0),
|
||||
vgetq_lane_u64(b, 0)));
|
||||
}
|
||||
|
||||
static inline uint64x2_t pmull64_high(uint64x2_t a, uint64x2_t b)
|
||||
{
|
||||
poly64x2_t l = vreinterpretq_p64_u64(a);
|
||||
poly64x2_t m = vreinterpretq_p64_u64(b);
|
||||
|
||||
return vreinterpretq_u64_p128(vmull_high_p64(l, m));
|
||||
}
|
||||
|
||||
static inline uint64x2_t pmull64_hi_lo(uint64x2_t a, uint64x2_t b)
|
||||
{
|
||||
return vreinterpretq_u64_p128(vmull_p64(vgetq_lane_u64(a, 1),
|
||||
vgetq_lane_u64(b, 0)));
|
||||
}
|
||||
|
||||
u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len)
|
||||
{
|
||||
uint64x2_t fold_consts = vld1q_u64(fold_consts_val);
|
||||
uint64x2_t v0 = { crc, 0 };
|
||||
uint64x2_t zero = { };
|
||||
|
||||
for (;;) {
|
||||
v0 ^= vreinterpretq_u64_u8(vld1q_u8(p));
|
||||
|
||||
p += 16;
|
||||
len -= 16;
|
||||
if (len < 16)
|
||||
break;
|
||||
|
||||
v0 = pmull64(fold_consts, v0) ^ pmull64_high(fold_consts, v0);
|
||||
}
|
||||
|
||||
/* Multiply the 128-bit value by x^64 and reduce it back to 128 bits. */
|
||||
v0 = vextq_u64(v0, zero, 1) ^ pmull64_hi_lo(fold_consts, v0);
|
||||
|
||||
/* Final Barrett reduction */
|
||||
uint64x2_t bconsts = vld1q_u64(bconsts_val);
|
||||
uint64x2_t final = pmull64(bconsts, v0);
|
||||
|
||||
v0 ^= vextq_u64(zero, final, 1) ^ pmull64_hi_lo(bconsts, final);
|
||||
|
||||
return vgetq_lane_u64(v0, 1);
|
||||
}
|
||||
28
lib/crc/arm64/crc64.h
Normal file
28
lib/crc/arm64/crc64.h
Normal file
@@ -0,0 +1,28 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* CRC64 using ARM64 PMULL instructions
|
||||
*/
|
||||
|
||||
#include <linux/cpufeature.h>
|
||||
#include <asm/simd.h>
|
||||
#include <linux/minmax.h>
|
||||
#include <linux/sizes.h>
|
||||
|
||||
u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len);
|
||||
|
||||
#define crc64_be_arch crc64_be_generic
|
||||
|
||||
static inline u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len)
|
||||
{
|
||||
if (len >= 128 && cpu_have_named_feature(PMULL) &&
|
||||
likely(may_use_simd())) {
|
||||
size_t chunk = len & ~15;
|
||||
|
||||
scoped_ksimd()
|
||||
crc = crc64_nvme_arm64_c(crc, p, chunk);
|
||||
|
||||
p += chunk;
|
||||
len &= 15;
|
||||
}
|
||||
return crc64_nvme_generic(crc, p, len);
|
||||
}
|
||||
@@ -268,8 +268,7 @@ crc_benchmark(struct kunit *test,
|
||||
}
|
||||
}
|
||||
|
||||
/* crc7_be */
|
||||
|
||||
#if IS_REACHABLE(CONFIG_CRC7)
|
||||
static u64 crc7_be_wrapper(u64 crc, const u8 *p, size_t len)
|
||||
{
|
||||
/*
|
||||
@@ -294,9 +293,9 @@ static void crc7_be_benchmark(struct kunit *test)
|
||||
{
|
||||
crc_benchmark(test, crc7_be_wrapper);
|
||||
}
|
||||
#endif /* CONFIG_CRC7 */
|
||||
|
||||
/* crc16 */
|
||||
|
||||
#if IS_REACHABLE(CONFIG_CRC16)
|
||||
static u64 crc16_wrapper(u64 crc, const u8 *p, size_t len)
|
||||
{
|
||||
return crc16(crc, p, len);
|
||||
@@ -318,9 +317,9 @@ static void crc16_benchmark(struct kunit *test)
|
||||
{
|
||||
crc_benchmark(test, crc16_wrapper);
|
||||
}
|
||||
#endif /* CONFIG_CRC16 */
|
||||
|
||||
/* crc_t10dif */
|
||||
|
||||
#if IS_REACHABLE(CONFIG_CRC_T10DIF)
|
||||
static u64 crc_t10dif_wrapper(u64 crc, const u8 *p, size_t len)
|
||||
{
|
||||
return crc_t10dif_update(crc, p, len);
|
||||
@@ -342,6 +341,9 @@ static void crc_t10dif_benchmark(struct kunit *test)
|
||||
{
|
||||
crc_benchmark(test, crc_t10dif_wrapper);
|
||||
}
|
||||
#endif /* CONFIG_CRC_T10DIF */
|
||||
|
||||
#if IS_REACHABLE(CONFIG_CRC32)
|
||||
|
||||
/* crc32_le */
|
||||
|
||||
@@ -414,6 +416,9 @@ static void crc32c_benchmark(struct kunit *test)
|
||||
{
|
||||
crc_benchmark(test, crc32c_wrapper);
|
||||
}
|
||||
#endif /* CONFIG_CRC32 */
|
||||
|
||||
#if IS_REACHABLE(CONFIG_CRC64)
|
||||
|
||||
/* crc64_be */
|
||||
|
||||
@@ -463,24 +468,35 @@ static void crc64_nvme_benchmark(struct kunit *test)
|
||||
{
|
||||
crc_benchmark(test, crc64_nvme_wrapper);
|
||||
}
|
||||
#endif /* CONFIG_CRC64 */
|
||||
|
||||
static struct kunit_case crc_test_cases[] = {
|
||||
#if IS_REACHABLE(CONFIG_CRC7)
|
||||
KUNIT_CASE(crc7_be_test),
|
||||
KUNIT_CASE(crc7_be_benchmark),
|
||||
#endif
|
||||
#if IS_REACHABLE(CONFIG_CRC16)
|
||||
KUNIT_CASE(crc16_test),
|
||||
KUNIT_CASE(crc16_benchmark),
|
||||
#endif
|
||||
#if IS_REACHABLE(CONFIG_CRC_T10DIF)
|
||||
KUNIT_CASE(crc_t10dif_test),
|
||||
KUNIT_CASE(crc_t10dif_benchmark),
|
||||
#endif
|
||||
#if IS_REACHABLE(CONFIG_CRC32)
|
||||
KUNIT_CASE(crc32_le_test),
|
||||
KUNIT_CASE(crc32_le_benchmark),
|
||||
KUNIT_CASE(crc32_be_test),
|
||||
KUNIT_CASE(crc32_be_benchmark),
|
||||
KUNIT_CASE(crc32c_test),
|
||||
KUNIT_CASE(crc32c_benchmark),
|
||||
#endif
|
||||
#if IS_REACHABLE(CONFIG_CRC64)
|
||||
KUNIT_CASE(crc64_be_test),
|
||||
KUNIT_CASE(crc64_be_benchmark),
|
||||
KUNIT_CASE(crc64_nvme_test),
|
||||
KUNIT_CASE(crc64_nvme_benchmark),
|
||||
#endif
|
||||
{},
|
||||
};
|
||||
|
||||
|
||||
@@ -48,6 +48,8 @@ CONFIG_CRYPTO_LIB_ENABLE_ALL_FOR_KUNIT=y
|
||||
|
||||
CONFIG_PRIME_NUMBERS=y
|
||||
|
||||
CONFIG_CRC_ENABLE_ALL_FOR_KUNIT=y
|
||||
|
||||
CONFIG_SECURITY=y
|
||||
CONFIG_SECURITY_APPARMOR=y
|
||||
CONFIG_SECURITY_LANDLOCK=y
|
||||
|
||||
Reference in New Issue
Block a user