From 38d581ba76545b4712ecdb63f2b2612d5b7601c9 Mon Sep 17 00:00:00 2001 From: Jaime Saguillo Revilla Date: Sun, 15 Feb 2026 12:45:11 +0000 Subject: [PATCH 001/127] proc: array: drop stale FIXME about RCU in task_sig() task_sig() already wraps the SigQ rlimit read in an explicit RCU read-side critical section. Drop the stale FIXME comment and keep using task_ucounts() for the ucounts access. No functional change. Link: https://lkml.kernel.org/r/20260215124511.14227-1-jaime.saguillo@gmail.com Signed-off-by: Jaime Saguillo Revilla Acked-by: Oleg Nesterov Cc: Christian Brauner Signed-off-by: Andrew Morton --- fs/proc/array.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index f447e734612a..90fb0c6b5f99 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -280,7 +280,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) blocked = p->blocked; collect_sigign_sigcatch(p, &ignored, &caught); num_threads = get_nr_threads(p); - rcu_read_lock(); /* FIXME: is this correct? */ + rcu_read_lock(); qsize = get_rlimit_value(task_ucounts(p), UCOUNT_RLIMIT_SIGPENDING); rcu_read_unlock(); qlim = task_rlimit(p, RLIMIT_SIGPENDING); From c970a863ac17386fe56d493cb56cbd0944f1351b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Sat, 14 Feb 2026 15:08:54 +0100 Subject: [PATCH 002/127] scripts/spelling.txt: add "binded||bound" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The correct passive of "to bind" is "bound", not "binded". This is often used in the context of the BSD socket bind(2) operation. 
Link: https://lkml.kernel.org/r/20260214140854.42247-1-gnoack3000@gmail.com Signed-off-by: Günther Noack Signed-off-by: Andrew Morton --- scripts/spelling.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 1e89b92c2f9a..e164d147f541 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -257,6 +257,7 @@ begining||beginning beter||better betweeen||between bianries||binaries +binded||bound bitmast||bitmask bitwiedh||bitwidth boardcast||broadcast From a98621a0f187a934c115dcfe79a49520ae892111 Mon Sep 17 00:00:00 2001 From: Michal Grzedzicki Date: Fri, 13 Feb 2026 11:39:59 -0800 Subject: [PATCH 003/127] unshare: fix nsproxy leak in ksys_unshare() on set_cred_ucounts() failure When set_cred_ucounts() fails in ksys_unshare() new_nsproxy is leaked. Let's call put_nsproxy() if that happens. Link: https://lkml.kernel.org/r/20260213193959.2556730-1-mge@meta.com Fixes: 905ae01c4ae2 ("Add a reference to ucounts for each cred") Signed-off-by: Michal Grzedzicki Reviewed-by: Andrew Morton Cc: Alexey Gladkov (Intel) Cc: Ben Segall Cc: David Hildenbrand Cc: Dietmar Eggemann Cc: Ingo Molnar Cc: Juri Lelli Cc: Kees Cook Cc: "Liam R. 
Howlett" Cc: Lorenzo Stoakes (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Rapoport Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Valentin Schneider Cc: Vincent Guittot Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- kernel/fork.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index bc2bf58b93b6..1ec0caea6a7e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -3174,11 +3174,10 @@ int ksys_unshare(unsigned long unshare_flags) new_cred, new_fs); if (err) goto bad_unshare_cleanup_cred; - if (new_cred) { err = set_cred_ucounts(new_cred); if (err) - goto bad_unshare_cleanup_cred; + goto bad_unshare_cleanup_nsproxy; } if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) { @@ -3194,8 +3193,10 @@ int ksys_unshare(unsigned long unshare_flags) shm_init_task(current); } - if (new_nsproxy) + if (new_nsproxy) { switch_task_namespaces(current, new_nsproxy); + new_nsproxy = NULL; + } task_lock(current); @@ -3224,13 +3225,15 @@ int ksys_unshare(unsigned long unshare_flags) perf_event_namespaces(current); +bad_unshare_cleanup_nsproxy: + if (new_nsproxy) + put_nsproxy(new_nsproxy); bad_unshare_cleanup_cred: if (new_cred) put_cred(new_cred); bad_unshare_cleanup_fd: if (new_fd) put_files_struct(new_fd); - bad_unshare_cleanup_fs: if (new_fs) free_fs_struct(new_fs); From 3f80aa1a2a44298c59e8e2ab9b10d9971cfd8c48 Mon Sep 17 00:00:00 2001 From: Valtteri Koskivuori Date: Thu, 12 Feb 2026 23:39:33 +0200 Subject: [PATCH 004/127] scripts/bloat-o-meter: rename file arguments to match output The output of bloat-o-meter already uses the words 'old' and 'new' for symbol size in the table header, so reflect that in the corresponding argument names. 
Link: https://lkml.kernel.org/r/20260212213941.3984330-1-vkoskiv@gmail.com Signed-off-by: Valtteri Koskivuori Signed-off-by: Andrew Morton --- scripts/bloat-o-meter | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index db5dd18dc2d5..9b4fb996d95b 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter @@ -18,8 +18,8 @@ group.add_argument('-c', help='categorize output based on symbol type', action=' group.add_argument('-d', help='Show delta of Data Section', action='store_true') group.add_argument('-t', help='Show delta of text Section', action='store_true') parser.add_argument('-p', dest='prefix', help='Arch prefix for the tool being used. Useful in cross build scenarios') -parser.add_argument('file1', help='First file to compare') -parser.add_argument('file2', help='Second file to compare') +parser.add_argument('file_old', help='First file to compare') +parser.add_argument('file_new', help='Second file to compare') args = parser.parse_args() @@ -86,7 +86,7 @@ def calc(oldfile, newfile, format): def print_result(symboltype, symbolformat): grow, shrink, add, remove, up, down, delta, old, new, otot, ntot = \ - calc(args.file1, args.file2, symbolformat) + calc(args.file_old, args.file_new, symbolformat) print("add/remove: %s/%s grow/shrink: %s/%s up/down: %s/%s (%s)" % \ (add, remove, grow, shrink, up, -down, up-down)) From a75d207916de0909e2244bc66a44d72fadbcf383 Mon Sep 17 00:00:00 2001 From: Rio Date: Fri, 20 Feb 2026 20:45:00 +0530 Subject: [PATCH 005/127] kernel/panic: increase buffer size for verbose taint logging The verbose 'Tainted: ...' string in print_tainted_seq can total to 327 characters while the buffer defined in _print_tainted is 320 bytes. Increase its size to 350 characters to hold all flags, along with some headroom. 
[akpm@linux-foundation.org: fix spello, add comment] Link: https://lkml.kernel.org/r/20260220151500.13585-1-rioo.tsukatsukii@gmail.com Signed-off-by: Rio Cc: Joel Granados Cc: Petr Mladek Cc: Wang Jinchao Signed-off-by: Andrew Morton --- kernel/panic.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index c78600212b6c..75368738d32d 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -801,6 +801,8 @@ EXPORT_SYMBOL(panic); * Documentation/admin-guide/tainted-kernels.rst, including its * small shell script that prints the TAINT_FLAGS_COUNT bits of * /proc/sys/kernel/tainted. + * + * Also, update TAINT_BUF_MAX below. */ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = { TAINT_FLAG(PROPRIETARY_MODULE, 'P', 'G'), @@ -854,10 +856,12 @@ static void print_tainted_seq(struct seq_buf *s, bool verbose) } } +/* 350 can accommodate all taint flags in verbose mode, with some headroom */ +#define TAINT_BUF_MAX 350 + static const char *_print_tainted(bool verbose) { - /* FIXME: what should the size be? */ - static char buf[sizeof(taint_flags)]; + static char buf[TAINT_BUF_MAX]; struct seq_buf s; BUILD_BUG_ON(ARRAY_SIZE(taint_flags) != TAINT_FLAGS_COUNT); From a9dff0d0d11ce9aeebdd52ecf1469895e336c87e Mon Sep 17 00:00:00 2001 From: Rio Date: Sun, 22 Feb 2026 19:38:04 +0530 Subject: [PATCH 006/127] kernel/panic: allocate taint string buffer dynamically The buffer used to hold the taint string is statically allocated, which requires updating whenever a new taint flag is added. Instead, allocate the exact required length at boot once the allocator is available in an init function. The allocation sums the string lengths in taint_flags[], along with space for separators and formatting. print_tainted() is switched to use this dynamically allocated buffer. If allocation fails, print_tainted() warns about the failure and continues to use the original static buffer as a fallback. 
Link: https://lkml.kernel.org/r/20260222140804.22225-1-rioo.tsukatsukii@gmail.com Signed-off-by: Rio Cc: Joel Granados Cc: Petr Mladek Cc: Wang Jinchao Signed-off-by: Andrew Morton --- kernel/panic.c | 51 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index 75368738d32d..5d498ff8a18b 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -802,7 +802,7 @@ EXPORT_SYMBOL(panic); * small shell script that prints the TAINT_FLAGS_COUNT bits of * /proc/sys/kernel/tainted. * - * Also, update TAINT_BUF_MAX below. + * Also, update INIT_TAINT_BUF_MAX below. */ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = { TAINT_FLAG(PROPRIETARY_MODULE, 'P', 'G'), @@ -856,17 +856,58 @@ static void print_tainted_seq(struct seq_buf *s, bool verbose) } } -/* 350 can accommodate all taint flags in verbose mode, with some headroom */ -#define TAINT_BUF_MAX 350 +/* The initial buffer can accommodate all taint flags in verbose + * mode, with some headroom. Once the allocator is available, the + * exact size is allocated dynamically; the initial buffer remains + * as a fallback if allocation fails. + * + * The verbose taint string currently requires up to 327 characters. 
+ */ +#define INIT_TAINT_BUF_MAX 350 + +static char init_taint_buf[INIT_TAINT_BUF_MAX]; +static char *taint_buf = init_taint_buf; +static size_t taint_buf_size = INIT_TAINT_BUF_MAX; + +static __init int alloc_taint_buf(void) +{ + int i; + char *buf; + size_t size = 0; + + size += sizeof("Tainted: ") - 1; + for (i = 0; i < TAINT_FLAGS_COUNT; i++) { + size += 2; /* For ", " */ + size += 4; /* For "[%c]=" */ + size += strlen(taint_flags[i].desc); + } + + size += 1; /* For NULL terminator */ + + buf = kmalloc(size, GFP_KERNEL); + + if (!buf) { + /* Allocation may fail; this warning explains possibly + * truncated taint strings + */ + pr_warn_once("taint string buffer allocation failed, using fallback buffer\n"); + return 0; + } + + taint_buf = buf; + taint_buf_size = size; + + return 0; +} +postcore_initcall(alloc_taint_buf); static const char *_print_tainted(bool verbose) { - static char buf[TAINT_BUF_MAX]; struct seq_buf s; BUILD_BUG_ON(ARRAY_SIZE(taint_flags) != TAINT_FLAGS_COUNT); - seq_buf_init(&s, buf, sizeof(buf)); + seq_buf_init(&s, taint_buf, taint_buf_size); print_tainted_seq(&s, verbose); From 48d76a8282c9d99ec123d5f09cf6e485e5cb8734 Mon Sep 17 00:00:00 2001 From: Rio Date: Mon, 23 Feb 2026 09:29:14 +0530 Subject: [PATCH 007/127] kernel/panic: mark init_taint_buf as __initdata and panic instead of warning in alloc_taint_buf() However there's a convention of assuming that __init-time allocations cannot fail. Because if a kmalloc() were to fail at this time, the kernel is hopelessly messed up anyway. So simply panic() if that kmalloc failed, then make that 350-byte buffer __initdata. 
Link: https://lkml.kernel.org/r/20260223035914.4033-1-rioo.tsukatsukii@gmail.com Signed-off-by: Rio Cc: Joel Granados Cc: Petr Mladek Cc: Wang Jinchao Signed-off-by: Andrew Morton --- kernel/panic.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index 5d498ff8a18b..20feada5319d 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -865,8 +865,8 @@ static void print_tainted_seq(struct seq_buf *s, bool verbose) */ #define INIT_TAINT_BUF_MAX 350 -static char init_taint_buf[INIT_TAINT_BUF_MAX]; -static char *taint_buf = init_taint_buf; +static char init_taint_buf[INIT_TAINT_BUF_MAX] __initdata; +static char *taint_buf __refdata = init_taint_buf; static size_t taint_buf_size = INIT_TAINT_BUF_MAX; static __init int alloc_taint_buf(void) @@ -887,11 +887,7 @@ static __init int alloc_taint_buf(void) buf = kmalloc(size, GFP_KERNEL); if (!buf) { - /* Allocation may fail; this warning explains possibly - * truncated taint strings - */ - pr_warn_once("taint string buffer allocation failed, using fallback buffer\n"); - return 0; + panic("Failed to allocate taint string buffer"); } taint_buf = buf; From a75ae1d5bd15abca737d936926e9dd4b5dc126c1 Mon Sep 17 00:00:00 2001 From: Petr Vorel Date: Thu, 12 Feb 2026 15:40:04 +0100 Subject: [PATCH 008/127] scripts/spelling.txt: sort alphabetically Easier to add new entries. It was sorted when added in 66b47b4a9dad0, but later got wrong order for few entries. Sorted with en_US.UTF-8 locale. 
Link: https://lkml.kernel.org/r/20260212144005.45052-1-pvorel@suse.cz Signed-off-by: Petr Vorel Cc: Jonathan Camerom Cc: WangYuli Signed-off-by: Andrew Morton --- scripts/spelling.txt | 334 +++++++++++++++++++++---------------------- 1 file changed, 167 insertions(+), 167 deletions(-) diff --git a/scripts/spelling.txt b/scripts/spelling.txt index e164d147f541..84b05f57c176 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -57,8 +57,8 @@ acknowledgement||acknowledgment ackowledge||acknowledge ackowledged||acknowledged acording||according -activete||activate actived||activated +activete||activate actualy||actually actvie||active acumulating||accumulating @@ -66,12 +66,12 @@ acumulative||accumulative acumulator||accumulator acutally||actually adapater||adapter +adddress||address adderted||asserted addional||additional additionaly||additionally additonal||additional addres||address -adddress||address addreses||addresses addresss||address addrress||address @@ -95,9 +95,9 @@ alegorical||allegorical algined||aligned algorith||algorithm algorithmical||algorithmically +algorithmn||algorithm algoritm||algorithm algoritms||algorithms -algorithmn||algorithm algorrithm||algorithm algorritm||algorithm aligment||alignment @@ -128,20 +128,20 @@ amount of times||number of times amout||amount amplifer||amplifier amplifyer||amplifier -an union||a union -an user||a user -an userspace||a userspace -an one||a one analysator||analyzer ang||and anniversery||anniversary annoucement||announcement anomolies||anomalies anomoly||anomaly +an one||a one anonynous||anonymous +an union||a union +an user||a user +an userspace||a userspace anway||anyway -aplication||application apeared||appeared +aplication||application appearence||appearance applicaion||application appliction||application @@ -155,8 +155,8 @@ approriately||appropriately apropriate||appropriate aquainted||acquainted aquired||acquired -aquisition||acquisition aquires||acquires +aquisition||acquisition arbitary||arbitrary 
architechture||architecture archtecture||architecture @@ -189,30 +189,30 @@ assum||assume assumtpion||assumption asume||assume asuming||assuming -asycronous||asynchronous asychronous||asynchronous -asynchnous||asynchronous -asynchrnous||asynchronous -asynchronus||asynchronous -asynchromous||asynchronous +asycronous||asynchronous asymetric||asymmetric asymmeric||asymmetric +asynchnous||asynchronous +asynchrnous||asynchronous +asynchromous||asynchronous +asynchronus||asynchronous +atempt||attempt atleast||at least atomatically||automatically atomicly||atomically -atempt||attempt atrributes||attributes attachement||attachment attatch||attach attched||attached attemp||attempt -attemps||attempts attemping||attempting +attemps||attempts attepmpt||attempt attnetion||attention attruibutes||attributes -authentification||authentication authenicated||authenticated +authentification||authentication automaticaly||automatically automaticly||automatically automatize||automate @@ -288,19 +288,19 @@ calucate||calculate calulate||calculate cancelation||cancellation cancle||cancel -cant||can't -cant'||can't -canot||cannot -cann't||can't cannnot||cannot +cann't||can't +canot||cannot +cant'||can't +cant||can't capabiity||capability capabilites||capabilities capabilties||capabilities capabilty||capability capabitilies||capabilities capablity||capability -capatibilities||capabilities capapbilities||capabilities +capatibilities||capabilities captuer||capture caputure||capture carefuly||carefully @@ -308,9 +308,9 @@ cariage||carriage casued||caused catagory||category cehck||check +chache||cache challange||challenge challanges||challenges -chache||cache chanell||channel changable||changeable chanined||chained @@ -348,6 +348,7 @@ colescing||coalescing collapsable||collapsible colorfull||colorful comand||command +comaptible||compatible comit||commit commerical||commercial comming||coming @@ -358,10 +359,6 @@ committ||commit commmand||command commnunication||communication commoditiy||commodity 
-comsume||consume -comsumer||consumer -comsuming||consuming -comaptible||compatible compability||compatibility compaibility||compatibility comparsion||comparison @@ -377,22 +374,25 @@ compleatly||completely completition||completion completly||completely complient||compliant -componnents||components compoment||component +componnents||components comppatible||compatible compres||compress compresion||compression compresser||compressor comression||compression +comsume||consume comsumed||consumed +comsumer||consumer +comsuming||consuming comunicate||communicate comunication||communication conbination||combination concurent||concurrent conditionaly||conditionally conditon||condition -condtion||condition condtional||conditional +condtion||condition conected||connected conector||connector configed||configured @@ -429,13 +429,13 @@ continous||continuous continously||continuously continueing||continuing contiuous||continuous -contraints||constraints -contruct||construct contol||control contoller||controller +contraints||constraints controled||controlled controler||controller controll||control +contruct||construct contruction||construction contry||country conuntry||country @@ -466,10 +466,9 @@ debouce||debounce decendant||descendant decendants||descendants decompres||decompress -decsribed||described decrese||decrease decription||description -detault||default +decsribed||described dectected||detected defailt||default deferal||deferral @@ -483,9 +482,9 @@ defintion||definition defintions||definitions defualt||default defult||default -deintializing||deinitializing -deintialize||deinitialize deintialized||deinitialized +deintialize||deinitialize +deintializing||deinitializing deivce||device delared||declared delare||declare @@ -495,8 +494,8 @@ delemiter||delimiter deley||delay delibrately||deliberately delievered||delivered -demodualtor||demodulator demension||dimension +demodualtor||demodulator dependancies||dependencies dependancy||dependency dependant||dependent @@ -506,15 
+505,15 @@ depreacte||deprecate desactivate||deactivate desciptor||descriptor desciptors||descriptors -descritpor||descriptor descripto||descriptor descripton||description descrition||description +descritpor||descriptor descritptor||descriptor desctiptor||descriptor +desination||destination desriptor||descriptor desriptors||descriptors -desination||destination destionation||destination destoried||destroyed destory||destroy @@ -522,6 +521,7 @@ destoryed||destroyed destorys||destroys destroied||destroyed detabase||database +detault||default deteced||detected detecion||detection detectt||detect @@ -536,55 +536,54 @@ deveolpment||development devided||divided deviece||device devision||division -diable||disable diabled||disabled +diable||disable dicline||decline +diconnected||disconnected dictionnary||dictionary didnt||didn't diferent||different -differrence||difference -diffrent||different differenciate||differentiate +differrence||difference diffreential||differential +diffrent||different diffrentiate||differentiate difinition||definition digial||digital dimention||dimension dimesions||dimensions -diconnected||disconnected -disabed||disabled -disasembler||disassembler -disble||disable -disgest||digest -disired||desired -dispalying||displaying -dissable||disable -dissapeared||disappeared diplay||display -directon||direction direcly||directly +directon||direction direectly||directly diregard||disregard -disassocation||disassociation -disassocative||disassociative +disabed||disabled disapear||disappear disapeared||disappeared disappared||disappeared -disbale||disable +disasembler||disassembler +disassocation||disassociation +disassocative||disassociative disbaled||disabled -disble||disable +disbale||disable disbled||disabled +disble||disable +disble||disable disconnet||disconnect discontinous||discontinuous +disgest||digest disharge||discharge +disired||desired disnabled||disabled +dispalying||displaying dispertion||dispersion +dissable||disable +dissapeared||disappeared 
dissapears||disappears dissconect||disconnect distiction||distinction divisable||divisible divsiors||divisors -dsiabled||disabled docuentation||documentation documantation||documentation documentaion||documentation @@ -599,6 +598,7 @@ downlads||downloads droped||dropped droput||dropout druing||during +dsiabled||disabled dyanmic||dynamic dynmaic||dynamic eanable||enable @@ -622,20 +622,20 @@ enble||enable enchanced||enhanced encorporating||incorporating encrupted||encrypted -encrypiton||encryption encryped||encrypted +encrypiton||encryption encryptio||encryption endianess||endianness -enpoint||endpoint enhaced||enhanced enlightnment||enlightenment +enocded||encoded +enought||enough +enpoint||endpoint enqueing||enqueuing +enterily||entirely entires||entries entites||entities entrys||entries -enocded||encoded -enought||enough -enterily||entirely enviroiment||environment enviroment||environment environement||environment @@ -654,8 +654,8 @@ evalute||evaluate evalutes||evaluates evalution||evaluation evaulated||evaluated -excecutable||executable excceed||exceed +excecutable||executable exceded||exceeded exceds||exceeds exceeed||exceed @@ -669,41 +669,41 @@ exeuction||execution existance||existence existant||existent exixt||exist -exsits||exists exlcude||exclude exlcuding||excluding exlcusive||exclusive -exlusive||exclusive exlicitly||explicitly +exlusive||exclusive exmaple||example expecially||especially experies||expires explicite||explicit -explicity||explicitly explicitely||explicitly -explict||explicit +explicity||explicitly explictely||explicitly +explict||explicit explictly||explicitly expresion||expression exprienced||experienced exprimental||experimental -extened||extended +exsits||exists exteneded||extended +extened||extended extensability||extensibility -extention||extension extenstion||extension +extention||extension extracter||extractor faied||failed faield||failed -faild||failed failded||failed +faild||failed failer||failure -faill||fail failied||failed 
+faill||fail faillure||failure +failng||failing failue||failure failuer||failure -failng||failing faireness||fairness falied||failed faliure||failure @@ -718,15 +718,15 @@ fetcing||fetching fileystem||filesystem fimrware||firmware fimware||firmware +finanize||finalize +findn||find +finilizes||finalizes +finsih||finish firmare||firmware firmaware||firmware firtly||firstly firware||firmware firwmare||firmware -finanize||finalize -findn||find -finilizes||finalizes -finsih||finish fliter||filter flusing||flushing folloing||following @@ -743,9 +743,9 @@ forwared||forwarded frambuffer||framebuffer framming||framing framwork||framework +frequancy||frequency frequence||frequency frequncy||frequency -frequancy||frequency frome||from fronend||frontend fucntion||function @@ -767,9 +767,9 @@ gatable||gateable gateing||gating gauage||gauge gaurenteed||guaranteed -generiously||generously genereate||generate genereted||generated +generiously||generously genric||generic gerenal||general geting||getting @@ -791,18 +791,17 @@ hanlde||handle hanled||handled happend||happened hardare||hardware -harware||hardware hardward||hardware +harware||hardware havind||having +hearbeat||heartbeat heigth||height +heirachy||hierarchy heirarchically||hierarchically heirarchy||hierarchy -heirachy||hierarchy helpfull||helpful -hearbeat||heartbeat heterogenous||heterogeneous hexdecimal||hexadecimal -hybernate||hibernate hiearchy||hierarchy hierachy||hierarchy hierarchie||hierarchy @@ -810,14 +809,14 @@ homogenous||homogeneous horizental||horizontal howver||however hsould||should +hybernate||hibernate hypervior||hypervisor hypter||hyper idel||idle identidier||identifier iligal||illegal -illigal||illegal illgal||illegal -iomaped||iomapped +illigal||illegal imblance||imbalance immeadiately||immediately immedaite||immediate @@ -832,13 +831,14 @@ implemantation||implementation implemenation||implementation implementaiton||implementation implementated||implemented -implemention||implementation 
implementd||implemented +implemention||implementation implemetation||implementation implemntation||implementation implentation||implementation implmentation||implementation implmenting||implementing +inavlid||invalid incative||inactive incomming||incoming incompaitiblity||incompatibility @@ -870,9 +870,9 @@ infromation||information ingore||ignore inheritence||inheritance inital||initial -initalized||initialized initalised||initialized initalise||initialize +initalized||initialized initalize||initialize initation||initiation initators||initiators @@ -880,20 +880,20 @@ initialiazation||initialization initializationg||initialization initializiation||initialization initializtion||initialization -initialze||initialize initialzed||initialized +initialze||initialize initialzing||initializing initilization||initialization +initilized||initialized initilize||initialize initliaze||initialize -initilized||initialized inofficial||unofficial inrerface||interface insititute||institute instace||instance instal||install -instanciate||instantiate instanciated||instantiated +instanciate||instantiate instuments||instruments insufficent||insufficient intead||instead @@ -912,16 +912,16 @@ intergrated||integrated intermittant||intermittent internel||internal interoprability||interoperability -interuupt||interrupt -interupt||interrupt -interupts||interrupts -interurpt||interrupt interrface||interface interrrupt||interrupt interrup||interrupt interrups||interrupts interruptted||interrupted interupted||interrupted +interupt||interrupt +interupts||interrupts +interurpt||interrupt +interuupt||interrupt intiailized||initialized intial||initial intialisation||initialisation @@ -935,18 +935,18 @@ intrerrupt||interrupt intrrupt||interrupt intterrupt||interrupt intuative||intuitive -inavlid||invalid invaid||invalid invaild||invalid invailid||invalid -invald||invalid invalde||invalid +invald||invalid invalide||invalid invalidiate||invalidate invalud||invalid invididual||individual 
invokation||invocation invokations||invocations +iomaped||iomapped ireelevant||irrelevant irrelevent||irrelevant isnt||isn't @@ -992,11 +992,11 @@ losted||lost maangement||management machinary||machinery maibox||mailbox +mailformed||malformed maintainance||maintenance maintainence||maintenance maintan||maintain makeing||making -mailformed||malformed malplaced||misplaced malplace||misplace managable||manageable @@ -1006,21 +1006,22 @@ mangement||management manger||manager manoeuvering||maneuvering manufaucturing||manufacturing -mappping||mapping maping||mapping +mappping||mapping matchs||matches mathimatical||mathematical mathimatic||mathematic mathimatics||mathematics -maxmium||maximum maximium||maximum maxium||maximum +maxmium||maximum mechamism||mechanism mechanim||mechanism meetign||meeting memeory||memory memmber||member memoery||memory +memomry||memory memroy||memory ment||meant mergable||mergeable @@ -1037,19 +1038,19 @@ migrateable||migratable miliseconds||milliseconds millenium||millennium milliseonds||milliseconds -minimim||minimum -minium||minimum minimam||minimum +minimim||minimum minimun||minimum +minium||minimum miniumum||minimum minumum||minimum misalinged||misaligned miscelleneous||miscellaneous misformed||malformed -mispelled||misspelled -mispelt||misspelt mising||missing mismactch||mismatch +mispelled||misspelled +mispelt||misspelt missign||missing missmanaged||mismanaged missmatch||mismatch @@ -1062,18 +1063,17 @@ modifer||modifier modul||module modulues||modules momery||memory -memomry||memory monitring||monitoring monochorome||monochrome monochromo||monochrome monocrome||monochrome mopdule||module mroe||more -mulitplied||multiplied muliple||multiple -multipler||multiplier +mulitplied||multiplied multidimensionnal||multidimensional multipe||multiple +multipler||multiplier multple||multiple mumber||number muticast||multicast @@ -1095,6 +1095,7 @@ nerver||never nescessary||necessary nessessary||necessary none existent||non-existent +notfify||notify 
noticable||noticeable notication||notification notications||notifications @@ -1102,7 +1103,6 @@ notifcations||notifications notifed||notified notifer||notifier notity||notify -notfify||notify nubmer||number numebr||number numer||number @@ -1120,10 +1120,10 @@ occurence||occurrence occure||occurred occuring||occurring ocurrence||occurrence -offser||offset offet||offset offlaod||offload offloded||offloaded +offser||offset offseting||offsetting oflload||offload omited||omitted @@ -1142,25 +1142,25 @@ optionnal||optional optmizations||optimizations orientatied||orientated orientied||oriented -orignal||original originial||original +orignal||original orphanded||orphaned otherise||otherwise ouput||output oustanding||outstanding -overaall||overall -overhread||overhead -overlaping||overlapping oveflow||overflow +overaall||overall overflw||overflow -overlfow||overflow +overhread||overhead overide||override +overlaping||overlapping +overlfow||overflow overrided||overridden overriden||overridden overrrun||overrun overun||overrun -overwritting||overwriting overwriten||overwritten +overwritting||overwriting pacakge||package pachage||package packacge||package @@ -1170,11 +1170,11 @@ packtes||packets pakage||package paket||packet pallette||palette -paln||plan palne||plane +paln||plan paramameters||parameters -paramaters||parameters paramater||parameter +paramaters||parameters paramenters||parameters parametes||parameters parametised||parametrised @@ -1242,8 +1242,6 @@ prefered||preferred prefferably||preferably prefitler||prefilter preform||perform -previleged||privileged -previlege||privilege premption||preemption prepaired||prepared prepate||prepare @@ -1251,6 +1249,8 @@ preperation||preparation preprare||prepare pressre||pressure presuambly||presumably +previleged||privileged +previlege||privilege previosuly||previously previsously||previously primative||primitive @@ -1259,17 +1259,17 @@ priorty||priority priting||printing privilaged||privileged privilage||privilege 
-priviledge||privilege priviledged||privileged +priviledge||privilege priviledges||privileges privleges||privileges -probaly||probably probabalistic||probabilistic +probaly||probably procceed||proceed proccesors||processors procesed||processed -proces||process procesing||processing +proces||process processessing||processing processess||processes processpr||processor @@ -1289,6 +1289,7 @@ progresss||progress prohibitted||prohibited prohibitting||prohibiting promiscous||promiscuous +promixity||proximity promps||prompts pronnounced||pronounced prononciation||pronunciation @@ -1297,15 +1298,14 @@ pronunce||pronounce propery||property propigate||propagate propigation||propagation -propogation||propagation propogate||propagate +propogation||propagation prosess||process protable||portable protcol||protocol protecion||protection protedcted||protected protocoll||protocol -promixity||proximity psudo||pseudo psuedo||pseudo psychadelic||psychedelic @@ -1334,8 +1334,8 @@ recieves||receives recieving||receiving recogniced||recognised recognizeable||recognizable -recompte||recompute recommanded||recommended +recompte||recompute recyle||recycle redect||reject redircet||redirect @@ -1345,8 +1345,8 @@ reename||rename refcounf||refcount refence||reference refered||referred -referencce||reference referenace||reference +referencce||reference refererence||reference refering||referring refernces||references @@ -1354,8 +1354,8 @@ refernnce||reference refrence||reference regiser||register registed||registered -registerd||registered registeration||registration +registerd||registered registeresd||registered registerred||registered registes||registers @@ -1372,8 +1372,8 @@ reloade||reload remoote||remote remore||remote removeable||removable -repective||respective repectively||respectively +repective||respective replacable||replaceable replacments||replacements replys||replies @@ -1390,8 +1390,8 @@ requieres||requires requirment||requirement requred||required requried||required 
-requst||request requsted||requested +requst||request reregisteration||reregistration reseting||resetting reseved||reserved @@ -1413,11 +1413,11 @@ retransmited||retransmitted retreived||retrieved retreive||retrieve retreiving||retrieving -retrive||retrieve retrived||retrieved +retrive||retrieve retrun||return -retun||return retuned||returned +retun||return reudce||reduce reuest||request reuqest||request @@ -1465,9 +1465,9 @@ seperate||separate seperatly||separately seperator||separator sepperate||separate -seqeunce||sequence -seqeuncer||sequencer seqeuencer||sequencer +seqeuncer||sequencer +seqeunce||sequence sequece||sequence sequemce||sequence sequencial||sequential @@ -1506,8 +1506,8 @@ soley||solely soluation||solution souce||source speach||speech -specfic||specific specfication||specification +specfic||specific specfield||specified speciefied||specified specifc||specific @@ -1516,8 +1516,8 @@ specificatin||specification specificaton||specification specificed||specified specifing||specifying -specifiy||specify specifiying||specifying +specifiy||specify speficied||specified speicify||specify speling||spelling @@ -1544,23 +1544,23 @@ stoppped||stopped straming||streaming struc||struct structres||structures -stuct||struct strucuture||structure +stuct||struct stucture||structure sturcture||structure subdirectoires||subdirectories suble||subtle -substract||subtract submited||submitted submition||submission +substract||subtract succeded||succeeded -suceed||succeed -succesfuly||successfully succesfully||successfully succesful||successful +succesfuly||successfully successed||succeeded successfull||successful successfuly||successfully +suceed||succeed sucessfully||successfully sucessful||successful sucess||success @@ -1569,9 +1569,9 @@ superseeded||superseded suplied||supplied suported||supported suport||support -supportet||supported suppored||supported supporing||supporting +supportet||supported supportin||supporting suppoted||supported suppported||supported @@ 
-1582,27 +1582,27 @@ surpressed||suppressed surpresses||suppresses susbsystem||subsystem suspeneded||suspended -suspsend||suspend suspicously||suspiciously +suspsend||suspend swaping||swapping switchs||switches -swith||switch swithable||switchable -swithc||switch swithced||switched swithcing||switching +swithc||switch swithed||switched swithing||switching +swith||switch swtich||switch +sychronization||synchronization +sychronously||synchronously syfs||sysfs symetric||symmetric synax||syntax synchonized||synchronized -sychronization||synchronization -sychronously||synchronously synchronuously||synchronously -syncronize||synchronize syncronized||synchronized +syncronize||synchronize syncronizing||synchronizing syncronus||synchronous syste||system @@ -1611,16 +1611,17 @@ sythesis||synthesis tagert||target taht||that tained||tainted -tarffic||traffic +tansition||transition tansmit||transmit +tarffic||traffic targetted||targeted targetting||targeting taskelt||tasklet teh||the temeprature||temperature temorary||temporary -temproarily||temporarily temperture||temperature +temproarily||temporarily theads||threads therfore||therefore thier||their @@ -1630,23 +1631,20 @@ threshhold||threshold thresold||threshold throtting||throttling throught||through -tansition||transition -trackling||tracking -troughput||throughput -trys||tries thses||these -tiggers||triggers tiggered||triggered tiggerring||triggering -tipically||typically +tiggers||triggers timeing||timing timming||timing timout||timeout +tipically||typically tmis||this tolarance||tolerance toogle||toggle torerable||tolerable torlence||tolerance +trackling||tracking traget||target traking||tracking tramsmitted||transmitted @@ -1670,20 +1668,20 @@ trasfer||transfer trasmission||transmission trasmitter||transmitter treshold||threshold -trigged||triggered -triggerd||triggered trigerred||triggered trigerring||triggering +trigged||triggered +triggerd||triggered +troughput||throughput trun||turn +trys||tries tunning||tuning 
ture||true tyep||type udpate||update -updtes||updates uesd||used -unknwon||unknown uknown||unknown -usccess||success +unamed||unnamed uncommited||uncommitted uncompatible||incompatible uncomressed||uncompressed @@ -1692,6 +1690,7 @@ undeflow||underflow undelying||underlying underun||underrun unecessary||unnecessary +uneeded||unneeded unexecpted||unexpected unexepected||unexpected unexpcted||unexpected @@ -1700,26 +1699,24 @@ unexpeted||unexpected unexpexted||unexpected unfortunatelly||unfortunately unifiy||unify -uniterrupted||uninterrupted uninterruptable||uninterruptible unintialized||uninitialized +uniterrupted||uninterrupted unitialized||uninitialized unkmown||unknown unknonw||unknown unknouwn||unknown unknow||unknown +unknwon||unknown unkown||unknown -unamed||unnamed -uneeded||unneeded -unneded||unneeded +unmached||unmatched unneccecary||unnecessary unneccesary||unnecessary unneccessary||unnecessary unnecesary||unnecessary +unneded||unneeded unneedingly||unnecessarily unnsupported||unsupported -unuspported||unsupported -unmached||unmatched unprecise||imprecise unpriviledged||unprivileged unpriviliged||unprivileged @@ -1727,18 +1724,21 @@ unregester||unregister unresgister||unregister unrgesiter||unregister unsinged||unsigned -unstabel||unstable -unsolicted||unsolicited unsolicitied||unsolicited +unsolicted||unsolicited +unstabel||unstable unsuccessfull||unsuccessful unsuported||unsupported untill||until ununsed||unused unuseful||useless +unuspported||unsupported unvalid||invalid upate||update +updtes||updates upsupported||unsupported upto||up to +usccess||success useable||usable usefule||useful usefull||useful @@ -1760,14 +1760,14 @@ variantions||variations varible||variable varient||variant vaule||value -verbse||verbose veify||verify +verbse||verbose verfication||verification veriosn||version -versoin||version verisons||versions verison||version veritical||vertical +versoin||version verson||version vicefersa||vice-versa virtal||virtual @@ -1780,13 +1780,13 @@ 
wakeus||wakeups was't||wasn't wathdog||watchdog wating||waiting -wiat||wait wether||whether whataver||whatever whcih||which whenver||whenever wheter||whether whe||when +wiat||wait wierd||weird wihout||without wiil||will From 513d08ace4e51d320ecc19e0a50b1192b217269b Mon Sep 17 00:00:00 2001 From: Petr Vorel Date: Thu, 12 Feb 2026 15:40:05 +0100 Subject: [PATCH 009/127] scripts/spelling.txt: add "exaclty" typo Link: https://lkml.kernel.org/r/20260212144005.45052-2-pvorel@suse.cz Signed-off-by: Petr Vorel Cc: Jonathan Camerom Cc: WangYuli Signed-off-by: Andrew Morton --- scripts/spelling.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 84b05f57c176..2f2e81dbda03 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -654,6 +654,7 @@ evalute||evaluate evalutes||evaluates evalution||evaluation evaulated||evaluated +exaclty||exactly excceed||exceed excecutable||executable exceded||exceeded From 225ba47fb9ec17440781563ea729f3fe67f1b5b8 Mon Sep 17 00:00:00 2001 From: UYeol Jo Date: Tue, 10 Feb 2026 22:53:59 +0900 Subject: [PATCH 010/127] selftests/ipc: skip msgque test when MSG_COPY is unsupported msgque kselftest uses msgrcv(..., MSG_COPY) to copy messages. When the kernel is built without CONFIG_CHECKPOINT_RESTORE, prepare_copy() is stubbed out and msgrcv() returns -ENOSYS. The test currently reports this as a failure even though it is simply a missing feature/configuration. Skip the test when msgrcv() fails with ENOSYS. 
Link: https://lkml.kernel.org/r/20260210135359.178636-1-jouyeol8739@gmail.com Signed-off-by: UYeol Jo Cc: Shuah Khan Cc: Wei Yang Signed-off-by: Andrew Morton --- tools/testing/selftests/ipc/msgque.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c index e107379d185c..82f73cdae120 100644 --- a/tools/testing/selftests/ipc/msgque.c +++ b/tools/testing/selftests/ipc/msgque.c @@ -161,6 +161,9 @@ int dump_queue(struct msgque_data *msgque) ret = msgrcv(msgque->msq_id, &msgque->messages[i].mtype, MAX_MSG_SIZE, i, IPC_NOWAIT | MSG_COPY); if (ret < 0) { + if (errno == ENOSYS) + ksft_exit_skip("MSG_COPY not supported\n"); + ksft_test_result_fail("Failed to copy IPC message: %m (%d)\n", errno); return -errno; } From 617ab884b893032765fbc0ebf656fa3015016648 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 22 Feb 2026 16:23:37 +0100 Subject: [PATCH 011/127] exit: kill unnecessary thread_group_leader() checks in exit_notify() and do_notify_parent() thread_group_empty(tsk) is only possible if tsk is a group leader, and thread_group_empty() already does the thread_group_leader() check. So it makes no sense to check "thread_group_leader() && thread_group_empty()"; thread_group_empty() alone is enough. Link: https://lkml.kernel.org/r/aZsfeegKZPZZszJh@redhat.com Signed-off-by: Oleg Nesterov Cc: Christian Brauner Cc: Mateusz Guzik Cc: Kees Cook Cc: Deepanshu Kartikey Signed-off-by: Andrew Morton --- kernel/exit.c | 8 +++----- kernel/signal.c | 3 +-- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index ede3117fa7d4..1f32023d0dbe 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -748,14 +748,12 @@ static void exit_notify(struct task_struct *tsk, int group_dead) tsk->exit_state = EXIT_ZOMBIE; if (unlikely(tsk->ptrace)) { - int sig = thread_group_leader(tsk) && - thread_group_empty(tsk) && - !ptrace_reparented(tsk) ?
- tsk->exit_signal : SIGCHLD; + int sig = thread_group_empty(tsk) && !ptrace_reparented(tsk) + ? tsk->exit_signal : SIGCHLD; autoreap = do_notify_parent(tsk, sig); } else if (thread_group_leader(tsk)) { autoreap = thread_group_empty(tsk) && - do_notify_parent(tsk, tsk->exit_signal); + do_notify_parent(tsk, tsk->exit_signal); } else { autoreap = true; /* untraced sub-thread */ diff --git a/kernel/signal.c b/kernel/signal.c index d65d0fe24bfb..ca23059a947d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2178,8 +2178,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig) /* do_notify_parent_cldstop should have been called instead. */ WARN_ON_ONCE(task_is_stopped_or_traced(tsk)); - WARN_ON_ONCE(!tsk->ptrace && - (tsk->group_leader != tsk || !thread_group_empty(tsk))); + WARN_ON_ONCE(!tsk->ptrace && !thread_group_empty(tsk)); /* ptraced, or group-leader without sub-threads */ do_notify_pidfd(tsk); From 162e4fd97dc6c4f56c7e94b82651e95a57674091 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 22 Feb 2026 16:24:00 +0100 Subject: [PATCH 012/127] complete_signal: kill always-true "core_state || !SIGNAL_GROUP_EXIT" check The "(signal->core_state || !(signal->flags & SIGNAL_GROUP_EXIT))" check in complete_signal() is not obvious at all, and in fact it only adds unnecessary confusion: this condition is always true. prepare_signal() does: if (signal->flags & SIGNAL_GROUP_EXIT) { if (signal->core_state) return sig == SIGKILL; /* * The process is in the middle of dying, drop the signal. */ return false; } This means that "!signal->core_state && (signal->flags & SIGNAL_GROUP_EXIT)" in complete_signal() is never possible. If SIGNAL_GROUP_EXIT is set, prepare_signal() can only return true if signal->core_state is not NULL. 
Link: https://lkml.kernel.org/r/aZsfkDhnqJ4s1oTs@redhat.com Signed-off-by: Oleg Nesterov Cc: Christian Brauner Cc: Kees Cook Cc: Mateusz Guzik Cc: Deepanshu Kartikey Signed-off-by: Andrew Morton --- kernel/signal.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index ca23059a947d..86aad7badb9a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1000,9 +1000,7 @@ static void complete_signal(int sig, struct task_struct *p, enum pid_type type) * Found a killable thread. If the signal will be fatal, * then start taking the whole group down immediately. */ - if (sig_fatal(p, sig) && - (signal->core_state || !(signal->flags & SIGNAL_GROUP_EXIT)) && - !sigismember(&t->real_blocked, sig) && + if (sig_fatal(p, sig) && !sigismember(&t->real_blocked, sig) && (sig == SIGKILL || !p->ptrace)) { /* * This signal will be fatal to the whole group. From 380369ea2e9d3c41855b0d8c41898dcd564209fd Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 15 Dec 2025 15:21:52 +0100 Subject: [PATCH 013/127] fork: replace simple_strtoul with kstrtoul in coredump_filter_setup Replace simple_strtoul() with the recommended kstrtoul() for parsing the 'coredump_filter=' boot parameter. Check the return value of kstrtoul() and reject invalid values. This adds error handling while preserving behavior for existing values, and removes use of the deprecated simple_strtoul() helper. The current code silently sets 'default_dump_filter = 0' if parsing fails, instead of leaving the default value (MMF_DUMP_FILTER_DEFAULT) unchanged. Rename the static variable 'default_dump_filter' to 'coredump_filter' since it does not necessarily contain the default value and the current name can be misleading.
Link: https://lkml.kernel.org/r/20251215142152.4082-2-thorsten.blum@linux.dev Signed-off-by: Thorsten Blum Reviewed-by: Andrew Morton Cc: Ben Segall Cc: David Hildenbrand Cc: Dietmar Eggemann Cc: Ingo Molnar Cc: Juri Lelli Cc: Kees Cook Cc: Liam Howlett Cc: Lorenzo Stoakes (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Rapoport Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Valentin Schneider Cc: Vincent Guittot Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- kernel/fork.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 1ec0caea6a7e..db02a301d0c0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1014,13 +1014,14 @@ free_tsk: __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); -static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; +static unsigned long coredump_filter = MMF_DUMP_FILTER_DEFAULT; static int __init coredump_filter_setup(char *s) { - default_dump_filter = - (simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) & - MMF_DUMP_FILTER_MASK; + if (kstrtoul(s, 0, &coredump_filter)) + return 0; + coredump_filter <<= MMF_DUMP_FILTER_SHIFT; + coredump_filter &= MMF_DUMP_FILTER_MASK; return 1; } @@ -1106,7 +1107,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, __mm_flags_overwrite_word(mm, mmf_init_legacy_flags(flags)); mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; } else { - __mm_flags_overwrite_word(mm, default_dump_filter); + __mm_flags_overwrite_word(mm, coredump_filter); mm->def_flags = 0; } From c02474fe1a62bb20548abd37b1d492a43a8b906e Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sat, 28 Feb 2026 09:51:36 +0100 Subject: [PATCH 014/127] crash_dump: remove redundant less-than-zero check 'key_count' is an 'unsigned int' and cannot be less than zero. Remove the redundant condition. 
Link: https://lkml.kernel.org/r/20260228085136.861971-2-thorsten.blum@linux.dev Signed-off-by: Thorsten Blum Cc: Baoquan He Cc: Dave Young Cc: Vivek Goyal Signed-off-by: Andrew Morton --- kernel/crash_dump_dm_crypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/crash_dump_dm_crypt.c b/kernel/crash_dump_dm_crypt.c index a20d4097744a..5ce958d069dd 100644 --- a/kernel/crash_dump_dm_crypt.c +++ b/kernel/crash_dump_dm_crypt.c @@ -115,7 +115,7 @@ static int restore_dm_crypt_keys_to_thread_keyring(void) addr = dm_crypt_keys_addr; dm_crypt_keys_read((char *)&key_count, sizeof(key_count), &addr); - if (key_count < 0 || key_count > KEY_NUM_MAX) { + if (key_count > KEY_NUM_MAX) { kexec_dprintk("Failed to read the number of dm-crypt keys\n"); return -1; } From 26430489b10107f0e45333dcf115ec631bd57b08 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sat, 28 Feb 2026 00:04:21 +0100 Subject: [PATCH 015/127] crash_dump: fix typo in function name read_key_from_user_keying The function read_key_from_user_keying() is missing an 'r' in its name. Fix the typo by renaming it to read_key_from_user_keyring(). 
Link: https://lkml.kernel.org/r/20260227230422.859423-1-thorsten.blum@linux.dev Signed-off-by: Thorsten Blum Reviewed-by: Andrew Morton Acked-by: Baoquan He Cc: Dave Young Cc: Vivek Goyal Signed-off-by: Andrew Morton --- kernel/crash_dump_dm_crypt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/crash_dump_dm_crypt.c b/kernel/crash_dump_dm_crypt.c index 5ce958d069dd..03fce24f1947 100644 --- a/kernel/crash_dump_dm_crypt.c +++ b/kernel/crash_dump_dm_crypt.c @@ -139,7 +139,7 @@ static int restore_dm_crypt_keys_to_thread_keyring(void) return 0; } -static int read_key_from_user_keying(struct dm_crypt_key *dm_key) +static int read_key_from_user_keyring(struct dm_crypt_key *dm_key) { const struct user_key_payload *ukp; struct key *key; @@ -387,7 +387,7 @@ static int build_keys_header(void) strscpy(keys_header->keys[i].key_desc, key->description, KEY_DESC_MAX_LEN); - r = read_key_from_user_keying(&keys_header->keys[i]); + r = read_key_from_user_keyring(&keys_header->keys[i]); if (r != 0) { kexec_dprintk("Failed to read key %s\n", keys_header->keys[i].key_desc); From 040261b118420c523600f7e0421f76143943f948 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 27 Feb 2026 13:03:41 +0100 Subject: [PATCH 016/127] pid: make sub-init creation retryable Patch series "pid: make sub-init creation retryable". This patch (of 2): Currently we allow only one attempt to create init in a new namespace. If the first fork() fails after alloc_pid() succeeds, free_pid() clears PIDNS_ADDING and thus disables further PID allocations. Nowadays this looks like an unnecessary limitation. The original reason to handle "case PIDNS_ADDING" in free_pid() is gone, most probably after commit 69879c01a0c3 ("proc: Remove the now unnecessary internal mount of proc"). Change free_pid() to keep ns->pid_allocated == PIDNS_ADDING, and change alloc_pid() to reset the cursor early, right after taking pidmap_lock. 
Test-case: #define _GNU_SOURCE #include #include #include #include #include #include int main(void) { struct clone_args args = { .exit_signal = SIGCHLD, .flags = CLONE_PIDFD, .pidfd = 0, }; unsigned long pidfd; int pid; assert(unshare(CLONE_NEWPID) == 0); pid = syscall(__NR_clone3, &args, sizeof(args)); assert(pid == -1 && errno == EFAULT); args.pidfd = (unsigned long)&pidfd; pid = syscall(__NR_clone3, &args, sizeof(args)); if (pid) assert(pid > 0 && wait(NULL) == pid); else assert(getpid() == 1); return 0; } Link: https://lkml.kernel.org/r/aaGHu3ixbw9Y7kFj@redhat.com Link: https://lkml.kernel.org/r/aaGIHa7vGdwhEc_D@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Andrei Vagin Cc: Adrian Reber Cc: Aleksa Sarai Cc: Alexander Mikhalitsyn Cc: Christian Brauner Cc: David Hildenbrand Cc: Ingo Molnar Cc: Jan Kara Cc: Juri Lelli Cc: Kees Cook Cc: Kirill Tkhai Cc: Pavel Tikhomirov Cc: Peter Zijlstra Cc: Shuah Khan Cc: Vincent Guittot Signed-off-by: Andrew Morton --- kernel/pid.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/kernel/pid.c b/kernel/pid.c index 3b96571d0fe6..aff5bf0f638f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -131,9 +131,8 @@ void free_pid(struct pid *pid) wake_up_process(ns->child_reaper); break; case PIDNS_ADDING: - /* Handle a fork failure of the first process */ - WARN_ON(ns->child_reaper); - ns->pid_allocated = 0; + /* Only possible if the 1st fork fails */ + WARN_ON(READ_ONCE(ns->child_reaper)); break; } @@ -236,6 +235,10 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *arg_set_tid, retried_preload = false; idr_preload(GFP_KERNEL); spin_lock(&pidmap_lock); + /* For the case when the previous attempt to create init failed */ + if (ns->pid_allocated == PIDNS_ADDING) + idr_set_cursor(&ns->idr, 0); + for (tmp = ns, i = ns->level; i >= 0;) { int tid = set_tid[ns->level - i]; @@ -338,10 +341,6 @@ out_free: idr_remove(&upid->ns->idr, upid->nr); } - /* On failure to allocate the first pid, reset the state */ - 
if (ns->pid_allocated == PIDNS_ADDING) - idr_set_cursor(&ns->idr, 0); - spin_unlock(&pidmap_lock); idr_preload_end(); From 8fba1920ac9fa571dff9aba7157bb7c327719b54 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 27 Feb 2026 13:04:20 +0100 Subject: [PATCH 017/127] pid: document the PIDNS_ADDING checks in alloc_pid() and copy_process() Both copy_process() and alloc_pid() do the same PIDNS_ADDING check. The reasons for these checks, and the fact that both are necessary, are not immediately obvious. Add the comments. Link: https://lkml.kernel.org/r/aaGIRElc78U4Er42@redhat.com Signed-off-by: Oleg Nesterov Cc: Adrian Reber Cc: Aleksa Sarai Cc: Alexander Mikhalitsyn Cc: Andrei Vagin Cc: Christian Brauner Cc: David Hildenbrand Cc: Ingo Molnar Cc: Jan Kara Cc: Juri Lelli Cc: Kees Cook Cc: Kirill Tkhai Cc: Pavel Tikhomirov Cc: Peter Zijlstra Cc: Shuah Khan Cc: Vincent Guittot Signed-off-by: Andrew Morton --- kernel/fork.c | 6 +++++- kernel/pid.c | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index db02a301d0c0..1e80d4cdf538 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2393,7 +2393,11 @@ __latent_entropy struct task_struct *copy_process( rseq_fork(p, clone_flags); - /* Don't start children in a dying pid namespace */ + /* + * If zap_pid_ns_processes() was called after alloc_pid(), the new + * child missed SIGKILL. If current is not in the same namespace, + * we can't rely on fatal_signal_pending() below. + */ if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) { retval = -ENOMEM; goto bad_fork_core_free; diff --git a/kernel/pid.c b/kernel/pid.c index aff5bf0f638f..2f1dbcbc2349 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -314,6 +314,11 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *arg_set_tid, * * This can't be done earlier because we need to preserve other * error conditions. + * + * We need this even if copy_process() does the same check. 
If two + * or more tasks from parent namespace try to inject a child into a + * dead namespace, one of free_pid() calls from the copy_process() + * error path may try to wakeup the possibly freed ns->child_reaper. */ retval = -ENOMEM; if (unlikely(!(ns->pid_allocated & PIDNS_ADDING))) From ac2428c141c06528703b9f7ca182cf4b35d9d22e Mon Sep 17 00:00:00 2001 From: Josh Law Date: Sat, 28 Feb 2026 19:53:00 +0000 Subject: [PATCH 018/127] lib: glob: add missing SPDX-License-Identifier Add the missing dual MIT/GPL license identifier to glob.c. Link: https://lkml.kernel.org/r/20260228195300.2468310-1-objecting@objecting.org Signed-off-by: Josh Law Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton --- lib/glob.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/glob.c b/lib/glob.c index aa57900d2062..d0654a5b6f0b 100644 --- a/lib/glob.c +++ b/lib/glob.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) #include #include From 04e23830b8f312324f148ccd8289d9cc78992cc1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 26 Feb 2026 22:24:42 +0000 Subject: [PATCH 019/127] selftests/fchmodat2: clean up temporary files and directories Patch series "selftests/fchmodat2: Error handling and general", v4. I looked at the fchmodat2() tests since I've been experiencing some random intermittent segfaults with them in my test systems, while doing so I noticed these two issues. Unfortunately I didn't figure out the original yet, unless I managed to fix it unwittingly. This patch (of 2): The fchmodat2() test program creates a temporary directory with a file and a symlink for every test it runs but never cleans these up, resulting in ${TMPDIR} getting left with stale files after every run. Restructure the program a bit to ensure that we clean these up, this is more invasive than it might otherwise be due to the extensive use of ksft_exit_fail_msg() in the program. 
As a side effect this also ensures that we report a consistent test name for the tests and always try both tests even if they are skipped. Link: https://lkml.kernel.org/r/20260226-selftests-fchmodat2-v4-0-a6419435f2e8@kernel.org Link: https://lkml.kernel.org/r/20260226-selftests-fchmodat2-v4-1-a6419435f2e8@kernel.org Signed-off-by: Mark Brown Acked-by: Alexey Gladkov Cc: Christian Brauner Signed-off-by: Andrew Morton --- .../selftests/fchmodat2/fchmodat2_test.c | 153 ++++++++++++------ 1 file changed, 107 insertions(+), 46 deletions(-) diff --git a/tools/testing/selftests/fchmodat2/fchmodat2_test.c b/tools/testing/selftests/fchmodat2/fchmodat2_test.c index e397339495f6..e03430c4675c 100644 --- a/tools/testing/selftests/fchmodat2/fchmodat2_test.c +++ b/tools/testing/selftests/fchmodat2/fchmodat2_test.c @@ -9,6 +9,11 @@ #include "kselftest.h" +struct testdir { + char *dirname; + int dfd; +}; + int sys_fchmodat2(int dfd, const char *filename, mode_t mode, int flags) { int ret = syscall(__NR_fchmodat2, dfd, filename, mode, flags); @@ -16,9 +21,9 @@ int sys_fchmodat2(int dfd, const char *filename, mode_t mode, int flags) return ret >= 0 ? ret : -errno; } -int setup_testdir(void) +static void setup_testdir(struct testdir *testdir) { - int dfd, ret; + int ret, dfd; char dirname[] = "/tmp/ksft-fchmodat2.XXXXXX"; /* Make the top-level directory. 
*/ @@ -26,21 +31,48 @@ int setup_testdir(void) ksft_exit_fail_msg("%s: failed to create tmpdir\n", __func__); dfd = open(dirname, O_PATH | O_DIRECTORY); - if (dfd < 0) - ksft_exit_fail_msg("%s: failed to open tmpdir\n", __func__); + if (dfd < 0) { + ksft_perror("failed to open tmpdir"); + goto err; + } ret = openat(dfd, "regfile", O_CREAT | O_WRONLY | O_TRUNC, 0644); - if (ret < 0) - ksft_exit_fail_msg("%s: failed to create file in tmpdir\n", - __func__); + if (ret < 0) { + ksft_perror("failed to create file in tmpdir"); + goto err; + } close(ret); ret = symlinkat("regfile", dfd, "symlink"); - if (ret < 0) - ksft_exit_fail_msg("%s: failed to create symlink in tmpdir\n", - __func__); + if (ret < 0) { + ksft_perror("symlinkat() failed"); + goto err_regfile; + } - return dfd; + testdir->dirname = strdup(dirname); + if (!testdir->dirname) { + ksft_perror("Out of memory"); + goto err_symlink; + } + testdir->dfd = dfd; + + return; + +err_symlink: + unlinkat(testdir->dfd, "symlink", 0); +err_regfile: + unlinkat(testdir->dfd, "regfile", 0); +err: + unlink(dirname); + ksft_exit_fail(); +} + +static void cleanup_testdir(struct testdir *testdir) +{ + unlinkat(testdir->dfd, "regfile", 0); + unlinkat(testdir->dfd, "symlink", 0); + rmdir(testdir->dirname); + free(testdir->dirname); } int expect_mode(int dfd, const char *filename, mode_t expect_mode) @@ -48,61 +80,80 @@ int expect_mode(int dfd, const char *filename, mode_t expect_mode) struct stat st; int ret = fstatat(dfd, filename, &st, AT_SYMLINK_NOFOLLOW); - if (ret) - ksft_exit_fail_msg("%s: %s: fstatat failed\n", - __func__, filename); + if (ret) { + ksft_perror("fstatat() failed\n"); + return 0; + } return (st.st_mode == expect_mode); } void test_regfile(void) { - int dfd, ret; + struct testdir testdir; + int ret; - dfd = setup_testdir(); + setup_testdir(&testdir); - ret = sys_fchmodat2(dfd, "regfile", 0640, 0); + ret = sys_fchmodat2(testdir.dfd, "regfile", 0640, 0); - if (ret < 0) - ksft_exit_fail_msg("%s: 
fchmodat2(noflag) failed\n", __func__); + if (ret < 0) { + ksft_perror("fchmodat2(noflag) failed"); + goto out; + } - if (!expect_mode(dfd, "regfile", 0100640)) - ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2\n", + if (!expect_mode(testdir.dfd, "regfile", 0100640)) { + ksft_print_msg("%s: wrong file mode bits after fchmodat2\n", __func__); + ret = 1; + goto out; + } - ret = sys_fchmodat2(dfd, "regfile", 0600, AT_SYMLINK_NOFOLLOW); + ret = sys_fchmodat2(testdir.dfd, "regfile", 0600, AT_SYMLINK_NOFOLLOW); - if (ret < 0) - ksft_exit_fail_msg("%s: fchmodat2(AT_SYMLINK_NOFOLLOW) failed\n", - __func__); + if (ret < 0) { + ksft_perror("fchmodat2(AT_SYMLINK_NOFOLLOW) failed"); + goto out; + } - if (!expect_mode(dfd, "regfile", 0100600)) - ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2 with nofollow\n", - __func__); + if (!expect_mode(testdir.dfd, "regfile", 0100600)) { + ksft_print_msg("%s: wrong file mode bits after fchmodat2 with nofollow\n", + __func__); + ret = 1; + } - ksft_test_result_pass("fchmodat2(regfile)\n"); +out: + ksft_test_result(ret == 0, "fchmodat2(regfile)\n"); + cleanup_testdir(&testdir); } void test_symlink(void) { - int dfd, ret; + struct testdir testdir; + int ret; - dfd = setup_testdir(); + setup_testdir(&testdir); - ret = sys_fchmodat2(dfd, "symlink", 0640, 0); + ret = sys_fchmodat2(testdir.dfd, "symlink", 0640, 0); - if (ret < 0) - ksft_exit_fail_msg("%s: fchmodat2(noflag) failed\n", __func__); + if (ret < 0) { + ksft_perror("fchmodat2(noflag) failed"); + goto err; + } - if (!expect_mode(dfd, "regfile", 0100640)) - ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2\n", - __func__); + if (!expect_mode(testdir.dfd, "regfile", 0100640)) { + ksft_print_msg("%s: wrong file mode bits after fchmodat2\n", + __func__); + goto err; + } - if (!expect_mode(dfd, "symlink", 0120777)) - ksft_exit_fail_msg("%s: wrong symlink mode bits after fchmodat2\n", - __func__); + if (!expect_mode(testdir.dfd, "symlink", 0120777)) { + 
ksft_print_msg("%s: wrong symlink mode bits after fchmodat2\n", + __func__); + goto err; + } - ret = sys_fchmodat2(dfd, "symlink", 0600, AT_SYMLINK_NOFOLLOW); + ret = sys_fchmodat2(testdir.dfd, "symlink", 0600, AT_SYMLINK_NOFOLLOW); /* * On certain filesystems (xfs or btrfs), chmod operation fails. So we @@ -111,18 +162,28 @@ void test_symlink(void) * * https://sourceware.org/legacy-ml/libc-alpha/2020-02/msg00467.html */ - if (ret == 0 && !expect_mode(dfd, "symlink", 0120600)) - ksft_exit_fail_msg("%s: wrong symlink mode bits after fchmodat2 with nofollow\n", + if (ret == 0 && !expect_mode(testdir.dfd, "symlink", 0120600)) { + ksft_print_msg("%s: wrong symlink mode bits after fchmodat2 with nofollow\n", __func__); + ret = 1; + goto err; + } - if (!expect_mode(dfd, "regfile", 0100640)) - ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2 with nofollow\n", - __func__); + if (!expect_mode(testdir.dfd, "regfile", 0100640)) { + ksft_print_msg("%s: wrong file mode bits after fchmodat2 with nofollow\n", + __func__); + } if (ret != 0) ksft_test_result_skip("fchmodat2(symlink)\n"); else ksft_test_result_pass("fchmodat2(symlink)\n"); + cleanup_testdir(&testdir); + return; + +err: + ksft_test_result_fail("fchmodat2(symlink)\n"); + cleanup_testdir(&testdir); } #define NUM_TESTS 2 From 80266c154f7fa4a900136e17e3b813c7a1a78304 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 26 Feb 2026 22:24:43 +0000 Subject: [PATCH 020/127] selftests/fchmodat2: use ksft_finished() The fchmodat2 test program open codes a version of ksft_finished(), use the standard version. 
Link: https://lkml.kernel.org/r/20260226-selftests-fchmodat2-v4-2-a6419435f2e8@kernel.org Signed-off-by: Mark Brown Acked-by: Alexey Gladkov Cc: Christian Brauner Signed-off-by: Andrew Morton --- tools/testing/selftests/fchmodat2/fchmodat2_test.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/testing/selftests/fchmodat2/fchmodat2_test.c b/tools/testing/selftests/fchmodat2/fchmodat2_test.c index e03430c4675c..8e1b6384b319 100644 --- a/tools/testing/selftests/fchmodat2/fchmodat2_test.c +++ b/tools/testing/selftests/fchmodat2/fchmodat2_test.c @@ -196,8 +196,5 @@ int main(int argc, char **argv) test_regfile(); test_symlink(); - if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0) - ksft_exit_fail(); - else - ksft_exit_pass(); + ksft_finished(); } From 3a1c3be1789580a8d15e810f8fb1958c7185680c Mon Sep 17 00:00:00 2001 From: Josh Law Date: Sun, 1 Mar 2026 15:45:53 +0000 Subject: [PATCH 021/127] lib: glob: fix grammar and replace non-inclusive terminology Fix a missing article ('a') in the comment describing the glob implementation, and replace 'blacklists' with 'denylists' to align with the kernel's inclusive terminology guidelines. Link: https://lkml.kernel.org/r/20260301154553.2592681-1-objecting@objecting.org Signed-off-by: Josh Law Signed-off-by: Andrew Morton --- lib/glob.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/glob.c b/lib/glob.c index d0654a5b6f0b..3e4a4b96f6d0 100644 --- a/lib/glob.c +++ b/lib/glob.c @@ -21,7 +21,7 @@ MODULE_LICENSE("Dual MIT/GPL"); * Pattern metacharacters are ?, *, [ and \. * (And, inside character classes, !, - and ].) * - * This is small and simple implementation intended for device blacklists + * This is a small and simple implementation intended for device denylists * where a string is matched against a number of patterns. Thus, it * does not preprocess the patterns. It is non-recursive, and run-time * is at most quadratic: strlen(@str)*strlen(@pat). 
From f5e74cbdbea32e3bf0bcd8d32e7ac8c90518e359 Mon Sep 17 00:00:00 2001 From: Josh Law Date: Sun, 1 Mar 2026 15:21:41 +0000 Subject: [PATCH 022/127] lib: glob: add explicit include for export.h Include <linux/export.h> explicitly instead of relying on it being implicitly included by <linux/module.h> for the EXPORT_SYMBOL macro. Link: https://lkml.kernel.org/r/20260301152143.2572137-1-objecting@objecting.org Signed-off-by: Josh Law Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton --- lib/glob.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/glob.c b/lib/glob.c index 3e4a4b96f6d0..44a8d5e4e99b 100644 --- a/lib/glob.c +++ b/lib/glob.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (GPL-2.0 OR MIT) #include <linux/module.h> #include <linux/glob.h> +#include <linux/export.h> /* * The only reason this code can be compiled as a module is because the From 33a3dd9bfd410044225aa9f812102055d0e21d59 Mon Sep 17 00:00:00 2001 From: Josh Law Date: Sun, 1 Mar 2026 15:21:42 +0000 Subject: [PATCH 023/127] lib: glob: replace bitwise OR with logical operation on boolean Using bitwise OR (|=) on a boolean variable is valid C, but replacing it with a direct logical assignment makes the intent clearer and appeases strict static analysis tools. Link: https://lkml.kernel.org/r/20260301152143.2572137-2-objecting@objecting.org Signed-off-by: Josh Law Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton --- lib/glob.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/glob.c b/lib/glob.c index 44a8d5e4e99b..877bdd0884a6 100644 --- a/lib/glob.c +++ b/lib/glob.c @@ -96,7 +96,8 @@ bool __pure glob_match(char const *pat, char const *str) class += 2; /* Any special action if a > b?
*/ } - match |= (a <= c && c <= b); + if (a <= c && c <= b) + match = true; } while ((a = *class++) != ']'); if (match == inverted) From 90c73d0bfa36ea80f3a55f5426daa46fa2795957 Mon Sep 17 00:00:00 2001 From: Josh Law Date: Sun, 1 Mar 2026 20:38:45 +0000 Subject: [PATCH 024/127] lib/glob: clean up "bool abuse" in pointer arithmetic Replace the implicit 'bool' to 'int' conversion with an explicit ternary operator. This makes the pointer arithmetic clearer and avoids relying on boolean memory representation for logic flow. Link: https://lkml.kernel.org/r/20260301203845.2617217-1-objecting@objecting.org Signed-off-by: Josh Law Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton --- lib/glob.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/glob.c b/lib/glob.c index 877bdd0884a6..69311568ad3d 100644 --- a/lib/glob.c +++ b/lib/glob.c @@ -73,7 +73,7 @@ bool __pure glob_match(char const *pat, char const *str) if (c == '\0') /* No possible match */ return false; bool match = false, inverted = (*pat == '!'); - char const *class = pat + inverted; + char const *class = inverted ? pat + 1 : pat; unsigned char a = *class++; /* From defec2ca7cd743c9b8a29be5012ad7381a5ecbb3 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sun, 1 Mar 2026 13:51:07 +0100 Subject: [PATCH 025/127] crash_dump: use sysfs_emit in sysfs show functions Replace sprintf() with sysfs_emit() in sysfs show functions. sysfs_emit() is preferred for formatting sysfs output because it provides safer bounds checking. No functional changes. 
Link: https://lkml.kernel.org/r/20260301125106.911980-2-thorsten.blum@linux.dev Signed-off-by: Thorsten Blum Acked-by: Baoquan He Cc: Dave Young Cc: Vivek Goyal Signed-off-by: Andrew Morton --- kernel/crash_dump_dm_crypt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/crash_dump_dm_crypt.c b/kernel/crash_dump_dm_crypt.c index 03fce24f1947..8638b821ce58 100644 --- a/kernel/crash_dump_dm_crypt.c +++ b/kernel/crash_dump_dm_crypt.c @@ -6,6 +6,7 @@ #include #include #include +#include #define KEY_NUM_MAX 128 /* maximum dm crypt keys */ #define KEY_SIZE_MAX 256 /* maximum dm crypt key size */ @@ -189,7 +190,7 @@ static inline struct config_key *to_config_key(struct config_item *item) static ssize_t config_key_description_show(struct config_item *item, char *page) { - return sprintf(page, "%s\n", to_config_key(item)->description); + return sysfs_emit(page, "%s\n", to_config_key(item)->description); } static ssize_t config_key_description_store(struct config_item *item, @@ -265,7 +266,7 @@ static struct config_item *config_keys_make_item(struct config_group *group, static ssize_t config_keys_count_show(struct config_item *item, char *page) { - return sprintf(page, "%d\n", key_count); + return sysfs_emit(page, "%d\n", key_count); } CONFIGFS_ATTR_RO(config_keys_, count); @@ -274,7 +275,7 @@ static bool is_dm_key_reused; static ssize_t config_keys_reuse_show(struct config_item *item, char *page) { - return sprintf(page, "%d\n", is_dm_key_reused); + return sysfs_emit(page, "%d\n", is_dm_key_reused); } static ssize_t config_keys_reuse_store(struct config_item *item, @@ -321,7 +322,7 @@ static bool restore; static ssize_t config_keys_restore_show(struct config_item *item, char *page) { - return sprintf(page, "%d\n", restore); + return sysfs_emit(page, "%d\n", restore); } static ssize_t config_keys_restore_store(struct config_item *item, From 420849332f9f9f1ce6ff142868ae2e6ae9f98f65 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 2 Mar 
2026 11:38:22 +0100 Subject: [PATCH 026/127] get_maintainer: add ** glob pattern support Add support for the ** glob operator in MAINTAINERS F: and X: patterns, matching any number of path components (like Python's ** glob). The existing * to .* conversion with slash-count check is preserved. ** is converted to (?:.*), a non-capturing group used as a marker to bypass the slash-count check in file_match_pattern(), allowing the pattern to cross directory boundaries. This enables patterns like F: **/*[_-]kunit*.c to match files at any depth in the tree. Link: https://lkml.kernel.org/r/20260302103822.77343-1-teknoraver@meta.com Signed-off-by: Matteo Croce Acked-by: Joe Perches Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + scripts/get_maintainer.pl | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7d10988cbc62..83e3e87aa053 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -35,6 +35,7 @@ Descriptions of section entries and preferred order F: drivers/net/ all files in and below drivers/net F: drivers/net/* all files in drivers/net, but not below F: */net/* all files in "any top level directory"/net + F: fs/**/*foo*.c all *foo*.c files in any subdirectory of fs One pattern per line. Multiple F: lines acceptable. X: *Excluded* files and directories that are NOT maintained, same rules as F:. Files exclusions are tested before file matches. diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 4414194bedcf..f0ca0db6ddc2 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -375,8 +375,10 @@ sub read_maintainer_file { ##Filename pattern matching if ($type eq "F" || $type eq "X") { $value =~ s@\.@\\\.@g; ##Convert . to \. + $value =~ s/\*\*/\x00/g; ##Convert ** to placeholder $value =~ s/\*/\.\*/g; ##Convert * to .* $value =~ s/\?/\./g; ##Convert ? to . 
+ $value =~ s/\x00/(?:.*)/g; ##Convert placeholder to (?:.*) ##if pattern is a directory and it lacks a trailing slash, add one if ((-d $value)) { $value =~ s@([^/])$@$1/@; @@ -746,8 +748,10 @@ sub self_test { if (($type eq "F" || $type eq "X") && ($self_test eq "" || $self_test =~ /\bpatterns\b/)) { $value =~ s@\.@\\\.@g; ##Convert . to \. + $value =~ s/\*\*/\x00/g; ##Convert ** to placeholder $value =~ s/\*/\.\*/g; ##Convert * to .* $value =~ s/\?/\./g; ##Convert ? to . + $value =~ s/\x00/(?:.*)/g; ##Convert placeholder to (?:.*) ##if pattern is a directory and it lacks a trailing slash, add one if ((-d $value)) { $value =~ s@([^/])$@$1/@; @@ -921,7 +925,7 @@ sub get_maintainers { my $value_pd = ($value =~ tr@/@@); my $file_pd = ($file =~ tr@/@@); $value_pd++ if (substr($value,-1,1) ne "/"); - $value_pd = -1 if ($value =~ /^\.\*/); + $value_pd = -1 if ($value =~ /^(\.\*|\(\?:\.\*\))/); if ($value_pd >= $file_pd && range_is_maintained($start, $end) && range_has_maintainer($start, $end)) { @@ -955,6 +959,7 @@ sub get_maintainers { $line =~ s/([^\\])\.([^\*])/$1\?$2/g; $line =~ s/([^\\])\.$/$1\?/g; ##Convert . back to ? $line =~ s/\\\./\./g; ##Convert \. to . + $line =~ s/\(\?:\.\*\)/\*\*/g; ##Convert (?:.*) to ** $line =~ s/\.\*/\*/g; ##Convert .* to * } my $count = $line =~ s/^([A-Z]):/$1:\t/g; @@ -1048,7 +1053,7 @@ sub file_match_pattern { if ($file =~ m@^$pattern@) { my $s1 = ($file =~ tr@/@@); my $s2 = ($pattern =~ tr@/@@); - if ($s1 == $s2) { + if ($s1 == $s2 || $pattern =~ /\(\?:/) { return 1; } } From 19aa667ace53c7398b08d01def44f96f03708bda Mon Sep 17 00:00:00 2001 From: Heming Zhao Date: Mon, 2 Mar 2026 14:17:05 +0800 Subject: [PATCH 027/127] ocfs2: fix deadlock when creating quota file syzbot detected a circular locking dependency. 
the scenarios: CPU0 CPU1 ---- ---- lock(&ocfs2_quota_ip_alloc_sem_key); lock(&ocfs2_sysfile_lock_key[USER_QUOTA_SYSTEM_INODE]); lock(&ocfs2_quota_ip_alloc_sem_key); lock(&ocfs2_sysfile_lock_key[ORPHAN_DIR_SYSTEM_INODE]); or: CPU0 CPU1 ---- ---- lock(&ocfs2_quota_ip_alloc_sem_key); lock(&dquot->dq_lock); lock(&ocfs2_quota_ip_alloc_sem_key); lock(&ocfs2_sysfile_lock_key[ORPHAN_DIR_SYSTEM_INODE]); Following are the code paths for above scenarios: path_openat ocfs2_create ocfs2_mknod + ocfs2_reserve_new_inode | ocfs2_reserve_suballoc_bits | inode_lock(alloc_inode) //C0: hold INODE_ALLOC_SYSTEM_INODE | //ocfs2_free_alloc_context(inode_ac) is called at the end of | //caller ocfs2_mknod to handle the release | + ocfs2_get_init_inode __dquot_initialize dqget ocfs2_acquire_dquot + ocfs2_lock_global_qf | down_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem)//A2:grabbing + ocfs2_create_local_dquot down_write(&OCFS2_I(lqinode)->ip_alloc_sem)//A3:grabbing evict ocfs2_evict_inode ocfs2_delete_inode ocfs2_wipe_inode + inode_lock(orphan_dir_inode) //B0:hold + ... + ocfs2_remove_inode inode_lock(inode_alloc_inode) //INODE_ALLOC_SYSTEM_INODE down_write(&inode->i_rwsem) //C1:grabbing generic_file_direct_write ocfs2_direct_IO __blockdev_direct_IO dio_complete ocfs2_dio_end_io ocfs2_dio_end_io_write + down_write(&oi->ip_alloc_sem) //A0:hold + ocfs2_del_inode_from_orphan inode_lock(orphan_dir_inode) //B1:grabbing Root cause for the circular locking: DIO completion path: holds oi->ip_alloc_sem and is trying to acquire the orphan_dir_inode lock. evict path: holds the orphan_dir_inode lock and is trying to acquire the inode_alloc_inode lock. ocfs2_mknod path: Holds the inode_alloc_inode lock (to allocate a new quota file) and is blocked waiting for oi->ip_alloc_sem in ocfs2_acquire_dquot(). How to fix: Replace down_write() with down_write_trylock() in ocfs2_acquire_dquot(). If acquiring oi->ip_alloc_sem fails, return -EBUSY to abort the file creation routine and break the deadlock. 
Link: https://lkml.kernel.org/r/20260302061707.7092-1-heming.zhao@suse.com Signed-off-by: Heming Zhao Reported-by: syzbot+78359d5fbb04318c35e9@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=78359d5fbb04318c35e9 Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Signed-off-by: Andrew Morton --- fs/ocfs2/quota_global.c | 16 +++++++++++++++- fs/ocfs2/quota_local.c | 4 +++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index e85b1ccf81be..77b8f0363e94 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -311,11 +311,25 @@ int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) spin_unlock(&dq_data_lock); if (ex) { inode_lock(oinfo->dqi_gqinode); - down_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem); + if (!down_write_trylock(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem)) { + inode_unlock(oinfo->dqi_gqinode); + status = -EBUSY; + goto bail; + } } else { down_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem); } return 0; + +bail: + /* does a similar job as ocfs2_unlock_global_qf */ + ocfs2_inode_unlock(oinfo->dqi_gqinode, ex); + brelse(oinfo->dqi_gqi_bh); + spin_lock(&dq_data_lock); + if (!--oinfo->dqi_gqi_count) + oinfo->dqi_gqi_bh = NULL; + spin_unlock(&dq_data_lock); + return status; } void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index c4e0117d8977..e749cd064c87 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -1224,7 +1224,9 @@ int ocfs2_create_local_dquot(struct dquot *dquot) int status; u64 pcount; - down_write(&OCFS2_I(lqinode)->ip_alloc_sem); + if (!down_write_trylock(&OCFS2_I(lqinode)->ip_alloc_sem)) + return -EBUSY; + chunk = ocfs2_find_free_entry(sb, type, &offset); if (!chunk) { chunk = ocfs2_extend_local_quota_file(sb, type, &offset); From 
7eece6917c541af24a6161b10a150b5744695f80 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 2 Mar 2026 10:28:07 +0100 Subject: [PATCH 028/127] lib: polynomial: move to math/ subfolder Patch series "lib: polynomial: Move to math/ and clean up", v2. While removing Baikal SoC and platform code pieces I found that this code belongs to lib/math/ rather than generic lib/. Hence the move and followed up cleanups. This patch (of 3): The algorithm behind polynomial belongs to our collection of math equations and expressions handling. Move it to math/ subfolder where others of the kind are located. Link: https://lkml.kernel.org/r/20260302092831.2267785-2-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Kuan-Wei Chiu Cc: Randy Dunlap Signed-off-by: Andrew Morton --- lib/Kconfig | 3 --- lib/Makefile | 2 -- lib/math/Kconfig | 3 +++ lib/math/Makefile | 1 + lib/{ => math}/polynomial.c | 0 5 files changed, 4 insertions(+), 5 deletions(-) rename lib/{ => math}/polynomial.c (100%) diff --git a/lib/Kconfig b/lib/Kconfig index 0f2fb9610647..2b0c56a53a2a 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -625,9 +625,6 @@ config PLDMFW config ASN1_ENCODER tristate -config POLYNOMIAL - tristate - config FIRMWARE_TABLE bool diff --git a/lib/Makefile b/lib/Makefile index 1b9ee167517f..60c9c9e79375 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -243,8 +243,6 @@ obj-$(CONFIG_MEMREGION) += memregion.o obj-$(CONFIG_STMP_DEVICE) += stmp_device.o obj-$(CONFIG_IRQ_POLL) += irq_poll.o -obj-$(CONFIG_POLYNOMIAL) += polynomial.o - # stackdepot.c should not be instrumented or call instrumented functions. # Prevent the compiler from calling builtins like memcmp() or bcmp() from this # file. diff --git a/lib/math/Kconfig b/lib/math/Kconfig index 0634b428d0cb..0e6d9cffc5d6 100644 --- a/lib/math/Kconfig +++ b/lib/math/Kconfig @@ -5,6 +5,9 @@ config CORDIC This option provides an implementation of the CORDIC algorithm; calculations are in fixed point. 
Module will be called cordic. +config POLYNOMIAL + tristate + config PRIME_NUMBERS tristate "Simple prime number generator for testing" help diff --git a/lib/math/Makefile b/lib/math/Makefile index d1caba23baa0..9a3850d55b79 100644 --- a/lib/math/Makefile +++ b/lib/math/Makefile @@ -2,6 +2,7 @@ obj-y += div64.o gcd.o lcm.o int_log.o int_pow.o int_sqrt.o reciprocal_div.o obj-$(CONFIG_CORDIC) += cordic.o +obj-$(CONFIG_POLYNOMIAL) += polynomial.o obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o obj-$(CONFIG_RATIONAL) += rational.o diff --git a/lib/polynomial.c b/lib/math/polynomial.c similarity index 100% rename from lib/polynomial.c rename to lib/math/polynomial.c From 512e19a82bee561b8adf11fc37413d24b01382c8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 2 Mar 2026 10:28:08 +0100 Subject: [PATCH 029/127] lib: math: polynomial: don't use 'proxy' headers Update header inclusions to follow IWYU (Include What You Use) principle. Link: https://lkml.kernel.org/r/20260302092831.2267785-3-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Kuan-Wei Chiu Cc: Randy Dunlap Signed-off-by: Andrew Morton --- lib/math/polynomial.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/math/polynomial.c b/lib/math/polynomial.c index 66d383445fec..8c622099ef81 100644 --- a/lib/math/polynomial.c +++ b/lib/math/polynomial.c @@ -10,7 +10,8 @@ * */ -#include +#include +#include #include #include From 118d86a32422c30861f75835ebb926289fc71941 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 2 Mar 2026 10:28:09 +0100 Subject: [PATCH 030/127] lib: math: polynomial: remove link to non-exist file and fix spelling The Baikal SoC and platform support was dropped from the kernel, remove the reference to non-exist file. While at it, fix spelling. 
Link: https://lkml.kernel.org/r/20260302092831.2267785-4-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Randy Dunlap Reviewed-by: Kuan-Wei Chiu Signed-off-by: Andrew Morton --- lib/math/polynomial.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/lib/math/polynomial.c b/lib/math/polynomial.c index 8c622099ef81..f26677cfeeff 100644 --- a/lib/math/polynomial.c +++ b/lib/math/polynomial.c @@ -16,16 +16,13 @@ #include /* - * Originally this was part of drivers/hwmon/bt1-pvt.c. - * There the following conversion is used and should serve as an example here: + * The following conversion is an example: * * The original translation formulae of the temperature (in degrees of Celsius) * to PVT data and vice-versa are following: * - * N = 1.8322e-8*(T^4) + 2.343e-5*(T^3) + 8.7018e-3*(T^2) + 3.9269*(T^1) + - * 1.7204e2 - * T = -1.6743e-11*(N^4) + 8.1542e-8*(N^3) + -1.8201e-4*(N^2) + - * 3.1020e-1*(N^1) - 4.838e1 + * N = 1.8322e-8*(T^4) + 2.343e-5*(T^3) + 8.7018e-3*(T^2) + 3.9269*(T^1) + 1.7204e2 + * T = -1.6743e-11*(N^4) + 8.1542e-8*(N^3) + -1.8201e-4*(N^2) + 3.1020e-1*(N^1) - 4.838e1 * * where T = [-48.380, 147.438]C and N = [0, 1023]. * @@ -36,10 +33,9 @@ * formulae to accept millidegrees of Celsius. Here what they look like after * the alterations: * - * N = (18322e-20*(T^4) + 2343e-13*(T^3) + 87018e-9*(T^2) + 39269e-3*T + - * 17204e2) / 1e4 - * T = -16743e-12*(D^4) + 81542e-9*(D^3) - 182010e-6*(D^2) + 310200e-3*D - - * 48380 + * N = (18322e-20*(T^4) + 2343e-13*(T^3) + 87018e-9*(T^2) + 39269e-3*T + 17204e2) / 1e4 + * T = -16743e-12*(D^4) + 81542e-9*(D^3) - 182010e-6*(D^2) + 310200e-3*D - 48380 + * * where T = [-48380, 147438] mC and N = [0, 1023]. 
* * static const struct polynomial poly_temp_to_N = { @@ -69,13 +65,13 @@ * polynomial_calc - calculate a polynomial using integer arithmetic * * @poly: pointer to the descriptor of the polynomial - * @data: input value of the polynimal + * @data: input value of the polynomial * * Calculate the result of a polynomial using only integer arithmetic. For * this to work without too much loss of precision the coefficients has to * be altered. This is called factor redistribution. * - * Returns the result of the polynomial calculation. + * Return: the result of the polynomial calculation. */ long polynomial_calc(const struct polynomial *poly, long data) { From c8f42847514afa4dfa7f9c953e62386fddd32b8d Mon Sep 17 00:00:00 2001 From: Guru Das Srinagesh Date: Sun, 1 Mar 2026 18:44:20 -0800 Subject: [PATCH 031/127] mailmap: update Guru Das Srinagesh's email address Add my current email address and map previous addresses to it. Link: https://lkml.kernel.org/r/20260301-gds-mailmap-update-2-v1-1-5691415be73c@gurudas.dev Signed-off-by: Guru Das Srinagesh Signed-off-by: Andrew Morton --- .mailmap | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 7d14504daf24..e23a69a9a095 100644 --- a/.mailmap +++ b/.mailmap @@ -305,7 +305,10 @@ Gokul Sriram Palanisamy Govindaraj Saminathan Guo Ren Guo Ren -Guru Das Srinagesh +Guru Das Srinagesh +Guru Das Srinagesh +Guru Das Srinagesh +Guru Das Srinagesh Gustavo Padovan Gustavo Padovan Hamza Mahfooz From 00b5cdeb9fe761654d5a76a411c79b8ff04a81e5 Mon Sep 17 00:00:00 2001 From: Aaron Tomlin Date: Tue, 3 Mar 2026 15:30:29 -0500 Subject: [PATCH 032/127] hung_task: refactor detection logic and atomicise detection count Patch series "hung_task: Provide runtime reset interface for hung task detector", v9. This series introduces the ability to reset /proc/sys/kernel/hung_task_detect_count. Writing a "0" value to this file atomically resets the counter of detected hung tasks. 
This functionality provides system administrators with the means to clear the cumulative diagnostic history following incident resolution, thereby simplifying subsequent monitoring without necessitating a system restart. This patch (of 3): The check_hung_task() function currently conflates two distinct responsibilities: validating whether a task is hung and handling the subsequent reporting (printing warnings, triggering panics, or tracepoints). This patch refactors the logic by introducing hung_task_info(), a function dedicated solely to reporting. The actual detection check, task_is_hung(), is hoisted into the primary loop within check_hung_uninterruptible_tasks(). This separation clearly decouples the mechanism of detection from the policy of reporting. Furthermore, to facilitate future support for concurrent hung task detection, the global sysctl_hung_task_detect_count variable is converted from unsigned long to atomic_long_t. Consequently, the counting logic is updated to accumulate the number of hung tasks locally (this_round_count) during the iteration. The global counter is then updated atomically via atomic_long_cmpxchg_relaxed() once the loop concludes, rather than incrementally during the scan. These changes are strictly preparatory and introduce no functional change to the system's runtime behaviour. 
Link: https://lkml.kernel.org/r/20260303203031.4097316-1-atomlin@atomlin.com Link: https://lkml.kernel.org/r/20260303203031.4097316-2-atomlin@atomlin.com Signed-off-by: Aaron Tomlin Reviewed-by: Masami Hiramatsu (Google) Reviewed-by: Petr Mladek Cc: Greg Kroah-Hartman Cc: Lance Yang Cc: Masami Hiramatsu Cc: Joel Granados Signed-off-by: Andrew Morton --- kernel/hung_task.c | 58 ++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index d2254c91450b..df10830ed9ef 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -36,7 +36,7 @@ static int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; /* * Total number of tasks detected as hung since boot: */ -static unsigned long __read_mostly sysctl_hung_task_detect_count; +static atomic_long_t sysctl_hung_task_detect_count = ATOMIC_LONG_INIT(0); /* * Limit number of tasks checked in a batch. @@ -223,31 +223,29 @@ static inline void debug_show_blocker(struct task_struct *task, unsigned long ti } #endif -static void check_hung_task(struct task_struct *t, unsigned long timeout, - unsigned long prev_detect_count) +/** + * hung_task_info - Print diagnostic details for a hung task + * @t: Pointer to the detected hung task. + * @timeout: Timeout threshold for detecting hung tasks + * @this_round_count: Count of hung tasks detected in the current iteration + * + * Print structured information about the specified hung task, if warnings + * are enabled or if the panic batch threshold is exceeded. + */ +static void hung_task_info(struct task_struct *t, unsigned long timeout, + unsigned long this_round_count) { - unsigned long total_hung_task; - - if (!task_is_hung(t, timeout)) - return; - - /* - * This counter tracks the total number of tasks detected as hung - * since boot. 
- */ - sysctl_hung_task_detect_count++; - - total_hung_task = sysctl_hung_task_detect_count - prev_detect_count; trace_sched_process_hang(t); - if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) { + if (sysctl_hung_task_panic && this_round_count >= sysctl_hung_task_panic) { console_verbose(); hung_task_call_panic = true; } /* - * Ok, the task did not get scheduled for more than 2 minutes, - * complain: + * The given task did not get scheduled for more than + * CONFIG_DEFAULT_HUNG_TASK_TIMEOUT. Therefore, complain + * accordingly */ if (sysctl_hung_task_warnings || hung_task_call_panic) { if (sysctl_hung_task_warnings > 0) @@ -297,18 +295,18 @@ static bool rcu_lock_break(struct task_struct *g, struct task_struct *t) /* * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for - * a really long time (120 seconds). If that happens, print out - * a warning. + * a really long time. If that happens, print out a warning. */ static void check_hung_uninterruptible_tasks(unsigned long timeout) { int max_count = sysctl_hung_task_check_count; unsigned long last_break = jiffies; struct task_struct *g, *t; - unsigned long prev_detect_count = sysctl_hung_task_detect_count; + unsigned long total_count, this_round_count; int need_warning = sysctl_hung_task_warnings; unsigned long si_mask = hung_task_si_mask; + total_count = atomic_long_read(&sysctl_hung_task_detect_count); /* * If the system crashed already then all bets are off, * do not report extra hung tasks: @@ -316,10 +314,9 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) if (test_taint(TAINT_DIE) || did_panic) return; - + this_round_count = 0; rcu_read_lock(); for_each_process_thread(g, t) { - if (!max_count--) goto unlock; if (time_after(jiffies, last_break + HUNG_TASK_LOCK_BREAK)) { @@ -328,14 +325,25 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) last_break = jiffies; } - check_hung_task(t, timeout, prev_detect_count); + if (task_is_hung(t, 
timeout)) { + this_round_count++; + hung_task_info(t, timeout, this_round_count); + } } unlock: rcu_read_unlock(); - if (!(sysctl_hung_task_detect_count - prev_detect_count)) + if (!this_round_count) return; + /* + * This counter tracks the total number of tasks detected as hung + * since boot. + */ + atomic_long_cmpxchg_relaxed(&sysctl_hung_task_detect_count, + total_count, total_count + + this_round_count); + if (need_warning || hung_task_call_panic) { si_mask |= SYS_INFO_LOCKS; From 49085e1b70f898695b63594ff559f5a243589b83 Mon Sep 17 00:00:00 2001 From: Aaron Tomlin Date: Tue, 3 Mar 2026 15:30:30 -0500 Subject: [PATCH 033/127] hung_task: enable runtime reset of hung_task_detect_count Currently, the hung_task_detect_count sysctl provides a cumulative count of hung tasks since boot. In long-running, high-availability environments, this counter may lose its utility if it cannot be reset once an incident has been resolved. Furthermore, the previous implementation relied upon implicit ordering, which could not strictly guarantee that diagnostic metadata published by one CPU was visible to the panic logic on another. This patch introduces the capability to reset the detection count by writing "0" to the hung_task_detect_count sysctl. The proc_handler logic has been updated to validate this input and atomically reset the counter. The synchronisation of sysctl_hung_task_detect_count relies upon a transactional model to ensure the integrity of the detection counter against concurrent resets from userspace. The application of atomic_long_read_acquire() and atomic_long_cmpxchg_release() is correct and provides the following guarantees: 1. Prevention of Load-Store Reordering via Acquire Semantics By utilising atomic_long_read_acquire() to snapshot the counter before initiating the task traversal, we establish a strict memory barrier. This prevents the compiler or hardware from reordering the initial load to a point later in the scan. 
Without this "acquire" barrier, a delayed load could potentially read a "0" value resulting from a userspace reset that occurred mid-scan. This would lead to the subsequent cmpxchg succeeding erroneously, thereby overwriting the user's reset with stale increment data. 2. Atomicity of the "Commit" Phase via Release Semantics The atomic_long_cmpxchg_release() serves as the transaction's commit point. The "release" barrier ensures that all diagnostic recordings and task-state observations made during the scan are globally visible before the counter is incremented. 3. Race Condition Resolution This pairing effectively detects any "out-of-band" reset of the counter. If sysctl_hung_task_detect_count is modified via the procfs interface during the scan, the final cmpxchg will detect the discrepancy between the current value and the "acquire" snapshot. Consequently, the update will fail, ensuring that a reset command from the administrator is prioritised over a scan that may have been invalidated by that very reset. Link: https://lkml.kernel.org/r/20260303203031.4097316-3-atomlin@atomlin.com Signed-off-by: Aaron Tomlin Reviewed-by: Masami Hiramatsu (Google) Reviewed-by: Joel Granados Reviewed-by: Petr Mladek Cc: Greg Kroah-Hartman Cc: Lance Yang Signed-off-by: Andrew Morton --- Documentation/admin-guide/sysctl/kernel.rst | 3 +- kernel/hung_task.c | 58 ++++++++++++++++++--- 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 9aed74e65cf4..c6994e55d141 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -418,7 +418,8 @@ hung_task_detect_count ====================== Indicates the total number of tasks that have been detected as hung since -the system boot. +the system boot or since the counter was reset. The counter is zeroed when +a value of 0 is written. 
This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled. diff --git a/kernel/hung_task.c b/kernel/hung_task.c index df10830ed9ef..350093de0535 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -306,7 +306,11 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) int need_warning = sysctl_hung_task_warnings; unsigned long si_mask = hung_task_si_mask; - total_count = atomic_long_read(&sysctl_hung_task_detect_count); + /* + * The counter might get reset. Remember the initial value. + * Acquire prevents reordering task checks before this point. + */ + total_count = atomic_long_read_acquire(&sysctl_hung_task_detect_count); /* * If the system crashed already then all bets are off, * do not report extra hung tasks: @@ -337,10 +341,11 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) return; /* - * This counter tracks the total number of tasks detected as hung - * since boot. + * Do not count this round when the global counter has been reset + * during this check. Release ensures we see all hang details + * recorded during the scan. */ - atomic_long_cmpxchg_relaxed(&sysctl_hung_task_detect_count, + atomic_long_cmpxchg_release(&sysctl_hung_task_detect_count, total_count, total_count + this_round_count); @@ -366,6 +371,46 @@ static long hung_timeout_jiffies(unsigned long last_checked, } #ifdef CONFIG_SYSCTL + +/** + * proc_dohung_task_detect_count - proc handler for hung_task_detect_count + * @table: Pointer to the struct ctl_table definition for this proc entry + * @dir: Flag indicating the operation + * @buffer: User space buffer for data transfer + * @lenp: Pointer to the length of the data being transferred + * @ppos: Pointer to the current file offset + * + * This handler is used for reading the current hung task detection count + * and for resetting it to zero when a write operation is performed using a + * zero value only. + * Return: 0 on success, or a negative error code on failure. 
+ */ +static int proc_dohung_task_detect_count(const struct ctl_table *table, int dir, + void *buffer, size_t *lenp, loff_t *ppos) +{ + unsigned long detect_count; + struct ctl_table proxy_table; + int err; + + proxy_table = *table; + proxy_table.data = &detect_count; + + if (SYSCTL_KERN_TO_USER(dir)) + detect_count = atomic_long_read(&sysctl_hung_task_detect_count); + + err = proc_doulongvec_minmax(&proxy_table, dir, buffer, lenp, ppos); + if (err < 0) + return err; + + if (SYSCTL_USER_TO_KERN(dir)) { + if (detect_count) + return -EINVAL; + atomic_long_set(&sysctl_hung_task_detect_count, 0); + } + + return 0; +} + /* * Process updating of timeout sysctl */ @@ -446,10 +491,9 @@ static const struct ctl_table hung_task_sysctls[] = { }, { .procname = "hung_task_detect_count", - .data = &sysctl_hung_task_detect_count, .maxlen = sizeof(unsigned long), - .mode = 0444, - .proc_handler = proc_doulongvec_minmax, + .mode = 0644, + .proc_handler = proc_dohung_task_detect_count, }, { .procname = "hung_task_sys_info", From 5eaef7f8ee40150cbd78a7b445001929bb2d2031 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Tue, 3 Mar 2026 15:30:31 -0500 Subject: [PATCH 034/127] hung_task: increment the global counter immediately A recent change allowed to reset the global counter of hung tasks using the sysctl interface. A potential race with the regular check has been solved by updating the global counter only once at the end of the check. However, the hung task check can take a significant amount of time, particularly when task information is being dumped to slow serial consoles. Some users monitor this global counter to trigger immediate migration of critical containers. Delaying the increment until the full check completes postpones these high-priority rescue operations. Update the global counter as soon as a hung task is detected. Since the value is read asynchronously, a relaxed atomic operation is sufficient. 
Link: https://lkml.kernel.org/r/20260303203031.4097316-4-atomlin@atomlin.com Signed-off-by: Petr Mladek Signed-off-by: Aaron Tomlin Reported-by: Lance Yang Closes: https://lore.kernel.org/r/f239e00f-4282-408d-b172-0f9885f4b01b@linux.dev Reviewed-by: Aaron Tomlin Reviewed-by: Lance Yang Cc: Greg Kroah-Hartman Cc: Joel Granados Cc: Masami Hiramatsu (Google) Signed-off-by: Andrew Morton --- kernel/hung_task.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 350093de0535..8bc043fbe89c 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -302,15 +302,10 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) int max_count = sysctl_hung_task_check_count; unsigned long last_break = jiffies; struct task_struct *g, *t; - unsigned long total_count, this_round_count; + unsigned long this_round_count; int need_warning = sysctl_hung_task_warnings; unsigned long si_mask = hung_task_si_mask; - /* - * The counter might get reset. Remember the initial value. - * Acquire prevents reordering task checks before this point. - */ - total_count = atomic_long_read_acquire(&sysctl_hung_task_detect_count); /* * If the system crashed already then all bets are off, * do not report extra hung tasks: @@ -330,6 +325,13 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) } if (task_is_hung(t, timeout)) { + /* + * Increment the global counter so that userspace could + * start migrating tasks ASAP. But count the current + * round separately because userspace could reset + * the global counter at any time. + */ + atomic_long_inc(&sysctl_hung_task_detect_count); this_round_count++; hung_task_info(t, timeout, this_round_count); } @@ -340,15 +342,6 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) if (!this_round_count) return; - /* - * Do not count this round when the global counter has been reset - * during this check. 
Release ensures we see all hang details - * recorded during the scan. - */ - atomic_long_cmpxchg_release(&sysctl_hung_task_detect_count, - total_count, total_count + - this_round_count); - if (need_warning || hung_task_call_panic) { si_mask |= SYS_INFO_LOCKS; From 73d40c42f6aa1702f685261911429bf5265f78d5 Mon Sep 17 00:00:00 2001 From: Aaron Tomlin Date: Tue, 3 Mar 2026 17:13:24 -0500 Subject: [PATCH 035/127] hung_task: explicitly report I/O wait state in log output Currently, the hung task reporting mechanism indiscriminately labels all TASK_UNINTERRUPTIBLE (D) tasks as "blocked", irrespective of whether they are awaiting I/O completion or kernel locking primitives. This ambiguity compels system administrators to manually inspect stack traces to discern whether the delay stems from an I/O wait (typically indicative of hardware or filesystem anomalies) or software contention. Such detailed analysis is not always immediately accessible to system administrators or support engineers. To address this, this patch utilises the existing in_iowait field within struct task_struct to augment the failure report. If the task is blocked due to I/O (e.g., via io_schedule_prepare()), the log message is updated to explicitly state "blocked in I/O wait". Examples: - Standard Block: "INFO: task bash:123 blocked for more than 120 seconds". - I/O Block: "INFO: task dd:456 blocked in I/O wait for more than 120 seconds". Theoretically, concurrent executions of io_schedule_finish() could result in a race condition where the read flag does not precisely correlate with the subsequently printed backtrace. However, this limitation is deemed acceptable in practice. The entire reporting mechanism is inherently racy by design; nevertheless, it remains highly reliable in the vast majority of cases, particularly because it primarily captures protracted stalls. Consequently, introducing additional synchronisation to mitigate this minor inaccuracy would be entirely disproportionate to the situation. 
Link: https://lkml.kernel.org/r/20260303221324.4106917-1-atomlin@atomlin.com Signed-off-by: Aaron Tomlin Acked-by: Masami Hiramatsu (Google) Reviewed-by: Petr Mladek Cc: Greg Kroah-Hartman Cc: Lance Yang Signed-off-by: Andrew Morton --- kernel/hung_task.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 8bc043fbe89c..6fcc94ce4ca9 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -250,8 +250,9 @@ static void hung_task_info(struct task_struct *t, unsigned long timeout, if (sysctl_hung_task_warnings || hung_task_call_panic) { if (sysctl_hung_task_warnings > 0) sysctl_hung_task_warnings--; - pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", - t->comm, t->pid, (jiffies - t->last_switch_time) / HZ); + pr_err("INFO: task %s:%d blocked%s for more than %ld seconds.\n", + t->comm, t->pid, t->in_iowait ? " in I/O wait" : "", + (jiffies - t->last_switch_time) / HZ); pr_err(" %s %s %.*s\n", print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), From 8e4513303b8726e4434f718ab39749cbb4c142b1 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 4 Mar 2026 12:06:43 +0100 Subject: [PATCH 036/127] scripts/gdb/symbols: handle module path parameters commit 581ee79a2547 ("scripts/gdb/symbols: make BPF debug info available to GDB") added support to make BPF debug information available to GDB. However, the argument handling loop was slightly broken, causing it to fail if further modules were passed. Fix it to append these passed modules to the instance variable after expansion. 
Link: https://lkml.kernel.org/r/20260304110642.2020614-2-benjamin@sipsolutions.net Fixes: 581ee79a2547 ("scripts/gdb/symbols: make BPF debug info available to GDB") Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Cc: Ilya Leoshkevich Cc: Jan Kiszka Cc: Kieran Bingham Cc: Signed-off-by: Andrew Morton --- scripts/gdb/linux/symbols.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py index d4308b726183..943ff1228b48 100644 --- a/scripts/gdb/linux/symbols.py +++ b/scripts/gdb/linux/symbols.py @@ -298,7 +298,7 @@ are loaded as well.""" if p == "-bpf": monitor_bpf = True else: - p.append(os.path.abspath(os.path.expanduser(p))) + self.module_paths.append(os.path.abspath(os.path.expanduser(p))) self.module_paths.append(os.getcwd()) if self.breakpoint is not None: From e54f7f67e9b361b3213b5b75e0cf7167c1105fc7 Mon Sep 17 00:00:00 2001 From: Josh Law Date: Fri, 6 Mar 2026 16:12:50 +0000 Subject: [PATCH 037/127] lib/uuid: fix typo "reversion" to "revision" in comment Fix a typo in __uuid_gen_common() where "reversion" (meaning to revert) was used instead of "revision" when describing the UUID variant field. 
Link: https://lkml.kernel.org/r/20260306161250.2811500-1-objecting@objecting.org Signed-off-by: Josh Law Reviewed-by: Andy Shevchenko Signed-off-by: Andrew Morton --- lib/uuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/uuid.c b/lib/uuid.c index e8543c668dc7..128a51f1879b 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -54,7 +54,7 @@ EXPORT_SYMBOL(generate_random_guid); static void __uuid_gen_common(__u8 b[16]) { get_random_bytes(b, 16); - /* reversion 0b10 */ + /* revision 0b10 */ b[8] = (b[8] & 0x3F) | 0x80; } From bc6cc36855e9ebcf0c7e01f980ad67bcb0e50bdb Mon Sep 17 00:00:00 2001 From: Josh Law Date: Fri, 6 Mar 2026 16:16:12 +0000 Subject: [PATCH 038/127] lib/inflate: fix memory leak in inflate_fixed() on inflate_codes() failure When inflate_codes() fails in inflate_fixed(), only the length list 'l' is freed, but the Huffman tables 'tl' and 'td' are leaked. Add the missing huft_free() calls on the error path. Link: https://lkml.kernel.org/r/20260306161612.2811703-1-objecting@objecting.org Signed-off-by: Josh Law Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton --- lib/inflate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/inflate.c b/lib/inflate.c index eab886baa1b4..5b1e70d0ce8c 100644 --- a/lib/inflate.c +++ b/lib/inflate.c @@ -811,6 +811,8 @@ DEBG(" Date: Fri, 6 Mar 2026 16:16:47 +0000 Subject: [PATCH 039/127] lib/inflate: fix memory leak in inflate_dynamic() on inflate_codes() failure When inflate_codes() fails in inflate_dynamic(), the code jumps to the 'out' label which only frees 'll', leaking the Huffman tables 'tl' and 'td'. Restructure the code so that the decoding tables are always freed before reaching the 'out' label. 
Link: https://lkml.kernel.org/r/20260306161647.2811874-1-objecting@objecting.org Signed-off-by: Josh Law Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton --- lib/inflate.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/inflate.c b/lib/inflate.c index 5b1e70d0ce8c..ae704e4f64ec 100644 --- a/lib/inflate.c +++ b/lib/inflate.c @@ -1009,10 +1009,10 @@ DEBG("dyn5d "); DEBG("dyn6 "); /* decompress until an end-of-block code */ - if (inflate_codes(tl, td, bl, bd)) { + if (inflate_codes(tl, td, bl, bd)) ret = 1; - goto out; - } + else + ret = 0; DEBG("dyn7 "); @@ -1021,7 +1021,6 @@ DEBG("dyn7 "); huft_free(td); DEBG(">"); - ret = 0; out: free(ll); return ret; From b2a7f5f88ae45e1b18793f58788059c8a61ecfb9 Mon Sep 17 00:00:00 2001 From: Josh Law Date: Fri, 6 Mar 2026 16:17:07 +0000 Subject: [PATCH 040/127] lib/inflate: fix grammar in comment: "variable" to "variables" Fix "all variable" to "all variables" in the file header comment. Link: https://lkml.kernel.org/r/20260306161707.2812005-1-objecting@objecting.org Signed-off-by: Josh Law Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton --- lib/inflate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/inflate.c b/lib/inflate.c index ae704e4f64ec..2bba475d4652 100644 --- a/lib/inflate.c +++ b/lib/inflate.c @@ -9,7 +9,7 @@ * based on gzip-1.0.3 * * Nicolas Pitre , 1999/04/14 : - * Little mods for all variable to reside either into rodata or bss segments + * Little mods for all variables to reside either into rodata or bss segments * by marking constant variables with 'const' and initializing all the others * at run-time only. This allows for the kernel uncompressor to run * directly from Flash or ROM memory on embedded systems. 
From 5ab288ffab948a8201bdac5c5845abe4d36005f1 Mon Sep 17 00:00:00 2001 From: Josh Law Date: Fri, 6 Mar 2026 16:17:32 +0000 Subject: [PATCH 041/127] lib/inflate: fix typo "This results" to "The results" in comment Fix "This results of this trade" to "The results of this trade" in the comment describing the lbits and dbits tuning parameters. Link: https://lkml.kernel.org/r/20260306161732.2812132-1-objecting@objecting.org Signed-off-by: Josh Law Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton --- lib/inflate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/inflate.c b/lib/inflate.c index 2bba475d4652..44a7da582baa 100644 --- a/lib/inflate.c +++ b/lib/inflate.c @@ -286,7 +286,7 @@ static void free(void *where) the longer codes. The time it costs to decode the longer codes is then traded against the time it takes to make longer tables. - This results of this trade are in the variables lbits and dbits + The results of this trade are in the variables lbits and dbits below. lbits is the number of bits the first level table for literal/ length codes can decode in one step, and dbits is the same thing for the distance codes. Subsequent tables are also less than or equal to From 6a2804112d98d351df664d9209aa459a2e2fa0d0 Mon Sep 17 00:00:00 2001 From: Josh Law Date: Fri, 6 Mar 2026 16:23:27 +0000 Subject: [PATCH 042/127] lib/bug: fix inconsistent capitalization in BUG message Use lowercase "kernel BUG" consistently in pr_crit() messages. The verbose path already uses "kernel BUG at %s:%u!" but the non-verbose fallback uses "Kernel BUG" with an uppercase 'K'. 
Link: https://lkml.kernel.org/r/20260306162327.2815553-1-objecting@objecting.org Signed-off-by: Josh Law Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton --- lib/bug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bug.c b/lib/bug.c index 623c467a8b76..d7faa1255f85 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -254,7 +254,7 @@ static enum bug_trap_type __report_bug(struct bug_entry *bug, unsigned long buga if (file) pr_crit("kernel BUG at %s:%u!\n", file, line); else - pr_crit("Kernel BUG at %pB [verbose debug info unavailable]\n", + pr_crit("kernel BUG at %pB [verbose debug info unavailable]\n", (void *)bugaddr); return BUG_TRAP_TYPE_BUG; From 228491c380cf0d569634587d4042f9a2ebb9f93e Mon Sep 17 00:00:00 2001 From: Josh Law Date: Fri, 6 Mar 2026 16:24:18 +0000 Subject: [PATCH 043/127] lib/bug: remove unnecessary variable initializations Remove the unnecessary initialization of 'rcu' to false in report_bug_entry() and report_bug(), as it is assigned by warn_rcu_enter() before its first use. 
Link: https://lkml.kernel.org/r/20260306162418.2815979-1-objecting@objecting.org Signed-off-by: Josh Law Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton --- lib/bug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/bug.c b/lib/bug.c index d7faa1255f85..bbc301097749 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -263,7 +263,7 @@ static enum bug_trap_type __report_bug(struct bug_entry *bug, unsigned long buga enum bug_trap_type report_bug_entry(struct bug_entry *bug, struct pt_regs *regs) { enum bug_trap_type ret; - bool rcu = false; + bool rcu; rcu = warn_rcu_enter(); ret = __report_bug(bug, bug_addr(bug), regs); @@ -275,7 +275,7 @@ enum bug_trap_type report_bug_entry(struct bug_entry *bug, struct pt_regs *regs) enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) { enum bug_trap_type ret; - bool rcu = false; + bool rcu; rcu = warn_rcu_enter(); ret = __report_bug(NULL, bugaddr, regs); From b02da26a992db0c0e2559acbda0fc48d4a2fd337 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Fri, 6 Mar 2026 11:22:11 +0800 Subject: [PATCH 044/127] ocfs2: fix possible deadlock between unlink and dio_end_io_write ocfs2_unlink takes orphan dir inode_lock first and then ip_alloc_sem, while in ocfs2_dio_end_io_write, it acquires these locks in reverse order. This creates an ABBA lock ordering violation on lock classes ocfs2_sysfile_lock_key[ORPHAN_DIR_SYSTEM_INODE] and ocfs2_file_ip_alloc_sem_key. 
Lock Chain #0 (orphan dir inode_lock -> ip_alloc_sem): ocfs2_unlink ocfs2_prepare_orphan_dir ocfs2_lookup_lock_orphan_dir inode_lock(orphan_dir_inode) <- lock A __ocfs2_prepare_orphan_dir ocfs2_prepare_dir_for_insert ocfs2_extend_dir ocfs2_expand_inline_dir down_write(&oi->ip_alloc_sem) <- Lock B Lock Chain #1 (ip_alloc_sem -> orphan dir inode_lock): ocfs2_dio_end_io_write down_write(&oi->ip_alloc_sem) <- Lock B ocfs2_del_inode_from_orphan() inode_lock(orphan_dir_inode) <- Lock A Deadlock Scenario: CPU0 (unlink) CPU1 (dio_end_io_write) ------ ------ inode_lock(orphan_dir_inode) down_write(ip_alloc_sem) down_write(ip_alloc_sem) inode_lock(orphan_dir_inode) Since ip_alloc_sem is to protect allocation changes, which is unrelated with operations in ocfs2_del_inode_from_orphan. So move ocfs2_del_inode_from_orphan out of ip_alloc_sem to fix the deadlock. Link: https://lkml.kernel.org/r/20260306032211.1016452-1-joseph.qi@linux.alibaba.com Reported-by: syzbot+67b90111784a3eac8c04@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=67b90111784a3eac8c04 Fixes: a86a72a4a4e0 ("ocfs2: take ip_alloc_sem in ocfs2_dio_get_block & ocfs2_dio_end_io_write") Signed-off-by: Joseph Qi Reviewed-by: Heming Zhao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Jun Piao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/aops.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 17ba79f443ee..09146b43d1f0 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2294,8 +2294,6 @@ static int ocfs2_dio_end_io_write(struct inode *inode, goto out; } - down_write(&oi->ip_alloc_sem); - /* Delete orphan before acquire i_rwsem. 
*/ if (dwc->dw_orphaned) { BUG_ON(dwc->dw_writer_pid != task_pid_nr(current)); @@ -2308,6 +2306,7 @@ static int ocfs2_dio_end_io_write(struct inode *inode, mlog_errno(ret); } + down_write(&oi->ip_alloc_sem); di = (struct ocfs2_dinode *)di_bh->b_data; ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); From d4dba3b9c03a326cfa73833d6b166aeb442f82b5 Mon Sep 17 00:00:00 2001 From: Josh Law Date: Sun, 8 Mar 2026 16:50:12 +0000 Subject: [PATCH 045/127] lib: decompress_bunzip2: fix 32-bit shift undefined behavior Fix undefined behavior caused by shifting a 32-bit integer by 32 bits during decompression. This prevents potential kernel decompression failures or corruption when parsing malicious or malformed bzip2 archives. Link: https://lkml.kernel.org/r/20260308165012.2872633-1-objecting@objecting.org Signed-off-by: Josh Law Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton --- lib/decompress_bunzip2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index ca736166f100..1288f146661f 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -135,7 +135,7 @@ static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted) } /* Avoid 32-bit overflow (dump bit buffer to top of output) */ if (bd->inbufBitCount >= 24) { - bits = bd->inbufBits&((1 << bd->inbufBitCount)-1); + bits = bd->inbufBits & ((1ULL << bd->inbufBitCount) - 1); bits_wanted -= bd->inbufBitCount; bits <<= bits_wanted; bd->inbufBitCount = 0; @@ -146,7 +146,7 @@ static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted) } /* Calculate result */ bd->inbufBitCount -= bits_wanted; - bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1); + bits |= (bd->inbufBits >> bd->inbufBitCount) & ((1ULL << bits_wanted) - 1); return bits; } From e5bbb35a07b3b9fe7b5e70b641344db68593215d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 7 Mar 2026 09:47:52 
+0100 Subject: [PATCH 046/127] tools headers UAPI: sync linux/taskstats.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "tools/getdelays: use the static UAPI headers from tools/include/uapi". The include directory ../../usr/include is only present if an in-tree kernel build with CONFIG_HEADERS_INSTALL was done before. Otherwise the system UAPI headers are used, which most likely are not the most recent ones. To make sure to always have access to up-to-date UAPI headers, use the static copy in tools/include/uapi. This patch (of 2): To give the accounting tools access to the new fields introduced in commit 503efe850c74 ("delayacct: add timestamp of delay max") Link: https://lkml.kernel.org/r/20260307-accounting-taskstats-h-v1-0-0b75915c6ce5@weissschuh.net Link: https://lkml.kernel.org/r/20260307-accounting-taskstats-h-v1-1-0b75915c6ce5@weissschuh.net Signed-off-by: Thomas Weißschuh Cc: Arnd Bergmann Cc: Balbir Singh Cc: Jiang Kun Cc: Wang Yaxin Cc: xu xin Cc: Yang Yang Cc: kernel test robot Signed-off-by: Andrew Morton --- tools/include/uapi/linux/taskstats.h | 291 +++++++++++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 tools/include/uapi/linux/taskstats.h diff --git a/tools/include/uapi/linux/taskstats.h b/tools/include/uapi/linux/taskstats.h new file mode 100644 index 000000000000..3ae25f3ce067 --- /dev/null +++ b/tools/include/uapi/linux/taskstats.h @@ -0,0 +1,291 @@ +/* SPDX-License-Identifier: LGPL-2.1 WITH Linux-syscall-note */ +/* taskstats.h - exporting per-task statistics + * + * Copyright (C) Shailabh Nagar, IBM Corp. 2006 + * (C) Balbir Singh, IBM Corp. 2006 + * (C) Jay Lan, SGI, 2006 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef _LINUX_TASKSTATS_H +#define _LINUX_TASKSTATS_H + +#include +#include + +/* Format for per-task data returned to userland when + * - a task exits + * - listener requests stats for a task + * + * The struct is versioned. Newer versions should only add fields to + * the bottom of the struct to maintain backward compatibility. + * + * + * To add new fields + * a) bump up TASKSTATS_VERSION + * b) add comment indicating new version number at end of struct + * c) add new fields after version comment; maintain 64-bit alignment + */ + + +#define TASKSTATS_VERSION 17 +#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN + * in linux/sched.h */ + +struct taskstats { + + /* The version number of this struct. This field is always set to + * TAKSTATS_VERSION, which is defined in . + * Each time the struct is changed, the value should be incremented. + */ + __u16 version; + __u32 ac_exitcode; /* Exit status */ + + /* The accounting flags of a task as defined in + * Defined values are AFORK, ASU, ACOMPAT, ACORE, AXSIG, and AGROUP. + * (AGROUP since version 12). 
+ */ + __u8 ac_flag; /* Record flags */ + __u8 ac_nice; /* task_nice */ + + /* Delay accounting fields start + * + * All values, until comment "Delay accounting fields end" are + * available only if delay accounting is enabled, even though the last + * few fields are not delays + * + * xxx_count is the number of delay values recorded + * xxx_delay_total is the corresponding cumulative delay in nanoseconds + * + * xxx_delay_total wraps around to zero on overflow + * xxx_count incremented regardless of overflow + */ + + /* Delay waiting for cpu, while runnable + * count, delay_total NOT updated atomically + */ + __u64 cpu_count __attribute__((aligned(8))); + __u64 cpu_delay_total; + + /* Following four fields atomically updated using task->delays->lock */ + + /* Delay waiting for synchronous block I/O to complete + * does not account for delays in I/O submission + */ + __u64 blkio_count; + __u64 blkio_delay_total; + + /* Delay waiting for page fault I/O (swap in only) */ + __u64 swapin_count; + __u64 swapin_delay_total; + + /* cpu "wall-clock" running time + * On some architectures, value will adjust for cpu time stolen + * from the kernel in involuntary waits due to virtualization. + * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_real_total; + + /* cpu "virtual" running time + * Uses time intervals seen by the kernel i.e. no adjustment + * for kernel's involuntary waits due to virtualization. 
+ * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_virtual_total; + /* Delay accounting fields end */ + /* version 1 ends here */ + + /* Basic Accounting Fields start */ + char ac_comm[TS_COMM_LEN]; /* Command name */ + __u8 ac_sched __attribute__((aligned(8))); + /* Scheduling discipline */ + __u8 ac_pad[3]; + __u32 ac_uid __attribute__((aligned(8))); + /* User ID */ + __u32 ac_gid; /* Group ID */ + __u32 ac_pid; /* Process ID */ + __u32 ac_ppid; /* Parent process ID */ + /* __u32 range means times from 1970 to 2106 */ + __u32 ac_btime; /* Begin time [sec since 1970] */ + __u64 ac_etime __attribute__((aligned(8))); + /* Elapsed time [usec] */ + __u64 ac_utime; /* User CPU time [usec] */ + __u64 ac_stime; /* SYstem CPU time [usec] */ + __u64 ac_minflt; /* Minor Page Fault Count */ + __u64 ac_majflt; /* Major Page Fault Count */ + /* Basic Accounting Fields end */ + + /* Extended accounting fields start */ + /* Accumulated RSS usage in duration of a task, in MBytes-usecs. + * The current rss usage is added to this counter every time + * a tick is charged to a task's system time. So, at the end we + * will have memory usage multiplied by system time. Thus an + * average usage per system time unit can be calculated. + */ + __u64 coremem; /* accumulated RSS usage in MB-usec */ + /* Accumulated virtual memory usage in duration of a task. + * Same as acct_rss_mem1 above except that we keep track of VM usage. + */ + __u64 virtmem; /* accumulated VM usage in MB-usec */ + + /* High watermark of RSS and virtual memory usage in duration of + * a task, in KBytes. + */ + __u64 hiwater_rss; /* High-watermark of RSS usage, in KB */ + __u64 hiwater_vm; /* High-water VM usage, in KB */ + + /* The following four fields are I/O statistics of a task. 
*/ + __u64 read_char; /* bytes read */ + __u64 write_char; /* bytes written */ + __u64 read_syscalls; /* read syscalls */ + __u64 write_syscalls; /* write syscalls */ + /* Extended accounting fields end */ + +#define TASKSTATS_HAS_IO_ACCOUNTING + /* Per-task storage I/O accounting starts */ + __u64 read_bytes; /* bytes of read I/O */ + __u64 write_bytes; /* bytes of write I/O */ + __u64 cancelled_write_bytes; /* bytes of cancelled write I/O */ + + __u64 nvcsw; /* voluntary_ctxt_switches */ + __u64 nivcsw; /* nonvoluntary_ctxt_switches */ + + /* time accounting for SMT machines */ + __u64 ac_utimescaled; /* utime scaled on frequency etc */ + __u64 ac_stimescaled; /* stime scaled on frequency etc */ + __u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */ + + /* Delay waiting for memory reclaim */ + __u64 freepages_count; + __u64 freepages_delay_total; + + + /* Delay waiting for thrashing page */ + __u64 thrashing_count; + __u64 thrashing_delay_total; + + /* v10: 64-bit btime to avoid overflow */ + __u64 ac_btime64; /* 64-bit begin time */ + + /* v11: Delay waiting for memory compact */ + __u64 compact_count; + __u64 compact_delay_total; + + /* v12 begin */ + __u32 ac_tgid; /* thread group ID */ + /* Thread group walltime up to now. This is total process walltime if + * AGROUP flag is set. + */ + __u64 ac_tgetime __attribute__((aligned(8))); + /* Lightweight information to identify process binary files. + * This leaves userspace to match this to a file system path, using + * MAJOR() and MINOR() macros to identify a device and mount point, + * the inode to identify the executable file. This is /proc/self/exe + * at the end, so matching the most recent exec(). Values are zero + * for kernel threads. 
+ */ + __u64 ac_exe_dev; /* program binary device ID */ + __u64 ac_exe_inode; /* program binary inode number */ + /* v12 end */ + + /* v13: Delay waiting for write-protect copy */ + __u64 wpcopy_count; + __u64 wpcopy_delay_total; + + /* v14: Delay waiting for IRQ/SOFTIRQ */ + __u64 irq_count; + __u64 irq_delay_total; + + /* v15: add Delay max and Delay min */ + + /* v16: move Delay max and Delay min to the end of taskstat */ + __u64 cpu_delay_max; + __u64 cpu_delay_min; + + __u64 blkio_delay_max; + __u64 blkio_delay_min; + + __u64 swapin_delay_max; + __u64 swapin_delay_min; + + __u64 freepages_delay_max; + __u64 freepages_delay_min; + + __u64 thrashing_delay_max; + __u64 thrashing_delay_min; + + __u64 compact_delay_max; + __u64 compact_delay_min; + + __u64 wpcopy_delay_max; + __u64 wpcopy_delay_min; + + __u64 irq_delay_max; + __u64 irq_delay_min; + + /*v17: delay max timestamp record*/ + struct __kernel_timespec cpu_delay_max_ts; + struct __kernel_timespec blkio_delay_max_ts; + struct __kernel_timespec swapin_delay_max_ts; + struct __kernel_timespec freepages_delay_max_ts; + struct __kernel_timespec thrashing_delay_max_ts; + struct __kernel_timespec compact_delay_max_ts; + struct __kernel_timespec wpcopy_delay_max_ts; + struct __kernel_timespec irq_delay_max_ts; +}; + + +/* + * Commands sent from userspace + * Not versioned. 
New commands should only be inserted at the enum's end + * prior to __TASKSTATS_CMD_MAX + */ + +enum { + TASKSTATS_CMD_UNSPEC = 0, /* Reserved */ + TASKSTATS_CMD_GET, /* user->kernel request/get-response */ + TASKSTATS_CMD_NEW, /* kernel->user event */ + __TASKSTATS_CMD_MAX, +}; + +#define TASKSTATS_CMD_MAX (__TASKSTATS_CMD_MAX - 1) + +enum { + TASKSTATS_TYPE_UNSPEC = 0, /* Reserved */ + TASKSTATS_TYPE_PID, /* Process id */ + TASKSTATS_TYPE_TGID, /* Thread group id */ + TASKSTATS_TYPE_STATS, /* taskstats structure */ + TASKSTATS_TYPE_AGGR_PID, /* contains pid + stats */ + TASKSTATS_TYPE_AGGR_TGID, /* contains tgid + stats */ + TASKSTATS_TYPE_NULL, /* contains nothing */ + __TASKSTATS_TYPE_MAX, +}; + +#define TASKSTATS_TYPE_MAX (__TASKSTATS_TYPE_MAX - 1) + +enum { + TASKSTATS_CMD_ATTR_UNSPEC = 0, + TASKSTATS_CMD_ATTR_PID, + TASKSTATS_CMD_ATTR_TGID, + TASKSTATS_CMD_ATTR_REGISTER_CPUMASK, + TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK, + __TASKSTATS_CMD_ATTR_MAX, +}; + +#define TASKSTATS_CMD_ATTR_MAX (__TASKSTATS_CMD_ATTR_MAX - 1) + +/* NETLINK_GENERIC related info */ + +#define TASKSTATS_GENL_NAME "TASKSTATS" +#define TASKSTATS_GENL_VERSION 0x1 + +#endif /* _LINUX_TASKSTATS_H */ From 9b93f7e3277490f0356309c9b241eeeaa27ba859 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 7 Mar 2026 09:47:53 +0100 Subject: [PATCH 047/127] tools/getdelays: use the static UAPI headers from tools/include/uapi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The include directory ../../usr/include is only present if an in-tree kernel build with CONFIG_HEADERS_INSTALL was done before. Otherwise the system UAPI headers are used, which most likely are not the most recent ones. To make sure to always have access to up-to-date UAPI headers, use the static copy in tools/include/uapi. 
Link: https://lkml.kernel.org/r/20260307-accounting-taskstats-h-v1-2-0b75915c6ce5@weissschuh.net Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202603062103.Z5fecwZD-lkp@intel.com/ Signed-off-by: Thomas Weißschuh Cc: Arnd Bergmann Cc: Balbir Singh Cc: Jiang Kun Cc: Wang Yaxin Cc: xu xin Cc: Yang Yang Signed-off-by: Andrew Morton --- tools/accounting/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/accounting/Makefile b/tools/accounting/Makefile index 20bbd461515e..007c0bb8cbbb 100644 --- a/tools/accounting/Makefile +++ b/tools/accounting/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CC := $(CROSS_COMPILE)gcc -CFLAGS := -I../../usr/include +CFLAGS := -I../include/uapi/ PROGS := getdelays procacct delaytop From 7aa89307fcbff038a13c3891e2e94674adc59237 Mon Sep 17 00:00:00 2001 From: Alexey Velichayshiy Date: Sun, 8 Mar 2026 02:47:53 +0300 Subject: [PATCH 048/127] ocfs2: remove redundant error code assignment Remove the error assignment for variable 'ret' during correct code execution. In subsequent execution, variable 'ret' is overwritten. Found by Linux Verification Center (linuxtesting.org) with SVACE. 
Link: https://lkml.kernel.org/r/20260307234809.88421-1-a.velichayshiy@ispras.ru Signed-off-by: Alexey Velichayshiy Acked-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Signed-off-by: Andrew Morton --- fs/ocfs2/dlm/dlmmaster.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index eb62724bbe9b..93eff38fdadd 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -930,7 +930,6 @@ redo_request: if (blocked) goto wait; - ret = -EINVAL; dlm_node_iter_init(mle->vote_map, &iter); while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { ret = dlm_do_master_request(res, mle, nodenum); From 9003ec6f7f394943880618737d797a9f257e6e1e Mon Sep 17 00:00:00 2001 From: Josh Law Date: Sun, 8 Mar 2026 20:20:27 +0000 Subject: [PATCH 049/127] lib/ts_bm: fix integer overflow in pattern length calculation The ts_bm algorithm stores its good_shift[] table and pattern in a single allocation sized from the pattern length. If the good_shift[] size calculation wraps, the resulting allocation can be too small and subsequent pattern copies can overflow it. Fix this by rejecting zero-length patterns and by using overflow helpers before calculating the combined allocation size. This fixes a potential heap overflow. The pattern length calculation can wrap during a size_t addition, leading to an undersized allocation. Because the textsearch library is reachable from userspace via Netfilter's xt_string module, this is a security risk that should be backported to LTS kernels. 
Link: https://lkml.kernel.org/r/20260308202028.2889285-1-objecting@objecting.org Signed-off-by: Josh Law Reviewed-by: Andrew Morton Cc: Signed-off-by: Andrew Morton --- lib/ts_bm.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/lib/ts_bm.c b/lib/ts_bm.c index eed5967238c5..676105e84005 100644 --- a/lib/ts_bm.c +++ b/lib/ts_bm.c @@ -163,8 +163,22 @@ static struct ts_config *bm_init(const void *pattern, unsigned int len, struct ts_config *conf; struct ts_bm *bm; int i; - unsigned int prefix_tbl_len = len * sizeof(unsigned int); - size_t priv_size = sizeof(*bm) + len + prefix_tbl_len; + unsigned int prefix_tbl_len; + size_t priv_size; + + /* Zero-length patterns would underflow bm_find()'s initial shift. */ + if (unlikely(!len)) + return ERR_PTR(-EINVAL); + + /* + * bm->pattern is stored immediately after the good_shift[] table. + * Reject lengths that would wrap while sizing either region. + */ + if (unlikely(check_mul_overflow(len, sizeof(*bm->good_shift), + &prefix_tbl_len) || + check_add_overflow(sizeof(*bm), (size_t)len, &priv_size) || + check_add_overflow(priv_size, prefix_tbl_len, &priv_size))) + return ERR_PTR(-EINVAL); conf = alloc_ts_config(priv_size, gfp_mask); if (IS_ERR(conf)) From 8cdf30813ea8ce881cecc08664144416dbdb3e16 Mon Sep 17 00:00:00 2001 From: Josh Law Date: Sun, 8 Mar 2026 20:20:28 +0000 Subject: [PATCH 050/127] lib/ts_kmp: fix integer overflow in pattern length calculation The ts_kmp algorithm stores its prefix_tbl[] table and pattern in a single allocation sized from the pattern length. If the prefix_tbl[] size calculation wraps, the resulting allocation can be too small and subsequent pattern copies can overflow it. Fix this by rejecting zero-length patterns and by using overflow helpers before calculating the combined allocation size. This fixes a potential heap overflow. The pattern length calculation can wrap during a size_t addition, leading to an undersized allocation. 
Because the textsearch library is reachable from userspace via Netfilter's xt_string module, this is a security risk that should be backported to LTS kernels. Link: https://lkml.kernel.org/r/20260308202028.2889285-2-objecting@objecting.org Signed-off-by: Josh Law Reviewed-by: Andrew Morton Cc: Signed-off-by: Andrew Morton --- lib/ts_kmp.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/lib/ts_kmp.c b/lib/ts_kmp.c index 5520dc28255a..29466c1803c9 100644 --- a/lib/ts_kmp.c +++ b/lib/ts_kmp.c @@ -94,8 +94,22 @@ static struct ts_config *kmp_init(const void *pattern, unsigned int len, struct ts_config *conf; struct ts_kmp *kmp; int i; - unsigned int prefix_tbl_len = len * sizeof(unsigned int); - size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len; + unsigned int prefix_tbl_len; + size_t priv_size; + + /* Zero-length patterns would make kmp_find() read beyond kmp->pattern. */ + if (unlikely(!len)) + return ERR_PTR(-EINVAL); + + /* + * kmp->pattern is stored immediately after the prefix_tbl[] table. + * Reject lengths that would wrap while sizing either region. + */ + if (unlikely(check_mul_overflow(len, sizeof(*kmp->prefix_tbl), + &prefix_tbl_len) || + check_add_overflow(sizeof(*kmp), (size_t)len, &priv_size) || + check_add_overflow(priv_size, prefix_tbl_len, &priv_size))) + return ERR_PTR(-EINVAL); conf = alloc_ts_config(priv_size, gfp_mask); if (IS_ERR(conf)) From 5a1292137e89c8c4b12076b17427eb00f788a4ed Mon Sep 17 00:00:00 2001 From: Aleksei Oladko Date: Mon, 9 Mar 2026 20:51:45 +0000 Subject: [PATCH 051/127] selftests: fix ARCH normalization to handle command-line argument Several selftests Makefiles (e.g. prctl, breakpoints, etc) attempt to normalize the ARCH variable by converting x86_64 and i.86 to x86. However, it uses the conditional assignment operator '?='. When ARCH is passed as a command-line argument (e.g., during an rpmbuild process), the '?=' operator ignores the shell command and the sed transformation. 
This leads to an incorrect ARCH value being used, which causes build failures # make -C tools/testing/selftests TARGETS=prctl ARCH=x86_64 make: Entering directory '/build/tools/testing/selftests' make[1]: Entering directory '/build/tools/testing/selftests/prctl' make[1]: *** No targets. Stop. make[1]: Leaving directory '/build/tools/testing/selftests/prctl' make: *** [Makefile:197: all] Error 2 Change the assignment to use 'override' and ':=' to ensure the normalization logic is applied regardless of how the ARCH variable was initially defined. Link: https://lkml.kernel.org/r/20260309205145.572778-1-aleksey.oladko@virtuozzo.com Signed-off-by: Aleksei Oladko Cc: Shuah Khan Cc: Wei Yang Cc: Bala-Vignesh-Reddy Cc: Chelsy Ratnawat Signed-off-by: Andrew Morton --- tools/testing/selftests/breakpoints/Makefile | 4 ++-- tools/testing/selftests/ipc/Makefile | 8 ++++---- tools/testing/selftests/prctl/Makefile | 4 ++-- tools/testing/selftests/sparc64/Makefile | 4 ++-- .../testing/selftests/thermal/intel/power_floor/Makefile | 4 ++-- .../selftests/thermal/intel/workload_hint/Makefile | 4 ++-- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile index 9ec2c78de8ca..0b8f5acf7c78 100644 --- a/tools/testing/selftests/breakpoints/Makefile +++ b/tools/testing/selftests/breakpoints/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Taken from perf makefile -uname_M := $(shell uname -m 2>/dev/null || echo not) -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) +ARCH ?= $(shell uname -m 2>/dev/null || echo not) +override ARCH := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/) TEST_GEN_PROGS := step_after_suspend_test diff --git a/tools/testing/selftests/ipc/Makefile b/tools/testing/selftests/ipc/Makefile index 50e9c299fc4a..fad10f2bb57b 100644 --- a/tools/testing/selftests/ipc/Makefile +++ b/tools/testing/selftests/ipc/Makefile @@ -1,12 
+1,12 @@ # SPDX-License-Identifier: GPL-2.0 -uname_M := $(shell uname -m 2>/dev/null || echo not) -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/) +ARCH ?= $(shell uname -m 2>/dev/null || echo not) +override ARCH := $(shell echo $(ARCH) | sed -e s/i.86/i386/) ifeq ($(ARCH),i386) - ARCH := x86 + override ARCH := x86 CFLAGS := -DCONFIG_X86_32 -D__i386__ endif ifeq ($(ARCH),x86_64) - ARCH := x86 + override ARCH := x86 CFLAGS := -DCONFIG_X86_64 -D__x86_64__ endif diff --git a/tools/testing/selftests/prctl/Makefile b/tools/testing/selftests/prctl/Makefile index 01dc90fbb509..e770e86fad9a 100644 --- a/tools/testing/selftests/prctl/Makefile +++ b/tools/testing/selftests/prctl/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 ifndef CROSS_COMPILE -uname_M := $(shell uname -m 2>/dev/null || echo not) -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) +ARCH ?= $(shell uname -m 2>/dev/null || echo not) +override ARCH := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/) ifeq ($(ARCH),x86) TEST_PROGS := disable-tsc-ctxt-sw-stress-test disable-tsc-on-off-stress-test \ diff --git a/tools/testing/selftests/sparc64/Makefile b/tools/testing/selftests/sparc64/Makefile index a19531dba4dc..88f7be76f962 100644 --- a/tools/testing/selftests/sparc64/Makefile +++ b/tools/testing/selftests/sparc64/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -uname_M := $(shell uname -m 2>/dev/null || echo not) -ARCH ?= $(shell echo $(uname_M) | sed -e s/x86_64/x86/) +ARCH ?= $(shell uname -m 2>/dev/null || echo not) +override ARCH := $(shell echo $(ARCH) | sed -e s/x86_64/x86/) ifneq ($(ARCH),sparc64) nothing: diff --git a/tools/testing/selftests/thermal/intel/power_floor/Makefile b/tools/testing/selftests/thermal/intel/power_floor/Makefile index 9b88e57dbba5..07463c2160e0 100644 --- a/tools/testing/selftests/thermal/intel/power_floor/Makefile +++ b/tools/testing/selftests/thermal/intel/power_floor/Makefile @@ -1,7 +1,7 @@ # 
SPDX-License-Identifier: GPL-2.0 ifndef CROSS_COMPILE -uname_M := $(shell uname -m 2>/dev/null || echo not) -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) +ARCH ?= $(shell uname -m 2>/dev/null || echo not) +override ARCH := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/) ifeq ($(ARCH),x86) TEST_GEN_PROGS := power_floor_test diff --git a/tools/testing/selftests/thermal/intel/workload_hint/Makefile b/tools/testing/selftests/thermal/intel/workload_hint/Makefile index 37ff3286283b..49e3bc2b20f9 100644 --- a/tools/testing/selftests/thermal/intel/workload_hint/Makefile +++ b/tools/testing/selftests/thermal/intel/workload_hint/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 ifndef CROSS_COMPILE -uname_M := $(shell uname -m 2>/dev/null || echo not) -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) +ARCH ?= $(shell uname -m 2>/dev/null || echo not) +override ARCH := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/) ifeq ($(ARCH),x86) TEST_GEN_PROGS := workload_hint_test From ecfad171221447f3fe53f3d634765ef15d3e7232 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 6 Mar 2026 09:50:16 +0900 Subject: [PATCH 052/127] decode_stacktrace: decode caller address Decode the caller address instead of the return address by default. This also introduced -R option to provide return address decoding mode. This changes the decode_stacktrace.sh to decode the line info 1byte before the return address which will be the call(branch) instruction address. If the return address is a symbol address (zero offset from it), it falls back to decoding the return address. This improves results especially when optimizations have changed the order of the lines around the return address, or when the return address does not have the actual line information. 
With this change; Call Trace: dump_stack_lvl (lib/dump_stack.c:94 lib/dump_stack.c:120) lockdep_rcu_suspicious (kernel/locking/lockdep.c:6876) event_filter_pid_sched_process_fork (kernel/trace/trace_events.c:1057) kernel_clone (include/trace/events/sched.h:396 include/trace/events/sched.h:396 kernel/fork.c:2664) __x64_sys_clone (kernel/fork.c:2795 kernel/fork.c:2779 kernel/fork.c:2779) do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94) ? entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:121) ? trace_irq_disable (include/trace/events/preemptirq.h:36) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:121) Without this (or give -R option); Call Trace: dump_stack_lvl (lib/dump_stack.c:122) lockdep_rcu_suspicious (kernel/locking/lockdep.c:6877) event_filter_pid_sched_process_fork (kernel/trace/trace_events.c:?) kernel_clone (include/trace/events/sched.h:? include/trace/events/sched.h:396 kernel/fork.c:2664) __x64_sys_clone (kernel/fork.c:2779) do_syscall_64 (arch/x86/entry/syscall_64.c:?) ? entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) ? 
trace_irq_disable (include/trace/events/preemptirq.h:36) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) [akpm@linux-foundation.org: fix spello] Link: https://lkml.kernel.org/r/177275821652.1557019.18367881408364381866.stgit@mhiramat.tok.corp.google.com Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Matthieu Baerts (NGI0) Tested-by: Luca Ceresoli [arm64] Cc: Carlos Llamas Cc: Sasha Levin (Microsoft) Signed-off-by: Andrew Morton --- scripts/decode_stacktrace.sh | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh index 8d01b741de62..39d60d477bf3 100755 --- a/scripts/decode_stacktrace.sh +++ b/scripts/decode_stacktrace.sh @@ -5,9 +5,11 @@ usage() { echo "Usage:" - echo " $0 -r " - echo " $0 [ [|auto []]]" + echo " $0 [-R] -r " + echo " $0 [-R] [ [|auto []]]" echo " $0 -h" + echo "Options:" + echo " -R: decode return address instead of caller address." } # Try to find a Rust demangler @@ -33,11 +35,17 @@ fi READELF=${UTIL_PREFIX}readelf${UTIL_SUFFIX} ADDR2LINE=${UTIL_PREFIX}addr2line${UTIL_SUFFIX} NM=${UTIL_PREFIX}nm${UTIL_SUFFIX} +decode_retaddr=false if [[ $1 == "-h" ]] ; then usage exit 0 -elif [[ $1 == "-r" ]] ; then +elif [[ $1 == "-R" ]] ; then + decode_retaddr=true + shift 1 +fi + +if [[ $1 == "-r" ]] ; then vmlinux="" basepath="auto" modpath="" @@ -176,13 +184,23 @@ parse_symbol() { # Let's start doing the math to get the exact address into the # symbol. First, strip out the symbol total length. local expr=${symbol%/*} + # Also parse the offset from symbol. + local offset=${expr#*+} + offset=$((offset)) # Now, replace the symbol name with the base address we found # before. 
expr=${expr/$name/0x$base_addr} # Evaluate it to find the actual address - expr=$((expr)) + # The stack trace shows the return address, which is the next + # instruction after the actual call, so as long as it's in the same + # symbol, subtract one from that to point the call instruction. + if [[ $decode_retaddr == false && $offset != 0 ]]; then + expr=$((expr-1)) + else + expr=$((expr)) + fi local address=$(printf "%x\n" "$expr") # Pass it to addr2line to get filename and line number From d1db4118489fffd2b2f612140b7acbb477880839 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 11 Mar 2026 17:58:17 -0400 Subject: [PATCH 053/127] checkpatch: add support for Assisted-by tag The Assisted-by tag was introduced in Documentation/process/coding-assistants.rst for attributing AI tool contributions to kernel patches. However, checkpatch.pl did not recognize this tag, causing two issues: WARNING: Non-standard signature: Assisted-by: ERROR: Unrecognized email address: 'AGENT_NAME:MODEL_VERSION' Fix this by: 1. Adding Assisted-by to the recognized $signature_tags list 2. Skipping email validation for Assisted-by lines since they use the AGENT_NAME:MODEL_VERSION format instead of an email address 3. 
Warning when the Assisted-by value doesn't match the expected format Link: https://lkml.kernel.org/r/20260311215818.518930-1-sashal@kernel.org Signed-off-by: Sasha Levin Reported-by: Bart Van Assche Acked-by: Joe Perches Cc: Andy Whitcroft Cc: Dwaipayan Ray Cc: Jonathan Corbet Cc: Lukas Bulwahn Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index e56374662ff7..27a43a4d9c43 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -641,6 +641,7 @@ our $signature_tags = qr{(?xi: Reviewed-by:| Reported-by:| Suggested-by:| + Assisted-by:| To:| Cc: )}; @@ -3105,6 +3106,15 @@ sub process { } } + # Assisted-by uses AGENT_NAME:MODEL_VERSION format, not email + if ($sign_off =~ /^Assisted-by:/i) { + if ($email !~ /^\S+:\S+/) { + WARN("BAD_SIGN_OFF", + "Assisted-by expects 'AGENT_NAME:MODEL_VERSION [TOOL1] [TOOL2]' format\n" . $herecurr); + } + next; + } + my ($email_name, $name_comment, $email_address, $comment) = parse_email($email); my $suggested_email = format_email(($email_name, $name_comment, $email_address, $comment)); if ($suggested_email eq "") { From c9ba82624b684679b37c5062b697c85f932089b9 Mon Sep 17 00:00:00 2001 From: Josh Law Date: Thu, 12 Mar 2026 21:52:49 +0000 Subject: [PATCH 054/127] lib/glob: initialize back_str to silence uninitialized variable warning back_str is only used when back_pat is non-NULL, and both are always set together, so it is safe in practice. Initialize back_str to NULL to make this safety invariant explicit and silence compiler/static analysis warnings. 
Link: https://lkml.kernel.org/r/20260312215249.50165-1-objecting@objecting.org Signed-off-by: Josh Law Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton --- lib/glob.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/glob.c b/lib/glob.c index 69311568ad3d..7aca76c25bcb 100644 --- a/lib/glob.c +++ b/lib/glob.c @@ -47,7 +47,7 @@ bool __pure glob_match(char const *pat, char const *str) * (no exception for /), it can be easily proved that there's * never a need to backtrack multiple levels. */ - char const *back_pat = NULL, *back_str; + char const *back_pat = NULL, *back_str = NULL; /* * Loop over each token (character or class) in pat, matching From 8b140131bf341704ca8f56c9e1a69a27d3ba225d Mon Sep 17 00:00:00 2001 From: Hisam Mehboob Date: Thu, 12 Mar 2026 06:17:42 +0500 Subject: [PATCH 055/127] CREDITS: simplify the end-of-file alphabetical order comment The existing comment references specific individuals by name which becomes outdated as new entries are added. Simplify it to state the alphabetical ordering rule clearly without depending on who happens to be the last entry. Link: https://lkml.kernel.org/r/20260312011741.846664-2-hisamshar@gmail.com Signed-off-by: Hisam Mehboob Suggested-by: Randy Dunlap Acked-by: Randy Dunlap Cc: Jakub Kicinski Cc: Simon Horman Signed-off-by: Andrew Morton --- CREDITS | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/CREDITS b/CREDITS index 9091bac3d2da..97d829455045 100644 --- a/CREDITS +++ b/CREDITS @@ -4560,8 +4560,5 @@ D: MD driver D: EISA/sysfs subsystem S: France -# Don't add your name here, unless you really _are_ after Marc -# alphabetically. Leonard used to be very proud of being the -# last entry, and he'll get positively pissed if he can't even -# be second-to-last. (and this file really _is_ supposed to be -# in alphabetic order) +# Don't add your name here unless you really are last alphabetically. +# (This file is supposed to be kept in alphabetical order by last name.) 
From 231bb8c1be040044f14ed74b30e4e1a26db550f8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 14 Mar 2026 13:42:43 -0700 Subject: [PATCH 056/127] kernel/crash: remove inclusion of crypto/sha1.h Several files related to kernel crash dumps include crypto/sha1.h but never use any of its functionality. Remove these includes so that these files don't unnecessarily come up in searches for which kernel code is still using the obsolete SHA-1 algorithm. Link: https://lkml.kernel.org/r/20260314204243.45001-1-ebiggers@kernel.org Signed-off-by: Eric Biggers Cc: Baoquan He Cc: Dave Young Cc: Vivek Goyal Signed-off-by: Andrew Morton --- kernel/crash_core.c | 2 -- kernel/crash_reserve.c | 2 -- kernel/vmcore_info.c | 2 -- 3 files changed, 6 deletions(-) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 2c1a3791e410..4f21fc3b108b 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -27,8 +27,6 @@ #include #include -#include - #include "kallsyms_internal.h" #include "kexec_internal.h" diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c index 62e60e0223cf..eee37a11380c 100644 --- a/kernel/crash_reserve.c +++ b/kernel/crash_reserve.c @@ -20,8 +20,6 @@ #include #include -#include - #include "kallsyms_internal.h" #include "kexec_internal.h" diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c index 8d82913223a1..36772126385c 100644 --- a/kernel/vmcore_info.c +++ b/kernel/vmcore_info.c @@ -18,8 +18,6 @@ #include #include -#include - #include "kallsyms_internal.h" #include "kexec_internal.h" From ea2976032df9d520ef97d71f12dafdedf9a72ed8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 14 Mar 2026 13:41:44 -0700 Subject: [PATCH 057/127] kernel/kexec: remove inclusion of crypto/hash.h kexec_core.c does not do any cryptographic hashing, so the header crypto/hash.h is not needed at all. 
Link: https://lkml.kernel.org/r/20260314204144.44884-1-ebiggers@kernel.org Signed-off-by: Eric Biggers Cc: Baoquan He Signed-off-by: Andrew Morton --- kernel/kexec_core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 2fea396d29b9..a43d2da0fe3e 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -47,7 +47,6 @@ #include #include -#include #include "kexec_internal.h" atomic_t __kexec_lock = ATOMIC_INIT(0); From 3e811cae321904c111f3e963b165c1eb0bc17ae0 Mon Sep 17 00:00:00 2001 From: Mayank Rungta Date: Thu, 12 Mar 2026 16:22:02 -0700 Subject: [PATCH 058/127] watchdog: return early in watchdog_hardlockup_check() Patch series "watchdog/hardlockup: Improvements to hardlockup", v2. This series addresses limitations in the hardlockup detector implementations and updates the documentation to reflect actual behavior and recent changes. The changes are structured as follows: Refactoring (Patch 1) ===================== Patch 1 refactors watchdog_hardlockup_check() to return early if no lockup is detected. This reduces the indentation level of the main logic block, serving as a clean base for the subsequent changes. Hardlockup Detection Improvements (Patches 2 & 4) ================================================= The hardlockup detector logic relies on updating saved interrupt counts to determine if the CPU is making progress. Patch 2 ensures that the saved interrupt count is updated unconditionally before checking the "touched" flag. This prevents stale comparisons which can delay detection. This is a logic fix that ensures the detector remains accurate even when the watchdog is frequently touched. Patch 4 improves the Buddy detector's timeliness. The current checking interval (every 3rd sample) causes high variability in detection time (up to 24s). 
This patch changes the Buddy detector to check at every hrtimer interval (4s) with a missed-interrupt threshold of 3, narrowing the detection window to a consistent 8-12 second range. Documentation Updates (Patches 3 & 5) ===================================== The current documentation does not fully capture the variable nature of detection latency or the details of the Buddy system. Patch 3 removes the strict "10 seconds" definition of a hardlockup, which was misleading given the periodic nature of the detector. It adds a "Detection Overhead" section to the admin guide, using "Best Case" and "Worst Case" scenarios to illustrate that detection time can vary significantly (e.g., ~6s to ~20s). Patch 5 adds a dedicated section for the Buddy detector, which was previously undocumented. It details the mechanism, the new timing logic, and known limitations. This patch (of 5): Invert the `is_hardlockup(cpu)` check in `watchdog_hardlockup_check()` to return early when a hardlockup is not detected. This flattens the main logic block, reducing the indentation level and making the code easier to read and maintain. This refactoring serves as a preparation patch for future hardlockup changes. 
Link: https://lkml.kernel.org/r/20260312-hardlockup-watchdog-fixes-v2-0-45bd8a0cc7ed@google.com Link: https://lkml.kernel.org/r/20260312-hardlockup-watchdog-fixes-v2-1-45bd8a0cc7ed@google.com Signed-off-by: Mayank Rungta Reviewed-by: Douglas Anderson Reviewed-by: Petr Mladek Cc: Ian Rogers Cc: Jonathan Corbet Cc: Li Huafei Cc: Max Kellermann Cc: Shuah Khan Cc: Stephane Eranian Cc: Wang Jinchao Cc: Yunhui Cui Signed-off-by: Andrew Morton --- kernel/watchdog.c | 127 +++++++++++++++++++++++----------------------- 1 file changed, 64 insertions(+), 63 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 7d675781bc91..4c5b47495745 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -187,6 +187,8 @@ static void watchdog_hardlockup_kick(void) void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) { int hardlockup_all_cpu_backtrace; + unsigned int this_cpu; + unsigned long flags; if (per_cpu(watchdog_hardlockup_touched, cpu)) { per_cpu(watchdog_hardlockup_touched, cpu) = false; @@ -201,74 +203,73 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) * fired multiple times before we overflow'd. If it hasn't * then this is a good indication the cpu is stuck */ - if (is_hardlockup(cpu)) { - unsigned int this_cpu = smp_processor_id(); - unsigned long flags; + if (!is_hardlockup(cpu)) { + per_cpu(watchdog_hardlockup_warned, cpu) = false; + return; + } #ifdef CONFIG_SYSFS - ++hardlockup_count; + ++hardlockup_count; #endif - /* - * A poorly behaving BPF scheduler can trigger hard lockup by - * e.g. 
putting numerous affinitized tasks in a single queue and + * directing all CPUs at it. The following call can return true + * only once when sched_ext is enabled and will immediately + * abort the BPF scheduler and print out a warning message. + */ + if (scx_hardlockup(cpu)) + return; + + /* Only print hardlockups once. */ + if (per_cpu(watchdog_hardlockup_warned, cpu)) + return; + + /* + * Prevent multiple hard-lockup reports if one cpu is already + * engaged in dumping all cpu back traces. + */ + if (hardlockup_all_cpu_backtrace) { + if (test_and_set_bit_lock(0, &hard_lockup_nmi_warn)) return; - - /* Only print hardlockups once. */ - if (per_cpu(watchdog_hardlockup_warned, cpu)) - return; - - /* - * Prevent multiple hard-lockup reports if one cpu is already - * engaged in dumping all cpu back traces. - */ - if (hardlockup_all_cpu_backtrace) { - if (test_and_set_bit_lock(0, &hard_lockup_nmi_warn)) - return; - } - - /* - * NOTE: we call printk_cpu_sync_get_irqsave() after printing - * the lockup message. While it would be nice to serialize - * that printout, we really want to make sure that if some - * other CPU somehow locked up while holding the lock associated - * with printk_cpu_sync_get_irqsave() that we can still at least - * get the message about the lockup out. 
- */ - pr_emerg("CPU%u: Watchdog detected hard LOCKUP on cpu %u\n", this_cpu, cpu); - printk_cpu_sync_get_irqsave(flags); - - print_modules(); - print_irqtrace_events(current); - if (cpu == this_cpu) { - if (regs) - show_regs(regs); - else - dump_stack(); - printk_cpu_sync_put_irqrestore(flags); - } else { - printk_cpu_sync_put_irqrestore(flags); - trigger_single_cpu_backtrace(cpu); - } - - if (hardlockup_all_cpu_backtrace) { - trigger_allbutcpu_cpu_backtrace(cpu); - if (!hardlockup_panic) - clear_bit_unlock(0, &hard_lockup_nmi_warn); - } - - sys_info(hardlockup_si_mask & ~SYS_INFO_ALL_BT); - if (hardlockup_panic) - nmi_panic(regs, "Hard LOCKUP"); - - per_cpu(watchdog_hardlockup_warned, cpu) = true; - } else { - per_cpu(watchdog_hardlockup_warned, cpu) = false; } + + /* + * NOTE: we call printk_cpu_sync_get_irqsave() after printing + * the lockup message. While it would be nice to serialize + * that printout, we really want to make sure that if some + * other CPU somehow locked up while holding the lock associated + * with printk_cpu_sync_get_irqsave() that we can still at least + * get the message about the lockup out. 
+ */ + this_cpu = smp_processor_id(); + pr_emerg("CPU%u: Watchdog detected hard LOCKUP on cpu %u\n", this_cpu, cpu); + printk_cpu_sync_get_irqsave(flags); + + print_modules(); + print_irqtrace_events(current); + if (cpu == this_cpu) { + if (regs) + show_regs(regs); + else + dump_stack(); + printk_cpu_sync_put_irqrestore(flags); + } else { + printk_cpu_sync_put_irqrestore(flags); + trigger_single_cpu_backtrace(cpu); + } + + if (hardlockup_all_cpu_backtrace) { + trigger_allbutcpu_cpu_backtrace(cpu); + if (!hardlockup_panic) + clear_bit_unlock(0, &hard_lockup_nmi_warn); + } + + sys_info(hardlockup_si_mask & ~SYS_INFO_ALL_BT); + if (hardlockup_panic) + nmi_panic(regs, "Hard LOCKUP"); + + per_cpu(watchdog_hardlockup_warned, cpu) = true; } #else /* CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */ From 746bb7fa915c33f8a0560764709a228f352999e8 Mon Sep 17 00:00:00 2001 From: Mayank Rungta Date: Thu, 12 Mar 2026 16:22:03 -0700 Subject: [PATCH 059/127] watchdog: update saved interrupts during check Currently, arch_touch_nmi_watchdog() causes an early return that skips updating hrtimer_interrupts_saved. This leads to stale comparisons and delayed lockup detection. I found this issue because in our system the serial console is fairly chatty. For example, the 8250 console driver frequently calls touch_nmi_watchdog() via console_write(). If a CPU locks up after a timer interrupt but before next watchdog check, we see the following sequence: * watchdog_hardlockup_check() saves counter (e.g., 1000) * Timer runs and updates the counter (1001) * touch_nmi_watchdog() is called * CPU locks up * 10s pass: check() notices touch, returns early, skips update * 10s pass: check() saves counter (1001) * 10s pass: check() finally detects lockup This delays detection to 30 seconds. With this fix, we detect the lockup in 20 seconds. 
Link: https://lkml.kernel.org/r/20260312-hardlockup-watchdog-fixes-v2-2-45bd8a0cc7ed@google.com Signed-off-by: Mayank Rungta Reviewed-by: Douglas Anderson Reviewed-by: Petr Mladek Cc: Ian Rogers Cc: Jonathan Corbet Cc: Li Huafei Cc: Max Kellermann Cc: Shuah Khan Cc: Stephane Eranian Cc: Wang Jinchao Cc: Yunhui Cui Signed-off-by: Andrew Morton --- kernel/watchdog.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 4c5b47495745..431c540bd035 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -159,21 +159,28 @@ void watchdog_hardlockup_touch_cpu(unsigned int cpu) per_cpu(watchdog_hardlockup_touched, cpu) = true; } -static bool is_hardlockup(unsigned int cpu) +static void watchdog_hardlockup_update(unsigned int cpu) { int hrint = atomic_read(&per_cpu(hrtimer_interrupts, cpu)); - if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint) - return true; - /* * NOTE: we don't need any fancy atomic_t or READ_ONCE/WRITE_ONCE * for hrtimer_interrupts_saved. hrtimer_interrupts_saved is * written/read by a single CPU. 
*/ per_cpu(hrtimer_interrupts_saved, cpu) = hrint; +} - return false; +static bool is_hardlockup(unsigned int cpu) +{ + int hrint = atomic_read(&per_cpu(hrtimer_interrupts, cpu)); + + if (per_cpu(hrtimer_interrupts_saved, cpu) != hrint) { + watchdog_hardlockup_update(cpu); + return false; + } + + return true; } static void watchdog_hardlockup_kick(void) @@ -191,6 +198,7 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) unsigned long flags; if (per_cpu(watchdog_hardlockup_touched, cpu)) { + watchdog_hardlockup_update(cpu); per_cpu(watchdog_hardlockup_touched, cpu) = false; return; } From de832583435c3500d94d8aea16fb998021fc1a57 Mon Sep 17 00:00:00 2001 From: Mayank Rungta Date: Thu, 12 Mar 2026 16:22:04 -0700 Subject: [PATCH 060/127] doc: watchdog: clarify hardlockup detection timing The current documentation implies that a hardlockup is strictly defined as looping for "more than 10 seconds." However, the detection mechanism is periodic (based on `watchdog_thresh`), meaning detection time varies significantly depending on when the lockup occurs relative to the NMI perf event. Update the definition to remove the strict "more than 10 seconds" constraint in the introduction and defer details to the Implementation section. Additionally, add a "Detection Overhead" section illustrating the Best Case (~6s) and Worst Case (~20s) detection scenarios to provide administrators with a clearer understanding of the watchdog's latency. 
Link: https://lkml.kernel.org/r/20260312-hardlockup-watchdog-fixes-v2-3-45bd8a0cc7ed@google.com Signed-off-by: Mayank Rungta Reviewed-by: Petr Mladek Reviewed-by: Douglas Anderson Cc: Ian Rogers Cc: Jonathan Corbet Cc: Li Huafei Cc: Max Kellermann Cc: Shuah Khan Cc: Stephane Eranian Cc: Wang Jinchao Cc: Yunhui Cui Signed-off-by: Andrew Morton --- .../admin-guide/lockup-watchdogs.rst | 41 ++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/lockup-watchdogs.rst b/Documentation/admin-guide/lockup-watchdogs.rst index 3e09284a8b9b..1b374053771f 100644 --- a/Documentation/admin-guide/lockup-watchdogs.rst +++ b/Documentation/admin-guide/lockup-watchdogs.rst @@ -16,7 +16,7 @@ details), and a compile option, "BOOTPARAM_SOFTLOCKUP_PANIC", are provided for this. A 'hardlockup' is defined as a bug that causes the CPU to loop in -kernel mode for more than 10 seconds (see "Implementation" below for +kernel mode for several seconds (see "Implementation" below for details), without letting other interrupts have a chance to run. Similarly to the softlockup case, the current stack trace is displayed upon detection and the system will stay locked up unless the default @@ -64,6 +64,45 @@ administrators to configure the period of the hrtimer and the perf event. The right value for a particular environment is a trade-off between fast response to lockups and detection overhead. +Detection Overhead +------------------ + +The hardlockup detector checks for lockups using a periodic NMI perf +event. This means the time to detect a lockup can vary depending on +when the lockup occurs relative to the NMI check window. + +**Best Case:** +In the best case scenario, the lockup occurs just before the first +heartbeat is due. The detector will notice the missing hrtimer +interrupt almost immediately during the next check. 
+ +:: + + Time 100.0: cpu 1 heartbeat + Time 100.1: hardlockup_check, cpu1 stores its state + Time 103.9: Hard Lockup on cpu1 + Time 104.0: cpu 1 heartbeat never comes + Time 110.1: hardlockup_check, cpu1 checks the state again, should be the same, declares lockup + + Time to detection: ~6 seconds + +**Worst Case:** +In the worst case scenario, the lockup occurs shortly after a valid +interrupt (heartbeat) which itself happened just after the NMI check. +The next NMI check sees that the interrupt count has changed (due to +that one heartbeat), assumes the CPU is healthy, and resets the +baseline. The lockup is only detected at the subsequent check. + +:: + + Time 100.0: hardlockup_check, cpu1 stores its state + Time 100.1: cpu 1 heartbeat + Time 100.2: Hard Lockup on cpu1 + Time 110.0: hardlockup_check, cpu1 stores its state (misses lockup as state changed) + Time 120.0: hardlockup_check, cpu1 checks the state again, should be the same, declares lockup + + Time to detection: ~20 seconds + By default, the watchdog runs on all online cores. However, on a kernel configured with NO_HZ_FULL, by default the watchdog runs only on the housekeeping cores, not the cores specified in the "nohz_full" From 077ba03600faea5f2aa15afbb83580878cc8b500 Mon Sep 17 00:00:00 2001 From: Mayank Rungta Date: Thu, 12 Mar 2026 16:22:05 -0700 Subject: [PATCH 061/127] watchdog/hardlockup: improve buddy system detection timeliness Currently, the buddy system only performs checks every 3rd sample. With a 4-second interval. If a check window is missed, the next check occurs 12 seconds later, potentially delaying hard lockup detection for up to 24 seconds. Modify the buddy system to perform checks at every interval (4s). Introduce a missed-interrupt threshold to maintain the existing grace period while reducing the detection window to 8-12 seconds. Best and worst case detection scenarios: Before (12s check window): - Best case: Lockup occurs after first check but just before heartbeat interval. 
Detected in ~8s (8s till next check). - Worst case: Lockup occurs just after a check. Detected in ~24s (missed check + 12s till next check + 12s logic). After (4s check window with threshold of 3): - Best case: Lockup occurs just before a check. Detected in ~8s (0s till 1st check + 4s till 2nd + 4s till 3rd). - Worst case: Lockup occurs just after a check. Detected in ~12s (4s till 1st check + 4s till 2nd + 4s till 3rd). Link: https://lkml.kernel.org/r/20260312-hardlockup-watchdog-fixes-v2-4-45bd8a0cc7ed@google.com Signed-off-by: Mayank Rungta Reviewed-by: Douglas Anderson Reviewed-by: Petr Mladek Cc: Ian Rogers Cc: Jonathan Corbet Cc: Li Huafei Cc: Max Kellermann Cc: Shuah Khan Cc: Stephane Erainan Cc: Wang Jinchao Cc: Yunhui Cui Signed-off-by: Andrew Morton --- include/linux/nmi.h | 1 + kernel/watchdog.c | 19 ++++++++++++++++--- kernel/watchdog_buddy.c | 9 +-------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 207156f2143c..bc1162895f35 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -21,6 +21,7 @@ void lockup_detector_soft_poweroff(void); extern int watchdog_user_enabled; extern int watchdog_thresh; extern unsigned long watchdog_enabled; +extern int watchdog_hardlockup_miss_thresh; extern struct cpumask watchdog_cpumask; extern unsigned long *watchdog_cpumask_bits; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 431c540bd035..87dd5e0f6968 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -60,6 +60,13 @@ unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); int __read_mostly sysctl_hardlockup_all_cpu_backtrace; # endif /* CONFIG_SMP */ +/* + * Number of consecutive missed interrupts before declaring a lockup. + * Default to 1 (immediate) for NMI/Perf. Buddy will overwrite this to 3. 
+ */ +int __read_mostly watchdog_hardlockup_miss_thresh = 1; +EXPORT_SYMBOL_GPL(watchdog_hardlockup_miss_thresh); + /* * Should we panic when a soft-lockup or hard-lockup occurs: */ @@ -137,6 +144,7 @@ __setup("nmi_watchdog=", hardlockup_panic_setup); static DEFINE_PER_CPU(atomic_t, hrtimer_interrupts); static DEFINE_PER_CPU(int, hrtimer_interrupts_saved); +static DEFINE_PER_CPU(int, hrtimer_interrupts_missed); static DEFINE_PER_CPU(bool, watchdog_hardlockup_warned); static DEFINE_PER_CPU(bool, watchdog_hardlockup_touched); static unsigned long hard_lockup_nmi_warn; @@ -159,7 +167,7 @@ void watchdog_hardlockup_touch_cpu(unsigned int cpu) per_cpu(watchdog_hardlockup_touched, cpu) = true; } -static void watchdog_hardlockup_update(unsigned int cpu) +static void watchdog_hardlockup_update_reset(unsigned int cpu) { int hrint = atomic_read(&per_cpu(hrtimer_interrupts, cpu)); @@ -169,6 +177,7 @@ static void watchdog_hardlockup_update(unsigned int cpu) * written/read by a single CPU. */ per_cpu(hrtimer_interrupts_saved, cpu) = hrint; + per_cpu(hrtimer_interrupts_missed, cpu) = 0; } static bool is_hardlockup(unsigned int cpu) @@ -176,10 +185,14 @@ static bool is_hardlockup(unsigned int cpu) int hrint = atomic_read(&per_cpu(hrtimer_interrupts, cpu)); if (per_cpu(hrtimer_interrupts_saved, cpu) != hrint) { - watchdog_hardlockup_update(cpu); + watchdog_hardlockup_update_reset(cpu); return false; } + per_cpu(hrtimer_interrupts_missed, cpu)++; + if (per_cpu(hrtimer_interrupts_missed, cpu) % watchdog_hardlockup_miss_thresh) + return false; + return true; } @@ -198,7 +211,7 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) unsigned long flags; if (per_cpu(watchdog_hardlockup_touched, cpu)) { - watchdog_hardlockup_update(cpu); + watchdog_hardlockup_update_reset(cpu); per_cpu(watchdog_hardlockup_touched, cpu) = false; return; } diff --git a/kernel/watchdog_buddy.c b/kernel/watchdog_buddy.c index ee754d767c21..3a1e57080c1c 100644 --- a/kernel/watchdog_buddy.c 
+++ b/kernel/watchdog_buddy.c @@ -21,6 +21,7 @@ static unsigned int watchdog_next_cpu(unsigned int cpu) int __init watchdog_hardlockup_probe(void) { + watchdog_hardlockup_miss_thresh = 3; return 0; } @@ -86,14 +87,6 @@ void watchdog_buddy_check_hardlockup(int hrtimer_interrupts) { unsigned int next_cpu; - /* - * Test for hardlockups every 3 samples. The sample period is - * watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over - * watchdog_thresh (over by 20%). - */ - if (hrtimer_interrupts % 3 != 0) - return; - /* check for a hardlockup on the next CPU */ next_cpu = watchdog_next_cpu(smp_processor_id()); if (next_cpu >= nr_cpu_ids) From cb8615f3cb00210a27237e1c97cefe3aaf27f0cb Mon Sep 17 00:00:00 2001 From: Mayank Rungta Date: Thu, 12 Mar 2026 16:22:06 -0700 Subject: [PATCH 062/127] doc: watchdog: document buddy detector The current documentation generalizes the hardlockup detector as primarily NMI-perf-based and lacks details on the SMP "Buddy" detector. Update the documentation to add a detailed description of the Buddy detector, and also restructure the "Implementation" section to explicitly separate "Softlockup Detector", "Hardlockup Detector (NMI/Perf)", and "Hardlockup Detector (Buddy)". Clarify that the softlockup hrtimer acts as the heartbeat generator for both hardlockup mechanisms and centralize the configuration details in a "Frequency and Heartbeats" section. 
Link: https://lkml.kernel.org/r/20260312-hardlockup-watchdog-fixes-v2-5-45bd8a0cc7ed@google.com Signed-off-by: Mayank Rungta Reviewed-by: Douglas Anderson Cc: Ian Rogers Cc: Jonathan Corbet Cc: Li Huafei Cc: Max Kellermann Cc: Petr Mladek Cc: Shuah Khan Cc: Stephane Erainan Cc: Wang Jinchao Cc: Yunhui Cui Signed-off-by: Andrew Morton --- .../admin-guide/lockup-watchdogs.rst | 149 ++++++++++++------ 1 file changed, 101 insertions(+), 48 deletions(-) diff --git a/Documentation/admin-guide/lockup-watchdogs.rst b/Documentation/admin-guide/lockup-watchdogs.rst index 1b374053771f..7ae7ce3abd2c 100644 --- a/Documentation/admin-guide/lockup-watchdogs.rst +++ b/Documentation/admin-guide/lockup-watchdogs.rst @@ -30,22 +30,23 @@ timeout is set through the confusingly named "kernel.panic" sysctl), to cause the system to reboot automatically after a specified amount of time. +Configuration +============= + +A kernel knob is provided that allows administrators to configure +this period. The "watchdog_thresh" parameter (default 10 seconds) +controls the threshold. The right value for a particular environment +is a trade-off between fast response to lockups and detection overhead. + Implementation ============== -The soft and hard lockup detectors are built on top of the hrtimer and -perf subsystems, respectively. A direct consequence of this is that, -in principle, they should work in any architecture where these -subsystems are present. +The soft lockup detector is built on top of the hrtimer subsystem. +The hard lockup detector is built on top of the perf subsystem +(on architectures that support it) or uses an SMP "buddy" system. -A periodic hrtimer runs to generate interrupts and kick the watchdog -job. An NMI perf event is generated every "watchdog_thresh" -(compile-time initialized to 10 and configurable through sysctl of the -same name) seconds to check for hardlockups. 
If any CPU in the system -does not receive any hrtimer interrupt during that time the -'hardlockup detector' (the handler for the NMI perf event) will -generate a kernel warning or call panic, depending on the -configuration. +Softlockup Detector +------------------- The watchdog job runs in a stop scheduling thread that updates a timestamp every time it is scheduled. If that timestamp is not updated @@ -55,53 +56,105 @@ will dump useful debug information to the system log, after which it will call panic if it was instructed to do so or resume execution of other kernel code. -The period of the hrtimer is 2*watchdog_thresh/5, which means it has -two or three chances to generate an interrupt before the hardlockup -detector kicks in. +Frequency and Heartbeats +------------------------ -As explained above, a kernel knob is provided that allows -administrators to configure the period of the hrtimer and the perf -event. The right value for a particular environment is a trade-off -between fast response to lockups and detection overhead. +The hrtimer used by the softlockup detector serves a dual purpose: +it detects softlockups, and it also generates the interrupts +(heartbeats) that the hardlockup detectors use to verify CPU liveness. -Detection Overhead ------------------- +The period of this hrtimer is 2*watchdog_thresh/5. This means the +hrtimer has two or three chances to generate an interrupt before the +NMI hardlockup detector kicks in. -The hardlockup detector checks for lockups using a periodic NMI perf -event. This means the time to detect a lockup can vary depending on -when the lockup occurs relative to the NMI check window. +Hardlockup Detector (NMI/Perf) +------------------------------ -**Best Case:** -In the best case scenario, the lockup occurs just before the first -heartbeat is due. The detector will notice the missing hrtimer -interrupt almost immediately during the next check. 
+On architectures that support NMI (Non-Maskable Interrupt) perf events, +a periodic NMI is generated every "watchdog_thresh" seconds. -:: +If any CPU in the system does not receive any hrtimer interrupt +(heartbeat) during the "watchdog_thresh" window, the 'hardlockup +detector' (the handler for the NMI perf event) will generate a kernel +warning or call panic. - Time 100.0: cpu 1 heartbeat - Time 100.1: hardlockup_check, cpu1 stores its state - Time 103.9: Hard Lockup on cpu1 - Time 104.0: cpu 1 heartbeat never comes - Time 110.1: hardlockup_check, cpu1 checks the state again, should be the same, declares lockup +**Detection Overhead (NMI):** - Time to detection: ~6 seconds +The time to detect a lockup can vary depending on when the lockup +occurs relative to the NMI check window. Examples below assume a watchdog_thresh of 10. -**Worst Case:** -In the worst case scenario, the lockup occurs shortly after a valid -interrupt (heartbeat) which itself happened just after the NMI check. -The next NMI check sees that the interrupt count has changed (due to -that one heartbeat), assumes the CPU is healthy, and resets the -baseline. The lockup is only detected at the subsequent check. +* **Best Case:** The lockup occurs just before the first heartbeat is + due. The detector will notice the missing hrtimer interrupt almost + immediately during the next check. 
-:: + :: - Time 100.0: hardlockup_check, cpu1 stores its state - Time 100.1: cpu 1 heartbeat - Time 100.2: Hard Lockup on cpu1 - Time 110.0: hardlockup_check, cpu1 stores its state (misses lockup as state changed) - Time 120.0: hardlockup_check, cpu1 checks the state again, should be the same, declares lockup + Time 100.0: cpu 1 heartbeat + Time 100.1: hardlockup_check, cpu1 stores its state + Time 103.9: Hard Lockup on cpu1 + Time 104.0: cpu 1 heartbeat never comes + Time 110.1: hardlockup_check, cpu1 checks the state again, should be the same, declares lockup - Time to detection: ~20 seconds + Time to detection: ~6 seconds + +* **Worst Case:** The lockup occurs shortly after a valid interrupt + (heartbeat) which itself happened just after the NMI check. The next + NMI check sees that the interrupt count has changed (due to that one + heartbeat), assumes the CPU is healthy, and resets the baseline. The + lockup is only detected at the subsequent check. + + :: + + Time 100.0: hardlockup_check, cpu1 stores its state + Time 100.1: cpu 1 heartbeat + Time 100.2: Hard Lockup on cpu1 + Time 110.0: hardlockup_check, cpu1 stores its state (misses lockup as state changed) + Time 120.0: hardlockup_check, cpu1 checks the state again, should be the same, declares lockup + + Time to detection: ~20 seconds + +Hardlockup Detector (Buddy) +--------------------------- + +On architectures or configurations where NMI perf events are not +available (or disabled), the kernel may use the "buddy" hardlockup +detector. This mechanism requires SMP (Symmetric Multi-Processing). + +In this mode, each CPU is assigned a "buddy" CPU to monitor. The +monitoring CPU runs its own hrtimer (the same one used for softlockup +detection) and checks if the buddy CPU's hrtimer interrupt count has +increased. + +To ensure timeliness and avoid false positives, the buddy system performs +checks at every hrtimer interval (2*watchdog_thresh/5, which is 4 seconds +by default). 
It uses a missed-interrupt threshold of 3. If the buddy's +interrupt count has not changed for 3 consecutive checks, it is assumed +that the buddy CPU is hardlocked (interrupts disabled). The monitoring +CPU will then trigger the hardlockup response (warning or panic). + +**Detection Overhead (Buddy):** + +With a default check interval of 4 seconds (watchdog_thresh = 10): + +* **Best case:** Lockup occurs just before a check. + Detected in ~8s (0s till 1st check + 4s till 2nd + 4s till 3rd). +* **Worst case:** Lockup occurs just after a check. + Detected in ~12s (4s till 1st check + 4s till 2nd + 4s till 3rd). + +**Limitations of the Buddy Detector:** + +1. **All-CPU Lockup:** If all CPUs lock up simultaneously, the buddy + detector cannot detect the condition because the monitoring CPUs + are also frozen. +2. **Stack Traces:** Unlike the NMI detector, the buddy detector + cannot directly interrupt the locked CPU to grab a stack trace. + It relies on architecture-specific mechanisms (like NMI backtrace + support) to try and retrieve the status of the locked CPU. If + such support is missing, the log may only show that a lockup + occurred without providing the locked CPU's stack. + +Watchdog Core Exclusion +======================= By default, the watchdog runs on all online cores. However, on a kernel configured with NO_HZ_FULL, by default the watchdog runs only From 4580900fe184dcb2a4e32e28951a3aa0726dc48b Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Mon, 23 Mar 2026 18:21:38 +0100 Subject: [PATCH 063/127] doc: watchdog: further improvements Make further additions and alterations to the watchdog documentation. 
Link: https://lkml.kernel.org/r/acF3tXBxSr0KOP9b@pathway.suse.cz Signed-off-by: Petr Mladek Reviewed-by: Douglas Anderson Cc: Ian Rogers Cc: Jonathan Corbet Cc: Li Huafei Cc: Max Kellermann Cc: Mayank Rungta Cc: Shuah Khan Cc: Stephane Erainan Cc: Wang Jinchao Cc: Yunhui Cui Signed-off-by: Andrew Morton --- .../admin-guide/lockup-watchdogs.rst | 44 ++++++++++--------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/Documentation/admin-guide/lockup-watchdogs.rst b/Documentation/admin-guide/lockup-watchdogs.rst index 7ae7ce3abd2c..26634982e4dd 100644 --- a/Documentation/admin-guide/lockup-watchdogs.rst +++ b/Documentation/admin-guide/lockup-watchdogs.rst @@ -41,31 +41,35 @@ is a trade-off between fast response to lockups and detection overhead. Implementation ============== -The soft lockup detector is built on top of the hrtimer subsystem. -The hard lockup detector is built on top of the perf subsystem -(on architectures that support it) or uses an SMP "buddy" system. - -Softlockup Detector -------------------- - -The watchdog job runs in a stop scheduling thread that updates a -timestamp every time it is scheduled. If that timestamp is not updated -for 2*watchdog_thresh seconds (the softlockup threshold) the -'softlockup detector' (coded inside the hrtimer callback function) -will dump useful debug information to the system log, after which it -will call panic if it was instructed to do so or resume execution of -other kernel code. +The soft and hard lockup detectors are built around a hrtimer. +In addition, the softlockup detector regularly schedules a job, and +the hard lockup detector might use Perf/NMI events on architectures +that support it. Frequency and Heartbeats ------------------------ -The hrtimer used by the softlockup detector serves a dual purpose: -it detects softlockups, and it also generates the interrupts -(heartbeats) that the hardlockup detectors use to verify CPU liveness. +The core of the detectors in a hrtimer. 
It serves multiple purposes: -The period of this hrtimer is 2*watchdog_thresh/5. This means the -hrtimer has two or three chances to generate an interrupt before the -NMI hardlockup detector kicks in. +- schedules watchdog job for the softlockup detector +- bumps the interrupt counter for hardlockup detectors (heartbeat) +- detects softlockups +- detects hardlockups in Buddy mode + +The period of this hrtimer is 2*watchdog_thresh/5, which is 4 seconds +by default. The hrtimer has two or three chances to generate an interrupt +(heartbeat) before the hardlockup detector kicks in. + +Softlockup Detector +------------------- + +The watchdog job is scheduled by the hrtimer and runs in a stop scheduling +thread. It updates a timestamp every time it is scheduled. If that timestamp +is not updated for 2*watchdog_thresh seconds (the softlockup threshold) the +'softlockup detector' (coded inside the hrtimer callback function) +will dump useful debug information to the system log, after which it +will call panic if it was instructed to do so or resume execution of +other kernel code. Hardlockup Detector (NMI/Perf) ------------------------------ From 0f8e38eeb995bf818a7f220de109d6b6b96038b4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 17 Mar 2026 14:58:18 +0100 Subject: [PATCH 064/127] do_notify_parent: sanitize the valid_signal() checks Now that kernel_clone() checks valid_signal(args->exit_signal), the "sig" argument of do_notify_parent() must always be valid or we have a bug. However, do_notify_parent() only checks that sig != -1 at the start, then it does another valid_signal() check before __send_signal_locked(). This is confusing. Change do_notify_parent() to WARN and return early if valid_signal(sig) is false. 
Link: https://lkml.kernel.org/r/abld-ilvMEZ7VgMw@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Deepanshu Kartikey Signed-off-by: Andrew Morton --- kernel/signal.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 86aad7badb9a..683ef92f7234 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2171,7 +2171,8 @@ bool do_notify_parent(struct task_struct *tsk, int sig) bool autoreap = false; u64 utime, stime; - WARN_ON_ONCE(sig == -1); + if (WARN_ON_ONCE(!valid_signal(sig))) + return false; /* do_notify_parent_cldstop should have been called instead. */ WARN_ON_ONCE(task_is_stopped_or_traced(tsk)); @@ -2252,7 +2253,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig) * Send with __send_signal as si_pid and si_uid are in the * parent's namespaces. */ - if (valid_signal(sig) && sig) + if (sig) __send_signal_locked(sig, &info, tsk->parent, PIDTYPE_TGID, false); __wake_up_parent(tsk, tsk->parent); spin_unlock_irqrestore(&psig->siglock, flags); From b8822d73d6fe0d43de3b98ccc995b7032993b1b7 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Wed, 18 Mar 2026 15:05:45 +0000 Subject: [PATCH 065/127] scripts/decodecode: return 0 on success The decodecode script always returns an exit code of 1, regardless of whether the operation was successful or not. This is because the "cleanup" function, which is registered to run on any script exit via "trap cleanup EXIT", contains an unconditional "exit 1". Remove the "exit 1" from the "cleanup" function so that it only performs the necessary file cleanup without forcing a non-zero exit status. Do that to ensure successful script executions now exit with code 0. Exits due to errors are all handled by the "die()" function and will still correctly exit with code 1. 
Link: https://lkml.kernel.org/r/20260318150545.2809311-1-derkling@google.com Signed-off-by: Patrick Bellasi Acked-by: Randy Dunlap Signed-off-by: Andrew Morton --- scripts/decodecode | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/decodecode b/scripts/decodecode index 6364218b2178..01d25dc110de 100755 --- a/scripts/decodecode +++ b/scripts/decodecode @@ -12,7 +12,6 @@ faultlinenum=1 cleanup() { rm -f $T $T.s $T.o $T.oo $T.aa $T.dis - exit 1 } die() { @@ -49,7 +48,7 @@ done if [ -z "$code" ]; then rm $T - exit + die "Code line not found" fi echo $code From f42b510990030bbc0d6e4ba4486decbcd952b924 Mon Sep 17 00:00:00 2001 From: Josh Law Date: Wed, 18 Mar 2026 07:48:05 +0000 Subject: [PATCH 066/127] lib/bch: fix signed left-shift undefined behavior Patch series "lib/bch: fix undefined behavior from signed left-shifts". Fix two instances of undefined behavior in lib/bch.c caused by left-shifting signed integers into or past the sign bit. While the kernel's -fno-strict-overflow flag prevents miscompilation today, these are formally UB per C11 6.5.7p4 and trivial to fix. This patch (of 2): Use 1u instead of 1 to avoid undefined behavior when left-shifting into the sign bit of a signed int. deg() can return up to 31, and 1 << 31 is UB per C11. 
Link: https://lkml.kernel.org/r/20260318074806.16527-2-objecting@objecting.org Signed-off-by: Josh Law Reviewed-by: Andrew Morton Cc: Ivan Djelic Signed-off-by: Andrew Morton --- lib/bch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/bch.c b/lib/bch.c index 9561c0828802..ef733f08082f 100644 --- a/lib/bch.c +++ b/lib/bch.c @@ -392,7 +392,7 @@ static void compute_syndromes(struct bch_control *bch, uint32_t *ecc, for (j = 0; j < 2*t; j += 2) syn[j] ^= a_pow(bch, (j+1)*(i+s)); - poly ^= (1 << i); + poly ^= (1u << i); } } while (s > 0); @@ -612,7 +612,7 @@ static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly, while (v) { i = deg(v); r ^= bch->xi_tab[i]; - v ^= (1 << i); + v ^= (1u << i); } /* verify root */ if ((gf_sqr(bch, r)^r) == u) { From 010d7d9846504f97da649595c61985137ca67cd4 Mon Sep 17 00:00:00 2001 From: Josh Law Date: Wed, 18 Mar 2026 07:48:06 +0000 Subject: [PATCH 067/127] lib/bch: fix signed shift overflow in build_mod8_tables Cast loop variable to unsigned int before left-shifting to avoid undefined behavior when i >= 128 and b == 3 (i << 24 overflows signed int). 
Link: https://lkml.kernel.org/r/20260318074806.16527-3-objecting@objecting.org Signed-off-by: Josh Law Reviewed-by: Andrew Morton Cc: Ivan Djelic Signed-off-by: Andrew Morton --- lib/bch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bch.c b/lib/bch.c index ef733f08082f..c991c71c4cbd 100644 --- a/lib/bch.c +++ b/lib/bch.c @@ -1116,7 +1116,7 @@ static void build_mod8_tables(struct bch_control *bch, const uint32_t *g) for (b = 0; b < 4; b++) { /* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */ tab = bch->mod8_tab + (b*256+i)*l; - data = i << (8*b); + data = (unsigned int)i << (8*b); while (data) { d = deg(data); /* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */ From 7ab3fbb01bc6d79091bc375e5235d360cd9b78be Mon Sep 17 00:00:00 2001 From: Junrui Luo Date: Sat, 7 Mar 2026 15:21:08 +0800 Subject: [PATCH 068/127] ocfs2/dlm: validate qr_numregions in dlm_match_regions() Patch series "ocfs2/dlm: fix two bugs in dlm_match_regions()". In dlm_match_regions(), the qr_numregions field from a DLM_QUERY_REGION network message is used to drive loops over the qr_regions buffer without sufficient validation. This series fixes two issues: - Patch 1 adds a bounds check to reject messages where qr_numregions exceeds O2NM_MAX_REGIONS. The o2net layer only validates message byte length; it does not constrain field values, so a crafted message can set qr_numregions up to 255 and trigger out-of-bounds reads past the 1024-byte qr_regions buffer. - Patch 2 fixes an off-by-one in the local-vs-remote comparison loop, which uses '<=' instead of '<', reading one entry past the valid range even when qr_numregions is within bounds. This patch (of 2): The qr_numregions field from a DLM_QUERY_REGION network message is used directly as loop bounds in dlm_match_regions() without checking against O2NM_MAX_REGIONS. 
Since qr_regions is sized for at most O2NM_MAX_REGIONS (32) entries, a crafted message with qr_numregions > 32 causes out-of-bounds reads past the qr_regions buffer. Add a bounds check for qr_numregions before entering the loops. Link: https://lkml.kernel.org/r/SYBPR01MB7881A334D02ACEE5E0645801AF7BA@SYBPR01MB7881.ausprd01.prod.outlook.com Link: https://lkml.kernel.org/r/SYBPR01MB788166F524AD04E262E174BEAF7BA@SYBPR01MB7881.ausprd01.prod.outlook.com Fixes: ea2034416b54 ("ocfs2/dlm: Add message DLM_QUERY_REGION") Signed-off-by: Junrui Luo Reported-by: Yuhao Jiang Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Signed-off-by: Andrew Morton --- fs/ocfs2/dlm/dlmdomain.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 70ca79e4bdc3..0a28cb5ded2a 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -980,6 +980,14 @@ static int dlm_match_regions(struct dlm_ctxt *dlm, goto bail; } + if (qr->qr_numregions > O2NM_MAX_REGIONS) { + mlog(ML_ERROR, "Domain %s: Joining node %d has invalid " + "number of heartbeat regions %u\n", + qr->qr_domain, qr->qr_node, qr->qr_numregions); + status = -EINVAL; + goto bail; + } + r = remote; for (i = 0; i < qr->qr_numregions; ++i) { mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, r); From 01b61e8dda9b0fdb0d4cda43de25f4e390554d7b Mon Sep 17 00:00:00 2001 From: Junrui Luo Date: Sat, 7 Mar 2026 15:21:09 +0800 Subject: [PATCH 069/127] ocfs2/dlm: fix off-by-one in dlm_match_regions() region comparison The local-vs-remote region comparison loop uses '<=' instead of '<', causing it to read one entry past the valid range of qr_regions. The other loops in the same function correctly use '<'. Fix the loop condition to use '<' for consistency and correctness. 
Link: https://lkml.kernel.org/r/SYBPR01MB78813DA26B50EC5E01F00566AF7BA@SYBPR01MB7881.ausprd01.prod.outlook.com Fixes: ea2034416b54 ("ocfs2/dlm: Add message DLM_QUERY_REGION") Signed-off-by: Junrui Luo Reported-by: Yuhao Jiang Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Signed-off-by: Andrew Morton --- fs/ocfs2/dlm/dlmdomain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 0a28cb5ded2a..dc9da9133c8e 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1002,7 +1002,7 @@ static int dlm_match_regions(struct dlm_ctxt *dlm, for (i = 0; i < localnr; ++i) { foundit = 0; r = remote; - for (j = 0; j <= qr->qr_numregions; ++j) { + for (j = 0; j < qr->qr_numregions; ++j) { if (!memcmp(l, r, O2HB_MAX_REGION_NAME_LEN)) { foundit = 1; break; From 929cc1a53a6f5a09e78451ba359e870727751f9e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 24 Feb 2026 11:26:32 +0100 Subject: [PATCH 070/127] fork: zero vmap stack using clear_pages() instead of memset() After the introduction of clear_pages() we exploit the fact that the process vm_area is allocated in contiguous pages to just clear them all in one swift operation. 
Link: https://lkml.kernel.org/r/20260224-mm-fork-clear-pages-v1-1-184c65a72d49@kernel.org Signed-off-by: Linus Walleij Suggested-by: Mateusz Guzik Link: https://lore.kernel.org/linux-mm/dpnwsp7dl4535rd7qmszanw6u5an2p74uxfex4dh53frpb7pu3@2bnjjavjrepe/ Suggested-by: Pasha Tatashin Link: https://lore.kernel.org/20240311164638.2015063-7-pasha.tatashin@soleen.com Reviewed-by: Lorenzo Stoakes (Oracle) Cc: Ankur Arora Cc: Ben Segall Cc: David Hildenbrand Cc: Dietmar Eggemann Cc: Ingo Molnar Cc: Juri Lelli Cc: Kees Cook Cc: Liam Howlett Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Rapoport Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Valentin Schneider Cc: Vincent Guittot Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index 1e80d4cdf538..7fe5ab7d4ec6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -345,7 +345,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) stack = kasan_reset_tag(vm_area->addr); /* Clear stale pointers from reused stack. */ - memset(stack, 0, THREAD_SIZE); + clear_pages(vm_area->addr, vm_area->nr_pages); tsk->stack_vm_area = vm_area; tsk->stack = stack; From d8593b8f93541d9fef81130f2f33f25c47593094 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:33 +0100 Subject: [PATCH 071/127] xor: assert that xor_blocks is not call from interrupt context Patch series "cleanup the RAID5 XOR library", v4. The XOR library used for the RAID5 parity is a bit of a mess right now. The main file sits in crypto/ despite not being cryptography and not using the crypto API, with the generic implementations sitting in include/asm-generic and the arch implementations sitting in an asm/ header in theory. The latter doesn't work for many cases, so architectures often build the code directly into the core kernel, or create another module for the architecture code. 
Change this to a single module in lib/ that also contains the architecture optimizations, similar to the library work Eric Biggers has done for the CRC and crypto libraries later. After that it changes to better calling conventions that allow for smarter architecture implementations (although none is contained here yet), and uses static_call to avoid indirection function call overhead. This patch (of 27): Most of the optimized xor_blocks versions require FPU/vector registers, which generally are not supported in interrupt context. Both callers already are in user context, so enforce this at the highest level. Link: https://lkml.kernel.org/r/20260327061704.3707577-1-hch@lst.de Link: https://lkml.kernel.org/r/20260327061704.3707577-2-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. 
Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- crypto/xor.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crypto/xor.c b/crypto/xor.c index f39621a57bb3..df530ddc9f06 100644 --- a/crypto/xor.c +++ b/crypto/xor.c @@ -28,6 +28,8 @@ xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs) { unsigned long *p1, *p2, *p3, *p4; + WARN_ON_ONCE(!in_task() || irqs_disabled() || softirq_count()); + p1 = (unsigned long *) srcs[0]; if (src_count == 1) { active_template->do_2(bytes, dest, p1); From b7ca705758b956f78eb776fbc85c99ee501674a2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:34 +0100 Subject: [PATCH 072/127] arm/xor: remove in_interrupt() handling xor_blocks can't be called from interrupt context, so remove the handling for that. Link: https://lkml.kernel.org/r/20260327061704.3707577-3-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. 
Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm/include/asm/xor.h | 41 +++++++++++--------------------------- 1 file changed, 12 insertions(+), 29 deletions(-) diff --git a/arch/arm/include/asm/xor.h b/arch/arm/include/asm/xor.h index 934b549905f5..bca2a6514746 100644 --- a/arch/arm/include/asm/xor.h +++ b/arch/arm/include/asm/xor.h @@ -4,7 +4,6 @@ * * Copyright (C) 2001 Russell King */ -#include #include #include #include @@ -156,13 +155,9 @@ static void xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2) { - if (in_interrupt()) { - xor_arm4regs_2(bytes, p1, p2); - } else { - kernel_neon_begin(); - xor_block_neon_inner.do_2(bytes, p1, p2); - kernel_neon_end(); - } + kernel_neon_begin(); + xor_block_neon_inner.do_2(bytes, p1, p2); + kernel_neon_end(); } static void @@ -170,13 +165,9 @@ xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2, const unsigned long * __restrict p3) { - if (in_interrupt()) { - xor_arm4regs_3(bytes, p1, p2, p3); - } else { - kernel_neon_begin(); - xor_block_neon_inner.do_3(bytes, p1, p2, p3); - kernel_neon_end(); - } + kernel_neon_begin(); + xor_block_neon_inner.do_3(bytes, p1, p2, p3); + kernel_neon_end(); } static void @@ -185,13 +176,9 @@ xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p3, const unsigned long * __restrict p4) { - if (in_interrupt()) { - xor_arm4regs_4(bytes, p1, p2, p3, p4); - } else { - kernel_neon_begin(); - xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4); - kernel_neon_end(); - } + kernel_neon_begin(); + xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4); + kernel_neon_end(); 
} static void @@ -201,13 +188,9 @@ xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p4, const unsigned long * __restrict p5) { - if (in_interrupt()) { - xor_arm4regs_5(bytes, p1, p2, p3, p4, p5); - } else { - kernel_neon_begin(); - xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5); - kernel_neon_end(); - } + kernel_neon_begin(); + xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5); + kernel_neon_end(); } static struct xor_block_template xor_block_neon = { From 675a0dd596e712404557286d0a883b54ee28e4f4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:35 +0100 Subject: [PATCH 073/127] arm64/xor: fix conflicting attributes for xor_block_template Commit 2c54b423cf85 ("arm64/xor: use EOR3 instructions when available") changes the definition to __ro_after_init instead of const, but failed to update the external declaration in xor.h. This was not found because xor-neon.c doesn't include , and can't easily do that due to current architecture of the XOR code. Link: https://lkml.kernel.org/r/20260327061704.3707577-4-hch@lst.de Fixes: 2c54b423cf85 ("arm64/xor: use EOR3 instructions when available") Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. 
Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm64/include/asm/xor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h index c38e3d017a79..bb7428d4ebc6 100644 --- a/arch/arm64/include/asm/xor.h +++ b/arch/arm64/include/asm/xor.h @@ -13,7 +13,7 @@ #ifdef CONFIG_KERNEL_MODE_NEON -extern struct xor_block_template const xor_block_inner_neon; +extern struct xor_block_template xor_block_inner_neon __ro_after_init; static void xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, From 3ea16a98518a39f85bcf62ce59c115c988e85457 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:36 +0100 Subject: [PATCH 074/127] um/xor: cleanup xor.h Since commit c055e3eae0f1 ("crypto: xor - use ktime for template benchmarking") the benchmarking works just fine even for TT_MODE_INFCPU, so drop the workarounds. Note that for CPUs supporting AVX2, which includes almost everything built in the last 10 years, the AVX2 implementation is forced anyway. CONFIG_X86_32 is always correctly set for UM in arch/x86/um/Kconfig, so don't override it either. Link: https://lkml.kernel.org/r/20260327061704.3707577-5-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Richard Weinberger Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. 
Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/um/include/asm/xor.h | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h index 647fae200c5d..99e5c7e1f475 100644 --- a/arch/um/include/asm/xor.h +++ b/arch/um/include/asm/xor.h @@ -2,23 +2,7 @@ #ifndef _ASM_UM_XOR_H #define _ASM_UM_XOR_H -#ifdef CONFIG_64BIT -#undef CONFIG_X86_32 -#define TT_CPU_INF_XOR_DEFAULT (AVX_SELECT(&xor_block_sse_pf64)) -#else -#define CONFIG_X86_32 1 -#define TT_CPU_INF_XOR_DEFAULT (AVX_SELECT(&xor_block_8regs)) -#endif - #include #include <../../x86/include/asm/xor.h> -#include - -#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT -#undef XOR_SELECT_TEMPLATE -/* pick an arbitrary one - measuring isn't possible with inf-cpu */ -#define XOR_SELECT_TEMPLATE(x) \ - (time_travel_mode == TT_MODE_INFCPU ? TT_CPU_INF_XOR_DEFAULT : x) -#endif #endif From 9e229025e2474115c151f08bdbdd3d8d5f159af3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:37 +0100 Subject: [PATCH 075/127] xor: move to lib/raid/ Move the RAID XOR code to lib/raid/ as it has nothing to do with the crypto API. Link: https://lkml.kernel.org/r/20260327061704.3707577-6-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. 
Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- crypto/Kconfig | 2 -- crypto/Makefile | 1 - lib/Kconfig | 1 + lib/Makefile | 2 +- lib/raid/Kconfig | 4 ++++ lib/raid/Makefile | 3 +++ lib/raid/xor/Makefile | 5 +++++ crypto/xor.c => lib/raid/xor/xor-core.c | 0 8 files changed, 14 insertions(+), 4 deletions(-) create mode 100644 lib/raid/Kconfig create mode 100644 lib/raid/Makefile create mode 100644 lib/raid/xor/Makefile rename crypto/xor.c => lib/raid/xor/xor-core.c (100%) diff --git a/crypto/Kconfig b/crypto/Kconfig index b4bb85e8e226..97d73136a0b4 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -2,8 +2,6 @@ # # Generic algorithms support # -config XOR_BLOCKS - tristate # # async_tx api: hardware offloaded memory transfer/transform support diff --git a/crypto/Makefile b/crypto/Makefile index 04e269117589..795c2eea51fe 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -196,7 +196,6 @@ obj-$(CONFIG_CRYPTO_ECRDSA) += ecrdsa_generic.o # # generic algorithms and the async_tx api # -obj-$(CONFIG_XOR_BLOCKS) += xor.o obj-$(CONFIG_ASYNC_CORE) += async_tx/ obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys/ crypto_simd-y := simd.o diff --git a/lib/Kconfig b/lib/Kconfig index 2b0c56a53a2a..00a9509636c1 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -138,6 +138,7 @@ config TRACE_MMIO_ACCESS source "lib/crc/Kconfig" source "lib/crypto/Kconfig" +source "lib/raid/Kconfig" config XXHASH tristate diff --git a/lib/Makefile b/lib/Makefile index 60c9c9e79375..9ea149e618d2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -120,7 +120,7 @@ endif obj-$(CONFIG_DEBUG_INFO_REDUCED) += debug_info.o CFLAGS_debug_info.o += $(call 
cc-option, -femit-struct-debug-detailed=any) -obj-y += math/ crc/ crypto/ tests/ vdso/ +obj-y += math/ crc/ crypto/ tests/ vdso/ raid/ obj-$(CONFIG_GENERIC_IOMAP) += iomap.o obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o diff --git a/lib/raid/Kconfig b/lib/raid/Kconfig new file mode 100644 index 000000000000..01b73a1c303f --- /dev/null +++ b/lib/raid/Kconfig @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 + +config XOR_BLOCKS + tristate diff --git a/lib/raid/Makefile b/lib/raid/Makefile new file mode 100644 index 000000000000..3540fe846dc4 --- /dev/null +++ b/lib/raid/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-y += xor/ diff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile new file mode 100644 index 000000000000..7bca0ce8e90a --- /dev/null +++ b/lib/raid/xor/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_XOR_BLOCKS) += xor.o + +xor-y += xor-core.o diff --git a/crypto/xor.c b/lib/raid/xor/xor-core.c similarity index 100% rename from crypto/xor.c rename to lib/raid/xor/xor-core.c From 7c6e6b2b48e8e9f3a1ad57dc78a8d33947cb5dda Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:38 +0100 Subject: [PATCH 076/127] xor: small cleanups Update the to of file comment to be correct and non-redundant, and drop the unused BH_TRACE define. Link: https://lkml.kernel.org/r/20260327061704.3707577-7-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. 
Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- lib/raid/xor/xor-core.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/raid/xor/xor-core.c b/lib/raid/xor/xor-core.c index df530ddc9f06..c54f48405c40 100644 --- a/lib/raid/xor/xor-core.c +++ b/lib/raid/xor/xor-core.c @@ -1,14 +1,11 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * xor.c : Multiple Devices driver for Linux - * * Copyright (C) 1996, 1997, 1998, 1999, 2000, * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson. * - * Dispatch optimized RAID-5 checksumming functions. + * Dispatch optimized XOR parity functions. */ -#define BH_TRACE 0 #include #include #include From 0471415f3fd6007bf435dbf158060bc646d7813f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:39 +0100 Subject: [PATCH 077/127] xor: cleanup registration and probing Originally, the XOR code benchmarked all algorithms at load time, but it has since then been hacked multiple times to allow forcing an algorithm, and then commit 524ccdbdfb52 ("crypto: xor - defer load time benchmark to a later time") changed the logic to a two-step process or registration and benchmarking, but only when built-in. Rework this, so that the XOR_TRY_TEMPLATES macro magic now always just deals with adding the templates to the list, and benchmarking is always done in a second pass; for modular builds from module_init, and for the built-in case using a separate init call level. 
Link: https://lkml.kernel.org/r/20260327061704.3707577-8-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- lib/raid/xor/xor-core.c | 100 ++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 51 deletions(-) diff --git a/lib/raid/xor/xor-core.c b/lib/raid/xor/xor-core.c index c54f48405c40..e6e593e404fb 100644 --- a/lib/raid/xor/xor-core.c +++ b/lib/raid/xor/xor-core.c @@ -52,29 +52,14 @@ EXPORT_SYMBOL(xor_blocks); /* Set of all registered templates. 
*/ static struct xor_block_template *__initdata template_list; +static bool __initdata xor_forced = false; -#ifndef MODULE static void __init do_xor_register(struct xor_block_template *tmpl) { tmpl->next = template_list; template_list = tmpl; } -static int __init register_xor_blocks(void) -{ - active_template = XOR_SELECT_TEMPLATE(NULL); - - if (!active_template) { -#define xor_speed do_xor_register - // register all the templates and pick the first as the default - XOR_TRY_TEMPLATES; -#undef xor_speed - active_template = template_list; - } - return 0; -} -#endif - #define BENCH_SIZE 4096 #define REPS 800U @@ -85,9 +70,6 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2) unsigned long reps; ktime_t min, start, t0; - tmpl->next = template_list; - template_list = tmpl; - preempt_disable(); reps = 0; @@ -111,63 +93,79 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2) pr_info(" %-16s: %5d MB/sec\n", tmpl->name, speed); } -static int __init -calibrate_xor_blocks(void) +static int __init calibrate_xor_blocks(void) { void *b1, *b2; struct xor_block_template *f, *fastest; - fastest = XOR_SELECT_TEMPLATE(NULL); - - if (fastest) { - printk(KERN_INFO "xor: automatically using best " - "checksumming function %-10s\n", - fastest->name); - goto out; - } + if (xor_forced) + return 0; b1 = (void *) __get_free_pages(GFP_KERNEL, 2); if (!b1) { - printk(KERN_WARNING "xor: Yikes! No memory available.\n"); + pr_warn("xor: Yikes! 
No memory available.\n"); return -ENOMEM; } b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE; - /* - * If this arch/cpu has a short-circuited selection, don't loop through - * all the possible functions, just test the best one - */ - -#define xor_speed(templ) do_xor_speed((templ), b1, b2) - - printk(KERN_INFO "xor: measuring software checksum speed\n"); - template_list = NULL; - XOR_TRY_TEMPLATES; + pr_info("xor: measuring software checksum speed\n"); fastest = template_list; - for (f = fastest; f; f = f->next) + for (f = template_list; f; f = f->next) { + do_xor_speed(f, b1, b2); if (f->speed > fastest->speed) fastest = f; - + } + active_template = fastest; pr_info("xor: using function: %s (%d MB/sec)\n", fastest->name, fastest->speed); -#undef xor_speed - free_pages((unsigned long)b1, 2); -out: - active_template = fastest; return 0; } -static __exit void xor_exit(void) { } +static int __init xor_init(void) +{ + /* + * If this arch/cpu has a short-circuited selection, don't loop through + * all the possible functions, just use the best one. + */ + active_template = XOR_SELECT_TEMPLATE(NULL); + if (active_template) { + pr_info("xor: automatically using best checksumming function %-10s\n", + active_template->name); + xor_forced = true; + return 0; + } + +#define xor_speed do_xor_register + XOR_TRY_TEMPLATES; +#undef xor_speed + +#ifdef MODULE + return calibrate_xor_blocks(); +#else + /* + * Pick the first template as the temporary default until calibration + * happens. + */ + active_template = template_list; + return 0; +#endif +} + +static __exit void xor_exit(void) +{ +} MODULE_DESCRIPTION("RAID-5 checksumming functions"); MODULE_LICENSE("GPL"); +/* + * When built-in we must register the default template before md, but we don't + * want calibration to run that early as that would delay the boot process. 
+ */ #ifndef MODULE -/* when built-in xor.o must initialize before drivers/md/md.o */ -core_initcall(register_xor_blocks); +__initcall(calibrate_xor_blocks); #endif - -module_init(calibrate_xor_blocks); +core_initcall(xor_init); module_exit(xor_exit); From 54e20be48fd4bc1df5f6fbca552b5be8c47dbd18 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:40 +0100 Subject: [PATCH 078/127] xor: split xor.h Keep xor.h for the public API, and split the struct xor_block_template definition that is only needed by the xor.ko core and architecture-specific optimizations into a separate xor_impl.h header. Link: https://lkml.kernel.org/r/20260327061704.3707577-9-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. 
Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm/lib/xor-neon.c | 1 + arch/arm64/lib/xor-neon.c | 1 + arch/s390/lib/xor.c | 2 +- include/linux/raid/xor.h | 22 +--------------------- include/linux/raid/xor_impl.h | 25 +++++++++++++++++++++++++ lib/raid/xor/xor-core.c | 1 + 6 files changed, 30 insertions(+), 22 deletions(-) create mode 100644 include/linux/raid/xor_impl.h diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c index cf57fca97908..282980b9bf2a 100644 --- a/arch/arm/lib/xor-neon.c +++ b/arch/arm/lib/xor-neon.c @@ -6,6 +6,7 @@ */ #include +#include #include MODULE_DESCRIPTION("NEON accelerated XOR implementation"); diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c index 8fffebfa17b2..351aba92d932 100644 --- a/arch/arm64/lib/xor-neon.c +++ b/arch/arm64/lib/xor-neon.c @@ -7,6 +7,7 @@ */ #include +#include #include #include diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c index 5363e4c2462d..3bbe21b40e66 100644 --- a/arch/s390/lib/xor.c +++ b/arch/s390/lib/xor.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1, diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h index 51b811b62322..02bda8d99534 100644 --- a/include/linux/raid/xor.h +++ b/include/linux/raid/xor.h @@ -7,24 +7,4 @@ extern void xor_blocks(unsigned int count, unsigned int bytes, void *dest, void **srcs); -struct xor_block_template { - struct xor_block_template *next; - const char *name; - int speed; - void (*do_2)(unsigned long, unsigned long * __restrict, - const unsigned long * __restrict); - void (*do_3)(unsigned long, unsigned long * __restrict, - const unsigned 
long * __restrict, - const unsigned long * __restrict); - void (*do_4)(unsigned long, unsigned long * __restrict, - const unsigned long * __restrict, - const unsigned long * __restrict, - const unsigned long * __restrict); - void (*do_5)(unsigned long, unsigned long * __restrict, - const unsigned long * __restrict, - const unsigned long * __restrict, - const unsigned long * __restrict, - const unsigned long * __restrict); -}; - -#endif +#endif /* _XOR_H */ diff --git a/include/linux/raid/xor_impl.h b/include/linux/raid/xor_impl.h new file mode 100644 index 000000000000..a1890cd66812 --- /dev/null +++ b/include/linux/raid/xor_impl.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _XOR_IMPL_H +#define _XOR_IMPL_H + +struct xor_block_template { + struct xor_block_template *next; + const char *name; + int speed; + void (*do_2)(unsigned long, unsigned long * __restrict, + const unsigned long * __restrict); + void (*do_3)(unsigned long, unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict); + void (*do_4)(unsigned long, unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict); + void (*do_5)(unsigned long, unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict, + const unsigned long * __restrict); +}; + +#endif /* _XOR_IMPL_H */ diff --git a/lib/raid/xor/xor-core.c b/lib/raid/xor/xor-core.c index e6e593e404fb..db1824011a12 100644 --- a/lib/raid/xor/xor-core.c +++ b/lib/raid/xor/xor-core.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include From 35ebc4de105989034f1250e40eb6dbf5e136b04e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:41 +0100 Subject: [PATCH 079/127] xor: remove macro abuse for XOR implementation registrations Drop the pretty confusing historic XOR_TRY_TEMPLATES and 
XOR_SELECT_TEMPLATE, and instead let the architectures provide a arch_xor_init that calls either xor_register to register candidates or xor_force to force a specific implementation. Link: https://lkml.kernel.org/r/20260327061704.3707577-10-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/alpha/include/asm/xor.h | 29 ++++++++++++---------- arch/arm/include/asm/xor.h | 25 +++++++++---------- arch/arm64/include/asm/xor.h | 18 +++++++------- arch/loongarch/include/asm/xor.h | 42 ++++++++++++-------------------- arch/powerpc/include/asm/xor.h | 31 ++++++++++------------- arch/riscv/include/asm/xor.h | 19 ++++++++------- arch/s390/include/asm/xor.h | 12 ++++----- arch/sparc/include/asm/xor_32.h | 14 +++++------ arch/sparc/include/asm/xor_64.h | 31 +++++++++++------------ arch/x86/include/asm/xor.h | 3 --- arch/x86/include/asm/xor_32.h | 36 ++++++++++++++------------- arch/x86/include/asm/xor_64.h | 18 ++++++++------ arch/x86/include/asm/xor_avx.h | 9 ------- include/asm-generic/xor.h | 8 ------ include/linux/raid/xor_impl.h | 5 ++++ lib/raid/xor/xor-core.c | 41 +++++++++++++++++++++++-------- 16 files changed, 168 insertions(+), 173 deletions(-) diff --git a/arch/alpha/include/asm/xor.h 
b/arch/alpha/include/asm/xor.h index e0de0c233ab9..4c8085711df1 100644 --- a/arch/alpha/include/asm/xor.h +++ b/arch/alpha/include/asm/xor.h @@ -851,16 +851,19 @@ static struct xor_block_template xor_block_alpha_prefetch = { /* For grins, also test the generic routines. */ #include -#undef XOR_TRY_TEMPLATES -#define XOR_TRY_TEMPLATES \ - do { \ - xor_speed(&xor_block_8regs); \ - xor_speed(&xor_block_32regs); \ - xor_speed(&xor_block_alpha); \ - xor_speed(&xor_block_alpha_prefetch); \ - } while (0) - -/* Force the use of alpha_prefetch if EV6, as it is significantly - faster in the cold cache case. */ -#define XOR_SELECT_TEMPLATE(FASTEST) \ - (implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : FASTEST) +/* + * Force the use of alpha_prefetch if EV6, as it is significantly faster in the + * cold cache case. + */ +#define arch_xor_init arch_xor_init +static __always_inline void __init arch_xor_init(void) +{ + if (implver() == IMPLVER_EV6) { + xor_force(&xor_block_alpha_prefetch); + } else { + xor_register(&xor_block_8regs); + xor_register(&xor_block_32regs); + xor_register(&xor_block_alpha); + xor_register(&xor_block_alpha_prefetch); + } +} diff --git a/arch/arm/include/asm/xor.h b/arch/arm/include/asm/xor.h index bca2a6514746..b2dcd49186e2 100644 --- a/arch/arm/include/asm/xor.h +++ b/arch/arm/include/asm/xor.h @@ -138,15 +138,6 @@ static struct xor_block_template xor_block_arm4regs = { .do_5 = xor_arm4regs_5, }; -#undef XOR_TRY_TEMPLATES -#define XOR_TRY_TEMPLATES \ - do { \ - xor_speed(&xor_block_arm4regs); \ - xor_speed(&xor_block_8regs); \ - xor_speed(&xor_block_32regs); \ - NEON_TEMPLATES; \ - } while (0) - #ifdef CONFIG_KERNEL_MODE_NEON extern struct xor_block_template const xor_block_neon_inner; @@ -201,8 +192,16 @@ static struct xor_block_template xor_block_neon = { .do_5 = xor_neon_5 }; -#define NEON_TEMPLATES \ - do { if (cpu_has_neon()) xor_speed(&xor_block_neon); } while (0) -#else -#define NEON_TEMPLATES +#endif /* CONFIG_KERNEL_MODE_NEON */ + 
+#define arch_xor_init arch_xor_init +static __always_inline void __init arch_xor_init(void) +{ + xor_register(&xor_block_arm4regs); + xor_register(&xor_block_8regs); + xor_register(&xor_block_32regs); +#ifdef CONFIG_KERNEL_MODE_NEON + if (cpu_has_neon()) + xor_register(&xor_block_neon); #endif +} diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h index bb7428d4ebc6..3cee1eb86371 100644 --- a/arch/arm64/include/asm/xor.h +++ b/arch/arm64/include/asm/xor.h @@ -60,14 +60,14 @@ static struct xor_block_template xor_block_arm64 = { .do_4 = xor_neon_4, .do_5 = xor_neon_5 }; -#undef XOR_TRY_TEMPLATES -#define XOR_TRY_TEMPLATES \ - do { \ - xor_speed(&xor_block_8regs); \ - xor_speed(&xor_block_32regs); \ - if (cpu_has_neon()) { \ - xor_speed(&xor_block_arm64);\ - } \ - } while (0) + +#define arch_xor_init arch_xor_init +static __always_inline void __init arch_xor_init(void) +{ + xor_register(&xor_block_8regs); + xor_register(&xor_block_32regs); + if (cpu_has_neon()) + xor_register(&xor_block_arm64); +} #endif /* ! 
CONFIG_KERNEL_MODE_NEON */ diff --git a/arch/loongarch/include/asm/xor.h b/arch/loongarch/include/asm/xor.h index 12467fffee46..d17c0e3b047f 100644 --- a/arch/loongarch/include/asm/xor.h +++ b/arch/loongarch/include/asm/xor.h @@ -16,14 +16,6 @@ static struct xor_block_template xor_block_lsx = { .do_4 = xor_lsx_4, .do_5 = xor_lsx_5, }; - -#define XOR_SPEED_LSX() \ - do { \ - if (cpu_has_lsx) \ - xor_speed(&xor_block_lsx); \ - } while (0) -#else /* CONFIG_CPU_HAS_LSX */ -#define XOR_SPEED_LSX() #endif /* CONFIG_CPU_HAS_LSX */ #ifdef CONFIG_CPU_HAS_LASX @@ -34,14 +26,6 @@ static struct xor_block_template xor_block_lasx = { .do_4 = xor_lasx_4, .do_5 = xor_lasx_5, }; - -#define XOR_SPEED_LASX() \ - do { \ - if (cpu_has_lasx) \ - xor_speed(&xor_block_lasx); \ - } while (0) -#else /* CONFIG_CPU_HAS_LASX */ -#define XOR_SPEED_LASX() #endif /* CONFIG_CPU_HAS_LASX */ /* @@ -54,15 +38,21 @@ static struct xor_block_template xor_block_lasx = { */ #include -#undef XOR_TRY_TEMPLATES -#define XOR_TRY_TEMPLATES \ -do { \ - xor_speed(&xor_block_8regs); \ - xor_speed(&xor_block_8regs_p); \ - xor_speed(&xor_block_32regs); \ - xor_speed(&xor_block_32regs_p); \ - XOR_SPEED_LSX(); \ - XOR_SPEED_LASX(); \ -} while (0) +#define arch_xor_init arch_xor_init +static __always_inline void __init arch_xor_init(void) +{ + xor_register(&xor_block_8regs); + xor_register(&xor_block_8regs_p); + xor_register(&xor_block_32regs); + xor_register(&xor_block_32regs_p); +#ifdef CONFIG_CPU_HAS_LSX + if (cpu_has_lsx) + xor_register(&xor_block_lsx); +#endif +#ifdef CONFIG_CPU_HAS_LASX + if (cpu_has_lasx) + xor_register(&xor_block_lasx); +#endif +} #endif /* _ASM_LOONGARCH_XOR_H */ diff --git a/arch/powerpc/include/asm/xor.h b/arch/powerpc/include/asm/xor.h index 37d05c11d09c..30224c5279c4 100644 --- a/arch/powerpc/include/asm/xor.h +++ b/arch/powerpc/include/asm/xor.h @@ -21,27 +21,22 @@ static struct xor_block_template xor_block_altivec = { .do_4 = xor_altivec_4, .do_5 = xor_altivec_5, }; - -#define 
XOR_SPEED_ALTIVEC() \ - do { \ - if (cpu_has_feature(CPU_FTR_ALTIVEC)) \ - xor_speed(&xor_block_altivec); \ - } while (0) -#else -#define XOR_SPEED_ALTIVEC() -#endif +#endif /* CONFIG_ALTIVEC */ /* Also try the generic routines. */ #include -#undef XOR_TRY_TEMPLATES -#define XOR_TRY_TEMPLATES \ -do { \ - xor_speed(&xor_block_8regs); \ - xor_speed(&xor_block_8regs_p); \ - xor_speed(&xor_block_32regs); \ - xor_speed(&xor_block_32regs_p); \ - XOR_SPEED_ALTIVEC(); \ -} while (0) +#define arch_xor_init arch_xor_init +static __always_inline void __init arch_xor_init(void) +{ + xor_register(&xor_block_8regs); + xor_register(&xor_block_8regs_p); + xor_register(&xor_block_32regs); + xor_register(&xor_block_32regs_p); +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + xor_register(&xor_block_altivec); +#endif +} #endif /* _ASM_POWERPC_XOR_H */ diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h index 96011861e46b..ed5f27903efc 100644 --- a/arch/riscv/include/asm/xor.h +++ b/arch/riscv/include/asm/xor.h @@ -55,14 +55,15 @@ static struct xor_block_template xor_block_rvv = { .do_4 = xor_vector_4, .do_5 = xor_vector_5 }; +#endif /* CONFIG_RISCV_ISA_V */ -#undef XOR_TRY_TEMPLATES -#define XOR_TRY_TEMPLATES \ - do { \ - xor_speed(&xor_block_8regs); \ - xor_speed(&xor_block_32regs); \ - if (has_vector()) { \ - xor_speed(&xor_block_rvv);\ - } \ - } while (0) +#define arch_xor_init arch_xor_init +static __always_inline void __init arch_xor_init(void) +{ + xor_register(&xor_block_8regs); + xor_register(&xor_block_32regs); +#ifdef CONFIG_RISCV_ISA_V + if (has_vector()) + xor_register(&xor_block_rvv); #endif +} diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h index 857d6759b67f..4e2233f64da9 100644 --- a/arch/s390/include/asm/xor.h +++ b/arch/s390/include/asm/xor.h @@ -10,12 +10,10 @@ extern struct xor_block_template xor_block_xc; -#undef XOR_TRY_TEMPLATES -#define XOR_TRY_TEMPLATES \ -do { \ - xor_speed(&xor_block_xc); \ -} while 
(0) - -#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_xc) +#define arch_xor_init arch_xor_init +static __always_inline void __init arch_xor_init(void) +{ + xor_force(&xor_block_xc); +} #endif /* _ASM_S390_XOR_H */ diff --git a/arch/sparc/include/asm/xor_32.h b/arch/sparc/include/asm/xor_32.h index 0351813cf3af..8fbf0c07ec28 100644 --- a/arch/sparc/include/asm/xor_32.h +++ b/arch/sparc/include/asm/xor_32.h @@ -259,10 +259,10 @@ static struct xor_block_template xor_block_SPARC = { /* For grins, also test the generic routines. */ #include -#undef XOR_TRY_TEMPLATES -#define XOR_TRY_TEMPLATES \ - do { \ - xor_speed(&xor_block_8regs); \ - xor_speed(&xor_block_32regs); \ - xor_speed(&xor_block_SPARC); \ - } while (0) +#define arch_xor_init arch_xor_init +static __always_inline void __init arch_xor_init(void) +{ + xor_register(&xor_block_8regs); + xor_register(&xor_block_32regs); + xor_register(&xor_block_SPARC); +} diff --git a/arch/sparc/include/asm/xor_64.h b/arch/sparc/include/asm/xor_64.h index caaddea8ad79..e0482ecc0a68 100644 --- a/arch/sparc/include/asm/xor_64.h +++ b/arch/sparc/include/asm/xor_64.h @@ -60,20 +60,17 @@ static struct xor_block_template xor_block_niagara = { .do_5 = xor_niagara_5, }; -#undef XOR_TRY_TEMPLATES -#define XOR_TRY_TEMPLATES \ - do { \ - xor_speed(&xor_block_VIS); \ - xor_speed(&xor_block_niagara); \ - } while (0) - -/* For VIS for everything except Niagara. */ -#define XOR_SELECT_TEMPLATE(FASTEST) \ - ((tlb_type == hypervisor && \ - (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || \ - sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || \ - sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || \ - sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || \ - sun4v_chip_type == SUN4V_CHIP_NIAGARA5)) ? \ - &xor_block_niagara : \ - &xor_block_VIS) +#define arch_xor_init arch_xor_init +static __always_inline void __init arch_xor_init(void) +{ + /* Force VIS for everything except Niagara. 
*/ + if (tlb_type == hypervisor && + (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA5)) + xor_force(&xor_block_niagara); + else + xor_force(&xor_block_VIS); +} diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index 7b0307acc410..33f5620d8d69 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h @@ -496,7 +496,4 @@ static struct xor_block_template xor_block_sse_pf64 = { # include #endif -#define XOR_SELECT_TEMPLATE(FASTEST) \ - AVX_SELECT(FASTEST) - #endif /* _ASM_X86_XOR_H */ diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index 7a6b9474591e..ee32d08c27bc 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -552,22 +552,24 @@ static struct xor_block_template xor_block_pIII_sse = { /* We force the use of the SSE xor block because it can write around L2. We may also be able to load into the L1 only depending on how the cpu deals with a load to a line that is being prefetched. 
*/ -#undef XOR_TRY_TEMPLATES -#define XOR_TRY_TEMPLATES \ -do { \ - AVX_XOR_SPEED; \ - if (boot_cpu_has(X86_FEATURE_XMM)) { \ - xor_speed(&xor_block_pIII_sse); \ - xor_speed(&xor_block_sse_pf64); \ - } else if (boot_cpu_has(X86_FEATURE_MMX)) { \ - xor_speed(&xor_block_pII_mmx); \ - xor_speed(&xor_block_p5_mmx); \ - } else { \ - xor_speed(&xor_block_8regs); \ - xor_speed(&xor_block_8regs_p); \ - xor_speed(&xor_block_32regs); \ - xor_speed(&xor_block_32regs_p); \ - } \ -} while (0) +#define arch_xor_init arch_xor_init +static __always_inline void __init arch_xor_init(void) +{ + if (boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_OSXSAVE)) { + xor_force(&xor_block_avx); + } else if (boot_cpu_has(X86_FEATURE_XMM)) { + xor_register(&xor_block_pIII_sse); + xor_register(&xor_block_sse_pf64); + } else if (boot_cpu_has(X86_FEATURE_MMX)) { + xor_register(&xor_block_pII_mmx); + xor_register(&xor_block_p5_mmx); + } else { + xor_register(&xor_block_8regs); + xor_register(&xor_block_8regs_p); + xor_register(&xor_block_32regs); + xor_register(&xor_block_32regs_p); + } +} #endif /* _ASM_X86_XOR_32_H */ diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h index 0307e4ec5044..2d2ceb241866 100644 --- a/arch/x86/include/asm/xor_64.h +++ b/arch/x86/include/asm/xor_64.h @@ -17,12 +17,16 @@ static struct xor_block_template xor_block_sse = { /* We force the use of the SSE xor block because it can write around L2. We may also be able to load into the L1 only depending on how the cpu deals with a load to a line that is being prefetched. 
*/ -#undef XOR_TRY_TEMPLATES -#define XOR_TRY_TEMPLATES \ -do { \ - AVX_XOR_SPEED; \ - xor_speed(&xor_block_sse_pf64); \ - xor_speed(&xor_block_sse); \ -} while (0) +#define arch_xor_init arch_xor_init +static __always_inline void __init arch_xor_init(void) +{ + if (boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_OSXSAVE)) { + xor_force(&xor_block_avx); + } else { + xor_register(&xor_block_sse_pf64); + xor_register(&xor_block_sse); + } +} #endif /* _ASM_X86_XOR_64_H */ diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index 7f81dd5897f4..c600888436bb 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -166,13 +166,4 @@ static struct xor_block_template xor_block_avx = { .do_5 = xor_avx_5, }; -#define AVX_XOR_SPEED \ -do { \ - if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \ - xor_speed(&xor_block_avx); \ -} while (0) - -#define AVX_SELECT(FASTEST) \ - (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? 
&xor_block_avx : FASTEST) - #endif diff --git a/include/asm-generic/xor.h b/include/asm-generic/xor.h index 44509d48fca2..79c0096aa9d9 100644 --- a/include/asm-generic/xor.h +++ b/include/asm-generic/xor.h @@ -728,11 +728,3 @@ static struct xor_block_template xor_block_32regs_p __maybe_unused = { .do_4 = xor_32regs_p_4, .do_5 = xor_32regs_p_5, }; - -#define XOR_TRY_TEMPLATES \ - do { \ - xor_speed(&xor_block_8regs); \ - xor_speed(&xor_block_8regs_p); \ - xor_speed(&xor_block_32regs); \ - xor_speed(&xor_block_32regs_p); \ - } while (0) diff --git a/include/linux/raid/xor_impl.h b/include/linux/raid/xor_impl.h index a1890cd66812..6ed4c445ab24 100644 --- a/include/linux/raid/xor_impl.h +++ b/include/linux/raid/xor_impl.h @@ -2,6 +2,8 @@ #ifndef _XOR_IMPL_H #define _XOR_IMPL_H +#include + struct xor_block_template { struct xor_block_template *next; const char *name; @@ -22,4 +24,7 @@ struct xor_block_template { const unsigned long * __restrict); }; +void __init xor_register(struct xor_block_template *tmpl); +void __init xor_force(struct xor_block_template *tmpl); + #endif /* _XOR_IMPL_H */ diff --git a/lib/raid/xor/xor-core.c b/lib/raid/xor/xor-core.c index db1824011a12..93608b5fece9 100644 --- a/lib/raid/xor/xor-core.c +++ b/lib/raid/xor/xor-core.c @@ -14,10 +14,6 @@ #include #include -#ifndef XOR_SELECT_TEMPLATE -#define XOR_SELECT_TEMPLATE(x) (x) -#endif - /* The xor routines to use. */ static struct xor_block_template *active_template; @@ -55,12 +51,33 @@ EXPORT_SYMBOL(xor_blocks); static struct xor_block_template *__initdata template_list; static bool __initdata xor_forced = false; -static void __init do_xor_register(struct xor_block_template *tmpl) +/** + * xor_register - register a XOR template + * @tmpl: template to register + * + * Register a XOR implementation with the core. Registered implementations + * will be measured by a trivial benchmark, and the fastest one is chosen + * unless an implementation is forced using xor_force(). 
+ */ +void __init xor_register(struct xor_block_template *tmpl) { tmpl->next = template_list; template_list = tmpl; } +/** + * xor_force - force use of a XOR template + * @tmpl: template to force + * + * Register a XOR implementation with the core and force using it. Forcing + * an implementation will make the core ignore any template registered using + * xor_register(), or any previous implementation forced using xor_force(). + */ +void __init xor_force(struct xor_block_template *tmpl) +{ + active_template = tmpl; +} + #define BENCH_SIZE 4096 #define REPS 800U @@ -126,11 +143,19 @@ static int __init calibrate_xor_blocks(void) static int __init xor_init(void) { +#ifdef arch_xor_init + arch_xor_init(); +#else + xor_register(&xor_block_8regs); + xor_register(&xor_block_8regs_p); + xor_register(&xor_block_32regs); + xor_register(&xor_block_32regs_p); +#endif + /* * If this arch/cpu has a short-circuited selection, don't loop through * all the possible functions, just use the best one. */ - active_template = XOR_SELECT_TEMPLATE(NULL); if (active_template) { pr_info("xor: automatically using best checksumming function %-10s\n", active_template->name); @@ -138,10 +163,6 @@ static int __init xor_init(void) return 0; } -#define xor_speed do_xor_register - XOR_TRY_TEMPLATES; -#undef xor_speed - #ifdef MODULE return calibrate_xor_blocks(); #else From c46928fdcfa01b6ca422e9c2b49e8a1072260cf3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:42 +0100 Subject: [PATCH 080/127] xor: move generic implementations out of asm-generic/xor.h Move the generic implementations from asm-generic/xor.h to per-implementation .c files in lib/raid. This will build them unconditionally even when an architecture forces a specific implementation, but as we'll need at least one generic version for the static_call optimization later on we'll pay that price. 
Note that this would cause the second xor_block_8regs instance created by arch/arm/lib/xor-neon.c to be generated instead of discarded as dead code, so add a NO_TEMPLATE symbol to disable it for this case. Link: https://lkml.kernel.org/r/20260327061704.3707577-11-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm/lib/xor-neon.c | 4 +- include/asm-generic/xor.h | 727 +---------------------------- lib/raid/xor/Makefile | 4 + lib/raid/xor/xor-32regs-prefetch.c | 268 +++++++++++ lib/raid/xor/xor-32regs.c | 219 +++++++++ lib/raid/xor/xor-8regs-prefetch.c | 146 ++++++ lib/raid/xor/xor-8regs.c | 105 +++++ 7 files changed, 748 insertions(+), 725 deletions(-) create mode 100644 lib/raid/xor/xor-32regs-prefetch.c create mode 100644 lib/raid/xor/xor-32regs.c create mode 100644 lib/raid/xor/xor-8regs-prefetch.c create mode 100644 lib/raid/xor/xor-8regs.c diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c index 282980b9bf2a..b5be50567991 100644 --- a/arch/arm/lib/xor-neon.c +++ b/arch/arm/lib/xor-neon.c @@ -26,8 +26,8 @@ MODULE_LICENSE("GPL"); #pragma GCC optimize "tree-vectorize" #endif -#pragma GCC diagnostic ignored "-Wunused-variable" -#include +#define NO_TEMPLATE +#include 
"../../../lib/raid/xor/xor-8regs.c" struct xor_block_template const xor_block_neon_inner = { .name = "__inner_neon__", diff --git a/include/asm-generic/xor.h b/include/asm-generic/xor.h index 79c0096aa9d9..fc151fdc45ab 100644 --- a/include/asm-generic/xor.h +++ b/include/asm-generic/xor.h @@ -5,726 +5,7 @@ * Generic optimized RAID-5 checksumming functions. */ -#include - -static void -xor_8regs_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2) -{ - long lines = bytes / (sizeof (long)) / 8; - - do { - p1[0] ^= p2[0]; - p1[1] ^= p2[1]; - p1[2] ^= p2[2]; - p1[3] ^= p2[3]; - p1[4] ^= p2[4]; - p1[5] ^= p2[5]; - p1[6] ^= p2[6]; - p1[7] ^= p2[7]; - p1 += 8; - p2 += 8; - } while (--lines > 0); -} - -static void -xor_8regs_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3) -{ - long lines = bytes / (sizeof (long)) / 8; - - do { - p1[0] ^= p2[0] ^ p3[0]; - p1[1] ^= p2[1] ^ p3[1]; - p1[2] ^= p2[2] ^ p3[2]; - p1[3] ^= p2[3] ^ p3[3]; - p1[4] ^= p2[4] ^ p3[4]; - p1[5] ^= p2[5] ^ p3[5]; - p1[6] ^= p2[6] ^ p3[6]; - p1[7] ^= p2[7] ^ p3[7]; - p1 += 8; - p2 += 8; - p3 += 8; - } while (--lines > 0); -} - -static void -xor_8regs_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4) -{ - long lines = bytes / (sizeof (long)) / 8; - - do { - p1[0] ^= p2[0] ^ p3[0] ^ p4[0]; - p1[1] ^= p2[1] ^ p3[1] ^ p4[1]; - p1[2] ^= p2[2] ^ p3[2] ^ p4[2]; - p1[3] ^= p2[3] ^ p3[3] ^ p4[3]; - p1[4] ^= p2[4] ^ p3[4] ^ p4[4]; - p1[5] ^= p2[5] ^ p3[5] ^ p4[5]; - p1[6] ^= p2[6] ^ p3[6] ^ p4[6]; - p1[7] ^= p2[7] ^ p3[7] ^ p4[7]; - p1 += 8; - p2 += 8; - p3 += 8; - p4 += 8; - } while (--lines > 0); -} - -static void -xor_8regs_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned 
long * __restrict p4, - const unsigned long * __restrict p5) -{ - long lines = bytes / (sizeof (long)) / 8; - - do { - p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0]; - p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1]; - p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2]; - p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3]; - p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4]; - p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5]; - p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6]; - p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7]; - p1 += 8; - p2 += 8; - p3 += 8; - p4 += 8; - p5 += 8; - } while (--lines > 0); -} - -static void -xor_32regs_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2) -{ - long lines = bytes / (sizeof (long)) / 8; - - do { - register long d0, d1, d2, d3, d4, d5, d6, d7; - d0 = p1[0]; /* Pull the stuff into registers */ - d1 = p1[1]; /* ... in bursts, if possible. */ - d2 = p1[2]; - d3 = p1[3]; - d4 = p1[4]; - d5 = p1[5]; - d6 = p1[6]; - d7 = p1[7]; - d0 ^= p2[0]; - d1 ^= p2[1]; - d2 ^= p2[2]; - d3 ^= p2[3]; - d4 ^= p2[4]; - d5 ^= p2[5]; - d6 ^= p2[6]; - d7 ^= p2[7]; - p1[0] = d0; /* Store the result (in bursts) */ - p1[1] = d1; - p1[2] = d2; - p1[3] = d3; - p1[4] = d4; - p1[5] = d5; - p1[6] = d6; - p1[7] = d7; - p1 += 8; - p2 += 8; - } while (--lines > 0); -} - -static void -xor_32regs_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3) -{ - long lines = bytes / (sizeof (long)) / 8; - - do { - register long d0, d1, d2, d3, d4, d5, d6, d7; - d0 = p1[0]; /* Pull the stuff into registers */ - d1 = p1[1]; /* ... in bursts, if possible. 
*/ - d2 = p1[2]; - d3 = p1[3]; - d4 = p1[4]; - d5 = p1[5]; - d6 = p1[6]; - d7 = p1[7]; - d0 ^= p2[0]; - d1 ^= p2[1]; - d2 ^= p2[2]; - d3 ^= p2[3]; - d4 ^= p2[4]; - d5 ^= p2[5]; - d6 ^= p2[6]; - d7 ^= p2[7]; - d0 ^= p3[0]; - d1 ^= p3[1]; - d2 ^= p3[2]; - d3 ^= p3[3]; - d4 ^= p3[4]; - d5 ^= p3[5]; - d6 ^= p3[6]; - d7 ^= p3[7]; - p1[0] = d0; /* Store the result (in bursts) */ - p1[1] = d1; - p1[2] = d2; - p1[3] = d3; - p1[4] = d4; - p1[5] = d5; - p1[6] = d6; - p1[7] = d7; - p1 += 8; - p2 += 8; - p3 += 8; - } while (--lines > 0); -} - -static void -xor_32regs_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4) -{ - long lines = bytes / (sizeof (long)) / 8; - - do { - register long d0, d1, d2, d3, d4, d5, d6, d7; - d0 = p1[0]; /* Pull the stuff into registers */ - d1 = p1[1]; /* ... in bursts, if possible. */ - d2 = p1[2]; - d3 = p1[3]; - d4 = p1[4]; - d5 = p1[5]; - d6 = p1[6]; - d7 = p1[7]; - d0 ^= p2[0]; - d1 ^= p2[1]; - d2 ^= p2[2]; - d3 ^= p2[3]; - d4 ^= p2[4]; - d5 ^= p2[5]; - d6 ^= p2[6]; - d7 ^= p2[7]; - d0 ^= p3[0]; - d1 ^= p3[1]; - d2 ^= p3[2]; - d3 ^= p3[3]; - d4 ^= p3[4]; - d5 ^= p3[5]; - d6 ^= p3[6]; - d7 ^= p3[7]; - d0 ^= p4[0]; - d1 ^= p4[1]; - d2 ^= p4[2]; - d3 ^= p4[3]; - d4 ^= p4[4]; - d5 ^= p4[5]; - d6 ^= p4[6]; - d7 ^= p4[7]; - p1[0] = d0; /* Store the result (in bursts) */ - p1[1] = d1; - p1[2] = d2; - p1[3] = d3; - p1[4] = d4; - p1[5] = d5; - p1[6] = d6; - p1[7] = d7; - p1 += 8; - p2 += 8; - p3 += 8; - p4 += 8; - } while (--lines > 0); -} - -static void -xor_32regs_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5) -{ - long lines = bytes / (sizeof (long)) / 8; - - do { - register long d0, d1, d2, d3, d4, d5, d6, d7; - d0 = p1[0]; /* Pull the stuff into registers */ - d1 = 
p1[1]; /* ... in bursts, if possible. */ - d2 = p1[2]; - d3 = p1[3]; - d4 = p1[4]; - d5 = p1[5]; - d6 = p1[6]; - d7 = p1[7]; - d0 ^= p2[0]; - d1 ^= p2[1]; - d2 ^= p2[2]; - d3 ^= p2[3]; - d4 ^= p2[4]; - d5 ^= p2[5]; - d6 ^= p2[6]; - d7 ^= p2[7]; - d0 ^= p3[0]; - d1 ^= p3[1]; - d2 ^= p3[2]; - d3 ^= p3[3]; - d4 ^= p3[4]; - d5 ^= p3[5]; - d6 ^= p3[6]; - d7 ^= p3[7]; - d0 ^= p4[0]; - d1 ^= p4[1]; - d2 ^= p4[2]; - d3 ^= p4[3]; - d4 ^= p4[4]; - d5 ^= p4[5]; - d6 ^= p4[6]; - d7 ^= p4[7]; - d0 ^= p5[0]; - d1 ^= p5[1]; - d2 ^= p5[2]; - d3 ^= p5[3]; - d4 ^= p5[4]; - d5 ^= p5[5]; - d6 ^= p5[6]; - d7 ^= p5[7]; - p1[0] = d0; /* Store the result (in bursts) */ - p1[1] = d1; - p1[2] = d2; - p1[3] = d3; - p1[4] = d4; - p1[5] = d5; - p1[6] = d6; - p1[7] = d7; - p1 += 8; - p2 += 8; - p3 += 8; - p4 += 8; - p5 += 8; - } while (--lines > 0); -} - -static void -xor_8regs_p_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2) -{ - long lines = bytes / (sizeof (long)) / 8 - 1; - prefetchw(p1); - prefetch(p2); - - do { - prefetchw(p1+8); - prefetch(p2+8); - once_more: - p1[0] ^= p2[0]; - p1[1] ^= p2[1]; - p1[2] ^= p2[2]; - p1[3] ^= p2[3]; - p1[4] ^= p2[4]; - p1[5] ^= p2[5]; - p1[6] ^= p2[6]; - p1[7] ^= p2[7]; - p1 += 8; - p2 += 8; - } while (--lines > 0); - if (lines == 0) - goto once_more; -} - -static void -xor_8regs_p_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3) -{ - long lines = bytes / (sizeof (long)) / 8 - 1; - prefetchw(p1); - prefetch(p2); - prefetch(p3); - - do { - prefetchw(p1+8); - prefetch(p2+8); - prefetch(p3+8); - once_more: - p1[0] ^= p2[0] ^ p3[0]; - p1[1] ^= p2[1] ^ p3[1]; - p1[2] ^= p2[2] ^ p3[2]; - p1[3] ^= p2[3] ^ p3[3]; - p1[4] ^= p2[4] ^ p3[4]; - p1[5] ^= p2[5] ^ p3[5]; - p1[6] ^= p2[6] ^ p3[6]; - p1[7] ^= p2[7] ^ p3[7]; - p1 += 8; - p2 += 8; - p3 += 8; - } while (--lines > 0); - if (lines == 0) - goto once_more; -} - -static void 
-xor_8regs_p_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4) -{ - long lines = bytes / (sizeof (long)) / 8 - 1; - - prefetchw(p1); - prefetch(p2); - prefetch(p3); - prefetch(p4); - - do { - prefetchw(p1+8); - prefetch(p2+8); - prefetch(p3+8); - prefetch(p4+8); - once_more: - p1[0] ^= p2[0] ^ p3[0] ^ p4[0]; - p1[1] ^= p2[1] ^ p3[1] ^ p4[1]; - p1[2] ^= p2[2] ^ p3[2] ^ p4[2]; - p1[3] ^= p2[3] ^ p3[3] ^ p4[3]; - p1[4] ^= p2[4] ^ p3[4] ^ p4[4]; - p1[5] ^= p2[5] ^ p3[5] ^ p4[5]; - p1[6] ^= p2[6] ^ p3[6] ^ p4[6]; - p1[7] ^= p2[7] ^ p3[7] ^ p4[7]; - p1 += 8; - p2 += 8; - p3 += 8; - p4 += 8; - } while (--lines > 0); - if (lines == 0) - goto once_more; -} - -static void -xor_8regs_p_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5) -{ - long lines = bytes / (sizeof (long)) / 8 - 1; - - prefetchw(p1); - prefetch(p2); - prefetch(p3); - prefetch(p4); - prefetch(p5); - - do { - prefetchw(p1+8); - prefetch(p2+8); - prefetch(p3+8); - prefetch(p4+8); - prefetch(p5+8); - once_more: - p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0]; - p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1]; - p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2]; - p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3]; - p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4]; - p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5]; - p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6]; - p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7]; - p1 += 8; - p2 += 8; - p3 += 8; - p4 += 8; - p5 += 8; - } while (--lines > 0); - if (lines == 0) - goto once_more; -} - -static void -xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2) -{ - long lines = bytes / (sizeof (long)) / 8 - 1; - - prefetchw(p1); - prefetch(p2); - - do { - register long d0, d1, d2, d3, d4, d5, d6, d7; - - 
prefetchw(p1+8); - prefetch(p2+8); - once_more: - d0 = p1[0]; /* Pull the stuff into registers */ - d1 = p1[1]; /* ... in bursts, if possible. */ - d2 = p1[2]; - d3 = p1[3]; - d4 = p1[4]; - d5 = p1[5]; - d6 = p1[6]; - d7 = p1[7]; - d0 ^= p2[0]; - d1 ^= p2[1]; - d2 ^= p2[2]; - d3 ^= p2[3]; - d4 ^= p2[4]; - d5 ^= p2[5]; - d6 ^= p2[6]; - d7 ^= p2[7]; - p1[0] = d0; /* Store the result (in bursts) */ - p1[1] = d1; - p1[2] = d2; - p1[3] = d3; - p1[4] = d4; - p1[5] = d5; - p1[6] = d6; - p1[7] = d7; - p1 += 8; - p2 += 8; - } while (--lines > 0); - if (lines == 0) - goto once_more; -} - -static void -xor_32regs_p_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3) -{ - long lines = bytes / (sizeof (long)) / 8 - 1; - - prefetchw(p1); - prefetch(p2); - prefetch(p3); - - do { - register long d0, d1, d2, d3, d4, d5, d6, d7; - - prefetchw(p1+8); - prefetch(p2+8); - prefetch(p3+8); - once_more: - d0 = p1[0]; /* Pull the stuff into registers */ - d1 = p1[1]; /* ... in bursts, if possible. 
*/ - d2 = p1[2]; - d3 = p1[3]; - d4 = p1[4]; - d5 = p1[5]; - d6 = p1[6]; - d7 = p1[7]; - d0 ^= p2[0]; - d1 ^= p2[1]; - d2 ^= p2[2]; - d3 ^= p2[3]; - d4 ^= p2[4]; - d5 ^= p2[5]; - d6 ^= p2[6]; - d7 ^= p2[7]; - d0 ^= p3[0]; - d1 ^= p3[1]; - d2 ^= p3[2]; - d3 ^= p3[3]; - d4 ^= p3[4]; - d5 ^= p3[5]; - d6 ^= p3[6]; - d7 ^= p3[7]; - p1[0] = d0; /* Store the result (in bursts) */ - p1[1] = d1; - p1[2] = d2; - p1[3] = d3; - p1[4] = d4; - p1[5] = d5; - p1[6] = d6; - p1[7] = d7; - p1 += 8; - p2 += 8; - p3 += 8; - } while (--lines > 0); - if (lines == 0) - goto once_more; -} - -static void -xor_32regs_p_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4) -{ - long lines = bytes / (sizeof (long)) / 8 - 1; - - prefetchw(p1); - prefetch(p2); - prefetch(p3); - prefetch(p4); - - do { - register long d0, d1, d2, d3, d4, d5, d6, d7; - - prefetchw(p1+8); - prefetch(p2+8); - prefetch(p3+8); - prefetch(p4+8); - once_more: - d0 = p1[0]; /* Pull the stuff into registers */ - d1 = p1[1]; /* ... in bursts, if possible. 
*/ - d2 = p1[2]; - d3 = p1[3]; - d4 = p1[4]; - d5 = p1[5]; - d6 = p1[6]; - d7 = p1[7]; - d0 ^= p2[0]; - d1 ^= p2[1]; - d2 ^= p2[2]; - d3 ^= p2[3]; - d4 ^= p2[4]; - d5 ^= p2[5]; - d6 ^= p2[6]; - d7 ^= p2[7]; - d0 ^= p3[0]; - d1 ^= p3[1]; - d2 ^= p3[2]; - d3 ^= p3[3]; - d4 ^= p3[4]; - d5 ^= p3[5]; - d6 ^= p3[6]; - d7 ^= p3[7]; - d0 ^= p4[0]; - d1 ^= p4[1]; - d2 ^= p4[2]; - d3 ^= p4[3]; - d4 ^= p4[4]; - d5 ^= p4[5]; - d6 ^= p4[6]; - d7 ^= p4[7]; - p1[0] = d0; /* Store the result (in bursts) */ - p1[1] = d1; - p1[2] = d2; - p1[3] = d3; - p1[4] = d4; - p1[5] = d5; - p1[6] = d6; - p1[7] = d7; - p1 += 8; - p2 += 8; - p3 += 8; - p4 += 8; - } while (--lines > 0); - if (lines == 0) - goto once_more; -} - -static void -xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5) -{ - long lines = bytes / (sizeof (long)) / 8 - 1; - - prefetchw(p1); - prefetch(p2); - prefetch(p3); - prefetch(p4); - prefetch(p5); - - do { - register long d0, d1, d2, d3, d4, d5, d6, d7; - - prefetchw(p1+8); - prefetch(p2+8); - prefetch(p3+8); - prefetch(p4+8); - prefetch(p5+8); - once_more: - d0 = p1[0]; /* Pull the stuff into registers */ - d1 = p1[1]; /* ... in bursts, if possible. 
*/ - d2 = p1[2]; - d3 = p1[3]; - d4 = p1[4]; - d5 = p1[5]; - d6 = p1[6]; - d7 = p1[7]; - d0 ^= p2[0]; - d1 ^= p2[1]; - d2 ^= p2[2]; - d3 ^= p2[3]; - d4 ^= p2[4]; - d5 ^= p2[5]; - d6 ^= p2[6]; - d7 ^= p2[7]; - d0 ^= p3[0]; - d1 ^= p3[1]; - d2 ^= p3[2]; - d3 ^= p3[3]; - d4 ^= p3[4]; - d5 ^= p3[5]; - d6 ^= p3[6]; - d7 ^= p3[7]; - d0 ^= p4[0]; - d1 ^= p4[1]; - d2 ^= p4[2]; - d3 ^= p4[3]; - d4 ^= p4[4]; - d5 ^= p4[5]; - d6 ^= p4[6]; - d7 ^= p4[7]; - d0 ^= p5[0]; - d1 ^= p5[1]; - d2 ^= p5[2]; - d3 ^= p5[3]; - d4 ^= p5[4]; - d5 ^= p5[5]; - d6 ^= p5[6]; - d7 ^= p5[7]; - p1[0] = d0; /* Store the result (in bursts) */ - p1[1] = d1; - p1[2] = d2; - p1[3] = d3; - p1[4] = d4; - p1[5] = d5; - p1[6] = d6; - p1[7] = d7; - p1 += 8; - p2 += 8; - p3 += 8; - p4 += 8; - p5 += 8; - } while (--lines > 0); - if (lines == 0) - goto once_more; -} - -static struct xor_block_template xor_block_8regs = { - .name = "8regs", - .do_2 = xor_8regs_2, - .do_3 = xor_8regs_3, - .do_4 = xor_8regs_4, - .do_5 = xor_8regs_5, -}; - -static struct xor_block_template xor_block_32regs = { - .name = "32regs", - .do_2 = xor_32regs_2, - .do_3 = xor_32regs_3, - .do_4 = xor_32regs_4, - .do_5 = xor_32regs_5, -}; - -static struct xor_block_template xor_block_8regs_p __maybe_unused = { - .name = "8regs_prefetch", - .do_2 = xor_8regs_p_2, - .do_3 = xor_8regs_p_3, - .do_4 = xor_8regs_p_4, - .do_5 = xor_8regs_p_5, -}; - -static struct xor_block_template xor_block_32regs_p __maybe_unused = { - .name = "32regs_prefetch", - .do_2 = xor_32regs_p_2, - .do_3 = xor_32regs_p_3, - .do_4 = xor_32regs_p_4, - .do_5 = xor_32regs_p_5, -}; +extern struct xor_block_template xor_block_8regs; +extern struct xor_block_template xor_block_32regs; +extern struct xor_block_template xor_block_8regs_p; +extern struct xor_block_template xor_block_32regs_p; diff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile index 7bca0ce8e90a..89a944c9f990 100644 --- a/lib/raid/xor/Makefile +++ b/lib/raid/xor/Makefile @@ -3,3 +3,7 @@ 
obj-$(CONFIG_XOR_BLOCKS) += xor.o xor-y += xor-core.o +xor-y += xor-8regs.o +xor-y += xor-32regs.o +xor-y += xor-8regs-prefetch.o +xor-y += xor-32regs-prefetch.o diff --git a/lib/raid/xor/xor-32regs-prefetch.c b/lib/raid/xor/xor-32regs-prefetch.c new file mode 100644 index 000000000000..8666c287f777 --- /dev/null +++ b/lib/raid/xor/xor-32regs-prefetch.c @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include + +static void +xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) +{ + long lines = bytes / (sizeof (long)) / 8 - 1; + + prefetchw(p1); + prefetch(p2); + + do { + register long d0, d1, d2, d3, d4, d5, d6, d7; + + prefetchw(p1+8); + prefetch(p2+8); + once_more: + d0 = p1[0]; /* Pull the stuff into registers */ + d1 = p1[1]; /* ... in bursts, if possible. */ + d2 = p1[2]; + d3 = p1[3]; + d4 = p1[4]; + d5 = p1[5]; + d6 = p1[6]; + d7 = p1[7]; + d0 ^= p2[0]; + d1 ^= p2[1]; + d2 ^= p2[2]; + d3 ^= p2[3]; + d4 ^= p2[4]; + d5 ^= p2[5]; + d6 ^= p2[6]; + d7 ^= p2[7]; + p1[0] = d0; /* Store the result (in bursts) */ + p1[1] = d1; + p1[2] = d2; + p1[3] = d3; + p1[4] = d4; + p1[5] = d5; + p1[6] = d6; + p1[7] = d7; + p1 += 8; + p2 += 8; + } while (--lines > 0); + if (lines == 0) + goto once_more; +} + +static void +xor_32regs_p_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) +{ + long lines = bytes / (sizeof (long)) / 8 - 1; + + prefetchw(p1); + prefetch(p2); + prefetch(p3); + + do { + register long d0, d1, d2, d3, d4, d5, d6, d7; + + prefetchw(p1+8); + prefetch(p2+8); + prefetch(p3+8); + once_more: + d0 = p1[0]; /* Pull the stuff into registers */ + d1 = p1[1]; /* ... in bursts, if possible. 
*/ + d2 = p1[2]; + d3 = p1[3]; + d4 = p1[4]; + d5 = p1[5]; + d6 = p1[6]; + d7 = p1[7]; + d0 ^= p2[0]; + d1 ^= p2[1]; + d2 ^= p2[2]; + d3 ^= p2[3]; + d4 ^= p2[4]; + d5 ^= p2[5]; + d6 ^= p2[6]; + d7 ^= p2[7]; + d0 ^= p3[0]; + d1 ^= p3[1]; + d2 ^= p3[2]; + d3 ^= p3[3]; + d4 ^= p3[4]; + d5 ^= p3[5]; + d6 ^= p3[6]; + d7 ^= p3[7]; + p1[0] = d0; /* Store the result (in bursts) */ + p1[1] = d1; + p1[2] = d2; + p1[3] = d3; + p1[4] = d4; + p1[5] = d5; + p1[6] = d6; + p1[7] = d7; + p1 += 8; + p2 += 8; + p3 += 8; + } while (--lines > 0); + if (lines == 0) + goto once_more; +} + +static void +xor_32regs_p_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) +{ + long lines = bytes / (sizeof (long)) / 8 - 1; + + prefetchw(p1); + prefetch(p2); + prefetch(p3); + prefetch(p4); + + do { + register long d0, d1, d2, d3, d4, d5, d6, d7; + + prefetchw(p1+8); + prefetch(p2+8); + prefetch(p3+8); + prefetch(p4+8); + once_more: + d0 = p1[0]; /* Pull the stuff into registers */ + d1 = p1[1]; /* ... in bursts, if possible. 
*/ + d2 = p1[2]; + d3 = p1[3]; + d4 = p1[4]; + d5 = p1[5]; + d6 = p1[6]; + d7 = p1[7]; + d0 ^= p2[0]; + d1 ^= p2[1]; + d2 ^= p2[2]; + d3 ^= p2[3]; + d4 ^= p2[4]; + d5 ^= p2[5]; + d6 ^= p2[6]; + d7 ^= p2[7]; + d0 ^= p3[0]; + d1 ^= p3[1]; + d2 ^= p3[2]; + d3 ^= p3[3]; + d4 ^= p3[4]; + d5 ^= p3[5]; + d6 ^= p3[6]; + d7 ^= p3[7]; + d0 ^= p4[0]; + d1 ^= p4[1]; + d2 ^= p4[2]; + d3 ^= p4[3]; + d4 ^= p4[4]; + d5 ^= p4[5]; + d6 ^= p4[6]; + d7 ^= p4[7]; + p1[0] = d0; /* Store the result (in bursts) */ + p1[1] = d1; + p1[2] = d2; + p1[3] = d3; + p1[4] = d4; + p1[5] = d5; + p1[6] = d6; + p1[7] = d7; + p1 += 8; + p2 += 8; + p3 += 8; + p4 += 8; + } while (--lines > 0); + if (lines == 0) + goto once_more; +} + +static void +xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) +{ + long lines = bytes / (sizeof (long)) / 8 - 1; + + prefetchw(p1); + prefetch(p2); + prefetch(p3); + prefetch(p4); + prefetch(p5); + + do { + register long d0, d1, d2, d3, d4, d5, d6, d7; + + prefetchw(p1+8); + prefetch(p2+8); + prefetch(p3+8); + prefetch(p4+8); + prefetch(p5+8); + once_more: + d0 = p1[0]; /* Pull the stuff into registers */ + d1 = p1[1]; /* ... in bursts, if possible. 
*/ + d2 = p1[2]; + d3 = p1[3]; + d4 = p1[4]; + d5 = p1[5]; + d6 = p1[6]; + d7 = p1[7]; + d0 ^= p2[0]; + d1 ^= p2[1]; + d2 ^= p2[2]; + d3 ^= p2[3]; + d4 ^= p2[4]; + d5 ^= p2[5]; + d6 ^= p2[6]; + d7 ^= p2[7]; + d0 ^= p3[0]; + d1 ^= p3[1]; + d2 ^= p3[2]; + d3 ^= p3[3]; + d4 ^= p3[4]; + d5 ^= p3[5]; + d6 ^= p3[6]; + d7 ^= p3[7]; + d0 ^= p4[0]; + d1 ^= p4[1]; + d2 ^= p4[2]; + d3 ^= p4[3]; + d4 ^= p4[4]; + d5 ^= p4[5]; + d6 ^= p4[6]; + d7 ^= p4[7]; + d0 ^= p5[0]; + d1 ^= p5[1]; + d2 ^= p5[2]; + d3 ^= p5[3]; + d4 ^= p5[4]; + d5 ^= p5[5]; + d6 ^= p5[6]; + d7 ^= p5[7]; + p1[0] = d0; /* Store the result (in bursts) */ + p1[1] = d1; + p1[2] = d2; + p1[3] = d3; + p1[4] = d4; + p1[5] = d5; + p1[6] = d6; + p1[7] = d7; + p1 += 8; + p2 += 8; + p3 += 8; + p4 += 8; + p5 += 8; + } while (--lines > 0); + if (lines == 0) + goto once_more; +} + +struct xor_block_template xor_block_32regs_p = { + .name = "32regs_prefetch", + .do_2 = xor_32regs_p_2, + .do_3 = xor_32regs_p_3, + .do_4 = xor_32regs_p_4, + .do_5 = xor_32regs_p_5, +}; diff --git a/lib/raid/xor/xor-32regs.c b/lib/raid/xor/xor-32regs.c new file mode 100644 index 000000000000..58d4fac43eb4 --- /dev/null +++ b/lib/raid/xor/xor-32regs.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include + +static void +xor_32regs_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) +{ + long lines = bytes / (sizeof (long)) / 8; + + do { + register long d0, d1, d2, d3, d4, d5, d6, d7; + d0 = p1[0]; /* Pull the stuff into registers */ + d1 = p1[1]; /* ... in bursts, if possible. 
*/ + d2 = p1[2]; + d3 = p1[3]; + d4 = p1[4]; + d5 = p1[5]; + d6 = p1[6]; + d7 = p1[7]; + d0 ^= p2[0]; + d1 ^= p2[1]; + d2 ^= p2[2]; + d3 ^= p2[3]; + d4 ^= p2[4]; + d5 ^= p2[5]; + d6 ^= p2[6]; + d7 ^= p2[7]; + p1[0] = d0; /* Store the result (in bursts) */ + p1[1] = d1; + p1[2] = d2; + p1[3] = d3; + p1[4] = d4; + p1[5] = d5; + p1[6] = d6; + p1[7] = d7; + p1 += 8; + p2 += 8; + } while (--lines > 0); +} + +static void +xor_32regs_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) +{ + long lines = bytes / (sizeof (long)) / 8; + + do { + register long d0, d1, d2, d3, d4, d5, d6, d7; + d0 = p1[0]; /* Pull the stuff into registers */ + d1 = p1[1]; /* ... in bursts, if possible. */ + d2 = p1[2]; + d3 = p1[3]; + d4 = p1[4]; + d5 = p1[5]; + d6 = p1[6]; + d7 = p1[7]; + d0 ^= p2[0]; + d1 ^= p2[1]; + d2 ^= p2[2]; + d3 ^= p2[3]; + d4 ^= p2[4]; + d5 ^= p2[5]; + d6 ^= p2[6]; + d7 ^= p2[7]; + d0 ^= p3[0]; + d1 ^= p3[1]; + d2 ^= p3[2]; + d3 ^= p3[3]; + d4 ^= p3[4]; + d5 ^= p3[5]; + d6 ^= p3[6]; + d7 ^= p3[7]; + p1[0] = d0; /* Store the result (in bursts) */ + p1[1] = d1; + p1[2] = d2; + p1[3] = d3; + p1[4] = d4; + p1[5] = d5; + p1[6] = d6; + p1[7] = d7; + p1 += 8; + p2 += 8; + p3 += 8; + } while (--lines > 0); +} + +static void +xor_32regs_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) +{ + long lines = bytes / (sizeof (long)) / 8; + + do { + register long d0, d1, d2, d3, d4, d5, d6, d7; + d0 = p1[0]; /* Pull the stuff into registers */ + d1 = p1[1]; /* ... in bursts, if possible. 
*/ + d2 = p1[2]; + d3 = p1[3]; + d4 = p1[4]; + d5 = p1[5]; + d6 = p1[6]; + d7 = p1[7]; + d0 ^= p2[0]; + d1 ^= p2[1]; + d2 ^= p2[2]; + d3 ^= p2[3]; + d4 ^= p2[4]; + d5 ^= p2[5]; + d6 ^= p2[6]; + d7 ^= p2[7]; + d0 ^= p3[0]; + d1 ^= p3[1]; + d2 ^= p3[2]; + d3 ^= p3[3]; + d4 ^= p3[4]; + d5 ^= p3[5]; + d6 ^= p3[6]; + d7 ^= p3[7]; + d0 ^= p4[0]; + d1 ^= p4[1]; + d2 ^= p4[2]; + d3 ^= p4[3]; + d4 ^= p4[4]; + d5 ^= p4[5]; + d6 ^= p4[6]; + d7 ^= p4[7]; + p1[0] = d0; /* Store the result (in bursts) */ + p1[1] = d1; + p1[2] = d2; + p1[3] = d3; + p1[4] = d4; + p1[5] = d5; + p1[6] = d6; + p1[7] = d7; + p1 += 8; + p2 += 8; + p3 += 8; + p4 += 8; + } while (--lines > 0); +} + +static void +xor_32regs_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) +{ + long lines = bytes / (sizeof (long)) / 8; + + do { + register long d0, d1, d2, d3, d4, d5, d6, d7; + d0 = p1[0]; /* Pull the stuff into registers */ + d1 = p1[1]; /* ... in bursts, if possible. 
*/ + d2 = p1[2]; + d3 = p1[3]; + d4 = p1[4]; + d5 = p1[5]; + d6 = p1[6]; + d7 = p1[7]; + d0 ^= p2[0]; + d1 ^= p2[1]; + d2 ^= p2[2]; + d3 ^= p2[3]; + d4 ^= p2[4]; + d5 ^= p2[5]; + d6 ^= p2[6]; + d7 ^= p2[7]; + d0 ^= p3[0]; + d1 ^= p3[1]; + d2 ^= p3[2]; + d3 ^= p3[3]; + d4 ^= p3[4]; + d5 ^= p3[5]; + d6 ^= p3[6]; + d7 ^= p3[7]; + d0 ^= p4[0]; + d1 ^= p4[1]; + d2 ^= p4[2]; + d3 ^= p4[3]; + d4 ^= p4[4]; + d5 ^= p4[5]; + d6 ^= p4[6]; + d7 ^= p4[7]; + d0 ^= p5[0]; + d1 ^= p5[1]; + d2 ^= p5[2]; + d3 ^= p5[3]; + d4 ^= p5[4]; + d5 ^= p5[5]; + d6 ^= p5[6]; + d7 ^= p5[7]; + p1[0] = d0; /* Store the result (in bursts) */ + p1[1] = d1; + p1[2] = d2; + p1[3] = d3; + p1[4] = d4; + p1[5] = d5; + p1[6] = d6; + p1[7] = d7; + p1 += 8; + p2 += 8; + p3 += 8; + p4 += 8; + p5 += 8; + } while (--lines > 0); +} + +struct xor_block_template xor_block_32regs = { + .name = "32regs", + .do_2 = xor_32regs_2, + .do_3 = xor_32regs_3, + .do_4 = xor_32regs_4, + .do_5 = xor_32regs_5, +}; diff --git a/lib/raid/xor/xor-8regs-prefetch.c b/lib/raid/xor/xor-8regs-prefetch.c new file mode 100644 index 000000000000..67061e35a0a6 --- /dev/null +++ b/lib/raid/xor/xor-8regs-prefetch.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include + +static void +xor_8regs_p_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) +{ + long lines = bytes / (sizeof (long)) / 8 - 1; + prefetchw(p1); + prefetch(p2); + + do { + prefetchw(p1+8); + prefetch(p2+8); + once_more: + p1[0] ^= p2[0]; + p1[1] ^= p2[1]; + p1[2] ^= p2[2]; + p1[3] ^= p2[3]; + p1[4] ^= p2[4]; + p1[5] ^= p2[5]; + p1[6] ^= p2[6]; + p1[7] ^= p2[7]; + p1 += 8; + p2 += 8; + } while (--lines > 0); + if (lines == 0) + goto once_more; +} + +static void +xor_8regs_p_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) +{ + long lines = bytes / (sizeof (long)) / 8 - 1; + prefetchw(p1); + prefetch(p2); + 
prefetch(p3); + + do { + prefetchw(p1+8); + prefetch(p2+8); + prefetch(p3+8); + once_more: + p1[0] ^= p2[0] ^ p3[0]; + p1[1] ^= p2[1] ^ p3[1]; + p1[2] ^= p2[2] ^ p3[2]; + p1[3] ^= p2[3] ^ p3[3]; + p1[4] ^= p2[4] ^ p3[4]; + p1[5] ^= p2[5] ^ p3[5]; + p1[6] ^= p2[6] ^ p3[6]; + p1[7] ^= p2[7] ^ p3[7]; + p1 += 8; + p2 += 8; + p3 += 8; + } while (--lines > 0); + if (lines == 0) + goto once_more; +} + +static void +xor_8regs_p_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) +{ + long lines = bytes / (sizeof (long)) / 8 - 1; + + prefetchw(p1); + prefetch(p2); + prefetch(p3); + prefetch(p4); + + do { + prefetchw(p1+8); + prefetch(p2+8); + prefetch(p3+8); + prefetch(p4+8); + once_more: + p1[0] ^= p2[0] ^ p3[0] ^ p4[0]; + p1[1] ^= p2[1] ^ p3[1] ^ p4[1]; + p1[2] ^= p2[2] ^ p3[2] ^ p4[2]; + p1[3] ^= p2[3] ^ p3[3] ^ p4[3]; + p1[4] ^= p2[4] ^ p3[4] ^ p4[4]; + p1[5] ^= p2[5] ^ p3[5] ^ p4[5]; + p1[6] ^= p2[6] ^ p3[6] ^ p4[6]; + p1[7] ^= p2[7] ^ p3[7] ^ p4[7]; + p1 += 8; + p2 += 8; + p3 += 8; + p4 += 8; + } while (--lines > 0); + if (lines == 0) + goto once_more; +} + +static void +xor_8regs_p_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) +{ + long lines = bytes / (sizeof (long)) / 8 - 1; + + prefetchw(p1); + prefetch(p2); + prefetch(p3); + prefetch(p4); + prefetch(p5); + + do { + prefetchw(p1+8); + prefetch(p2+8); + prefetch(p3+8); + prefetch(p4+8); + prefetch(p5+8); + once_more: + p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0]; + p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1]; + p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2]; + p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3]; + p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4]; + p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5]; + p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6]; + p1[7] ^= p2[7] ^ p3[7] ^ p4[7] 
^ p5[7]; + p1 += 8; + p2 += 8; + p3 += 8; + p4 += 8; + p5 += 8; + } while (--lines > 0); + if (lines == 0) + goto once_more; +} + +struct xor_block_template xor_block_8regs_p = { + .name = "8regs_prefetch", + .do_2 = xor_8regs_p_2, + .do_3 = xor_8regs_p_3, + .do_4 = xor_8regs_p_4, + .do_5 = xor_8regs_p_5, +}; diff --git a/lib/raid/xor/xor-8regs.c b/lib/raid/xor/xor-8regs.c new file mode 100644 index 000000000000..769f796ab2cf --- /dev/null +++ b/lib/raid/xor/xor-8regs.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include + +static void +xor_8regs_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) +{ + long lines = bytes / (sizeof (long)) / 8; + + do { + p1[0] ^= p2[0]; + p1[1] ^= p2[1]; + p1[2] ^= p2[2]; + p1[3] ^= p2[3]; + p1[4] ^= p2[4]; + p1[5] ^= p2[5]; + p1[6] ^= p2[6]; + p1[7] ^= p2[7]; + p1 += 8; + p2 += 8; + } while (--lines > 0); +} + +static void +xor_8regs_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) +{ + long lines = bytes / (sizeof (long)) / 8; + + do { + p1[0] ^= p2[0] ^ p3[0]; + p1[1] ^= p2[1] ^ p3[1]; + p1[2] ^= p2[2] ^ p3[2]; + p1[3] ^= p2[3] ^ p3[3]; + p1[4] ^= p2[4] ^ p3[4]; + p1[5] ^= p2[5] ^ p3[5]; + p1[6] ^= p2[6] ^ p3[6]; + p1[7] ^= p2[7] ^ p3[7]; + p1 += 8; + p2 += 8; + p3 += 8; + } while (--lines > 0); +} + +static void +xor_8regs_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) +{ + long lines = bytes / (sizeof (long)) / 8; + + do { + p1[0] ^= p2[0] ^ p3[0] ^ p4[0]; + p1[1] ^= p2[1] ^ p3[1] ^ p4[1]; + p1[2] ^= p2[2] ^ p3[2] ^ p4[2]; + p1[3] ^= p2[3] ^ p3[3] ^ p4[3]; + p1[4] ^= p2[4] ^ p3[4] ^ p4[4]; + p1[5] ^= p2[5] ^ p3[5] ^ p4[5]; + p1[6] ^= p2[6] ^ p3[6] ^ p4[6]; + p1[7] ^= p2[7] ^ p3[7] ^ p4[7]; + p1 += 8; + p2 += 8; + p3 += 8; + p4 += 8; + } while (--lines 
> 0); +} + +static void +xor_8regs_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) +{ + long lines = bytes / (sizeof (long)) / 8; + + do { + p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0]; + p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1]; + p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2]; + p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3]; + p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4]; + p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5]; + p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6]; + p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7]; + p1 += 8; + p2 += 8; + p3 += 8; + p4 += 8; + p5 += 8; + } while (--lines > 0); +} + +#ifndef NO_TEMPLATE +struct xor_block_template xor_block_8regs = { + .name = "8regs", + .do_2 = xor_8regs_2, + .do_3 = xor_8regs_3, + .do_4 = xor_8regs_4, + .do_5 = xor_8regs_5, +}; +#endif /* NO_TEMPLATE */ From 503793b1340e3622f7464c2717a8e4f6f8424cca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:43 +0100 Subject: [PATCH 081/127] alpha: move the XOR code to lib/raid/ Move the optimized XOR code out of line into lib/raid. Note that the giant inline assembly block might be better off as a separate assembly source file now, but I'll leave that to the alpha maintainers. Link: https://lkml.kernel.org/r/20260327061704.3707577-12-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Magnus Lindholm Tested-by: Magnus Lindholm Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. 
Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/alpha/include/asm/xor.h | 853 +---------------------------------- lib/raid/xor/Makefile | 2 + lib/raid/xor/alpha/xor.c | 849 ++++++++++++++++++++++++++++++++++ 3 files changed, 855 insertions(+), 849 deletions(-) create mode 100644 lib/raid/xor/alpha/xor.c diff --git a/arch/alpha/include/asm/xor.h b/arch/alpha/include/asm/xor.h index 4c8085711df1..e517be577a09 100644 --- a/arch/alpha/include/asm/xor.h +++ b/arch/alpha/include/asm/xor.h @@ -1,856 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * include/asm-alpha/xor.h - * - * Optimized RAID-5 checksumming functions for alpha EV5 and EV6 - */ -extern void -xor_alpha_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2); -extern void -xor_alpha_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3); -extern void -xor_alpha_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4); -extern void -xor_alpha_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5); - -extern void -xor_alpha_prefetch_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2); -extern void -xor_alpha_prefetch_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3); -extern void -xor_alpha_prefetch_4(unsigned long 
bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4); -extern void -xor_alpha_prefetch_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5); - -asm(" \n\ - .text \n\ - .align 3 \n\ - .ent xor_alpha_2 \n\ -xor_alpha_2: \n\ - .prologue 0 \n\ - srl $16, 6, $16 \n\ - .align 4 \n\ -2: \n\ - ldq $0,0($17) \n\ - ldq $1,0($18) \n\ - ldq $2,8($17) \n\ - ldq $3,8($18) \n\ - \n\ - ldq $4,16($17) \n\ - ldq $5,16($18) \n\ - ldq $6,24($17) \n\ - ldq $7,24($18) \n\ - \n\ - ldq $19,32($17) \n\ - ldq $20,32($18) \n\ - ldq $21,40($17) \n\ - ldq $22,40($18) \n\ - \n\ - ldq $23,48($17) \n\ - ldq $24,48($18) \n\ - ldq $25,56($17) \n\ - xor $0,$1,$0 # 7 cycles from $1 load \n\ - \n\ - ldq $27,56($18) \n\ - xor $2,$3,$2 \n\ - stq $0,0($17) \n\ - xor $4,$5,$4 \n\ - \n\ - stq $2,8($17) \n\ - xor $6,$7,$6 \n\ - stq $4,16($17) \n\ - xor $19,$20,$19 \n\ - \n\ - stq $6,24($17) \n\ - xor $21,$22,$21 \n\ - stq $19,32($17) \n\ - xor $23,$24,$23 \n\ - \n\ - stq $21,40($17) \n\ - xor $25,$27,$25 \n\ - stq $23,48($17) \n\ - subq $16,1,$16 \n\ - \n\ - stq $25,56($17) \n\ - addq $17,64,$17 \n\ - addq $18,64,$18 \n\ - bgt $16,2b \n\ - \n\ - ret \n\ - .end xor_alpha_2 \n\ - \n\ - .align 3 \n\ - .ent xor_alpha_3 \n\ -xor_alpha_3: \n\ - .prologue 0 \n\ - srl $16, 6, $16 \n\ - .align 4 \n\ -3: \n\ - ldq $0,0($17) \n\ - ldq $1,0($18) \n\ - ldq $2,0($19) \n\ - ldq $3,8($17) \n\ - \n\ - ldq $4,8($18) \n\ - ldq $6,16($17) \n\ - ldq $7,16($18) \n\ - ldq $21,24($17) \n\ - \n\ - ldq $22,24($18) \n\ - ldq $24,32($17) \n\ - ldq $25,32($18) \n\ - ldq $5,8($19) \n\ - \n\ - ldq $20,16($19) \n\ - ldq $23,24($19) \n\ - ldq $27,32($19) \n\ - nop \n\ - \n\ - xor $0,$1,$1 # 8 cycles from $0 load \n\ - xor $3,$4,$4 # 6 cycles from $4 load \n\ - xor $6,$7,$7 # 6 cycles 
from $7 load \n\ - xor $21,$22,$22 # 5 cycles from $22 load \n\ - \n\ - xor $1,$2,$2 # 9 cycles from $2 load \n\ - xor $24,$25,$25 # 5 cycles from $25 load \n\ - stq $2,0($17) \n\ - xor $4,$5,$5 # 6 cycles from $5 load \n\ - \n\ - stq $5,8($17) \n\ - xor $7,$20,$20 # 7 cycles from $20 load \n\ - stq $20,16($17) \n\ - xor $22,$23,$23 # 7 cycles from $23 load \n\ - \n\ - stq $23,24($17) \n\ - xor $25,$27,$27 # 7 cycles from $27 load \n\ - stq $27,32($17) \n\ - nop \n\ - \n\ - ldq $0,40($17) \n\ - ldq $1,40($18) \n\ - ldq $3,48($17) \n\ - ldq $4,48($18) \n\ - \n\ - ldq $6,56($17) \n\ - ldq $7,56($18) \n\ - ldq $2,40($19) \n\ - ldq $5,48($19) \n\ - \n\ - ldq $20,56($19) \n\ - xor $0,$1,$1 # 4 cycles from $1 load \n\ - xor $3,$4,$4 # 5 cycles from $4 load \n\ - xor $6,$7,$7 # 5 cycles from $7 load \n\ - \n\ - xor $1,$2,$2 # 4 cycles from $2 load \n\ - xor $4,$5,$5 # 5 cycles from $5 load \n\ - stq $2,40($17) \n\ - xor $7,$20,$20 # 4 cycles from $20 load \n\ - \n\ - stq $5,48($17) \n\ - subq $16,1,$16 \n\ - stq $20,56($17) \n\ - addq $19,64,$19 \n\ - \n\ - addq $18,64,$18 \n\ - addq $17,64,$17 \n\ - bgt $16,3b \n\ - ret \n\ - .end xor_alpha_3 \n\ - \n\ - .align 3 \n\ - .ent xor_alpha_4 \n\ -xor_alpha_4: \n\ - .prologue 0 \n\ - srl $16, 6, $16 \n\ - .align 4 \n\ -4: \n\ - ldq $0,0($17) \n\ - ldq $1,0($18) \n\ - ldq $2,0($19) \n\ - ldq $3,0($20) \n\ - \n\ - ldq $4,8($17) \n\ - ldq $5,8($18) \n\ - ldq $6,8($19) \n\ - ldq $7,8($20) \n\ - \n\ - ldq $21,16($17) \n\ - ldq $22,16($18) \n\ - ldq $23,16($19) \n\ - ldq $24,16($20) \n\ - \n\ - ldq $25,24($17) \n\ - xor $0,$1,$1 # 6 cycles from $1 load \n\ - ldq $27,24($18) \n\ - xor $2,$3,$3 # 6 cycles from $3 load \n\ - \n\ - ldq $0,24($19) \n\ - xor $1,$3,$3 \n\ - ldq $1,24($20) \n\ - xor $4,$5,$5 # 7 cycles from $5 load \n\ - \n\ - stq $3,0($17) \n\ - xor $6,$7,$7 \n\ - xor $21,$22,$22 # 7 cycles from $22 load \n\ - xor $5,$7,$7 \n\ - \n\ - stq $7,8($17) \n\ - xor $23,$24,$24 # 7 cycles from $24 load \n\ - ldq $2,32($17) \n\ - 
xor $22,$24,$24 \n\ - \n\ - ldq $3,32($18) \n\ - ldq $4,32($19) \n\ - ldq $5,32($20) \n\ - xor $25,$27,$27 # 8 cycles from $27 load \n\ - \n\ - ldq $6,40($17) \n\ - ldq $7,40($18) \n\ - ldq $21,40($19) \n\ - ldq $22,40($20) \n\ - \n\ - stq $24,16($17) \n\ - xor $0,$1,$1 # 9 cycles from $1 load \n\ - xor $2,$3,$3 # 5 cycles from $3 load \n\ - xor $27,$1,$1 \n\ - \n\ - stq $1,24($17) \n\ - xor $4,$5,$5 # 5 cycles from $5 load \n\ - ldq $23,48($17) \n\ - ldq $24,48($18) \n\ - \n\ - ldq $25,48($19) \n\ - xor $3,$5,$5 \n\ - ldq $27,48($20) \n\ - ldq $0,56($17) \n\ - \n\ - ldq $1,56($18) \n\ - ldq $2,56($19) \n\ - xor $6,$7,$7 # 8 cycles from $6 load \n\ - ldq $3,56($20) \n\ - \n\ - stq $5,32($17) \n\ - xor $21,$22,$22 # 8 cycles from $22 load \n\ - xor $7,$22,$22 \n\ - xor $23,$24,$24 # 5 cycles from $24 load \n\ - \n\ - stq $22,40($17) \n\ - xor $25,$27,$27 # 5 cycles from $27 load \n\ - xor $24,$27,$27 \n\ - xor $0,$1,$1 # 5 cycles from $1 load \n\ - \n\ - stq $27,48($17) \n\ - xor $2,$3,$3 # 4 cycles from $3 load \n\ - xor $1,$3,$3 \n\ - subq $16,1,$16 \n\ - \n\ - stq $3,56($17) \n\ - addq $20,64,$20 \n\ - addq $19,64,$19 \n\ - addq $18,64,$18 \n\ - \n\ - addq $17,64,$17 \n\ - bgt $16,4b \n\ - ret \n\ - .end xor_alpha_4 \n\ - \n\ - .align 3 \n\ - .ent xor_alpha_5 \n\ -xor_alpha_5: \n\ - .prologue 0 \n\ - srl $16, 6, $16 \n\ - .align 4 \n\ -5: \n\ - ldq $0,0($17) \n\ - ldq $1,0($18) \n\ - ldq $2,0($19) \n\ - ldq $3,0($20) \n\ - \n\ - ldq $4,0($21) \n\ - ldq $5,8($17) \n\ - ldq $6,8($18) \n\ - ldq $7,8($19) \n\ - \n\ - ldq $22,8($20) \n\ - ldq $23,8($21) \n\ - ldq $24,16($17) \n\ - ldq $25,16($18) \n\ - \n\ - ldq $27,16($19) \n\ - xor $0,$1,$1 # 6 cycles from $1 load \n\ - ldq $28,16($20) \n\ - xor $2,$3,$3 # 6 cycles from $3 load \n\ - \n\ - ldq $0,16($21) \n\ - xor $1,$3,$3 \n\ - ldq $1,24($17) \n\ - xor $3,$4,$4 # 7 cycles from $4 load \n\ - \n\ - stq $4,0($17) \n\ - xor $5,$6,$6 # 7 cycles from $6 load \n\ - xor $7,$22,$22 # 7 cycles from $22 load \n\ - xor 
$6,$23,$23 # 7 cycles from $23 load \n\ - \n\ - ldq $2,24($18) \n\ - xor $22,$23,$23 \n\ - ldq $3,24($19) \n\ - xor $24,$25,$25 # 8 cycles from $25 load \n\ - \n\ - stq $23,8($17) \n\ - xor $25,$27,$27 # 8 cycles from $27 load \n\ - ldq $4,24($20) \n\ - xor $28,$0,$0 # 7 cycles from $0 load \n\ - \n\ - ldq $5,24($21) \n\ - xor $27,$0,$0 \n\ - ldq $6,32($17) \n\ - ldq $7,32($18) \n\ - \n\ - stq $0,16($17) \n\ - xor $1,$2,$2 # 6 cycles from $2 load \n\ - ldq $22,32($19) \n\ - xor $3,$4,$4 # 4 cycles from $4 load \n\ - \n\ - ldq $23,32($20) \n\ - xor $2,$4,$4 \n\ - ldq $24,32($21) \n\ - ldq $25,40($17) \n\ - \n\ - ldq $27,40($18) \n\ - ldq $28,40($19) \n\ - ldq $0,40($20) \n\ - xor $4,$5,$5 # 7 cycles from $5 load \n\ - \n\ - stq $5,24($17) \n\ - xor $6,$7,$7 # 7 cycles from $7 load \n\ - ldq $1,40($21) \n\ - ldq $2,48($17) \n\ - \n\ - ldq $3,48($18) \n\ - xor $7,$22,$22 # 7 cycles from $22 load \n\ - ldq $4,48($19) \n\ - xor $23,$24,$24 # 6 cycles from $24 load \n\ - \n\ - ldq $5,48($20) \n\ - xor $22,$24,$24 \n\ - ldq $6,48($21) \n\ - xor $25,$27,$27 # 7 cycles from $27 load \n\ - \n\ - stq $24,32($17) \n\ - xor $27,$28,$28 # 8 cycles from $28 load \n\ - ldq $7,56($17) \n\ - xor $0,$1,$1 # 6 cycles from $1 load \n\ - \n\ - ldq $22,56($18) \n\ - ldq $23,56($19) \n\ - ldq $24,56($20) \n\ - ldq $25,56($21) \n\ - \n\ - xor $28,$1,$1 \n\ - xor $2,$3,$3 # 9 cycles from $3 load \n\ - xor $3,$4,$4 # 9 cycles from $4 load \n\ - xor $5,$6,$6 # 8 cycles from $6 load \n\ - \n\ - stq $1,40($17) \n\ - xor $4,$6,$6 \n\ - xor $7,$22,$22 # 7 cycles from $22 load \n\ - xor $23,$24,$24 # 6 cycles from $24 load \n\ - \n\ - stq $6,48($17) \n\ - xor $22,$24,$24 \n\ - subq $16,1,$16 \n\ - xor $24,$25,$25 # 8 cycles from $25 load \n\ - \n\ - stq $25,56($17) \n\ - addq $21,64,$21 \n\ - addq $20,64,$20 \n\ - addq $19,64,$19 \n\ - \n\ - addq $18,64,$18 \n\ - addq $17,64,$17 \n\ - bgt $16,5b \n\ - ret \n\ - .end xor_alpha_5 \n\ - \n\ - .align 3 \n\ - .ent xor_alpha_prefetch_2 \n\ 
-xor_alpha_prefetch_2: \n\ - .prologue 0 \n\ - srl $16, 6, $16 \n\ - \n\ - ldq $31, 0($17) \n\ - ldq $31, 0($18) \n\ - \n\ - ldq $31, 64($17) \n\ - ldq $31, 64($18) \n\ - \n\ - ldq $31, 128($17) \n\ - ldq $31, 128($18) \n\ - \n\ - ldq $31, 192($17) \n\ - ldq $31, 192($18) \n\ - .align 4 \n\ -2: \n\ - ldq $0,0($17) \n\ - ldq $1,0($18) \n\ - ldq $2,8($17) \n\ - ldq $3,8($18) \n\ - \n\ - ldq $4,16($17) \n\ - ldq $5,16($18) \n\ - ldq $6,24($17) \n\ - ldq $7,24($18) \n\ - \n\ - ldq $19,32($17) \n\ - ldq $20,32($18) \n\ - ldq $21,40($17) \n\ - ldq $22,40($18) \n\ - \n\ - ldq $23,48($17) \n\ - ldq $24,48($18) \n\ - ldq $25,56($17) \n\ - ldq $27,56($18) \n\ - \n\ - ldq $31,256($17) \n\ - xor $0,$1,$0 # 8 cycles from $1 load \n\ - ldq $31,256($18) \n\ - xor $2,$3,$2 \n\ - \n\ - stq $0,0($17) \n\ - xor $4,$5,$4 \n\ - stq $2,8($17) \n\ - xor $6,$7,$6 \n\ - \n\ - stq $4,16($17) \n\ - xor $19,$20,$19 \n\ - stq $6,24($17) \n\ - xor $21,$22,$21 \n\ - \n\ - stq $19,32($17) \n\ - xor $23,$24,$23 \n\ - stq $21,40($17) \n\ - xor $25,$27,$25 \n\ - \n\ - stq $23,48($17) \n\ - subq $16,1,$16 \n\ - stq $25,56($17) \n\ - addq $17,64,$17 \n\ - \n\ - addq $18,64,$18 \n\ - bgt $16,2b \n\ - ret \n\ - .end xor_alpha_prefetch_2 \n\ - \n\ - .align 3 \n\ - .ent xor_alpha_prefetch_3 \n\ -xor_alpha_prefetch_3: \n\ - .prologue 0 \n\ - srl $16, 6, $16 \n\ - \n\ - ldq $31, 0($17) \n\ - ldq $31, 0($18) \n\ - ldq $31, 0($19) \n\ - \n\ - ldq $31, 64($17) \n\ - ldq $31, 64($18) \n\ - ldq $31, 64($19) \n\ - \n\ - ldq $31, 128($17) \n\ - ldq $31, 128($18) \n\ - ldq $31, 128($19) \n\ - \n\ - ldq $31, 192($17) \n\ - ldq $31, 192($18) \n\ - ldq $31, 192($19) \n\ - .align 4 \n\ -3: \n\ - ldq $0,0($17) \n\ - ldq $1,0($18) \n\ - ldq $2,0($19) \n\ - ldq $3,8($17) \n\ - \n\ - ldq $4,8($18) \n\ - ldq $6,16($17) \n\ - ldq $7,16($18) \n\ - ldq $21,24($17) \n\ - \n\ - ldq $22,24($18) \n\ - ldq $24,32($17) \n\ - ldq $25,32($18) \n\ - ldq $5,8($19) \n\ - \n\ - ldq $20,16($19) \n\ - ldq $23,24($19) \n\ - ldq $27,32($19) 
\n\ - nop \n\ - \n\ - xor $0,$1,$1 # 8 cycles from $0 load \n\ - xor $3,$4,$4 # 7 cycles from $4 load \n\ - xor $6,$7,$7 # 6 cycles from $7 load \n\ - xor $21,$22,$22 # 5 cycles from $22 load \n\ - \n\ - xor $1,$2,$2 # 9 cycles from $2 load \n\ - xor $24,$25,$25 # 5 cycles from $25 load \n\ - stq $2,0($17) \n\ - xor $4,$5,$5 # 6 cycles from $5 load \n\ - \n\ - stq $5,8($17) \n\ - xor $7,$20,$20 # 7 cycles from $20 load \n\ - stq $20,16($17) \n\ - xor $22,$23,$23 # 7 cycles from $23 load \n\ - \n\ - stq $23,24($17) \n\ - xor $25,$27,$27 # 7 cycles from $27 load \n\ - stq $27,32($17) \n\ - nop \n\ - \n\ - ldq $0,40($17) \n\ - ldq $1,40($18) \n\ - ldq $3,48($17) \n\ - ldq $4,48($18) \n\ - \n\ - ldq $6,56($17) \n\ - ldq $7,56($18) \n\ - ldq $2,40($19) \n\ - ldq $5,48($19) \n\ - \n\ - ldq $20,56($19) \n\ - ldq $31,256($17) \n\ - ldq $31,256($18) \n\ - ldq $31,256($19) \n\ - \n\ - xor $0,$1,$1 # 6 cycles from $1 load \n\ - xor $3,$4,$4 # 5 cycles from $4 load \n\ - xor $6,$7,$7 # 5 cycles from $7 load \n\ - xor $1,$2,$2 # 4 cycles from $2 load \n\ - \n\ - xor $4,$5,$5 # 5 cycles from $5 load \n\ - xor $7,$20,$20 # 4 cycles from $20 load \n\ - stq $2,40($17) \n\ - subq $16,1,$16 \n\ - \n\ - stq $5,48($17) \n\ - addq $19,64,$19 \n\ - stq $20,56($17) \n\ - addq $18,64,$18 \n\ - \n\ - addq $17,64,$17 \n\ - bgt $16,3b \n\ - ret \n\ - .end xor_alpha_prefetch_3 \n\ - \n\ - .align 3 \n\ - .ent xor_alpha_prefetch_4 \n\ -xor_alpha_prefetch_4: \n\ - .prologue 0 \n\ - srl $16, 6, $16 \n\ - \n\ - ldq $31, 0($17) \n\ - ldq $31, 0($18) \n\ - ldq $31, 0($19) \n\ - ldq $31, 0($20) \n\ - \n\ - ldq $31, 64($17) \n\ - ldq $31, 64($18) \n\ - ldq $31, 64($19) \n\ - ldq $31, 64($20) \n\ - \n\ - ldq $31, 128($17) \n\ - ldq $31, 128($18) \n\ - ldq $31, 128($19) \n\ - ldq $31, 128($20) \n\ - \n\ - ldq $31, 192($17) \n\ - ldq $31, 192($18) \n\ - ldq $31, 192($19) \n\ - ldq $31, 192($20) \n\ - .align 4 \n\ -4: \n\ - ldq $0,0($17) \n\ - ldq $1,0($18) \n\ - ldq $2,0($19) \n\ - ldq $3,0($20) \n\ - \n\ 
- ldq $4,8($17) \n\ - ldq $5,8($18) \n\ - ldq $6,8($19) \n\ - ldq $7,8($20) \n\ - \n\ - ldq $21,16($17) \n\ - ldq $22,16($18) \n\ - ldq $23,16($19) \n\ - ldq $24,16($20) \n\ - \n\ - ldq $25,24($17) \n\ - xor $0,$1,$1 # 6 cycles from $1 load \n\ - ldq $27,24($18) \n\ - xor $2,$3,$3 # 6 cycles from $3 load \n\ - \n\ - ldq $0,24($19) \n\ - xor $1,$3,$3 \n\ - ldq $1,24($20) \n\ - xor $4,$5,$5 # 7 cycles from $5 load \n\ - \n\ - stq $3,0($17) \n\ - xor $6,$7,$7 \n\ - xor $21,$22,$22 # 7 cycles from $22 load \n\ - xor $5,$7,$7 \n\ - \n\ - stq $7,8($17) \n\ - xor $23,$24,$24 # 7 cycles from $24 load \n\ - ldq $2,32($17) \n\ - xor $22,$24,$24 \n\ - \n\ - ldq $3,32($18) \n\ - ldq $4,32($19) \n\ - ldq $5,32($20) \n\ - xor $25,$27,$27 # 8 cycles from $27 load \n\ - \n\ - ldq $6,40($17) \n\ - ldq $7,40($18) \n\ - ldq $21,40($19) \n\ - ldq $22,40($20) \n\ - \n\ - stq $24,16($17) \n\ - xor $0,$1,$1 # 9 cycles from $1 load \n\ - xor $2,$3,$3 # 5 cycles from $3 load \n\ - xor $27,$1,$1 \n\ - \n\ - stq $1,24($17) \n\ - xor $4,$5,$5 # 5 cycles from $5 load \n\ - ldq $23,48($17) \n\ - xor $3,$5,$5 \n\ - \n\ - ldq $24,48($18) \n\ - ldq $25,48($19) \n\ - ldq $27,48($20) \n\ - ldq $0,56($17) \n\ - \n\ - ldq $1,56($18) \n\ - ldq $2,56($19) \n\ - ldq $3,56($20) \n\ - xor $6,$7,$7 # 8 cycles from $6 load \n\ - \n\ - ldq $31,256($17) \n\ - xor $21,$22,$22 # 8 cycles from $22 load \n\ - ldq $31,256($18) \n\ - xor $7,$22,$22 \n\ - \n\ - ldq $31,256($19) \n\ - xor $23,$24,$24 # 6 cycles from $24 load \n\ - ldq $31,256($20) \n\ - xor $25,$27,$27 # 6 cycles from $27 load \n\ - \n\ - stq $5,32($17) \n\ - xor $24,$27,$27 \n\ - xor $0,$1,$1 # 7 cycles from $1 load \n\ - xor $2,$3,$3 # 6 cycles from $3 load \n\ - \n\ - stq $22,40($17) \n\ - xor $1,$3,$3 \n\ - stq $27,48($17) \n\ - subq $16,1,$16 \n\ - \n\ - stq $3,56($17) \n\ - addq $20,64,$20 \n\ - addq $19,64,$19 \n\ - addq $18,64,$18 \n\ - \n\ - addq $17,64,$17 \n\ - bgt $16,4b \n\ - ret \n\ - .end xor_alpha_prefetch_4 \n\ - \n\ - .align 3 \n\ - 
.ent xor_alpha_prefetch_5 \n\ -xor_alpha_prefetch_5: \n\ - .prologue 0 \n\ - srl $16, 6, $16 \n\ - \n\ - ldq $31, 0($17) \n\ - ldq $31, 0($18) \n\ - ldq $31, 0($19) \n\ - ldq $31, 0($20) \n\ - ldq $31, 0($21) \n\ - \n\ - ldq $31, 64($17) \n\ - ldq $31, 64($18) \n\ - ldq $31, 64($19) \n\ - ldq $31, 64($20) \n\ - ldq $31, 64($21) \n\ - \n\ - ldq $31, 128($17) \n\ - ldq $31, 128($18) \n\ - ldq $31, 128($19) \n\ - ldq $31, 128($20) \n\ - ldq $31, 128($21) \n\ - \n\ - ldq $31, 192($17) \n\ - ldq $31, 192($18) \n\ - ldq $31, 192($19) \n\ - ldq $31, 192($20) \n\ - ldq $31, 192($21) \n\ - .align 4 \n\ -5: \n\ - ldq $0,0($17) \n\ - ldq $1,0($18) \n\ - ldq $2,0($19) \n\ - ldq $3,0($20) \n\ - \n\ - ldq $4,0($21) \n\ - ldq $5,8($17) \n\ - ldq $6,8($18) \n\ - ldq $7,8($19) \n\ - \n\ - ldq $22,8($20) \n\ - ldq $23,8($21) \n\ - ldq $24,16($17) \n\ - ldq $25,16($18) \n\ - \n\ - ldq $27,16($19) \n\ - xor $0,$1,$1 # 6 cycles from $1 load \n\ - ldq $28,16($20) \n\ - xor $2,$3,$3 # 6 cycles from $3 load \n\ - \n\ - ldq $0,16($21) \n\ - xor $1,$3,$3 \n\ - ldq $1,24($17) \n\ - xor $3,$4,$4 # 7 cycles from $4 load \n\ - \n\ - stq $4,0($17) \n\ - xor $5,$6,$6 # 7 cycles from $6 load \n\ - xor $7,$22,$22 # 7 cycles from $22 load \n\ - xor $6,$23,$23 # 7 cycles from $23 load \n\ - \n\ - ldq $2,24($18) \n\ - xor $22,$23,$23 \n\ - ldq $3,24($19) \n\ - xor $24,$25,$25 # 8 cycles from $25 load \n\ - \n\ - stq $23,8($17) \n\ - xor $25,$27,$27 # 8 cycles from $27 load \n\ - ldq $4,24($20) \n\ - xor $28,$0,$0 # 7 cycles from $0 load \n\ - \n\ - ldq $5,24($21) \n\ - xor $27,$0,$0 \n\ - ldq $6,32($17) \n\ - ldq $7,32($18) \n\ - \n\ - stq $0,16($17) \n\ - xor $1,$2,$2 # 6 cycles from $2 load \n\ - ldq $22,32($19) \n\ - xor $3,$4,$4 # 4 cycles from $4 load \n\ - \n\ - ldq $23,32($20) \n\ - xor $2,$4,$4 \n\ - ldq $24,32($21) \n\ - ldq $25,40($17) \n\ - \n\ - ldq $27,40($18) \n\ - ldq $28,40($19) \n\ - ldq $0,40($20) \n\ - xor $4,$5,$5 # 7 cycles from $5 load \n\ - \n\ - stq $5,24($17) \n\ - xor 
$6,$7,$7 # 7 cycles from $7 load \n\ - ldq $1,40($21) \n\ - ldq $2,48($17) \n\ - \n\ - ldq $3,48($18) \n\ - xor $7,$22,$22 # 7 cycles from $22 load \n\ - ldq $4,48($19) \n\ - xor $23,$24,$24 # 6 cycles from $24 load \n\ - \n\ - ldq $5,48($20) \n\ - xor $22,$24,$24 \n\ - ldq $6,48($21) \n\ - xor $25,$27,$27 # 7 cycles from $27 load \n\ - \n\ - stq $24,32($17) \n\ - xor $27,$28,$28 # 8 cycles from $28 load \n\ - ldq $7,56($17) \n\ - xor $0,$1,$1 # 6 cycles from $1 load \n\ - \n\ - ldq $22,56($18) \n\ - ldq $23,56($19) \n\ - ldq $24,56($20) \n\ - ldq $25,56($21) \n\ - \n\ - ldq $31,256($17) \n\ - xor $28,$1,$1 \n\ - ldq $31,256($18) \n\ - xor $2,$3,$3 # 9 cycles from $3 load \n\ - \n\ - ldq $31,256($19) \n\ - xor $3,$4,$4 # 9 cycles from $4 load \n\ - ldq $31,256($20) \n\ - xor $5,$6,$6 # 8 cycles from $6 load \n\ - \n\ - stq $1,40($17) \n\ - xor $4,$6,$6 \n\ - xor $7,$22,$22 # 7 cycles from $22 load \n\ - xor $23,$24,$24 # 6 cycles from $24 load \n\ - \n\ - stq $6,48($17) \n\ - xor $22,$24,$24 \n\ - ldq $31,256($21) \n\ - xor $24,$25,$25 # 8 cycles from $25 load \n\ - \n\ - stq $25,56($17) \n\ - subq $16,1,$16 \n\ - addq $21,64,$21 \n\ - addq $20,64,$20 \n\ - \n\ - addq $19,64,$19 \n\ - addq $18,64,$18 \n\ - addq $17,64,$17 \n\ - bgt $16,5b \n\ - \n\ - ret \n\ - .end xor_alpha_prefetch_5 \n\ -"); - -static struct xor_block_template xor_block_alpha = { - .name = "alpha", - .do_2 = xor_alpha_2, - .do_3 = xor_alpha_3, - .do_4 = xor_alpha_4, - .do_5 = xor_alpha_5, -}; - -static struct xor_block_template xor_block_alpha_prefetch = { - .name = "alpha prefetch", - .do_2 = xor_alpha_prefetch_2, - .do_3 = xor_alpha_prefetch_3, - .do_4 = xor_alpha_prefetch_4, - .do_5 = xor_alpha_prefetch_5, -}; - -/* For grins, also test the generic routines. */ +#include #include +extern struct xor_block_template xor_block_alpha; +extern struct xor_block_template xor_block_alpha_prefetch; + /* * Force the use of alpha_prefetch if EV6, as it is significantly faster in the * cold cache case. 
diff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile index 89a944c9f990..6d03c27c37c7 100644 --- a/lib/raid/xor/Makefile +++ b/lib/raid/xor/Makefile @@ -7,3 +7,5 @@ xor-y += xor-8regs.o xor-y += xor-32regs.o xor-y += xor-8regs-prefetch.o xor-y += xor-32regs-prefetch.o + +xor-$(CONFIG_ALPHA) += alpha/xor.o diff --git a/lib/raid/xor/alpha/xor.c b/lib/raid/xor/alpha/xor.c new file mode 100644 index 000000000000..0964ac420604 --- /dev/null +++ b/lib/raid/xor/alpha/xor.c @@ -0,0 +1,849 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Optimized XOR parity functions for alpha EV5 and EV6 + */ +#include +#include + +extern void +xor_alpha_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +extern void +xor_alpha_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +extern void +xor_alpha_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +extern void +xor_alpha_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); + +extern void +xor_alpha_prefetch_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +extern void +xor_alpha_prefetch_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +extern void +xor_alpha_prefetch_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +extern void +xor_alpha_prefetch_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, 
+ const unsigned long * __restrict p4, + const unsigned long * __restrict p5); + +asm(" \n\ + .text \n\ + .align 3 \n\ + .ent xor_alpha_2 \n\ +xor_alpha_2: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + .align 4 \n\ +2: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,8($17) \n\ + ldq $3,8($18) \n\ + \n\ + ldq $4,16($17) \n\ + ldq $5,16($18) \n\ + ldq $6,24($17) \n\ + ldq $7,24($18) \n\ + \n\ + ldq $19,32($17) \n\ + ldq $20,32($18) \n\ + ldq $21,40($17) \n\ + ldq $22,40($18) \n\ + \n\ + ldq $23,48($17) \n\ + ldq $24,48($18) \n\ + ldq $25,56($17) \n\ + xor $0,$1,$0 # 7 cycles from $1 load \n\ + \n\ + ldq $27,56($18) \n\ + xor $2,$3,$2 \n\ + stq $0,0($17) \n\ + xor $4,$5,$4 \n\ + \n\ + stq $2,8($17) \n\ + xor $6,$7,$6 \n\ + stq $4,16($17) \n\ + xor $19,$20,$19 \n\ + \n\ + stq $6,24($17) \n\ + xor $21,$22,$21 \n\ + stq $19,32($17) \n\ + xor $23,$24,$23 \n\ + \n\ + stq $21,40($17) \n\ + xor $25,$27,$25 \n\ + stq $23,48($17) \n\ + subq $16,1,$16 \n\ + \n\ + stq $25,56($17) \n\ + addq $17,64,$17 \n\ + addq $18,64,$18 \n\ + bgt $16,2b \n\ + \n\ + ret \n\ + .end xor_alpha_2 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_3 \n\ +xor_alpha_3: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + .align 4 \n\ +3: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,0($19) \n\ + ldq $3,8($17) \n\ + \n\ + ldq $4,8($18) \n\ + ldq $6,16($17) \n\ + ldq $7,16($18) \n\ + ldq $21,24($17) \n\ + \n\ + ldq $22,24($18) \n\ + ldq $24,32($17) \n\ + ldq $25,32($18) \n\ + ldq $5,8($19) \n\ + \n\ + ldq $20,16($19) \n\ + ldq $23,24($19) \n\ + ldq $27,32($19) \n\ + nop \n\ + \n\ + xor $0,$1,$1 # 8 cycles from $0 load \n\ + xor $3,$4,$4 # 6 cycles from $4 load \n\ + xor $6,$7,$7 # 6 cycles from $7 load \n\ + xor $21,$22,$22 # 5 cycles from $22 load \n\ + \n\ + xor $1,$2,$2 # 9 cycles from $2 load \n\ + xor $24,$25,$25 # 5 cycles from $25 load \n\ + stq $2,0($17) \n\ + xor $4,$5,$5 # 6 cycles from $5 load \n\ + \n\ + stq $5,8($17) \n\ + xor $7,$20,$20 # 7 cycles from $20 load \n\ + stq $20,16($17) \n\ + xor 
$22,$23,$23 # 7 cycles from $23 load \n\ + \n\ + stq $23,24($17) \n\ + xor $25,$27,$27 # 7 cycles from $27 load \n\ + stq $27,32($17) \n\ + nop \n\ + \n\ + ldq $0,40($17) \n\ + ldq $1,40($18) \n\ + ldq $3,48($17) \n\ + ldq $4,48($18) \n\ + \n\ + ldq $6,56($17) \n\ + ldq $7,56($18) \n\ + ldq $2,40($19) \n\ + ldq $5,48($19) \n\ + \n\ + ldq $20,56($19) \n\ + xor $0,$1,$1 # 4 cycles from $1 load \n\ + xor $3,$4,$4 # 5 cycles from $4 load \n\ + xor $6,$7,$7 # 5 cycles from $7 load \n\ + \n\ + xor $1,$2,$2 # 4 cycles from $2 load \n\ + xor $4,$5,$5 # 5 cycles from $5 load \n\ + stq $2,40($17) \n\ + xor $7,$20,$20 # 4 cycles from $20 load \n\ + \n\ + stq $5,48($17) \n\ + subq $16,1,$16 \n\ + stq $20,56($17) \n\ + addq $19,64,$19 \n\ + \n\ + addq $18,64,$18 \n\ + addq $17,64,$17 \n\ + bgt $16,3b \n\ + ret \n\ + .end xor_alpha_3 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_4 \n\ +xor_alpha_4: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + .align 4 \n\ +4: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,0($19) \n\ + ldq $3,0($20) \n\ + \n\ + ldq $4,8($17) \n\ + ldq $5,8($18) \n\ + ldq $6,8($19) \n\ + ldq $7,8($20) \n\ + \n\ + ldq $21,16($17) \n\ + ldq $22,16($18) \n\ + ldq $23,16($19) \n\ + ldq $24,16($20) \n\ + \n\ + ldq $25,24($17) \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + ldq $27,24($18) \n\ + xor $2,$3,$3 # 6 cycles from $3 load \n\ + \n\ + ldq $0,24($19) \n\ + xor $1,$3,$3 \n\ + ldq $1,24($20) \n\ + xor $4,$5,$5 # 7 cycles from $5 load \n\ + \n\ + stq $3,0($17) \n\ + xor $6,$7,$7 \n\ + xor $21,$22,$22 # 7 cycles from $22 load \n\ + xor $5,$7,$7 \n\ + \n\ + stq $7,8($17) \n\ + xor $23,$24,$24 # 7 cycles from $24 load \n\ + ldq $2,32($17) \n\ + xor $22,$24,$24 \n\ + \n\ + ldq $3,32($18) \n\ + ldq $4,32($19) \n\ + ldq $5,32($20) \n\ + xor $25,$27,$27 # 8 cycles from $27 load \n\ + \n\ + ldq $6,40($17) \n\ + ldq $7,40($18) \n\ + ldq $21,40($19) \n\ + ldq $22,40($20) \n\ + \n\ + stq $24,16($17) \n\ + xor $0,$1,$1 # 9 cycles from $1 load \n\ + xor $2,$3,$3 # 5 cycles 
from $3 load \n\ + xor $27,$1,$1 \n\ + \n\ + stq $1,24($17) \n\ + xor $4,$5,$5 # 5 cycles from $5 load \n\ + ldq $23,48($17) \n\ + ldq $24,48($18) \n\ + \n\ + ldq $25,48($19) \n\ + xor $3,$5,$5 \n\ + ldq $27,48($20) \n\ + ldq $0,56($17) \n\ + \n\ + ldq $1,56($18) \n\ + ldq $2,56($19) \n\ + xor $6,$7,$7 # 8 cycles from $6 load \n\ + ldq $3,56($20) \n\ + \n\ + stq $5,32($17) \n\ + xor $21,$22,$22 # 8 cycles from $22 load \n\ + xor $7,$22,$22 \n\ + xor $23,$24,$24 # 5 cycles from $24 load \n\ + \n\ + stq $22,40($17) \n\ + xor $25,$27,$27 # 5 cycles from $27 load \n\ + xor $24,$27,$27 \n\ + xor $0,$1,$1 # 5 cycles from $1 load \n\ + \n\ + stq $27,48($17) \n\ + xor $2,$3,$3 # 4 cycles from $3 load \n\ + xor $1,$3,$3 \n\ + subq $16,1,$16 \n\ + \n\ + stq $3,56($17) \n\ + addq $20,64,$20 \n\ + addq $19,64,$19 \n\ + addq $18,64,$18 \n\ + \n\ + addq $17,64,$17 \n\ + bgt $16,4b \n\ + ret \n\ + .end xor_alpha_4 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_5 \n\ +xor_alpha_5: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + .align 4 \n\ +5: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,0($19) \n\ + ldq $3,0($20) \n\ + \n\ + ldq $4,0($21) \n\ + ldq $5,8($17) \n\ + ldq $6,8($18) \n\ + ldq $7,8($19) \n\ + \n\ + ldq $22,8($20) \n\ + ldq $23,8($21) \n\ + ldq $24,16($17) \n\ + ldq $25,16($18) \n\ + \n\ + ldq $27,16($19) \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + ldq $28,16($20) \n\ + xor $2,$3,$3 # 6 cycles from $3 load \n\ + \n\ + ldq $0,16($21) \n\ + xor $1,$3,$3 \n\ + ldq $1,24($17) \n\ + xor $3,$4,$4 # 7 cycles from $4 load \n\ + \n\ + stq $4,0($17) \n\ + xor $5,$6,$6 # 7 cycles from $6 load \n\ + xor $7,$22,$22 # 7 cycles from $22 load \n\ + xor $6,$23,$23 # 7 cycles from $23 load \n\ + \n\ + ldq $2,24($18) \n\ + xor $22,$23,$23 \n\ + ldq $3,24($19) \n\ + xor $24,$25,$25 # 8 cycles from $25 load \n\ + \n\ + stq $23,8($17) \n\ + xor $25,$27,$27 # 8 cycles from $27 load \n\ + ldq $4,24($20) \n\ + xor $28,$0,$0 # 7 cycles from $0 load \n\ + \n\ + ldq $5,24($21) \n\ + xor 
$27,$0,$0 \n\ + ldq $6,32($17) \n\ + ldq $7,32($18) \n\ + \n\ + stq $0,16($17) \n\ + xor $1,$2,$2 # 6 cycles from $2 load \n\ + ldq $22,32($19) \n\ + xor $3,$4,$4 # 4 cycles from $4 load \n\ + \n\ + ldq $23,32($20) \n\ + xor $2,$4,$4 \n\ + ldq $24,32($21) \n\ + ldq $25,40($17) \n\ + \n\ + ldq $27,40($18) \n\ + ldq $28,40($19) \n\ + ldq $0,40($20) \n\ + xor $4,$5,$5 # 7 cycles from $5 load \n\ + \n\ + stq $5,24($17) \n\ + xor $6,$7,$7 # 7 cycles from $7 load \n\ + ldq $1,40($21) \n\ + ldq $2,48($17) \n\ + \n\ + ldq $3,48($18) \n\ + xor $7,$22,$22 # 7 cycles from $22 load \n\ + ldq $4,48($19) \n\ + xor $23,$24,$24 # 6 cycles from $24 load \n\ + \n\ + ldq $5,48($20) \n\ + xor $22,$24,$24 \n\ + ldq $6,48($21) \n\ + xor $25,$27,$27 # 7 cycles from $27 load \n\ + \n\ + stq $24,32($17) \n\ + xor $27,$28,$28 # 8 cycles from $28 load \n\ + ldq $7,56($17) \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + \n\ + ldq $22,56($18) \n\ + ldq $23,56($19) \n\ + ldq $24,56($20) \n\ + ldq $25,56($21) \n\ + \n\ + xor $28,$1,$1 \n\ + xor $2,$3,$3 # 9 cycles from $3 load \n\ + xor $3,$4,$4 # 9 cycles from $4 load \n\ + xor $5,$6,$6 # 8 cycles from $6 load \n\ + \n\ + stq $1,40($17) \n\ + xor $4,$6,$6 \n\ + xor $7,$22,$22 # 7 cycles from $22 load \n\ + xor $23,$24,$24 # 6 cycles from $24 load \n\ + \n\ + stq $6,48($17) \n\ + xor $22,$24,$24 \n\ + subq $16,1,$16 \n\ + xor $24,$25,$25 # 8 cycles from $25 load \n\ + \n\ + stq $25,56($17) \n\ + addq $21,64,$21 \n\ + addq $20,64,$20 \n\ + addq $19,64,$19 \n\ + \n\ + addq $18,64,$18 \n\ + addq $17,64,$17 \n\ + bgt $16,5b \n\ + ret \n\ + .end xor_alpha_5 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_prefetch_2 \n\ +xor_alpha_prefetch_2: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + \n\ + ldq $31, 0($17) \n\ + ldq $31, 0($18) \n\ + \n\ + ldq $31, 64($17) \n\ + ldq $31, 64($18) \n\ + \n\ + ldq $31, 128($17) \n\ + ldq $31, 128($18) \n\ + \n\ + ldq $31, 192($17) \n\ + ldq $31, 192($18) \n\ + .align 4 \n\ +2: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + 
ldq $2,8($17) \n\ + ldq $3,8($18) \n\ + \n\ + ldq $4,16($17) \n\ + ldq $5,16($18) \n\ + ldq $6,24($17) \n\ + ldq $7,24($18) \n\ + \n\ + ldq $19,32($17) \n\ + ldq $20,32($18) \n\ + ldq $21,40($17) \n\ + ldq $22,40($18) \n\ + \n\ + ldq $23,48($17) \n\ + ldq $24,48($18) \n\ + ldq $25,56($17) \n\ + ldq $27,56($18) \n\ + \n\ + ldq $31,256($17) \n\ + xor $0,$1,$0 # 8 cycles from $1 load \n\ + ldq $31,256($18) \n\ + xor $2,$3,$2 \n\ + \n\ + stq $0,0($17) \n\ + xor $4,$5,$4 \n\ + stq $2,8($17) \n\ + xor $6,$7,$6 \n\ + \n\ + stq $4,16($17) \n\ + xor $19,$20,$19 \n\ + stq $6,24($17) \n\ + xor $21,$22,$21 \n\ + \n\ + stq $19,32($17) \n\ + xor $23,$24,$23 \n\ + stq $21,40($17) \n\ + xor $25,$27,$25 \n\ + \n\ + stq $23,48($17) \n\ + subq $16,1,$16 \n\ + stq $25,56($17) \n\ + addq $17,64,$17 \n\ + \n\ + addq $18,64,$18 \n\ + bgt $16,2b \n\ + ret \n\ + .end xor_alpha_prefetch_2 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_prefetch_3 \n\ +xor_alpha_prefetch_3: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + \n\ + ldq $31, 0($17) \n\ + ldq $31, 0($18) \n\ + ldq $31, 0($19) \n\ + \n\ + ldq $31, 64($17) \n\ + ldq $31, 64($18) \n\ + ldq $31, 64($19) \n\ + \n\ + ldq $31, 128($17) \n\ + ldq $31, 128($18) \n\ + ldq $31, 128($19) \n\ + \n\ + ldq $31, 192($17) \n\ + ldq $31, 192($18) \n\ + ldq $31, 192($19) \n\ + .align 4 \n\ +3: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,0($19) \n\ + ldq $3,8($17) \n\ + \n\ + ldq $4,8($18) \n\ + ldq $6,16($17) \n\ + ldq $7,16($18) \n\ + ldq $21,24($17) \n\ + \n\ + ldq $22,24($18) \n\ + ldq $24,32($17) \n\ + ldq $25,32($18) \n\ + ldq $5,8($19) \n\ + \n\ + ldq $20,16($19) \n\ + ldq $23,24($19) \n\ + ldq $27,32($19) \n\ + nop \n\ + \n\ + xor $0,$1,$1 # 8 cycles from $0 load \n\ + xor $3,$4,$4 # 7 cycles from $4 load \n\ + xor $6,$7,$7 # 6 cycles from $7 load \n\ + xor $21,$22,$22 # 5 cycles from $22 load \n\ + \n\ + xor $1,$2,$2 # 9 cycles from $2 load \n\ + xor $24,$25,$25 # 5 cycles from $25 load \n\ + stq $2,0($17) \n\ + xor $4,$5,$5 # 6 cycles from 
$5 load \n\ + \n\ + stq $5,8($17) \n\ + xor $7,$20,$20 # 7 cycles from $20 load \n\ + stq $20,16($17) \n\ + xor $22,$23,$23 # 7 cycles from $23 load \n\ + \n\ + stq $23,24($17) \n\ + xor $25,$27,$27 # 7 cycles from $27 load \n\ + stq $27,32($17) \n\ + nop \n\ + \n\ + ldq $0,40($17) \n\ + ldq $1,40($18) \n\ + ldq $3,48($17) \n\ + ldq $4,48($18) \n\ + \n\ + ldq $6,56($17) \n\ + ldq $7,56($18) \n\ + ldq $2,40($19) \n\ + ldq $5,48($19) \n\ + \n\ + ldq $20,56($19) \n\ + ldq $31,256($17) \n\ + ldq $31,256($18) \n\ + ldq $31,256($19) \n\ + \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + xor $3,$4,$4 # 5 cycles from $4 load \n\ + xor $6,$7,$7 # 5 cycles from $7 load \n\ + xor $1,$2,$2 # 4 cycles from $2 load \n\ + \n\ + xor $4,$5,$5 # 5 cycles from $5 load \n\ + xor $7,$20,$20 # 4 cycles from $20 load \n\ + stq $2,40($17) \n\ + subq $16,1,$16 \n\ + \n\ + stq $5,48($17) \n\ + addq $19,64,$19 \n\ + stq $20,56($17) \n\ + addq $18,64,$18 \n\ + \n\ + addq $17,64,$17 \n\ + bgt $16,3b \n\ + ret \n\ + .end xor_alpha_prefetch_3 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_prefetch_4 \n\ +xor_alpha_prefetch_4: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + \n\ + ldq $31, 0($17) \n\ + ldq $31, 0($18) \n\ + ldq $31, 0($19) \n\ + ldq $31, 0($20) \n\ + \n\ + ldq $31, 64($17) \n\ + ldq $31, 64($18) \n\ + ldq $31, 64($19) \n\ + ldq $31, 64($20) \n\ + \n\ + ldq $31, 128($17) \n\ + ldq $31, 128($18) \n\ + ldq $31, 128($19) \n\ + ldq $31, 128($20) \n\ + \n\ + ldq $31, 192($17) \n\ + ldq $31, 192($18) \n\ + ldq $31, 192($19) \n\ + ldq $31, 192($20) \n\ + .align 4 \n\ +4: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,0($19) \n\ + ldq $3,0($20) \n\ + \n\ + ldq $4,8($17) \n\ + ldq $5,8($18) \n\ + ldq $6,8($19) \n\ + ldq $7,8($20) \n\ + \n\ + ldq $21,16($17) \n\ + ldq $22,16($18) \n\ + ldq $23,16($19) \n\ + ldq $24,16($20) \n\ + \n\ + ldq $25,24($17) \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + ldq $27,24($18) \n\ + xor $2,$3,$3 # 6 cycles from $3 load \n\ + \n\ + ldq $0,24($19) \n\ + xor 
$1,$3,$3 \n\ + ldq $1,24($20) \n\ + xor $4,$5,$5 # 7 cycles from $5 load \n\ + \n\ + stq $3,0($17) \n\ + xor $6,$7,$7 \n\ + xor $21,$22,$22 # 7 cycles from $22 load \n\ + xor $5,$7,$7 \n\ + \n\ + stq $7,8($17) \n\ + xor $23,$24,$24 # 7 cycles from $24 load \n\ + ldq $2,32($17) \n\ + xor $22,$24,$24 \n\ + \n\ + ldq $3,32($18) \n\ + ldq $4,32($19) \n\ + ldq $5,32($20) \n\ + xor $25,$27,$27 # 8 cycles from $27 load \n\ + \n\ + ldq $6,40($17) \n\ + ldq $7,40($18) \n\ + ldq $21,40($19) \n\ + ldq $22,40($20) \n\ + \n\ + stq $24,16($17) \n\ + xor $0,$1,$1 # 9 cycles from $1 load \n\ + xor $2,$3,$3 # 5 cycles from $3 load \n\ + xor $27,$1,$1 \n\ + \n\ + stq $1,24($17) \n\ + xor $4,$5,$5 # 5 cycles from $5 load \n\ + ldq $23,48($17) \n\ + xor $3,$5,$5 \n\ + \n\ + ldq $24,48($18) \n\ + ldq $25,48($19) \n\ + ldq $27,48($20) \n\ + ldq $0,56($17) \n\ + \n\ + ldq $1,56($18) \n\ + ldq $2,56($19) \n\ + ldq $3,56($20) \n\ + xor $6,$7,$7 # 8 cycles from $6 load \n\ + \n\ + ldq $31,256($17) \n\ + xor $21,$22,$22 # 8 cycles from $22 load \n\ + ldq $31,256($18) \n\ + xor $7,$22,$22 \n\ + \n\ + ldq $31,256($19) \n\ + xor $23,$24,$24 # 6 cycles from $24 load \n\ + ldq $31,256($20) \n\ + xor $25,$27,$27 # 6 cycles from $27 load \n\ + \n\ + stq $5,32($17) \n\ + xor $24,$27,$27 \n\ + xor $0,$1,$1 # 7 cycles from $1 load \n\ + xor $2,$3,$3 # 6 cycles from $3 load \n\ + \n\ + stq $22,40($17) \n\ + xor $1,$3,$3 \n\ + stq $27,48($17) \n\ + subq $16,1,$16 \n\ + \n\ + stq $3,56($17) \n\ + addq $20,64,$20 \n\ + addq $19,64,$19 \n\ + addq $18,64,$18 \n\ + \n\ + addq $17,64,$17 \n\ + bgt $16,4b \n\ + ret \n\ + .end xor_alpha_prefetch_4 \n\ + \n\ + .align 3 \n\ + .ent xor_alpha_prefetch_5 \n\ +xor_alpha_prefetch_5: \n\ + .prologue 0 \n\ + srl $16, 6, $16 \n\ + \n\ + ldq $31, 0($17) \n\ + ldq $31, 0($18) \n\ + ldq $31, 0($19) \n\ + ldq $31, 0($20) \n\ + ldq $31, 0($21) \n\ + \n\ + ldq $31, 64($17) \n\ + ldq $31, 64($18) \n\ + ldq $31, 64($19) \n\ + ldq $31, 64($20) \n\ + ldq $31, 64($21) \n\ + \n\ + 
ldq $31, 128($17) \n\ + ldq $31, 128($18) \n\ + ldq $31, 128($19) \n\ + ldq $31, 128($20) \n\ + ldq $31, 128($21) \n\ + \n\ + ldq $31, 192($17) \n\ + ldq $31, 192($18) \n\ + ldq $31, 192($19) \n\ + ldq $31, 192($20) \n\ + ldq $31, 192($21) \n\ + .align 4 \n\ +5: \n\ + ldq $0,0($17) \n\ + ldq $1,0($18) \n\ + ldq $2,0($19) \n\ + ldq $3,0($20) \n\ + \n\ + ldq $4,0($21) \n\ + ldq $5,8($17) \n\ + ldq $6,8($18) \n\ + ldq $7,8($19) \n\ + \n\ + ldq $22,8($20) \n\ + ldq $23,8($21) \n\ + ldq $24,16($17) \n\ + ldq $25,16($18) \n\ + \n\ + ldq $27,16($19) \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + ldq $28,16($20) \n\ + xor $2,$3,$3 # 6 cycles from $3 load \n\ + \n\ + ldq $0,16($21) \n\ + xor $1,$3,$3 \n\ + ldq $1,24($17) \n\ + xor $3,$4,$4 # 7 cycles from $4 load \n\ + \n\ + stq $4,0($17) \n\ + xor $5,$6,$6 # 7 cycles from $6 load \n\ + xor $7,$22,$22 # 7 cycles from $22 load \n\ + xor $6,$23,$23 # 7 cycles from $23 load \n\ + \n\ + ldq $2,24($18) \n\ + xor $22,$23,$23 \n\ + ldq $3,24($19) \n\ + xor $24,$25,$25 # 8 cycles from $25 load \n\ + \n\ + stq $23,8($17) \n\ + xor $25,$27,$27 # 8 cycles from $27 load \n\ + ldq $4,24($20) \n\ + xor $28,$0,$0 # 7 cycles from $0 load \n\ + \n\ + ldq $5,24($21) \n\ + xor $27,$0,$0 \n\ + ldq $6,32($17) \n\ + ldq $7,32($18) \n\ + \n\ + stq $0,16($17) \n\ + xor $1,$2,$2 # 6 cycles from $2 load \n\ + ldq $22,32($19) \n\ + xor $3,$4,$4 # 4 cycles from $4 load \n\ + \n\ + ldq $23,32($20) \n\ + xor $2,$4,$4 \n\ + ldq $24,32($21) \n\ + ldq $25,40($17) \n\ + \n\ + ldq $27,40($18) \n\ + ldq $28,40($19) \n\ + ldq $0,40($20) \n\ + xor $4,$5,$5 # 7 cycles from $5 load \n\ + \n\ + stq $5,24($17) \n\ + xor $6,$7,$7 # 7 cycles from $7 load \n\ + ldq $1,40($21) \n\ + ldq $2,48($17) \n\ + \n\ + ldq $3,48($18) \n\ + xor $7,$22,$22 # 7 cycles from $22 load \n\ + ldq $4,48($19) \n\ + xor $23,$24,$24 # 6 cycles from $24 load \n\ + \n\ + ldq $5,48($20) \n\ + xor $22,$24,$24 \n\ + ldq $6,48($21) \n\ + xor $25,$27,$27 # 7 cycles from $27 load \n\ + \n\ + stq 
$24,32($17) \n\ + xor $27,$28,$28 # 8 cycles from $28 load \n\ + ldq $7,56($17) \n\ + xor $0,$1,$1 # 6 cycles from $1 load \n\ + \n\ + ldq $22,56($18) \n\ + ldq $23,56($19) \n\ + ldq $24,56($20) \n\ + ldq $25,56($21) \n\ + \n\ + ldq $31,256($17) \n\ + xor $28,$1,$1 \n\ + ldq $31,256($18) \n\ + xor $2,$3,$3 # 9 cycles from $3 load \n\ + \n\ + ldq $31,256($19) \n\ + xor $3,$4,$4 # 9 cycles from $4 load \n\ + ldq $31,256($20) \n\ + xor $5,$6,$6 # 8 cycles from $6 load \n\ + \n\ + stq $1,40($17) \n\ + xor $4,$6,$6 \n\ + xor $7,$22,$22 # 7 cycles from $22 load \n\ + xor $23,$24,$24 # 6 cycles from $24 load \n\ + \n\ + stq $6,48($17) \n\ + xor $22,$24,$24 \n\ + ldq $31,256($21) \n\ + xor $24,$25,$25 # 8 cycles from $25 load \n\ + \n\ + stq $25,56($17) \n\ + subq $16,1,$16 \n\ + addq $21,64,$21 \n\ + addq $20,64,$20 \n\ + \n\ + addq $19,64,$19 \n\ + addq $18,64,$18 \n\ + addq $17,64,$17 \n\ + bgt $16,5b \n\ + \n\ + ret \n\ + .end xor_alpha_prefetch_5 \n\ +"); + +struct xor_block_template xor_block_alpha = { + .name = "alpha", + .do_2 = xor_alpha_2, + .do_3 = xor_alpha_3, + .do_4 = xor_alpha_4, + .do_5 = xor_alpha_5, +}; + +struct xor_block_template xor_block_alpha_prefetch = { + .name = "alpha prefetch", + .do_2 = xor_alpha_prefetch_2, + .do_3 = xor_alpha_prefetch_3, + .do_4 = xor_alpha_prefetch_4, + .do_5 = xor_alpha_prefetch_5, +}; From 0d64a24ec0c02f75e5068065b503d98e10a60d01 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:44 +0100 Subject: [PATCH 082/127] arm: move the XOR code to lib/raid/ Move the optimized XOR into lib/raid and include it it in the main xor.ko instead of building a separate module for it. 
Link: https://lkml.kernel.org/r/20260327061704.3707577-13-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm/include/asm/xor.h | 190 +----------------- arch/arm/lib/Makefile | 5 - lib/raid/xor/Makefile | 8 + lib/raid/xor/arm/xor-neon-glue.c | 58 ++++++ {arch/arm/lib => lib/raid/xor/arm}/xor-neon.c | 10 +- lib/raid/xor/arm/xor.c | 136 +++++++++++++ 6 files changed, 205 insertions(+), 202 deletions(-) create mode 100644 lib/raid/xor/arm/xor-neon-glue.c rename {arch/arm/lib => lib/raid/xor/arm}/xor-neon.c (74%) create mode 100644 lib/raid/xor/arm/xor.c diff --git a/arch/arm/include/asm/xor.h b/arch/arm/include/asm/xor.h index b2dcd49186e2..989c55872ef6 100644 --- a/arch/arm/include/asm/xor.h +++ b/arch/arm/include/asm/xor.h @@ -1,198 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * arch/arm/include/asm/xor.h - * * Copyright (C) 2001 Russell King */ #include -#include #include -#define __XOR(a1, a2) a1 ^= a2 - -#define GET_BLOCK_2(dst) \ - __asm__("ldmia %0, {%1, %2}" \ - : "=r" (dst), "=r" (a1), "=r" (a2) \ - : "0" (dst)) - -#define GET_BLOCK_4(dst) \ - __asm__("ldmia %0, {%1, %2, %3, %4}" \ - : "=r" (dst), "=r" (a1), "=r" (a2), "=r" (a3), "=r" (a4) \ - : "0" (dst)) - -#define XOR_BLOCK_2(src) \ - 
__asm__("ldmia %0!, {%1, %2}" \ - : "=r" (src), "=r" (b1), "=r" (b2) \ - : "0" (src)); \ - __XOR(a1, b1); __XOR(a2, b2); - -#define XOR_BLOCK_4(src) \ - __asm__("ldmia %0!, {%1, %2, %3, %4}" \ - : "=r" (src), "=r" (b1), "=r" (b2), "=r" (b3), "=r" (b4) \ - : "0" (src)); \ - __XOR(a1, b1); __XOR(a2, b2); __XOR(a3, b3); __XOR(a4, b4) - -#define PUT_BLOCK_2(dst) \ - __asm__ __volatile__("stmia %0!, {%2, %3}" \ - : "=r" (dst) \ - : "0" (dst), "r" (a1), "r" (a2)) - -#define PUT_BLOCK_4(dst) \ - __asm__ __volatile__("stmia %0!, {%2, %3, %4, %5}" \ - : "=r" (dst) \ - : "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4)) - -static void -xor_arm4regs_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2) -{ - unsigned int lines = bytes / sizeof(unsigned long) / 4; - register unsigned int a1 __asm__("r4"); - register unsigned int a2 __asm__("r5"); - register unsigned int a3 __asm__("r6"); - register unsigned int a4 __asm__("r10"); - register unsigned int b1 __asm__("r8"); - register unsigned int b2 __asm__("r9"); - register unsigned int b3 __asm__("ip"); - register unsigned int b4 __asm__("lr"); - - do { - GET_BLOCK_4(p1); - XOR_BLOCK_4(p2); - PUT_BLOCK_4(p1); - } while (--lines); -} - -static void -xor_arm4regs_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3) -{ - unsigned int lines = bytes / sizeof(unsigned long) / 4; - register unsigned int a1 __asm__("r4"); - register unsigned int a2 __asm__("r5"); - register unsigned int a3 __asm__("r6"); - register unsigned int a4 __asm__("r10"); - register unsigned int b1 __asm__("r8"); - register unsigned int b2 __asm__("r9"); - register unsigned int b3 __asm__("ip"); - register unsigned int b4 __asm__("lr"); - - do { - GET_BLOCK_4(p1); - XOR_BLOCK_4(p2); - XOR_BLOCK_4(p3); - PUT_BLOCK_4(p1); - } while (--lines); -} - -static void -xor_arm4regs_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned 
long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4) -{ - unsigned int lines = bytes / sizeof(unsigned long) / 2; - register unsigned int a1 __asm__("r8"); - register unsigned int a2 __asm__("r9"); - register unsigned int b1 __asm__("ip"); - register unsigned int b2 __asm__("lr"); - - do { - GET_BLOCK_2(p1); - XOR_BLOCK_2(p2); - XOR_BLOCK_2(p3); - XOR_BLOCK_2(p4); - PUT_BLOCK_2(p1); - } while (--lines); -} - -static void -xor_arm4regs_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5) -{ - unsigned int lines = bytes / sizeof(unsigned long) / 2; - register unsigned int a1 __asm__("r8"); - register unsigned int a2 __asm__("r9"); - register unsigned int b1 __asm__("ip"); - register unsigned int b2 __asm__("lr"); - - do { - GET_BLOCK_2(p1); - XOR_BLOCK_2(p2); - XOR_BLOCK_2(p3); - XOR_BLOCK_2(p4); - XOR_BLOCK_2(p5); - PUT_BLOCK_2(p1); - } while (--lines); -} - -static struct xor_block_template xor_block_arm4regs = { - .name = "arm4regs", - .do_2 = xor_arm4regs_2, - .do_3 = xor_arm4regs_3, - .do_4 = xor_arm4regs_4, - .do_5 = xor_arm4regs_5, -}; - -#ifdef CONFIG_KERNEL_MODE_NEON - -extern struct xor_block_template const xor_block_neon_inner; - -static void -xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2) -{ - kernel_neon_begin(); - xor_block_neon_inner.do_2(bytes, p1, p2); - kernel_neon_end(); -} - -static void -xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3) -{ - kernel_neon_begin(); - xor_block_neon_inner.do_3(bytes, p1, p2, p3); - kernel_neon_end(); -} - -static void -xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned 
long * __restrict p4) -{ - kernel_neon_begin(); - xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4); - kernel_neon_end(); -} - -static void -xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5) -{ - kernel_neon_begin(); - xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5); - kernel_neon_end(); -} - -static struct xor_block_template xor_block_neon = { - .name = "neon", - .do_2 = xor_neon_2, - .do_3 = xor_neon_3, - .do_4 = xor_neon_4, - .do_5 = xor_neon_5 -}; - -#endif /* CONFIG_KERNEL_MODE_NEON */ +extern struct xor_block_template xor_block_arm4regs; +extern struct xor_block_template xor_block_neon; #define arch_xor_init arch_xor_init static __always_inline void __init arch_xor_init(void) diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 0ca5aae1bcc3..9295055cdfc9 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -39,9 +39,4 @@ endif $(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S $(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S -ifeq ($(CONFIG_KERNEL_MODE_NEON),y) - CFLAGS_xor-neon.o += $(CC_FLAGS_FPU) - obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o -endif - obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile index 6d03c27c37c7..fb760edae54b 100644 --- a/lib/raid/xor/Makefile +++ b/lib/raid/xor/Makefile @@ -9,3 +9,11 @@ xor-y += xor-8regs-prefetch.o xor-y += xor-32regs-prefetch.o xor-$(CONFIG_ALPHA) += alpha/xor.o +xor-$(CONFIG_ARM) += arm/xor.o +ifeq ($(CONFIG_ARM),y) +xor-$(CONFIG_KERNEL_MODE_NEON) += arm/xor-neon.o arm/xor-neon-glue.o +endif + + +CFLAGS_arm/xor-neon.o += $(CC_FLAGS_FPU) +CFLAGS_REMOVE_arm/xor-neon.o += $(CC_FLAGS_NO_FPU) diff --git a/lib/raid/xor/arm/xor-neon-glue.c b/lib/raid/xor/arm/xor-neon-glue.c new file mode 100644 index 000000000000..c7b162b383a2 --- /dev/null +++ 
b/lib/raid/xor/arm/xor-neon-glue.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2001 Russell King + */ +#include +#include + +extern struct xor_block_template const xor_block_neon_inner; + +static void +xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) +{ + kernel_neon_begin(); + xor_block_neon_inner.do_2(bytes, p1, p2); + kernel_neon_end(); +} + +static void +xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) +{ + kernel_neon_begin(); + xor_block_neon_inner.do_3(bytes, p1, p2, p3); + kernel_neon_end(); +} + +static void +xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) +{ + kernel_neon_begin(); + xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4); + kernel_neon_end(); +} + +static void +xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) +{ + kernel_neon_begin(); + xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5); + kernel_neon_end(); +} + +struct xor_block_template xor_block_neon = { + .name = "neon", + .do_2 = xor_neon_2, + .do_3 = xor_neon_3, + .do_4 = xor_neon_4, + .do_5 = xor_neon_5 +}; diff --git a/arch/arm/lib/xor-neon.c b/lib/raid/xor/arm/xor-neon.c similarity index 74% rename from arch/arm/lib/xor-neon.c rename to lib/raid/xor/arm/xor-neon.c index b5be50567991..c9d4378b0f0e 100644 --- a/arch/arm/lib/xor-neon.c +++ b/lib/raid/xor/arm/xor-neon.c @@ -1,16 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * linux/arch/arm/lib/xor-neon.c - * * Copyright (C) 2013 Linaro Ltd */ -#include #include -#include - -MODULE_DESCRIPTION("NEON accelerated XOR implementation"); 
-MODULE_LICENSE("GPL"); #ifndef __ARM_NEON__ #error You should compile this file with '-march=armv7-a -mfloat-abi=softfp -mfpu=neon' @@ -27,7 +20,7 @@ MODULE_LICENSE("GPL"); #endif #define NO_TEMPLATE -#include "../../../lib/raid/xor/xor-8regs.c" +#include "../xor-8regs.c" struct xor_block_template const xor_block_neon_inner = { .name = "__inner_neon__", @@ -36,4 +29,3 @@ struct xor_block_template const xor_block_neon_inner = { .do_4 = xor_8regs_4, .do_5 = xor_8regs_5, }; -EXPORT_SYMBOL(xor_block_neon_inner); diff --git a/lib/raid/xor/arm/xor.c b/lib/raid/xor/arm/xor.c new file mode 100644 index 000000000000..2263341dbbcd --- /dev/null +++ b/lib/raid/xor/arm/xor.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2001 Russell King + */ +#include +#include + +#define __XOR(a1, a2) a1 ^= a2 + +#define GET_BLOCK_2(dst) \ + __asm__("ldmia %0, {%1, %2}" \ + : "=r" (dst), "=r" (a1), "=r" (a2) \ + : "0" (dst)) + +#define GET_BLOCK_4(dst) \ + __asm__("ldmia %0, {%1, %2, %3, %4}" \ + : "=r" (dst), "=r" (a1), "=r" (a2), "=r" (a3), "=r" (a4) \ + : "0" (dst)) + +#define XOR_BLOCK_2(src) \ + __asm__("ldmia %0!, {%1, %2}" \ + : "=r" (src), "=r" (b1), "=r" (b2) \ + : "0" (src)); \ + __XOR(a1, b1); __XOR(a2, b2); + +#define XOR_BLOCK_4(src) \ + __asm__("ldmia %0!, {%1, %2, %3, %4}" \ + : "=r" (src), "=r" (b1), "=r" (b2), "=r" (b3), "=r" (b4) \ + : "0" (src)); \ + __XOR(a1, b1); __XOR(a2, b2); __XOR(a3, b3); __XOR(a4, b4) + +#define PUT_BLOCK_2(dst) \ + __asm__ __volatile__("stmia %0!, {%2, %3}" \ + : "=r" (dst) \ + : "0" (dst), "r" (a1), "r" (a2)) + +#define PUT_BLOCK_4(dst) \ + __asm__ __volatile__("stmia %0!, {%2, %3, %4, %5}" \ + : "=r" (dst) \ + : "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4)) + +static void +xor_arm4regs_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) +{ + unsigned int lines = bytes / sizeof(unsigned long) / 4; + register unsigned int a1 __asm__("r4"); + register unsigned int a2 
__asm__("r5"); + register unsigned int a3 __asm__("r6"); + register unsigned int a4 __asm__("r10"); + register unsigned int b1 __asm__("r8"); + register unsigned int b2 __asm__("r9"); + register unsigned int b3 __asm__("ip"); + register unsigned int b4 __asm__("lr"); + + do { + GET_BLOCK_4(p1); + XOR_BLOCK_4(p2); + PUT_BLOCK_4(p1); + } while (--lines); +} + +static void +xor_arm4regs_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) +{ + unsigned int lines = bytes / sizeof(unsigned long) / 4; + register unsigned int a1 __asm__("r4"); + register unsigned int a2 __asm__("r5"); + register unsigned int a3 __asm__("r6"); + register unsigned int a4 __asm__("r10"); + register unsigned int b1 __asm__("r8"); + register unsigned int b2 __asm__("r9"); + register unsigned int b3 __asm__("ip"); + register unsigned int b4 __asm__("lr"); + + do { + GET_BLOCK_4(p1); + XOR_BLOCK_4(p2); + XOR_BLOCK_4(p3); + PUT_BLOCK_4(p1); + } while (--lines); +} + +static void +xor_arm4regs_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) +{ + unsigned int lines = bytes / sizeof(unsigned long) / 2; + register unsigned int a1 __asm__("r8"); + register unsigned int a2 __asm__("r9"); + register unsigned int b1 __asm__("ip"); + register unsigned int b2 __asm__("lr"); + + do { + GET_BLOCK_2(p1); + XOR_BLOCK_2(p2); + XOR_BLOCK_2(p3); + XOR_BLOCK_2(p4); + PUT_BLOCK_2(p1); + } while (--lines); +} + +static void +xor_arm4regs_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) +{ + unsigned int lines = bytes / sizeof(unsigned long) / 2; + register unsigned int a1 __asm__("r8"); + register unsigned int a2 __asm__("r9"); + register unsigned int b1 
__asm__("ip"); + register unsigned int b2 __asm__("lr"); + + do { + GET_BLOCK_2(p1); + XOR_BLOCK_2(p2); + XOR_BLOCK_2(p3); + XOR_BLOCK_2(p4); + XOR_BLOCK_2(p5); + PUT_BLOCK_2(p1); + } while (--lines); +} + +struct xor_block_template xor_block_arm4regs = { + .name = "arm4regs", + .do_2 = xor_arm4regs_2, + .do_3 = xor_arm4regs_3, + .do_4 = xor_arm4regs_4, + .do_5 = xor_arm4regs_5, +}; From 3786f2ad009549c9e5e2af86e5829b31ad788eb4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:45 +0100 Subject: [PATCH 083/127] arm64: move the XOR code to lib/raid/ Move the optimized XOR into lib/raid and include it it in the main xor.ko instead of building a separate module for it. Note that this drops the CONFIG_KERNEL_MODE_NEON dependency, as that is always set for arm64. Link: https://lkml.kernel.org/r/20260327061704.3707577-14-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. 
Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm64/include/asm/xor.h | 58 +------------------ arch/arm64/lib/Makefile | 6 -- lib/raid/xor/Makefile | 4 ++ lib/raid/xor/arm64/xor-neon-glue.c | 57 ++++++++++++++++++ .../lib => lib/raid/xor/arm64}/xor-neon.c | 20 +------ 5 files changed, 67 insertions(+), 78 deletions(-) create mode 100644 lib/raid/xor/arm64/xor-neon-glue.c rename {arch/arm64/lib => lib/raid/xor/arm64}/xor-neon.c (95%) diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h index 3cee1eb86371..81718f010761 100644 --- a/arch/arm64/include/asm/xor.h +++ b/arch/arm64/include/asm/xor.h @@ -1,73 +1,21 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * arch/arm64/include/asm/xor.h - * * Authors: Jackie Liu * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd. 
*/ -#include #include -#include #include -#ifdef CONFIG_KERNEL_MODE_NEON - -extern struct xor_block_template xor_block_inner_neon __ro_after_init; - -static void -xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2) -{ - scoped_ksimd() - xor_block_inner_neon.do_2(bytes, p1, p2); -} - -static void -xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3) -{ - scoped_ksimd() - xor_block_inner_neon.do_3(bytes, p1, p2, p3); -} - -static void -xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4) -{ - scoped_ksimd() - xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4); -} - -static void -xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5) -{ - scoped_ksimd() - xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5); -} - -static struct xor_block_template xor_block_arm64 = { - .name = "arm64_neon", - .do_2 = xor_neon_2, - .do_3 = xor_neon_3, - .do_4 = xor_neon_4, - .do_5 = xor_neon_5 -}; +extern struct xor_block_template xor_block_arm64; +void __init xor_neon_init(void); #define arch_xor_init arch_xor_init static __always_inline void __init arch_xor_init(void) { + xor_neon_init(); xor_register(&xor_block_8regs); xor_register(&xor_block_32regs); if (cpu_has_neon()) xor_register(&xor_block_arm64); } - -#endif /* ! 
CONFIG_KERNEL_MODE_NEON */ diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 633e5223d944..448c917494f3 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -5,12 +5,6 @@ lib-y := clear_user.o delay.o copy_from_user.o \ memset.o memcmp.o strcmp.o strncmp.o strlen.o \ strnlen.o strchr.o strrchr.o tishift.o -ifeq ($(CONFIG_KERNEL_MODE_NEON), y) -obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o -CFLAGS_xor-neon.o += $(CC_FLAGS_FPU) -CFLAGS_REMOVE_xor-neon.o += $(CC_FLAGS_NO_FPU) -endif - lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile index fb760edae54b..4ab0e7411ff7 100644 --- a/lib/raid/xor/Makefile +++ b/lib/raid/xor/Makefile @@ -13,7 +13,11 @@ xor-$(CONFIG_ARM) += arm/xor.o ifeq ($(CONFIG_ARM),y) xor-$(CONFIG_KERNEL_MODE_NEON) += arm/xor-neon.o arm/xor-neon-glue.o endif +xor-$(CONFIG_ARM64) += arm64/xor-neon.o arm64/xor-neon-glue.o CFLAGS_arm/xor-neon.o += $(CC_FLAGS_FPU) CFLAGS_REMOVE_arm/xor-neon.o += $(CC_FLAGS_NO_FPU) + +CFLAGS_arm64/xor-neon.o += $(CC_FLAGS_FPU) +CFLAGS_REMOVE_arm64/xor-neon.o += $(CC_FLAGS_NO_FPU) diff --git a/lib/raid/xor/arm64/xor-neon-glue.c b/lib/raid/xor/arm64/xor-neon-glue.c new file mode 100644 index 000000000000..067a2095659a --- /dev/null +++ b/lib/raid/xor/arm64/xor-neon-glue.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Authors: Jackie Liu + * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd. 
+ */ + +#include +#include +#include + +extern struct xor_block_template const xor_block_inner_neon; + +static void +xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) +{ + scoped_ksimd() + xor_block_inner_neon.do_2(bytes, p1, p2); +} + +static void +xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) +{ + scoped_ksimd() + xor_block_inner_neon.do_3(bytes, p1, p2, p3); +} + +static void +xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) +{ + scoped_ksimd() + xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4); +} + +static void +xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) +{ + scoped_ksimd() + xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5); +} + +struct xor_block_template xor_block_arm64 = { + .name = "arm64_neon", + .do_2 = xor_neon_2, + .do_3 = xor_neon_3, + .do_4 = xor_neon_4, + .do_5 = xor_neon_5 +}; diff --git a/arch/arm64/lib/xor-neon.c b/lib/raid/xor/arm64/xor-neon.c similarity index 95% rename from arch/arm64/lib/xor-neon.c rename to lib/raid/xor/arm64/xor-neon.c index 351aba92d932..8d2d185090db 100644 --- a/arch/arm64/lib/xor-neon.c +++ b/lib/raid/xor/arm64/xor-neon.c @@ -1,15 +1,13 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * arch/arm64/lib/xor-neon.c - * * Authors: Jackie Liu * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd. 
*/ -#include #include -#include +#include #include +#include static void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2) @@ -180,7 +178,6 @@ struct xor_block_template xor_block_inner_neon __ro_after_init = { .do_4 = xor_arm64_neon_4, .do_5 = xor_arm64_neon_5, }; -EXPORT_SYMBOL(xor_block_inner_neon); static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r) { @@ -318,22 +315,11 @@ static void xor_arm64_eor3_5(unsigned long bytes, } while (--lines > 0); } -static int __init xor_neon_init(void) +void __init xor_neon_init(void) { if (cpu_have_named_feature(SHA3)) { xor_block_inner_neon.do_3 = xor_arm64_eor3_3; xor_block_inner_neon.do_4 = xor_arm64_eor3_4; xor_block_inner_neon.do_5 = xor_arm64_eor3_5; } - return 0; } -module_init(xor_neon_init); - -static void __exit xor_neon_exit(void) -{ -} -module_exit(xor_neon_exit); - -MODULE_AUTHOR("Jackie Liu "); -MODULE_DESCRIPTION("ARMv8 XOR Extensions"); -MODULE_LICENSE("GPL"); From 033bee3e49631bd0c7e081aeafeadc7623495107 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:46 +0100 Subject: [PATCH 084/127] loongarch: move the XOR code to lib/raid/ Move the optimized XOR into lib/raid and include it in xor.ko instead of always building it into the main kernel image. Link: https://lkml.kernel.org/r/20260327061704.3707577-15-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. 
Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/loongarch/include/asm/xor.h | 24 ++---------- arch/loongarch/include/asm/xor_simd.h | 34 ---------------- arch/loongarch/lib/Makefile | 2 - lib/raid/xor/Makefile | 2 + .../lib => lib/raid/xor/loongarch}/xor_simd.c | 0 .../lib => lib/raid/xor/loongarch}/xor_simd.h | 0 .../raid/xor/loongarch}/xor_simd_glue.c | 39 +++++++++++-------- .../raid/xor/loongarch}/xor_template.c | 0 8 files changed, 27 insertions(+), 74 deletions(-) delete mode 100644 arch/loongarch/include/asm/xor_simd.h rename {arch/loongarch/lib => lib/raid/xor/loongarch}/xor_simd.c (100%) rename {arch/loongarch/lib => lib/raid/xor/loongarch}/xor_simd.h (100%) rename {arch/loongarch/lib => lib/raid/xor/loongarch}/xor_simd_glue.c (64%) rename {arch/loongarch/lib => lib/raid/xor/loongarch}/xor_template.c (100%) diff --git a/arch/loongarch/include/asm/xor.h b/arch/loongarch/include/asm/xor.h index d17c0e3b047f..7e32f72f8b03 100644 --- a/arch/loongarch/include/asm/xor.h +++ b/arch/loongarch/include/asm/xor.h @@ -6,27 +6,6 @@ #define _ASM_LOONGARCH_XOR_H #include -#include - -#ifdef CONFIG_CPU_HAS_LSX -static struct xor_block_template xor_block_lsx = { - .name = "lsx", - .do_2 = xor_lsx_2, - .do_3 = xor_lsx_3, - .do_4 = xor_lsx_4, - .do_5 = xor_lsx_5, -}; -#endif /* CONFIG_CPU_HAS_LSX */ - -#ifdef CONFIG_CPU_HAS_LASX -static struct xor_block_template xor_block_lasx = { - .name = "lasx", - .do_2 = xor_lasx_2, - .do_3 = xor_lasx_3, - .do_4 = xor_lasx_4, - .do_5 = xor_lasx_5, -}; -#endif /* CONFIG_CPU_HAS_LASX */ /* * For grins, also test the generic routines. 
@@ -38,6 +17,9 @@ static struct xor_block_template xor_block_lasx = { */ #include +extern struct xor_block_template xor_block_lsx; +extern struct xor_block_template xor_block_lasx; + #define arch_xor_init arch_xor_init static __always_inline void __init arch_xor_init(void) { diff --git a/arch/loongarch/include/asm/xor_simd.h b/arch/loongarch/include/asm/xor_simd.h deleted file mode 100644 index 471b96332f38..000000000000 --- a/arch/loongarch/include/asm/xor_simd.h +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2023 WANG Xuerui - */ -#ifndef _ASM_LOONGARCH_XOR_SIMD_H -#define _ASM_LOONGARCH_XOR_SIMD_H - -#ifdef CONFIG_CPU_HAS_LSX -void xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2); -void xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, const unsigned long * __restrict p3); -void xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, const unsigned long * __restrict p3, - const unsigned long * __restrict p4); -void xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, const unsigned long * __restrict p3, - const unsigned long * __restrict p4, const unsigned long * __restrict p5); -#endif /* CONFIG_CPU_HAS_LSX */ - -#ifdef CONFIG_CPU_HAS_LASX -void xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2); -void xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, const unsigned long * __restrict p3); -void xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, const unsigned long * __restrict p3, - const unsigned long * __restrict p4); -void xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, const unsigned long * __restrict p3, - 
const unsigned long * __restrict p4, const unsigned long * __restrict p5); -#endif /* CONFIG_CPU_HAS_LASX */ - -#endif /* _ASM_LOONGARCH_XOR_SIMD_H */ diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile index ccea3bbd4353..827a88529a42 100644 --- a/arch/loongarch/lib/Makefile +++ b/arch/loongarch/lib/Makefile @@ -8,6 +8,4 @@ lib-y += delay.o memset.o memcpy.o memmove.o \ obj-$(CONFIG_ARCH_SUPPORTS_INT128) += tishift.o -obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o - obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile index 4ab0e7411ff7..e8868f5fc396 100644 --- a/lib/raid/xor/Makefile +++ b/lib/raid/xor/Makefile @@ -14,6 +14,8 @@ ifeq ($(CONFIG_ARM),y) xor-$(CONFIG_KERNEL_MODE_NEON) += arm/xor-neon.o arm/xor-neon-glue.o endif xor-$(CONFIG_ARM64) += arm64/xor-neon.o arm64/xor-neon-glue.o +xor-$(CONFIG_CPU_HAS_LSX) += loongarch/xor_simd.o +xor-$(CONFIG_CPU_HAS_LSX) += loongarch/xor_simd_glue.o CFLAGS_arm/xor-neon.o += $(CC_FLAGS_FPU) diff --git a/arch/loongarch/lib/xor_simd.c b/lib/raid/xor/loongarch/xor_simd.c similarity index 100% rename from arch/loongarch/lib/xor_simd.c rename to lib/raid/xor/loongarch/xor_simd.c diff --git a/arch/loongarch/lib/xor_simd.h b/lib/raid/xor/loongarch/xor_simd.h similarity index 100% rename from arch/loongarch/lib/xor_simd.h rename to lib/raid/xor/loongarch/xor_simd.h diff --git a/arch/loongarch/lib/xor_simd_glue.c b/lib/raid/xor/loongarch/xor_simd_glue.c similarity index 64% rename from arch/loongarch/lib/xor_simd_glue.c rename to lib/raid/xor/loongarch/xor_simd_glue.c index 393f689dbcf6..11fa3b47ba83 100644 --- a/arch/loongarch/lib/xor_simd_glue.c +++ b/lib/raid/xor/loongarch/xor_simd_glue.c @@ -5,24 +5,23 @@ * Copyright (C) 2023 WANG Xuerui */ -#include #include +#include #include -#include +#include #include "xor_simd.h" #define MAKE_XOR_GLUE_2(flavor) \ -void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1, \ +static 
void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1,\ const unsigned long * __restrict p2) \ { \ kernel_fpu_begin(); \ __xor_##flavor##_2(bytes, p1, p2); \ kernel_fpu_end(); \ } \ -EXPORT_SYMBOL_GPL(xor_##flavor##_2) #define MAKE_XOR_GLUE_3(flavor) \ -void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1, \ +static void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1,\ const unsigned long * __restrict p2, \ const unsigned long * __restrict p3) \ { \ @@ -30,10 +29,9 @@ void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1, \ __xor_##flavor##_3(bytes, p1, p2, p3); \ kernel_fpu_end(); \ } \ -EXPORT_SYMBOL_GPL(xor_##flavor##_3) #define MAKE_XOR_GLUE_4(flavor) \ -void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1, \ +static void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1,\ const unsigned long * __restrict p2, \ const unsigned long * __restrict p3, \ const unsigned long * __restrict p4) \ @@ -42,10 +40,9 @@ void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1, \ __xor_##flavor##_4(bytes, p1, p2, p3, p4); \ kernel_fpu_end(); \ } \ -EXPORT_SYMBOL_GPL(xor_##flavor##_4) #define MAKE_XOR_GLUE_5(flavor) \ -void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1, \ +static void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1,\ const unsigned long * __restrict p2, \ const unsigned long * __restrict p3, \ const unsigned long * __restrict p4, \ @@ -55,18 +52,26 @@ void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1, \ __xor_##flavor##_5(bytes, p1, p2, p3, p4, p5); \ kernel_fpu_end(); \ } \ -EXPORT_SYMBOL_GPL(xor_##flavor##_5) -#define MAKE_XOR_GLUES(flavor) \ - MAKE_XOR_GLUE_2(flavor); \ - MAKE_XOR_GLUE_3(flavor); \ - MAKE_XOR_GLUE_4(flavor); \ - MAKE_XOR_GLUE_5(flavor) +#define MAKE_XOR_GLUES(flavor) \ + MAKE_XOR_GLUE_2(flavor); \ + MAKE_XOR_GLUE_3(flavor); \ + 
MAKE_XOR_GLUE_4(flavor); \ + MAKE_XOR_GLUE_5(flavor); \ + \ +struct xor_block_template xor_block_##flavor = { \ + .name = __stringify(flavor), \ + .do_2 = xor_##flavor##_2, \ + .do_3 = xor_##flavor##_3, \ + .do_4 = xor_##flavor##_4, \ + .do_5 = xor_##flavor##_5, \ +} + #ifdef CONFIG_CPU_HAS_LSX MAKE_XOR_GLUES(lsx); -#endif +#endif /* CONFIG_CPU_HAS_LSX */ #ifdef CONFIG_CPU_HAS_LASX MAKE_XOR_GLUES(lasx); -#endif +#endif /* CONFIG_CPU_HAS_LASX */ diff --git a/arch/loongarch/lib/xor_template.c b/lib/raid/xor/loongarch/xor_template.c similarity index 100% rename from arch/loongarch/lib/xor_template.c rename to lib/raid/xor/loongarch/xor_template.c From 3f276cece4dd9e8bf199d9bf3901eef8ca904c2d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:47 +0100 Subject: [PATCH 085/127] powerpc: move the XOR code to lib/raid/ Move the optimized XOR into lib/raid and include it in xor.ko instead of always building it into the main kernel image. Link: https://lkml.kernel.org/r/20260327061704.3707577-16-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. 
Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/powerpc/include/asm/xor.h | 17 +---- arch/powerpc/include/asm/xor_altivec.h | 22 ------ arch/powerpc/lib/Makefile | 5 -- arch/powerpc/lib/xor_vmx_glue.c | 63 ----------------- lib/raid/xor/Makefile | 5 ++ .../lib => lib/raid/xor/powerpc}/xor_vmx.c | 0 .../lib => lib/raid/xor/powerpc}/xor_vmx.h | 0 lib/raid/xor/powerpc/xor_vmx_glue.c | 67 +++++++++++++++++++ 8 files changed, 74 insertions(+), 105 deletions(-) delete mode 100644 arch/powerpc/include/asm/xor_altivec.h delete mode 100644 arch/powerpc/lib/xor_vmx_glue.c rename {arch/powerpc/lib => lib/raid/xor/powerpc}/xor_vmx.c (100%) rename {arch/powerpc/lib => lib/raid/xor/powerpc}/xor_vmx.h (100%) create mode 100644 lib/raid/xor/powerpc/xor_vmx_glue.c diff --git a/arch/powerpc/include/asm/xor.h b/arch/powerpc/include/asm/xor.h index 30224c5279c4..3293ac87181c 100644 --- a/arch/powerpc/include/asm/xor.h +++ b/arch/powerpc/include/asm/xor.h @@ -8,24 +8,11 @@ #ifndef _ASM_POWERPC_XOR_H #define _ASM_POWERPC_XOR_H -#ifdef CONFIG_ALTIVEC - -#include #include -#include - -static struct xor_block_template xor_block_altivec = { - .name = "altivec", - .do_2 = xor_altivec_2, - .do_3 = xor_altivec_3, - .do_4 = xor_altivec_4, - .do_5 = xor_altivec_5, -}; -#endif /* CONFIG_ALTIVEC */ - -/* Also try the generic routines. 
*/ #include +extern struct xor_block_template xor_block_altivec; + #define arch_xor_init arch_xor_init static __always_inline void __init arch_xor_init(void) { diff --git a/arch/powerpc/include/asm/xor_altivec.h b/arch/powerpc/include/asm/xor_altivec.h deleted file mode 100644 index 294620a25f80..000000000000 --- a/arch/powerpc/include/asm/xor_altivec.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_XOR_ALTIVEC_H -#define _ASM_POWERPC_XOR_ALTIVEC_H - -#ifdef CONFIG_ALTIVEC -void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2); -void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3); -void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4); -void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5); - -#endif -#endif /* _ASM_POWERPC_XOR_ALTIVEC_H */ diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index f14ecab674a3..002edc3f01d5 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -73,9 +73,4 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o -obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o -CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec) -# Enable -CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include) - obj-$(CONFIG_PPC64) += $(obj64-y) diff --git a/arch/powerpc/lib/xor_vmx_glue.c b/arch/powerpc/lib/xor_vmx_glue.c deleted file mode 100644 index 35d917ece4d1..000000000000 --- a/arch/powerpc/lib/xor_vmx_glue.c +++ /dev/null @@ -1,63 +0,0 @@ -// 
SPDX-License-Identifier: GPL-2.0-or-later -/* - * Altivec XOR operations - * - * Copyright 2017 IBM Corp. - */ - -#include -#include -#include -#include -#include -#include "xor_vmx.h" - -void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2) -{ - preempt_disable(); - enable_kernel_altivec(); - __xor_altivec_2(bytes, p1, p2); - disable_kernel_altivec(); - preempt_enable(); -} -EXPORT_SYMBOL(xor_altivec_2); - -void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3) -{ - preempt_disable(); - enable_kernel_altivec(); - __xor_altivec_3(bytes, p1, p2, p3); - disable_kernel_altivec(); - preempt_enable(); -} -EXPORT_SYMBOL(xor_altivec_3); - -void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4) -{ - preempt_disable(); - enable_kernel_altivec(); - __xor_altivec_4(bytes, p1, p2, p3, p4); - disable_kernel_altivec(); - preempt_enable(); -} -EXPORT_SYMBOL(xor_altivec_4); - -void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5) -{ - preempt_disable(); - enable_kernel_altivec(); - __xor_altivec_5(bytes, p1, p2, p3, p4, p5); - disable_kernel_altivec(); - preempt_enable(); -} -EXPORT_SYMBOL(xor_altivec_5); diff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile index e8868f5fc396..006b44ce46bf 100644 --- a/lib/raid/xor/Makefile +++ b/lib/raid/xor/Makefile @@ -16,6 +16,7 @@ endif xor-$(CONFIG_ARM64) += arm64/xor-neon.o arm64/xor-neon-glue.o xor-$(CONFIG_CPU_HAS_LSX) += loongarch/xor_simd.o xor-$(CONFIG_CPU_HAS_LSX) += loongarch/xor_simd_glue.o +xor-$(CONFIG_ALTIVEC) += powerpc/xor_vmx.o powerpc/xor_vmx_glue.o CFLAGS_arm/xor-neon.o 
+= $(CC_FLAGS_FPU) @@ -23,3 +24,7 @@ CFLAGS_REMOVE_arm/xor-neon.o += $(CC_FLAGS_NO_FPU) CFLAGS_arm64/xor-neon.o += $(CC_FLAGS_FPU) CFLAGS_REMOVE_arm64/xor-neon.o += $(CC_FLAGS_NO_FPU) + +CFLAGS_powerpc/xor_vmx.o += -mhard-float -maltivec \ + $(call cc-option,-mabi=altivec) \ + -isystem $(shell $(CC) -print-file-name=include) diff --git a/arch/powerpc/lib/xor_vmx.c b/lib/raid/xor/powerpc/xor_vmx.c similarity index 100% rename from arch/powerpc/lib/xor_vmx.c rename to lib/raid/xor/powerpc/xor_vmx.c diff --git a/arch/powerpc/lib/xor_vmx.h b/lib/raid/xor/powerpc/xor_vmx.h similarity index 100% rename from arch/powerpc/lib/xor_vmx.h rename to lib/raid/xor/powerpc/xor_vmx.h diff --git a/lib/raid/xor/powerpc/xor_vmx_glue.c b/lib/raid/xor/powerpc/xor_vmx_glue.c new file mode 100644 index 000000000000..c41e38340700 --- /dev/null +++ b/lib/raid/xor/powerpc/xor_vmx_glue.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Altivec XOR operations + * + * Copyright 2017 IBM Corp. + */ + +#include +#include +#include +#include +#include +#include "xor_vmx.h" + +static void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) +{ + preempt_disable(); + enable_kernel_altivec(); + __xor_altivec_2(bytes, p1, p2); + disable_kernel_altivec(); + preempt_enable(); +} + +static void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) +{ + preempt_disable(); + enable_kernel_altivec(); + __xor_altivec_3(bytes, p1, p2, p3); + disable_kernel_altivec(); + preempt_enable(); +} + +static void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) +{ + preempt_disable(); + enable_kernel_altivec(); + __xor_altivec_4(bytes, p1, p2, p3, p4); + disable_kernel_altivec(); + preempt_enable(); +} + +static void 
xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) +{ + preempt_disable(); + enable_kernel_altivec(); + __xor_altivec_5(bytes, p1, p2, p3, p4, p5); + disable_kernel_altivec(); + preempt_enable(); +} + +struct xor_block_template xor_block_altivec = { + .name = "altivec", + .do_2 = xor_altivec_2, + .do_3 = xor_altivec_3, + .do_4 = xor_altivec_4, + .do_5 = xor_altivec_5, +}; From 5265d55b214647f56b46330ec1b30641073608c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:48 +0100 Subject: [PATCH 086/127] riscv: move the XOR code to lib/raid/ Move the optimized XOR into lib/raid and include it in xor.ko instead of always building it into the main kernel image. Link: https://lkml.kernel.org/r/20260327061704.3707577-17-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. 
Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/riscv/include/asm/xor.h | 54 +------------------ arch/riscv/lib/Makefile | 1 - lib/raid/xor/Makefile | 1 + lib/raid/xor/riscv/xor-glue.c | 56 ++++++++++++++++++++ {arch/riscv/lib => lib/raid/xor/riscv}/xor.S | 4 -- 5 files changed, 59 insertions(+), 57 deletions(-) create mode 100644 lib/raid/xor/riscv/xor-glue.c rename {arch/riscv/lib => lib/raid/xor/riscv}/xor.S (92%) diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h index ed5f27903efc..614d9209d078 100644 --- a/arch/riscv/include/asm/xor.h +++ b/arch/riscv/include/asm/xor.h @@ -2,60 +2,10 @@ /* * Copyright (C) 2021 SiFive */ - -#include -#include -#ifdef CONFIG_RISCV_ISA_V #include -#include -#include +#include -static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1, - const unsigned long *__restrict p2) -{ - kernel_vector_begin(); - xor_regs_2_(bytes, p1, p2); - kernel_vector_end(); -} - -static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1, - const unsigned long *__restrict p2, - const unsigned long *__restrict p3) -{ - kernel_vector_begin(); - xor_regs_3_(bytes, p1, p2, p3); - kernel_vector_end(); -} - -static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1, - const unsigned long *__restrict p2, - const unsigned long *__restrict p3, - const unsigned long *__restrict p4) -{ - kernel_vector_begin(); - xor_regs_4_(bytes, p1, p2, p3, p4); - kernel_vector_end(); -} - -static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1, - const unsigned long *__restrict p2, - const unsigned long *__restrict p3, - const unsigned long *__restrict p4, - const unsigned long 
*__restrict p5) -{ - kernel_vector_begin(); - xor_regs_5_(bytes, p1, p2, p3, p4, p5); - kernel_vector_end(); -} - -static struct xor_block_template xor_block_rvv = { - .name = "rvv", - .do_2 = xor_vector_2, - .do_3 = xor_vector_3, - .do_4 = xor_vector_4, - .do_5 = xor_vector_5 -}; -#endif /* CONFIG_RISCV_ISA_V */ +extern struct xor_block_template xor_block_rvv; #define arch_xor_init arch_xor_init static __always_inline void __init arch_xor_init(void) diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index bbc031124974..e220c35764eb 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -16,5 +16,4 @@ lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_64BIT) += tishift.o lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o -lib-$(CONFIG_RISCV_ISA_V) += xor.o lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o diff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile index 006b44ce46bf..9e729b50e775 100644 --- a/lib/raid/xor/Makefile +++ b/lib/raid/xor/Makefile @@ -17,6 +17,7 @@ xor-$(CONFIG_ARM64) += arm64/xor-neon.o arm64/xor-neon-glue.o xor-$(CONFIG_CPU_HAS_LSX) += loongarch/xor_simd.o xor-$(CONFIG_CPU_HAS_LSX) += loongarch/xor_simd_glue.o xor-$(CONFIG_ALTIVEC) += powerpc/xor_vmx.o powerpc/xor_vmx_glue.o +xor-$(CONFIG_RISCV_ISA_V) += riscv/xor.o riscv/xor-glue.o CFLAGS_arm/xor-neon.o += $(CC_FLAGS_FPU) diff --git a/lib/raid/xor/riscv/xor-glue.c b/lib/raid/xor/riscv/xor-glue.c new file mode 100644 index 000000000000..11666a4b6b68 --- /dev/null +++ b/lib/raid/xor/riscv/xor-glue.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2021 SiFive + */ + +#include +#include +#include +#include +#include + +static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2) +{ + kernel_vector_begin(); + xor_regs_2_(bytes, p1, p2); + kernel_vector_end(); +} + +static void xor_vector_3(unsigned long bytes, unsigned long 
*__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3) +{ + kernel_vector_begin(); + xor_regs_3_(bytes, p1, p2, p3); + kernel_vector_end(); +} + +static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3, + const unsigned long *__restrict p4) +{ + kernel_vector_begin(); + xor_regs_4_(bytes, p1, p2, p3, p4); + kernel_vector_end(); +} + +static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3, + const unsigned long *__restrict p4, + const unsigned long *__restrict p5) +{ + kernel_vector_begin(); + xor_regs_5_(bytes, p1, p2, p3, p4, p5); + kernel_vector_end(); +} + +struct xor_block_template xor_block_rvv = { + .name = "rvv", + .do_2 = xor_vector_2, + .do_3 = xor_vector_3, + .do_4 = xor_vector_4, + .do_5 = xor_vector_5 +}; diff --git a/arch/riscv/lib/xor.S b/lib/raid/xor/riscv/xor.S similarity index 92% rename from arch/riscv/lib/xor.S rename to lib/raid/xor/riscv/xor.S index b28f2430e52f..56fb7fc1e2cd 100644 --- a/arch/riscv/lib/xor.S +++ b/lib/raid/xor/riscv/xor.S @@ -18,7 +18,6 @@ SYM_FUNC_START(xor_regs_2_) bnez a0, xor_regs_2_ ret SYM_FUNC_END(xor_regs_2_) -EXPORT_SYMBOL(xor_regs_2_) SYM_FUNC_START(xor_regs_3_) vsetvli a4, a0, e8, m8, ta, ma @@ -35,7 +34,6 @@ SYM_FUNC_START(xor_regs_3_) bnez a0, xor_regs_3_ ret SYM_FUNC_END(xor_regs_3_) -EXPORT_SYMBOL(xor_regs_3_) SYM_FUNC_START(xor_regs_4_) vsetvli a5, a0, e8, m8, ta, ma @@ -55,7 +53,6 @@ SYM_FUNC_START(xor_regs_4_) bnez a0, xor_regs_4_ ret SYM_FUNC_END(xor_regs_4_) -EXPORT_SYMBOL(xor_regs_4_) SYM_FUNC_START(xor_regs_5_) vsetvli a6, a0, e8, m8, ta, ma @@ -78,4 +75,3 @@ SYM_FUNC_START(xor_regs_5_) bnez a0, xor_regs_5_ ret SYM_FUNC_END(xor_regs_5_) -EXPORT_SYMBOL(xor_regs_5_) From 7f96362396ee27fbe3aafc0d49943367258d5fdd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 
07:16:49 +0100 Subject: [PATCH 087/127] sparc: move the XOR code to lib/raid/ Move the optimized XOR into lib/raid and include it in xor.ko instead of always building it into the main kernel image. The code should probably be split into separate files for the two implementations, but for now this just does the trivial move. Link: https://lkml.kernel.org/r/20260327061704.3707577-18-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/sparc/include/asm/asm-prototypes.h | 1 - arch/sparc/include/asm/xor.h | 45 ++++++++++++++++--- arch/sparc/lib/Makefile | 2 +- lib/raid/xor/Makefile | 2 + .../raid/xor/sparc/xor-sparc32.c | 23 ++-------- .../raid/xor/sparc/xor-sparc64-glue.c | 26 +++-------- .../xor.S => lib/raid/xor/sparc/xor-sparc64.S | 10 ----- 7 files changed, 52 insertions(+), 57 deletions(-) rename arch/sparc/include/asm/xor_32.h => lib/raid/xor/sparc/xor-sparc32.c (93%) rename arch/sparc/include/asm/xor_64.h => lib/raid/xor/sparc/xor-sparc64-glue.c (74%) rename arch/sparc/lib/xor.S => lib/raid/xor/sparc/xor-sparc64.S (98%) diff --git a/arch/sparc/include/asm/asm-prototypes.h b/arch/sparc/include/asm/asm-prototypes.h index 08810808ca6d..bbd1a8afaabf 100644 --- a/arch/sparc/include/asm/asm-prototypes.h +++ 
b/arch/sparc/include/asm/asm-prototypes.h @@ -14,7 +14,6 @@ #include #include #include -#include void *__memscan_zero(void *, size_t); void *__memscan_generic(void *, int, size_t); diff --git a/arch/sparc/include/asm/xor.h b/arch/sparc/include/asm/xor.h index f4c651e203c4..f923b009fc24 100644 --- a/arch/sparc/include/asm/xor.h +++ b/arch/sparc/include/asm/xor.h @@ -1,9 +1,44 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 2006 David S. Miller + */ #ifndef ___ASM_SPARC_XOR_H #define ___ASM_SPARC_XOR_H + #if defined(__sparc__) && defined(__arch64__) -#include -#else -#include -#endif -#endif +#include + +extern struct xor_block_template xor_block_VIS; +extern struct xor_block_template xor_block_niagara; + +#define arch_xor_init arch_xor_init +static __always_inline void __init arch_xor_init(void) +{ + /* Force VIS for everything except Niagara. */ + if (tlb_type == hypervisor && + (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA5)) + xor_force(&xor_block_niagara); + else + xor_force(&xor_block_VIS); +} +#else /* sparc64 */ + +/* For grins, also test the generic routines. 
*/ +#include + +extern struct xor_block_template xor_block_SPARC; + +#define arch_xor_init arch_xor_init +static __always_inline void __init arch_xor_init(void) +{ + xor_register(&xor_block_8regs); + xor_register(&xor_block_32regs); + xor_register(&xor_block_SPARC); +} +#endif /* !sparc64 */ +#endif /* ___ASM_SPARC_XOR_H */ diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 783bdec0d7be..dd10cdd6f062 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -48,7 +48,7 @@ lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o -lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o +lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o hweight.o ffs.o obj-$(CONFIG_SPARC64) += iomap.o obj-$(CONFIG_SPARC32) += atomic32.o diff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile index 9e729b50e775..3a7c887d08ee 100644 --- a/lib/raid/xor/Makefile +++ b/lib/raid/xor/Makefile @@ -18,6 +18,8 @@ xor-$(CONFIG_CPU_HAS_LSX) += loongarch/xor_simd.o xor-$(CONFIG_CPU_HAS_LSX) += loongarch/xor_simd_glue.o xor-$(CONFIG_ALTIVEC) += powerpc/xor_vmx.o powerpc/xor_vmx_glue.o xor-$(CONFIG_RISCV_ISA_V) += riscv/xor.o riscv/xor-glue.o +xor-$(CONFIG_SPARC32) += sparc/xor-sparc32.o +xor-$(CONFIG_SPARC64) += sparc/xor-sparc64.o sparc/xor-sparc64-glue.o CFLAGS_arm/xor-neon.o += $(CC_FLAGS_FPU) diff --git a/arch/sparc/include/asm/xor_32.h b/lib/raid/xor/sparc/xor-sparc32.c similarity index 93% rename from arch/sparc/include/asm/xor_32.h rename to lib/raid/xor/sparc/xor-sparc32.c index 8fbf0c07ec28..b65a75a6e59d 100644 --- a/arch/sparc/include/asm/xor_32.h +++ b/lib/raid/xor/sparc/xor-sparc32.c @@ -1,16 +1,12 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * include/asm/xor.h - * - * Optimized RAID-5 checksumming functions for 32-bit Sparc. 
- */ - +// SPDX-License-Identifier: GPL-2.0-or-later /* * High speed xor_block operation for RAID4/5 utilizing the * ldd/std SPARC instructions. * * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz) */ +#include +#include static void sparc_2(unsigned long bytes, unsigned long * __restrict p1, @@ -248,21 +244,10 @@ sparc_5(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } -static struct xor_block_template xor_block_SPARC = { +struct xor_block_template xor_block_SPARC = { .name = "SPARC", .do_2 = sparc_2, .do_3 = sparc_3, .do_4 = sparc_4, .do_5 = sparc_5, }; - -/* For grins, also test the generic routines. */ -#include - -#define arch_xor_init arch_xor_init -static __always_inline void __init arch_xor_init(void) -{ - xor_register(&xor_block_8regs); - xor_register(&xor_block_32regs); - xor_register(&xor_block_SPARC); -} diff --git a/arch/sparc/include/asm/xor_64.h b/lib/raid/xor/sparc/xor-sparc64-glue.c similarity index 74% rename from arch/sparc/include/asm/xor_64.h rename to lib/raid/xor/sparc/xor-sparc64-glue.c index e0482ecc0a68..3c67c8c3a0e8 100644 --- a/arch/sparc/include/asm/xor_64.h +++ b/lib/raid/xor/sparc/xor-sparc64-glue.c @@ -1,7 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ +// SPDX-License-Identifier: GPL-2.0-or-later /* - * include/asm/xor.h - * * High speed xor_block operation for RAID4/5 utilizing the * UltraSparc Visual Instruction Set and Niagara block-init * twin-load instructions. @@ -10,7 +8,8 @@ * Copyright (C) 2006 David S. Miller */ -#include +#include +#include void xor_vis_2(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2); @@ -29,7 +28,7 @@ void xor_vis_5(unsigned long bytes, unsigned long * __restrict p1, /* XXX Ugh, write cheetah versions... 
-DaveM */ -static struct xor_block_template xor_block_VIS = { +struct xor_block_template xor_block_VIS = { .name = "VIS", .do_2 = xor_vis_2, .do_3 = xor_vis_3, @@ -52,25 +51,10 @@ void xor_niagara_5(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p4, const unsigned long * __restrict p5); -static struct xor_block_template xor_block_niagara = { +struct xor_block_template xor_block_niagara = { .name = "Niagara", .do_2 = xor_niagara_2, .do_3 = xor_niagara_3, .do_4 = xor_niagara_4, .do_5 = xor_niagara_5, }; - -#define arch_xor_init arch_xor_init -static __always_inline void __init arch_xor_init(void) -{ - /* Force VIS for everything except Niagara. */ - if (tlb_type == hypervisor && - (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || - sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || - sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || - sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || - sun4v_chip_type == SUN4V_CHIP_NIAGARA5)) - xor_force(&xor_block_niagara); - else - xor_force(&xor_block_VIS); -} diff --git a/arch/sparc/lib/xor.S b/lib/raid/xor/sparc/xor-sparc64.S similarity index 98% rename from arch/sparc/lib/xor.S rename to lib/raid/xor/sparc/xor-sparc64.S index 35461e3b2a9b..a7b74d473bd4 100644 --- a/arch/sparc/lib/xor.S +++ b/lib/raid/xor/sparc/xor-sparc64.S @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * arch/sparc64/lib/xor.S - * * High speed xor_block operation for RAID4/5 utilizing the * UltraSparc Visual Instruction Set and Niagara store-init/twin-load. 
* @@ -92,7 +90,6 @@ ENTRY(xor_vis_2) retl wr %g0, 0, %fprs ENDPROC(xor_vis_2) -EXPORT_SYMBOL(xor_vis_2) ENTRY(xor_vis_3) rd %fprs, %o5 @@ -159,7 +156,6 @@ ENTRY(xor_vis_3) retl wr %g0, 0, %fprs ENDPROC(xor_vis_3) -EXPORT_SYMBOL(xor_vis_3) ENTRY(xor_vis_4) rd %fprs, %o5 @@ -245,7 +241,6 @@ ENTRY(xor_vis_4) retl wr %g0, 0, %fprs ENDPROC(xor_vis_4) -EXPORT_SYMBOL(xor_vis_4) ENTRY(xor_vis_5) save %sp, -192, %sp @@ -352,7 +347,6 @@ ENTRY(xor_vis_5) ret restore ENDPROC(xor_vis_5) -EXPORT_SYMBOL(xor_vis_5) /* Niagara versions. */ ENTRY(xor_niagara_2) /* %o0=bytes, %o1=dest, %o2=src */ @@ -399,7 +393,6 @@ ENTRY(xor_niagara_2) /* %o0=bytes, %o1=dest, %o2=src */ ret restore ENDPROC(xor_niagara_2) -EXPORT_SYMBOL(xor_niagara_2) ENTRY(xor_niagara_3) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */ save %sp, -192, %sp @@ -461,7 +454,6 @@ ENTRY(xor_niagara_3) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */ ret restore ENDPROC(xor_niagara_3) -EXPORT_SYMBOL(xor_niagara_3) ENTRY(xor_niagara_4) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */ save %sp, -192, %sp @@ -544,7 +536,6 @@ ENTRY(xor_niagara_4) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */ ret restore ENDPROC(xor_niagara_4) -EXPORT_SYMBOL(xor_niagara_4) ENTRY(xor_niagara_5) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=src4 */ save %sp, -192, %sp @@ -643,4 +634,3 @@ ENTRY(xor_niagara_5) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=s ret restore ENDPROC(xor_niagara_5) -EXPORT_SYMBOL(xor_niagara_5) From 95c104cc55713b90012810e8965c27b9ef990097 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:50 +0100 Subject: [PATCH 088/127] s390: move the XOR code to lib/raid/ Move the optimized XOR into lib/raid and include it in xor.ko instead of unconditionally building it into the main kernel image. 
Link: https://lkml.kernel.org/r/20260327061704.3707577-19-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Heiko Carstens Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/s390/lib/Makefile | 2 +- lib/raid/xor/Makefile | 1 + {arch/s390/lib => lib/raid/xor/s390}/xor.c | 2 -- 3 files changed, 2 insertions(+), 3 deletions(-) rename {arch/s390/lib => lib/raid/xor/s390}/xor.c (98%) diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index f43f897d3fc0..2bf47204f6ab 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -5,7 +5,7 @@ lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o lib-y += csum-partial.o -obj-y += mem.o xor.o +obj-y += mem.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o obj-$(CONFIG_S390_KPROBES_SANITY_TEST) += test_kprobes_s390.o diff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile index 3a7c887d08ee..3db6c2b2f26a 100644 --- a/lib/raid/xor/Makefile +++ b/lib/raid/xor/Makefile @@ -20,6 +20,7 @@ xor-$(CONFIG_ALTIVEC) += powerpc/xor_vmx.o powerpc/xor_vmx_glue.o xor-$(CONFIG_RISCV_ISA_V) += riscv/xor.o riscv/xor-glue.o xor-$(CONFIG_SPARC32) += sparc/xor-sparc32.o xor-$(CONFIG_SPARC64) += sparc/xor-sparc64.o sparc/xor-sparc64-glue.o +xor-$(CONFIG_S390) += s390/xor.o CFLAGS_arm/xor-neon.o += 
$(CC_FLAGS_FPU) diff --git a/arch/s390/lib/xor.c b/lib/raid/xor/s390/xor.c similarity index 98% rename from arch/s390/lib/xor.c rename to lib/raid/xor/s390/xor.c index 3bbe21b40e66..acbd268adfc8 100644 --- a/arch/s390/lib/xor.c +++ b/lib/raid/xor/s390/xor.c @@ -7,7 +7,6 @@ */ #include -#include #include #include @@ -133,4 +132,3 @@ struct xor_block_template xor_block_xc = { .do_4 = xor_xc_4, .do_5 = xor_xc_5, }; -EXPORT_SYMBOL(xor_block_xc); From 77fd47e57a0931eb462ea7b76228df6624b563e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:51 +0100 Subject: [PATCH 089/127] x86: move the XOR code to lib/raid/ Move the optimized XOR code out of line into lib/raid. Link: https://lkml.kernel.org/r/20260327061704.3707577-20-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. 
Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/x86/include/asm/xor.h | 518 ++---------------- arch/x86/include/asm/xor_64.h | 32 -- lib/raid/xor/Makefile | 2 + .../xor_avx.h => lib/raid/xor/x86/xor-avx.c | 14 +- .../xor_32.h => lib/raid/xor/x86/xor-mmx.c | 60 +- lib/raid/xor/x86/xor-sse.c | 476 ++++++++++++++++ 6 files changed, 522 insertions(+), 580 deletions(-) delete mode 100644 arch/x86/include/asm/xor_64.h rename arch/x86/include/asm/xor_avx.h => lib/raid/xor/x86/xor-avx.c (95%) rename arch/x86/include/asm/xor_32.h => lib/raid/xor/x86/xor-mmx.c (90%) create mode 100644 lib/raid/xor/x86/xor-sse.c diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index 33f5620d8d69..d1aab8275908 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h @@ -2,498 +2,42 @@ #ifndef _ASM_X86_XOR_H #define _ASM_X86_XOR_H -/* - * Optimized RAID-5 checksumming functions for SSE. - */ +#include +#include + +extern struct xor_block_template xor_block_pII_mmx; +extern struct xor_block_template xor_block_p5_mmx; +extern struct xor_block_template xor_block_sse; +extern struct xor_block_template xor_block_sse_pf64; +extern struct xor_block_template xor_block_avx; /* - * Cache avoiding checksumming functions utilizing KNI instructions - * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) - */ - -/* - * Based on - * High-speed RAID5 checksumming functions utilizing SSE instructions. - * Copyright (C) 1998 Ingo Molnar. - */ - -/* - * x86-64 changes / gcc fixes from Andi Kleen. - * Copyright 2002 Andi Kleen, SuSE Labs. + * When SSE is available, use it as it can write around L2. 
We may also be able + * to load into the L1 only depending on how the cpu deals with a load to a line + * that is being prefetched. * - * This hasn't been optimized for the hammer yet, but there are likely - * no advantages to be gotten from x86-64 here anyways. + * When AVX2 is available, force using it as it is better by all measures. + * + * 32-bit without MMX can fall back to the generic routines. */ - -#include - -#ifdef CONFIG_X86_32 -/* reduce register pressure */ -# define XOR_CONSTANT_CONSTRAINT "i" -#else -# define XOR_CONSTANT_CONSTRAINT "re" -#endif - -#define OFFS(x) "16*("#x")" -#define PF_OFFS(x) "256+16*("#x")" -#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n" -#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n" -#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n" -#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n" -#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n" -#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n" -#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n" -#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n" -#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n" -#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n" -#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n" -#define NOP(x) - -#define BLK64(pf, op, i) \ - pf(i) \ - op(i, 0) \ - op(i + 1, 1) \ - op(i + 2, 2) \ - op(i + 3, 3) - -static void -xor_sse_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2) +#define arch_xor_init arch_xor_init +static __always_inline void __init arch_xor_init(void) { - unsigned long lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - LD(i, 0) \ - LD(i + 1, 1) \ - PF1(i) \ - PF1(i + 2) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO1(i, 0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - ST(i, 0) \ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 
32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " add %[inc], %[p1] ;\n" - " add %[inc], %[p2] ;\n" - " dec %[cnt] ;\n" - " jnz 1b ;\n" - : [cnt] "+r" (lines), - [p1] "+r" (p1), [p2] "+r" (p2) - : [inc] XOR_CONSTANT_CONSTRAINT (256UL) - : "memory"); - - kernel_fpu_end(); + if (boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_OSXSAVE)) { + xor_force(&xor_block_avx); + } else if (IS_ENABLED(CONFIG_X86_64) || boot_cpu_has(X86_FEATURE_XMM)) { + xor_register(&xor_block_sse); + xor_register(&xor_block_sse_pf64); + } else if (boot_cpu_has(X86_FEATURE_MMX)) { + xor_register(&xor_block_pII_mmx); + xor_register(&xor_block_p5_mmx); + } else { + xor_register(&xor_block_8regs); + xor_register(&xor_block_8regs_p); + xor_register(&xor_block_32regs); + xor_register(&xor_block_32regs_p); + } } -static void -xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2) -{ - unsigned long lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - BLK64(PF0, LD, i) \ - BLK64(PF1, XO1, i) \ - BLK64(NOP, ST, i) \ - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " add %[inc], %[p1] ;\n" - " add %[inc], %[p2] ;\n" - " dec %[cnt] ;\n" - " jnz 1b ;\n" - : [cnt] "+r" (lines), - [p1] "+r" (p1), [p2] "+r" (p2) - : [inc] XOR_CONSTANT_CONSTRAINT (256UL) - : "memory"); - - kernel_fpu_end(); -} - -static void -xor_sse_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3) -{ - unsigned long lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i + 2) \ - LD(i, 0) \ - LD(i + 1, 1) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF2(i) \ - PF2(i + 2) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO1(i, 0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - XO2(i, 0) \ - XO2(i + 1, 1) \ - XO2(i + 2, 2) \ - XO2(i + 3, 3) \ - ST(i, 0) 
\ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " add %[inc], %[p1] ;\n" - " add %[inc], %[p2] ;\n" - " add %[inc], %[p3] ;\n" - " dec %[cnt] ;\n" - " jnz 1b ;\n" - : [cnt] "+r" (lines), - [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) - : [inc] XOR_CONSTANT_CONSTRAINT (256UL) - : "memory"); - - kernel_fpu_end(); -} - -static void -xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3) -{ - unsigned long lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - BLK64(PF0, LD, i) \ - BLK64(PF1, XO1, i) \ - BLK64(PF2, XO2, i) \ - BLK64(NOP, ST, i) \ - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " add %[inc], %[p1] ;\n" - " add %[inc], %[p2] ;\n" - " add %[inc], %[p3] ;\n" - " dec %[cnt] ;\n" - " jnz 1b ;\n" - : [cnt] "+r" (lines), - [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) - : [inc] XOR_CONSTANT_CONSTRAINT (256UL) - : "memory"); - - kernel_fpu_end(); -} - -static void -xor_sse_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4) -{ - unsigned long lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i + 2) \ - LD(i, 0) \ - LD(i + 1, 1) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF2(i) \ - PF2(i + 2) \ - XO1(i, 0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - PF3(i) \ - PF3(i + 2) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO2(i, 0) \ - XO2(i + 1, 1) \ - XO2(i + 2, 2) \ - XO2(i + 3, 3) \ - XO3(i, 0) \ - XO3(i + 1, 1) \ - XO3(i + 2, 2) \ - XO3(i + 3, 3) \ - ST(i, 0) \ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - 
BLOCK(12) - - " add %[inc], %[p1] ;\n" - " add %[inc], %[p2] ;\n" - " add %[inc], %[p3] ;\n" - " add %[inc], %[p4] ;\n" - " dec %[cnt] ;\n" - " jnz 1b ;\n" - : [cnt] "+r" (lines), [p1] "+r" (p1), - [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) - : [inc] XOR_CONSTANT_CONSTRAINT (256UL) - : "memory"); - - kernel_fpu_end(); -} - -static void -xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4) -{ - unsigned long lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - BLK64(PF0, LD, i) \ - BLK64(PF1, XO1, i) \ - BLK64(PF2, XO2, i) \ - BLK64(PF3, XO3, i) \ - BLK64(NOP, ST, i) \ - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " add %[inc], %[p1] ;\n" - " add %[inc], %[p2] ;\n" - " add %[inc], %[p3] ;\n" - " add %[inc], %[p4] ;\n" - " dec %[cnt] ;\n" - " jnz 1b ;\n" - : [cnt] "+r" (lines), [p1] "+r" (p1), - [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) - : [inc] XOR_CONSTANT_CONSTRAINT (256UL) - : "memory"); - - kernel_fpu_end(); -} - -static void -xor_sse_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5) -{ - unsigned long lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i + 2) \ - LD(i, 0) \ - LD(i + 1, 1) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF2(i) \ - PF2(i + 2) \ - XO1(i, 0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - PF3(i) \ - PF3(i + 2) \ - XO2(i, 0) \ - XO2(i + 1, 1) \ - XO2(i + 2, 2) \ - XO2(i + 3, 3) \ - PF4(i) \ - PF4(i + 2) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO3(i, 0) \ - XO3(i + 1, 1) \ - XO3(i + 2, 2) \ - XO3(i + 3, 3) \ - XO4(i, 0) \ - XO4(i + 1, 1) \ - XO4(i + 2, 2) \ - XO4(i + 3, 3) \ - ST(i, 0) \ - ST(i + 1, 1) 
\ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " add %[inc], %[p1] ;\n" - " add %[inc], %[p2] ;\n" - " add %[inc], %[p3] ;\n" - " add %[inc], %[p4] ;\n" - " add %[inc], %[p5] ;\n" - " dec %[cnt] ;\n" - " jnz 1b ;\n" - : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2), - [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5) - : [inc] XOR_CONSTANT_CONSTRAINT (256UL) - : "memory"); - - kernel_fpu_end(); -} - -static void -xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5) -{ - unsigned long lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - BLK64(PF0, LD, i) \ - BLK64(PF1, XO1, i) \ - BLK64(PF2, XO2, i) \ - BLK64(PF3, XO3, i) \ - BLK64(PF4, XO4, i) \ - BLK64(NOP, ST, i) \ - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " add %[inc], %[p1] ;\n" - " add %[inc], %[p2] ;\n" - " add %[inc], %[p3] ;\n" - " add %[inc], %[p4] ;\n" - " add %[inc], %[p5] ;\n" - " dec %[cnt] ;\n" - " jnz 1b ;\n" - : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2), - [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5) - : [inc] XOR_CONSTANT_CONSTRAINT (256UL) - : "memory"); - - kernel_fpu_end(); -} - -static struct xor_block_template xor_block_sse_pf64 = { - .name = "prefetch64-sse", - .do_2 = xor_sse_2_pf64, - .do_3 = xor_sse_3_pf64, - .do_4 = xor_sse_4_pf64, - .do_5 = xor_sse_5_pf64, -}; - -#undef LD -#undef XO1 -#undef XO2 -#undef XO3 -#undef XO4 -#undef ST -#undef NOP -#undef BLK64 -#undef BLOCK - -#undef XOR_CONSTANT_CONSTRAINT - -#ifdef CONFIG_X86_32 -# include -#else -# include -#endif - #endif /* _ASM_X86_XOR_H */ diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h deleted file mode 100644 index 2d2ceb241866..000000000000 --- 
a/arch/x86/include/asm/xor_64.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_XOR_64_H -#define _ASM_X86_XOR_64_H - -static struct xor_block_template xor_block_sse = { - .name = "generic_sse", - .do_2 = xor_sse_2, - .do_3 = xor_sse_3, - .do_4 = xor_sse_4, - .do_5 = xor_sse_5, -}; - - -/* Also try the AVX routines */ -#include - -/* We force the use of the SSE xor block because it can write around L2. - We may also be able to load into the L1 only depending on how the cpu - deals with a load to a line that is being prefetched. */ -#define arch_xor_init arch_xor_init -static __always_inline void __init arch_xor_init(void) -{ - if (boot_cpu_has(X86_FEATURE_AVX) && - boot_cpu_has(X86_FEATURE_OSXSAVE)) { - xor_force(&xor_block_avx); - } else { - xor_register(&xor_block_sse_pf64); - xor_register(&xor_block_sse); - } -} - -#endif /* _ASM_X86_XOR_64_H */ diff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile index 3db6c2b2f26a..05aca96041b3 100644 --- a/lib/raid/xor/Makefile +++ b/lib/raid/xor/Makefile @@ -21,6 +21,8 @@ xor-$(CONFIG_RISCV_ISA_V) += riscv/xor.o riscv/xor-glue.o xor-$(CONFIG_SPARC32) += sparc/xor-sparc32.o xor-$(CONFIG_SPARC64) += sparc/xor-sparc64.o sparc/xor-sparc64-glue.o xor-$(CONFIG_S390) += s390/xor.o +xor-$(CONFIG_X86_32) += x86/xor-avx.o x86/xor-sse.o x86/xor-mmx.o +xor-$(CONFIG_X86_64) += x86/xor-avx.o x86/xor-sse.o CFLAGS_arm/xor-neon.o += $(CC_FLAGS_FPU) diff --git a/arch/x86/include/asm/xor_avx.h b/lib/raid/xor/x86/xor-avx.c similarity index 95% rename from arch/x86/include/asm/xor_avx.h rename to lib/raid/xor/x86/xor-avx.c index c600888436bb..b49cb5199e70 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/lib/raid/xor/x86/xor-avx.c @@ -1,18 +1,16 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _ASM_X86_XOR_AVX_H -#define _ASM_X86_XOR_AVX_H - +// SPDX-License-Identifier: GPL-2.0-only /* - * Optimized RAID-5 checksumming functions for AVX + * Optimized XOR parity functions for AVX * * 
Copyright (C) 2012 Intel Corporation * Author: Jim Kukunas * * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines */ - #include +#include #include +#include #define BLOCK4(i) \ BLOCK(32 * i, 0) \ @@ -158,12 +156,10 @@ do { \ kernel_fpu_end(); } -static struct xor_block_template xor_block_avx = { +struct xor_block_template xor_block_avx = { .name = "avx", .do_2 = xor_avx_2, .do_3 = xor_avx_3, .do_4 = xor_avx_4, .do_5 = xor_avx_5, }; - -#endif diff --git a/arch/x86/include/asm/xor_32.h b/lib/raid/xor/x86/xor-mmx.c similarity index 90% rename from arch/x86/include/asm/xor_32.h rename to lib/raid/xor/x86/xor-mmx.c index ee32d08c27bc..cf0fafea33b7 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/lib/raid/xor/x86/xor-mmx.c @@ -1,15 +1,12 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ASM_X86_XOR_32_H -#define _ASM_X86_XOR_32_H - +// SPDX-License-Identifier: GPL-2.0-or-later /* - * Optimized RAID-5 checksumming functions for MMX. - */ - -/* - * High-speed RAID5 checksumming functions utilizing MMX instructions. + * Optimized XOR parity functions for MMX. + * * Copyright (C) 1998 Ingo Molnar. 
*/ +#include +#include +#include #define LD(x, y) " movq 8*("#x")(%1), %%mm"#y" ;\n" #define ST(x, y) " movq %%mm"#y", 8*("#x")(%1) ;\n" @@ -18,8 +15,6 @@ #define XO3(x, y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" #define XO4(x, y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" -#include - static void xor_pII_mmx_2(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2) @@ -519,7 +514,7 @@ xor_p5_mmx_5(unsigned long bytes, unsigned long * __restrict p1, kernel_fpu_end(); } -static struct xor_block_template xor_block_pII_mmx = { +struct xor_block_template xor_block_pII_mmx = { .name = "pII_mmx", .do_2 = xor_pII_mmx_2, .do_3 = xor_pII_mmx_3, @@ -527,49 +522,10 @@ static struct xor_block_template xor_block_pII_mmx = { .do_5 = xor_pII_mmx_5, }; -static struct xor_block_template xor_block_p5_mmx = { +struct xor_block_template xor_block_p5_mmx = { .name = "p5_mmx", .do_2 = xor_p5_mmx_2, .do_3 = xor_p5_mmx_3, .do_4 = xor_p5_mmx_4, .do_5 = xor_p5_mmx_5, }; - -static struct xor_block_template xor_block_pIII_sse = { - .name = "pIII_sse", - .do_2 = xor_sse_2, - .do_3 = xor_sse_3, - .do_4 = xor_sse_4, - .do_5 = xor_sse_5, -}; - -/* Also try the AVX routines */ -#include - -/* Also try the generic routines. */ -#include - -/* We force the use of the SSE xor block because it can write around L2. - We may also be able to load into the L1 only depending on how the cpu - deals with a load to a line that is being prefetched. 
*/ -#define arch_xor_init arch_xor_init -static __always_inline void __init arch_xor_init(void) -{ - if (boot_cpu_has(X86_FEATURE_AVX) && - boot_cpu_has(X86_FEATURE_OSXSAVE)) { - xor_force(&xor_block_avx); - } else if (boot_cpu_has(X86_FEATURE_XMM)) { - xor_register(&xor_block_pIII_sse); - xor_register(&xor_block_sse_pf64); - } else if (boot_cpu_has(X86_FEATURE_MMX)) { - xor_register(&xor_block_pII_mmx); - xor_register(&xor_block_p5_mmx); - } else { - xor_register(&xor_block_8regs); - xor_register(&xor_block_8regs_p); - xor_register(&xor_block_32regs); - xor_register(&xor_block_32regs_p); - } -} - -#endif /* _ASM_X86_XOR_32_H */ diff --git a/lib/raid/xor/x86/xor-sse.c b/lib/raid/xor/x86/xor-sse.c new file mode 100644 index 000000000000..0e727ced8b00 --- /dev/null +++ b/lib/raid/xor/x86/xor-sse.c @@ -0,0 +1,476 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Optimized XOR parity functions for SSE. + * + * Cache avoiding checksumming functions utilizing KNI instructions + * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) + * + * Based on + * High-speed RAID5 checksumming functions utilizing SSE instructions. + * Copyright (C) 1998 Ingo Molnar. + * + * x86-64 changes / gcc fixes from Andi Kleen. + * Copyright 2002 Andi Kleen, SuSE Labs. 
+ */ +#include +#include +#include + +#ifdef CONFIG_X86_32 +/* reduce register pressure */ +# define XOR_CONSTANT_CONSTRAINT "i" +#else +# define XOR_CONSTANT_CONSTRAINT "re" +#endif + +#define OFFS(x) "16*("#x")" +#define PF_OFFS(x) "256+16*("#x")" +#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n" +#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n" +#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n" +#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n" +#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n" +#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n" +#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n" +#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n" +#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n" +#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n" +#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n" +#define NOP(x) + +#define BLK64(pf, op, i) \ + pf(i) \ + op(i, 0) \ + op(i + 1, 1) \ + op(i + 2, 2) \ + op(i + 3, 3) + +static void +xor_sse_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + LD(i, 0) \ + LD(i + 1, 1) \ + PF1(i) \ + PF1(i + 2) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), + [p1] "+r" (p1), [p2] "+r" (p2) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) +{ + unsigned long lines = bytes >> 8; + + 
kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + BLK64(PF0, LD, i) \ + BLK64(PF1, XO1, i) \ + BLK64(NOP, ST, i) \ + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), + [p1] "+r" (p1), [p2] "+r" (p2) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i + 2) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF2(i) \ + PF2(i + 2) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), + [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + BLK64(PF0, LD, i) \ + BLK64(PF1, XO1, i) \ + BLK64(PF2, XO2, i) \ + BLK64(NOP, ST, i) \ + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " dec %[cnt] ;\n" + " jnz 
1b ;\n" + : [cnt] "+r" (lines), + [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i + 2) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF2(i) \ + PF2(i + 2) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + PF3(i) \ + PF3(i + 2) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + XO3(i, 0) \ + XO3(i + 1, 1) \ + XO3(i + 2, 2) \ + XO3(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " add %[inc], %[p4] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), [p1] "+r" (p1), + [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + BLK64(PF0, LD, i) \ + BLK64(PF1, XO1, i) \ + BLK64(PF2, XO2, i) \ + BLK64(PF3, XO3, i) \ + BLK64(NOP, ST, i) \ + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " add %[inc], %[p4] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] 
"+r" (lines), [p1] "+r" (p1), + [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i + 2) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF2(i) \ + PF2(i + 2) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + PF3(i) \ + PF3(i + 2) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + PF4(i) \ + PF4(i + 2) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO3(i, 0) \ + XO3(i + 1, 1) \ + XO3(i + 2, 2) \ + XO3(i + 3, 3) \ + XO4(i, 0) \ + XO4(i + 1, 1) \ + XO4(i + 2, 2) \ + XO4(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " add %[inc], %[p4] ;\n" + " add %[inc], %[p5] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2), + [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + BLK64(PF0, LD, i) \ + BLK64(PF1, XO1, i) \ + BLK64(PF2, XO2, i) \ + BLK64(PF3, XO3, i) \ + BLK64(PF4, XO4, i) \ + BLK64(NOP, ST, i) \ + + 
" .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " add %[inc], %[p4] ;\n" + " add %[inc], %[p5] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2), + [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +struct xor_block_template xor_block_sse = { + .name = "sse", + .do_2 = xor_sse_2, + .do_3 = xor_sse_3, + .do_4 = xor_sse_4, + .do_5 = xor_sse_5, +}; + +struct xor_block_template xor_block_sse_pf64 = { + .name = "prefetch64-sse", + .do_2 = xor_sse_2_pf64, + .do_3 = xor_sse_3_pf64, + .do_4 = xor_sse_4_pf64, + .do_5 = xor_sse_5_pf64, +}; From 352ebd066b625a5058bd988e5b32e6992f4d5b88 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:52 +0100 Subject: [PATCH 090/127] xor: avoid indirect calls for arm64-optimized ops Remove the inner xor_block_templates, and instead have two separate actual template that call into the neon-enabled compilation unit. Link: https://lkml.kernel.org/r/20260327061704.3707577-21-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. 
Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm64/include/asm/xor.h | 13 ++-- lib/raid/xor/arm64/xor-neon-glue.c | 95 +++++++++++++++--------------- lib/raid/xor/arm64/xor-neon.c | 73 +++++++++-------------- lib/raid/xor/arm64/xor-neon.h | 30 ++++++++++ 4 files changed, 114 insertions(+), 97 deletions(-) create mode 100644 lib/raid/xor/arm64/xor-neon.h diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h index 81718f010761..4782c760bcac 100644 --- a/arch/arm64/include/asm/xor.h +++ b/arch/arm64/include/asm/xor.h @@ -7,15 +7,18 @@ #include #include -extern struct xor_block_template xor_block_arm64; -void __init xor_neon_init(void); +extern struct xor_block_template xor_block_neon; +extern struct xor_block_template xor_block_eor3; #define arch_xor_init arch_xor_init static __always_inline void __init arch_xor_init(void) { - xor_neon_init(); xor_register(&xor_block_8regs); xor_register(&xor_block_32regs); - if (cpu_has_neon()) - xor_register(&xor_block_arm64); + if (cpu_has_neon()) { + if (cpu_have_named_feature(SHA3)) + xor_register(&xor_block_eor3); + else + xor_register(&xor_block_neon); + } } diff --git a/lib/raid/xor/arm64/xor-neon-glue.c b/lib/raid/xor/arm64/xor-neon-glue.c index 067a2095659a..08c3e3573388 100644 --- a/lib/raid/xor/arm64/xor-neon-glue.c +++ b/lib/raid/xor/arm64/xor-neon-glue.c @@ -7,51 +7,54 @@ #include #include #include +#include "xor-neon.h" -extern struct xor_block_template const xor_block_inner_neon; - -static void -xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2) -{ - scoped_ksimd() - xor_block_inner_neon.do_2(bytes, p1, p2); -} - -static void -xor_neon_3(unsigned 
long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3) -{ - scoped_ksimd() - xor_block_inner_neon.do_3(bytes, p1, p2, p3); -} - -static void -xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4) -{ - scoped_ksimd() - xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4); -} - -static void -xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5) -{ - scoped_ksimd() - xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5); -} - -struct xor_block_template xor_block_arm64 = { - .name = "arm64_neon", - .do_2 = xor_neon_2, - .do_3 = xor_neon_3, - .do_4 = xor_neon_4, - .do_5 = xor_neon_5 +#define XOR_TEMPLATE(_name) \ +static void \ +xor_##_name##_2(unsigned long bytes, unsigned long * __restrict p1, \ + const unsigned long * __restrict p2) \ +{ \ + scoped_ksimd() \ + __xor_##_name##_2(bytes, p1, p2); \ +} \ + \ +static void \ +xor_##_name##_3(unsigned long bytes, unsigned long * __restrict p1, \ + const unsigned long * __restrict p2, \ + const unsigned long * __restrict p3) \ +{ \ + scoped_ksimd() \ + __xor_##_name##_3(bytes, p1, p2, p3); \ +} \ + \ +static void \ +xor_##_name##_4(unsigned long bytes, unsigned long * __restrict p1, \ + const unsigned long * __restrict p2, \ + const unsigned long * __restrict p3, \ + const unsigned long * __restrict p4) \ +{ \ + scoped_ksimd() \ + __xor_##_name##_4(bytes, p1, p2, p3, p4); \ +} \ + \ +static void \ +xor_##_name##_5(unsigned long bytes, unsigned long * __restrict p1, \ + const unsigned long * __restrict p2, \ + const unsigned long * __restrict p3, \ + const unsigned long * __restrict p4, \ + const unsigned long * __restrict p5) \ +{ \ + scoped_ksimd() \ + __xor_##_name##_5(bytes, 
p1, p2, p3, p4, p5); \ +} \ + \ +struct xor_block_template xor_block_##_name = { \ + .name = __stringify(_name), \ + .do_2 = xor_##_name##_2, \ + .do_3 = xor_##_name##_3, \ + .do_4 = xor_##_name##_4, \ + .do_5 = xor_##_name##_5 \ }; + +XOR_TEMPLATE(neon); +XOR_TEMPLATE(eor3); diff --git a/lib/raid/xor/arm64/xor-neon.c b/lib/raid/xor/arm64/xor-neon.c index 8d2d185090db..61194c292917 100644 --- a/lib/raid/xor/arm64/xor-neon.c +++ b/lib/raid/xor/arm64/xor-neon.c @@ -8,9 +8,10 @@ #include #include #include +#include "xor-neon.h" -static void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2) +void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -36,9 +37,9 @@ static void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } -static void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3) +void __xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -72,10 +73,10 @@ static void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } -static void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4) +void __xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -117,11 +118,11 @@ static void xor_arm64_neon_4(unsigned long bytes, unsigned long * 
__restrict p1, } while (--lines > 0); } -static void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5) +void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -171,14 +172,6 @@ static void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } -struct xor_block_template xor_block_inner_neon __ro_after_init = { - .name = "__inner_neon__", - .do_2 = xor_arm64_neon_2, - .do_3 = xor_arm64_neon_3, - .do_4 = xor_arm64_neon_4, - .do_5 = xor_arm64_neon_5, -}; - static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r) { uint64x2_t res; @@ -189,10 +182,9 @@ static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r) return res; } -static void xor_arm64_eor3_3(unsigned long bytes, - unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3) +void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -224,11 +216,10 @@ static void xor_arm64_eor3_3(unsigned long bytes, } while (--lines > 0); } -static void xor_arm64_eor3_4(unsigned long bytes, - unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4) +void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { uint64_t *dp1 = (uint64_t 
*)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -268,12 +259,11 @@ static void xor_arm64_eor3_4(unsigned long bytes, } while (--lines > 0); } -static void xor_arm64_eor3_5(unsigned long bytes, - unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5) +void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -314,12 +304,3 @@ static void xor_arm64_eor3_5(unsigned long bytes, dp5 += 8; } while (--lines > 0); } - -void __init xor_neon_init(void) -{ - if (cpu_have_named_feature(SHA3)) { - xor_block_inner_neon.do_3 = xor_arm64_eor3_3; - xor_block_inner_neon.do_4 = xor_arm64_eor3_4; - xor_block_inner_neon.do_5 = xor_arm64_eor3_5; - } -} diff --git a/lib/raid/xor/arm64/xor-neon.h b/lib/raid/xor/arm64/xor-neon.h new file mode 100644 index 000000000000..cec0ac846fea --- /dev/null +++ b/lib/raid/xor/arm64/xor-neon.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void __xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +void __xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); + +#define __xor_eor3_2 __xor_neon_2 +void __xor_eor3_3(unsigned long bytes, 
unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); From e20043b4765cdf7ec8e963d706bb91469cba8cb8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:53 +0100 Subject: [PATCH 091/127] xor: make xor.ko self-contained in lib/raid/ Move the asm/xor.h headers to lib/raid/xor/$(SRCARCH)/xor_arch.h and include/linux/raid/xor_impl.h to lib/raid/xor/xor_impl.h so that the xor.ko module implementation is self-contained in lib/raid/. As this remove the asm-generic mechanism a new kconfig symbol is added to indicate that a architecture-specific implementations exists, and xor_arch.h should be included. Link: https://lkml.kernel.org/r/20260327061704.3707577-22-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. 
Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/um/include/asm/xor.h | 8 -------- include/asm-generic/Kbuild | 1 - include/asm-generic/xor.h | 11 ----------- lib/raid/Kconfig | 15 +++++++++++++++ lib/raid/xor/Makefile | 6 ++++++ lib/raid/xor/alpha/xor.c | 4 ++-- .../asm/xor.h => lib/raid/xor/alpha/xor_arch.h | 2 -- lib/raid/xor/arm/xor-neon-glue.c | 4 ++-- lib/raid/xor/arm/xor-neon.c | 2 +- lib/raid/xor/arm/xor.c | 4 ++-- .../asm/xor.h => lib/raid/xor/arm/xor_arch.h | 2 -- lib/raid/xor/arm64/xor-neon-glue.c | 4 ++-- lib/raid/xor/arm64/xor-neon.c | 4 ++-- .../asm/xor.h => lib/raid/xor/arm64/xor_arch.h | 3 --- .../xor.h => lib/raid/xor/loongarch/xor_arch.h | 7 ------- lib/raid/xor/loongarch/xor_simd_glue.c | 4 ++-- .../xor.h => lib/raid/xor/powerpc/xor_arch.h | 7 ------- lib/raid/xor/powerpc/xor_vmx_glue.c | 4 ++-- lib/raid/xor/riscv/xor-glue.c | 4 ++-- .../asm/xor.h => lib/raid/xor/riscv/xor_arch.h | 2 -- lib/raid/xor/s390/xor.c | 4 ++-- .../asm/xor.h => lib/raid/xor/s390/xor_arch.h | 6 ------ lib/raid/xor/sparc/xor-sparc32.c | 4 ++-- lib/raid/xor/sparc/xor-sparc64-glue.c | 4 ++-- .../asm/xor.h => lib/raid/xor/sparc/xor_arch.h | 9 --------- lib/raid/xor/um/xor_arch.h | 2 ++ lib/raid/xor/x86/xor-avx.c | 4 ++-- lib/raid/xor/x86/xor-mmx.c | 4 ++-- lib/raid/xor/x86/xor-sse.c | 4 ++-- .../asm/xor.h => lib/raid/xor/x86/xor_arch.h | 7 ------- lib/raid/xor/xor-32regs-prefetch.c | 3 +-- lib/raid/xor/xor-32regs.c | 3 +-- lib/raid/xor/xor-8regs-prefetch.c | 3 +-- lib/raid/xor/xor-8regs.c | 3 +-- lib/raid/xor/xor-core.c | 18 +++++++++++------- .../linux/raid => lib/raid/xor}/xor_impl.h | 6 ++++++ 36 files changed, 73 insertions(+), 109 deletions(-) delete mode 100644 
arch/um/include/asm/xor.h delete mode 100644 include/asm-generic/xor.h rename arch/alpha/include/asm/xor.h => lib/raid/xor/alpha/xor_arch.h (90%) rename arch/arm/include/asm/xor.h => lib/raid/xor/arm/xor_arch.h (87%) rename arch/arm64/include/asm/xor.h => lib/raid/xor/arm64/xor_arch.h (89%) rename arch/loongarch/include/asm/xor.h => lib/raid/xor/loongarch/xor_arch.h (85%) rename arch/powerpc/include/asm/xor.h => lib/raid/xor/powerpc/xor_arch.h (77%) rename arch/riscv/include/asm/xor.h => lib/raid/xor/riscv/xor_arch.h (84%) rename arch/s390/include/asm/xor.h => lib/raid/xor/s390/xor_arch.h (71%) rename arch/sparc/include/asm/xor.h => lib/raid/xor/sparc/xor_arch.h (81%) create mode 100644 lib/raid/xor/um/xor_arch.h rename arch/x86/include/asm/xor.h => lib/raid/xor/x86/xor_arch.h (89%) rename {include/linux/raid => lib/raid/xor}/xor_impl.h (80%) diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h deleted file mode 100644 index 99e5c7e1f475..000000000000 --- a/arch/um/include/asm/xor.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_UM_XOR_H -#define _ASM_UM_XOR_H - -#include -#include <../../x86/include/asm/xor.h> - -#endif diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 9aff61e7b8f2..2c53a1e0b760 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -65,4 +65,3 @@ mandatory-y += vermagic.h mandatory-y += vga.h mandatory-y += video.h mandatory-y += word-at-a-time.h -mandatory-y += xor.h diff --git a/include/asm-generic/xor.h b/include/asm-generic/xor.h deleted file mode 100644 index fc151fdc45ab..000000000000 --- a/include/asm-generic/xor.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * include/asm-generic/xor.h - * - * Generic optimized RAID-5 checksumming functions. 
- */ - -extern struct xor_block_template xor_block_8regs; -extern struct xor_block_template xor_block_32regs; -extern struct xor_block_template xor_block_8regs_p; -extern struct xor_block_template xor_block_32regs_p; diff --git a/lib/raid/Kconfig b/lib/raid/Kconfig index 01b73a1c303f..81cb3f9c0a7b 100644 --- a/lib/raid/Kconfig +++ b/lib/raid/Kconfig @@ -2,3 +2,18 @@ config XOR_BLOCKS tristate + +# selected by architectures that provide an optimized XOR implementation +config XOR_BLOCKS_ARCH + depends on XOR_BLOCKS + default y if ALPHA + default y if ARM + default y if ARM64 + default y if CPU_HAS_LSX # loongarch + default y if ALTIVEC # powerpc + default y if RISCV_ISA_V + default y if SPARC + default y if S390 + default y if X86_32 + default y if X86_64 + bool diff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile index 05aca96041b3..df55823c4d82 100644 --- a/lib/raid/xor/Makefile +++ b/lib/raid/xor/Makefile @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 +ccflags-y += -I $(src) + obj-$(CONFIG_XOR_BLOCKS) += xor.o xor-y += xor-core.o @@ -8,6 +10,10 @@ xor-y += xor-32regs.o xor-y += xor-8regs-prefetch.o xor-y += xor-32regs-prefetch.o +ifeq ($(CONFIG_XOR_BLOCKS_ARCH),y) +CFLAGS_xor-core.o += -I$(src)/$(SRCARCH) +endif + xor-$(CONFIG_ALPHA) += alpha/xor.o xor-$(CONFIG_ARM) += arm/xor.o ifeq ($(CONFIG_ARM),y) diff --git a/lib/raid/xor/alpha/xor.c b/lib/raid/xor/alpha/xor.c index 0964ac420604..90694cc47395 100644 --- a/lib/raid/xor/alpha/xor.c +++ b/lib/raid/xor/alpha/xor.c @@ -2,8 +2,8 @@ /* * Optimized XOR parity functions for alpha EV5 and EV6 */ -#include -#include +#include "xor_impl.h" +#include "xor_arch.h" extern void xor_alpha_2(unsigned long bytes, unsigned long * __restrict p1, diff --git a/arch/alpha/include/asm/xor.h b/lib/raid/xor/alpha/xor_arch.h similarity index 90% rename from arch/alpha/include/asm/xor.h rename to lib/raid/xor/alpha/xor_arch.h index e517be577a09..0dcfea578a48 100644 --- a/arch/alpha/include/asm/xor.h +++ 
b/lib/raid/xor/alpha/xor_arch.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ #include -#include extern struct xor_block_template xor_block_alpha; extern struct xor_block_template xor_block_alpha_prefetch; @@ -10,7 +9,6 @@ extern struct xor_block_template xor_block_alpha_prefetch; * Force the use of alpha_prefetch if EV6, as it is significantly faster in the * cold cache case. */ -#define arch_xor_init arch_xor_init static __always_inline void __init arch_xor_init(void) { if (implver() == IMPLVER_EV6) { diff --git a/lib/raid/xor/arm/xor-neon-glue.c b/lib/raid/xor/arm/xor-neon-glue.c index c7b162b383a2..7afd6294464b 100644 --- a/lib/raid/xor/arm/xor-neon-glue.c +++ b/lib/raid/xor/arm/xor-neon-glue.c @@ -2,8 +2,8 @@ /* * Copyright (C) 2001 Russell King */ -#include -#include +#include "xor_impl.h" +#include "xor_arch.h" extern struct xor_block_template const xor_block_neon_inner; diff --git a/lib/raid/xor/arm/xor-neon.c b/lib/raid/xor/arm/xor-neon.c index c9d4378b0f0e..806a42c5952c 100644 --- a/lib/raid/xor/arm/xor-neon.c +++ b/lib/raid/xor/arm/xor-neon.c @@ -3,7 +3,7 @@ * Copyright (C) 2013 Linaro Ltd */ -#include +#include "xor_impl.h" #ifndef __ARM_NEON__ #error You should compile this file with '-march=armv7-a -mfloat-abi=softfp -mfpu=neon' diff --git a/lib/raid/xor/arm/xor.c b/lib/raid/xor/arm/xor.c index 2263341dbbcd..5bd5f048bbe9 100644 --- a/lib/raid/xor/arm/xor.c +++ b/lib/raid/xor/arm/xor.c @@ -2,8 +2,8 @@ /* * Copyright (C) 2001 Russell King */ -#include -#include +#include "xor_impl.h" +#include "xor_arch.h" #define __XOR(a1, a2) a1 ^= a2 diff --git a/arch/arm/include/asm/xor.h b/lib/raid/xor/arm/xor_arch.h similarity index 87% rename from arch/arm/include/asm/xor.h rename to lib/raid/xor/arm/xor_arch.h index 989c55872ef6..5a7eedb48fbb 100644 --- a/arch/arm/include/asm/xor.h +++ b/lib/raid/xor/arm/xor_arch.h @@ -2,13 +2,11 @@ /* * Copyright (C) 2001 Russell King */ -#include #include extern struct xor_block_template xor_block_arm4regs; 
extern struct xor_block_template xor_block_neon; -#define arch_xor_init arch_xor_init static __always_inline void __init arch_xor_init(void) { xor_register(&xor_block_arm4regs); diff --git a/lib/raid/xor/arm64/xor-neon-glue.c b/lib/raid/xor/arm64/xor-neon-glue.c index 08c3e3573388..3db0a318cf5b 100644 --- a/lib/raid/xor/arm64/xor-neon-glue.c +++ b/lib/raid/xor/arm64/xor-neon-glue.c @@ -4,9 +4,9 @@ * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd. */ -#include #include -#include +#include "xor_impl.h" +#include "xor_arch.h" #include "xor-neon.h" #define XOR_TEMPLATE(_name) \ diff --git a/lib/raid/xor/arm64/xor-neon.c b/lib/raid/xor/arm64/xor-neon.c index 61194c292917..61f00c4fee49 100644 --- a/lib/raid/xor/arm64/xor-neon.c +++ b/lib/raid/xor/arm64/xor-neon.c @@ -4,10 +4,10 @@ * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd. */ -#include #include #include -#include +#include "xor_impl.h" +#include "xor_arch.h" #include "xor-neon.h" void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, diff --git a/arch/arm64/include/asm/xor.h b/lib/raid/xor/arm64/xor_arch.h similarity index 89% rename from arch/arm64/include/asm/xor.h rename to lib/raid/xor/arm64/xor_arch.h index 4782c760bcac..5dbb40319501 100644 --- a/arch/arm64/include/asm/xor.h +++ b/lib/raid/xor/arm64/xor_arch.h @@ -3,14 +3,11 @@ * Authors: Jackie Liu * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd. 
*/ - -#include #include extern struct xor_block_template xor_block_neon; extern struct xor_block_template xor_block_eor3; -#define arch_xor_init arch_xor_init static __always_inline void __init arch_xor_init(void) { xor_register(&xor_block_8regs); diff --git a/arch/loongarch/include/asm/xor.h b/lib/raid/xor/loongarch/xor_arch.h similarity index 85% rename from arch/loongarch/include/asm/xor.h rename to lib/raid/xor/loongarch/xor_arch.h index 7e32f72f8b03..fe5e8244fd0e 100644 --- a/arch/loongarch/include/asm/xor.h +++ b/lib/raid/xor/loongarch/xor_arch.h @@ -2,9 +2,6 @@ /* * Copyright (C) 2023 WANG Xuerui */ -#ifndef _ASM_LOONGARCH_XOR_H -#define _ASM_LOONGARCH_XOR_H - #include /* @@ -15,12 +12,10 @@ * the scalar ones, maybe for errata or micro-op reasons. It may be * appropriate to revisit this after one or two more uarch generations. */ -#include extern struct xor_block_template xor_block_lsx; extern struct xor_block_template xor_block_lasx; -#define arch_xor_init arch_xor_init static __always_inline void __init arch_xor_init(void) { xor_register(&xor_block_8regs); @@ -36,5 +31,3 @@ static __always_inline void __init arch_xor_init(void) xor_register(&xor_block_lasx); #endif } - -#endif /* _ASM_LOONGARCH_XOR_H */ diff --git a/lib/raid/xor/loongarch/xor_simd_glue.c b/lib/raid/xor/loongarch/xor_simd_glue.c index 11fa3b47ba83..b387aa0213b4 100644 --- a/lib/raid/xor/loongarch/xor_simd_glue.c +++ b/lib/raid/xor/loongarch/xor_simd_glue.c @@ -6,9 +6,9 @@ */ #include -#include #include -#include +#include "xor_impl.h" +#include "xor_arch.h" #include "xor_simd.h" #define MAKE_XOR_GLUE_2(flavor) \ diff --git a/arch/powerpc/include/asm/xor.h b/lib/raid/xor/powerpc/xor_arch.h similarity index 77% rename from arch/powerpc/include/asm/xor.h rename to lib/raid/xor/powerpc/xor_arch.h index 3293ac87181c..3b00a4a2fd67 100644 --- a/arch/powerpc/include/asm/xor.h +++ b/lib/raid/xor/powerpc/xor_arch.h @@ -5,15 +5,10 @@ * * Author: Anton Blanchard */ -#ifndef _ASM_POWERPC_XOR_H -#define 
_ASM_POWERPC_XOR_H - #include -#include extern struct xor_block_template xor_block_altivec; -#define arch_xor_init arch_xor_init static __always_inline void __init arch_xor_init(void) { xor_register(&xor_block_8regs); @@ -25,5 +20,3 @@ static __always_inline void __init arch_xor_init(void) xor_register(&xor_block_altivec); #endif } - -#endif /* _ASM_POWERPC_XOR_H */ diff --git a/lib/raid/xor/powerpc/xor_vmx_glue.c b/lib/raid/xor/powerpc/xor_vmx_glue.c index c41e38340700..56e99ddfb64f 100644 --- a/lib/raid/xor/powerpc/xor_vmx_glue.c +++ b/lib/raid/xor/powerpc/xor_vmx_glue.c @@ -7,9 +7,9 @@ #include #include -#include #include -#include +#include "xor_impl.h" +#include "xor_arch.h" #include "xor_vmx.h" static void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, diff --git a/lib/raid/xor/riscv/xor-glue.c b/lib/raid/xor/riscv/xor-glue.c index 11666a4b6b68..060e5f22ebcc 100644 --- a/lib/raid/xor/riscv/xor-glue.c +++ b/lib/raid/xor/riscv/xor-glue.c @@ -3,11 +3,11 @@ * Copyright (C) 2021 SiFive */ -#include #include #include #include -#include +#include "xor_impl.h" +#include "xor_arch.h" static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1, const unsigned long *__restrict p2) diff --git a/arch/riscv/include/asm/xor.h b/lib/raid/xor/riscv/xor_arch.h similarity index 84% rename from arch/riscv/include/asm/xor.h rename to lib/raid/xor/riscv/xor_arch.h index 614d9209d078..9240857d760b 100644 --- a/arch/riscv/include/asm/xor.h +++ b/lib/raid/xor/riscv/xor_arch.h @@ -3,11 +3,9 @@ * Copyright (C) 2021 SiFive */ #include -#include extern struct xor_block_template xor_block_rvv; -#define arch_xor_init arch_xor_init static __always_inline void __init arch_xor_init(void) { xor_register(&xor_block_8regs); diff --git a/lib/raid/xor/s390/xor.c b/lib/raid/xor/s390/xor.c index acbd268adfc8..c28cb56fec92 100644 --- a/lib/raid/xor/s390/xor.c +++ b/lib/raid/xor/s390/xor.c @@ -7,8 +7,8 @@ */ #include -#include -#include +#include "xor_impl.h" 
+#include "xor_arch.h" static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2) diff --git a/arch/s390/include/asm/xor.h b/lib/raid/xor/s390/xor_arch.h similarity index 71% rename from arch/s390/include/asm/xor.h rename to lib/raid/xor/s390/xor_arch.h index 4e2233f64da9..4a233ed2b97a 100644 --- a/arch/s390/include/asm/xor.h +++ b/lib/raid/xor/s390/xor_arch.h @@ -5,15 +5,9 @@ * Copyright IBM Corp. 2016 * Author(s): Martin Schwidefsky */ -#ifndef _ASM_S390_XOR_H -#define _ASM_S390_XOR_H - extern struct xor_block_template xor_block_xc; -#define arch_xor_init arch_xor_init static __always_inline void __init arch_xor_init(void) { xor_force(&xor_block_xc); } - -#endif /* _ASM_S390_XOR_H */ diff --git a/lib/raid/xor/sparc/xor-sparc32.c b/lib/raid/xor/sparc/xor-sparc32.c index b65a75a6e59d..307c4a84f535 100644 --- a/lib/raid/xor/sparc/xor-sparc32.c +++ b/lib/raid/xor/sparc/xor-sparc32.c @@ -5,8 +5,8 @@ * * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz) */ -#include -#include +#include "xor_impl.h" +#include "xor_arch.h" static void sparc_2(unsigned long bytes, unsigned long * __restrict p1, diff --git a/lib/raid/xor/sparc/xor-sparc64-glue.c b/lib/raid/xor/sparc/xor-sparc64-glue.c index 3c67c8c3a0e8..5f90c2460b54 100644 --- a/lib/raid/xor/sparc/xor-sparc64-glue.c +++ b/lib/raid/xor/sparc/xor-sparc64-glue.c @@ -8,8 +8,8 @@ * Copyright (C) 2006 David S. 
Miller */ -#include -#include +#include "xor_impl.h" +#include "xor_arch.h" void xor_vis_2(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2); diff --git a/arch/sparc/include/asm/xor.h b/lib/raid/xor/sparc/xor_arch.h similarity index 81% rename from arch/sparc/include/asm/xor.h rename to lib/raid/xor/sparc/xor_arch.h index f923b009fc24..af288abe4e91 100644 --- a/arch/sparc/include/asm/xor.h +++ b/lib/raid/xor/sparc/xor_arch.h @@ -3,16 +3,12 @@ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) * Copyright (C) 2006 David S. Miller */ -#ifndef ___ASM_SPARC_XOR_H -#define ___ASM_SPARC_XOR_H - #if defined(__sparc__) && defined(__arch64__) #include extern struct xor_block_template xor_block_VIS; extern struct xor_block_template xor_block_niagara; -#define arch_xor_init arch_xor_init static __always_inline void __init arch_xor_init(void) { /* Force VIS for everything except Niagara. */ @@ -28,12 +24,8 @@ static __always_inline void __init arch_xor_init(void) } #else /* sparc64 */ -/* For grins, also test the generic routines. 
*/ -#include - extern struct xor_block_template xor_block_SPARC; -#define arch_xor_init arch_xor_init static __always_inline void __init arch_xor_init(void) { xor_register(&xor_block_8regs); @@ -41,4 +33,3 @@ static __always_inline void __init arch_xor_init(void) xor_register(&xor_block_SPARC); } #endif /* !sparc64 */ -#endif /* ___ASM_SPARC_XOR_H */ diff --git a/lib/raid/xor/um/xor_arch.h b/lib/raid/xor/um/xor_arch.h new file mode 100644 index 000000000000..a33e57a26c5e --- /dev/null +++ b/lib/raid/xor/um/xor_arch.h @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <../x86/xor_arch.h> diff --git a/lib/raid/xor/x86/xor-avx.c b/lib/raid/xor/x86/xor-avx.c index b49cb5199e70..d411efa1ff43 100644 --- a/lib/raid/xor/x86/xor-avx.c +++ b/lib/raid/xor/x86/xor-avx.c @@ -8,9 +8,9 @@ * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines */ #include -#include #include -#include +#include "xor_impl.h" +#include "xor_arch.h" #define BLOCK4(i) \ BLOCK(32 * i, 0) \ diff --git a/lib/raid/xor/x86/xor-mmx.c b/lib/raid/xor/x86/xor-mmx.c index cf0fafea33b7..e48c58f92874 100644 --- a/lib/raid/xor/x86/xor-mmx.c +++ b/lib/raid/xor/x86/xor-mmx.c @@ -4,9 +4,9 @@ * * Copyright (C) 1998 Ingo Molnar. */ -#include #include -#include +#include "xor_impl.h" +#include "xor_arch.h" #define LD(x, y) " movq 8*("#x")(%1), %%mm"#y" ;\n" #define ST(x, y) " movq %%mm"#y", 8*("#x")(%1) ;\n" diff --git a/lib/raid/xor/x86/xor-sse.c b/lib/raid/xor/x86/xor-sse.c index 0e727ced8b00..5993ed688c15 100644 --- a/lib/raid/xor/x86/xor-sse.c +++ b/lib/raid/xor/x86/xor-sse.c @@ -12,9 +12,9 @@ * x86-64 changes / gcc fixes from Andi Kleen. * Copyright 2002 Andi Kleen, SuSE Labs. 
*/ -#include #include -#include +#include "xor_impl.h" +#include "xor_arch.h" #ifdef CONFIG_X86_32 /* reduce register pressure */ diff --git a/arch/x86/include/asm/xor.h b/lib/raid/xor/x86/xor_arch.h similarity index 89% rename from arch/x86/include/asm/xor.h rename to lib/raid/xor/x86/xor_arch.h index d1aab8275908..99fe85a213c6 100644 --- a/arch/x86/include/asm/xor.h +++ b/lib/raid/xor/x86/xor_arch.h @@ -1,9 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ASM_X86_XOR_H -#define _ASM_X86_XOR_H - #include -#include extern struct xor_block_template xor_block_pII_mmx; extern struct xor_block_template xor_block_p5_mmx; @@ -20,7 +16,6 @@ extern struct xor_block_template xor_block_avx; * * 32-bit without MMX can fall back to the generic routines. */ -#define arch_xor_init arch_xor_init static __always_inline void __init arch_xor_init(void) { if (boot_cpu_has(X86_FEATURE_AVX) && @@ -39,5 +34,3 @@ static __always_inline void __init arch_xor_init(void) xor_register(&xor_block_32regs_p); } } - -#endif /* _ASM_X86_XOR_H */ diff --git a/lib/raid/xor/xor-32regs-prefetch.c b/lib/raid/xor/xor-32regs-prefetch.c index 8666c287f777..2856a8e50cb8 100644 --- a/lib/raid/xor/xor-32regs-prefetch.c +++ b/lib/raid/xor/xor-32regs-prefetch.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include -#include +#include "xor_impl.h" static void xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1, diff --git a/lib/raid/xor/xor-32regs.c b/lib/raid/xor/xor-32regs.c index 58d4fac43eb4..cc44d64032fa 100644 --- a/lib/raid/xor/xor-32regs.c +++ b/lib/raid/xor/xor-32regs.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -#include -#include +#include "xor_impl.h" static void xor_32regs_2(unsigned long bytes, unsigned long * __restrict p1, diff --git a/lib/raid/xor/xor-8regs-prefetch.c b/lib/raid/xor/xor-8regs-prefetch.c index 67061e35a0a6..1d53aec50d27 100644 --- a/lib/raid/xor/xor-8regs-prefetch.c +++ b/lib/raid/xor/xor-8regs-prefetch.c 
@@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include -#include +#include "xor_impl.h" static void xor_8regs_p_2(unsigned long bytes, unsigned long * __restrict p1, diff --git a/lib/raid/xor/xor-8regs.c b/lib/raid/xor/xor-8regs.c index 769f796ab2cf..72a44e898c55 100644 --- a/lib/raid/xor/xor-8regs.c +++ b/lib/raid/xor/xor-8regs.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -#include -#include +#include "xor_impl.h" static void xor_8regs_2(unsigned long bytes, unsigned long * __restrict p1, diff --git a/lib/raid/xor/xor-core.c b/lib/raid/xor/xor-core.c index 93608b5fece9..de1d2899490a 100644 --- a/lib/raid/xor/xor-core.c +++ b/lib/raid/xor/xor-core.c @@ -9,10 +9,9 @@ #include #include #include -#include #include #include -#include +#include "xor_impl.h" /* The xor routines to use. */ static struct xor_block_template *active_template; @@ -141,16 +140,21 @@ static int __init calibrate_xor_blocks(void) return 0; } -static int __init xor_init(void) -{ -#ifdef arch_xor_init - arch_xor_init(); +#ifdef CONFIG_XOR_BLOCKS_ARCH +#include "xor_arch.h" /* $SRCARCH/xor_arch.h */ #else +static void __init arch_xor_init(void) +{ xor_register(&xor_block_8regs); xor_register(&xor_block_8regs_p); xor_register(&xor_block_32regs); xor_register(&xor_block_32regs_p); -#endif +} +#endif /* CONFIG_XOR_BLOCKS_ARCH */ + +static int __init xor_init(void) +{ + arch_xor_init(); /* * If this arch/cpu has a short-circuited selection, don't loop through diff --git a/include/linux/raid/xor_impl.h b/lib/raid/xor/xor_impl.h similarity index 80% rename from include/linux/raid/xor_impl.h rename to lib/raid/xor/xor_impl.h index 6ed4c445ab24..44b6c99e2093 100644 --- a/include/linux/raid/xor_impl.h +++ b/lib/raid/xor/xor_impl.h @@ -24,6 +24,12 @@ struct xor_block_template { const unsigned long * __restrict); }; +/* generic implementations */ +extern struct xor_block_template xor_block_8regs; +extern struct xor_block_template xor_block_32regs; +extern struct 
xor_block_template xor_block_8regs_p; +extern struct xor_block_template xor_block_32regs_p; + void __init xor_register(struct xor_block_template *tmpl); void __init xor_force(struct xor_block_template *tmpl); From e420f0a88b24b80302f57965ceb7387aa3f12488 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:54 +0100 Subject: [PATCH 092/127] xor: add a better public API xor_blocks is very annoying to use, because it is limited to 4 + 1 sources / destinations, has an odd argument order and is completely undocumented. Lift the code that loops around it from btrfs and async_tx/async_xor into common code under the name xor_gen and properly document it. [hch@lst.de: make xor_blocks less annoying to use] Link: https://lkml.kernel.org/r/20260327061704.3707577-24-hch@lst.de Link: https://lkml.kernel.org/r/20260327061704.3707577-23-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. 
Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/raid/xor.h | 2 ++ lib/raid/xor/xor-core.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h index 02bda8d99534..6d9a39fd85dd 100644 --- a/include/linux/raid/xor.h +++ b/include/linux/raid/xor.h @@ -7,4 +7,6 @@ extern void xor_blocks(unsigned int count, unsigned int bytes, void *dest, void **srcs); +void xor_gen(void *dest, void **srcs, unsigned int src_cnt, unsigned int bytes); + #endif /* _XOR_H */ diff --git a/lib/raid/xor/xor-core.c b/lib/raid/xor/xor-core.c index de1d2899490a..2e46b6b83b0a 100644 --- a/lib/raid/xor/xor-core.c +++ b/lib/raid/xor/xor-core.c @@ -46,6 +46,40 @@ xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs) } EXPORT_SYMBOL(xor_blocks); +/** + * xor_gen - generate RAID-style XOR information + * @dest: destination vector + * @srcs: source vectors + * @src_cnt: number of source vectors + * @bytes: length in bytes of each vector + * + * Performs bit-wise XOR operation into @dest for each of the @src_cnt vectors + * in @srcs for a length of @bytes bytes. @src_cnt must be non-zero, and the + * memory pointed to by @dest and each member of @srcs must be at least 64-byte + * aligned. @bytes must be non-zero and a multiple of 512. + * + * Note: for typical RAID uses, @dest either needs to be zeroed, or filled with + * the first disk, which then needs to be removed from @srcs. 
+ */ +void xor_gen(void *dest, void **srcs, unsigned int src_cnt, unsigned int bytes) +{ + unsigned int src_off = 0; + + WARN_ON_ONCE(in_interrupt()); + WARN_ON_ONCE(bytes == 0); + WARN_ON_ONCE(bytes & 511); + + while (src_cnt > 0) { + unsigned int this_cnt = min(src_cnt, MAX_XOR_BLOCKS); + + xor_blocks(this_cnt, bytes, dest, srcs + src_off); + + src_cnt -= this_cnt; + src_off += this_cnt; + } +} +EXPORT_SYMBOL(xor_gen); + /* Set of all registered templates. */ static struct xor_block_template *__initdata template_list; static bool __initdata xor_forced = false; From 7c12c32b9f73f64bbdb71abdbd6524b9489063dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:56 +0100 Subject: [PATCH 093/127] async_xor: use xor_gen Replace use of the loop around xor_blocks with the easier to use xor_gen API. Link: https://lkml.kernel.org/r/20260327061704.3707577-25-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. 
Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- crypto/async_tx/async_xor.c | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 2c499654a36c..84458375b202 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -103,7 +103,6 @@ do_sync_xor_offs(struct page *dest, unsigned int offset, { int i; int xor_src_cnt = 0; - int src_off = 0; void *dest_buf; void **srcs; @@ -117,23 +116,12 @@ do_sync_xor_offs(struct page *dest, unsigned int offset, if (src_list[i]) srcs[xor_src_cnt++] = page_address(src_list[i]) + (src_offs ? src_offs[i] : offset); - src_cnt = xor_src_cnt; + /* set destination address */ dest_buf = page_address(dest) + offset; - if (submit->flags & ASYNC_TX_XOR_ZERO_DST) memset(dest_buf, 0, len); - - while (src_cnt > 0) { - /* process up to 'MAX_XOR_BLOCKS' sources */ - xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS); - xor_blocks(xor_src_cnt, len, dest_buf, &srcs[src_off]); - - /* drop completed sources */ - src_cnt -= xor_src_cnt; - src_off += xor_src_cnt; - } - + xor_gen(dest_buf, srcs, xor_src_cnt, len); async_tx_sync_epilog(submit); } @@ -168,11 +156,10 @@ dma_xor_aligned_offsets(struct dma_device *device, unsigned int offset, * * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST * - * xor_blocks always uses the dest as a source so the - * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in - * the calculation. The assumption with dma engines is that they only - * use the destination buffer as a source when it is explicitly specified - * in the source list. 
+ * xor_gen always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST flag + * must be set to not include dest data in the calculation. The assumption with + * dma engines is that they only use the destination buffer as a source when it + * is explicitly specified in the source list. * * src_list note: if the dest is also a source it must be at index zero. * The contents of this array will be overwritten if a scribble region @@ -259,11 +246,10 @@ EXPORT_SYMBOL_GPL(async_xor_offs); * * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST * - * xor_blocks always uses the dest as a source so the - * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in - * the calculation. The assumption with dma engines is that they only - * use the destination buffer as a source when it is explicitly specified - * in the source list. + * xor_gen always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST flag + * must be set to not include dest data in the calculation. The assumption with + * dma engines is that they only use the destination buffer as a source when it + * is explicitly specified in the source list. * * src_list note: if the dest is also a source it must be at index zero. * The contents of this array will be overwritten if a scribble region From 0f629e7283ad50b2efe4451914ddf06c89de463e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:57 +0100 Subject: [PATCH 094/127] btrfs: use xor_gen Use the new xor_gen helper instead of open coding the loop around xor_blocks. This helper is very similar to the existing run_xor helper in btrfs, except that the destination buffer is passed explicitly. 
Link: https://lkml.kernel.org/r/20260327061704.3707577-26-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: David Sterba Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- fs/btrfs/raid56.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 02105d68accb..ad091c56ce77 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -617,26 +617,6 @@ static void cache_rbio(struct btrfs_raid_bio *rbio) spin_unlock(&table->cache_lock); } -/* - * helper function to run the xor_blocks api. It is only - * able to do MAX_XOR_BLOCKS at a time, so we need to - * loop through. - */ -static void run_xor(void **pages, int src_cnt, ssize_t len) -{ - int src_off = 0; - int xor_src_cnt = 0; - void *dest = pages[src_cnt]; - - while(src_cnt > 0) { - xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS); - xor_blocks(xor_src_cnt, len, dest, pages + src_off); - - src_cnt -= xor_src_cnt; - src_off += xor_src_cnt; - } -} - /* * Returns true if the bio list inside this rbio covers an entire stripe (no * rmw required). 
@@ -1434,7 +1414,8 @@ static void generate_pq_vertical_step(struct btrfs_raid_bio *rbio, unsigned int } else { /* raid5 */ memcpy(pointers[rbio->nr_data], pointers[0], step); - run_xor(pointers + 1, rbio->nr_data - 1, step); + xor_gen(pointers[rbio->nr_data], pointers + 1, rbio->nr_data - 1, + step); } for (stripe = stripe - 1; stripe >= 0; stripe--) kunmap_local(pointers[stripe]); @@ -2034,7 +2015,7 @@ pstripe: pointers[rbio->nr_data - 1] = p; /* Xor in the rest */ - run_xor(pointers, rbio->nr_data - 1, step); + xor_gen(p, pointers, rbio->nr_data - 1, step); } cleanup: @@ -2672,7 +2653,7 @@ static bool verify_one_parity_step(struct btrfs_raid_bio *rbio, } else { /* RAID5. */ memcpy(pointers[nr_data], pointers[0], step); - run_xor(pointers + 1, nr_data - 1, step); + xor_gen(pointers[nr_data], pointers + 1, nr_data - 1, step); } /* Check scrubbing parity and repair it. */ From 80dcf0a7832a5acde0f0701a4dc7b586fc8bcc88 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:58 +0100 Subject: [PATCH 095/127] xor: pass the entire operation to the low-level ops Currently the high-level xor code chunks up all operations into small units for only up to 1 + 4 vectors, and passes it to four different methods. This means the FPU/vector context is entered and left a lot for wide stripes, and a lot of indirect expensive indirect calls are performed. Switch to passing the entire gen_xor request to the low-level ops, and provide a macro to dispatch it to the existing helper. This reduce the number of indirect calls and FPU/vector context switches by a factor approaching nr_stripes / 4, and also reduces source and binary code size. 
Link: https://lkml.kernel.org/r/20260327061704.3707577-27-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/raid/xor.h | 5 -- lib/raid/xor/alpha/xor.c | 19 ++++---- lib/raid/xor/arm/xor-neon-glue.c | 49 ++------------------ lib/raid/xor/arm/xor-neon.c | 9 +--- lib/raid/xor/arm/xor.c | 10 ++-- lib/raid/xor/arm/xor_arch.h | 3 ++ lib/raid/xor/arm64/xor-neon-glue.c | 44 ++---------------- lib/raid/xor/arm64/xor-neon.c | 20 +++++--- lib/raid/xor/arm64/xor-neon.h | 32 ++----------- lib/raid/xor/loongarch/xor_simd_glue.c | 62 +++++-------------------- lib/raid/xor/powerpc/xor_vmx.c | 40 ++++++++-------- lib/raid/xor/powerpc/xor_vmx.h | 16 +------ lib/raid/xor/powerpc/xor_vmx_glue.c | 49 ++------------------ lib/raid/xor/riscv/xor-glue.c | 43 +++-------------- lib/raid/xor/s390/xor.c | 9 ++-- lib/raid/xor/sparc/xor-sparc32.c | 9 ++-- lib/raid/xor/sparc/xor-sparc64-glue.c | 19 ++++---- lib/raid/xor/x86/xor-avx.c | 29 ++++-------- lib/raid/xor/x86/xor-mmx.c | 64 ++++++++++---------------- lib/raid/xor/x86/xor-sse.c | 63 +++++++++---------------- lib/raid/xor/xor-32regs-prefetch.c | 10 ++-- lib/raid/xor/xor-32regs.c | 9 ++-- lib/raid/xor/xor-8regs-prefetch.c | 11 +++-- lib/raid/xor/xor-8regs.c | 9 ++-- 
lib/raid/xor/xor-core.c | 48 ++----------------- lib/raid/xor/xor_impl.h | 48 +++++++++++++------ 26 files changed, 224 insertions(+), 505 deletions(-) diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h index 6d9a39fd85dd..870558c9d36e 100644 --- a/include/linux/raid/xor.h +++ b/include/linux/raid/xor.h @@ -2,11 +2,6 @@ #ifndef _XOR_H #define _XOR_H -#define MAX_XOR_BLOCKS 4 - -extern void xor_blocks(unsigned int count, unsigned int bytes, - void *dest, void **srcs); - void xor_gen(void *dest, void **srcs, unsigned int src_cnt, unsigned int bytes); #endif /* _XOR_H */ diff --git a/lib/raid/xor/alpha/xor.c b/lib/raid/xor/alpha/xor.c index 90694cc47395..a8f72f2dd3a5 100644 --- a/lib/raid/xor/alpha/xor.c +++ b/lib/raid/xor/alpha/xor.c @@ -832,18 +832,17 @@ xor_alpha_prefetch_5: \n\ .end xor_alpha_prefetch_5 \n\ "); +DO_XOR_BLOCKS(alpha, xor_alpha_2, xor_alpha_3, xor_alpha_4, xor_alpha_5); + struct xor_block_template xor_block_alpha = { - .name = "alpha", - .do_2 = xor_alpha_2, - .do_3 = xor_alpha_3, - .do_4 = xor_alpha_4, - .do_5 = xor_alpha_5, + .name = "alpha", + .xor_gen = xor_gen_alpha, }; +DO_XOR_BLOCKS(alpha_prefetch, xor_alpha_prefetch_2, xor_alpha_prefetch_3, + xor_alpha_prefetch_4, xor_alpha_prefetch_5); + struct xor_block_template xor_block_alpha_prefetch = { - .name = "alpha prefetch", - .do_2 = xor_alpha_prefetch_2, - .do_3 = xor_alpha_prefetch_3, - .do_4 = xor_alpha_prefetch_4, - .do_5 = xor_alpha_prefetch_5, + .name = "alpha prefetch", + .xor_gen = xor_gen_alpha_prefetch, }; diff --git a/lib/raid/xor/arm/xor-neon-glue.c b/lib/raid/xor/arm/xor-neon-glue.c index 7afd6294464b..cea39e019904 100644 --- a/lib/raid/xor/arm/xor-neon-glue.c +++ b/lib/raid/xor/arm/xor-neon-glue.c @@ -5,54 +5,15 @@ #include "xor_impl.h" #include "xor_arch.h" -extern struct xor_block_template const xor_block_neon_inner; - -static void -xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2) +static void xor_gen_neon(void 
*dest, void **srcs, unsigned int src_cnt, + unsigned int bytes) { kernel_neon_begin(); - xor_block_neon_inner.do_2(bytes, p1, p2); - kernel_neon_end(); -} - -static void -xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3) -{ - kernel_neon_begin(); - xor_block_neon_inner.do_3(bytes, p1, p2, p3); - kernel_neon_end(); -} - -static void -xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4) -{ - kernel_neon_begin(); - xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4); - kernel_neon_end(); -} - -static void -xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5) -{ - kernel_neon_begin(); - xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5); + xor_gen_neon_inner(dest, srcs, src_cnt, bytes); kernel_neon_end(); } struct xor_block_template xor_block_neon = { - .name = "neon", - .do_2 = xor_neon_2, - .do_3 = xor_neon_3, - .do_4 = xor_neon_4, - .do_5 = xor_neon_5 + .name = "neon", + .xor_gen = xor_gen_neon, }; diff --git a/lib/raid/xor/arm/xor-neon.c b/lib/raid/xor/arm/xor-neon.c index 806a42c5952c..23147e3a7904 100644 --- a/lib/raid/xor/arm/xor-neon.c +++ b/lib/raid/xor/arm/xor-neon.c @@ -4,6 +4,7 @@ */ #include "xor_impl.h" +#include "xor_arch.h" #ifndef __ARM_NEON__ #error You should compile this file with '-march=armv7-a -mfloat-abi=softfp -mfpu=neon' @@ -22,10 +23,4 @@ #define NO_TEMPLATE #include "../xor-8regs.c" -struct xor_block_template const xor_block_neon_inner = { - .name = "__inner_neon__", - .do_2 = xor_8regs_2, - .do_3 = xor_8regs_3, - .do_4 = xor_8regs_4, - .do_5 = xor_8regs_5, -}; +__DO_XOR_BLOCKS(neon_inner, xor_8regs_2, xor_8regs_3, xor_8regs_4, xor_8regs_5); diff --git 
a/lib/raid/xor/arm/xor.c b/lib/raid/xor/arm/xor.c index 5bd5f048bbe9..45139b6c55ea 100644 --- a/lib/raid/xor/arm/xor.c +++ b/lib/raid/xor/arm/xor.c @@ -127,10 +127,10 @@ xor_arm4regs_5(unsigned long bytes, unsigned long * __restrict p1, } while (--lines); } +DO_XOR_BLOCKS(arm4regs, xor_arm4regs_2, xor_arm4regs_3, xor_arm4regs_4, + xor_arm4regs_5); + struct xor_block_template xor_block_arm4regs = { - .name = "arm4regs", - .do_2 = xor_arm4regs_2, - .do_3 = xor_arm4regs_3, - .do_4 = xor_arm4regs_4, - .do_5 = xor_arm4regs_5, + .name = "arm4regs", + .xor_gen = xor_gen_arm4regs, }; diff --git a/lib/raid/xor/arm/xor_arch.h b/lib/raid/xor/arm/xor_arch.h index 5a7eedb48fbb..775ff835df65 100644 --- a/lib/raid/xor/arm/xor_arch.h +++ b/lib/raid/xor/arm/xor_arch.h @@ -7,6 +7,9 @@ extern struct xor_block_template xor_block_arm4regs; extern struct xor_block_template xor_block_neon; +void xor_gen_neon_inner(void *dest, void **srcs, unsigned int src_cnt, + unsigned int bytes); + static __always_inline void __init arch_xor_init(void) { xor_register(&xor_block_arm4regs); diff --git a/lib/raid/xor/arm64/xor-neon-glue.c b/lib/raid/xor/arm64/xor-neon-glue.c index 3db0a318cf5b..f0284f86feb4 100644 --- a/lib/raid/xor/arm64/xor-neon-glue.c +++ b/lib/raid/xor/arm64/xor-neon-glue.c @@ -10,50 +10,16 @@ #include "xor-neon.h" #define XOR_TEMPLATE(_name) \ -static void \ -xor_##_name##_2(unsigned long bytes, unsigned long * __restrict p1, \ - const unsigned long * __restrict p2) \ +static void xor_gen_##_name(void *dest, void **srcs, unsigned int src_cnt, \ + unsigned int bytes) \ { \ scoped_ksimd() \ - __xor_##_name##_2(bytes, p1, p2); \ -} \ - \ -static void \ -xor_##_name##_3(unsigned long bytes, unsigned long * __restrict p1, \ - const unsigned long * __restrict p2, \ - const unsigned long * __restrict p3) \ -{ \ - scoped_ksimd() \ - __xor_##_name##_3(bytes, p1, p2, p3); \ -} \ - \ -static void \ -xor_##_name##_4(unsigned long bytes, unsigned long * __restrict p1, \ - const unsigned long * 
__restrict p2, \ - const unsigned long * __restrict p3, \ - const unsigned long * __restrict p4) \ -{ \ - scoped_ksimd() \ - __xor_##_name##_4(bytes, p1, p2, p3, p4); \ -} \ - \ -static void \ -xor_##_name##_5(unsigned long bytes, unsigned long * __restrict p1, \ - const unsigned long * __restrict p2, \ - const unsigned long * __restrict p3, \ - const unsigned long * __restrict p4, \ - const unsigned long * __restrict p5) \ -{ \ - scoped_ksimd() \ - __xor_##_name##_5(bytes, p1, p2, p3, p4, p5); \ + xor_gen_##_name##_inner(dest, srcs, src_cnt, bytes); \ } \ \ struct xor_block_template xor_block_##_name = { \ - .name = __stringify(_name), \ - .do_2 = xor_##_name##_2, \ - .do_3 = xor_##_name##_3, \ - .do_4 = xor_##_name##_4, \ - .do_5 = xor_##_name##_5 \ + .name = __stringify(_name), \ + .xor_gen = xor_gen_##_name, \ }; XOR_TEMPLATE(neon); diff --git a/lib/raid/xor/arm64/xor-neon.c b/lib/raid/xor/arm64/xor-neon.c index 61f00c4fee49..97ef3cb92496 100644 --- a/lib/raid/xor/arm64/xor-neon.c +++ b/lib/raid/xor/arm64/xor-neon.c @@ -10,7 +10,7 @@ #include "xor_arch.h" #include "xor-neon.h" -void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, +static void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2) { uint64_t *dp1 = (uint64_t *)p1; @@ -37,7 +37,7 @@ void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } -void __xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, +static void __xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2, const unsigned long * __restrict p3) { @@ -73,7 +73,7 @@ void __xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } -void __xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, +static void __xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2, const unsigned long * __restrict p3, const 
unsigned long * __restrict p4) @@ -118,7 +118,7 @@ void __xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } -void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, +static void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2, const unsigned long * __restrict p3, const unsigned long * __restrict p4, @@ -172,6 +172,9 @@ void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } +__DO_XOR_BLOCKS(neon_inner, __xor_neon_2, __xor_neon_3, __xor_neon_4, + __xor_neon_5); + static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r) { uint64x2_t res; @@ -182,7 +185,7 @@ static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r) return res; } -void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1, +static void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2, const unsigned long * __restrict p3) { @@ -216,7 +219,7 @@ void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } -void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1, +static void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2, const unsigned long * __restrict p3, const unsigned long * __restrict p4) @@ -259,7 +262,7 @@ void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } -void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1, +static void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2, const unsigned long * __restrict p3, const unsigned long * __restrict p4, @@ -304,3 +307,6 @@ void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1, dp5 += 8; } while (--lines > 0); } + +__DO_XOR_BLOCKS(eor3_inner, __xor_neon_2, __xor_eor3_3, __xor_eor3_4, + __xor_eor3_5); 
diff --git a/lib/raid/xor/arm64/xor-neon.h b/lib/raid/xor/arm64/xor-neon.h index cec0ac846fea..514699ba8f5f 100644 --- a/lib/raid/xor/arm64/xor-neon.h +++ b/lib/raid/xor/arm64/xor-neon.h @@ -1,30 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2); -void __xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3); -void __xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4); -void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5); - -#define __xor_eor3_2 __xor_neon_2 -void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3); -void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4); -void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5); +void xor_gen_neon_inner(void *dest, void **srcs, unsigned int src_cnt, + unsigned int bytes); +void xor_gen_eor3_inner(void *dest, void **srcs, unsigned int src_cnt, + unsigned int bytes); diff --git a/lib/raid/xor/loongarch/xor_simd_glue.c b/lib/raid/xor/loongarch/xor_simd_glue.c index b387aa0213b4..7f324d924f87 100644 --- a/lib/raid/xor/loongarch/xor_simd_glue.c +++ b/lib/raid/xor/loongarch/xor_simd_glue.c @@ -11,63 +11,23 @@ #include "xor_arch.h" #include 
"xor_simd.h" -#define MAKE_XOR_GLUE_2(flavor) \ -static void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1,\ - const unsigned long * __restrict p2) \ +#define MAKE_XOR_GLUES(flavor) \ +DO_XOR_BLOCKS(flavor##_inner, __xor_##flavor##_2, __xor_##flavor##_3, \ + __xor_##flavor##_4, __xor_##flavor##_5); \ + \ +static void xor_gen_##flavor(void *dest, void **srcs, unsigned int src_cnt, \ + unsigned int bytes) \ { \ kernel_fpu_begin(); \ - __xor_##flavor##_2(bytes, p1, p2); \ + xor_gen_##flavor##_inner(dest, srcs, src_cnt, bytes); \ kernel_fpu_end(); \ } \ - -#define MAKE_XOR_GLUE_3(flavor) \ -static void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1,\ - const unsigned long * __restrict p2, \ - const unsigned long * __restrict p3) \ -{ \ - kernel_fpu_begin(); \ - __xor_##flavor##_3(bytes, p1, p2, p3); \ - kernel_fpu_end(); \ -} \ - -#define MAKE_XOR_GLUE_4(flavor) \ -static void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1,\ - const unsigned long * __restrict p2, \ - const unsigned long * __restrict p3, \ - const unsigned long * __restrict p4) \ -{ \ - kernel_fpu_begin(); \ - __xor_##flavor##_4(bytes, p1, p2, p3, p4); \ - kernel_fpu_end(); \ -} \ - -#define MAKE_XOR_GLUE_5(flavor) \ -static void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1,\ - const unsigned long * __restrict p2, \ - const unsigned long * __restrict p3, \ - const unsigned long * __restrict p4, \ - const unsigned long * __restrict p5) \ -{ \ - kernel_fpu_begin(); \ - __xor_##flavor##_5(bytes, p1, p2, p3, p4, p5); \ - kernel_fpu_end(); \ -} \ - -#define MAKE_XOR_GLUES(flavor) \ - MAKE_XOR_GLUE_2(flavor); \ - MAKE_XOR_GLUE_3(flavor); \ - MAKE_XOR_GLUE_4(flavor); \ - MAKE_XOR_GLUE_5(flavor); \ - \ -struct xor_block_template xor_block_##flavor = { \ - .name = __stringify(flavor), \ - .do_2 = xor_##flavor##_2, \ - .do_3 = xor_##flavor##_3, \ - .do_4 = xor_##flavor##_4, \ - .do_5 = xor_##flavor##_5, \ + \ +struct 
xor_block_template xor_block_##flavor = { \ + .name = __stringify(flavor), \ + .xor_gen = xor_gen_##flavor \ } - #ifdef CONFIG_CPU_HAS_LSX MAKE_XOR_GLUES(lsx); #endif /* CONFIG_CPU_HAS_LSX */ diff --git a/lib/raid/xor/powerpc/xor_vmx.c b/lib/raid/xor/powerpc/xor_vmx.c index aab49d056d18..09bed98c1bc7 100644 --- a/lib/raid/xor/powerpc/xor_vmx.c +++ b/lib/raid/xor/powerpc/xor_vmx.c @@ -10,6 +10,7 @@ * Sparse (as at v0.5.0) gets very, very confused by this file. * Make it a bit simpler for it. */ +#include "xor_impl.h" #if !defined(__CHECKER__) #include #else @@ -49,9 +50,9 @@ typedef vector signed char unative_t; V1##_3 = vec_xor(V1##_3, V2##_3); \ } while (0) -void __xor_altivec_2(unsigned long bytes, - unsigned long * __restrict v1_in, - const unsigned long * __restrict v2_in) +static void __xor_altivec_2(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in) { DEFINE(v1); DEFINE(v2); @@ -68,10 +69,10 @@ void __xor_altivec_2(unsigned long bytes, } while (--lines > 0); } -void __xor_altivec_3(unsigned long bytes, - unsigned long * __restrict v1_in, - const unsigned long * __restrict v2_in, - const unsigned long * __restrict v3_in) +static void __xor_altivec_3(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in) { DEFINE(v1); DEFINE(v2); @@ -92,11 +93,11 @@ void __xor_altivec_3(unsigned long bytes, } while (--lines > 0); } -void __xor_altivec_4(unsigned long bytes, - unsigned long * __restrict v1_in, - const unsigned long * __restrict v2_in, - const unsigned long * __restrict v3_in, - const unsigned long * __restrict v4_in) +static void __xor_altivec_4(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in, + const unsigned long * __restrict v4_in) { DEFINE(v1); DEFINE(v2); @@ -121,12 +122,12 @@ void __xor_altivec_4(unsigned long bytes, } while (--lines 
> 0); } -void __xor_altivec_5(unsigned long bytes, - unsigned long * __restrict v1_in, - const unsigned long * __restrict v2_in, - const unsigned long * __restrict v3_in, - const unsigned long * __restrict v4_in, - const unsigned long * __restrict v5_in) +static void __xor_altivec_5(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in, + const unsigned long * __restrict v4_in, + const unsigned long * __restrict v5_in) { DEFINE(v1); DEFINE(v2); @@ -154,3 +155,6 @@ void __xor_altivec_5(unsigned long bytes, v5 += 4; } while (--lines > 0); } + +__DO_XOR_BLOCKS(altivec_inner, __xor_altivec_2, __xor_altivec_3, + __xor_altivec_4, __xor_altivec_5); diff --git a/lib/raid/xor/powerpc/xor_vmx.h b/lib/raid/xor/powerpc/xor_vmx.h index 573c41d90dac..1d26c1133a86 100644 --- a/lib/raid/xor/powerpc/xor_vmx.h +++ b/lib/raid/xor/powerpc/xor_vmx.h @@ -6,17 +6,5 @@ * outside of the enable/disable altivec block. */ -void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2); -void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3); -void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4); -void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5); +void xor_gen_altivec_inner(void *dest, void **srcs, unsigned int src_cnt, + unsigned int bytes); diff --git a/lib/raid/xor/powerpc/xor_vmx_glue.c b/lib/raid/xor/powerpc/xor_vmx_glue.c index 56e99ddfb64f..dbfbb5cadc36 100644 --- a/lib/raid/xor/powerpc/xor_vmx_glue.c +++ b/lib/raid/xor/powerpc/xor_vmx_glue.c @@ 
-12,56 +12,17 @@ #include "xor_arch.h" #include "xor_vmx.h" -static void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2) +static void xor_gen_altivec(void *dest, void **srcs, unsigned int src_cnt, + unsigned int bytes) { preempt_disable(); enable_kernel_altivec(); - __xor_altivec_2(bytes, p1, p2); - disable_kernel_altivec(); - preempt_enable(); -} - -static void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3) -{ - preempt_disable(); - enable_kernel_altivec(); - __xor_altivec_3(bytes, p1, p2, p3); - disable_kernel_altivec(); - preempt_enable(); -} - -static void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4) -{ - preempt_disable(); - enable_kernel_altivec(); - __xor_altivec_4(bytes, p1, p2, p3, p4); - disable_kernel_altivec(); - preempt_enable(); -} - -static void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5) -{ - preempt_disable(); - enable_kernel_altivec(); - __xor_altivec_5(bytes, p1, p2, p3, p4, p5); + xor_gen_altivec_inner(dest, srcs, src_cnt, bytes); disable_kernel_altivec(); preempt_enable(); } struct xor_block_template xor_block_altivec = { - .name = "altivec", - .do_2 = xor_altivec_2, - .do_3 = xor_altivec_3, - .do_4 = xor_altivec_4, - .do_5 = xor_altivec_5, + .name = "altivec", + .xor_gen = xor_gen_altivec, }; diff --git a/lib/raid/xor/riscv/xor-glue.c b/lib/raid/xor/riscv/xor-glue.c index 060e5f22ebcc..2e4c1b05d998 100644 --- a/lib/raid/xor/riscv/xor-glue.c +++ b/lib/raid/xor/riscv/xor-glue.c @@ -9,48 +9,17 @@ #include "xor_impl.h" #include "xor_arch.h" -static void xor_vector_2(unsigned long bytes, 
unsigned long *__restrict p1, - const unsigned long *__restrict p2) -{ - kernel_vector_begin(); - xor_regs_2_(bytes, p1, p2); - kernel_vector_end(); -} +DO_XOR_BLOCKS(vector_inner, xor_regs_2_, xor_regs_3_, xor_regs_4_, xor_regs_5_); -static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1, - const unsigned long *__restrict p2, - const unsigned long *__restrict p3) +static void xor_gen_vector(void *dest, void **srcs, unsigned int src_cnt, + unsigned int bytes) { kernel_vector_begin(); - xor_regs_3_(bytes, p1, p2, p3); - kernel_vector_end(); -} - -static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1, - const unsigned long *__restrict p2, - const unsigned long *__restrict p3, - const unsigned long *__restrict p4) -{ - kernel_vector_begin(); - xor_regs_4_(bytes, p1, p2, p3, p4); - kernel_vector_end(); -} - -static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1, - const unsigned long *__restrict p2, - const unsigned long *__restrict p3, - const unsigned long *__restrict p4, - const unsigned long *__restrict p5) -{ - kernel_vector_begin(); - xor_regs_5_(bytes, p1, p2, p3, p4, p5); + xor_gen_vector_inner(dest, srcs, src_cnt, bytes); kernel_vector_end(); } struct xor_block_template xor_block_rvv = { - .name = "rvv", - .do_2 = xor_vector_2, - .do_3 = xor_vector_3, - .do_4 = xor_vector_4, - .do_5 = xor_vector_5 + .name = "rvv", + .xor_gen = xor_gen_vector, }; diff --git a/lib/raid/xor/s390/xor.c b/lib/raid/xor/s390/xor.c index c28cb56fec92..0c478678a129 100644 --- a/lib/raid/xor/s390/xor.c +++ b/lib/raid/xor/s390/xor.c @@ -125,10 +125,9 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, : : "0", "cc", "memory"); } +DO_XOR_BLOCKS(xc, xor_xc_2, xor_xc_3, xor_xc_4, xor_xc_5); + struct xor_block_template xor_block_xc = { - .name = "xc", - .do_2 = xor_xc_2, - .do_3 = xor_xc_3, - .do_4 = xor_xc_4, - .do_5 = xor_xc_5, + .name = "xc", + .xor_gen = xor_gen_xc, }; diff --git 
a/lib/raid/xor/sparc/xor-sparc32.c b/lib/raid/xor/sparc/xor-sparc32.c index 307c4a84f535..fb37631e90e6 100644 --- a/lib/raid/xor/sparc/xor-sparc32.c +++ b/lib/raid/xor/sparc/xor-sparc32.c @@ -244,10 +244,9 @@ sparc_5(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } +DO_XOR_BLOCKS(sparc32, sparc_2, sparc_3, sparc_4, sparc_5); + struct xor_block_template xor_block_SPARC = { - .name = "SPARC", - .do_2 = sparc_2, - .do_3 = sparc_3, - .do_4 = sparc_4, - .do_5 = sparc_5, + .name = "SPARC", + .xor_gen = xor_gen_sparc32, }; diff --git a/lib/raid/xor/sparc/xor-sparc64-glue.c b/lib/raid/xor/sparc/xor-sparc64-glue.c index 5f90c2460b54..a8a686e0d258 100644 --- a/lib/raid/xor/sparc/xor-sparc64-glue.c +++ b/lib/raid/xor/sparc/xor-sparc64-glue.c @@ -28,12 +28,11 @@ void xor_vis_5(unsigned long bytes, unsigned long * __restrict p1, /* XXX Ugh, write cheetah versions... -DaveM */ +DO_XOR_BLOCKS(vis, xor_vis_2, xor_vis_3, xor_vis_4, xor_vis_5); + struct xor_block_template xor_block_VIS = { - .name = "VIS", - .do_2 = xor_vis_2, - .do_3 = xor_vis_3, - .do_4 = xor_vis_4, - .do_5 = xor_vis_5, + .name = "VIS", + .xor_gen = xor_gen_vis, }; void xor_niagara_2(unsigned long bytes, unsigned long * __restrict p1, @@ -51,10 +50,10 @@ void xor_niagara_5(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p4, const unsigned long * __restrict p5); +DO_XOR_BLOCKS(niagara, xor_niagara_2, xor_niagara_3, xor_niagara_4, + xor_niagara_5); + struct xor_block_template xor_block_niagara = { - .name = "Niagara", - .do_2 = xor_niagara_2, - .do_3 = xor_niagara_3, - .do_4 = xor_niagara_4, - .do_5 = xor_niagara_5, + .name = "Niagara", + .xor_gen = xor_gen_niagara, }; diff --git a/lib/raid/xor/x86/xor-avx.c b/lib/raid/xor/x86/xor-avx.c index d411efa1ff43..f7777d7aa269 100644 --- a/lib/raid/xor/x86/xor-avx.c +++ b/lib/raid/xor/x86/xor-avx.c @@ -29,8 +29,6 @@ static void xor_avx_2(unsigned long bytes, unsigned long * __restrict p0, { unsigned long lines 
= bytes >> 9; - kernel_fpu_begin(); - while (lines--) { #undef BLOCK #define BLOCK(i, reg) \ @@ -47,8 +45,6 @@ do { \ p0 = (unsigned long *)((uintptr_t)p0 + 512); p1 = (unsigned long *)((uintptr_t)p1 + 512); } - - kernel_fpu_end(); } static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0, @@ -57,8 +53,6 @@ static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0, { unsigned long lines = bytes >> 9; - kernel_fpu_begin(); - while (lines--) { #undef BLOCK #define BLOCK(i, reg) \ @@ -78,8 +72,6 @@ do { \ p1 = (unsigned long *)((uintptr_t)p1 + 512); p2 = (unsigned long *)((uintptr_t)p2 + 512); } - - kernel_fpu_end(); } static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0, @@ -89,8 +81,6 @@ static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0, { unsigned long lines = bytes >> 9; - kernel_fpu_begin(); - while (lines--) { #undef BLOCK #define BLOCK(i, reg) \ @@ -113,8 +103,6 @@ do { \ p2 = (unsigned long *)((uintptr_t)p2 + 512); p3 = (unsigned long *)((uintptr_t)p3 + 512); } - - kernel_fpu_end(); } static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0, @@ -125,8 +113,6 @@ static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0, { unsigned long lines = bytes >> 9; - kernel_fpu_begin(); - while (lines--) { #undef BLOCK #define BLOCK(i, reg) \ @@ -152,14 +138,19 @@ do { \ p3 = (unsigned long *)((uintptr_t)p3 + 512); p4 = (unsigned long *)((uintptr_t)p4 + 512); } +} +DO_XOR_BLOCKS(avx_inner, xor_avx_2, xor_avx_3, xor_avx_4, xor_avx_5); + +static void xor_gen_avx(void *dest, void **srcs, unsigned int src_cnt, + unsigned int bytes) +{ + kernel_fpu_begin(); + xor_gen_avx_inner(dest, srcs, src_cnt, bytes); kernel_fpu_end(); } struct xor_block_template xor_block_avx = { - .name = "avx", - .do_2 = xor_avx_2, - .do_3 = xor_avx_3, - .do_4 = xor_avx_4, - .do_5 = xor_avx_5, + .name = "avx", + .xor_gen = xor_gen_avx, }; diff --git a/lib/raid/xor/x86/xor-mmx.c 
b/lib/raid/xor/x86/xor-mmx.c index e48c58f92874..63a8b0444fce 100644 --- a/lib/raid/xor/x86/xor-mmx.c +++ b/lib/raid/xor/x86/xor-mmx.c @@ -21,8 +21,6 @@ xor_pII_mmx_2(unsigned long bytes, unsigned long * __restrict p1, { unsigned long lines = bytes >> 7; - kernel_fpu_begin(); - asm volatile( #undef BLOCK #define BLOCK(i) \ @@ -55,8 +53,6 @@ xor_pII_mmx_2(unsigned long bytes, unsigned long * __restrict p1, "+r" (p1), "+r" (p2) : : "memory"); - - kernel_fpu_end(); } static void @@ -66,8 +62,6 @@ xor_pII_mmx_3(unsigned long bytes, unsigned long * __restrict p1, { unsigned long lines = bytes >> 7; - kernel_fpu_begin(); - asm volatile( #undef BLOCK #define BLOCK(i) \ @@ -105,8 +99,6 @@ xor_pII_mmx_3(unsigned long bytes, unsigned long * __restrict p1, "+r" (p1), "+r" (p2), "+r" (p3) : : "memory"); - - kernel_fpu_end(); } static void @@ -117,8 +109,6 @@ xor_pII_mmx_4(unsigned long bytes, unsigned long * __restrict p1, { unsigned long lines = bytes >> 7; - kernel_fpu_begin(); - asm volatile( #undef BLOCK #define BLOCK(i) \ @@ -161,8 +151,6 @@ xor_pII_mmx_4(unsigned long bytes, unsigned long * __restrict p1, "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) : : "memory"); - - kernel_fpu_end(); } @@ -175,8 +163,6 @@ xor_pII_mmx_5(unsigned long bytes, unsigned long * __restrict p1, { unsigned long lines = bytes >> 7; - kernel_fpu_begin(); - /* Make sure GCC forgets anything it knows about p4 or p5, such that it won't pass to the asm volatile below a register that is shared with any other variable. That's @@ -237,8 +223,6 @@ xor_pII_mmx_5(unsigned long bytes, unsigned long * __restrict p1, Clobber them just to be sure nobody does something stupid like assuming they have some legal value. 
*/ asm("" : "=r" (p4), "=r" (p5)); - - kernel_fpu_end(); } #undef LD @@ -255,8 +239,6 @@ xor_p5_mmx_2(unsigned long bytes, unsigned long * __restrict p1, { unsigned long lines = bytes >> 6; - kernel_fpu_begin(); - asm volatile( " .align 32 ;\n" " 1: ;\n" @@ -293,8 +275,6 @@ xor_p5_mmx_2(unsigned long bytes, unsigned long * __restrict p1, "+r" (p1), "+r" (p2) : : "memory"); - - kernel_fpu_end(); } static void @@ -304,8 +284,6 @@ xor_p5_mmx_3(unsigned long bytes, unsigned long * __restrict p1, { unsigned long lines = bytes >> 6; - kernel_fpu_begin(); - asm volatile( " .align 32,0x90 ;\n" " 1: ;\n" @@ -351,8 +329,6 @@ xor_p5_mmx_3(unsigned long bytes, unsigned long * __restrict p1, "+r" (p1), "+r" (p2), "+r" (p3) : : "memory" ); - - kernel_fpu_end(); } static void @@ -363,8 +339,6 @@ xor_p5_mmx_4(unsigned long bytes, unsigned long * __restrict p1, { unsigned long lines = bytes >> 6; - kernel_fpu_begin(); - asm volatile( " .align 32,0x90 ;\n" " 1: ;\n" @@ -419,8 +393,6 @@ xor_p5_mmx_4(unsigned long bytes, unsigned long * __restrict p1, "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) : : "memory"); - - kernel_fpu_end(); } static void @@ -432,8 +404,6 @@ xor_p5_mmx_5(unsigned long bytes, unsigned long * __restrict p1, { unsigned long lines = bytes >> 6; - kernel_fpu_begin(); - /* Make sure GCC forgets anything it knows about p4 or p5, such that it won't pass to the asm volatile below a register that is shared with any other variable. That's @@ -510,22 +480,36 @@ xor_p5_mmx_5(unsigned long bytes, unsigned long * __restrict p1, Clobber them just to be sure nobody does something stupid like assuming they have some legal value. 
*/ asm("" : "=r" (p4), "=r" (p5)); +} +DO_XOR_BLOCKS(pII_mmx_inner, xor_pII_mmx_2, xor_pII_mmx_3, xor_pII_mmx_4, + xor_pII_mmx_5); + +static void xor_gen_pII_mmx(void *dest, void **srcs, unsigned int src_cnt, + unsigned int bytes) +{ + kernel_fpu_begin(); + xor_gen_pII_mmx_inner(dest, srcs, src_cnt, bytes); kernel_fpu_end(); } struct xor_block_template xor_block_pII_mmx = { - .name = "pII_mmx", - .do_2 = xor_pII_mmx_2, - .do_3 = xor_pII_mmx_3, - .do_4 = xor_pII_mmx_4, - .do_5 = xor_pII_mmx_5, + .name = "pII_mmx", + .xor_gen = xor_gen_pII_mmx, }; +DO_XOR_BLOCKS(p5_mmx_inner, xor_p5_mmx_2, xor_p5_mmx_3, xor_p5_mmx_4, + xor_p5_mmx_5); + +static void xor_gen_p5_mmx(void *dest, void **srcs, unsigned int src_cnt, + unsigned int bytes) +{ + kernel_fpu_begin(); + xor_gen_p5_mmx_inner(dest, srcs, src_cnt, bytes); + kernel_fpu_end(); +} + struct xor_block_template xor_block_p5_mmx = { - .name = "p5_mmx", - .do_2 = xor_p5_mmx_2, - .do_3 = xor_p5_mmx_3, - .do_4 = xor_p5_mmx_4, - .do_5 = xor_p5_mmx_5, + .name = "p5_mmx", + .xor_gen = xor_gen_p5_mmx, }; diff --git a/lib/raid/xor/x86/xor-sse.c b/lib/raid/xor/x86/xor-sse.c index 5993ed688c15..c6626ecae6ba 100644 --- a/lib/raid/xor/x86/xor-sse.c +++ b/lib/raid/xor/x86/xor-sse.c @@ -51,8 +51,6 @@ xor_sse_2(unsigned long bytes, unsigned long * __restrict p1, { unsigned long lines = bytes >> 8; - kernel_fpu_begin(); - asm volatile( #undef BLOCK #define BLOCK(i) \ @@ -93,8 +91,6 @@ xor_sse_2(unsigned long bytes, unsigned long * __restrict p1, [p1] "+r" (p1), [p2] "+r" (p2) : [inc] XOR_CONSTANT_CONSTRAINT (256UL) : "memory"); - - kernel_fpu_end(); } static void @@ -103,8 +99,6 @@ xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1, { unsigned long lines = bytes >> 8; - kernel_fpu_begin(); - asm volatile( #undef BLOCK #define BLOCK(i) \ @@ -128,8 +122,6 @@ xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1, [p1] "+r" (p1), [p2] "+r" (p2) : [inc] XOR_CONSTANT_CONSTRAINT (256UL) : "memory"); - - 
kernel_fpu_end(); } static void @@ -139,8 +131,6 @@ xor_sse_3(unsigned long bytes, unsigned long * __restrict p1, { unsigned long lines = bytes >> 8; - kernel_fpu_begin(); - asm volatile( #undef BLOCK #define BLOCK(i) \ @@ -188,8 +178,6 @@ xor_sse_3(unsigned long bytes, unsigned long * __restrict p1, [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) : [inc] XOR_CONSTANT_CONSTRAINT (256UL) : "memory"); - - kernel_fpu_end(); } static void @@ -199,8 +187,6 @@ xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1, { unsigned long lines = bytes >> 8; - kernel_fpu_begin(); - asm volatile( #undef BLOCK #define BLOCK(i) \ @@ -226,8 +212,6 @@ xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1, [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) : [inc] XOR_CONSTANT_CONSTRAINT (256UL) : "memory"); - - kernel_fpu_end(); } static void @@ -238,8 +222,6 @@ xor_sse_4(unsigned long bytes, unsigned long * __restrict p1, { unsigned long lines = bytes >> 8; - kernel_fpu_begin(); - asm volatile( #undef BLOCK #define BLOCK(i) \ @@ -294,8 +276,6 @@ xor_sse_4(unsigned long bytes, unsigned long * __restrict p1, [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) : [inc] XOR_CONSTANT_CONSTRAINT (256UL) : "memory"); - - kernel_fpu_end(); } static void @@ -306,8 +286,6 @@ xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1, { unsigned long lines = bytes >> 8; - kernel_fpu_begin(); - asm volatile( #undef BLOCK #define BLOCK(i) \ @@ -335,8 +313,6 @@ xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1, [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) : [inc] XOR_CONSTANT_CONSTRAINT (256UL) : "memory"); - - kernel_fpu_end(); } static void @@ -348,8 +324,6 @@ xor_sse_5(unsigned long bytes, unsigned long * __restrict p1, { unsigned long lines = bytes >> 8; - kernel_fpu_begin(); - asm volatile( #undef BLOCK #define BLOCK(i) \ @@ -411,8 +385,6 @@ xor_sse_5(unsigned long bytes, unsigned long * __restrict p1, [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5) : 
[inc] XOR_CONSTANT_CONSTRAINT (256UL) : "memory"); - - kernel_fpu_end(); } static void @@ -424,8 +396,6 @@ xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1, { unsigned long lines = bytes >> 8; - kernel_fpu_begin(); - asm volatile( #undef BLOCK #define BLOCK(i) \ @@ -455,22 +425,35 @@ xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1, [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5) : [inc] XOR_CONSTANT_CONSTRAINT (256UL) : "memory"); +} +DO_XOR_BLOCKS(sse_inner, xor_sse_2, xor_sse_3, xor_sse_4, xor_sse_5); + +static void xor_gen_sse(void *dest, void **srcs, unsigned int src_cnt, + unsigned int bytes) +{ + kernel_fpu_begin(); + xor_gen_sse_inner(dest, srcs, src_cnt, bytes); kernel_fpu_end(); } struct xor_block_template xor_block_sse = { - .name = "sse", - .do_2 = xor_sse_2, - .do_3 = xor_sse_3, - .do_4 = xor_sse_4, - .do_5 = xor_sse_5, + .name = "sse", + .xor_gen = xor_gen_sse, }; +DO_XOR_BLOCKS(sse_pf64_inner, xor_sse_2_pf64, xor_sse_3_pf64, xor_sse_4_pf64, + xor_sse_5_pf64); + +static void xor_gen_sse_pf64(void *dest, void **srcs, unsigned int src_cnt, + unsigned int bytes) +{ + kernel_fpu_begin(); + xor_gen_sse_pf64_inner(dest, srcs, src_cnt, bytes); + kernel_fpu_end(); +} + struct xor_block_template xor_block_sse_pf64 = { - .name = "prefetch64-sse", - .do_2 = xor_sse_2_pf64, - .do_3 = xor_sse_3_pf64, - .do_4 = xor_sse_4_pf64, - .do_5 = xor_sse_5_pf64, + .name = "prefetch64-sse", + .xor_gen = xor_gen_sse_pf64, }; diff --git a/lib/raid/xor/xor-32regs-prefetch.c b/lib/raid/xor/xor-32regs-prefetch.c index 2856a8e50cb8..ade2a7d8cbe2 100644 --- a/lib/raid/xor/xor-32regs-prefetch.c +++ b/lib/raid/xor/xor-32regs-prefetch.c @@ -258,10 +258,10 @@ xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1, goto once_more; } +DO_XOR_BLOCKS(32regs_p, xor_32regs_p_2, xor_32regs_p_3, xor_32regs_p_4, + xor_32regs_p_5); + struct xor_block_template xor_block_32regs_p = { - .name = "32regs_prefetch", - .do_2 = xor_32regs_p_2, - .do_3 = 
xor_32regs_p_3, - .do_4 = xor_32regs_p_4, - .do_5 = xor_32regs_p_5, + .name = "32regs_prefetch", + .xor_gen = xor_gen_32regs_p, }; diff --git a/lib/raid/xor/xor-32regs.c b/lib/raid/xor/xor-32regs.c index cc44d64032fa..acb4a10d1e95 100644 --- a/lib/raid/xor/xor-32regs.c +++ b/lib/raid/xor/xor-32regs.c @@ -209,10 +209,9 @@ xor_32regs_5(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } +DO_XOR_BLOCKS(32regs, xor_32regs_2, xor_32regs_3, xor_32regs_4, xor_32regs_5); + struct xor_block_template xor_block_32regs = { - .name = "32regs", - .do_2 = xor_32regs_2, - .do_3 = xor_32regs_3, - .do_4 = xor_32regs_4, - .do_5 = xor_32regs_5, + .name = "32regs", + .xor_gen = xor_gen_32regs, }; diff --git a/lib/raid/xor/xor-8regs-prefetch.c b/lib/raid/xor/xor-8regs-prefetch.c index 1d53aec50d27..451527a951b1 100644 --- a/lib/raid/xor/xor-8regs-prefetch.c +++ b/lib/raid/xor/xor-8regs-prefetch.c @@ -136,10 +136,11 @@ xor_8regs_p_5(unsigned long bytes, unsigned long * __restrict p1, goto once_more; } + +DO_XOR_BLOCKS(8regs_p, xor_8regs_p_2, xor_8regs_p_3, xor_8regs_p_4, + xor_8regs_p_5); + struct xor_block_template xor_block_8regs_p = { - .name = "8regs_prefetch", - .do_2 = xor_8regs_p_2, - .do_3 = xor_8regs_p_3, - .do_4 = xor_8regs_p_4, - .do_5 = xor_8regs_p_5, + .name = "8regs_prefetch", + .xor_gen = xor_gen_8regs_p, }; diff --git a/lib/raid/xor/xor-8regs.c b/lib/raid/xor/xor-8regs.c index 72a44e898c55..1edaed8acffe 100644 --- a/lib/raid/xor/xor-8regs.c +++ b/lib/raid/xor/xor-8regs.c @@ -94,11 +94,10 @@ xor_8regs_5(unsigned long bytes, unsigned long * __restrict p1, } #ifndef NO_TEMPLATE +DO_XOR_BLOCKS(8regs, xor_8regs_2, xor_8regs_3, xor_8regs_4, xor_8regs_5); + struct xor_block_template xor_block_8regs = { - .name = "8regs", - .do_2 = xor_8regs_2, - .do_3 = xor_8regs_3, - .do_4 = xor_8regs_4, - .do_5 = xor_8regs_5, + .name = "8regs", + .xor_gen = xor_gen_8regs, }; #endif /* NO_TEMPLATE */ diff --git a/lib/raid/xor/xor-core.c b/lib/raid/xor/xor-core.c index 
2e46b6b83b0a..9e043d8c3a7a 100644 --- a/lib/raid/xor/xor-core.c +++ b/lib/raid/xor/xor-core.c @@ -13,39 +13,9 @@ #include #include "xor_impl.h" -/* The xor routines to use. */ +/* The xor routine to use. */ static struct xor_block_template *active_template; -void -xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs) -{ - unsigned long *p1, *p2, *p3, *p4; - - WARN_ON_ONCE(!in_task() || irqs_disabled() || softirq_count()); - - p1 = (unsigned long *) srcs[0]; - if (src_count == 1) { - active_template->do_2(bytes, dest, p1); - return; - } - - p2 = (unsigned long *) srcs[1]; - if (src_count == 2) { - active_template->do_3(bytes, dest, p1, p2); - return; - } - - p3 = (unsigned long *) srcs[2]; - if (src_count == 3) { - active_template->do_4(bytes, dest, p1, p2, p3); - return; - } - - p4 = (unsigned long *) srcs[3]; - active_template->do_5(bytes, dest, p1, p2, p3, p4); -} -EXPORT_SYMBOL(xor_blocks); - /** * xor_gen - generate RAID-style XOR information * @dest: destination vector @@ -63,20 +33,11 @@ EXPORT_SYMBOL(xor_blocks); */ void xor_gen(void *dest, void **srcs, unsigned int src_cnt, unsigned int bytes) { - unsigned int src_off = 0; - - WARN_ON_ONCE(in_interrupt()); + WARN_ON_ONCE(!in_task() || irqs_disabled() || softirq_count()); WARN_ON_ONCE(bytes == 0); WARN_ON_ONCE(bytes & 511); - while (src_cnt > 0) { - unsigned int this_cnt = min(src_cnt, MAX_XOR_BLOCKS); - - xor_blocks(this_cnt, bytes, dest, srcs + src_off); - - src_cnt -= this_cnt; - src_off += this_cnt; - } + active_template->xor_gen(dest, srcs, src_cnt, bytes); } EXPORT_SYMBOL(xor_gen); @@ -120,6 +81,7 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2) int speed; unsigned long reps; ktime_t min, start, t0; + void *srcs[1] = { b2 }; preempt_disable(); @@ -130,7 +92,7 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2) cpu_relax(); do { mb(); /* prevent loop optimization */ - tmpl->do_2(BENCH_SIZE, b1, b2); + tmpl->xor_gen(b1, srcs, 1, BENCH_SIZE); 
mb(); } while (reps++ < REPS || (t0 = ktime_get()) == start); min = ktime_sub(t0, start); diff --git a/lib/raid/xor/xor_impl.h b/lib/raid/xor/xor_impl.h index 44b6c99e2093..09ae2916f71e 100644 --- a/lib/raid/xor/xor_impl.h +++ b/lib/raid/xor/xor_impl.h @@ -3,27 +3,47 @@ #define _XOR_IMPL_H #include +#include struct xor_block_template { struct xor_block_template *next; const char *name; int speed; - void (*do_2)(unsigned long, unsigned long * __restrict, - const unsigned long * __restrict); - void (*do_3)(unsigned long, unsigned long * __restrict, - const unsigned long * __restrict, - const unsigned long * __restrict); - void (*do_4)(unsigned long, unsigned long * __restrict, - const unsigned long * __restrict, - const unsigned long * __restrict, - const unsigned long * __restrict); - void (*do_5)(unsigned long, unsigned long * __restrict, - const unsigned long * __restrict, - const unsigned long * __restrict, - const unsigned long * __restrict, - const unsigned long * __restrict); + void (*xor_gen)(void *dest, void **srcs, unsigned int src_cnt, + unsigned int bytes); }; +#define __DO_XOR_BLOCKS(_name, _handle1, _handle2, _handle3, _handle4) \ +void \ +xor_gen_##_name(void *dest, void **srcs, unsigned int src_cnt, \ + unsigned int bytes) \ +{ \ + unsigned int src_off = 0; \ + \ + while (src_cnt > 0) { \ + unsigned int this_cnt = min(src_cnt, 4); \ + \ + if (this_cnt == 1) \ + _handle1(bytes, dest, srcs[src_off]); \ + else if (this_cnt == 2) \ + _handle2(bytes, dest, srcs[src_off], \ + srcs[src_off + 1]); \ + else if (this_cnt == 3) \ + _handle3(bytes, dest, srcs[src_off], \ + srcs[src_off + 1], srcs[src_off + 2]); \ + else \ + _handle4(bytes, dest, srcs[src_off], \ + srcs[src_off + 1], srcs[src_off + 2], \ + srcs[src_off + 3]); \ + \ + src_cnt -= this_cnt; \ + src_off += this_cnt; \ + } \ +} + +#define DO_XOR_BLOCKS(_name, _handle1, _handle2, _handle3, _handle4) \ + static __DO_XOR_BLOCKS(_name, _handle1, _handle2, _handle3, _handle4) + /* generic implementations */ 
extern struct xor_block_template xor_block_8regs; extern struct xor_block_template xor_block_32regs; From a21921dd02d305a440ab0e8f6d991a797db04f85 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:16:59 +0100 Subject: [PATCH 096/127] xor: use static_call for xor_gen Avoid the indirect call for xor_generation by using a static_call. Link: https://lkml.kernel.org/r/20260327061704.3707577-28-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- lib/raid/xor/xor-core.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/raid/xor/xor-core.c b/lib/raid/xor/xor-core.c index 9e043d8c3a7a..bd4e6e434418 100644 --- a/lib/raid/xor/xor-core.c +++ b/lib/raid/xor/xor-core.c @@ -11,10 +11,10 @@ #include #include #include +#include #include "xor_impl.h" -/* The xor routine to use. 
*/ -static struct xor_block_template *active_template; +DEFINE_STATIC_CALL_NULL(xor_gen_impl, *xor_block_8regs.xor_gen); /** * xor_gen - generate RAID-style XOR information @@ -37,13 +37,13 @@ void xor_gen(void *dest, void **srcs, unsigned int src_cnt, unsigned int bytes) WARN_ON_ONCE(bytes == 0); WARN_ON_ONCE(bytes & 511); - active_template->xor_gen(dest, srcs, src_cnt, bytes); + static_call(xor_gen_impl)(dest, srcs, src_cnt, bytes); } EXPORT_SYMBOL(xor_gen); /* Set of all registered templates. */ static struct xor_block_template *__initdata template_list; -static bool __initdata xor_forced = false; +static struct xor_block_template *forced_template; /** * xor_register - register a XOR template @@ -69,7 +69,7 @@ void __init xor_register(struct xor_block_template *tmpl) */ void __init xor_force(struct xor_block_template *tmpl) { - active_template = tmpl; + forced_template = tmpl; } #define BENCH_SIZE 4096 @@ -111,7 +111,7 @@ static int __init calibrate_xor_blocks(void) void *b1, *b2; struct xor_block_template *f, *fastest; - if (xor_forced) + if (forced_template) return 0; b1 = (void *) __get_free_pages(GFP_KERNEL, 2); @@ -128,7 +128,7 @@ static int __init calibrate_xor_blocks(void) if (f->speed > fastest->speed) fastest = f; } - active_template = fastest; + static_call_update(xor_gen_impl, fastest->xor_gen); pr_info("xor: using function: %s (%d MB/sec)\n", fastest->name, fastest->speed); @@ -156,10 +156,10 @@ static int __init xor_init(void) * If this arch/cpu has a short-circuited selection, don't loop through * all the possible functions, just use the best one. */ - if (active_template) { + if (forced_template) { pr_info("xor: automatically using best checksumming function %-10s\n", - active_template->name); - xor_forced = true; + forced_template->name); + static_call_update(xor_gen_impl, forced_template->xor_gen); return 0; } @@ -170,7 +170,7 @@ static int __init xor_init(void) * Pick the first template as the temporary default until calibration * happens. 
*/ - active_template = template_list; + static_call_update(xor_gen_impl, template_list->xor_gen); return 0; #endif } From af53e85ef797d45b364edf330eb008639b5c98c2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2026 07:17:00 +0100 Subject: [PATCH 097/127] xor: add a kunit test case Add a test case for the XOR routines loosely based on the CRC kunit test. Link: https://lkml.kernel.org/r/20260327061704.3707577-29-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Tested-by: Eric Biggers Cc: Albert Ou Cc: Alexander Gordeev Cc: Alexandre Ghiti Cc: Andreas Larsson Cc: Anton Ivanov Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: "Borislav Petkov (AMD)" Cc: Catalin Marinas Cc: Chris Mason Cc: Christian Borntraeger Cc: Dan Williams Cc: David S. Miller Cc: David Sterba Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Huacai Chen Cc: Ingo Molnar Cc: Jason A. Donenfeld Cc: Johannes Berg Cc: Li Nan Cc: Madhavan Srinivasan Cc: Magnus Lindholm Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Richard Henderson Cc: Richard Weinberger Cc: Russell King Cc: Song Liu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- lib/raid/.kunitconfig | 3 + lib/raid/Kconfig | 11 ++ lib/raid/xor/Makefile | 2 +- lib/raid/xor/tests/Makefile | 3 + lib/raid/xor/tests/xor_kunit.c | 187 +++++++++++++++++++++++++++++++++ 5 files changed, 205 insertions(+), 1 deletion(-) create mode 100644 lib/raid/.kunitconfig create mode 100644 lib/raid/xor/tests/Makefile create mode 100644 lib/raid/xor/tests/xor_kunit.c diff --git a/lib/raid/.kunitconfig b/lib/raid/.kunitconfig new file mode 100644 index 000000000000..351d22ed1954 --- /dev/null +++ b/lib/raid/.kunitconfig @@ -0,0 +1,3 @@ +CONFIG_KUNIT=y +CONFIG_BTRFS_FS=y +CONFIG_XOR_KUNIT_TEST=y diff --git a/lib/raid/Kconfig b/lib/raid/Kconfig index 81cb3f9c0a7b..5ab2b0a7be4c 100644 --- a/lib/raid/Kconfig +++ b/lib/raid/Kconfig @@ -17,3 
+17,14 @@ config XOR_BLOCKS_ARCH default y if X86_32 default y if X86_64 bool + +config XOR_KUNIT_TEST + tristate "KUnit tests for xor_gen" if !KUNIT_ALL_TESTS + depends on KUNIT + depends on XOR_BLOCKS + default KUNIT_ALL_TESTS + help + Unit tests for the XOR library functions. + + This is intended to help people writing architecture-specific + optimized versions. If unsure, say N. diff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile index df55823c4d82..4d633dfd5b90 100644 --- a/lib/raid/xor/Makefile +++ b/lib/raid/xor/Makefile @@ -29,7 +29,7 @@ xor-$(CONFIG_SPARC64) += sparc/xor-sparc64.o sparc/xor-sparc64-glue.o xor-$(CONFIG_S390) += s390/xor.o xor-$(CONFIG_X86_32) += x86/xor-avx.o x86/xor-sse.o x86/xor-mmx.o xor-$(CONFIG_X86_64) += x86/xor-avx.o x86/xor-sse.o - +obj-y += tests/ CFLAGS_arm/xor-neon.o += $(CC_FLAGS_FPU) CFLAGS_REMOVE_arm/xor-neon.o += $(CC_FLAGS_NO_FPU) diff --git a/lib/raid/xor/tests/Makefile b/lib/raid/xor/tests/Makefile new file mode 100644 index 000000000000..661e8f6ffd1f --- /dev/null +++ b/lib/raid/xor/tests/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_XOR_KUNIT_TEST) += xor_kunit.o diff --git a/lib/raid/xor/tests/xor_kunit.c b/lib/raid/xor/tests/xor_kunit.c new file mode 100644 index 000000000000..0c2a3a420bf9 --- /dev/null +++ b/lib/raid/xor/tests/xor_kunit.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Unit test the XOR library functions. + * + * Copyright 2024 Google LLC + * Copyright 2026 Christoph Hellwig + * + * Based on the CRC tests by Eric Biggers . 
+ */ +#include +#include +#include +#include +#include + +#define XOR_KUNIT_SEED 42 +#define XOR_KUNIT_MAX_BYTES 16384 +#define XOR_KUNIT_MAX_BUFFERS 64 +#define XOR_KUNIT_NUM_TEST_ITERS 1000 + +static struct rnd_state rng; +static void *test_buffers[XOR_KUNIT_MAX_BUFFERS]; +static void *test_dest; +static void *test_ref; +static size_t test_buflen; + +static u32 rand32(void) +{ + return prandom_u32_state(&rng); +} + +/* Reference implementation using dumb byte-wise XOR */ +static void xor_ref(void *dest, void **srcs, unsigned int src_cnt, + unsigned int bytes) +{ + unsigned int off, idx; + u8 *d = dest; + + for (off = 0; off < bytes; off++) { + for (idx = 0; idx < src_cnt; idx++) { + u8 *src = srcs[idx]; + + d[off] ^= src[off]; + } + } +} + +/* Generate a random length that is a multiple of 512. */ +static unsigned int random_length(unsigned int max_length) +{ + return round_up((rand32() % max_length) + 1, 512); +} + +/* Generate a random alignment that is a multiple of 64. */ +static unsigned int random_alignment(unsigned int max_alignment) +{ + return ((rand32() % max_alignment) + 1) & ~63; +} + +static void xor_generate_random_data(void) +{ + int i; + + prandom_bytes_state(&rng, test_dest, test_buflen); + memcpy(test_ref, test_dest, test_buflen); + for (i = 0; i < XOR_KUNIT_MAX_BUFFERS; i++) + prandom_bytes_state(&rng, test_buffers[i], test_buflen); +} + +/* Test that xor_gen gives the same result as a reference implementation. */ +static void xor_test(struct kunit *test) +{ + void *aligned_buffers[XOR_KUNIT_MAX_BUFFERS]; + size_t i; + + for (i = 0; i < XOR_KUNIT_NUM_TEST_ITERS; i++) { + unsigned int nr_buffers = + (rand32() % XOR_KUNIT_MAX_BUFFERS) + 1; + unsigned int len = random_length(XOR_KUNIT_MAX_BYTES); + unsigned int max_alignment, align = 0; + void *buffers; + + if (rand32() % 8 == 0) + /* Refresh the data occasionally. */ + xor_generate_random_data(); + + /* + * If we're not using the entire buffer size, inject randomize + * alignment into the buffer. 
+ */ + max_alignment = XOR_KUNIT_MAX_BYTES - len; + if (max_alignment == 0) { + buffers = test_buffers; + } else if (rand32() % 2 == 0) { + /* Use random alignments mod 64 */ + int j; + + for (j = 0; j < nr_buffers; j++) + aligned_buffers[j] = test_buffers[j] + + random_alignment(max_alignment); + buffers = aligned_buffers; + align = random_alignment(max_alignment); + } else { + /* Go up to the guard page, to catch buffer overreads */ + int j; + + align = test_buflen - len; + for (j = 0; j < nr_buffers; j++) + aligned_buffers[j] = test_buffers[j] + align; + buffers = aligned_buffers; + } + + /* + * Compute the XOR, and verify that it equals the XOR computed + * by a simple byte-at-a-time reference implementation. + */ + xor_ref(test_ref + align, buffers, nr_buffers, len); + xor_gen(test_dest + align, buffers, nr_buffers, len); + KUNIT_EXPECT_MEMEQ_MSG(test, test_ref + align, + test_dest + align, len, + "Wrong result with buffers=%u, len=%u, unaligned=%s, at_end=%s", + nr_buffers, len, + str_yes_no(max_alignment), + str_yes_no(align + len == test_buflen)); + } +} + +static struct kunit_case xor_test_cases[] = { + KUNIT_CASE(xor_test), + {}, +}; + +static int xor_suite_init(struct kunit_suite *suite) +{ + int i; + + /* + * Allocate the test buffer using vmalloc() with a page-aligned length + * so that it is immediately followed by a guard page. This allows + * buffer overreads to be detected, even in assembly code. 
+ */ + test_buflen = round_up(XOR_KUNIT_MAX_BYTES, PAGE_SIZE); + test_ref = vmalloc(test_buflen); + if (!test_ref) + return -ENOMEM; + test_dest = vmalloc(test_buflen); + if (!test_dest) + goto out_free_ref; + for (i = 0; i < XOR_KUNIT_MAX_BUFFERS; i++) { + test_buffers[i] = vmalloc(test_buflen); + if (!test_buffers[i]) + goto out_free_buffers; + } + + prandom_seed_state(&rng, XOR_KUNIT_SEED); + xor_generate_random_data(); + return 0; + +out_free_buffers: + while (--i >= 0) + vfree(test_buffers[i]); + vfree(test_dest); +out_free_ref: + vfree(test_ref); + return -ENOMEM; +} + +static void xor_suite_exit(struct kunit_suite *suite) +{ + int i; + + vfree(test_ref); + vfree(test_dest); + for (i = 0; i < XOR_KUNIT_MAX_BUFFERS; i++) + vfree(test_buffers[i]); +} + +static struct kunit_suite xor_test_suite = { + .name = "xor", + .test_cases = xor_test_cases, + .suite_init = xor_suite_init, + .suite_exit = xor_suite_exit, +}; +kunit_test_suite(xor_test_suite); + +MODULE_DESCRIPTION("Unit test for the XOR library functions"); +MODULE_LICENSE("GPL"); From 237213776d0fd62487da513b55732cfb20f7eee8 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 20 Mar 2026 18:09:37 +0000 Subject: [PATCH 098/127] ubifs: remove unnecessary cond_resched() from list_sort() compare Patch series "lib/list_sort: Clean up list_sort() scheduling workarounds", v3. Historically, list_sort() included a hack in merge_final() that periodically invoked dummy cmp(priv, b, b) calls when merging highly unbalanced lists. This allowed the caller to invoke cond_resched() within their comparison callbacks to avoid soft lockups. However, an audit of the kernel tree shows that fs/ubifs/ has been the sole user of this mechanism. For all other generic list_sort() users, this results in wasted function calls and unnecessary overhead in a tight loop. 
Recent discussions and code inspection confirmed that the lists being sorted in UBIFS are bounded in size (a few thousand elements at most), and the comparison functions are extremely lightweight. Therefore, UBIFS does not actually need to rely on this mechanism. This patch (of 2): Historically, UBIFS embedded cond_resched() calls inside its list_sort() comparison callbacks (data_nodes_cmp, nondata_nodes_cmp, and replay_entries_cmp) to prevent soft lockups when sorting long lists. However, further inspection by Richard Weinberger reveals that these compare functions are extremely lightweight and do not perform any blocking MTD I/O. Furthermore, the lists being sorted are strictly bounded in size: - In the GC case, the list contains at most the number of nodes that fit into a single LEB. - In the replay case, the list spans across a few LEBs from the UBIFS journal, amounting to at most a few thousand elements. Since the compare functions are called a few thousand times at most, the overhead of frequent scheduling points is unjustified. Removing the cond_resched() calls simplifies the comparison logic and reduces unnecessary context switch checks during the sort. 
Link: https://lkml.kernel.org/r/20260320180938.1827148-1-visitorckw@gmail.com Link: https://lkml.kernel.org/r/20260320180938.1827148-2-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Zhihao Cheng Acked-by: Richard Weinberger Cc: Ching-Chun (Jim) Huang Cc: Christoph Hellwig Cc: Mars Cheng Cc: Yu-Chun Lin Signed-off-by: Andrew Morton --- fs/ubifs/gc.c | 2 -- fs/ubifs/replay.c | 1 - 2 files changed, 3 deletions(-) diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 0bf08b7755b8..933c79b5cd6b 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -109,7 +109,6 @@ static int data_nodes_cmp(void *priv, const struct list_head *a, struct ubifs_info *c = priv; struct ubifs_scan_node *sa, *sb; - cond_resched(); if (a == b) return 0; @@ -153,7 +152,6 @@ static int nondata_nodes_cmp(void *priv, const struct list_head *a, struct ubifs_info *c = priv; struct ubifs_scan_node *sa, *sb; - cond_resched(); if (a == b) return 0; diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index a9a568f4a868..263045e05cf1 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -305,7 +305,6 @@ static int replay_entries_cmp(void *priv, const struct list_head *a, struct ubifs_info *c = priv; struct replay_entry *ra, *rb; - cond_resched(); if (a == b) return 0; From 86bda539fbcf17c077b7ff4899968a2dc7c31e2d Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 20 Mar 2026 18:09:38 +0000 Subject: [PATCH 099/127] lib/list_sort: remove dummy cmp() calls to speed up merge_final() Historically, list_sort() implemented a hack in merge_final(): if (unlikely(!++count)) cmp(priv, b, b); This was introduced 16 years ago in commit 835cc0c8477f ("lib: more scalable list_sort()") so that callers could periodically invoke cond_resched() within their comparison functions when merging highly unbalanced lists. An audit of the kernel tree reveals that fs/ubifs/ was the sole user of this mechanism. 
Recent discussions and inspections by Richard Weinberger confirm that UBIFS lists are strictly bounded in size (a few thousand elements at most), meaning it does not strictly rely on these dummy callbacks to prevent soft lockups. For the vast majority of list_sort() users (such as block layer IO schedulers and file systems), this hack results in completely wasted function calls. In the worst-case scenario (merging an already sorted list where 'a' is exhausted quickly), it results in approximately (N/2)/256 unnecessary cmp() invocations. Remove the dummy cmp(priv, b, b) fallback from merge_final(). This saves unnecessary function calls, avoids branching overhead in the tight loop, and slightly speeds up the final merge step for all generic list_sort() users. [akpm@linux-foundation.org: remove now-unused local] Link: https://lkml.kernel.org/r/20260320180938.1827148-3-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Christoph Hellwig Cc: Ching-Chun (Jim) Huang Cc: Mars Cheng Cc: Richard Weinberger Cc: Yu-Chun Lin Cc: Zhihao Cheng Signed-off-by: Andrew Morton --- lib/list_sort.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/lib/list_sort.c b/lib/list_sort.c index a310ecb7ccc0..ff99203f208f 100644 --- a/lib/list_sort.c +++ b/lib/list_sort.c @@ -50,7 +50,6 @@ static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, struct list_head *a, struct list_head *b) { struct list_head *tail = head; - u8 count = 0; for (;;) { /* if equal, take 'a' -- important for sort stability */ @@ -76,15 +75,6 @@ static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, /* Finish linking remainder of list b on to tail */ tail->next = b; do { - /* - * If the merge is highly unbalanced (e.g. the input is - * already sorted), this loop may run many iterations. - * Continue callbacks to the client even though no - * element comparison is needed, so the client's cmp() - * routine can invoke cond_resched() periodically. 
- */ - if (unlikely(!++count)) - cmp(priv, b, b); b->prev = tail; tail = b; b = b->next; From 07b7d66e65d9cfe6b9c2c34aa22cfcaac37a5c45 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Thu, 26 Mar 2026 22:49:01 +0100 Subject: [PATCH 100/127] lib/scatterlist: fix length calculations in extract_kvec_to_sg Patch series "Fix bugs in extract_iter_to_sg()", v3. Fix bugs in the kvec and user variants of extract_iter_to_sg. This series is growing due to useful remarks made by sashiko.dev. The main bugs are: - The length for an sglist entry when extracting from a kvec can exceed the number of bytes in the page. This is obviously not intended. - When extracting a user buffer the sglist is temporarily used as a scratch buffer for extracted page pointers. If the sglist already contains some elements this scratch buffer could overlap with existing entries in the sglist. The series adds test cases to the kunit_iov_iter test that demonstrate all of these bugs. Additionally, there is a memory leak fix for the test itself. The bugs were originally introduced into kernel v6.3 where the function lived in fs/netfs/iterator.c. It was later moved to lib/scatterlist.c in v6.5. Thus the actual fix is only marked for backports to v6.5+. This patch (of 5): When extracting from a kvec to a scatterlist, do not cross page boundaries. The required length was already calculated but not used as intended. Adjust the copied length if the loop runs out of sglist entries without extracting everything. While there, return immediately from extract_iter_to_sg if there are no sglist entries at all. A subsequent commit will add kunit test cases that demonstrate that the patch is necessary. Link: https://lkml.kernel.org/r/20260326214905.818170-1-lk@c--e.de Link: https://lkml.kernel.org/r/20260326214905.818170-2-lk@c--e.de Fixes: 018584697533 ("netfs: Add a function to extract an iterator into a scatterlist") Signed-off-by: Christian A. 
Ehrhardt Cc: David Gow Cc: David Howells Cc: Kees Cook Cc: Petr Mladek Cc: [v6.5+] Signed-off-by: Andrew Morton --- lib/scatterlist.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index d773720d11bf..befdc4b9c11d 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -1247,7 +1247,7 @@ static ssize_t extract_kvec_to_sg(struct iov_iter *iter, else page = virt_to_page((void *)kaddr); - sg_set_page(sg, page, len, off); + sg_set_page(sg, page, seg, off); sgtable->nents++; sg++; sg_max--; @@ -1256,6 +1256,7 @@ static ssize_t extract_kvec_to_sg(struct iov_iter *iter, kaddr += PAGE_SIZE; off = 0; } while (len > 0 && sg_max > 0); + ret -= len; if (maxsize <= 0 || sg_max == 0) break; @@ -1409,7 +1410,7 @@ ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t maxsize, struct sg_table *sgtable, unsigned int sg_max, iov_iter_extraction_t extraction_flags) { - if (maxsize == 0) + if (maxsize == 0 || sg_max == 0) return 0; switch (iov_iter_type(iter)) { From 118cf3f55975352ac357fb194405031458186819 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Thu, 26 Mar 2026 22:49:02 +0100 Subject: [PATCH 101/127] lib/scatterlist: fix temp buffer in extract_user_to_sg() Instead of allocating a temporary buffer for extracted user pages extract_user_to_sg() uses the end of the to be filled scatterlist as a temporary buffer. Fix the calculation of the start address if the scatterlist already contains elements. The unused space starts at sgtable->sgl + sgtable->nents not directly at sgtable->nents and the temporary buffer is placed at the end of this unused space. A subsequent commit will add kunit test cases that demonstrate that the patch is necessary. Pointed out by sashiko.dev on a previous iteration of this series. Link: https://lkml.kernel.org/r/20260326214905.818170-3-lk@c--e.de Fixes: 018584697533 ("netfs: Add a function to extract an iterator into a scatterlist") Signed-off-by: Christian A. 
Ehrhardt Cc: David Howells Cc: David Gow Cc: Kees Cook Cc: Petr Mladek Cc: [v6.5+] Signed-off-by: Andrew Morton --- lib/scatterlist.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index befdc4b9c11d..b7fe91ef35b8 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -1123,8 +1123,7 @@ static ssize_t extract_user_to_sg(struct iov_iter *iter, size_t len, off; /* We decant the page list into the tail of the scatterlist */ - pages = (void *)sgtable->sgl + - array_size(sg_max, sizeof(struct scatterlist)); + pages = (void *)sg + array_size(sg_max, sizeof(struct scatterlist)); pages -= sg_max; do { From 0b49c7d0ae697fcecd7377cb7dda220f7cd096ff Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Thu, 26 Mar 2026 22:49:03 +0100 Subject: [PATCH 102/127] lib: kunit_iov_iter: fix memory leaks Use vfree() instead of vunmap() to free the buffer allocated by iov_kunit_create_buffer() because vunmap() does not honour VM_MAP_PUT_PAGES. In order for this to work the page array itself must not be managed by kunit. Remove the folio_put() when destroying a folioq. This is handled by vfree(), now. Pointed out by sashiko.dev on a previous iteration of this series. Tested by running the kunit test 10000 times in a loop. Link: https://lkml.kernel.org/r/20260326214905.818170-4-lk@c--e.de Fixes: 2d71340ff1d4 ("iov_iter: Kunit tests for copying to/from an iterator") Signed-off-by: Christian A. 
Ehrhardt Cc: David Howells Cc: David Gow Cc: Kees Cook Cc: Petr Mladek Signed-off-by: Andrew Morton --- lib/tests/kunit_iov_iter.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/lib/tests/kunit_iov_iter.c b/lib/tests/kunit_iov_iter.c index bb847e5010eb..d16449bdb833 100644 --- a/lib/tests/kunit_iov_iter.c +++ b/lib/tests/kunit_iov_iter.c @@ -42,7 +42,7 @@ static inline u8 pattern(unsigned long x) static void iov_kunit_unmap(void *data) { - vunmap(data); + vfree(data); } static void *__init iov_kunit_create_buffer(struct kunit *test, @@ -53,17 +53,22 @@ static void *__init iov_kunit_create_buffer(struct kunit *test, unsigned long got; void *buffer; - pages = kunit_kcalloc(test, npages, sizeof(struct page *), GFP_KERNEL); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pages); + pages = kzalloc_objs(struct page *, npages, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pages); *ppages = pages; got = alloc_pages_bulk(GFP_KERNEL, npages, pages); if (got != npages) { release_pages(pages, got); + kvfree(pages); KUNIT_ASSERT_EQ(test, got, npages); } buffer = vmap(pages, npages, VM_MAP | VM_MAP_PUT_PAGES, PAGE_KERNEL); + if (buffer == NULL) { + release_pages(pages, got); + kvfree(pages); + } KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buffer); kunit_add_action_or_reset(test, iov_kunit_unmap, buffer); @@ -369,9 +374,6 @@ static void iov_kunit_destroy_folioq(void *data) for (folioq = data; folioq; folioq = next) { next = folioq->next; - for (int i = 0; i < folioq_nr_slots(folioq); i++) - if (folioq_folio(folioq, i)) - folio_put(folioq_folio(folioq, i)); kfree(folioq); } } From 7278aa840b69e5a31826329ef893230d67cae811 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Thu, 26 Mar 2026 22:49:04 +0100 Subject: [PATCH 103/127] lib: kunit_iov_iter: improve error detection In the kunit_iov_iter test prevent the kernel buffer from being a single physically contiguous region. 
Additionally, make sure that the test pattern written to a page in the buffer depends on the offset of the page within the buffer. Link: https://lkml.kernel.org/r/20260326214905.818170-5-lk@c--e.de Signed-off-by: Christian A. Ehrhardt Cc: David Howells Cc: David Gow Cc: Kees Cook Cc: Petr Mladek Signed-off-by: Andrew Morton --- lib/tests/kunit_iov_iter.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/tests/kunit_iov_iter.c b/lib/tests/kunit_iov_iter.c index d16449bdb833..64a4e2f3eafa 100644 --- a/lib/tests/kunit_iov_iter.c +++ b/lib/tests/kunit_iov_iter.c @@ -13,6 +13,7 @@ #include #include #include +#include #include MODULE_DESCRIPTION("iov_iter testing"); @@ -37,7 +38,7 @@ static const struct kvec_test_range kvec_test_ranges[] = { static inline u8 pattern(unsigned long x) { - return x & 0xff; + return (u8)x + (u8)(x >> 8) + (u8)(x >> 16); } static void iov_kunit_unmap(void *data) @@ -52,6 +53,7 @@ static void *__init iov_kunit_create_buffer(struct kunit *test, struct page **pages; unsigned long got; void *buffer; + unsigned int i; pages = kzalloc_objs(struct page *, npages, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pages); @@ -63,6 +65,9 @@ static void *__init iov_kunit_create_buffer(struct kunit *test, kvfree(pages); KUNIT_ASSERT_EQ(test, got, npages); } + /* Make sure that we don't get a physically contiguous buffer. */ + for (i = 0; i < npages / 4; ++i) + swap(pages[i], pages[i + npages / 2]); buffer = vmap(pages, npages, VM_MAP | VM_MAP_PUT_PAGES, PAGE_KERNEL); if (buffer == NULL) { From 0913b7554726aac089cab89b6f0877dafc30b2a0 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Thu, 26 Mar 2026 22:49:05 +0100 Subject: [PATCH 104/127] lib: kunit_iov_iter: add tests for extract_iter_to_sg Add test cases that test extract_iter_to_sg. For each iterator type an iterator is loaded with a suitable buffer. The iterator is then extracted to a scatterlist with multiple calls to extract_iter_to_sg. 
The final scatterlist is copied into a scratch buffer. The test passes if the scratch buffer contains the same data as the original buffer. The new tests demonstrate bugs in extract_iter_to_sg for kvec and user iterators that are fixed by the previous commits. Link: https://lkml.kernel.org/r/20260326214905.818170-6-lk@c--e.de Signed-off-by: Christian A. Ehrhardt Cc: David Howells Cc: David Gow Cc: Kees Cook Cc: Petr Mladek Signed-off-by: Andrew Morton --- lib/tests/kunit_iov_iter.c | 203 +++++++++++++++++++++++++++++++++++++ 1 file changed, 203 insertions(+) diff --git a/lib/tests/kunit_iov_iter.c b/lib/tests/kunit_iov_iter.c index 64a4e2f3eafa..37bd6eb25896 100644 --- a/lib/tests/kunit_iov_iter.c +++ b/lib/tests/kunit_iov_iter.c @@ -13,7 +13,9 @@ #include #include #include +#include #include +#include #include MODULE_DESCRIPTION("iov_iter testing"); @@ -1016,6 +1018,202 @@ stop: KUNIT_SUCCEED(test); } +struct iov_kunit_iter_to_sg_data { + struct sg_table *sgt; + u8 *buffer, *scratch; + u8 __user *ubuf; + struct page **pages; + size_t npages; +}; + +static void __init +iov_kunit_iter_unpin_sgt(void *data) +{ + struct sg_table *sgt = data; + + for (unsigned int i = 0; i < sgt->nents; ++i) + unpin_user_page(sg_page(&sgt->sgl[i])); +} + +static void __init +iov_kunit_iter_to_sg_init(struct kunit *test, size_t bufsize, bool user, + struct iov_kunit_iter_to_sg_data *data) +{ + struct page **spages; + struct scatterlist *sg; + unsigned long uaddr; + size_t i; + + data->npages = bufsize / PAGE_SIZE; + sg = kunit_kmalloc_array(test, data->npages, sizeof(*sg), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sg); + sg_init_table(sg, data->npages); + data->sgt = kunit_kzalloc(test, sizeof(*data->sgt), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, data->sgt); + data->sgt->orig_nents = 0; + data->sgt->sgl = sg; + + data->buffer = NULL; + data->ubuf = NULL; + if (user) { + uaddr = kunit_vm_mmap(test, NULL, 0, bufsize, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | 
MAP_PRIVATE, 0); + KUNIT_ASSERT_NE(test, uaddr, 0); + data->ubuf = (u8 __user *)uaddr; + for (i = 0; i < bufsize; ++i) + put_user(pattern(i), data->ubuf + i); + } else { + data->buffer = iov_kunit_create_buffer(test, &data->pages, + data->npages); + for (i = 0; i < bufsize; ++i) + data->buffer[i] = pattern(i); + } + data->scratch = iov_kunit_create_buffer(test, &spages, data->npages); + memset(data->scratch, 0, bufsize); +} + +static void __init +iov_kunit_iter_to_sg_check(struct kunit *test, struct iov_iter *iter, + size_t bufsize, + struct iov_kunit_iter_to_sg_data *data) +{ + static const size_t tail = 16 * PAGE_SIZE; + size_t i; + + KUNIT_ASSERT_LT(test, tail, bufsize); + + if (iov_iter_extract_will_pin(iter)) + kunit_add_action_or_reset(test, iov_kunit_iter_unpin_sgt, + data->sgt); + + i = extract_iter_to_sg(iter, bufsize, data->sgt, 0, 0); + KUNIT_ASSERT_EQ(test, i, 0); + KUNIT_ASSERT_EQ(test, data->sgt->nents, 0); + + i = extract_iter_to_sg(iter, bufsize - tail, data->sgt, 1, 0); + KUNIT_ASSERT_LE(test, i, bufsize - tail); + KUNIT_ASSERT_EQ(test, data->sgt->nents, 1); + + i += extract_iter_to_sg(iter, bufsize - tail - i, data->sgt, + data->npages - data->sgt->nents, 0); + KUNIT_ASSERT_EQ(test, i, bufsize - tail); + KUNIT_ASSERT_LE(test, data->sgt->nents, data->npages); + + i += extract_iter_to_sg(iter, tail, data->sgt, + data->npages - data->sgt->nents, 0); + KUNIT_ASSERT_EQ(test, i, bufsize); + KUNIT_ASSERT_LE(test, data->sgt->nents, data->npages); + + sg_mark_end(&data->sgt->sgl[data->sgt->nents - 1]); + + i = sg_copy_to_buffer(data->sgt->sgl, data->sgt->nents, + data->scratch, bufsize); + KUNIT_ASSERT_EQ(test, i, bufsize); + + for (i = 0; i < bufsize; ++i) { + KUNIT_EXPECT_EQ_MSG(test, data->scratch[i], pattern(i), + "at i=%zx", i); + if (data->scratch[i] != pattern(i)) + break; + } + + KUNIT_EXPECT_EQ(test, i, bufsize); +} + +static void __init iov_kunit_iter_to_sg_kvec(struct kunit *test) +{ + struct iov_kunit_iter_to_sg_data data; + struct iov_iter 
iter; + struct kvec kvec; + size_t bufsize; + + bufsize = 0x100000; + iov_kunit_iter_to_sg_init(test, bufsize, false, &data); + + kvec.iov_base = data.buffer; + kvec.iov_len = bufsize; + iov_iter_kvec(&iter, READ, &kvec, 1, bufsize); + + iov_kunit_iter_to_sg_check(test, &iter, bufsize, &data); +} + +static void __init iov_kunit_iter_to_sg_bvec(struct kunit *test) +{ + struct iov_kunit_iter_to_sg_data data; + struct page *p, *can_merge = NULL; + size_t i, k, bufsize; + struct bio_vec *bvec; + struct iov_iter iter; + + bufsize = 0x100000; + iov_kunit_iter_to_sg_init(test, bufsize, false, &data); + + bvec = kunit_kmalloc_array(test, data.npages, sizeof(*bvec), + GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bvec); + k = 0; + for (i = 0; i < data.npages; ++i) { + p = data.pages[i]; + if (p == can_merge) + bvec[k-1].bv_len += PAGE_SIZE; + else + bvec_set_page(&bvec[k++], p, PAGE_SIZE, 0); + can_merge = p + 1; + } + iov_iter_bvec(&iter, READ, bvec, k, bufsize); + + iov_kunit_iter_to_sg_check(test, &iter, bufsize, &data); +} + +static void __init iov_kunit_iter_to_sg_folioq(struct kunit *test) +{ + struct iov_kunit_iter_to_sg_data data; + struct folio_queue *folioq; + struct iov_iter iter; + size_t bufsize; + + bufsize = 0x100000; + iov_kunit_iter_to_sg_init(test, bufsize, false, &data); + + folioq = iov_kunit_create_folioq(test); + iov_kunit_load_folioq(test, &iter, READ, folioq, data.pages, + data.npages); + + iov_kunit_iter_to_sg_check(test, &iter, bufsize, &data); +} + +static void __init iov_kunit_iter_to_sg_xarray(struct kunit *test) +{ + struct iov_kunit_iter_to_sg_data data; + struct xarray *xarray; + struct iov_iter iter; + size_t bufsize; + + bufsize = 0x100000; + iov_kunit_iter_to_sg_init(test, bufsize, false, &data); + + xarray = iov_kunit_create_xarray(test); + iov_kunit_load_xarray(test, &iter, READ, xarray, data.pages, + data.npages); + + iov_kunit_iter_to_sg_check(test, &iter, bufsize, &data); +} + +static void __init iov_kunit_iter_to_sg_ubuf(struct 
kunit *test) +{ + struct iov_kunit_iter_to_sg_data data; + struct iov_iter iter; + size_t bufsize; + + bufsize = 0x100000; + iov_kunit_iter_to_sg_init(test, bufsize, true, &data); + + iov_iter_ubuf(&iter, READ, data.ubuf, bufsize); + + iov_kunit_iter_to_sg_check(test, &iter, bufsize, &data); +} + static struct kunit_case __refdata iov_kunit_cases[] = { KUNIT_CASE(iov_kunit_copy_to_kvec), KUNIT_CASE(iov_kunit_copy_from_kvec), @@ -1029,6 +1227,11 @@ static struct kunit_case __refdata iov_kunit_cases[] = { KUNIT_CASE(iov_kunit_extract_pages_bvec), KUNIT_CASE(iov_kunit_extract_pages_folioq), KUNIT_CASE(iov_kunit_extract_pages_xarray), + KUNIT_CASE(iov_kunit_iter_to_sg_kvec), + KUNIT_CASE(iov_kunit_iter_to_sg_bvec), + KUNIT_CASE(iov_kunit_iter_to_sg_folioq), + KUNIT_CASE(iov_kunit_iter_to_sg_xarray), + KUNIT_CASE(iov_kunit_iter_to_sg_ubuf), {} }; From d01684a2f0f84a3d4210bb76a7ca62c4253b8e93 Mon Sep 17 00:00:00 2001 From: Inseob Kim Date: Thu, 26 Mar 2026 11:06:04 +0900 Subject: [PATCH 105/127] lib: parser: fix match_wildcard to correctly handle trailing stars This fixes a bug in match_wildcard that incorrectly handles trailing asterisks. For example, `match_wildcard("abc**", "abc")` must return true, but it returns false. 
Link: https://lkml.kernel.org/r/20260326020630.4139520-1-inseob@google.com Signed-off-by: Inseob Kim Cc: Changbin Du Cc: Jason Baron Cc: Joe Perches Cc: Josh Law Signed-off-by: Andrew Morton --- lib/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/parser.c b/lib/parser.c index 73e8f8e5be73..62da0ac0d438 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -315,7 +315,7 @@ bool match_wildcard(const char *pattern, const char *str) } } - if (*p == '*') + while (*p == '*') ++p; return !*p; } From 03738dd159db60a27378e9b5b93fd187218519ba Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Wed, 25 Feb 2026 14:03:44 +0800 Subject: [PATCH 106/127] crash_dump/dm-crypt: don't print in arch-specific code Patch series "kdump: Enable LUKS-encrypted dump target support in ARM64 and PowerPC", v5. CONFIG_CRASH_DM_CRYPT has been introduced to support LUKS-encrypted device dump target by addressing two challenges [1], - Kdump kernel may not be able to decrypt the LUKS partition. For some machines, a system administrator may not have a chance to enter the password to decrypt the device in kdump initramfs after the 1st kernel crashes - LUKS2 by default use the memory-hard Argon2 key derivation function which is quite memory-consuming compared to the limited memory reserved for kdump. To also enable this feature for ARM64 and PowerPC, we need to add a device tree property dmcryptkeys [2] as similar to elfcorehdr to pass the memory address of the stored info of dm-crypt keys to the kdump kernel. This patch (of 3): When the vmcore dumping target is not a LUKS-encrypted target, it's expected that there is no dm-crypt key thus no need to return -ENOENT. Also print more logs in crash_load_dm_crypt_keys. The benefit is arch-specific code can be more succinct. 
Link: https://lkml.kernel.org/r/20260225060347.718905-1-coxu@redhat.com Link: https://lkml.kernel.org/r/20260225060347.718905-2-coxu@redhat.com Link: https://lore.kernel.org/all/20250502011246.99238-1-coxu@redhat.com/ [1] Link: https://github.com/devicetree-org/dt-schema/pull/181 [2] Signed-off-by: Coiby Xu Suggested-by: Will Deacon Acked-by: Baoquan He Cc: Arnaud Lefebvre Cc: Christophe Leroy (CS GROUP) Cc: Dave Young Cc: Kairui Song Cc: Krzysztof Kozlowski Cc: Pingfan Liu Cc: Rob Herring Cc: Sourabh Jain Cc: Thomas Staudt Signed-off-by: Andrew Morton --- arch/x86/kernel/kexec-bzimage64.c | 6 +----- kernel/crash_dump_dm_crypt.c | 7 +++++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 5630c7dca1f3..7e980ea49d8d 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -525,12 +525,8 @@ static void *bzImage64_load(struct kimage *image, char *kernel, if (ret) return ERR_PTR(ret); ret = crash_load_dm_crypt_keys(image); - if (ret == -ENOENT) { - kexec_dprintk("No dm crypt key to load\n"); - } else if (ret) { - pr_err("Failed to load dm crypt keys\n"); + if (ret) return ERR_PTR(ret); - } if (image->dm_crypt_keys_addr && cmdline_len + MAX_ELFCOREHDR_STR_LEN + MAX_DMCRYPTKEYS_STR_LEN > header->cmdline_size) { diff --git a/kernel/crash_dump_dm_crypt.c b/kernel/crash_dump_dm_crypt.c index 8638b821ce58..cb875ddb6ba6 100644 --- a/kernel/crash_dump_dm_crypt.c +++ b/kernel/crash_dump_dm_crypt.c @@ -415,14 +415,16 @@ int crash_load_dm_crypt_keys(struct kimage *image) if (key_count <= 0) { kexec_dprintk("No dm-crypt keys\n"); - return -ENOENT; + return 0; } if (!is_dm_key_reused) { image->dm_crypt_keys_addr = 0; r = build_keys_header(); - if (r) + if (r) { + pr_err("Failed to build dm-crypt keys header, ret=%d\n", r); return r; + } } kbuf.buffer = keys_header; @@ -433,6 +435,7 @@ int crash_load_dm_crypt_keys(struct kimage *image) kbuf.mem = 
KEXEC_BUF_MEM_UNKNOWN; r = kexec_add_buffer(&kbuf); if (r) { + pr_err("Failed to call kexec_add_buffer, ret=%d\n", r); kvfree((void *)kbuf.buffer); return r; } From fe74eb289163d10b34f8ee571cdc3257306f343f Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Wed, 25 Feb 2026 14:03:45 +0800 Subject: [PATCH 107/127] crash: align the declaration of crash_load_dm_crypt_keys with CONFIG_CRASH_DM_CRYPT This will prevent a compilation failure when CONFIG_CRASH_DUMP is enabled but CONFIG_CRASH_DM_CRYPT is disabled, arch/powerpc/kexec/elf_64.c: In function 'elf64_load': >> arch/powerpc/kexec/elf_64.c:82:23: error: implicit declaration of function 'crash_load_dm_crypt_keys' [-Werror=implicit-function-declaration] 82 | ret = crash_load_dm_crypt_keys(image); | ^~~~~~~~~~~~~~~~~~~~~~~~ cc1: some warnings being treated as errors Link: https://lkml.kernel.org/r/20260225060347.718905-3-coxu@redhat.com Signed-off-by: Coiby Xu Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202602120648.RgQALnnI-lkp@intel.com/ Acked-by: Baoquan He Cc: Arnaud Lefebvre Cc: Christophe Leroy (CS GROUP) Cc: Dave Young Cc: Kairui Song Cc: Krzysztof Kozlowski Cc: Pingfan Liu Cc: Rob Herring Cc: Sourabh Jain Cc: Thomas Staudt Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/crash_core.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index d35726d6a415..c1dee3f971a9 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -34,13 +34,6 @@ static inline void arch_kexec_protect_crashkres(void) { } static inline void arch_kexec_unprotect_crashkres(void) { } #endif -#ifdef CONFIG_CRASH_DM_CRYPT -int crash_load_dm_crypt_keys(struct kimage *image); -ssize_t dm_crypt_keys_read(char *buf, size_t count, u64 *ppos); -#else -static inline int crash_load_dm_crypt_keys(struct kimage *image) {return 0; } -#endif - #ifndef arch_crash_handle_hotplug_event static inline void 
arch_crash_handle_hotplug_event(struct kimage *image, void *arg) { } #endif @@ -96,4 +89,11 @@ static inline void crash_save_cpu(struct pt_regs *regs, int cpu) {}; static inline int kimage_crash_copy_vmcoreinfo(struct kimage *image) { return 0; }; #endif /* CONFIG_CRASH_DUMP*/ +#ifdef CONFIG_CRASH_DM_CRYPT +int crash_load_dm_crypt_keys(struct kimage *image); +ssize_t dm_crypt_keys_read(char *buf, size_t count, u64 *ppos); +#else +static inline int crash_load_dm_crypt_keys(struct kimage *image) {return 0; } +#endif + #endif /* LINUX_CRASH_CORE_H */ From e3a84be1ec2fd6f06e54bb31642412864d65280f Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Wed, 25 Feb 2026 14:03:46 +0800 Subject: [PATCH 108/127] arm64,ppc64le/kdump: pass dm-crypt keys to kdump kernel CONFIG_CRASH_DM_CRYPT has been introduced to support LUKS-encrypted device dump target by addressing two challenges [1], - Kdump kernel may not be able to decrypt the LUKS partition. For some machines, a system administrator may not have a chance to enter the password to decrypt the device in kdump initramfs after the 1st kernel crashes - LUKS2 by default use the memory-hard Argon2 key derivation function which is quite memory-consuming compared to the limited memory reserved for kdump. To also enable this feature for ARM64 and PowerPC, the missing piece is to let the kdump kernel know where to find the dm-crypt keys which are randomly stored in memory reserved for kdump. Introduce a new device tree property dmcryptkeys [2] as similar to elfcorehdr to pass the memory address of the stored info of dm-crypt keys to the kdump kernel. Since this property is only needed by the kdump kernel, it won't be exposed to userspace. 
Link: https://lkml.kernel.org/r/20260225060347.718905-4-coxu@redhat.com Link: https://lore.kernel.org/all/20250502011246.99238-1-coxu@redhat.com/ [1] Link: https://github.com/devicetree-org/dt-schema/pull/181 [2] Signed-off-by: Coiby Xu Acked-by: Rob Herring (Arm) Reviewed-by: Sourabh Jain Cc: Arnaud Lefebvre Cc: Baoquan he Cc: Dave Young Cc: Kairui Song Cc: Pingfan Liu Cc: Krzysztof Kozlowski Cc: Thomas Staudt Cc: Will Deacon Cc: Christophe Leroy (CS GROUP) Signed-off-by: Andrew Morton --- arch/arm64/kernel/machine_kexec_file.c | 4 ++++ arch/powerpc/kexec/elf_64.c | 4 ++++ drivers/of/fdt.c | 21 +++++++++++++++++++++ drivers/of/kexec.c | 19 +++++++++++++++++++ 4 files changed, 48 insertions(+) diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index fba260ad87a9..e31fabed378a 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -134,6 +134,10 @@ int load_other_segments(struct kimage *image, kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", image->elf_load_addr, kbuf.bufsz, kbuf.memsz); + + ret = crash_load_dm_crypt_keys(image); + if (ret) + goto out_err; } #endif diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c index 5d6d616404cf..ea50a072debf 100644 --- a/arch/powerpc/kexec/elf_64.c +++ b/arch/powerpc/kexec/elf_64.c @@ -79,6 +79,10 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, goto out; } + ret = crash_load_dm_crypt_keys(image); + if (ret) + goto out; + /* Setup cmdline for kdump kernel case */ modified_cmdline = setup_kdump_cmdline(image, cmdline, cmdline_len); diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 331646d667b9..2967e4aff807 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -866,6 +866,26 @@ static void __init early_init_dt_check_for_elfcorehdr(unsigned long node) elfcorehdr_addr, elfcorehdr_size); } +static void __init early_init_dt_check_for_dmcryptkeys(unsigned long node) +{ + const 
char *prop_name = "linux,dmcryptkeys"; + const __be32 *prop; + + if (!IS_ENABLED(CONFIG_CRASH_DM_CRYPT)) + return; + + pr_debug("Looking for dmcryptkeys property... "); + + prop = of_get_flat_dt_prop(node, prop_name, NULL); + if (!prop) + return; + + dm_crypt_keys_addr = dt_mem_next_cell(dt_root_addr_cells, &prop); + + /* Property only accessible to crash dump kernel */ + fdt_delprop(initial_boot_params, node, prop_name); +} + static unsigned long chosen_node_offset = -FDT_ERR_NOTFOUND; /* @@ -1097,6 +1117,7 @@ int __init early_init_dt_scan_chosen(char *cmdline) early_init_dt_check_for_initrd(node); early_init_dt_check_for_elfcorehdr(node); + early_init_dt_check_for_dmcryptkeys(node); rng_seed = of_get_flat_dt_prop(node, "rng-seed", &l); if (rng_seed && l > 0) { diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c index c4cf3552c018..fbd253f0d3c5 100644 --- a/drivers/of/kexec.c +++ b/drivers/of/kexec.c @@ -423,6 +423,25 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, if (ret) goto out; + if (image->dm_crypt_keys_addr != 0) { + ret = fdt_appendprop_addrrange(fdt, 0, chosen_node, + "linux,dmcryptkeys", + image->dm_crypt_keys_addr, + image->dm_crypt_keys_sz); + + if (ret) + goto out; + + /* + * Avoid dmcryptkeys from being stomped on in kdump kernel by + * setting up memory reserve map. + */ + ret = fdt_add_mem_rsv(fdt, image->dm_crypt_keys_addr, + image->dm_crypt_keys_sz); + if (ret) + goto out; + } + #ifdef CONFIG_CRASH_DUMP /* add linux,usable-memory-range */ ret = fdt_appendprop_addrrange(fdt, 0, chosen_node, From 5686459423d03d192134166f0ec45f98bb2d5d57 Mon Sep 17 00:00:00 2001 From: Yufan Chen Date: Mon, 30 Mar 2026 23:34:28 +0800 Subject: [PATCH 109/127] ocfs2/heartbeat: fix slot mapping rollback leaks on error paths o2hb_map_slot_data() allocates hr_tmp_block, hr_slots, hr_slot_data, and pages in stages. If a later allocation fails, the current code returns without unwinding the earlier allocations. 
o2hb_region_dev_store() also leaves slot mapping resources behind when setup aborts, and it keeps hr_aborted_start/hr_node_deleted set across retries. That leaves stale state behind after a failed start. Factor the slot cleanup into o2hb_unmap_slot_data(), use it from both o2hb_map_slot_data() and o2hb_region_release(), and call it from the dev_store() rollback after stopping a started heartbeat thread. While freeing pages, clear each hr_slot_data entry as it is released, and reset the start state before each new setup attempt. This closes the slot mapping leak on allocation/setup failure paths and keeps failed setup attempts retryable. Link: https://lkml.kernel.org/r/20260330153428.19586-1-yufan.chen@linux.dev Signed-off-by: Yufan Chen Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Signed-off-by: Andrew Morton --- fs/ocfs2/cluster/heartbeat.c | 83 ++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 27 deletions(-) diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index fe1949578336..d12784aaaa4b 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1488,33 +1488,45 @@ static struct o2hb_region *to_o2hb_region(struct config_item *item) return item ? container_of(item, struct o2hb_region, hr_item) : NULL; } -/* drop_item only drops its ref after killing the thread, nothing should - * be using the region anymore. this has to clean up any state that - * attributes might have built up. 
*/ -static void o2hb_region_release(struct config_item *item) +static void o2hb_unmap_slot_data(struct o2hb_region *reg) { int i; struct page *page; - struct o2hb_region *reg = to_o2hb_region(item); - - mlog(ML_HEARTBEAT, "hb region release (%pg)\n", reg_bdev(reg)); - - kfree(reg->hr_tmp_block); if (reg->hr_slot_data) { for (i = 0; i < reg->hr_num_pages; i++) { page = reg->hr_slot_data[i]; - if (page) + if (page) { __free_page(page); + reg->hr_slot_data[i] = NULL; + } } kfree(reg->hr_slot_data); + reg->hr_slot_data = NULL; } + kfree(reg->hr_slots); + reg->hr_slots = NULL; + + kfree(reg->hr_tmp_block); + reg->hr_tmp_block = NULL; +} + +/* drop_item only drops its ref after killing the thread, nothing should + * be using the region anymore. this has to clean up any state that + * attributes might have built up. + */ +static void o2hb_region_release(struct config_item *item) +{ + struct o2hb_region *reg = to_o2hb_region(item); + + mlog(ML_HEARTBEAT, "hb region release (%pg)\n", reg_bdev(reg)); + + o2hb_unmap_slot_data(reg); + if (reg->hr_bdev_file) fput(reg->hr_bdev_file); - kfree(reg->hr_slots); - debugfs_remove_recursive(reg->hr_debug_dir); kfree(reg->hr_db_livenodes); kfree(reg->hr_db_regnum); @@ -1667,6 +1679,7 @@ static void o2hb_init_region_params(struct o2hb_region *reg) static int o2hb_map_slot_data(struct o2hb_region *reg) { int i, j; + int ret = -ENOMEM; unsigned int last_slot; unsigned int spp = reg->hr_slots_per_page; struct page *page; @@ -1674,14 +1687,14 @@ static int o2hb_map_slot_data(struct o2hb_region *reg) struct o2hb_disk_slot *slot; reg->hr_tmp_block = kmalloc(reg->hr_block_bytes, GFP_KERNEL); - if (reg->hr_tmp_block == NULL) - return -ENOMEM; + if (!reg->hr_tmp_block) + goto out; reg->hr_slots = kzalloc_objs(struct o2hb_disk_slot, reg->hr_blocks); - if (reg->hr_slots == NULL) - return -ENOMEM; + if (!reg->hr_slots) + goto out; - for(i = 0; i < reg->hr_blocks; i++) { + for (i = 0; i < reg->hr_blocks; i++) { slot = ®->hr_slots[i]; 
slot->ds_node_num = i; INIT_LIST_HEAD(&slot->ds_live_item); @@ -1695,12 +1708,12 @@ static int o2hb_map_slot_data(struct o2hb_region *reg) reg->hr_slot_data = kzalloc_objs(struct page *, reg->hr_num_pages); if (!reg->hr_slot_data) - return -ENOMEM; + goto out; - for(i = 0; i < reg->hr_num_pages; i++) { + for (i = 0; i < reg->hr_num_pages; i++) { page = alloc_page(GFP_KERNEL); if (!page) - return -ENOMEM; + goto out; reg->hr_slot_data[i] = page; @@ -1720,6 +1733,10 @@ static int o2hb_map_slot_data(struct o2hb_region *reg) } return 0; + +out: + o2hb_unmap_slot_data(reg); + return ret; } /* Read in all the slots available and populate the tracking @@ -1809,9 +1826,11 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, "blocksize %u incorrect for device, expected %d", reg->hr_block_bytes, sectsize); ret = -EINVAL; - goto out3; + goto out; } + reg->hr_aborted_start = 0; + reg->hr_node_deleted = 0; o2hb_init_region_params(reg); /* Generation of zero is invalid */ @@ -1823,13 +1842,13 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, ret = o2hb_map_slot_data(reg); if (ret) { mlog_errno(ret); - goto out3; + goto out; } ret = o2hb_populate_slot_data(reg); if (ret) { mlog_errno(ret); - goto out3; + goto out; } INIT_DELAYED_WORK(®->hr_write_timeout_work, o2hb_write_timeout); @@ -1860,7 +1879,7 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, if (IS_ERR(hb_task)) { ret = PTR_ERR(hb_task); mlog_errno(ret); - goto out3; + goto out; } spin_lock(&o2hb_live_lock); @@ -1877,12 +1896,12 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, if (reg->hr_aborted_start) { ret = -EIO; - goto out3; + goto out; } if (reg->hr_node_deleted) { ret = -EINVAL; - goto out3; + goto out; } /* Ok, we were woken. 
Make sure it wasn't by drop_item() */ @@ -1901,8 +1920,18 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%pg)\n", config_item_name(®->hr_item), reg_bdev(reg)); -out3: +out: if (ret < 0) { + spin_lock(&o2hb_live_lock); + hb_task = reg->hr_task; + reg->hr_task = NULL; + spin_unlock(&o2hb_live_lock); + + if (hb_task) + kthread_stop(hb_task); + + o2hb_unmap_slot_data(reg); + fput(reg->hr_bdev_file); reg->hr_bdev_file = NULL; } From 16c4f0211aaa1ec1422b11b59f64f1abe9009fc0 Mon Sep 17 00:00:00 2001 From: Yiyang Chen Date: Mon, 30 Mar 2026 03:00:40 +0800 Subject: [PATCH 110/127] taskstats: set version in TGID exit notifications delay accounting started populating taskstats records with a valid version field via fill_pid() and fill_tgid(). Later, commit ad4ecbcba728 ("[PATCH] delay accounting taskstats interface send tgid once") changed the TGID exit path to send the cached signal->stats aggregate directly instead of building the outgoing record through fill_tgid(). Unlike fill_tgid(), fill_tgid_exit() only accumulates accounting data and never initializes stats->version. As a result, TGID exit notifications can reach userspace with version == 0 even though PID exit notifications and TASKSTATS_CMD_GET replies carry a valid taskstats version. This is easy to reproduce with `tools/accounting/getdelays.c`. I have a small follow-up patch for that tool which: 1. increases the receive buffer/message size so the pid+tgid combined exit notification is not dropped/truncated 2. prints `stats->version`. With that patch, the reproducer is: Terminal 1: ./getdelays -d -v -l -m 0 Terminal 2: taskset -c 0 python3 -c 'import threading,time; t=threading.Thread(target=time.sleep,args=(0.1,)); t.start(); t.join()' That produces both PID and TGID exit notifications for the same process. The PID exit record reports a valid taskstats version, while the TGID exit record reports `version 0`. 
This patch (of 2): Set stats->version = TASKSTATS_VERSION after copying the cached TGID aggregate into the outgoing netlink payload so all taskstats records are self-describing again. Link: https://lkml.kernel.org/r/ba83d934e59edd431b693607de573eb9ca059309.1774810498.git.cyyzero16@gmail.com Fixes: ad4ecbcba728 ("[PATCH] delay accounting taskstats interface send tgid once") Signed-off-by: Yiyang Chen Cc: Balbir Singh Cc: Dr. Thomas Orgis Cc: Fan Yu Cc: Wang Yaxin Cc: Signed-off-by: Andrew Morton --- kernel/taskstats.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 0cd680ccc7e5..73bd6a6a7893 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -649,6 +649,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) goto err; memcpy(stats, tsk->signal->stats, sizeof(*stats)); + stats->version = TASKSTATS_VERSION; send: send_cpu_listeners(rep_skb, listeners); From cc82b3dcc6a8fa259fbda12ab00d6fc00908a49e Mon Sep 17 00:00:00 2001 From: Yiyang Chen Date: Mon, 30 Mar 2026 03:00:41 +0800 Subject: [PATCH 111/127] tools/accounting: handle truncated taskstats netlink messages procacct and getdelays use a fixed receive buffer for taskstats generic netlink messages. A multi-threaded process exit can emit a single PID+TGID notification large enough to exceed that buffer on newer kernels. Switch to recvmsg() so MSG_TRUNC is detected explicitly, increase the message buffer size, and report truncated datagrams clearly instead of misparsing them as fatal netlink errors. Also print the taskstats version in debug output to make version mismatches easier to diagnose while inspecting taskstats traffic. Link: https://lkml.kernel.org/r/520308bb4cbbaf8dc2c7296b5f60f11e12fb30a5.1774810498.git.cyyzero16@gmail.com Signed-off-by: Yiyang Chen Cc: Balbir Singh Cc: Dr. 
Thomas Orgis Cc: Fan Yu Cc: Wang Yaxin Cc: Signed-off-by: Andrew Morton --- tools/accounting/getdelays.c | 41 ++++++++++++++++++++++++++++++++---- tools/accounting/procacct.c | 40 +++++++++++++++++++++++++++++++---- 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 50792df27707..368a622ca027 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -60,7 +60,7 @@ int print_task_context_switch_counts; } /* Maximum size of response requested or message sent */ -#define MAX_MSG_SIZE 1024 +#define MAX_MSG_SIZE 2048 /* Maximum number of cpus expected to be specified in a cpumask */ #define MAX_CPUS 32 @@ -115,6 +115,32 @@ error: return -1; } +static int recv_taskstats_msg(int sd, struct msgtemplate *msg) +{ + struct sockaddr_nl nladdr; + struct iovec iov = { + .iov_base = msg, + .iov_len = sizeof(*msg), + }; + struct msghdr hdr = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + int ret; + + ret = recvmsg(sd, &hdr, 0); + if (ret < 0) + return -1; + if (hdr.msg_flags & MSG_TRUNC) { + errno = EMSGSIZE; + return -1; + } + + return ret; +} + static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, __u8 genl_cmd, __u16 nla_type, @@ -633,12 +659,16 @@ int main(int argc, char *argv[]) } do { - rep_len = recv(nl_sd, &msg, sizeof(msg), 0); + rep_len = recv_taskstats_msg(nl_sd, &msg); PRINTF("received %d bytes\n", rep_len); if (rep_len < 0) { - fprintf(stderr, "nonfatal reply error: errno %d\n", - errno); + if (errno == EMSGSIZE) + fprintf(stderr, + "dropped truncated taskstats netlink message, please increase MAX_MSG_SIZE\n"); + else + fprintf(stderr, "nonfatal reply error: errno %d\n", + errno); continue; } if (msg.n.nlmsg_type == NLMSG_ERROR || @@ -680,6 +710,9 @@ int main(int argc, char *argv[]) printf("TGID\t%d\n", rtid); break; case TASKSTATS_TYPE_STATS: + PRINTF("version %u\n", + ((struct taskstats *) + 
NLA_DATA(na))->version); if (print_delays) print_delayacct((struct taskstats *) NLA_DATA(na)); if (print_io_accounting) diff --git a/tools/accounting/procacct.c b/tools/accounting/procacct.c index e8dee05a6264..46e5986ad927 100644 --- a/tools/accounting/procacct.c +++ b/tools/accounting/procacct.c @@ -71,7 +71,7 @@ int print_task_context_switch_counts; } /* Maximum size of response requested or message sent */ -#define MAX_MSG_SIZE 1024 +#define MAX_MSG_SIZE 2048 /* Maximum number of cpus expected to be specified in a cpumask */ #define MAX_CPUS 32 @@ -121,6 +121,32 @@ error: return -1; } +static int recv_taskstats_msg(int sd, struct msgtemplate *msg) +{ + struct sockaddr_nl nladdr; + struct iovec iov = { + .iov_base = msg, + .iov_len = sizeof(*msg), + }; + struct msghdr hdr = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + int ret; + + ret = recvmsg(sd, &hdr, 0); + if (ret < 0) + return -1; + if (hdr.msg_flags & MSG_TRUNC) { + errno = EMSGSIZE; + return -1; + } + + return ret; +} + static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, __u8 genl_cmd, __u16 nla_type, @@ -239,6 +265,8 @@ void handle_aggr(int mother, struct nlattr *na, int fd) PRINTF("TGID\t%d\n", rtid); break; case TASKSTATS_TYPE_STATS: + PRINTF("version %u\n", + ((struct taskstats *)NLA_DATA(na))->version); if (mother == TASKSTATS_TYPE_AGGR_PID) print_procacct((struct taskstats *) NLA_DATA(na)); if (fd) { @@ -347,12 +375,16 @@ int main(int argc, char *argv[]) } do { - rep_len = recv(nl_sd, &msg, sizeof(msg), 0); + rep_len = recv_taskstats_msg(nl_sd, &msg); PRINTF("received %d bytes\n", rep_len); if (rep_len < 0) { - fprintf(stderr, "nonfatal reply error: errno %d\n", - errno); + if (errno == EMSGSIZE) + fprintf(stderr, + "dropped truncated taskstats netlink message, please increase MAX_MSG_SIZE\n"); + else + fprintf(stderr, "nonfatal reply error: errno %d\n", + errno); continue; } if (msg.n.nlmsg_type == NLMSG_ERROR || From 
f758440d3d82f2e1804d7df281a64d9ad88b7f52 Mon Sep 17 00:00:00 2001 From: Taylor Nelms Date: Tue, 31 Mar 2026 14:15:09 -0400 Subject: [PATCH 112/127] checkpatch: exclude forward declarations of const structs Limit checkpatch warnings for normally-const structs by excluding patterns consistent with forward declarations. For example, the forward declaration `struct regmap_access_table;` in a header file currently generates a warning recommending that it is generally declared as const; however, this would apply a useless type qualifier in the empty declaration `const struct regmap_access_table;`, and subsequently generate compiler warnings. Link: https://lkml.kernel.org/r/20260331181509.1258693-1-tknelms@google.com Signed-off-by: Taylor Nelms Acked-by: Joe Perches Cc: Andy Whitcroft Cc: Dwaipayan Ray Cc: Lukas Bulwahn Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 27a43a4d9c43..7e612d3e2c1a 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -7512,10 +7512,10 @@ sub process { } # check for various structs that are normally const (ops, kgdb, device_tree) -# and avoid what seem like struct definitions 'struct foo {' +# and avoid what seem like struct definitions 'struct foo {' or forward declarations 'struct foo;' if (defined($const_structs) && $line !~ /\bconst\b/ && - $line =~ /\bstruct\s+($const_structs)\b(?!\s*\{)/) { + $line =~ /\bstruct\s+($const_structs)\b(?!\s*[\{;])/) { WARN("CONST_STRUCT", "struct $1 should normally be const\n" . 
$herecurr); } From 6110d18e208cc5572158928401246d98cd2b90b4 Mon Sep 17 00:00:00 2001 From: ZhengYuan Huang Date: Wed, 1 Apr 2026 10:16:22 +0800 Subject: [PATCH 113/127] ocfs2: validate bg_list extent bounds in discontig groups [BUG] Running ocfs2 on a corrupted image with a discontiguous block group whose bg_list.l_next_free_rec is set to an excessively large value triggers a KASAN use-after-free crash: BUG: KASAN: use-after-free in ocfs2_bg_discontig_fix_by_rec fs/ocfs2/suballoc.c:1678 [inline] BUG: KASAN: use-after-free in ocfs2_bg_discontig_fix_result+0x4a4/0x560 fs/ocfs2/suballoc.c:1715 Read of size 4 at addr ffff88801a85f000 by task syz.0.115/552 Call Trace: ... __asan_report_load4_noabort+0x14/0x30 mm/kasan/report_generic.c:380 ocfs2_bg_discontig_fix_by_rec fs/ocfs2/suballoc.c:1678 [inline] ocfs2_bg_discontig_fix_result+0x4a4/0x560 fs/ocfs2/suballoc.c:1715 ocfs2_search_one_group fs/ocfs2/suballoc.c:1752 [inline] ocfs2_claim_suballoc_bits+0x13c3/0x1cd0 fs/ocfs2/suballoc.c:1984 ocfs2_claim_new_inode+0x2e7/0x8a0 fs/ocfs2/suballoc.c:2292 ocfs2_mknod_locked.constprop.0+0x121/0x2a0 fs/ocfs2/namei.c:637 ocfs2_mknod+0xc71/0x2400 fs/ocfs2/namei.c:384 ocfs2_create+0x158/0x390 fs/ocfs2/namei.c:676 lookup_open.isra.0+0x10a1/0x1460 fs/namei.c:3796 open_last_lookups fs/namei.c:3895 [inline] path_openat+0x11fe/0x2ce0 fs/namei.c:4131 do_filp_open+0x1f6/0x430 fs/namei.c:4161 do_sys_openat2+0x117/0x1c0 fs/open.c:1437 do_sys_open fs/open.c:1452 [inline] __do_sys_openat fs/open.c:1468 [inline] ... [CAUSE] ocfs2_bg_discontig_fix_result() iterates over bg->bg_list.l_recs[] using l_next_free_rec as the upper bound without any sanity check: for (i = 0; i < le16_to_cpu(bg->bg_list.l_next_free_rec); i++) { rec = &bg->bg_list.l_recs[i]; l_next_free_rec is read directly from the on-disk group descriptor and is trusted blindly. On a 4 KiB block device, bg_list.l_recs[] can hold at most 235 entries (ocfs2_extent_recs_per_gd(sb)). 
A corrupted or crafted filesystem image can set l_next_free_rec to an arbitrarily large value, causing the loop to index past the end of the group descriptor buffer_head data page and into an adjacent freed page. [FIX] Validate discontiguous bg_list.l_count against ocfs2_extent_recs_per_gd(sb), then reject l_next_free_rec values that exceed l_count. This keeps the on-disk extent list self-consistent and matches how the rest of ocfs2 uses l_count as the extent-list bound. Link: https://lkml.kernel.org/r/20260401021622.3560952-1-gality369@gmail.com Signed-off-by: ZhengYuan Huang Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Signed-off-by: Andrew Morton --- fs/ocfs2/suballoc.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index bb98bd51338e..d284e0e37252 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -197,6 +197,31 @@ static int ocfs2_validate_gd_self(struct super_block *sb, 8 * le16_to_cpu(gd->bg_size)); } + /* + * For discontiguous block groups, validate the on-disk extent list + * against the maximum number of extent records that can physically + * fit in a single block. 
+ */ + if (ocfs2_gd_is_discontig(gd)) { + u16 max_recs = ocfs2_extent_recs_per_gd(sb); + u16 l_count = le16_to_cpu(gd->bg_list.l_count); + u16 l_next_free_rec = le16_to_cpu(gd->bg_list.l_next_free_rec); + + if (l_count != max_recs) { + do_error("Group descriptor #%llu bad discontig l_count %u expected %u\n", + (unsigned long long)bh->b_blocknr, + l_count, + max_recs); + } + + if (l_next_free_rec > l_count) { + do_error("Group descriptor #%llu bad discontig l_next_free_rec %u max %u\n", + (unsigned long long)bh->b_blocknr, + l_next_free_rec, + l_count); + } + } + return 0; } From 496649d23db6e171cca42f7a5be1a2d0e779ecc7 Mon Sep 17 00:00:00 2001 From: Askar Safin Date: Wed, 1 Apr 2026 07:46:19 +0000 Subject: [PATCH 114/127] .get_maintainer.ignore: add Askar I don't want get_maintainer.pl to automatically print my email. Link: https://lkml.kernel.org/r/20260401074619.988459-1-safinaskar@gmail.com Signed-off-by: Askar Safin Signed-off-by: Andrew Morton --- .get_maintainer.ignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.get_maintainer.ignore b/.get_maintainer.ignore index e8d2269bad9d..60b6b2a374cd 100644 --- a/.get_maintainer.ignore +++ b/.get_maintainer.ignore @@ -1,6 +1,7 @@ Alan Cox Alan Cox Alyssa Rosenzweig +Askar Safin Christoph Hellwig Jeff Kirsher Marc Gonzalez From 4a1c0ddc6e7bcf2e9db0eeaab9340dcfe97f448f Mon Sep 17 00:00:00 2001 From: ZhengYuan Huang Date: Wed, 1 Apr 2026 17:23:03 +0800 Subject: [PATCH 115/127] ocfs2: handle invalid dinode in ocfs2_group_extend [BUG] kernel BUG at fs/ocfs2/resize.c:308! Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI RIP: 0010:ocfs2_group_extend+0x10aa/0x1ae0 fs/ocfs2/resize.c:308 Code: 8b8520ff ffff83f8 860f8580 030000e8 5cc3c1fe Call Trace: ... 
ocfs2_ioctl+0x175/0x6e0 fs/ocfs2/ioctl.c:869 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:597 [inline] __se_sys_ioctl fs/ioctl.c:583 [inline] __x64_sys_ioctl+0x197/0x1e0 fs/ioctl.c:583 x64_sys_call+0x1144/0x26a0 arch/x86/include/generated/asm/syscalls_64.h:17 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0x93/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x76/0x7e ... [CAUSE] ocfs2_group_extend() assumes that the global bitmap inode block returned from ocfs2_inode_lock() has already been validated and BUG_ONs when the signature is not a dinode. That assumption is too strong for crafted filesystems because the JBD2-managed buffer path can bypass structural validation and return an invalid dinode to the resize ioctl. [FIX] Validate the dinode explicitly in ocfs2_group_extend(). If the global bitmap buffer does not contain a valid dinode, report filesystem corruption with ocfs2_error() and fail the resize operation instead of crashing the kernel. Link: https://lkml.kernel.org/r/20260401092303.3709187-1-gality369@gmail.com Fixes: 10995aa2451a ("ocfs2: Morph the haphazard OCFS2_IS_VALID_DINODE() checks.") Signed-off-by: ZhengYuan Huang Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/resize.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index ac3ec2c21119..09724e7dc01b 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -303,9 +303,13 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters) fe = (struct ocfs2_dinode *)main_bm_bh->b_data; - /* main_bm_bh is validated by inode read inside ocfs2_inode_lock(), - * so any corruption is a code bug. */ - BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); + /* JBD-managed buffers can bypass validation, so treat this as corruption. 
*/ + if (!OCFS2_IS_VALID_DINODE(fe)) { + ret = ocfs2_error(main_bm_inode->i_sb, + "Invalid dinode #%llu\n", + (unsigned long long)OCFS2_I(main_bm_inode)->ip_blkno); + goto out_unlock; + } if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != ocfs2_group_bitmap_size(osb->sb, 0, From 7de554cabf160e331e4442e2a9ad874ca9875921 Mon Sep 17 00:00:00 2001 From: Tejas Bharambe Date: Fri, 10 Apr 2026 01:38:16 -0700 Subject: [PATCH 116/127] ocfs2: fix use-after-free in ocfs2_fault() when VM_FAULT_RETRY filemap_fault() may drop the mmap_lock before returning VM_FAULT_RETRY, as documented in mm/filemap.c: "If our return value has VM_FAULT_RETRY set, it's because the mmap_lock may be dropped before doing I/O or by lock_folio_maybe_drop_mmap()." When this happens, a concurrent munmap() can call remove_vma() and free the vm_area_struct via RCU. The saved 'vma' pointer in ocfs2_fault() then becomes a dangling pointer, and the subsequent trace_ocfs2_fault() call dereferences it -- a use-after-free. Fix this by saving ip_blkno as a plain integer before calling filemap_fault(), and removing vma from the trace event. Since ip_blkno is copied by value before the lock can be dropped, it remains valid regardless of what happens to the vma or inode afterward. 
Link: https://lkml.kernel.org/r/20260410083816.34951-1-tejas.bharambe@outlook.com Fixes: 614a9e849ca6 ("ocfs2: Remove FILE_IO from masklog.") Signed-off-by: Tejas Bharambe Reported-by: syzbot+a49010a0e8fcdeea075f@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=a49010a0e8fcdeea075f Suggested-by: Joseph Qi Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/mmap.c | 7 +++---- fs/ocfs2/ocfs2_trace.h | 10 ++++------ 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 50e2faf64c19..6c570157caf1 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -30,7 +30,8 @@ static vm_fault_t ocfs2_fault(struct vm_fault *vmf) { - struct vm_area_struct *vma = vmf->vma; + unsigned long long ip_blkno = + OCFS2_I(file_inode(vmf->vma->vm_file))->ip_blkno; sigset_t oldset; vm_fault_t ret; @@ -38,11 +39,9 @@ static vm_fault_t ocfs2_fault(struct vm_fault *vmf) ret = filemap_fault(vmf); ocfs2_unblock_signals(&oldset); - trace_ocfs2_fault(OCFS2_I(vma->vm_file->f_mapping->host)->ip_blkno, - vma, vmf->page, vmf->pgoff); + trace_ocfs2_fault(ip_blkno, vmf->page, vmf->pgoff); return ret; } - static vm_fault_t __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, struct folio *folio) { diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index 4b32fb5658ad..6c2c97a9804f 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h @@ -1246,22 +1246,20 @@ TRACE_EVENT(ocfs2_write_end_inline, TRACE_EVENT(ocfs2_fault, TP_PROTO(unsigned long long ino, - void *area, void *page, unsigned long pgoff), - TP_ARGS(ino, area, page, pgoff), + void *page, unsigned long pgoff), + TP_ARGS(ino, page, pgoff), TP_STRUCT__entry( __field(unsigned long long, ino) - __field(void *, area) __field(void *, page) __field(unsigned long, pgoff) ), TP_fast_assign( __entry->ino = ino; - __entry->area = area; __entry->page 
= page; __entry->pgoff = pgoff; ), - TP_printk("%llu %p %p %lu", - __entry->ino, __entry->area, __entry->page, __entry->pgoff) + TP_printk("%llu %p %lu", + __entry->ino, __entry->page, __entry->pgoff) ); /* End of trace events for fs/ocfs2/mmap.c. */ From 775c17386a6fd695f999d4cda90e3931386570dd Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Fri, 3 Apr 2026 17:08:00 +0800 Subject: [PATCH 117/127] ocfs2: validate dx_root extent list fields during block read Patch series "ocfs2: consolidate extent list validation into block read callbacks". ocfs2 validates extent list fields (l_count, l_next_free_rec) at various points during extent tree traversal. This is fragile because each caller must remember to check for corrupted on-disk data before using it. This series moves those checks into the block read validation callbacks (ocfs2_validate_dx_root and ocfs2_validate_extent_block), so corrupted fields are caught early at block read time. Redundant post-read checks are then removed. This patch (of 4): Move the extent list l_count validation from ocfs2_dx_dir_lookup_rec() into ocfs2_validate_dx_root(), so that corrupted on-disk fields are caught early at block read time rather than during directory lookups. Additionally, add a l_next_free_rec <= l_count check to prevent out-of-bounds access when iterating over extent records. Both checks are skipped for inline dx roots (OCFS2_DX_FLAG_INLINE), which use dr_entries instead of dr_list. 
Link: https://lkml.kernel.org/r/20260403090803.3860971-1-joseph.qi@linux.alibaba.com Link: https://lkml.kernel.org/r/20260403090803.3860971-2-joseph.qi@linux.alibaba.com Signed-off-by: Joseph Qi Reviewed-by: Heming Zhao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/dir.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 1c8abf2c592c..82e720c8ba32 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -593,7 +593,7 @@ static int ocfs2_validate_dx_root(struct super_block *sb, mlog(ML_ERROR, "Checksum failed for dir index root block %llu\n", (unsigned long long)bh->b_blocknr); - return ret; + goto bail; } if (!OCFS2_IS_VALID_DX_ROOT(dx_root)) { @@ -601,8 +601,32 @@ static int ocfs2_validate_dx_root(struct super_block *sb, "Dir Index Root # %llu has bad signature %.*s\n", (unsigned long long)le64_to_cpu(dx_root->dr_blkno), 7, dx_root->dr_signature); + goto bail; } + if (!(dx_root->dr_flags & OCFS2_DX_FLAG_INLINE)) { + struct ocfs2_extent_list *el = &dx_root->dr_list; + + if (le16_to_cpu(el->l_count) != ocfs2_extent_recs_per_dx_root(sb)) { + ret = ocfs2_error(sb, + "Dir Index Root # %llu has invalid l_count %u (expected %u)\n", + (unsigned long long)le64_to_cpu(dx_root->dr_blkno), + le16_to_cpu(el->l_count), + ocfs2_extent_recs_per_dx_root(sb)); + goto bail; + } + + if (le16_to_cpu(el->l_next_free_rec) > le16_to_cpu(el->l_count)) { + ret = ocfs2_error(sb, + "Dir Index Root # %llu has invalid l_next_free_rec %u (l_count %u)\n", + (unsigned long long)le64_to_cpu(dx_root->dr_blkno), + le16_to_cpu(el->l_next_free_rec), + le16_to_cpu(el->l_count)); + goto bail; + } + } + +bail: return ret; } @@ -791,14 +815,6 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode, struct ocfs2_extent_block *eb; struct ocfs2_extent_rec *rec = NULL; - if (le16_to_cpu(el->l_count) != - ocfs2_extent_recs_per_dx_root(inode->i_sb)) { - 
ret = ocfs2_error(inode->i_sb, - "Inode %lu has invalid extent list length %u\n", - inode->i_ino, le16_to_cpu(el->l_count)); - goto out; - } - if (el->l_tree_depth) { ret = ocfs2_find_leaf(INODE_CACHE(inode), el, major_hash, &eb_bh); From 4ae9cca37e328637cf837ac73f6b8b529f4a918d Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Fri, 3 Apr 2026 17:08:01 +0800 Subject: [PATCH 118/127] ocfs2: remove empty extent list check in ocfs2_dx_dir_lookup_rec() The empty extent list check was introduced by commit 44acc46d182f to avoid a NULL pointer dereference if a dirent is not found. Rework the error message so that it does not reference rec. Instead, report the major_hash being looked up and l_next_free_rec, which naturally covers both failure cases (empty extent list and no matching record) without needing a separate l_next_free_rec == 0 guard. Link: https://lkml.kernel.org/r/20260403090803.3860971-3-joseph.qi@linux.alibaba.com Signed-off-by: Joseph Qi Reviewed-by: Heming Zhao Cc: Changwei Ge Cc: Joel Becker Cc: Jun Piao Cc: Junxiao Bi Cc: Mark Fasheh Signed-off-by: Andrew Morton --- fs/ocfs2/dir.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 82e720c8ba32..d94595a49923 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -835,14 +835,6 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode, } } - if (le16_to_cpu(el->l_next_free_rec) == 0) { - ret = ocfs2_error(inode->i_sb, - "Inode %lu has empty extent list at depth %u\n", - inode->i_ino, - le16_to_cpu(el->l_tree_depth)); - goto out; - } - found = 0; for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { rec = &el->l_recs[i]; @@ -855,10 +847,9 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode, if (!found) { ret = ocfs2_error(inode->i_sb, - "Inode %lu has bad extent record (%u, %u, 0) in btree\n", - inode->i_ino, - le32_to_cpu(rec->e_cpos), - ocfs2_rec_clusters(el, rec)); + "Inode %lu has no extent record for hash %u in btree (next_free_rec 
%u)\n", + inode->i_ino, major_hash, + le16_to_cpu(el->l_next_free_rec)); goto out; } From af5e456c0b1930546c39cb785d120c2d54268d9c Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Fri, 3 Apr 2026 17:08:02 +0800 Subject: [PATCH 119/127] ocfs2: validate extent block list fields during block read Add extent list validation to ocfs2_validate_extent_block() so that corrupted on-disk fields are caught early at block read time rather than during extent tree traversal. Two checks are added: - l_count must equal the expected value from ocfs2_extent_recs_per_eb(), catching blocks with a corrupted record count before any array iteration. - l_next_free_rec must not exceed l_count, preventing out-of-bounds access when iterating over extent records. Link: https://lkml.kernel.org/r/20260403090803.3860971-4-joseph.qi@linux.alibaba.com Signed-off-by: Joseph Qi Reviewed-by: Heming Zhao Cc: Changwei Ge Cc: Joel Becker Cc: Jun Piao Cc: Junxiao Bi Cc: Mark Fasheh Signed-off-by: Andrew Morton --- fs/ocfs2/alloc.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 344fd4d95fbc..8639806bcbb8 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -917,11 +917,32 @@ static int ocfs2_validate_extent_block(struct super_block *sb, goto bail; } - if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation) + if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation) { rc = ocfs2_error(sb, "Extent block #%llu has an invalid h_fs_generation of #%u\n", (unsigned long long)bh->b_blocknr, le32_to_cpu(eb->h_fs_generation)); + goto bail; + } + + if (le16_to_cpu(eb->h_list.l_count) != ocfs2_extent_recs_per_eb(sb)) { + rc = ocfs2_error(sb, + "Extent block #%llu has invalid l_count %u (expected %u)\n", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(eb->h_list.l_count), + ocfs2_extent_recs_per_eb(sb)); + goto bail; + } + + if (le16_to_cpu(eb->h_list.l_next_free_rec) > le16_to_cpu(eb->h_list.l_count)) { + 
rc = ocfs2_error(sb, + "Extent block #%llu has invalid l_next_free_rec %u (l_count %u)\n", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(eb->h_list.l_next_free_rec), + le16_to_cpu(eb->h_list.l_count)); + goto bail; + } + bail: return rc; } From 510a75028707645ecd606b18c6ca98e6834f9c14 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Fri, 3 Apr 2026 17:08:03 +0800 Subject: [PATCH 120/127] ocfs2: remove redundant l_next_free_rec check in __ocfs2_find_path() The l_next_free_rec > l_count check after ocfs2_read_extent_block() in __ocfs2_find_path() is now redundant, as ocfs2_validate_extent_block() already performs this validation at block read time. Remove the duplicate check to avoid maintaining the same validation in two places. Link: https://lkml.kernel.org/r/20260403090803.3860971-5-joseph.qi@linux.alibaba.com Signed-off-by: Joseph Qi Reviewed-by: Heming Zhao Cc: Changwei Ge Cc: Joel Becker Cc: Jun Piao Cc: Junxiao Bi Cc: Mark Fasheh Signed-off-by: Andrew Morton --- fs/ocfs2/alloc.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 8639806bcbb8..08e137928080 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -1878,18 +1878,6 @@ static int __ocfs2_find_path(struct ocfs2_caching_info *ci, eb = (struct ocfs2_extent_block *) bh->b_data; el = &eb->h_list; - if (le16_to_cpu(el->l_next_free_rec) > - le16_to_cpu(el->l_count)) { - ocfs2_error(ocfs2_metadata_cache_get_super(ci), - "Owner %llu has bad count in extent list at block %llu (next free=%u, count=%u)\n", - (unsigned long long)ocfs2_metadata_cache_owner(ci), - (unsigned long long)bh->b_blocknr, - le16_to_cpu(el->l_next_free_rec), - le16_to_cpu(el->l_count)); - ret = -EROFS; - goto out; - } - if (func) func(data, bh); } From d647c5b2fbf81560818dacade360abc8c00a9665 Mon Sep 17 00:00:00 2001 From: Heming Zhao Date: Thu, 2 Apr 2026 21:43:27 +0800 Subject: [PATCH 121/127] ocfs2: split transactions in dio completion to avoid credit exhaustion During ocfs2 dio 
operations, JBD2 may report warnings via the following call trace: ocfs2_dio_end_io_write ocfs2_mark_extent_written ocfs2_change_extent_flag ocfs2_split_extent ocfs2_try_to_merge_extent ocfs2_extend_rotate_transaction ocfs2_extend_trans jbd2__journal_restart start_this_handle output: JBD2: kworker/6:2 wants too many credits credits:5450 rsv_credits:0 max:5449 To prevent exceeding the credits limit, modify ocfs2_dio_end_io_write() to handle extents in batched transactions. Additionally, relocate ocfs2_del_inode_from_orphan(). The orphan inode should only be removed from the orphan list after the extent tree update is complete. This ensures that if a crash occurs in the middle of extent tree updates, we won't leave stale blocks beyond EOF. This patch also changes the logic for updating the inode size and removing the orphan, making it similar to ext4_dio_write_end_io(). Both operations are performed only when everything looks good. Finally, thanks to Jan and Joseph for providing the bug fix prototype and suggestions. 
Link: https://lkml.kernel.org/r/20260402134328.27334-2-heming.zhao@suse.com Signed-off-by: Heming Zhao Suggested-by: Jan Kara Suggested-by: Joseph Qi Reviewed-by: Jan Kara Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/aops.c | 74 ++++++++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 29 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 09146b43d1f0..c6dbec1693b1 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -37,6 +37,8 @@ #include "namei.h" #include "sysfile.h" +#define OCFS2_DIO_MARK_EXTENT_BATCH 200 + static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { @@ -2277,7 +2279,7 @@ static int ocfs2_dio_end_io_write(struct inode *inode, struct ocfs2_alloc_context *meta_ac = NULL; handle_t *handle = NULL; loff_t end = offset + bytes; - int ret = 0, credits = 0; + int ret = 0, credits = 0, batch = 0; ocfs2_init_dealloc_ctxt(&dealloc); @@ -2294,18 +2296,6 @@ static int ocfs2_dio_end_io_write(struct inode *inode, goto out; } - /* Delete orphan before acquire i_rwsem. */ - if (dwc->dw_orphaned) { - BUG_ON(dwc->dw_writer_pid != task_pid_nr(current)); - - end = end > i_size_read(inode) ? 
end : 0; - - ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh, - !!end, end); - if (ret < 0) - mlog_errno(ret); - } - down_write(&oi->ip_alloc_sem); di = (struct ocfs2_dinode *)di_bh->b_data; @@ -2326,24 +2316,25 @@ static int ocfs2_dio_end_io_write(struct inode *inode, credits = ocfs2_calc_extend_credits(inode->i_sb, &di->id2.i_list); - handle = ocfs2_start_trans(osb, credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto unlock; - } - ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret) { - mlog_errno(ret); - goto commit; - } - list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) { + if (!handle) { + handle = ocfs2_start_trans(osb, credits); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto unlock; + } + ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto commit; + } + } ret = ocfs2_assure_trans_credits(handle, credits); if (ret < 0) { mlog_errno(ret); - break; + goto commit; } ret = ocfs2_mark_extent_written(inode, &et, handle, ue->ue_cpos, 1, @@ -2351,19 +2342,44 @@ static int ocfs2_dio_end_io_write(struct inode *inode, meta_ac, &dealloc); if (ret < 0) { mlog_errno(ret); - break; + goto commit; + } + + if (++batch == OCFS2_DIO_MARK_EXTENT_BATCH) { + ocfs2_commit_trans(osb, handle); + handle = NULL; + batch = 0; } } if (end > i_size_read(inode)) { + if (!handle) { + handle = ocfs2_start_trans(osb, credits); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto unlock; + } + } ret = ocfs2_set_inode_size(handle, inode, di_bh, end); if (ret < 0) mlog_errno(ret); } + commit: - ocfs2_commit_trans(osb, handle); + if (handle) + ocfs2_commit_trans(osb, handle); unlock: up_write(&oi->ip_alloc_sem); + + /* everything looks good, let's start the cleanup */ + if (!ret && dwc->dw_orphaned) { + BUG_ON(dwc->dw_writer_pid != task_pid_nr(current)); + + ret = 
ocfs2_del_inode_from_orphan(osb, inode, di_bh, 0, 0); + if (ret < 0) + mlog_errno(ret); + } ocfs2_inode_unlock(inode, 1); brelse(di_bh); out: From 6c9340a2ff2b32cc4477cc6c2a969855195bf9a3 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sun, 5 Apr 2026 16:47:20 +0100 Subject: [PATCH 122/127] ocfs2: use get_random_u32() where appropriate Use the typed random integer helpers instead of get_random_bytes() when filling a single integer variable. The helpers return the value directly, require no pointer or size argument, and better express intent. Link: https://lkml.kernel.org/r/20260405154720.4732-1-devnexen@gmail.com Signed-off-by: David Carlier Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Signed-off-by: Andrew Morton --- fs/ocfs2/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index d7c58fd7d438..b875f01c9756 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -2124,7 +2124,7 @@ static int ocfs2_initialize_super(struct super_block *sb, osb->osb_cluster_stack[0] = '\0'; } - get_random_bytes(&osb->s_next_generation, sizeof(u32)); + osb->s_next_generation = get_random_u32(); /* * FIXME From fb9907939b01338c26dbe17951628e4974470548 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Tue, 7 Apr 2026 12:47:21 -0400 Subject: [PATCH 123/127] update Sean's email address Soon I will no longer be working at SECO. Update the mailmap to redirect to my linux.dev address which I still have access to. 
Link: https://lkml.kernel.org/r/20260407164722.211610-1-sean.anderson@linux.dev Signed-off-by: Sean Anderson Acked-by: Krzysztof Kozlowski Cc: Sean Anderson Cc: Conor Dooley Cc: Daniel Lezcano Cc: Rob Herring Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- .mailmap | 1 + Documentation/devicetree/bindings/timer/xlnx,xps-timer.yaml | 2 +- MAINTAINERS | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.mailmap b/.mailmap index e23a69a9a095..6b3833efa00d 100644 --- a/.mailmap +++ b/.mailmap @@ -740,6 +740,7 @@ Sathishkumar Muruganandam Satya Priya S.Çağlar Onur Sayali Lokhande +Sean Anderson Sean Christopherson Sean Nyekjaer Sean Tranchetti diff --git a/Documentation/devicetree/bindings/timer/xlnx,xps-timer.yaml b/Documentation/devicetree/bindings/timer/xlnx,xps-timer.yaml index b1597db04263..3538eafff6b1 100644 --- a/Documentation/devicetree/bindings/timer/xlnx,xps-timer.yaml +++ b/Documentation/devicetree/bindings/timer/xlnx,xps-timer.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Xilinx LogiCORE IP AXI Timer maintainers: - - Sean Anderson + - Sean Anderson properties: compatible: diff --git a/MAINTAINERS b/MAINTAINERS index 83e3e87aa053..8357cb3d9023 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10238,7 +10238,7 @@ F: drivers/net/ethernet/freescale/dpaa FREESCALE QORIQ DPAA FMAN DRIVER M: Madalin Bucur -R: Sean Anderson +R: Sean Anderson L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/fsl,fman*.yaml @@ -28899,7 +28899,7 @@ S: Orphan F: drivers/net/ethernet/xilinx/ll_temac* XILINX PWM DRIVER -M: Sean Anderson +M: Sean Anderson S: Maintained F: drivers/pwm/pwm-xilinx.c F: include/clocksource/timer-xilinx.h From 5cc6421aaad34ba3b0830c3fcc4a6845648043d4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 8 Apr 2026 14:35:21 -0700 Subject: [PATCH 124/127] doc: watchdog: fix typos etc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
Correct typos in lockup-watchdogs.rst. Link: https://lkml.kernel.org/r/20260408213523.2707947-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Jonathan Corbet Cc: Shuah Khan Cc: Björn Persson Signed-off-by: Andrew Morton --- Documentation/admin-guide/lockup-watchdogs.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/lockup-watchdogs.rst b/Documentation/admin-guide/lockup-watchdogs.rst index 26634982e4dd..8f245f4a95b7 100644 --- a/Documentation/admin-guide/lockup-watchdogs.rst +++ b/Documentation/admin-guide/lockup-watchdogs.rst @@ -41,7 +41,7 @@ is a trade-off between fast response to lockups and detection overhead. Implementation ============== -The soft and hard lockup detectors are built around a hrtimer. +The soft and hard lockup detectors are built around an hrtimer. In addition, the softlockup detector regularly schedules a job, and the hard lockup detector might use Perf/NMI events on architectures that support it. @@ -49,7 +49,7 @@ that support it. Frequency and Heartbeats ------------------------ -The core of the detectors in a hrtimer. It servers multiple purpose: +The core of the detectors is an hrtimer. It serves multiple purposes: - schedules watchdog job for the softlockup detector - bumps the interrupt counter for hardlockup detectors (heartbeat) From d12f558e6200b3f47dbef9331ed6d115d2410e59 Mon Sep 17 00:00:00 2001 From: ZhengYuan Huang Date: Fri, 10 Apr 2026 12:03:39 +0800 Subject: [PATCH 125/127] ocfs2: fix listxattr handling when the buffer is full [BUG] If an OCFS2 inode has both inline and block-based xattrs, listxattr() can return a size larger than the caller's buffer when the inline names consume that buffer exactly. kernel BUG at mm/usercopy.c:102! 
Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI RIP: 0010:usercopy_abort+0xb7/0xd0 mm/usercopy.c:102 Call Trace: __check_heap_object+0xe3/0x120 mm/slub.c:8243 check_heap_object mm/usercopy.c:196 [inline] __check_object_size mm/usercopy.c:250 [inline] __check_object_size+0x5c5/0x780 mm/usercopy.c:215 check_object_size include/linux/ucopysize.h:22 [inline] check_copy_size include/linux/ucopysize.h:59 [inline] copy_to_user include/linux/uaccess.h:219 [inline] listxattr+0xb0/0x170 fs/xattr.c:926 filename_listxattr fs/xattr.c:958 [inline] path_listxattrat+0x137/0x320 fs/xattr.c:988 __do_sys_listxattr fs/xattr.c:1001 [inline] __se_sys_listxattr fs/xattr.c:998 [inline] __x64_sys_listxattr+0x7f/0xd0 fs/xattr.c:998 ... [CAUSE] Commit 936b8834366e ("ocfs2: Refactor xattr list and remove ocfs2_xattr_handler().") replaced the old per-handler list accounting with ocfs2_xattr_list_entry(), but it kept using size == 0 to detect probe mode. That assumption stops being true once ocfs2_listxattr() finishes the inline-xattr pass. If the inline names fill the caller buffer exactly, the block-xattr pass runs with a non-NULL buffer and a remaining size of zero. ocfs2_xattr_list_entry() then skips the bounds check, keeps counting block names, and returns a positive size larger than the supplied buffer. [FIX] Detect probe mode by testing whether the destination buffer pointer is NULL instead of whether the remaining size is zero. That restores the pre-refactor behavior and matches the OCFS2 getxattr helpers. Once the remaining buffer reaches zero while more names are left, the block-xattr pass now returns -ERANGE instead of reporting a size larger than the allocated list buffer. 
Link: https://lkml.kernel.org/r/20260410040339.3837162-1-gality369@gmail.com Fixes: 936b8834366e ("ocfs2: Refactor xattr list and remove ocfs2_xattr_handler().") Signed-off-by: ZhengYuan Huang Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Signed-off-by: Andrew Morton --- fs/ocfs2/xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 42ee5db362d3..b9a6bdbf596c 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -911,8 +911,8 @@ static int ocfs2_xattr_list_entry(struct super_block *sb, total_len = prefix_len + name_len + 1; *result += total_len; - /* we are just looking for how big our buffer needs to be */ - if (!size) + /* No buffer means we are only looking for the required size. */ + if (!buffer) return 0; if (*result > size) From 8f687eeed3da3012152b0f9473f578869de0cd7b Mon Sep 17 00:00:00 2001 From: ZhengYuan Huang Date: Fri, 10 Apr 2026 11:42:20 +0800 Subject: [PATCH 126/127] ocfs2: validate bg_bits during freefrag scan [BUG] A crafted filesystem can trigger an out-of-bounds bitmap walk when OCFS2_IOC_INFO is issued with OCFS2_INFO_FL_NON_COHERENT. 
BUG: KASAN: use-after-free in instrument_atomic_read include/linux/instrumented.h:68 [inline] BUG: KASAN: use-after-free in _test_bit include/asm-generic/bitops/instrumented-non-atomic.h:141 [inline] BUG: KASAN: use-after-free in test_bit_le include/asm-generic/bitops/le.h:21 [inline] BUG: KASAN: use-after-free in ocfs2_info_freefrag_scan_chain fs/ocfs2/ioctl.c:495 [inline] BUG: KASAN: use-after-free in ocfs2_info_freefrag_scan_bitmap fs/ocfs2/ioctl.c:588 [inline] BUG: KASAN: use-after-free in ocfs2_info_handle_freefrag fs/ocfs2/ioctl.c:662 [inline] BUG: KASAN: use-after-free in ocfs2_info_handle_request+0x1c66/0x3370 fs/ocfs2/ioctl.c:754 Read of size 8 at addr ffff888031bce000 by task syz.0.636/1435 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0xbe/0x130 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0xd1/0x650 mm/kasan/report.c:482 kasan_report+0xfb/0x140 mm/kasan/report.c:595 check_region_inline mm/kasan/generic.c:186 [inline] kasan_check_range+0x11c/0x200 mm/kasan/generic.c:200 __kasan_check_read+0x11/0x20 mm/kasan/shadow.c:31 instrument_atomic_read include/linux/instrumented.h:68 [inline] _test_bit include/asm-generic/bitops/instrumented-non-atomic.h:141 [inline] test_bit_le include/asm-generic/bitops/le.h:21 [inline] ocfs2_info_freefrag_scan_chain fs/ocfs2/ioctl.c:495 [inline] ocfs2_info_freefrag_scan_bitmap fs/ocfs2/ioctl.c:588 [inline] ocfs2_info_handle_freefrag fs/ocfs2/ioctl.c:662 [inline] ocfs2_info_handle_request+0x1c66/0x3370 fs/ocfs2/ioctl.c:754 ocfs2_info_handle+0x18d/0x2a0 fs/ocfs2/ioctl.c:828 ocfs2_ioctl+0x632/0x6e0 fs/ocfs2/ioctl.c:913 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:597 [inline] __se_sys_ioctl fs/ioctl.c:583 [inline] __x64_sys_ioctl+0x197/0x1e0 fs/ioctl.c:583 ... [CAUSE] ocfs2_info_freefrag_scan_chain() uses on-disk bg_bits directly as the bitmap scan limit. 
The coherent path reads group descriptors through ocfs2_read_group_descriptor(), which validates the descriptor before use. The non-coherent path uses ocfs2_read_blocks_sync() instead and skips that validation, so an impossible bg_bits value can drive the bitmap walk past the end of the block. [FIX] Compute the bitmap capacity from the filesystem format with ocfs2_group_bitmap_size(), report descriptors whose bg_bits exceeds that limit, and clamp the scan to the computed capacity. This keeps the freefrag report going while avoiding reads beyond the buffer. Link: https://lkml.kernel.org/r/20260410034220.3825769-1-gality369@gmail.com Fixes: d24a10b9f8ed ("Ocfs2: Add a new code 'OCFS2_INFO_FREEFRAG' for o2info ioctl.") Signed-off-by: ZhengYuan Huang Reviewed-by: Heming Zhao Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Signed-off-by: Andrew Morton --- fs/ocfs2/ioctl.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index bfed0fb35f9b..cbe59d231666 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -441,13 +441,16 @@ static int ocfs2_info_freefrag_scan_chain(struct ocfs2_super *osb, struct buffer_head *bh = NULL; struct ocfs2_group_desc *bg = NULL; - unsigned int max_bits, num_clusters; + unsigned int max_bits, max_bitmap_bits, num_clusters; unsigned int offset = 0, cluster, chunk; unsigned int chunk_free, last_chunksize = 0; if (!le32_to_cpu(rec->c_free)) goto bail; + max_bitmap_bits = 8 * ocfs2_group_bitmap_size(osb->sb, 0, + osb->s_feature_incompat); + do { if (!bg) blkno = le64_to_cpu(rec->c_blkno); @@ -479,6 +482,19 @@ static int ocfs2_info_freefrag_scan_chain(struct ocfs2_super *osb, continue; max_bits = le16_to_cpu(bg->bg_bits); + + /* + * Non-coherent scans read raw blocks and do not get the + * bg_bits validation from + * ocfs2_read_group_descriptor(). 
+ */ + if (max_bits > max_bitmap_bits) { + mlog(ML_ERROR, + "Group desc #%llu has %u bits, max bitmap bits %u\n", + (unsigned long long)blkno, max_bits, max_bitmap_bits); + max_bits = max_bitmap_bits; + } + offset = 0; for (chunk = 0; chunk < chunks_in_group; chunk++) { From 70b672833f4025341c11b22c7f83778a5cd611bc Mon Sep 17 00:00:00 2001 From: ZhengYuan Huang Date: Fri, 10 Apr 2026 10:02:08 +0800 Subject: [PATCH 127/127] ocfs2: validate group add input before caching [BUG] OCFS2_IOC_GROUP_ADD can trigger a BUG_ON in ocfs2_set_new_buffer_uptodate(): kernel BUG at fs/ocfs2/uptodate.c:509! Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI RIP: 0010:ocfs2_set_new_buffer_uptodate+0x194/0x1e0 fs/ocfs2/uptodate.c:509 Code: ffffe88f 42b9fe4c 89e64889 dfe8b4df Call Trace: ocfs2_group_add+0x3f1/0x1510 fs/ocfs2/resize.c:507 ocfs2_ioctl+0x309/0x6e0 fs/ocfs2/ioctl.c:887 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:597 [inline] __se_sys_ioctl fs/ioctl.c:583 [inline] __x64_sys_ioctl+0x197/0x1e0 fs/ioctl.c:583 x64_sys_call+0x1144/0x26a0 arch/x86/include/generated/asm/syscalls_64.h:17 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0x93/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7bbfb55a966d [CAUSE] ocfs2_group_add() calls ocfs2_set_new_buffer_uptodate() on a user-controlled group block before ocfs2_verify_group_and_input() validates that block number. That helper is only valid for newly allocated metadata and asserts that the block is not already present in the chosen metadata cache. The code also uses INODE_CACHE(inode) even though the group descriptor belongs to main_bm_inode and later journal accesses use that cache context instead. [FIX] Validate the on-disk group descriptor before caching it, then add it to the metadata cache tracked by INODE_CACHE(main_bm_inode). 
Keep the validation failure path separate from the later cleanup path so we only remove the buffer from that cache after it has actually been inserted. This keeps the group buffer lifetime consistent across validation, journaling, and cleanup. Link: https://lkml.kernel.org/r/20260410020209.3786348-1-gality369@gmail.com Fixes: 7909f2bf8353 ("[PATCH 2/2] ocfs2: Implement group add for online resize") Signed-off-by: ZhengYuan Huang Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Heming Zhao Signed-off-by: Andrew Morton --- fs/ocfs2/resize.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index 09724e7dc01b..6375d5035972 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -508,14 +508,14 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) goto out_unlock; } - ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), group_bh); - ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh); if (ret) { mlog_errno(ret); goto out_free_group_bh; } + ocfs2_set_new_buffer_uptodate(INODE_CACHE(main_bm_inode), group_bh); + trace_ocfs2_group_add((unsigned long long)input->group, input->chain, input->clusters, input->frees); @@ -523,7 +523,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) if (IS_ERR(handle)) { mlog_errno(PTR_ERR(handle)); ret = -EINVAL; - goto out_free_group_bh; + goto out_remove_cache; } cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); @@ -577,9 +577,11 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) out_commit: ocfs2_commit_trans(osb, handle); -out_free_group_bh: +out_remove_cache: if (ret < 0) - ocfs2_remove_from_cache(INODE_CACHE(inode), group_bh); + ocfs2_remove_from_cache(INODE_CACHE(main_bm_inode), group_bh); + +out_free_group_bh: brelse(group_bh); out_unlock: