diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst index b5cdbba3ec2e..4d886e7c7a95 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.rst +++ b/Documentation/RCU/Design/Requirements/Requirements.rst @@ -2787,6 +2787,13 @@ which avoids the read-side memory barriers, at least for architectures that apply noinstr to kernel entry/exit code (or that build with ``CONFIG_TASKS_TRACE_RCU_NO_MB=y``. +Now that the implementation is based on SRCU-fast, a call +to synchronize_rcu_tasks_trace() implies at least one call to +synchronize_rcu(), that is, every Tasks Trace RCU grace period contains +at least one plain vanilla RCU grace period. Should there ever +be a synchronize_rcu_tasks_trace_expedited(), this guarantee would +*not* necessarily apply to this hypothetical API member. + The tasks-trace-RCU API is also reasonably compact, consisting of rcu_read_lock_trace(), rcu_read_unlock_trace(), rcu_read_lock_trace_held(), call_rcu_tasks_trace(), diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 04f3f86a4145..18a85c30fd4f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -208,12 +208,9 @@ static inline void exit_tasks_rcu_finish(void) { } /** * rcu_trace_implies_rcu_gp - does an RCU Tasks Trace grace period imply an RCU grace period? * - * As an accident of implementation, an RCU Tasks Trace grace period also - * acts as an RCU grace period. However, this could change at any time. - * Code relying on this accident must call this function to verify that - * this accident is still happening. - * - * You have been warned! + * Now that RCU Tasks Trace is implemented in terms of SRCU-fast, a + * call to synchronize_rcu_tasks_trace() is guaranteed to imply at least + * one call to synchronize_rcu(). */ static inline bool rcu_trace_implies_rcu_gp(void) { return true; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 5a5d3dbc9cdf..ffb2ad9716f0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -949,6 +949,10 @@ struct task_struct { struct srcu_ctr __percpu *trc_reader_scp; #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ +#ifdef CONFIG_TRIVIAL_PREEMPT_RCU + int rcu_trivial_preempt_nesting; +#endif /* #ifdef CONFIG_TRIVIAL_PREEMPT_RCU */ + struct sched_info sched_info; struct list_head tasks; diff --git a/include/linux/srcu.h b/include/linux/srcu.h index bb44a0bd7696..81b1938512d5 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -69,8 +69,8 @@ int init_srcu_struct_fast_updown(struct srcu_struct *ssp); #define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). #define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). // 0x4 // SRCU-lite is no longer with us. -#define SRCU_READ_FLAVOR_FAST 0x4 // srcu_read_lock_fast(). -#define SRCU_READ_FLAVOR_FAST_UPDOWN 0x8 // srcu_read_lock_fast(). +#define SRCU_READ_FLAVOR_FAST 0x4 // srcu_read_lock_fast(), also NMI-safe. +#define SRCU_READ_FLAVOR_FAST_UPDOWN 0x8 // srcu_read_lock_fast_updown(). #define SRCU_READ_FLAVOR_ALL (SRCU_READ_FLAVOR_NORMAL | SRCU_READ_FLAVOR_NMI | \ SRCU_READ_FLAVOR_FAST | SRCU_READ_FLAVOR_FAST_UPDOWN) // All of the above. diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index be76fa4fc170..fd1a9270cb9a 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -260,7 +260,7 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss * srcu_read_unlock_fast(). * * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side - * critical sections either because they disables interrupts, because + * critical sections either because they disable interrupts, because * they are a single instruction, or because they are read-modify-write * atomic operations, depending on the whims of the architecture. * This matters because the SRCU-fast grace-period mechanism uses either diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug index 625d75392647..e078e988773d 100644 --- a/kernel/rcu/Kconfig.debug +++ b/kernel/rcu/Kconfig.debug @@ -228,4 +228,15 @@ config RCU_DYNTICKS_TORTURE This has no value for production and is only for testing. +config TRIVIAL_PREEMPT_RCU + bool "Textbook trivial preemptible RCU in rcutorture" + depends on RCU_EXPERT && RCU_TORTURE_TEST + default n + help + This option enables a textbook preemptible RCU that is + implemented in rcutorture. Its sole purpose is to validate + code used in books, papers, and presentations. + + This has no value for production and is only for testing. + endmenu # "RCU Debugging" diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 9b10b57b79ad..fa6d30ce73d1 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -691,4 +691,8 @@ int rcu_stall_notifier_call_chain(unsigned long val, void *v); static inline int rcu_stall_notifier_call_chain(unsigned long val, void *v) { return NOTIFY_DONE; } #endif // #else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER) +#ifdef CONFIG_TRIVIAL_PREEMPT_RCU +void synchronize_rcu_trivial_preempt(void); +#endif // #ifdef CONFIG_TRIVIAL_PREEMPT_RCU + #endif /* __LINUX_RCU_H */ diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 4ac2b134a983..ac0b1c6b7dae 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -79,12 +79,6 @@ MODULE_AUTHOR("Paul E. McKenney "); * test-end checks, and the pair of calls through pointers. */ -#ifdef MODULE -# define RCUSCALE_SHUTDOWN 0 -#else -# define RCUSCALE_SHUTDOWN 1 -#endif - torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives"); torture_param(int, gp_async_max, 1000, "Max # outstanding waits per writer"); torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); @@ -92,8 +86,8 @@ torture_param(int, holdoff, 10, "Holdoff time before test start (s)"); torture_param(int, minruntime, 0, "Minimum run time (s)"); torture_param(int, nreaders, -1, "Number of RCU reader threads"); torture_param(int, nwriters, -1, "Number of RCU updater threads"); -torture_param(bool, shutdown, RCUSCALE_SHUTDOWN, - "Shutdown at end of scalability tests."); +torture_param(int, shutdown_secs, !IS_MODULE(CONFIG_RCU_SCALE_TEST) * 300, + "Shutdown at end of scalability tests or at specified timeout (s)."); torture_param(int, verbose, 1, "Enable verbose debugging printk()s"); torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable"); torture_param(int, writer_holdoff_jiffies, 0, "Holdoff (jiffies) between GPs, zero to disable"); @@ -123,7 +117,6 @@ static int nrealreaders; static int nrealwriters; static struct task_struct **writer_tasks; static struct task_struct **reader_tasks; -static struct task_struct *shutdown_task; static u64 **writer_durations; static bool *writer_done; @@ -132,7 +125,6 @@ static int *writer_n_durations; static atomic_t n_rcu_scale_reader_started; static atomic_t n_rcu_scale_writer_started; static atomic_t n_rcu_scale_writer_finished; -static wait_queue_head_t shutdown_wq; static u64 t_rcu_scale_writer_started; static u64 t_rcu_scale_writer_finished; static unsigned long b_rcu_gp_test_started; @@ -519,6 +511,8 @@ static void rcu_scale_async_cb(struct rcu_head *rhp) rcu_scale_free(wmbp); } +static void rcu_scale_cleanup(void); + /* * RCU scale writer kthread. Repeatedly does a grace period. */ @@ -622,9 +616,11 @@ rcu_scale_writer(void *arg) b_rcu_gp_test_finished = cur_ops->get_gp_seq(); } - if (shutdown) { + if (shutdown_secs) { + writer_tasks[me] = NULL; smp_mb(); /* Assign before wake. */ - wake_up(&shutdown_wq); + rcu_scale_cleanup(); + kernel_power_off(); } } } @@ -668,8 +664,8 @@ static void rcu_scale_print_module_parms(struct rcu_scale_ops *cur_ops, const char *tag) { pr_alert("%s" SCALE_FLAG - "--- %s: gp_async=%d gp_async_max=%d gp_exp=%d holdoff=%d minruntime=%d nreaders=%d nwriters=%d writer_holdoff=%d writer_holdoff_jiffies=%d verbose=%d shutdown=%d\n", - scale_type, tag, gp_async, gp_async_max, gp_exp, holdoff, minruntime, nrealreaders, nrealwriters, writer_holdoff, writer_holdoff_jiffies, verbose, shutdown); + "--- %s: gp_async=%d gp_async_max=%d gp_exp=%d holdoff=%d minruntime=%d nreaders=%d nwriters=%d writer_holdoff=%d writer_holdoff_jiffies=%d verbose=%d shutdown_secs=%d\n", + scale_type, tag, gp_async, gp_async_max, gp_exp, holdoff, minruntime, nrealreaders, nrealwriters, writer_holdoff, writer_holdoff_jiffies, verbose, shutdown_secs); } /* @@ -722,6 +718,8 @@ static void kfree_call_rcu(struct rcu_head *rh) kfree(obj); } +static void kfree_scale_cleanup(void); + static int kfree_scale_thread(void *arg) { @@ -791,9 +789,11 @@ kfree_scale_thread(void *arg) rcuscale_seq_diff(b_rcu_gp_test_finished, b_rcu_gp_test_started), PAGES_TO_MB(mem_begin - mem_during)); - if (shutdown) { + if (shutdown_secs) { + kfree_reader_tasks[me] = NULL; smp_mb(); /* Assign before wake. */ - wake_up(&shutdown_wq); + kfree_scale_cleanup(); + kernel_power_off(); } } @@ -820,22 +820,6 @@ kfree_scale_cleanup(void) torture_cleanup_end(); } -/* - * shutdown kthread. Just waits to be awakened, then shuts down system. - */ -static int -kfree_scale_shutdown(void *arg) -{ - wait_event_idle(shutdown_wq, - atomic_read(&n_kfree_scale_thread_ended) >= kfree_nrealthreads); - - smp_mb(); /* Wake before output. */ - - kfree_scale_cleanup(); - kernel_power_off(); - return -EINVAL; -} - // Used if doing RCU-kfree'ing via call_rcu(). static unsigned long jiffies_at_lazy_cb; static struct rcu_head lazy_test1_rh; @@ -895,13 +879,10 @@ kfree_scale_init(void) kfree_nrealthreads = compute_real(kfree_nthreads); /* Start up the kthreads. */ - if (shutdown) { - init_waitqueue_head(&shutdown_wq); - firsterr = torture_create_kthread(kfree_scale_shutdown, NULL, - shutdown_task); + if (shutdown_secs) { + firsterr = torture_shutdown_init(shutdown_secs, kfree_scale_cleanup); if (torture_init_error(firsterr)) goto unwind; - schedule_timeout_uninterruptible(1); } pr_alert("kfree object size=%zu, kfree_by_call_rcu=%d\n", @@ -1058,20 +1039,6 @@ rcu_scale_cleanup(void) torture_cleanup_end(); } -/* - * RCU scalability shutdown kthread. Just waits to be awakened, then shuts - * down system. - */ -static int -rcu_scale_shutdown(void *arg) -{ - wait_event_idle(shutdown_wq, atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters); - smp_mb(); /* Wake before output. */ - rcu_scale_cleanup(); - kernel_power_off(); - return -EINVAL; -} - static int __init rcu_scale_init(void) { @@ -1121,13 +1088,10 @@ rcu_scale_init(void) /* Start up the kthreads. */ - if (shutdown) { - init_waitqueue_head(&shutdown_wq); - firsterr = torture_create_kthread(rcu_scale_shutdown, NULL, - shutdown_task); + if (shutdown_secs) { + firsterr = torture_shutdown_init(shutdown_secs, rcu_scale_cleanup); if (torture_init_error(firsterr)) goto unwind; - schedule_timeout_uninterruptible(1); } reader_tasks = kzalloc_objs(reader_tasks[0], nrealreaders); if (reader_tasks == NULL) { @@ -1201,7 +1165,7 @@ rcu_scale_init(void) unwind: torture_init_end(); rcu_scale_cleanup(); - if (shutdown) { + if (shutdown_secs) { WARN_ON(!IS_MODULE(CONFIG_RCU_SCALE_TEST)); kernel_power_off(); } diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 8a9282a0245c..5f2848b828dc 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -842,7 +842,14 @@ static unsigned long srcu_torture_completed(void) static void srcu_torture_deferred_free(struct rcu_torture *rp) { + unsigned long flags; + bool lockit = jiffies & 0x1; + + if (lockit) + raw_spin_lock_irqsave(¤t->pi_lock, flags); call_srcu(srcu_ctlp, &rp->rtort_rcu, rcu_torture_cb); + if (lockit) + raw_spin_unlock_irqrestore(¤t->pi_lock, flags); } static void srcu_torture_synchronize(void) @@ -1061,6 +1068,61 @@ static struct rcu_torture_ops trivial_ops = { .name = "trivial" }; +#ifdef CONFIG_TRIVIAL_PREEMPT_RCU + +/* + * Definitions for trivial CONFIG_PREEMPT=y torture testing. This + * implementation does not work well with large numbers of tasks or with + * long-term preemption. Either or both get you RCU CPU stall warnings. + */ + +static void rcu_sync_torture_init_trivial_preempt(void) +{ + rcu_sync_torture_init(); + if (WARN_ONCE(onoff_interval || shuffle_interval, "%s: Non-zero onoff_interval (%d) or shuffle_interval (%d) breaks trivial RCU, resetting to zero", __func__, onoff_interval, shuffle_interval)) { + onoff_interval = 0; + shuffle_interval = 0; + } +} + +static int rcu_torture_read_lock_trivial_preempt(void) +{ + struct task_struct *t = current; + + WRITE_ONCE(t->rcu_trivial_preempt_nesting, t->rcu_trivial_preempt_nesting + 1); + smp_mb(); + return 0; +} + +static void rcu_torture_read_unlock_trivial_preempt(int idx) +{ + struct task_struct *t = current; + + smp_store_release(&t->rcu_trivial_preempt_nesting, t->rcu_trivial_preempt_nesting - 1); +} + +static struct rcu_torture_ops trivial_preempt_ops = { + .ttype = RCU_TRIVIAL_FLAVOR, + .init = rcu_sync_torture_init_trivial_preempt, + .readlock = rcu_torture_read_lock_trivial_preempt, + .read_delay = rcu_read_delay, // just reuse rcu's version. + .readunlock = rcu_torture_read_unlock_trivial_preempt, + .readlock_held = torture_readlock_not_held, + .get_gp_seq = rcu_no_completed, + .sync = synchronize_rcu_trivial_preempt, + .exp_sync = synchronize_rcu_trivial_preempt, + .irq_capable = 0, // In theory it should be, but let's keep it trivial. + .name = "trivial-preempt" +}; + +#define TRIVIAL_PREEMPT_OPS &trivial_preempt_ops, + +#else // #ifdef CONFIG_TRIVIAL_PREEMPT_RCU + +#define TRIVIAL_PREEMPT_OPS + +#endif // #else // #ifdef CONFIG_TRIVIAL_PREEMPT_RCU + #ifdef CONFIG_TASKS_RCU /* @@ -4449,7 +4511,7 @@ rcu_torture_init(void) static struct rcu_torture_ops *torture_ops[] = { &rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, &busted_srcud_ops, TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS - &trivial_ops, + &trivial_ops, TRIVIAL_PREEMPT_OPS }; if (!torture_init_begin(torture_type, verbose)) diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c index c158b6a947cd..a2d9d75d88a1 100644 --- a/kernel/rcu/refscale.c +++ b/kernel/rcu/refscale.c @@ -92,15 +92,9 @@ torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs."); torture_param(int, nruns, 30, "Number of experiments to run."); // Reader delay in nanoseconds, 0 for no delay. torture_param(int, readdelay, 0, "Read-side delay in nanoseconds."); - -#ifdef MODULE -# define REFSCALE_SHUTDOWN 0 -#else -# define REFSCALE_SHUTDOWN 1 -#endif - -torture_param(bool, shutdown, REFSCALE_SHUTDOWN, - "Shutdown at end of scalability tests."); +// Maximum shutdown delay in seconds, or zero for no shutdown. +torture_param(int, shutdown_secs, !IS_MODULE(CONFIG_REPRO_TEST) * 300, + "Shutdown at end of scalability tests or at specified timeout (s)."); struct reader_task { struct task_struct *task; @@ -109,12 +103,8 @@ struct reader_task { u64 last_duration_ns; }; -static struct task_struct *shutdown_task; -static wait_queue_head_t shutdown_wq; - static struct task_struct *main_task; static wait_queue_head_t main_wq; -static int shutdown_start; static struct reader_task *reader_tasks; @@ -1357,6 +1347,8 @@ static u64 process_durations(int n) return sum; } +static void ref_scale_cleanup(void); + // The main_func is the main orchestrator, it performs a bunch of // experiments. For every experiment, it orders all the readers // involved to start and waits for them to finish the experiment. It @@ -1443,9 +1435,10 @@ static int main_func(void *arg) oom_exit: // This will shutdown everything including us. - if (shutdown) { - shutdown_start = 1; - wake_up(&shutdown_wq); + if (shutdown_secs) { + main_task = NULL; // Avoid self-kill deadlock. + ref_scale_cleanup(); + kernel_power_off(); } // Wait for torture to stop us @@ -1463,8 +1456,8 @@ static void ref_scale_print_module_parms(const struct ref_scale_ops *cur_ops, const char *tag) { pr_alert("%s" SCALE_FLAG - "--- %s: verbose=%d verbose_batched=%d shutdown=%d holdoff=%d lookup_instances=%ld loops=%d nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag, - verbose, verbose_batched, shutdown, holdoff, lookup_instances, loops, nreaders, nruns, readdelay); + "--- %s: verbose=%d verbose_batched=%d shutdown_secs=%d holdoff=%d lookup_instances=%ld loops=%d nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag, + verbose, verbose_batched, shutdown_secs, holdoff, lookup_instances, loops, nreaders, nruns, readdelay); } static void @@ -1497,19 +1490,6 @@ ref_scale_cleanup(void) torture_cleanup_end(); } -// Shutdown kthread. Just waits to be awakened, then shuts down system. -static int -ref_scale_shutdown(void *arg) -{ - wait_event_idle(shutdown_wq, shutdown_start); - - smp_mb(); // Wake before output. - ref_scale_cleanup(); - kernel_power_off(); - - return -EINVAL; -} - static int __init ref_scale_init(void) { @@ -1553,13 +1533,10 @@ ref_scale_init(void) ref_scale_print_module_parms(cur_ops, "Start of test"); // Shutdown task - if (shutdown) { - init_waitqueue_head(&shutdown_wq); - firsterr = torture_create_kthread(ref_scale_shutdown, NULL, - shutdown_task); + if (shutdown_secs) { + firsterr = torture_shutdown_init(shutdown_secs, ref_scale_cleanup); if (torture_init_error(firsterr)) goto unwind; - schedule_timeout_uninterruptible(1); } // Reader tasks (default to ~75% of online CPUs). @@ -1604,7 +1581,7 @@ ref_scale_init(void) unwind: torture_init_end(); ref_scale_cleanup(); - if (shutdown) { + if (shutdown_secs) { WARN_ON(!IS_MODULE(CONFIG_RCU_REF_SCALE_TEST)); kernel_power_off(); } diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 2b55e6acf3c1..48f0d803c8e2 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -291,9 +291,9 @@ static void cblist_init_generic(struct rcu_tasks *rtp) shift = ilog2(rcu_task_cpu_ids / lim); if (((rcu_task_cpu_ids - 1) >> shift) >= lim) shift++; - WRITE_ONCE(rtp->percpu_enqueue_shift, shift); - WRITE_ONCE(rtp->percpu_dequeue_lim, lim); - smp_store_release(&rtp->percpu_enqueue_lim, lim); + rtp->percpu_enqueue_shift = shift; + rtp->percpu_dequeue_lim = lim; + rtp->percpu_enqueue_lim = lim; pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d rcu_task_cpu_ids=%d.\n", rtp->name, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim), diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index b3337c7231cc..1047b30cd46b 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -378,6 +378,38 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j) WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j, false)); } +/* + * Determine if the bypass queue needs to be flushed based on time and size. + * For lazy-only bypass queues, use the lazy flush timeout; otherwise flush + * based on jiffy advancement. The flush_faster controls flush aggressiveness. + */ +static bool nocb_bypass_needs_flush(struct rcu_data *rdp, long bypass_ncbs, + long lazy_ncbs, unsigned long j, + bool flush_faster) +{ + bool bypass_is_lazy; + unsigned long bypass_first; + unsigned long flush_timeout; + long qhimark_thresh; + + if (!bypass_ncbs) + return false; + + qhimark_thresh = flush_faster ? qhimark : 2 * qhimark; + if (bypass_ncbs >= qhimark_thresh) + return true; + + bypass_first = READ_ONCE(rdp->nocb_bypass_first); + bypass_is_lazy = (bypass_ncbs == lazy_ncbs); + + if (bypass_is_lazy) + flush_timeout = rcu_get_jiffies_lazy_flush(); + else + flush_timeout = flush_faster ? 0 : 1; + + return time_after(j, bypass_first + flush_timeout); +} + /* * See whether it is appropriate to use the ->nocb_bypass list in order * to control contention on ->nocb_lock. A limited number of direct @@ -404,7 +436,8 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, unsigned long cur_gp_seq; unsigned long j = jiffies; long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); - bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len)); + long lazy_len = READ_ONCE(rdp->lazy_len); + bool bypass_is_lazy = (ncbs == lazy_len); lockdep_assert_irqs_disabled(); @@ -456,10 +489,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, // If ->nocb_bypass has been used too long or is too full, // flush ->nocb_bypass to ->cblist. - if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) || - (ncbs && bypass_is_lazy && - (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()))) || - ncbs >= qhimark) { + if (nocb_bypass_needs_flush(rdp, ncbs, lazy_len, j, true)) { rcu_nocb_lock(rdp); *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); @@ -673,15 +703,8 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); lazy_ncbs = READ_ONCE(rdp->lazy_len); - if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) && - (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()) || - bypass_ncbs > 2 * qhimark)) { - flush_bypass = true; - } else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) && - (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) || - bypass_ncbs > 2 * qhimark)) { - flush_bypass = true; - } else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) { + flush_bypass = nocb_bypass_needs_flush(rdp, bypass_ncbs, lazy_ncbs, j, false); + if (!flush_bypass && !bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) { rcu_nocb_unlock_irqrestore(rdp, flags); continue; /* No callbacks here, try next. */ } @@ -1081,30 +1104,6 @@ static int rcu_nocb_rdp_deoffload(struct rcu_data *rdp) return 0; } -int rcu_nocb_cpu_deoffload(int cpu) -{ - struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); - int ret = 0; - - cpus_read_lock(); - mutex_lock(&rcu_state.nocb_mutex); - if (rcu_rdp_is_offloaded(rdp)) { - if (!cpu_online(cpu)) { - ret = rcu_nocb_rdp_deoffload(rdp); - if (!ret) - cpumask_clear_cpu(cpu, rcu_nocb_mask); - } else { - pr_info("NOCB: Cannot CB-deoffload online CPU %d\n", rdp->cpu); - ret = -EINVAL; - } - } - mutex_unlock(&rcu_state.nocb_mutex); - cpus_read_unlock(); - - return ret; -} -EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload); - static bool rcu_nocb_rdp_offload_wait_cond(struct rcu_data *rdp) { unsigned long flags; @@ -1149,28 +1148,52 @@ static int rcu_nocb_rdp_offload(struct rcu_data *rdp) return 0; } -int rcu_nocb_cpu_offload(int cpu) +/* Common helper for CPU offload/deoffload operations. */ +static int rcu_nocb_cpu_toggle_offload(int cpu, bool offload) { struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); int ret = 0; cpus_read_lock(); mutex_lock(&rcu_state.nocb_mutex); - if (!rcu_rdp_is_offloaded(rdp)) { - if (!cpu_online(cpu)) { - ret = rcu_nocb_rdp_offload(rdp); - if (!ret) - cpumask_set_cpu(cpu, rcu_nocb_mask); - } else { - pr_info("NOCB: Cannot CB-offload online CPU %d\n", rdp->cpu); - ret = -EINVAL; - } + + /* Already in desired state, nothing to do. */ + if (rcu_rdp_is_offloaded(rdp) == offload) + goto out_unlock; + + if (cpu_online(cpu)) { + pr_info("NOCB: Cannot CB-%soffload online CPU %d\n", + offload ? "" : "de", rdp->cpu); + ret = -EINVAL; + goto out_unlock; } + + if (offload) { + ret = rcu_nocb_rdp_offload(rdp); + if (!ret) + cpumask_set_cpu(cpu, rcu_nocb_mask); + } else { + ret = rcu_nocb_rdp_deoffload(rdp); + if (!ret) + cpumask_clear_cpu(cpu, rcu_nocb_mask); + } + +out_unlock: mutex_unlock(&rcu_state.nocb_mutex); cpus_read_unlock(); - return ret; } + +int rcu_nocb_cpu_deoffload(int cpu) +{ + return rcu_nocb_cpu_toggle_offload(cpu, false /* de-offload */); +} +EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload); + +int rcu_nocb_cpu_offload(int cpu) +{ + return rcu_nocb_cpu_toggle_offload(cpu, true /* offload */); +} EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload); #ifdef CONFIG_RCU_LAZY diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index d98a5c38e19c..b62735a67884 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -538,6 +538,28 @@ long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask, bool do EXPORT_SYMBOL_GPL(torture_sched_setaffinity); #endif +#if IS_ENABLED(CONFIG_TRIVIAL_PREEMPT_RCU) +// Trivial and stupid grace-period wait. Defined here so that lockdep +// kernels can find tasklist_lock. +void synchronize_rcu_trivial_preempt(void) +{ + struct task_struct *g; + struct task_struct *t; + + smp_mb(); // Order prior accesses before grace-period start. + rcu_read_lock(); // Protect task list. + for_each_process_thread(g, t) { + if (t == current) + continue; // Don't deadlock on ourselves! + // Order later rcu_read_lock() on other tasks after QS. + while (smp_load_acquire(&t->rcu_trivial_preempt_nesting)) + continue; + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(synchronize_rcu_trivial_preempt); +#endif // #if IS_ENABLED(CONFIG_TRIVIAL_PREEMPT_RCU) + int rcu_cpu_stall_notifiers __read_mostly; // !0 = provide stall notifiers (rarely useful) EXPORT_SYMBOL_GPL(rcu_cpu_stall_notifiers); diff --git a/kernel/torture.c b/kernel/torture.c index ec3370986976..62c1ac777694 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -93,7 +93,7 @@ int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, const enum hrtimer_mode { ktime_t hto = baset_ns; - if (trsp) + if (trsp && fuzzt_ns) hto += torture_random(trsp) % fuzzt_ns; set_current_state(TASK_IDLE); return schedule_hrtimeout(&hto, mode); diff --git a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh deleted file mode 100755 index ed0ec7f0927e..000000000000 --- a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh +++ /dev/null @@ -1,102 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0+ -# -# Run a group of kvm.sh tests on the specified commits. This currently -# unconditionally does three-minute runs on each scenario in CFLIST, -# taking advantage of all available CPUs and trusting the "make" utility. -# In the short term, adjustments can be made by editing this script and -# CFLIST. If some adjustments appear to have ongoing value, this script -# might grow some command-line arguments. -# -# Usage: kvm-check-branches.sh commit1 commit2..commit3 commit4 ... -# -# This script considers its arguments one at a time. If more elaborate -# specification of commits is needed, please use "git rev-list" to -# produce something that this simple script can understand. The reason -# for retaining the simplicity is that it allows the user to more easily -# see which commit came from which branch. -# -# This script creates a yyyy.mm.dd-hh.mm.ss-group entry in the "res" -# directory. The calls to kvm.sh create the usual entries, but this script -# moves them under the yyyy.mm.dd-hh.mm.ss-group entry, each in its own -# directory numbered in run order, that is, "0001", "0002", and so on. -# For successful runs, the large build artifacts are removed. Doing this -# reduces the disk space required by about two orders of magnitude for -# successful runs. -# -# Copyright (C) Facebook, 2020 -# -# Authors: Paul E. McKenney - -if ! git status > /dev/null 2>&1 -then - echo '!!!' This script needs to run in a git archive. 1>&2 - echo '!!!' Giving up. 1>&2 - exit 1 -fi - -# Remember where we started so that we can get back at the end. -curcommit="`git status | head -1 | awk '{ print $NF }'`" - -nfail=0 -ntry=0 -resdir="tools/testing/selftests/rcutorture/res" -ds="`date +%Y.%m.%d-%H.%M.%S`-group" -if ! test -e $resdir -then - mkdir $resdir || : -fi -mkdir $resdir/$ds -echo Results directory: $resdir/$ds - -RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE -PATH=${RCUTORTURE}/bin:$PATH; export PATH -. functions.sh -echo Using all `identify_qemu_vcpus` CPUs. - -# Each pass through this loop does one command-line argument. -for gitbr in $@ -do - echo ' --- git branch ' $gitbr - - # Each pass through this loop tests one commit. - for i in `git rev-list "$gitbr"` - do - ntry=`expr $ntry + 1` - idir=`awk -v ntry="$ntry" 'END { printf "%04d", ntry; }' < /dev/null` - echo ' --- commit ' $i from branch $gitbr - date - mkdir $resdir/$ds/$idir - echo $gitbr > $resdir/$ds/$idir/gitbr - echo $i >> $resdir/$ds/$idir/gitbr - - # Test the specified commit. - git checkout $i > $resdir/$ds/$idir/git-checkout.out 2>&1 - echo git checkout return code: $? "(Commit $ntry: $i)" - kvm.sh --allcpus --duration 3 --trust-make --datestamp "$ds/$idir" > $resdir/$ds/$idir/kvm.sh.out 2>&1 - ret=$? - echo kvm.sh return code $ret for commit $i from branch $gitbr - echo Run results: $resdir/$ds/$idir - if test "$ret" -ne 0 - then - # Failure, so leave all evidence intact. - nfail=`expr $nfail + 1` - else - # Success, so remove large files to save about 1GB. - ( cd $resdir/$ds/$idir/$rrd; rm -f */vmlinux */bzImage */System.map */Module.symvers ) - fi - done -done -date - -# Go back to the original commit. -git checkout "$curcommit" - -if test $nfail -ne 0 -then - echo '!!! ' $nfail failures in $ntry 'runs!!!' - exit 1 -else - echo No failures in $ntry runs. - exit 0 -fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index de65d77b47ff..63bbbdd5f4ef 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -39,7 +39,12 @@ do X*) ;; *) - kvm-recheck-${TORTURE_SUITE}.sh $i + if test -f tools/testing/selftests/rcutorture/bin/kvm-recheck-${TORTURE_SUITE}.sh + then + kvm-recheck-${TORTURE_SUITE}.sh $i + else + echo No kvm-recheck-${TORTURE_SUITE}.sh, so no ${TORTURE_SUITE}-specific analysis. + fi esac if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -ne 0 && test "`cat $i/qemu-retval`" -ne 137 then @@ -49,7 +54,7 @@ do then if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -eq 137 then - echo QEMU killed + echo "Summary: Potential hang (QEMU killed)" fi configcheck.sh $i/.config $i/ConfigFragment > $i/ConfigFragment.diags 2>&1 if grep -q '^CONFIG_KCSAN=y$' $i/ConfigFragment.input diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 21e6ba3615f6..be1e943ca4d5 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -113,7 +113,6 @@ then then print_warning $title `cat $T.seq` fi - exit 2 fi fi | tee -a $file.diags diff --git a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh index 208be7d09a61..4e98c697def4 100755 --- a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh +++ b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh @@ -50,7 +50,7 @@ do do err= val=$((d*1000+t*10+c)) - tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.test_srcu_lockdep=$val rcutorture.reader_flavor=0x2" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1 + tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.test_srcu_lockdep=$val rcutorture.reader_flavor=0x4" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1 ret=$? mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val" if ! grep -q '^CONFIG_PROVE_LOCKING=y' .config @@ -92,12 +92,12 @@ do nerrs=$((nerrs+1)) err=1 fi - if test "$val" -eq 0xf && test "$ret" -eq 0 + if test "$val" = 0xf && test "$ret" -eq 0 then err=1 echo -n Unexpected success for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err" fi - if test "$val" -eq 0x1 && test "$ret" -ne 0 + if test "$val" = 0x1 && test "$ret" -ne 0 then err=1 echo -n Unexpected failure for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err" diff --git a/tools/testing/selftests/rcutorture/configs/rcu/NOCB01 b/tools/testing/selftests/rcutorture/configs/rcu/NOCB01 new file mode 100644 index 000000000000..bbe6d28210ab --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/NOCB01 @@ -0,0 +1,21 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=8 +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_PREEMPT_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_TRACE=y +CONFIG_HOTPLUG_CPU=y +CONFIG_RCU_FANOUT=3 +CONFIG_RCU_FANOUT_LEAF=2 +CONFIG_RCU_NOCB_CPU=y +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y +CONFIG_RCU_EQS_DEBUG=y +CONFIG_RCU_LAZY=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/NOCB01.boot b/tools/testing/selftests/rcutorture/configs/rcu/NOCB01.boot new file mode 100644 index 000000000000..5130bc84c435 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/NOCB01.boot @@ -0,0 +1,2 @@ +rcupdate.rcu_self_test=1 +rcu_nocbs=all diff --git a/tools/testing/selftests/rcutorture/configs/rcu/NOCB02 b/tools/testing/selftests/rcutorture/configs/rcu/NOCB02 new file mode 100644 index 000000000000..4c2b8cd6d8fd --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/NOCB02 @@ -0,0 +1,20 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=8 +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_PREEMPT_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_TRACE=y +CONFIG_HOTPLUG_CPU=y +CONFIG_RCU_FANOUT=3 +CONFIG_RCU_FANOUT_LEAF=2 +CONFIG_RCU_NOCB_CPU=y +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y +CONFIG_RCU_EQS_DEBUG=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/NOCB02.boot b/tools/testing/selftests/rcutorture/configs/rcu/NOCB02.boot new file mode 100644 index 000000000000..c212ae299b0b --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/NOCB02.boot @@ -0,0 +1,3 @@ +rcupdate.rcu_self_test=1 +rcu_nocbs=all +rcu_nocb_poll diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT new file mode 100644 index 000000000000..8230b14bfe68 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT @@ -0,0 +1,12 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=8 +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y +CONFIG_TRIVIAL_PREEMPT_RCU=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT.boot new file mode 100644 index 000000000000..299cd3a12df6 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT.boot @@ -0,0 +1,3 @@ +rcutorture.torture_type=trivial-preempt +rcutorture.onoff_interval=0 +rcutorture.shuffle_interval=0 diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh index 28070b43f017..b78ddc243d89 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh @@ -11,7 +11,7 @@ # # Adds per-version torture-module parameters to kernels supporting them. per_version_boot_params () { - echo rcuscale.shutdown=1 \ + echo rcuscale.shutdown_secs=$3 \ rcuscale.verbose=0 \ $1 } diff --git a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh index 748465627601..219fac070af2 100644 --- a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh @@ -11,7 +11,7 @@ # # Adds per-version torture-module parameters to kernels supporting them. per_version_boot_params () { - echo refscale.shutdown=1 \ + echo refscale.shutdown_secs=$3 \ refscale.verbose=0 \ $1 }