diff --git a/.mailmap b/.mailmap index 7d6fb567ba5e..4cdb7986fa43 100644 --- a/.mailmap +++ b/.mailmap @@ -208,6 +208,7 @@ Colin Ian King Corey Minyard Damian Hobson-Garcia Dan Carpenter +Dan Williams Daniel Borkmann Daniel Borkmann Daniel Borkmann @@ -427,6 +428,7 @@ John Stultz Jonas Gorski +Jonathan Cameron Jordan Crouse diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index c80a1b5a03db..16a9b3d2e2c0 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -508,6 +508,19 @@ Description: (RO) The size of extended linear cache, if there is an extended linear cache. Otherwise the attribute will not be visible. + +What: /sys/bus/cxl/devices/regionZ/locked +Date: Mar, 2026 +KernelVersion: v7.1 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) The CXL driver has the capability to lock a region based on + a BIOS or platform dependent configuration. Regions created as + locked are never permitted to be destroyed. Resets to participating + decoders will not result in a region destroy and will not free the + decoder resources. 
+ + What: /sys/bus/cxl/devices/regionZ/mode Date: January, 2023 KernelVersion: v6.3 diff --git a/MAINTAINERS b/MAINTAINERS index 8562d244a49f..d0aa32154356 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4055,7 +4055,7 @@ S: Maintained F: crypto/rsa* ASYNCHRONOUS TRANSFERS/TRANSFORMS (IOAT) API -R: Dan Williams +R: Dan Williams S: Odd fixes W: http://sourceforge.net/projects/xscaleiop F: Documentation/crypto/async-tx-api.rst @@ -6422,12 +6422,12 @@ F: include/linux/compiler_attributes.h COMPUTE EXPRESS LINK (CXL) M: Davidlohr Bueso -M: Jonathan Cameron +M: Jonathan Cameron M: Dave Jiang M: Alison Schofield M: Vishal Verma M: Ira Weiny -M: Dan Williams +M: Dan Williams L: linux-cxl@vger.kernel.org S: Maintained F: Documentation/driver-api/cxl @@ -6438,7 +6438,7 @@ F: include/uapi/linux/cxl_mem.h F: tools/testing/cxl/ COMPUTE EXPRESS LINK PMU (CPMU) -M: Jonathan Cameron +M: Jonathan Cameron L: linux-cxl@vger.kernel.org S: Maintained F: Documentation/admin-guide/perf/cxl.rst @@ -7295,7 +7295,7 @@ S: Maintained F: scripts/dev-needs.sh DEVICE DIRECT ACCESS (DAX) -M: Dan Williams +M: Dan Williams M: Vishal Verma M: Dave Jiang L: nvdimm@lists.linux.dev @@ -9852,7 +9852,7 @@ F: include/linux/fcntl.h F: include/uapi/linux/fcntl.h FILESYSTEM DIRECT ACCESS (DAX) -M: Dan Williams +M: Dan Williams R: Matthew Wilcox R: Jan Kara L: linux-fsdevel@vger.kernel.org @@ -10597,7 +10597,7 @@ FWCTL SUBSYSTEM M: Dave Jiang M: Jason Gunthorpe M: Saeed Mahameed -R: Jonathan Cameron +R: Jonathan Cameron S: Maintained F: Documentation/userspace-api/fwctl/ F: drivers/fwctl/ @@ -12938,7 +12938,7 @@ F: drivers/platform/x86/intel/hid.c INTEL I/OAT DMA DRIVER M: Dave Jiang -R: Dan Williams +R: Dan Williams L: dmaengine@vger.kernel.org S: Supported Q: https://patchwork.kernel.org/project/linux-dmaengine/list/ @@ -14657,7 +14657,7 @@ K: libie LIBNVDIMM BTT: BLOCK TRANSLATION TABLE M: Vishal Verma -M: Dan Williams +M: Dan Williams M: Dave Jiang L: nvdimm@lists.linux.dev S: Supported @@ -14666,7 
+14666,7 @@ P: Documentation/nvdimm/maintainer-entry-profile.rst F: drivers/nvdimm/btt* LIBNVDIMM PMEM: PERSISTENT MEMORY DRIVER -M: Dan Williams +M: Dan Williams M: Vishal Verma M: Dave Jiang L: nvdimm@lists.linux.dev @@ -14684,7 +14684,7 @@ F: Documentation/devicetree/bindings/pmem/pmem-region.yaml F: drivers/nvdimm/of_pmem.c LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM -M: Dan Williams +M: Dan Williams M: Vishal Verma M: Dave Jiang M: Ira Weiny @@ -25361,7 +25361,7 @@ F: drivers/staging/ STANDALONE CACHE CONTROLLER DRIVERS M: Conor Dooley -M: Jonathan Cameron +M: Jonathan Cameron S: Maintained T: git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/ F: Documentation/devicetree/bindings/cache/ @@ -27088,7 +27088,7 @@ S: Maintained F: Documentation/devicetree/bindings/trigger-source/* TRUSTED EXECUTION ENVIRONMENT SECURITY MANAGER (TSM) -M: Dan Williams +M: Dan Williams L: linux-coco@lists.linux.dev S: Maintained F: Documentation/ABI/testing/configfs-tsm-report diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c index aa87ee1583a4..62d4a8df0b8c 100644 --- a/drivers/acpi/numa/srat.c +++ b/drivers/acpi/numa/srat.c @@ -654,8 +654,11 @@ int __init acpi_numa_init(void) } last_real_pxm = fake_pxm; fake_pxm++; - acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, acpi_parse_cfmws, - &fake_pxm); + + /* No need to expand numa nodes if CXL is disabled */ + if (IS_ENABLED(CONFIG_CXL_ACPI)) + acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, acpi_parse_cfmws, + &fake_pxm); if (cnt < 0) return cnt; diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index a639a9499972..ce7213818d3c 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -15,7 +15,7 @@ cxl_core-y += hdm.o cxl_core-y += pmu.o cxl_core-y += cdat.o cxl_core-$(CONFIG_TRACING) += trace.o -cxl_core-$(CONFIG_CXL_REGION) += region.o +cxl_core-$(CONFIG_CXL_REGION) += region.o region_pmem.o region_dax.o cxl_core-$(CONFIG_CXL_MCE) += mce.o cxl_core-$(CONFIG_CXL_FEATURES) 
+= features.o cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 5b0570df0fd9..82ca3a476708 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -50,6 +50,8 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port); struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa); u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, u64 dpa); +int devm_cxl_add_dax_region(struct cxl_region *cxlr); +int devm_cxl_add_pmem_region(struct cxl_region *cxlr); #else static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, @@ -224,4 +226,6 @@ int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid, u16 *return_code); #endif +resource_size_t cxl_rcd_component_reg_phys(struct device *dev, + struct cxl_dport *dport); #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index cb5d5a047a9d..0c80b76a5f9b 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -170,7 +170,7 @@ static struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, } parse_hdm_decoder_caps(cxlhdm); - if (cxlhdm->decoder_count == 0) { + if (cxlhdm->decoder_count == 0) { dev_err(dev, "Spec violation. Caps invalid\n"); return ERR_PTR(-ENXIO); } diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 12386d912705..7c6c5b7450a5 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -893,7 +893,7 @@ out: } EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, "CXL"); -void cxl_event_trace_record(const struct cxl_memdev *cxlmd, +void cxl_event_trace_record(struct cxl_memdev *cxlmd, enum cxl_event_log_type type, enum cxl_event_type event_type, const uuid_t *uuid, union cxl_event *evt) @@ -920,6 +920,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, * translations. Take topology mutation locks and lookup * { HPA, REGION } from { DPA, MEMDEV } in the event record. 
*/ + guard(device)(&cxlmd->dev); guard(rwsem_read)(&cxl_rwsem.region); guard(rwsem_read)(&cxl_rwsem.dpa); @@ -968,7 +969,7 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd, } EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, "CXL"); -static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd, +static void __cxl_event_trace_record(struct cxl_memdev *cxlmd, enum cxl_event_log_type type, struct cxl_event_record_raw *record) { @@ -1521,23 +1522,21 @@ int cxl_mailbox_init(struct cxl_mailbox *cxl_mbox, struct device *host) } EXPORT_SYMBOL_NS_GPL(cxl_mailbox_init, "CXL"); -struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) +struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev, u64 serial, + u16 dvsec) { struct cxl_memdev_state *mds; int rc; - mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL); + mds = devm_cxl_dev_state_create(dev, CXL_DEVTYPE_CLASSMEM, serial, + dvsec, struct cxl_memdev_state, cxlds, + true); if (!mds) { dev_err(dev, "No memory available\n"); return ERR_PTR(-ENOMEM); } mutex_init(&mds->event.log_lock); - mds->cxlds.dev = dev; - mds->cxlds.reg_map.host = dev; - mds->cxlds.cxl_mbox.host = dev; - mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; - mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; rc = devm_cxl_register_mce_notifier(dev, &mds->mce_notifier); if (rc == -EOPNOTSUPP) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 273c22118d3d..80e65690eb77 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -204,6 +204,9 @@ bool cxl_memdev_has_poison_cmd(struct cxl_memdev *cxlmd, { struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + if (!mds) + return 0; + return test_bit(cmd, mds->poison.enabled_cmds); } @@ -656,6 +659,30 @@ static void detach_memdev(struct work_struct *work) static struct lock_class_key cxl_memdev_key; +struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev, + enum cxl_devtype type, + u64 serial, u16 dvsec, + size_t size, bool 
has_mbox) +{ + struct cxl_dev_state *cxlds = devm_kzalloc(dev, size, GFP_KERNEL); + + if (!cxlds) + return NULL; + + cxlds->dev = dev; + cxlds->type = type; + cxlds->serial = serial; + cxlds->cxl_dvsec = dvsec; + cxlds->reg_map.host = dev; + cxlds->reg_map.resource = CXL_RESOURCE_NONE; + + if (has_mbox) + cxlds->cxl_mbox.host = dev; + + return cxlds; +} +EXPORT_SYMBOL_NS_GPL(_devm_cxl_dev_state_create, "CXL"); + static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, const struct file_operations *fops, const struct cxl_memdev_attach *attach) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index f96ce884a213..d1f487b3d809 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -696,6 +696,63 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, "CXL"); +static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, + struct cxl_register_map *map, + struct cxl_dport *dport) +{ + resource_size_t component_reg_phys; + + *map = (struct cxl_register_map) { + .host = &pdev->dev, + .resource = CXL_RESOURCE_NONE, + }; + + component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport); + if (component_reg_phys == CXL_RESOURCE_NONE) + return -ENXIO; + + map->resource = component_reg_phys; + map->reg_type = CXL_REGLOC_RBI_COMPONENT; + map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE; + + return 0; +} + +int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map) +{ + int rc; + + rc = cxl_find_regblock(pdev, type, map); + + /* + * If the Register Locator DVSEC does not exist, check if it + * is an RCH and try to extract the Component Registers from + * an RCRB. 
+ */ + if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) { + struct cxl_dport *dport; + struct cxl_port *port __free(put_cxl_port) = + cxl_pci_find_port(pdev, &dport); + if (!port) + return -EPROBE_DEFER; + + rc = cxl_rcrb_get_comp_regs(pdev, map, dport); + if (rc) + return rc; + + rc = cxl_dport_map_rcd_linkcap(pdev, dport); + if (rc) + return rc; + + } else if (rc) { + return rc; + } + + return cxl_setup_regs(map); +} +EXPORT_SYMBOL_NS_GPL(cxl_pci_setup_regs, "CXL"); + int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c) { int speed, bw; diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index c37ae0b28bbb..e50dc716d4e8 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -485,22 +485,14 @@ static ssize_t interleave_ways_show(struct device *dev, static const struct attribute_group *get_cxl_region_target_group(void); -static ssize_t interleave_ways_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t len) +static int set_interleave_ways(struct cxl_region *cxlr, int val) { - struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_root_decoder *cxlrd = cxlr->cxlrd; struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; struct cxl_region_params *p = &cxlr->params; - unsigned int val, save; - int rc; + int save, rc; u8 iw; - rc = kstrtouint(buf, 0, &val); - if (rc) - return rc; - rc = ways_to_eiw(val, &iw); if (rc) return rc; @@ -515,9 +507,7 @@ static ssize_t interleave_ways_store(struct device *dev, return -EINVAL; } - ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); - if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) - return rc; + lockdep_assert_held_write(&cxl_rwsem.region); if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) return -EBUSY; @@ -525,10 +515,31 @@ static ssize_t interleave_ways_store(struct device *dev, save = p->interleave_ways; p->interleave_ways = val; rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group()); - if (rc) { + if 
(rc) p->interleave_ways = save; + + return rc; +} + +static ssize_t interleave_ways_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + int val; + int rc; + + rc = kstrtoint(buf, 0, &val); + if (rc) + return rc; + + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) + return rc; + + rc = set_interleave_ways(cxlr, val); + if (rc) return rc; - } return len; } @@ -548,21 +559,14 @@ static ssize_t interleave_granularity_show(struct device *dev, return sysfs_emit(buf, "%d\n", p->interleave_granularity); } -static ssize_t interleave_granularity_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t len) +static int set_interleave_granularity(struct cxl_region *cxlr, int val) { - struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_root_decoder *cxlrd = cxlr->cxlrd; struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; struct cxl_region_params *p = &cxlr->params; - int rc, val; + int rc; u16 ig; - rc = kstrtoint(buf, 0, &val); - if (rc) - return rc; - rc = granularity_to_eig(val, &ig); if (rc) return rc; @@ -578,14 +582,33 @@ static ssize_t interleave_granularity_store(struct device *dev, if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity) return -EINVAL; - ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); - if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) - return rc; + lockdep_assert_held_write(&cxl_rwsem.region); if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) return -EBUSY; p->interleave_granularity = val; + return 0; +} + +static ssize_t interleave_granularity_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + int rc, val; + + rc = kstrtoint(buf, 0, &val); + if (rc) + return rc; + + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) + 
return rc; + + rc = set_interleave_granularity(cxlr, val); + if (rc) + return rc; return len; } @@ -767,6 +790,22 @@ static ssize_t extended_linear_cache_size_show(struct device *dev, } static DEVICE_ATTR_RO(extended_linear_cache_size); +static ssize_t locked_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + int rc; + + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) + return rc; + + rc = test_bit(CXL_REGION_F_LOCK, &cxlr->flags); + return sysfs_emit(buf, "%d\n", rc); +} +static DEVICE_ATTR_RO(locked); + static struct attribute *cxl_region_attrs[] = { &dev_attr_uuid.attr, &dev_attr_commit.attr, @@ -776,6 +815,7 @@ static struct attribute *cxl_region_attrs[] = { &dev_attr_size.attr, &dev_attr_mode.attr, &dev_attr_extended_linear_cache_size.attr, + &dev_attr_locked.attr, NULL, }; @@ -1063,6 +1103,14 @@ static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr, if (!cxld->region) { cxld->region = cxlr; + + /* + * Now that cxld->region is set the intermediate staging state + * can be cleared. 
+ */ + if (cxld == &cxled->cxld && + cxled->state == CXL_DECODER_STATE_AUTO_STAGED) + cxled->state = CXL_DECODER_STATE_AUTO; get_device(&cxlr->dev); } @@ -1804,6 +1852,7 @@ static int cxl_region_attach_auto(struct cxl_region *cxlr, pos = p->nr_targets; p->targets[pos] = cxled; cxled->pos = pos; + cxled->state = CXL_DECODER_STATE_AUTO_STAGED; p->nr_targets++; return 0; @@ -2153,6 +2202,47 @@ static int cxl_region_attach(struct cxl_region *cxlr, return 0; } +static int cxl_region_by_target(struct device *dev, const void *data) +{ + const struct cxl_endpoint_decoder *cxled = data; + struct cxl_region_params *p; + struct cxl_region *cxlr; + + if (!is_cxl_region(dev)) + return 0; + + cxlr = to_cxl_region(dev); + p = &cxlr->params; + return p->targets[cxled->pos] == cxled; +} + +/* + * When an auto-region fails to assemble the decoder may be listed as a target, + * but not fully attached. + */ +static void cxl_cancel_auto_attach(struct cxl_endpoint_decoder *cxled) +{ + struct cxl_region_params *p; + struct cxl_region *cxlr; + int pos = cxled->pos; + + if (cxled->state != CXL_DECODER_STATE_AUTO_STAGED) + return; + + struct device *dev __free(put_device) = + bus_find_device(&cxl_bus_type, NULL, cxled, cxl_region_by_target); + if (!dev) + return; + + cxlr = to_cxl_region(dev); + p = &cxlr->params; + + p->nr_targets--; + cxled->state = CXL_DECODER_STATE_AUTO; + cxled->pos = -1; + p->targets[pos] = NULL; +} + static struct cxl_region * __cxl_decoder_detach(struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled, int pos, @@ -2176,8 +2266,10 @@ __cxl_decoder_detach(struct cxl_region *cxlr, cxled = p->targets[pos]; } else { cxlr = cxled->cxld.region; - if (!cxlr) + if (!cxlr) { + cxl_cancel_auto_attach(cxled); return NULL; + } p = &cxlr->params; } @@ -2650,7 +2742,8 @@ static ssize_t create_ram_region_show(struct device *dev, } static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, - enum cxl_partition_mode mode, int id) + enum cxl_partition_mode mode, 
int id, + enum cxl_decoder_type target_type) { int rc; @@ -2672,7 +2765,7 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, return ERR_PTR(-EBUSY); } - return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM); + return devm_cxl_add_region(cxlrd, id, mode, target_type); } static ssize_t create_region_store(struct device *dev, const char *buf, @@ -2686,7 +2779,7 @@ static ssize_t create_region_store(struct device *dev, const char *buf, if (rc != 1) return -EINVAL; - cxlr = __create_region(cxlrd, mode, id); + cxlr = __create_region(cxlrd, mode, id, CXL_DECODER_HOSTONLYMEM); if (IS_ERR(cxlr)) return PTR_ERR(cxlr); @@ -2757,46 +2850,6 @@ static ssize_t delete_region_store(struct device *dev, } DEVICE_ATTR_WO(delete_region); -static void cxl_pmem_region_release(struct device *dev) -{ - struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev); - int i; - - for (i = 0; i < cxlr_pmem->nr_mappings; i++) { - struct cxl_memdev *cxlmd = cxlr_pmem->mapping[i].cxlmd; - - put_device(&cxlmd->dev); - } - - kfree(cxlr_pmem); -} - -static const struct attribute_group *cxl_pmem_region_attribute_groups[] = { - &cxl_base_attribute_group, - NULL, -}; - -const struct device_type cxl_pmem_region_type = { - .name = "cxl_pmem_region", - .release = cxl_pmem_region_release, - .groups = cxl_pmem_region_attribute_groups, -}; - -bool is_cxl_pmem_region(struct device *dev) -{ - return dev->type == &cxl_pmem_region_type; -} -EXPORT_SYMBOL_NS_GPL(is_cxl_pmem_region, "CXL"); - -struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev) -{ - if (dev_WARN_ONCE(dev, !is_cxl_pmem_region(dev), - "not a cxl_pmem_region device\n")) - return NULL; - return container_of(dev, struct cxl_pmem_region, dev); -} -EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, "CXL"); - struct cxl_poison_context { struct cxl_port *port; int part; @@ -2950,13 +3003,15 @@ static int __cxl_dpa_to_region(struct device *dev, void *arg) struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev 
*cxlmd, u64 dpa) { struct cxl_dpa_to_region_context ctx; - struct cxl_port *port; + struct cxl_port *port = cxlmd->endpoint; + + if (!cxlmd->dev.driver) + return NULL; ctx = (struct cxl_dpa_to_region_context) { .dpa = dpa, }; - port = cxlmd->endpoint; - if (port && is_cxl_endpoint(port) && cxl_num_decoders_committed(port)) + if (cxl_num_decoders_committed(port)) device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region); return ctx.cxlr; @@ -3450,249 +3505,6 @@ static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset, return -ENXIO; } -static struct lock_class_key cxl_pmem_region_key; - -static int cxl_pmem_region_alloc(struct cxl_region *cxlr) -{ - struct cxl_region_params *p = &cxlr->params; - struct cxl_nvdimm_bridge *cxl_nvb; - struct device *dev; - int i; - - guard(rwsem_read)(&cxl_rwsem.region); - if (p->state != CXL_CONFIG_COMMIT) - return -ENXIO; - - struct cxl_pmem_region *cxlr_pmem __free(kfree) = - kzalloc_flex(*cxlr_pmem, mapping, p->nr_targets); - if (!cxlr_pmem) - return -ENOMEM; - - cxlr_pmem->hpa_range.start = p->res->start; - cxlr_pmem->hpa_range.end = p->res->end; - - /* Snapshot the region configuration underneath the cxl_rwsem.region */ - cxlr_pmem->nr_mappings = p->nr_targets; - for (i = 0; i < p->nr_targets; i++) { - struct cxl_endpoint_decoder *cxled = p->targets[i]; - struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; - - /* - * Regions never span CXL root devices, so by definition the - * bridge for one device is the same for all. 
- */ - if (i == 0) { - cxl_nvb = cxl_find_nvdimm_bridge(cxlmd->endpoint); - if (!cxl_nvb) - return -ENODEV; - cxlr->cxl_nvb = cxl_nvb; - } - m->cxlmd = cxlmd; - get_device(&cxlmd->dev); - m->start = cxled->dpa_res->start; - m->size = resource_size(cxled->dpa_res); - m->position = i; - } - - dev = &cxlr_pmem->dev; - device_initialize(dev); - lockdep_set_class(&dev->mutex, &cxl_pmem_region_key); - device_set_pm_not_required(dev); - dev->parent = &cxlr->dev; - dev->bus = &cxl_bus_type; - dev->type = &cxl_pmem_region_type; - cxlr_pmem->cxlr = cxlr; - cxlr->cxlr_pmem = no_free_ptr(cxlr_pmem); - - return 0; -} - -static void cxl_dax_region_release(struct device *dev) -{ - struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev); - - kfree(cxlr_dax); -} - -static const struct attribute_group *cxl_dax_region_attribute_groups[] = { - &cxl_base_attribute_group, - NULL, -}; - -const struct device_type cxl_dax_region_type = { - .name = "cxl_dax_region", - .release = cxl_dax_region_release, - .groups = cxl_dax_region_attribute_groups, -}; - -static bool is_cxl_dax_region(struct device *dev) -{ - return dev->type == &cxl_dax_region_type; -} - -struct cxl_dax_region *to_cxl_dax_region(struct device *dev) -{ - if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev), - "not a cxl_dax_region device\n")) - return NULL; - return container_of(dev, struct cxl_dax_region, dev); -} -EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, "CXL"); - -static struct lock_class_key cxl_dax_region_key; - -static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr) -{ - struct cxl_region_params *p = &cxlr->params; - struct cxl_dax_region *cxlr_dax; - struct device *dev; - - guard(rwsem_read)(&cxl_rwsem.region); - if (p->state != CXL_CONFIG_COMMIT) - return ERR_PTR(-ENXIO); - - cxlr_dax = kzalloc_obj(*cxlr_dax); - if (!cxlr_dax) - return ERR_PTR(-ENOMEM); - - cxlr_dax->hpa_range.start = p->res->start; - cxlr_dax->hpa_range.end = p->res->end; - - dev = &cxlr_dax->dev; - cxlr_dax->cxlr = cxlr; - 
device_initialize(dev); - lockdep_set_class(&dev->mutex, &cxl_dax_region_key); - device_set_pm_not_required(dev); - dev->parent = &cxlr->dev; - dev->bus = &cxl_bus_type; - dev->type = &cxl_dax_region_type; - - return cxlr_dax; -} - -static void cxlr_pmem_unregister(void *_cxlr_pmem) -{ - struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem; - struct cxl_region *cxlr = cxlr_pmem->cxlr; - struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb; - - /* - * Either the bridge is in ->remove() context under the device_lock(), - * or cxlr_release_nvdimm() is cancelling the bridge's release action - * for @cxlr_pmem and doing it itself (while manually holding the bridge - * lock). - */ - device_lock_assert(&cxl_nvb->dev); - cxlr->cxlr_pmem = NULL; - cxlr_pmem->cxlr = NULL; - device_unregister(&cxlr_pmem->dev); -} - -static void cxlr_release_nvdimm(void *_cxlr) -{ - struct cxl_region *cxlr = _cxlr; - struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb; - - scoped_guard(device, &cxl_nvb->dev) { - if (cxlr->cxlr_pmem) - devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister, - cxlr->cxlr_pmem); - } - cxlr->cxl_nvb = NULL; - put_device(&cxl_nvb->dev); -} - -/** - * devm_cxl_add_pmem_region() - add a cxl_region-to-nd_region bridge - * @cxlr: parent CXL region for this pmem region bridge device - * - * Return: 0 on success negative error code on failure. 
- */ -static int devm_cxl_add_pmem_region(struct cxl_region *cxlr) -{ - struct cxl_pmem_region *cxlr_pmem; - struct cxl_nvdimm_bridge *cxl_nvb; - struct device *dev; - int rc; - - rc = cxl_pmem_region_alloc(cxlr); - if (rc) - return rc; - cxlr_pmem = cxlr->cxlr_pmem; - cxl_nvb = cxlr->cxl_nvb; - - dev = &cxlr_pmem->dev; - rc = dev_set_name(dev, "pmem_region%d", cxlr->id); - if (rc) - goto err; - - rc = device_add(dev); - if (rc) - goto err; - - dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent), - dev_name(dev)); - - scoped_guard(device, &cxl_nvb->dev) { - if (cxl_nvb->dev.driver) - rc = devm_add_action_or_reset(&cxl_nvb->dev, - cxlr_pmem_unregister, - cxlr_pmem); - else - rc = -ENXIO; - } - - if (rc) - goto err_bridge; - - /* @cxlr carries a reference on @cxl_nvb until cxlr_release_nvdimm */ - return devm_add_action_or_reset(&cxlr->dev, cxlr_release_nvdimm, cxlr); - -err: - put_device(dev); -err_bridge: - put_device(&cxl_nvb->dev); - cxlr->cxl_nvb = NULL; - return rc; -} - -static void cxlr_dax_unregister(void *_cxlr_dax) -{ - struct cxl_dax_region *cxlr_dax = _cxlr_dax; - - device_unregister(&cxlr_dax->dev); -} - -static int devm_cxl_add_dax_region(struct cxl_region *cxlr) -{ - struct cxl_dax_region *cxlr_dax; - struct device *dev; - int rc; - - cxlr_dax = cxl_dax_region_alloc(cxlr); - if (IS_ERR(cxlr_dax)) - return PTR_ERR(cxlr_dax); - - dev = &cxlr_dax->dev; - rc = dev_set_name(dev, "dax_region%d", cxlr->id); - if (rc) - goto err; - - rc = device_add(dev); - if (rc) - goto err; - - dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent), - dev_name(dev)); - - return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister, - cxlr_dax); -err: - put_device(dev); - return rc; -} - static int match_root_decoder(struct device *dev, const void *data) { const struct range *r1, *r2 = data; @@ -3904,7 +3716,8 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, do { cxlr = __create_region(cxlrd, cxlds->part[part].mode, - 
atomic_read(&cxlrd->region_id)); + atomic_read(&cxlrd->region_id), + cxled->cxld.target_type); } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY); if (IS_ERR(cxlr)) { @@ -4175,6 +3988,36 @@ static int cxl_region_setup_poison(struct cxl_region *cxlr) return devm_add_action_or_reset(dev, remove_debugfs, dentry); } +static int region_contains_resource(struct device *dev, const void *data) +{ + const struct resource *res = data; + struct cxl_region *cxlr; + struct cxl_region_params *p; + + if (!is_cxl_region(dev)) + return 0; + + cxlr = to_cxl_region(dev); + p = &cxlr->params; + + if (p->state != CXL_CONFIG_COMMIT) + return 0; + + if (!p->res) + return 0; + + return resource_contains(p->res, res) ? 1 : 0; +} + +bool cxl_region_contains_resource(const struct resource *res) +{ + guard(rwsem_read)(&cxl_rwsem.region); + struct device *dev __free(put_device) = bus_find_device( + &cxl_bus_type, NULL, res, region_contains_resource); + return !!dev; +} +EXPORT_SYMBOL_FOR_MODULES(cxl_region_contains_resource, "dax_hmem"); + static int cxl_region_can_probe(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; diff --git a/drivers/cxl/core/region_dax.c b/drivers/cxl/core/region_dax.c new file mode 100644 index 000000000000..de04f78f6ad8 --- /dev/null +++ b/drivers/cxl/core/region_dax.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright(c) 2022 Intel Corporation. All rights reserved. + * Copyright(c) 2026 Meta Technologies Inc. All rights reserved. 
+ */ +#include +#include +#include +#include +#include "core.h" + +static void cxl_dax_region_release(struct device *dev) +{ + struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev); + + kfree(cxlr_dax); +} + +static const struct attribute_group *cxl_dax_region_attribute_groups[] = { + &cxl_base_attribute_group, + NULL +}; + +const struct device_type cxl_dax_region_type = { + .name = "cxl_dax_region", + .release = cxl_dax_region_release, + .groups = cxl_dax_region_attribute_groups, +}; + +static bool is_cxl_dax_region(struct device *dev) +{ + return dev->type == &cxl_dax_region_type; +} + +struct cxl_dax_region *to_cxl_dax_region(struct device *dev) +{ + if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev), + "not a cxl_dax_region device\n")) + return NULL; + return container_of(dev, struct cxl_dax_region, dev); +} +EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, "CXL"); + +static struct lock_class_key cxl_dax_region_key; + +static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + struct cxl_dax_region *cxlr_dax; + struct device *dev; + + guard(rwsem_read)(&cxl_rwsem.region); + if (p->state != CXL_CONFIG_COMMIT) + return ERR_PTR(-ENXIO); + + cxlr_dax = kzalloc_obj(*cxlr_dax); + if (!cxlr_dax) + return ERR_PTR(-ENOMEM); + + cxlr_dax->hpa_range.start = p->res->start; + cxlr_dax->hpa_range.end = p->res->end; + + dev = &cxlr_dax->dev; + cxlr_dax->cxlr = cxlr; + device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_dax_region_key); + device_set_pm_not_required(dev); + dev->parent = &cxlr->dev; + dev->bus = &cxl_bus_type; + dev->type = &cxl_dax_region_type; + + return cxlr_dax; +} + +static void cxlr_dax_unregister(void *_cxlr_dax) +{ + struct cxl_dax_region *cxlr_dax = _cxlr_dax; + + device_unregister(&cxlr_dax->dev); +} + +int devm_cxl_add_dax_region(struct cxl_region *cxlr) +{ + struct device *dev; + int rc; + + struct cxl_dax_region *cxlr_dax __free(put_cxl_dax_region) = + cxl_dax_region_alloc(cxlr); 
+ if (IS_ERR(cxlr_dax)) + return PTR_ERR(cxlr_dax); + + dev = &cxlr_dax->dev; + rc = dev_set_name(dev, "dax_region%d", cxlr->id); + if (rc) + return rc; + + rc = device_add(dev); + if (rc) + return rc; + + dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent), + dev_name(dev)); + + return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister, + no_free_ptr(cxlr_dax)); +} diff --git a/drivers/cxl/core/region_pmem.c b/drivers/cxl/core/region_pmem.c new file mode 100644 index 000000000000..23d97e3d78b6 --- /dev/null +++ b/drivers/cxl/core/region_pmem.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ +#include +#include +#include +#include +#include "core.h" + +static void cxl_pmem_region_release(struct device *dev) +{ + struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev); + int i; + + for (i = 0; i < cxlr_pmem->nr_mappings; i++) { + struct cxl_memdev *cxlmd = cxlr_pmem->mapping[i].cxlmd; + + put_device(&cxlmd->dev); + } + + kfree(cxlr_pmem); +} + +static const struct attribute_group *cxl_pmem_region_attribute_groups[] = { + &cxl_base_attribute_group, + NULL +}; + +const struct device_type cxl_pmem_region_type = { + .name = "cxl_pmem_region", + .release = cxl_pmem_region_release, + .groups = cxl_pmem_region_attribute_groups, +}; + +bool is_cxl_pmem_region(struct device *dev) +{ + return dev->type == &cxl_pmem_region_type; +} +EXPORT_SYMBOL_NS_GPL(is_cxl_pmem_region, "CXL"); + +struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev) +{ + if (dev_WARN_ONCE(dev, !is_cxl_pmem_region(dev), + "not a cxl_pmem_region device\n")) + return NULL; + return container_of(dev, struct cxl_pmem_region, dev); +} +EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, "CXL"); + +static struct lock_class_key cxl_pmem_region_key; + +static int cxl_pmem_region_alloc(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + struct cxl_nvdimm_bridge *cxl_nvb; + struct device *dev; + 
int i; + + guard(rwsem_read)(&cxl_rwsem.region); + if (p->state != CXL_CONFIG_COMMIT) + return -ENXIO; + + struct cxl_pmem_region *cxlr_pmem __free(kfree) = + kzalloc_flex(*cxlr_pmem, mapping, p->nr_targets); + if (!cxlr_pmem) + return -ENOMEM; + + cxlr_pmem->hpa_range.start = p->res->start; + cxlr_pmem->hpa_range.end = p->res->end; + + /* Snapshot the region configuration underneath the cxl_rwsem.region */ + cxlr_pmem->nr_mappings = p->nr_targets; + for (i = 0; i < p->nr_targets; i++) { + struct cxl_endpoint_decoder *cxled = p->targets[i]; + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; + + /* + * Regions never span CXL root devices, so by definition the + * bridge for one device is the same for all. + */ + if (i == 0) { + cxl_nvb = cxl_find_nvdimm_bridge(cxlmd->endpoint); + if (!cxl_nvb) + return -ENODEV; + cxlr->cxl_nvb = cxl_nvb; + } + m->cxlmd = cxlmd; + get_device(&cxlmd->dev); + m->start = cxled->dpa_res->start; + m->size = resource_size(cxled->dpa_res); + m->position = i; + } + + dev = &cxlr_pmem->dev; + device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_pmem_region_key); + device_set_pm_not_required(dev); + dev->parent = &cxlr->dev; + dev->bus = &cxl_bus_type; + dev->type = &cxl_pmem_region_type; + cxlr_pmem->cxlr = cxlr; + cxlr->cxlr_pmem = no_free_ptr(cxlr_pmem); + + return 0; +} + +static void cxlr_pmem_unregister(void *_cxlr_pmem) +{ + struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem; + struct cxl_region *cxlr = cxlr_pmem->cxlr; + struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb; + + /* + * Either the bridge is in ->remove() context under the device_lock(), + * or cxlr_release_nvdimm() is cancelling the bridge's release action + * for @cxlr_pmem and doing it itself (while manually holding the bridge + * lock). 
+ */ + device_lock_assert(&cxl_nvb->dev); + cxlr->cxlr_pmem = NULL; + cxlr_pmem->cxlr = NULL; + device_unregister(&cxlr_pmem->dev); +} + +static void cxlr_release_nvdimm(void *_cxlr) +{ + struct cxl_region *cxlr = _cxlr; + struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb; + + scoped_guard(device, &cxl_nvb->dev) { + if (cxlr->cxlr_pmem) + devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister, + cxlr->cxlr_pmem); + } + cxlr->cxl_nvb = NULL; + put_device(&cxl_nvb->dev); +} + +/** + * devm_cxl_add_pmem_region() - add a cxl_region-to-nd_region bridge + * @cxlr: parent CXL region for this pmem region bridge device + * + * Return: 0 on success negative error code on failure. + */ +int devm_cxl_add_pmem_region(struct cxl_region *cxlr) +{ + struct cxl_pmem_region *cxlr_pmem; + struct cxl_nvdimm_bridge *cxl_nvb; + struct device *dev; + int rc; + + rc = cxl_pmem_region_alloc(cxlr); + if (rc) + return rc; + cxlr_pmem = cxlr->cxlr_pmem; + cxl_nvb = cxlr->cxl_nvb; + + dev = &cxlr_pmem->dev; + rc = dev_set_name(dev, "pmem_region%d", cxlr->id); + if (rc) + goto err; + + rc = device_add(dev); + if (rc) + goto err; + + dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent), + dev_name(dev)); + + scoped_guard(device, &cxl_nvb->dev) { + if (cxl_nvb->dev.driver) + rc = devm_add_action_or_reset(&cxl_nvb->dev, + cxlr_pmem_unregister, + cxlr_pmem); + else + rc = -ENXIO; + } + + if (rc) + goto err_bridge; + + /* @cxlr carries a reference on @cxl_nvb until cxlr_release_nvdimm */ + return devm_add_action_or_reset(&cxlr->dev, cxlr_release_nvdimm, cxlr); + +err: + put_device(dev); +err_bridge: + put_device(&cxl_nvb->dev); + cxlr->cxl_nvb = NULL; + return rc; +} diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index a010b3214342..93710cf4f0a6 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -641,4 +641,3 @@ resource_size_t cxl_rcd_component_reg_phys(struct device *dev, return CXL_RESOURCE_NONE; return __rcrb_to_component(dev, &dport->rcrb, 
CXL_RCRB_UPSTREAM); } -EXPORT_SYMBOL_NS_GPL(cxl_rcd_component_reg_phys, "CXL"); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 9b947286eb9b..1297594beaec 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -12,6 +12,7 @@ #include #include #include +#include extern const struct nvdimm_security_ops *cxl_security_ops; @@ -77,7 +78,16 @@ static inline int cxl_hdm_decoder_count(u32 cap_hdr) { int val = FIELD_GET(CXL_HDM_DECODER_COUNT_MASK, cap_hdr); - return val ? val * 2 : 1; + switch (val) { + case 0: + return 1; + case 1 ... 8: + return val * 2; + case 9 ... 12: + return (val - 4) * 4; + default: + return -ENXIO; + } } /* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */ @@ -201,97 +211,6 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw) #define CXLDEV_MBOX_BG_CMD_COMMAND_VENDOR_MASK GENMASK_ULL(63, 48) #define CXLDEV_MBOX_PAYLOAD_OFFSET 0x20 -/* - * Using struct_group() allows for per register-block-type helper routines, - * without requiring block-type agnostic code to include the prefix. 
- */ -struct cxl_regs { - /* - * Common set of CXL Component register block base pointers - * @hdm_decoder: CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure - * @ras: CXL 2.0 8.2.5.9 CXL RAS Capability Structure - */ - struct_group_tagged(cxl_component_regs, component, - void __iomem *hdm_decoder; - void __iomem *ras; - ); - /* - * Common set of CXL Device register block base pointers - * @status: CXL 2.0 8.2.8.3 Device Status Registers - * @mbox: CXL 2.0 8.2.8.4 Mailbox Registers - * @memdev: CXL 2.0 8.2.8.5 Memory Device Registers - */ - struct_group_tagged(cxl_device_regs, device_regs, - void __iomem *status, *mbox, *memdev; - ); - - struct_group_tagged(cxl_pmu_regs, pmu_regs, - void __iomem *pmu; - ); - - /* - * RCH downstream port specific RAS register - * @aer: CXL 3.0 8.2.1.1 RCH Downstream Port RCRB - */ - struct_group_tagged(cxl_rch_regs, rch_regs, - void __iomem *dport_aer; - ); - - /* - * RCD upstream port specific PCIe cap register - * @pcie_cap: CXL 3.0 8.2.1.2 RCD Upstream Port RCRB - */ - struct_group_tagged(cxl_rcd_regs, rcd_regs, - void __iomem *rcd_pcie_cap; - ); -}; - -struct cxl_reg_map { - bool valid; - int id; - unsigned long offset; - unsigned long size; -}; - -struct cxl_component_reg_map { - struct cxl_reg_map hdm_decoder; - struct cxl_reg_map ras; -}; - -struct cxl_device_reg_map { - struct cxl_reg_map status; - struct cxl_reg_map mbox; - struct cxl_reg_map memdev; -}; - -struct cxl_pmu_reg_map { - struct cxl_reg_map pmu; -}; - -/** - * struct cxl_register_map - DVSEC harvested register block mapping parameters - * @host: device for devm operations and logging - * @base: virtual base of the register-block-BAR + @block_offset - * @resource: physical resource base of the register block - * @max_size: maximum mapping size to perform register search - * @reg_type: see enum cxl_regloc_type - * @component_map: cxl_reg_map for component registers - * @device_map: cxl_reg_maps for device registers - * @pmu_map: cxl_reg_maps for CXL 
Performance Monitoring Units - */ -struct cxl_register_map { - struct device *host; - void __iomem *base; - resource_size_t resource; - resource_size_t max_size; - u8 reg_type; - union { - struct cxl_component_reg_map component_map; - struct cxl_device_reg_map device_map; - struct cxl_pmu_reg_map pmu_map; - }; -}; - void cxl_probe_component_regs(struct device *dev, void __iomem *base, struct cxl_component_reg_map *map); void cxl_probe_device_regs(struct device *dev, void __iomem *base, @@ -312,8 +231,6 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map); int cxl_setup_regs(struct cxl_register_map *map); struct cxl_dport; -resource_size_t cxl_rcd_component_reg_phys(struct device *dev, - struct cxl_dport *dport); int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport); #define CXL_RESOURCE_NONE ((resource_size_t) -1) @@ -333,6 +250,7 @@ int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport); #define CXL_DECODER_F_LOCK BIT(4) #define CXL_DECODER_F_ENABLE BIT(5) #define CXL_DECODER_F_NORMALIZED_ADDRESSING BIT(6) +#define CXL_DECODER_F_RESET_MASK (CXL_DECODER_F_ENABLE | CXL_DECODER_F_LOCK) enum cxl_decoder_type { CXL_DECODER_DEVMEM = 2, @@ -378,12 +296,14 @@ struct cxl_decoder { }; /* - * Track whether this decoder is reserved for region autodiscovery, or - * free for userspace provisioning. + * Track whether this decoder is free for userspace provisioning, reserved for + * region autodiscovery, whether it is started connecting (awaiting other + * peers), or has completed auto assembly. */ enum cxl_decoder_state { CXL_DECODER_STATE_MANUAL, CXL_DECODER_STATE_AUTO, + CXL_DECODER_STATE_AUTO_STAGED, }; /** @@ -497,11 +417,6 @@ struct cxl_region_params { resource_size_t cache_size; }; -enum cxl_partition_mode { - CXL_PARTMODE_RAM, - CXL_PARTMODE_PMEM, -}; - /* * Indicate whether this region has been assembled by autodetection or * userspace assembly. 
Prevent endpoint decoders outside of automatic @@ -808,6 +723,7 @@ DEFINE_FREE(put_cxl_root, struct cxl_root *, if (_T) put_device(&_T->port.dev)) DEFINE_FREE(put_cxl_port, struct cxl_port *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) DEFINE_FREE(put_cxl_root_decoder, struct cxl_root_decoder *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->cxlsd.cxld.dev)) DEFINE_FREE(put_cxl_region, struct cxl_region *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) +DEFINE_FREE(put_cxl_dax_region, struct cxl_dax_region *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd); void cxl_bus_rescan(void); @@ -939,6 +855,7 @@ struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev); int cxl_add_to_region(struct cxl_endpoint_decoder *cxled); struct cxl_dax_region *to_cxl_dax_region(struct device *dev); u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa); +bool cxl_region_contains_resource(const struct resource *res); #else static inline bool is_cxl_pmem_region(struct device *dev) { @@ -961,6 +878,10 @@ static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, { return 0; } +static inline bool cxl_region_contains_resource(const struct resource *res) +{ + return false; +} #endif void cxl_endpoint_parse_cdat(struct cxl_port *port); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index e21d744d639b..776c50d1db51 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -113,8 +113,6 @@ int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, resource_size_t base, resource_size_t len, resource_size_t skipped); -#define CXL_NR_PARTITIONS_MAX 2 - struct cxl_dpa_info { u64 size; struct cxl_dpa_part_info { @@ -373,87 +371,6 @@ struct cxl_security_state { struct kernfs_node *sanitize_node; }; -/* - * enum cxl_devtype - delineate type-2 from a generic type-3 device - * @CXL_DEVTYPE_DEVMEM - Vendor specific CXL Type-2 device implementing HDM-D or - * HDM-DB, no requirement that this device 
implements a - * mailbox, or other memory-device-standard manageability - * flows. - * @CXL_DEVTYPE_CLASSMEM - Common class definition of a CXL Type-3 device with - * HDM-H and class-mandatory memory device registers - */ -enum cxl_devtype { - CXL_DEVTYPE_DEVMEM, - CXL_DEVTYPE_CLASSMEM, -}; - -/** - * struct cxl_dpa_perf - DPA performance property entry - * @dpa_range: range for DPA address - * @coord: QoS performance data (i.e. latency, bandwidth) - * @cdat_coord: raw QoS performance data from CDAT - * @qos_class: QoS Class cookies - */ -struct cxl_dpa_perf { - struct range dpa_range; - struct access_coordinate coord[ACCESS_COORDINATE_MAX]; - struct access_coordinate cdat_coord[ACCESS_COORDINATE_MAX]; - int qos_class; -}; - -/** - * struct cxl_dpa_partition - DPA partition descriptor - * @res: shortcut to the partition in the DPA resource tree (cxlds->dpa_res) - * @perf: performance attributes of the partition from CDAT - * @mode: operation mode for the DPA capacity, e.g. ram, pmem, dynamic... - */ -struct cxl_dpa_partition { - struct resource res; - struct cxl_dpa_perf perf; - enum cxl_partition_mode mode; -}; - -/** - * struct cxl_dev_state - The driver device state - * - * cxl_dev_state represents the CXL driver/device state. It provides an - * interface to mailbox commands as well as some cached data about the device. - * Currently only memory devices are represented. 
- * - * @dev: The device associated with this CXL state - * @cxlmd: The device representing the CXL.mem capabilities of @dev - * @reg_map: component and ras register mapping parameters - * @regs: Class device "Device" registers - * @cxl_dvsec: Offset to the PCIe device DVSEC - * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) - * @media_ready: Indicate whether the device media is usable - * @dpa_res: Overall DPA resource tree for the device - * @part: DPA partition array - * @nr_partitions: Number of DPA partitions - * @serial: PCIe Device Serial Number - * @type: Generic Memory Class device or Vendor Specific Memory device - * @cxl_mbox: CXL mailbox context - * @cxlfs: CXL features context - */ -struct cxl_dev_state { - struct device *dev; - struct cxl_memdev *cxlmd; - struct cxl_register_map reg_map; - struct cxl_device_regs regs; - int cxl_dvsec; - bool rcd; - bool media_ready; - struct resource dpa_res; - struct cxl_dpa_partition part[CXL_NR_PARTITIONS_MAX]; - unsigned int nr_partitions; - u64 serial; - enum cxl_devtype type; - struct cxl_mailbox cxl_mbox; -#ifdef CONFIG_CXL_FEATURES - struct cxl_features_state *cxlfs; -#endif -}; - static inline resource_size_t cxl_pmem_size(struct cxl_dev_state *cxlds) { /* @@ -858,13 +775,14 @@ int cxl_dev_state_identify(struct cxl_memdev_state *mds); int cxl_await_media_ready(struct cxl_dev_state *cxlds); int cxl_enumerate_cmds(struct cxl_memdev_state *mds); int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info); -struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev); +struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev, u64 serial, + u16 dvsec); void set_exclusive_cxl_commands(struct cxl_memdev_state *mds, unsigned long *cmds); void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, unsigned long *cmds); void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status); -void cxl_event_trace_record(const struct cxl_memdev 
*cxlmd, +void cxl_event_trace_record(struct cxl_memdev *cxlmd, enum cxl_event_log_type type, enum cxl_event_type event_type, const uuid_t *uuid, union cxl_event *evt); @@ -923,7 +841,7 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd); */ struct cxl_hdm { struct cxl_component_regs regs; - unsigned int decoder_count; + int decoder_count; unsigned int target_count; unsigned int interleave_mask; unsigned long iw_cap_mask; diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 0cf64218aa16..b826eb53cf7b 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -74,6 +74,17 @@ static inline bool cxl_pci_flit_256(struct pci_dev *pdev) return lnksta2 & PCI_EXP_LNKSTA2_FLIT; } +/* + * Assume that the caller has already validated that @pdev has CXL + * capabilities, any RCiEP with CXL capabilities is treated as a + * Restricted CXL Device (RCD) and finds upstream port and endpoint + * registers in a Root Complex Register Block (RCRB). + */ +static inline bool is_cxl_restricted(struct pci_dev *pdev) +{ + return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END; +} + struct cxl_dev_state; void read_cdat_data(struct cxl_port *port); @@ -101,4 +112,6 @@ static inline void devm_cxl_port_ras_setup(struct cxl_port *port) } #endif +int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index fbb300a01830..bace662dc988 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -465,76 +465,6 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail) return 0; } -/* - * Assume that any RCIEP that emits the CXL memory expander class code - * is an RCD - */ -static bool is_cxl_restricted(struct pci_dev *pdev) -{ - return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END; -} - -static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, - struct cxl_register_map *map, - struct cxl_dport *dport) -{ - resource_size_t component_reg_phys; 
- - *map = (struct cxl_register_map) { - .host = &pdev->dev, - .resource = CXL_RESOURCE_NONE, - }; - - struct cxl_port *port __free(put_cxl_port) = - cxl_pci_find_port(pdev, &dport); - if (!port) - return -EPROBE_DEFER; - - component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport); - if (component_reg_phys == CXL_RESOURCE_NONE) - return -ENXIO; - - map->resource = component_reg_phys; - map->reg_type = CXL_REGLOC_RBI_COMPONENT; - map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE; - - return 0; -} - -static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map) -{ - int rc; - - rc = cxl_find_regblock(pdev, type, map); - - /* - * If the Register Locator DVSEC does not exist, check if it - * is an RCH and try to extract the Component Registers from - * an RCRB. - */ - if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) { - struct cxl_dport *dport; - struct cxl_port *port __free(put_cxl_port) = - cxl_pci_find_port(pdev, &dport); - if (!port) - return -EPROBE_DEFER; - - rc = cxl_rcrb_get_comp_regs(pdev, map, dport); - if (rc) - return rc; - - rc = cxl_dport_map_rcd_linkcap(pdev, dport); - if (rc) - return rc; - - } else if (rc) { - return rc; - } - - return cxl_setup_regs(map); -} - static void free_event_buf(void *buf) { kvfree(buf); @@ -865,25 +795,25 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) int rc, pmu_count; unsigned int i; bool irq_avail; + u16 dvsec; rc = pcim_enable_device(pdev); if (rc) return rc; pci_set_master(pdev); - mds = cxl_memdev_state_create(&pdev->dev); + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_DEVICE); + if (!dvsec) + pci_warn(pdev, "Device DVSEC not present, skip CXL.mem init\n"); + + mds = cxl_memdev_state_create(&pdev->dev, pci_get_dsn(pdev), dvsec); if (IS_ERR(mds)) return PTR_ERR(mds); cxlds = &mds->cxlds; pci_set_drvdata(pdev, cxlds); cxlds->rcd = is_cxl_restricted(pdev); - cxlds->serial = 
pci_get_dsn(pdev); - cxlds->cxl_dvsec = pci_find_dvsec_capability( - pdev, PCI_VENDOR_ID_CXL, PCI_DVSEC_CXL_DEVICE); - if (!cxlds->cxl_dvsec) - dev_warn(&pdev->dev, - "Device DVSEC not present, skip CXL.mem init\n"); rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); if (rc) @@ -1030,6 +960,19 @@ static void cxl_error_resume(struct pci_dev *pdev) dev->driver ? "successful" : "failed"); } +static int cxl_endpoint_decoder_clear_reset_flags(struct device *dev, void *data) +{ + struct cxl_endpoint_decoder *cxled; + + if (!is_endpoint_decoder(dev)) + return 0; + + cxled = to_cxl_endpoint_decoder(dev); + cxled->cxld.flags &= ~CXL_DECODER_F_RESET_MASK; + + return 0; +} + static void cxl_reset_done(struct pci_dev *pdev) { struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); @@ -1043,8 +986,14 @@ static void cxl_reset_done(struct pci_dev *pdev) * that no longer exists. */ guard(device)(&cxlmd->dev); + if (!cxlmd->dev.driver) + return; + if (cxlmd->endpoint && cxl_endpoint_decoder_reset_detected(cxlmd->endpoint)) { + device_for_each_child(&cxlmd->endpoint->dev, NULL, + cxl_endpoint_decoder_clear_reset_flags); + dev_crit(dev, "SBR happened without memory regions removal.\n"); dev_crit(dev, "System may be unstable if regions hosted system memory.\n"); add_taint(TAINT_USER, LOCKDEP_STILL_OK); diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index d656e4c0eb84..504f7f735ef5 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -32,6 +32,9 @@ config DEV_DAX_HMEM depends on EFI_SOFT_RESERVE select NUMA_KEEP_MEMINFO if NUMA_MEMBLKS default DEV_DAX + depends on CXL_ACPI || !CXL_ACPI + depends on CXL_PCI || !CXL_PCI + depends on CXL_BUS || !CXL_BUS help EFI 2.8 platforms, and others, may advertise 'specific purpose' memory. For example, a high bandwidth memory pool. 
The @@ -48,6 +51,7 @@ config DEV_DAX_CXL tristate "CXL DAX: direct access to CXL RAM regions" depends on CXL_BUS && CXL_REGION && DEV_DAX default CXL_REGION && DEV_DAX + depends on DEV_DAX_HMEM || !DEV_DAX_HMEM help CXL RAM regions are either mapped by platform-firmware and published in the initial system-memory map as "System RAM", mapped diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile index 5ed5c39857c8..70e996bf1526 100644 --- a/drivers/dax/Makefile +++ b/drivers/dax/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 +obj-y += hmem/ obj-$(CONFIG_DAX) += dax.o obj-$(CONFIG_DEV_DAX) += device_dax.o obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o @@ -10,5 +11,3 @@ dax-y += bus.o device_dax-y := device.o dax_pmem-y := pmem.o dax_cxl-y := cxl.o - -obj-y += hmem/ diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index c94c09622516..68437c05e21d 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -10,6 +10,7 @@ #include "dax-private.h" #include "bus.h" +static struct resource dax_regions = DEFINE_RES_MEM_NAMED(0, -1, "DAX Regions"); static DEFINE_MUTEX(dax_bus_lock); /* @@ -627,6 +628,7 @@ static void dax_region_unregister(void *region) sysfs_remove_groups(&dax_region->dev->kobj, dax_region_attribute_groups); + release_resource(&dax_region->res); dax_region_put(dax_region); } @@ -635,6 +637,7 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id, unsigned long flags) { struct dax_region *dax_region; + int rc; /* * The DAX core assumes that it can store its private data in @@ -667,14 +670,25 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id, .flags = IORESOURCE_MEM | flags, }; - if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) { - kfree(dax_region); - return NULL; + rc = request_resource(&dax_regions, &dax_region->res); + if (rc) { + dev_dbg(parent, "dax_region resource conflict for %pR\n", + &dax_region->res); + goto err_res; } + if (sysfs_create_groups(&parent->kobj, 
dax_region_attribute_groups)) + goto err_sysfs; + if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region)) return NULL; return dax_region; + +err_sysfs: + release_resource(&dax_region->res); +err_res: + dax_region_put(dax_region); + return NULL; } EXPORT_SYMBOL_GPL(alloc_dax_region); diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index cbbf64443098..7b1a83f1ce1f 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -3,7 +3,9 @@ #ifndef __DAX_BUS_H__ #define __DAX_BUS_H__ #include +#include #include +#include struct dev_dax; struct resource; @@ -49,6 +51,24 @@ void dax_driver_unregister(struct dax_device_driver *dax_drv); void kill_dev_dax(struct dev_dax *dev_dax); bool static_dev_dax(struct dev_dax *dev_dax); +struct hmem_platform_device { + struct platform_device pdev; + struct work_struct work; + bool did_probe; +}; + +static inline struct hmem_platform_device * +to_hmem_platform_device(struct platform_device *pdev) +{ + return container_of(pdev, struct hmem_platform_device, pdev); +} + +#if IS_ENABLED(CONFIG_DEV_DAX_HMEM) +void dax_hmem_flush_work(void); +#else +static inline void dax_hmem_flush_work(void) { } +#endif + #define MODULE_ALIAS_DAX_DEVICE(type) \ MODULE_ALIAS("dax:t" __stringify(type) "*") #define DAX_DEVICE_MODALIAS_FMT "dax:t%d" diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c index 13cd94d32ff7..3ab39b77843d 100644 --- a/drivers/dax/cxl.c +++ b/drivers/dax/cxl.c @@ -38,10 +38,36 @@ static struct cxl_driver cxl_dax_region_driver = { .id = CXL_DEVICE_DAX_REGION, .drv = { .suppress_bind_attrs = true, + .probe_type = PROBE_PREFER_ASYNCHRONOUS, }, }; -module_cxl_driver(cxl_dax_region_driver); +static void cxl_dax_region_driver_register(struct work_struct *work) +{ + dax_hmem_flush_work(); + cxl_driver_register(&cxl_dax_region_driver); +} + +static DECLARE_WORK(cxl_dax_region_driver_work, cxl_dax_region_driver_register); + +static int __init cxl_dax_region_init(void) +{ + /* + * Need to resolve a race with dax_hmem wanting to 
drive regions + * instead of CXL + */ + queue_work(system_long_wq, &cxl_dax_region_driver_work); + return 0; +} +module_init(cxl_dax_region_init); + +static void __exit cxl_dax_region_exit(void) +{ + flush_work(&cxl_dax_region_driver_work); + cxl_driver_unregister(&cxl_dax_region_driver); +} +module_exit(cxl_dax_region_exit); + MODULE_ALIAS_CXL(CXL_DEVICE_DAX_REGION); MODULE_DESCRIPTION("CXL DAX: direct access to CXL regions"); MODULE_LICENSE("GPL"); diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c index 56e3cbd181b5..d70359b4307b 100644 --- a/drivers/dax/hmem/device.c +++ b/drivers/dax/hmem/device.c @@ -4,6 +4,7 @@ #include #include #include +#include "../bus.h" static bool nohmem; module_param_named(disable, nohmem, bool, 0444); @@ -33,9 +34,21 @@ int walk_hmem_resources(struct device *host, walk_hmem_fn fn) } EXPORT_SYMBOL_GPL(walk_hmem_resources); +static void hmem_work(struct work_struct *work) +{ + /* place holder until dax_hmem driver attaches */ +} + +static struct hmem_platform_device hmem_platform = { + .pdev = { + .name = "hmem_platform", + .id = 0, + }, + .work = __WORK_INITIALIZER(hmem_platform.work, hmem_work), +}; + static void __hmem_register_resource(int target_nid, struct resource *res) { - struct platform_device *pdev; struct resource *new; int rc; @@ -51,17 +64,13 @@ static void __hmem_register_resource(int target_nid, struct resource *res) if (platform_initialized) return; - pdev = platform_device_alloc("hmem_platform", 0); - if (!pdev) { + rc = platform_device_register(&hmem_platform.pdev); + if (rc) { pr_err_once("failed to register device-dax hmem_platform device\n"); return; } - rc = platform_device_add(pdev); - if (rc) - platform_device_put(pdev); - else - platform_initialized = true; + platform_initialized = true; } void hmem_register_resource(int target_nid, struct resource *res) diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index 1cf7c2a0ee1c..af21f66bf872 100644 --- a/drivers/dax/hmem/hmem.c +++ 
b/drivers/dax/hmem/hmem.c @@ -3,6 +3,7 @@ #include #include #include +#include "../../cxl/cxl.h" #include "../bus.h" static bool region_idle; @@ -58,21 +59,22 @@ static void release_hmem(void *pdev) platform_device_unregister(pdev); } -static int hmem_register_device(struct device *host, int target_nid, - const struct resource *res) +static struct workqueue_struct *dax_hmem_wq; + +void dax_hmem_flush_work(void) +{ + flush_workqueue(dax_hmem_wq); +} +EXPORT_SYMBOL_FOR_MODULES(dax_hmem_flush_work, "dax_cxl"); + +static int __hmem_register_device(struct device *host, int target_nid, + const struct resource *res) { struct platform_device *pdev; struct memregion_info info; long id; int rc; - if (IS_ENABLED(CONFIG_CXL_REGION) && - region_intersects(res->start, resource_size(res), IORESOURCE_MEM, - IORES_DESC_CXL) != REGION_DISJOINT) { - dev_dbg(host, "deferring range to CXL: %pr\n", res); - return 0; - } - rc = region_intersects_soft_reserve(res->start, resource_size(res)); if (rc != REGION_INTERSECTS) return 0; @@ -94,6 +96,7 @@ static int hmem_register_device(struct device *host, int target_nid, return -ENOMEM; } + pdev->dev.parent = host; pdev->dev.numa_node = numa_map_to_online_node(target_nid); info = (struct memregion_info) { .target_node = target_nid, @@ -123,8 +126,74 @@ out_put: return rc; } +static int hmem_register_cxl_device(struct device *host, int target_nid, + const struct resource *res) +{ + if (region_intersects(res->start, resource_size(res), IORESOURCE_MEM, + IORES_DESC_CXL) == REGION_DISJOINT) + return 0; + + if (cxl_region_contains_resource(res)) { + dev_dbg(host, "CXL claims resource, dropping: %pr\n", res); + return 0; + } + + dev_dbg(host, "CXL did not claim resource, registering: %pr\n", res); + return __hmem_register_device(host, target_nid, res); +} + +static void process_defer_work(struct work_struct *w) +{ + struct hmem_platform_device *hpdev = container_of(w, typeof(*hpdev), work); + struct device *dev = &hpdev->pdev.dev; + + /* Relies on 
cxl_acpi and cxl_pci having had a chance to load */ + wait_for_device_probe(); + + guard(device)(dev); + if (!dev->driver) + goto out; + + if (!hpdev->did_probe) { + hpdev->did_probe = true; + walk_hmem_resources(dev, hmem_register_cxl_device); + } +out: + put_device(dev); +} + +static int hmem_register_device(struct device *host, int target_nid, + const struct resource *res) +{ + struct platform_device *pdev = to_platform_device(host); + struct hmem_platform_device *hpdev = to_hmem_platform_device(pdev); + + if (IS_ENABLED(CONFIG_DEV_DAX_CXL) && + region_intersects(res->start, resource_size(res), IORESOURCE_MEM, + IORES_DESC_CXL) != REGION_DISJOINT) { + if (!hpdev->did_probe) { + dev_dbg(host, "await CXL initial probe: %pr\n", res); + hpdev->work.func = process_defer_work; + get_device(host); + if (!queue_work(dax_hmem_wq, &hpdev->work)) + put_device(host); + return 0; + } + dev_dbg(host, "deferring range to CXL: %pr\n", res); + return 0; + } + + return __hmem_register_device(host, target_nid, res); +} + static int dax_hmem_platform_probe(struct platform_device *pdev) { + struct hmem_platform_device *hpdev = to_hmem_platform_device(pdev); + + /* queue is only flushed on module unload, fail rebind with pending work */ + if (work_pending(&hpdev->work)) + return -EBUSY; + return walk_hmem_resources(&pdev->dev, hmem_register_device); } @@ -139,13 +208,34 @@ static __init int dax_hmem_init(void) { int rc; + /* + * Ensure that cxl_acpi and cxl_pci have a chance to kick off + * CXL topology discovery at least once before scanning the + * iomem resource tree for IORES_DESC_CXL resources. 
+ */ + if (IS_ENABLED(CONFIG_DEV_DAX_CXL)) { + request_module("cxl_acpi"); + request_module("cxl_pci"); + } + + dax_hmem_wq = alloc_ordered_workqueue("dax_hmem_wq", 0); + if (!dax_hmem_wq) + return -ENOMEM; + rc = platform_driver_register(&dax_hmem_platform_driver); if (rc) - return rc; + goto err_platform_driver; rc = platform_driver_register(&dax_hmem_driver); if (rc) - platform_driver_unregister(&dax_hmem_platform_driver); + goto err_driver; + + return 0; + +err_driver: + platform_driver_unregister(&dax_hmem_platform_driver); +err_platform_driver: + destroy_workqueue(dax_hmem_wq); return rc; } @@ -154,18 +244,12 @@ static __exit void dax_hmem_exit(void) { platform_driver_unregister(&dax_hmem_driver); platform_driver_unregister(&dax_hmem_platform_driver); + destroy_workqueue(dax_hmem_wq); } module_init(dax_hmem_init); module_exit(dax_hmem_exit); -/* Allow for CXL to define its own dax regions */ -#if IS_ENABLED(CONFIG_CXL_REGION) -#if IS_MODULE(CONFIG_CXL_ACPI) -MODULE_SOFTDEP("pre: cxl_acpi"); -#endif -#endif - MODULE_ALIAS("platform:hmem*"); MODULE_ALIAS("platform:hmem_platform*"); MODULE_DESCRIPTION("HMEM DAX: direct access to 'specific purpose' memory"); diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h new file mode 100644 index 000000000000..fa7269154620 --- /dev/null +++ b/include/cxl/cxl.h @@ -0,0 +1,226 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2020 Intel Corporation. */ +/* Copyright(c) 2026 Advanced Micro Devices, Inc. */ + +#ifndef __CXL_CXL_H__ +#define __CXL_CXL_H__ + +#include +#include +#include + +/** + * enum cxl_devtype - delineate type-2 from a generic type-3 device + * @CXL_DEVTYPE_DEVMEM: Vendor specific CXL Type-2 device implementing HDM-D or + * HDM-DB, no requirement that this device implements a + * mailbox, or other memory-device-standard manageability + * flows. 
+ * @CXL_DEVTYPE_CLASSMEM: Common class definition of a CXL Type-3 device with + * HDM-H and class-mandatory memory device registers + */ +enum cxl_devtype { + CXL_DEVTYPE_DEVMEM, + CXL_DEVTYPE_CLASSMEM, +}; + +struct device; + +/* + * Using struct_group() allows for per register-block-type helper routines, + * without requiring block-type agnostic code to include the prefix. + */ +struct cxl_regs { + /* + * Common set of CXL Component register block base pointers + * @hdm_decoder: CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure + * @ras: CXL 2.0 8.2.5.9 CXL RAS Capability Structure + */ + struct_group_tagged(cxl_component_regs, component, + void __iomem *hdm_decoder; + void __iomem *ras; + ); + /* + * Common set of CXL Device register block base pointers + * @status: CXL 2.0 8.2.8.3 Device Status Registers + * @mbox: CXL 2.0 8.2.8.4 Mailbox Registers + * @memdev: CXL 2.0 8.2.8.5 Memory Device Registers + */ + struct_group_tagged(cxl_device_regs, device_regs, + void __iomem *status, *mbox, *memdev; + ); + + struct_group_tagged(cxl_pmu_regs, pmu_regs, + void __iomem *pmu; + ); + + /* + * RCH downstream port specific RAS register + * @aer: CXL 3.0 8.2.1.1 RCH Downstream Port RCRB + */ + struct_group_tagged(cxl_rch_regs, rch_regs, + void __iomem *dport_aer; + ); + + /* + * RCD upstream port specific PCIe cap register + * @pcie_cap: CXL 3.0 8.2.1.2 RCD Upstream Port RCRB + */ + struct_group_tagged(cxl_rcd_regs, rcd_regs, + void __iomem *rcd_pcie_cap; + ); +}; + +struct cxl_reg_map { + bool valid; + int id; + unsigned long offset; + unsigned long size; +}; + +struct cxl_component_reg_map { + struct cxl_reg_map hdm_decoder; + struct cxl_reg_map ras; +}; + +struct cxl_device_reg_map { + struct cxl_reg_map status; + struct cxl_reg_map mbox; + struct cxl_reg_map memdev; +}; + +struct cxl_pmu_reg_map { + struct cxl_reg_map pmu; +}; + +/** + * struct cxl_register_map - DVSEC harvested register block mapping parameters + * @host: device for devm operations and logging + 
* @base: virtual base of the register-block-BAR + @block_offset + * @resource: physical resource base of the register block + * @max_size: maximum mapping size to perform register search + * @reg_type: see enum cxl_regloc_type + * @component_map: cxl_reg_map for component registers + * @device_map: cxl_reg_maps for device registers + * @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units + */ +struct cxl_register_map { + struct device *host; + void __iomem *base; + resource_size_t resource; + resource_size_t max_size; + u8 reg_type; + union { + struct cxl_component_reg_map component_map; + struct cxl_device_reg_map device_map; + struct cxl_pmu_reg_map pmu_map; + }; +}; + +/** + * struct cxl_dpa_perf - DPA performance property entry + * @dpa_range: range for DPA address + * @coord: QoS performance data (i.e. latency, bandwidth) + * @cdat_coord: raw QoS performance data from CDAT + * @qos_class: QoS Class cookies + */ +struct cxl_dpa_perf { + struct range dpa_range; + struct access_coordinate coord[ACCESS_COORDINATE_MAX]; + struct access_coordinate cdat_coord[ACCESS_COORDINATE_MAX]; + int qos_class; +}; + +enum cxl_partition_mode { + CXL_PARTMODE_RAM, + CXL_PARTMODE_PMEM, +}; + +/** + * struct cxl_dpa_partition - DPA partition descriptor + * @res: shortcut to the partition in the DPA resource tree (cxlds->dpa_res) + * @perf: performance attributes of the partition from CDAT + * @mode: operation mode for the DPA capacity, e.g. ram, pmem, dynamic... + */ +struct cxl_dpa_partition { + struct resource res; + struct cxl_dpa_perf perf; + enum cxl_partition_mode mode; +}; + +#define CXL_NR_PARTITIONS_MAX 2 + +/** + * struct cxl_dev_state - The driver device state + * + * cxl_dev_state represents the CXL driver/device state. It provides an + * interface to mailbox commands as well as some cached data about the device. + * Currently only memory devices are represented. 
+ * + * @dev: The device associated with this CXL state + * @cxlmd: The device representing the CXL.mem capabilities of @dev + * @reg_map: component and ras register mapping parameters + * @regs: Parsed register blocks + * @cxl_dvsec: Offset to the PCIe device DVSEC + * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) + * @media_ready: Indicate whether the device media is usable + * @dpa_res: Overall DPA resource tree for the device + * @part: DPA partition array + * @nr_partitions: Number of DPA partitions + * @serial: PCIe Device Serial Number + * @type: Generic Memory Class device or Vendor Specific Memory device + * @cxl_mbox: CXL mailbox context + * @cxlfs: CXL features context + */ +struct cxl_dev_state { + /* public for Type2 drivers */ + struct device *dev; + struct cxl_memdev *cxlmd; + + /* private for Type2 drivers */ + struct cxl_register_map reg_map; + struct cxl_device_regs regs; + int cxl_dvsec; + bool rcd; + bool media_ready; + struct resource dpa_res; + struct cxl_dpa_partition part[CXL_NR_PARTITIONS_MAX]; + unsigned int nr_partitions; + u64 serial; + enum cxl_devtype type; + struct cxl_mailbox cxl_mbox; +#ifdef CONFIG_CXL_FEATURES + struct cxl_features_state *cxlfs; +#endif +}; + +struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev, + enum cxl_devtype type, + u64 serial, u16 dvsec, + size_t size, bool has_mbox); + +/** + * cxl_dev_state_create - safely create and cast a cxl dev state embedded in a + * driver specific struct. + * + * @parent: device behind the request + * @type: CXL device type + * @serial: device identification + * @dvsec: dvsec capability offset + * @drv_struct: driver struct embedding a cxl_dev_state struct + * @member: name of the struct cxl_dev_state member in drv_struct + * @mbox: true if mailbox supported + * + * Returns a pointer to the drv_struct allocated and embedding a cxl_dev_state + * struct initialized. + * + * Introduced for Type2 driver support. 
+ */ +#define devm_cxl_dev_state_create(parent, type, serial, dvsec, drv_struct, member, mbox) \ + ({ \ + static_assert(__same_type(struct cxl_dev_state, \ + ((drv_struct *)NULL)->member)); \ + static_assert(offsetof(drv_struct, member) == 0); \ + (drv_struct *)_devm_cxl_dev_state_create(parent, type, serial, dvsec, \ + sizeof(drv_struct), mbox); \ + }) +#endif /* __CXL_CXL_H__ */ diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 53d84a6874b7..2be1df80fcc9 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -11,8 +11,12 @@ ldflags-y += --wrap=devm_cxl_endpoint_decoders_setup ldflags-y += --wrap=hmat_get_extended_linear_cache_size ldflags-y += --wrap=devm_cxl_add_dport_by_dev ldflags-y += --wrap=devm_cxl_switch_port_decoders_setup +ldflags-y += --wrap=walk_hmem_resources +ldflags-y += --wrap=region_intersects +ldflags-y += --wrap=region_intersects_soft_reserve DRIVERS := ../../../drivers +DAX_HMEM_SRC := $(DRIVERS)/dax/hmem CXL_SRC := $(DRIVERS)/cxl CXL_CORE_SRC := $(DRIVERS)/cxl/core ccflags-y := -I$(srctree)/drivers/cxl/ @@ -59,7 +63,7 @@ cxl_core-y += $(CXL_CORE_SRC)/hdm.o cxl_core-y += $(CXL_CORE_SRC)/pmu.o cxl_core-y += $(CXL_CORE_SRC)/cdat.o cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o -cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o +cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o $(CXL_CORE_SRC)/region_pmem.o $(CXL_CORE_SRC)/region_dax.o cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o @@ -70,6 +74,9 @@ cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o cxl_core-y += cxl_core_exports.o +obj-m += dax_hmem.o +dax_hmem-y := $(DAX_HMEM_SRC)/hmem.o + KBUILD_CFLAGS := $(filter-out -Wmissing-prototypes -Wmissing-declarations, $(KBUILD_CFLAGS)) obj-m += test/ diff --git a/tools/testing/cxl/test/Kbuild b/tools/testing/cxl/test/Kbuild index 
af50972c8b6d..c168e3c998a7 100644 --- a/tools/testing/cxl/test/Kbuild +++ b/tools/testing/cxl/test/Kbuild @@ -7,6 +7,7 @@ obj-m += cxl_mock_mem.o obj-m += cxl_translate.o cxl_test-y := cxl.o +cxl_test-y += hmem_test.o cxl_mock-y := mock.o cxl_mock_mem-y := mem.o diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 81e2aef3627a..418669927fb0 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -16,6 +16,7 @@ static int interleave_arithmetic; static bool extended_linear_cache; +static bool fail_autoassemble; #define FAKE_QTG_ID 42 @@ -51,6 +52,31 @@ struct platform_device *cxl_mem_single[NR_MEM_SINGLE]; static struct platform_device *cxl_rch[NR_CXL_RCH]; static struct platform_device *cxl_rcd[NR_CXL_RCH]; +/* + * Decoder registry + * + * Record decoder programming so that the topology can be reconstructed + * after cxl_acpi unbind/bind. This allows a user-created region config + * to be replayed as if firmware had provided the region at enumeration + * time. + * + * Entries are keyed by a stable port identity (port->uport_dev) combined + * with the decoder id. Decoder state is saved at initialization and + * updated on commit and reset. + * + * On re-enumeration mock_init_hdm_decoder() consults this registry to + * restore enabled decoders. Disabled decoders are reinitialized to a + * clean default state rather than replaying stale programming. + */ +static DEFINE_XARRAY(decoder_registry); + +/* + * When set, decoder reset will not update the registry. This allows + * region destroy operations to reset live decoders without erasing + * the saved programming needed for replay after re-enumeration. + */ +static bool decoder_reset_preserve_registry; + static inline bool is_multi_bridge(struct device *dev) { int i; @@ -704,6 +730,194 @@ static int map_targets(struct device *dev, void *data) return 0; } +/* + * Build a stable registry key from the decoder's upstream port identity + * and decoder id. 
+ * + * Decoder objects and cxl_port objects are reallocated on each enumeration, + * so their addresses cannot be used directly as replay keys. However, + * port->uport_dev is stable for a given topology across cxl_acpi unbind/bind + * in cxl_test, so use that as the port identity and pack the local decoder + * id into the low bits. + * + * The key is formed as: + * ((unsigned long)port->uport_dev << 4) | cxld->id + * + * The low bits hold the decoder id (which must fit in 4 bits) while + * the remaining bits identify the upstream port. This key is only used + * within cxl_test to locate saved decoder state during replay. + */ +static unsigned long cxld_registry_index(struct cxl_decoder *cxld) +{ + struct cxl_port *port = to_cxl_port(cxld->dev.parent); + + dev_WARN_ONCE(&port->dev, cxld->id >= 16, + "decoder id:%d out of range\n", cxld->id); + return (((unsigned long)port->uport_dev) << 4) | cxld->id; +} + +struct cxl_test_decoder { + union { + struct cxl_switch_decoder cxlsd; + struct cxl_endpoint_decoder cxled; + }; + struct range dpa_range; +}; + +static struct cxl_test_decoder *cxld_registry_find(struct cxl_decoder *cxld) +{ + return xa_load(&decoder_registry, cxld_registry_index(cxld)); +} + +#define dbg_cxld(port, msg, cxld) \ + do { \ + struct cxl_decoder *___d = (cxld); \ + dev_dbg((port)->uport_dev, \ + "decoder%d: %s range: %#llx-%#llx iw: %d ig: %d flags: %#lx\n", \ + ___d->id, msg, ___d->hpa_range.start, \ + ___d->hpa_range.end + 1, ___d->interleave_ways, \ + ___d->interleave_granularity, ___d->flags); \ + } while (0) + +static int mock_decoder_commit(struct cxl_decoder *cxld); +static void mock_decoder_reset(struct cxl_decoder *cxld); +static void init_disabled_mock_decoder(struct cxl_decoder *cxld); + +static void cxld_copy(struct cxl_decoder *a, struct cxl_decoder *b) +{ + a->id = b->id; + a->hpa_range = b->hpa_range; + a->interleave_ways = b->interleave_ways; + a->interleave_granularity = b->interleave_granularity; + a->target_type = 
b->target_type; + a->flags = b->flags; + a->commit = mock_decoder_commit; + a->reset = mock_decoder_reset; +} + +/* + * Restore decoder programming saved in the registry. + * + * Only decoders that were saved enabled are restored. Disabled decoders + * are left in their default inactive state so that stale programming is + * not resurrected after topology replay. + * + * For endpoint decoders this also restores the DPA reservation needed + * to reconstruct committed mappings. + */ +static int cxld_registry_restore(struct cxl_decoder *cxld, + struct cxl_test_decoder *td) +{ + struct cxl_port *port = to_cxl_port(cxld->dev.parent); + int rc; + + if (is_switch_decoder(&cxld->dev)) { + struct cxl_switch_decoder *cxlsd = + to_cxl_switch_decoder(&cxld->dev); + + if (!(td->cxlsd.cxld.flags & CXL_DECODER_F_ENABLE)) + return 0; + + dbg_cxld(port, "restore", &td->cxlsd.cxld); + cxld_copy(cxld, &td->cxlsd.cxld); + WARN_ON(cxlsd->nr_targets != td->cxlsd.nr_targets); + + /* Restore saved target intent; live dport binding happens later */ + for (int i = 0; i < cxlsd->nr_targets; i++) { + cxlsd->target[i] = NULL; + cxld->target_map[i] = td->cxlsd.cxld.target_map[i]; + } + + port->commit_end = cxld->id; + + } else { + struct cxl_endpoint_decoder *cxled = + to_cxl_endpoint_decoder(&cxld->dev); + + if (!(td->cxled.cxld.flags & CXL_DECODER_F_ENABLE)) + return 0; + + dbg_cxld(port, "restore", &td->cxled.cxld); + cxld_copy(cxld, &td->cxled.cxld); + cxled->state = td->cxled.state; + cxled->skip = td->cxled.skip; + if (range_len(&td->dpa_range)) { + rc = devm_cxl_dpa_reserve(cxled, td->dpa_range.start, + range_len(&td->dpa_range), + td->cxled.skip); + if (rc) { + init_disabled_mock_decoder(cxld); + return rc; + } + } + port->commit_end = cxld->id; + } + + return 0; +} + +static void __cxld_registry_save(struct cxl_test_decoder *td, + struct cxl_decoder *cxld) +{ + if (is_switch_decoder(&cxld->dev)) { + struct cxl_switch_decoder *cxlsd = + to_cxl_switch_decoder(&cxld->dev); + + 
cxld_copy(&td->cxlsd.cxld, cxld); + td->cxlsd.nr_targets = cxlsd->nr_targets; + + /* Save target port_id as a stable identify for the dport */ + for (int i = 0; i < cxlsd->nr_targets; i++) { + struct cxl_dport *dport; + + if (!cxlsd->target[i]) + continue; + + dport = cxlsd->target[i]; + td->cxlsd.cxld.target_map[i] = dport->port_id; + } + } else { + struct cxl_endpoint_decoder *cxled = + to_cxl_endpoint_decoder(&cxld->dev); + + cxld_copy(&td->cxled.cxld, cxld); + td->cxled.state = cxled->state; + td->cxled.skip = cxled->skip; + + if (!(cxld->flags & CXL_DECODER_F_ENABLE)) { + td->dpa_range.start = 0; + td->dpa_range.end = -1; + } else if (cxled->dpa_res) { + td->dpa_range.start = cxled->dpa_res->start; + td->dpa_range.end = cxled->dpa_res->end; + } else { + td->dpa_range.start = 0; + td->dpa_range.end = -1; + } + } +} + +static void cxld_registry_save(struct cxl_test_decoder *td, + struct cxl_decoder *cxld) +{ + struct cxl_port *port = to_cxl_port(cxld->dev.parent); + + dbg_cxld(port, "save", cxld); + __cxld_registry_save(td, cxld); +} + +static void cxld_registry_update(struct cxl_decoder *cxld) +{ + struct cxl_test_decoder *td = cxld_registry_find(cxld); + struct cxl_port *port = to_cxl_port(cxld->dev.parent); + + if (WARN_ON_ONCE(!td)) + return; + + dbg_cxld(port, "update", cxld); + __cxld_registry_save(td, cxld); +} + static int mock_decoder_commit(struct cxl_decoder *cxld) { struct cxl_port *port = to_cxl_port(cxld->dev.parent); @@ -723,6 +937,13 @@ static int mock_decoder_commit(struct cxl_decoder *cxld) port->commit_end++; cxld->flags |= CXL_DECODER_F_ENABLE; + if (is_endpoint_decoder(&cxld->dev)) { + struct cxl_endpoint_decoder *cxled = + to_cxl_endpoint_decoder(&cxld->dev); + + cxled->state = CXL_DECODER_STATE_AUTO; + } + cxld_registry_update(cxld); return 0; } @@ -743,6 +964,65 @@ static void mock_decoder_reset(struct cxl_decoder *cxld) "%s: out of order reset, expected decoder%d.%d\n", dev_name(&cxld->dev), port->id, port->commit_end); cxld->flags &= 
~CXL_DECODER_F_ENABLE; + + if (is_endpoint_decoder(&cxld->dev)) { + struct cxl_endpoint_decoder *cxled = + to_cxl_endpoint_decoder(&cxld->dev); + + cxled->state = CXL_DECODER_STATE_MANUAL; + cxled->skip = 0; + } + if (decoder_reset_preserve_registry) + dev_dbg(port->uport_dev, "decoder%d: skip registry update\n", + cxld->id); + else + cxld_registry_update(cxld); +} + +static struct cxl_test_decoder *cxld_registry_new(struct cxl_decoder *cxld) +{ + struct cxl_test_decoder *td __free(kfree) = + kzalloc(sizeof(*td), GFP_KERNEL); + unsigned long key = cxld_registry_index(cxld); + + if (!td) + return NULL; + + if (xa_insert(&decoder_registry, key, td, GFP_KERNEL)) { + WARN_ON(1); + return NULL; + } + + cxld_registry_save(td, cxld); + return no_free_ptr(td); +} + +static void init_disabled_mock_decoder(struct cxl_decoder *cxld) +{ + cxld->hpa_range.start = 0; + cxld->hpa_range.end = -1; + cxld->interleave_ways = 1; + cxld->interleave_granularity = 0; + cxld->target_type = CXL_DECODER_HOSTONLYMEM; + cxld->flags = 0; + cxld->commit = mock_decoder_commit; + cxld->reset = mock_decoder_reset; + + if (is_switch_decoder(&cxld->dev)) { + struct cxl_switch_decoder *cxlsd = + to_cxl_switch_decoder(&cxld->dev); + + for (int i = 0; i < cxlsd->nr_targets; i++) { + cxlsd->target[i] = NULL; + cxld->target_map[i] = 0; + } + } else { + struct cxl_endpoint_decoder *cxled = + to_cxl_endpoint_decoder(&cxld->dev); + + cxled->state = CXL_DECODER_STATE_MANUAL; + cxled->skip = 0; + } } static void default_mock_decoder(struct cxl_decoder *cxld) @@ -757,6 +1037,8 @@ static void default_mock_decoder(struct cxl_decoder *cxld) cxld->target_type = CXL_DECODER_HOSTONLYMEM; cxld->commit = mock_decoder_commit; cxld->reset = mock_decoder_reset; + + WARN_ON_ONCE(!cxld_registry_new(cxld)); } static int first_decoder(struct device *dev, const void *data) @@ -771,13 +1053,29 @@ static int first_decoder(struct device *dev, const void *data) return 0; } -static void mock_init_hdm_decoder(struct cxl_decoder 
*cxld) +/* + * Initialize a decoder during HDM enumeration. + * + * If a saved registry entry exists: + * - enabled decoders are restored from the saved programming + * - disabled decoders are initialized in a clean disabled state + * + * If no registry entry exists the decoder follows the normal mock + * initialization path, including the special auto-region setup for + * the first endpoints under host-bridge0. + * + * Returns true if decoder state was restored from the registry. In + * that case the saved decode configuration (including target mapping) + * has already been applied and the map_targets() is skipped. + */ +static bool mock_init_hdm_decoder(struct cxl_decoder *cxld) { struct acpi_cedt_cfmws *window = mock_cfmws[0]; struct platform_device *pdev = NULL; struct cxl_endpoint_decoder *cxled; struct cxl_switch_decoder *cxlsd; struct cxl_port *port, *iter; + struct cxl_test_decoder *td; struct cxl_memdev *cxlmd; struct cxl_dport *dport; struct device *dev; @@ -804,6 +1102,24 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) port = NULL; } while (port); port = cxled_to_port(cxled); + } else { + port = to_cxl_port(cxld->dev.parent); + } + + td = cxld_registry_find(cxld); + if (td) { + bool enabled; + + if (is_switch_decoder(&cxld->dev)) + enabled = td->cxlsd.cxld.flags & CXL_DECODER_F_ENABLE; + else + enabled = td->cxled.cxld.flags & CXL_DECODER_F_ENABLE; + + if (enabled) + return !cxld_registry_restore(cxld, td); + + init_disabled_mock_decoder(cxld); + return false; } /* @@ -814,9 +1130,16 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) * * See 'cxl list -BMPu -m cxl_mem.0,cxl_mem.4' */ - if (!hb0 || pdev->id % 4 || pdev->id > 4 || cxld->id > 0) { + if (!is_endpoint_decoder(&cxld->dev) || !hb0 || pdev->id % 4 || + pdev->id > 4 || cxld->id > 0) { default_mock_decoder(cxld); - return; + return false; + } + + /* Simulate missing cxl_mem.4 configuration */ + if (hb0 && pdev->id == 4 && cxld->id == 0 && fail_autoassemble) { + 
default_mock_decoder(cxld); + return false; } base = window->base_hpa; @@ -838,6 +1161,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) cxld->commit = mock_decoder_commit; cxld->reset = mock_decoder_reset; + WARN_ON_ONCE(!cxld_registry_new(cxld)); /* * Now that endpoint decoder is set up, walk up the hierarchy * and setup the switch and root port decoders targeting @cxlmd. @@ -859,14 +1183,14 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) /* put cxl_mem.4 second in the decode order */ if (pdev->id == 4) { cxlsd->target[1] = dport; - cxld->target_map[1] = dport->port_id; + cxlsd->cxld.target_map[1] = dport->port_id; } else { cxlsd->target[0] = dport; - cxld->target_map[0] = dport->port_id; + cxlsd->cxld.target_map[0] = dport->port_id; } } else { cxlsd->target[0] = dport; - cxld->target_map[0] = dport->port_id; + cxlsd->cxld.target_map[0] = dport->port_id; } cxld = &cxlsd->cxld; cxld->target_type = CXL_DECODER_HOSTONLYMEM; @@ -885,8 +1209,14 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) .start = base, .end = base + mock_auto_region_size - 1, }; + cxld->commit = mock_decoder_commit; + cxld->reset = mock_decoder_reset; + + cxld_registry_update(cxld); put_device(dev); } + + return false; } static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, @@ -895,6 +1225,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, struct cxl_port *port = cxlhdm->port; struct cxl_port *parent_port = to_cxl_port(port->dev.parent); int target_count, i; + bool restored; if (is_cxl_endpoint(port)) target_count = 0; @@ -934,10 +1265,8 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, } ctx.target_map = cxld->target_map; - - mock_init_hdm_decoder(cxld); - - if (target_count) { + restored = mock_init_hdm_decoder(cxld); + if (target_count && !restored) { rc = device_for_each_child(port->uport_dev, &ctx, map_targets); if (rc) { @@ -1114,6 +1443,53 @@ static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port) 
cxl_endpoint_get_perf_coordinates(port, ep_c); } +/* + * Simulate that the first half of mock CXL Window 0 is "Soft Reserve" capacity + */ +static int mock_walk_hmem_resources(struct device *host, walk_hmem_fn fn) +{ + struct acpi_cedt_cfmws *cfmws = mock_cfmws[0]; + struct resource window = + DEFINE_RES_MEM(cfmws->base_hpa, cfmws->window_size / 2); + + dev_dbg(host, "walk cxl_test resource: %pr\n", &window); + return fn(host, 0, &window); +} + +/* + * This should only be called by the dax_hmem case, treat mismatches (negative + * result) as "fallback to base region_intersects()". Simulate that the first + * half of mock CXL Window 0 is IORES_DESC_CXL capacity. + */ +static int mock_region_intersects(resource_size_t start, size_t size, + unsigned long flags, unsigned long desc) +{ + struct resource res = DEFINE_RES_MEM(start, size); + struct acpi_cedt_cfmws *cfmws = mock_cfmws[0]; + struct resource window = + DEFINE_RES_MEM(cfmws->base_hpa, cfmws->window_size / 2); + + if (resource_overlaps(&res, &window)) + return REGION_INTERSECTS; + pr_debug("warning: no cxl_test CXL intersection for %pr\n", &res); + return -1; +} + + +static int +mock_region_intersects_soft_reserve(resource_size_t start, size_t size) +{ + struct resource res = DEFINE_RES_MEM(start, size); + struct acpi_cedt_cfmws *cfmws = mock_cfmws[0]; + struct resource window = + DEFINE_RES_MEM(cfmws->base_hpa, cfmws->window_size / 2); + + if (resource_overlaps(&res, &window)) + return REGION_INTERSECTS; + pr_debug("warning: no cxl_test soft reserve intersection for %pr\n", &res); + return -1; +} + static struct cxl_mock_ops cxl_mock_ops = { .is_mock_adev = is_mock_adev, .is_mock_bridge = is_mock_bridge, @@ -1129,6 +1505,9 @@ static struct cxl_mock_ops cxl_mock_ops = { .devm_cxl_add_dport_by_dev = mock_cxl_add_dport_by_dev, .hmat_get_extended_linear_cache_size = mock_hmat_get_extended_linear_cache_size, + .walk_hmem_resources = mock_walk_hmem_resources, + .region_intersects = mock_region_intersects, + 
.region_intersects_soft_reserve = mock_region_intersects_soft_reserve, .list = LIST_HEAD_INIT(cxl_mock_ops.list), }; @@ -1415,6 +1794,33 @@ err_mem: return rc; } +static ssize_t +decoder_reset_preserve_registry_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", decoder_reset_preserve_registry); +} + +static ssize_t +decoder_reset_preserve_registry_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int rc; + + rc = kstrtobool(buf, &decoder_reset_preserve_registry); + if (rc) + return rc; + return count; +} + +static DEVICE_ATTR_RW(decoder_reset_preserve_registry); + +static struct attribute *cxl_acpi_attrs[] = { + &dev_attr_decoder_reset_preserve_registry.attr, NULL +}; +ATTRIBUTE_GROUPS(cxl_acpi); + static __init int cxl_test_init(void) { int rc, i; @@ -1545,6 +1951,7 @@ static __init int cxl_test_init(void) mock_companion(&acpi0017_mock, &cxl_acpi->dev); acpi0017_mock.dev.bus = &platform_bus_type; + cxl_acpi->dev.groups = cxl_acpi_groups; rc = platform_device_add(cxl_acpi); if (rc) @@ -1554,8 +1961,14 @@ static __init int cxl_test_init(void) if (rc) goto err_root; + rc = hmem_test_init(); + if (rc) + goto err_mem; + return 0; +err_mem: + cxl_mem_exit(); err_root: platform_device_put(cxl_acpi); err_rch: @@ -1589,10 +2002,22 @@ err_gen_pool_create: return rc; } +static void free_decoder_registry(void) +{ + unsigned long index; + void *entry; + + xa_for_each(&decoder_registry, index, entry) { + xa_erase(&decoder_registry, index); + kfree(entry); + } +} + static __exit void cxl_test_exit(void) { int i; + hmem_test_exit(); cxl_mem_exit(); platform_device_unregister(cxl_acpi); cxl_rch_topo_exit(); @@ -1614,12 +2039,16 @@ static __exit void cxl_test_exit(void) depopulate_all_mock_resources(); gen_pool_destroy(cxl_mock_pool); unregister_cxl_mock_ops(&cxl_mock_ops); + free_decoder_registry(); + xa_destroy(&decoder_registry); } module_param(interleave_arithmetic, int, 0444); 
MODULE_PARM_DESC(interleave_arithmetic, "Modulo:0, XOR:1"); module_param(extended_linear_cache, bool, 0444); MODULE_PARM_DESC(extended_linear_cache, "Enable extended linear cache support"); +module_param(fail_autoassemble, bool, 0444); +MODULE_PARM_DESC(fail_autoassemble, "Simulate missing member of an auto-region"); module_init(cxl_test_init); module_exit(cxl_test_exit); MODULE_LICENSE("GPL v2"); diff --git a/tools/testing/cxl/test/hmem_test.c b/tools/testing/cxl/test/hmem_test.c new file mode 100644 index 000000000000..3a1a089e1721 --- /dev/null +++ b/tools/testing/cxl/test/hmem_test.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2026 Intel Corporation */ +#include +#include +#include "../../../drivers/dax/bus.h" + +static bool hmem_test; + +static void hmem_test_work(struct work_struct *work) +{ +} + +static void hmem_test_release(struct device *dev) +{ + struct hmem_platform_device *hpdev = + container_of(dev, typeof(*hpdev), pdev.dev); + + memset(hpdev, 0, sizeof(*hpdev)); +} + +static struct hmem_platform_device hmem_test_device = { + .pdev = { + .name = "hmem_platform", + .id = 1, + .dev = { + .release = hmem_test_release, + }, + }, + .work = __WORK_INITIALIZER(hmem_test_device.work, hmem_test_work), +}; + +int hmem_test_init(void) +{ + if (!hmem_test) + return 0; + + return platform_device_register(&hmem_test_device.pdev); +} + +void hmem_test_exit(void) +{ + if (hmem_test) + platform_device_unregister(&hmem_test_device.pdev); +} + +module_param(hmem_test, bool, 0444); +MODULE_PARM_DESC(hmem_test, "Enable/disable the dax_hmem test platform device"); diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index cb87e8c0e63c..271c7ad8cc32 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1695,6 +1695,9 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) struct cxl_dpa_info range_info = { 0 }; int rc; + /* Increase async probe race window */ + usleep_range(500*1000, 
1000*1000); + mdata = devm_kzalloc(dev, sizeof(*mdata), GFP_KERNEL); if (!mdata) return -ENOMEM; @@ -1716,7 +1719,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; - mds = cxl_memdev_state_create(dev); + mds = cxl_memdev_state_create(dev, pdev->id + 1, 0); if (IS_ERR(mds)) return PTR_ERR(mds); @@ -1732,7 +1735,6 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf; INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mockmem_sanitize_work); - cxlds->serial = pdev->id + 1; if (is_rcd(pdev)) cxlds->rcd = true; diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index b8fcb50c1027..6454b868b122 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -251,6 +251,56 @@ struct cxl_dport *__wrap_devm_cxl_add_dport_by_dev(struct cxl_port *port, } EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_dport_by_dev, "CXL"); +int __wrap_region_intersects(resource_size_t start, size_t size, + unsigned long flags, unsigned long desc) +{ + int rc = -1; + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops) + rc = ops->region_intersects(start, size, flags, desc); + if (rc < 0) + rc = region_intersects(start, size, flags, desc); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_GPL(__wrap_region_intersects); + +int __wrap_region_intersects_soft_reserve(resource_size_t start, size_t size) +{ + int rc = -1; + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops) + rc = ops->region_intersects_soft_reserve(start, size); + if (rc < 0) + rc = region_intersects_soft_reserve(start, size); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_GPL(__wrap_region_intersects_soft_reserve); + +int __wrap_walk_hmem_resources(struct device *host, walk_hmem_fn fn) +{ + int index, rc = 0; + bool is_mock = strcmp(dev_name(host), "hmem_platform.1") == 0; + struct cxl_mock_ops *ops = 
get_cxl_mock_ops(&index); + + if (is_mock) { + if (ops) + rc = ops->walk_hmem_resources(host, fn); + } else { + rc = walk_hmem_resources(host, fn); + } + put_cxl_mock_ops(index); + return rc; +} +EXPORT_SYMBOL_GPL(__wrap_walk_hmem_resources); + MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("cxl_test: emulation module"); MODULE_IMPORT_NS("ACPI"); diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index 2684b89c8aa2..4f57dc80ae7d 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -2,6 +2,7 @@ #include #include +#include #include struct cxl_mock_ops { @@ -27,8 +28,15 @@ struct cxl_mock_ops { int (*hmat_get_extended_linear_cache_size)(struct resource *backing_res, int nid, resource_size_t *cache_size); + int (*walk_hmem_resources)(struct device *host, walk_hmem_fn fn); + int (*region_intersects)(resource_size_t start, size_t size, + unsigned long flags, unsigned long desc); + int (*region_intersects_soft_reserve)(resource_size_t start, + size_t size); }; +int hmem_test_init(void); +void hmem_test_exit(void); void register_cxl_mock_ops(struct cxl_mock_ops *ops); void unregister_cxl_mock_ops(struct cxl_mock_ops *ops); struct cxl_mock_ops *get_cxl_mock_ops(int *index);