Merge tag 'edac_updates_for_v6.19_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras

Pull EDAC updates from Borislav Petkov:

 - imh_edac: Add a new EDAC driver for Intel Diamond Rapids and future
   incarnations of this memory controller architecture

 - amd64_edac: Remove the legacy csrow sysfs interface which has been
   deprecated and unused (we assume) for at least a decade

 - Add the capability to fall back to BIOS-provided address translation
   (ACPI PRM) on systems not supported by the current AMD address
   translation library (see the sketch after this list)

 - The usual fixes, fixlets, cleanups and improvements all over the
   place
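
On that PRM fallback, the intended flow is: when the native translation
library does not know the platform, check whether the firmware registered a
PRM handler for the address-translation service and call that instead. A
rough sketch follows; acpi_prm_handler_available() is added by this series
and its signature is assumed here to mirror the existing
acpi_call_prm_handler() (GUID passed by value), and the GUID and parameter
buffer layout are illustrative placeholders, not the real ones:

#include <linux/acpi.h>
#include <linux/uuid.h>

/* Placeholder GUID -- the real handler GUID is platform-defined. */
static const guid_t xlat_guid =
	GUID_INIT(0x12345678, 0xabcd, 0xef01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01);

static int prm_translate(u64 norm_addr, u64 *sys_addr)
{
	struct { u64 in; u64 out; } buf = { .in = norm_addr };	/* made-up layout */

	if (!acpi_prm_handler_available(xlat_guid))
		return -EOPNOTSUPP;	/* keep (or fail) the native ATL path */

	if (ACPI_FAILURE(acpi_call_prm_handler(xlat_guid, &buf)))
		return -EIO;

	*sys_addr = buf.out;
	return 0;
}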

* tag 'edac_updates_for_v6.19_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  RAS/AMD/ATL: Replace bitwise_xor_bits() with hweight16()
  EDAC/igen6: Fix error handling in igen6_edac driver
  EDAC/imh: Setup 'imh_test' debugfs testing node
  EDAC/{skx_comm,imh}: Detect 2-level memory configuration
  EDAC/skx_common: Extend the maximum number of DRAM chip row bits
  EDAC/{skx_common,imh}: Add EDAC driver for Intel Diamond Rapids servers
  EDAC/skx_common: Prepare for skx_set_hi_lo()
  EDAC/skx_common: Prepare for skx_get_edac_list()
  EDAC/{skx_common,skx,i10nm}: Make skx_register_mci() independent of pci_dev
  EDAC/ghes: Replace deprecated strcpy() in ghes_edac_report_mem_error()
  EDAC/ie31200: Fix error handling in ie31200_register_mci
  RAS/CEC: Replace use of system_wq with system_percpu_wq
  EDAC: Remove the legacy EDAC sysfs interface
  EDAC/amd64: Remove NUM_CONTROLLERS macro
  EDAC/amd64: Generate ctl_name string at runtime
  RAS/AMD/ATL: Require PRM support for future systems
  ACPI: PRM: Add acpi_prm_handler_available()
  RAS/AMD/ATL: Return error codes from helper functions
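
On the first shortlog entry above: XOR-folding all bits of a word yields its
parity, which is just the low bit of the population count, so the ATL's
open-coded bitwise_xor_bits() loop reduces to the kernel's hweight16(). A
minimal sketch of the equivalence:

#include <linux/bitops.h>

/* Parity of a 16-bit value: 1 iff an odd number of bits are set. */
static inline u16 parity16(u16 x)
{
	return hweight16(x) & 1;
}
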
Merged by Linus Torvalds on 2025-12-02 10:45:50 -08:00
23 changed files with 796 additions and 673 deletions

drivers/edac/Kconfig

@@ -23,14 +23,6 @@ menuconfig EDAC
if EDAC
config EDAC_LEGACY_SYSFS
bool "EDAC legacy sysfs"
default y
help
Enable the compatibility sysfs nodes.
Use 'Y' if your edac utilities aren't ported to work with the newer
structures.
config EDAC_DEBUG
bool "Debugging"
select DEBUG_FS
@@ -291,6 +283,18 @@ config EDAC_I10NM
system has non-volatile DIMMs you should also manually
select CONFIG_ACPI_NFIT.
config EDAC_IMH
tristate "Intel Integrated Memory/IO Hub MC"
depends on X86_64 && X86_MCE_INTEL && ACPI
depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_IMH can't be y
select DMI
select ACPI_ADXL
help
Support for error detection and correction on the Intel
Integrated Memory/IO Hub Memory Controller. This MC IP is
first used on the Diamond Rapids servers but may appear on
others in the future.
config EDAC_PND2
tristate "Intel Pondicherry2"
depends on PCI && X86_64 && X86_MCE_INTEL

drivers/edac/Makefile

@@ -65,6 +65,9 @@ obj-$(CONFIG_EDAC_SKX) += skx_edac.o skx_edac_common.o
i10nm_edac-y := i10nm_base.o
obj-$(CONFIG_EDAC_I10NM) += i10nm_edac.o skx_edac_common.o
imh_edac-y := imh_base.o
obj-$(CONFIG_EDAC_IMH) += imh_edac.o skx_edac_common.o
obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o
obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o

drivers/edac/amd64_edac.c

@@ -3732,6 +3732,7 @@ static void hw_info_put(struct amd64_pvt *pvt)
pci_dev_put(pvt->F1);
pci_dev_put(pvt->F2);
kfree(pvt->umc);
kfree(pvt->csels);
}
static struct low_ops umc_ops = {
@@ -3766,6 +3767,7 @@ static int per_family_init(struct amd64_pvt *pvt)
pvt->stepping = boot_cpu_data.x86_stepping;
pvt->model = boot_cpu_data.x86_model;
pvt->fam = boot_cpu_data.x86;
char *tmp_name = NULL;
pvt->max_mcs = 2;
/*
@@ -3779,7 +3781,7 @@ static int per_family_init(struct amd64_pvt *pvt)
switch (pvt->fam) {
case 0xf:
pvt->ctl_name = (pvt->ext_model >= K8_REV_F) ?
tmp_name = (pvt->ext_model >= K8_REV_F) ?
"K8 revF or later" : "K8 revE or earlier";
pvt->f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP;
pvt->f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL;
@@ -3788,7 +3790,6 @@ static int per_family_init(struct amd64_pvt *pvt)
break;
case 0x10:
pvt->ctl_name = "F10h";
pvt->f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP;
pvt->f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM;
pvt->ops->dbam_to_cs = f10_dbam_to_chip_select;
@@ -3797,12 +3798,10 @@ static int per_family_init(struct amd64_pvt *pvt)
case 0x15:
switch (pvt->model) {
case 0x30:
pvt->ctl_name = "F15h_M30h";
pvt->f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1;
pvt->f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2;
break;
case 0x60:
pvt->ctl_name = "F15h_M60h";
pvt->f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1;
pvt->f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2;
pvt->ops->dbam_to_cs = f15_m60h_dbam_to_chip_select;
@@ -3811,7 +3810,6 @@ static int per_family_init(struct amd64_pvt *pvt)
/* Richland is only client */
return -ENODEV;
default:
pvt->ctl_name = "F15h";
pvt->f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1;
pvt->f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2;
pvt->ops->dbam_to_cs = f15_dbam_to_chip_select;
@@ -3822,12 +3820,10 @@ static int per_family_init(struct amd64_pvt *pvt)
case 0x16:
switch (pvt->model) {
case 0x30:
pvt->ctl_name = "F16h_M30h";
pvt->f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1;
pvt->f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2;
break;
default:
pvt->ctl_name = "F16h";
pvt->f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1;
pvt->f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2;
break;
@@ -3836,76 +3832,51 @@ static int per_family_init(struct amd64_pvt *pvt)
case 0x17:
switch (pvt->model) {
case 0x10 ... 0x2f:
pvt->ctl_name = "F17h_M10h";
break;
case 0x30 ... 0x3f:
pvt->ctl_name = "F17h_M30h";
pvt->max_mcs = 8;
break;
case 0x60 ... 0x6f:
pvt->ctl_name = "F17h_M60h";
break;
case 0x70 ... 0x7f:
pvt->ctl_name = "F17h_M70h";
break;
default:
pvt->ctl_name = "F17h";
break;
}
break;
case 0x18:
pvt->ctl_name = "F18h";
break;
case 0x19:
switch (pvt->model) {
case 0x00 ... 0x0f:
pvt->ctl_name = "F19h";
pvt->max_mcs = 8;
break;
case 0x10 ... 0x1f:
pvt->ctl_name = "F19h_M10h";
pvt->max_mcs = 12;
pvt->flags.zn_regs_v2 = 1;
break;
case 0x20 ... 0x2f:
pvt->ctl_name = "F19h_M20h";
break;
case 0x30 ... 0x3f:
if (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) {
pvt->ctl_name = "MI200";
tmp_name = "MI200";
pvt->max_mcs = 4;
pvt->dram_type = MEM_HBM2;
pvt->gpu_umc_base = 0x50000;
pvt->ops = &gpu_ops;
} else {
pvt->ctl_name = "F19h_M30h";
pvt->max_mcs = 8;
}
break;
case 0x50 ... 0x5f:
pvt->ctl_name = "F19h_M50h";
break;
case 0x60 ... 0x6f:
pvt->ctl_name = "F19h_M60h";
pvt->flags.zn_regs_v2 = 1;
break;
case 0x70 ... 0x7f:
pvt->ctl_name = "F19h_M70h";
pvt->max_mcs = 4;
pvt->flags.zn_regs_v2 = 1;
break;
case 0x90 ... 0x9f:
pvt->ctl_name = "F19h_M90h";
pvt->max_mcs = 4;
pvt->dram_type = MEM_HBM3;
pvt->gpu_umc_base = 0x90000;
pvt->ops = &gpu_ops;
break;
case 0xa0 ... 0xaf:
pvt->ctl_name = "F19h_MA0h";
pvt->max_mcs = 12;
pvt->flags.zn_regs_v2 = 1;
break;
@@ -3915,34 +3886,22 @@ static int per_family_init(struct amd64_pvt *pvt)
case 0x1A:
switch (pvt->model) {
case 0x00 ... 0x1f:
pvt->ctl_name = "F1Ah";
pvt->max_mcs = 12;
pvt->flags.zn_regs_v2 = 1;
break;
case 0x40 ... 0x4f:
pvt->ctl_name = "F1Ah_M40h";
pvt->flags.zn_regs_v2 = 1;
break;
case 0x50 ... 0x57:
pvt->ctl_name = "F1Ah_M50h";
case 0xc0 ... 0xc7:
pvt->max_mcs = 16;
pvt->flags.zn_regs_v2 = 1;
break;
case 0x90 ... 0x9f:
pvt->ctl_name = "F1Ah_M90h";
pvt->max_mcs = 8;
pvt->flags.zn_regs_v2 = 1;
break;
case 0xa0 ... 0xaf:
pvt->ctl_name = "F1Ah_MA0h";
pvt->max_mcs = 8;
pvt->flags.zn_regs_v2 = 1;
break;
case 0xc0 ... 0xc7:
pvt->ctl_name = "F1Ah_MC0h";
pvt->max_mcs = 16;
pvt->flags.zn_regs_v2 = 1;
break;
}
break;
@@ -3951,6 +3910,16 @@ static int per_family_init(struct amd64_pvt *pvt)
return -ENODEV;
}
if (tmp_name)
scnprintf(pvt->ctl_name, sizeof(pvt->ctl_name), "%s", tmp_name);
else
scnprintf(pvt->ctl_name, sizeof(pvt->ctl_name), "F%02Xh_M%02Xh",
pvt->fam, pvt->model);
pvt->csels = kcalloc(pvt->max_mcs, sizeof(*pvt->csels), GFP_KERNEL);
if (!pvt->csels)
return -ENOMEM;
return 0;
}
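
With ctl_name now generated at runtime, only models that need a hand-picked
name (or extra settings) keep a switch entry; anything else gets the
synthesized "F%02Xh_M%02Xh" form. For example, family 0x1a model 0x4f would
produce:

	char name[19];	/* MAX_CTL_NAMELEN: fits "K8 revE or earlier" plus NUL */

	scnprintf(name, sizeof(name), "F%02Xh_M%02Xh", 0x1a, 0x4f);
	/* name == "F1Ah_M4Fh" */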

drivers/edac/amd64_edac.h

@@ -96,11 +96,12 @@
/* Hardware limit on ChipSelect rows per MC and processors per system */
#define NUM_CHIPSELECTS 8
#define DRAM_RANGES 8
#define NUM_CONTROLLERS 16
#define ON true
#define OFF false
#define MAX_CTL_NAMELEN 19
/*
* PCI-defined configuration space registers
*/
@@ -346,7 +347,7 @@ struct amd64_pvt {
u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */
/* one for each DCT/UMC */
struct chip_select csels[NUM_CONTROLLERS];
struct chip_select *csels;
/* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */
struct dram_range ranges[DRAM_RANGES];
@@ -362,7 +363,7 @@ struct amd64_pvt {
/* x4, x8, or x16 syndromes in use */
u8 ecc_sym_sz;
const char *ctl_name;
char ctl_name[MAX_CTL_NAMELEN];
u16 f1_id, f2_id;
/* Maximum number of memory controllers per die/node. */
u8 max_mcs;

drivers/edac/edac_mc_sysfs.c

@@ -115,401 +115,6 @@ static const char * const edac_caps[] = {
[EDAC_S16ECD16ED] = "S16ECD16ED"
};
#ifdef CONFIG_EDAC_LEGACY_SYSFS
/*
* EDAC sysfs CSROW data structures and methods
*/
#define to_csrow(k) container_of(k, struct csrow_info, dev)
/*
* We need it to avoid namespace conflicts between the legacy API
* and the per-dimm/per-rank one
*/
#define DEVICE_ATTR_LEGACY(_name, _mode, _show, _store) \
static struct device_attribute dev_attr_legacy_##_name = __ATTR(_name, _mode, _show, _store)
struct dev_ch_attribute {
struct device_attribute attr;
unsigned int channel;
};
#define DEVICE_CHANNEL(_name, _mode, _show, _store, _var) \
static struct dev_ch_attribute dev_attr_legacy_##_name = \
{ __ATTR(_name, _mode, _show, _store), (_var) }
#define to_channel(k) (container_of(k, struct dev_ch_attribute, attr)->channel)
/* Set of more default csrow<id> attribute show/store functions */
static ssize_t csrow_ue_count_show(struct device *dev,
struct device_attribute *mattr, char *data)
{
struct csrow_info *csrow = to_csrow(dev);
return sysfs_emit(data, "%u\n", csrow->ue_count);
}
static ssize_t csrow_ce_count_show(struct device *dev,
struct device_attribute *mattr, char *data)
{
struct csrow_info *csrow = to_csrow(dev);
return sysfs_emit(data, "%u\n", csrow->ce_count);
}
static ssize_t csrow_size_show(struct device *dev,
struct device_attribute *mattr, char *data)
{
struct csrow_info *csrow = to_csrow(dev);
int i;
u32 nr_pages = 0;
for (i = 0; i < csrow->nr_channels; i++)
nr_pages += csrow->channels[i]->dimm->nr_pages;
return sysfs_emit(data, "%u\n", PAGES_TO_MiB(nr_pages));
}
static ssize_t csrow_mem_type_show(struct device *dev,
struct device_attribute *mattr, char *data)
{
struct csrow_info *csrow = to_csrow(dev);
return sysfs_emit(data, "%s\n", edac_mem_types[csrow->channels[0]->dimm->mtype]);
}
static ssize_t csrow_dev_type_show(struct device *dev,
struct device_attribute *mattr, char *data)
{
struct csrow_info *csrow = to_csrow(dev);
return sysfs_emit(data, "%s\n", dev_types[csrow->channels[0]->dimm->dtype]);
}
static ssize_t csrow_edac_mode_show(struct device *dev,
struct device_attribute *mattr,
char *data)
{
struct csrow_info *csrow = to_csrow(dev);
return sysfs_emit(data, "%s\n", edac_caps[csrow->channels[0]->dimm->edac_mode]);
}
/* show/store functions for DIMM Label attributes */
static ssize_t channel_dimm_label_show(struct device *dev,
struct device_attribute *mattr,
char *data)
{
struct csrow_info *csrow = to_csrow(dev);
unsigned int chan = to_channel(mattr);
struct rank_info *rank = csrow->channels[chan];
/* if field has not been initialized, there is nothing to send */
if (!rank->dimm->label[0])
return 0;
return sysfs_emit(data, "%s\n", rank->dimm->label);
}
static ssize_t channel_dimm_label_store(struct device *dev,
struct device_attribute *mattr,
const char *data, size_t count)
{
struct csrow_info *csrow = to_csrow(dev);
unsigned int chan = to_channel(mattr);
struct rank_info *rank = csrow->channels[chan];
size_t copy_count = count;
if (count == 0)
return -EINVAL;
if (data[count - 1] == '\0' || data[count - 1] == '\n')
copy_count -= 1;
if (copy_count == 0 || copy_count >= sizeof(rank->dimm->label))
return -EINVAL;
memcpy(rank->dimm->label, data, copy_count);
rank->dimm->label[copy_count] = '\0';
return count;
}
/* show function for dynamic chX_ce_count attribute */
static ssize_t channel_ce_count_show(struct device *dev,
struct device_attribute *mattr, char *data)
{
struct csrow_info *csrow = to_csrow(dev);
unsigned int chan = to_channel(mattr);
struct rank_info *rank = csrow->channels[chan];
return sysfs_emit(data, "%u\n", rank->ce_count);
}
/* cwrow<id>/attribute files */
DEVICE_ATTR_LEGACY(size_mb, S_IRUGO, csrow_size_show, NULL);
DEVICE_ATTR_LEGACY(dev_type, S_IRUGO, csrow_dev_type_show, NULL);
DEVICE_ATTR_LEGACY(mem_type, S_IRUGO, csrow_mem_type_show, NULL);
DEVICE_ATTR_LEGACY(edac_mode, S_IRUGO, csrow_edac_mode_show, NULL);
DEVICE_ATTR_LEGACY(ue_count, S_IRUGO, csrow_ue_count_show, NULL);
DEVICE_ATTR_LEGACY(ce_count, S_IRUGO, csrow_ce_count_show, NULL);
/* default attributes of the CSROW<id> object */
static struct attribute *csrow_attrs[] = {
&dev_attr_legacy_dev_type.attr,
&dev_attr_legacy_mem_type.attr,
&dev_attr_legacy_edac_mode.attr,
&dev_attr_legacy_size_mb.attr,
&dev_attr_legacy_ue_count.attr,
&dev_attr_legacy_ce_count.attr,
NULL,
};
static const struct attribute_group csrow_attr_grp = {
.attrs = csrow_attrs,
};
static const struct attribute_group *csrow_attr_groups[] = {
&csrow_attr_grp,
NULL
};
static const struct device_type csrow_attr_type = {
.groups = csrow_attr_groups,
};
/*
* possible dynamic channel DIMM Label attribute files
*
*/
DEVICE_CHANNEL(ch0_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 0);
DEVICE_CHANNEL(ch1_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 1);
DEVICE_CHANNEL(ch2_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 2);
DEVICE_CHANNEL(ch3_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 3);
DEVICE_CHANNEL(ch4_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 4);
DEVICE_CHANNEL(ch5_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 5);
DEVICE_CHANNEL(ch6_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 6);
DEVICE_CHANNEL(ch7_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 7);
DEVICE_CHANNEL(ch8_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 8);
DEVICE_CHANNEL(ch9_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 9);
DEVICE_CHANNEL(ch10_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 10);
DEVICE_CHANNEL(ch11_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 11);
DEVICE_CHANNEL(ch12_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 12);
DEVICE_CHANNEL(ch13_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 13);
DEVICE_CHANNEL(ch14_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 14);
DEVICE_CHANNEL(ch15_dimm_label, S_IRUGO | S_IWUSR,
channel_dimm_label_show, channel_dimm_label_store, 15);
/* Total possible dynamic DIMM Label attribute file table */
static struct attribute *dynamic_csrow_dimm_attr[] = {
&dev_attr_legacy_ch0_dimm_label.attr.attr,
&dev_attr_legacy_ch1_dimm_label.attr.attr,
&dev_attr_legacy_ch2_dimm_label.attr.attr,
&dev_attr_legacy_ch3_dimm_label.attr.attr,
&dev_attr_legacy_ch4_dimm_label.attr.attr,
&dev_attr_legacy_ch5_dimm_label.attr.attr,
&dev_attr_legacy_ch6_dimm_label.attr.attr,
&dev_attr_legacy_ch7_dimm_label.attr.attr,
&dev_attr_legacy_ch8_dimm_label.attr.attr,
&dev_attr_legacy_ch9_dimm_label.attr.attr,
&dev_attr_legacy_ch10_dimm_label.attr.attr,
&dev_attr_legacy_ch11_dimm_label.attr.attr,
&dev_attr_legacy_ch12_dimm_label.attr.attr,
&dev_attr_legacy_ch13_dimm_label.attr.attr,
&dev_attr_legacy_ch14_dimm_label.attr.attr,
&dev_attr_legacy_ch15_dimm_label.attr.attr,
NULL
};
/* possible dynamic channel ce_count attribute files */
DEVICE_CHANNEL(ch0_ce_count, S_IRUGO,
channel_ce_count_show, NULL, 0);
DEVICE_CHANNEL(ch1_ce_count, S_IRUGO,
channel_ce_count_show, NULL, 1);
DEVICE_CHANNEL(ch2_ce_count, S_IRUGO,
channel_ce_count_show, NULL, 2);
DEVICE_CHANNEL(ch3_ce_count, S_IRUGO,
channel_ce_count_show, NULL, 3);
DEVICE_CHANNEL(ch4_ce_count, S_IRUGO,
channel_ce_count_show, NULL, 4);
DEVICE_CHANNEL(ch5_ce_count, S_IRUGO,
channel_ce_count_show, NULL, 5);
DEVICE_CHANNEL(ch6_ce_count, S_IRUGO,
channel_ce_count_show, NULL, 6);
DEVICE_CHANNEL(ch7_ce_count, S_IRUGO,
channel_ce_count_show, NULL, 7);
DEVICE_CHANNEL(ch8_ce_count, S_IRUGO,
channel_ce_count_show, NULL, 8);
DEVICE_CHANNEL(ch9_ce_count, S_IRUGO,
channel_ce_count_show, NULL, 9);
DEVICE_CHANNEL(ch10_ce_count, S_IRUGO,
channel_ce_count_show, NULL, 10);
DEVICE_CHANNEL(ch11_ce_count, S_IRUGO,
channel_ce_count_show, NULL, 11);
DEVICE_CHANNEL(ch12_ce_count, S_IRUGO,
channel_ce_count_show, NULL, 12);
DEVICE_CHANNEL(ch13_ce_count, S_IRUGO,
channel_ce_count_show, NULL, 13);
DEVICE_CHANNEL(ch14_ce_count, S_IRUGO,
channel_ce_count_show, NULL, 14);
DEVICE_CHANNEL(ch15_ce_count, S_IRUGO,
channel_ce_count_show, NULL, 15);
/* Total possible dynamic ce_count attribute file table */
static struct attribute *dynamic_csrow_ce_count_attr[] = {
&dev_attr_legacy_ch0_ce_count.attr.attr,
&dev_attr_legacy_ch1_ce_count.attr.attr,
&dev_attr_legacy_ch2_ce_count.attr.attr,
&dev_attr_legacy_ch3_ce_count.attr.attr,
&dev_attr_legacy_ch4_ce_count.attr.attr,
&dev_attr_legacy_ch5_ce_count.attr.attr,
&dev_attr_legacy_ch6_ce_count.attr.attr,
&dev_attr_legacy_ch7_ce_count.attr.attr,
&dev_attr_legacy_ch8_ce_count.attr.attr,
&dev_attr_legacy_ch9_ce_count.attr.attr,
&dev_attr_legacy_ch10_ce_count.attr.attr,
&dev_attr_legacy_ch11_ce_count.attr.attr,
&dev_attr_legacy_ch12_ce_count.attr.attr,
&dev_attr_legacy_ch13_ce_count.attr.attr,
&dev_attr_legacy_ch14_ce_count.attr.attr,
&dev_attr_legacy_ch15_ce_count.attr.attr,
NULL
};
static umode_t csrow_dev_is_visible(struct kobject *kobj,
struct attribute *attr, int idx)
{
struct device *dev = kobj_to_dev(kobj);
struct csrow_info *csrow = container_of(dev, struct csrow_info, dev);
if (idx >= csrow->nr_channels)
return 0;
if (idx >= ARRAY_SIZE(dynamic_csrow_ce_count_attr) - 1) {
WARN_ONCE(1, "idx: %d\n", idx);
return 0;
}
/* Only expose populated DIMMs */
if (!csrow->channels[idx]->dimm->nr_pages)
return 0;
return attr->mode;
}
static const struct attribute_group csrow_dev_dimm_group = {
.attrs = dynamic_csrow_dimm_attr,
.is_visible = csrow_dev_is_visible,
};
static const struct attribute_group csrow_dev_ce_count_group = {
.attrs = dynamic_csrow_ce_count_attr,
.is_visible = csrow_dev_is_visible,
};
static const struct attribute_group *csrow_dev_groups[] = {
&csrow_dev_dimm_group,
&csrow_dev_ce_count_group,
NULL
};
static void csrow_release(struct device *dev)
{
/*
* Nothing to do, just unregister sysfs here. The mci
* device owns the data and will also release it.
*/
}
static inline int nr_pages_per_csrow(struct csrow_info *csrow)
{
int chan, nr_pages = 0;
for (chan = 0; chan < csrow->nr_channels; chan++)
nr_pages += csrow->channels[chan]->dimm->nr_pages;
return nr_pages;
}
/* Create a CSROW object under specified edac_mc_device */
static int edac_create_csrow_object(struct mem_ctl_info *mci,
struct csrow_info *csrow, int index)
{
int err;
csrow->dev.type = &csrow_attr_type;
csrow->dev.groups = csrow_dev_groups;
csrow->dev.release = csrow_release;
device_initialize(&csrow->dev);
csrow->dev.parent = &mci->dev;
csrow->mci = mci;
dev_set_name(&csrow->dev, "csrow%d", index);
dev_set_drvdata(&csrow->dev, csrow);
err = device_add(&csrow->dev);
if (err) {
edac_dbg(1, "failure: create device %s\n", dev_name(&csrow->dev));
put_device(&csrow->dev);
return err;
}
edac_dbg(0, "device %s created\n", dev_name(&csrow->dev));
return 0;
}
/* Create a CSROW object under specified edac_mc_device */
static int edac_create_csrow_objects(struct mem_ctl_info *mci)
{
int err, i;
struct csrow_info *csrow;
for (i = 0; i < mci->nr_csrows; i++) {
csrow = mci->csrows[i];
if (!nr_pages_per_csrow(csrow))
continue;
err = edac_create_csrow_object(mci, mci->csrows[i], i);
if (err < 0)
goto error;
}
return 0;
error:
for (--i; i >= 0; i--) {
if (device_is_registered(&mci->csrows[i]->dev))
device_unregister(&mci->csrows[i]->dev);
}
return err;
}
static void edac_delete_csrow_objects(struct mem_ctl_info *mci)
{
int i;
for (i = 0; i < mci->nr_csrows; i++) {
if (device_is_registered(&mci->csrows[i]->dev))
device_unregister(&mci->csrows[i]->dev);
}
}
#endif
/*
* Per-dimm (or per-rank) devices
*/
@@ -989,12 +594,6 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
goto fail;
}
#ifdef CONFIG_EDAC_LEGACY_SYSFS
err = edac_create_csrow_objects(mci);
if (err < 0)
goto fail;
#endif
edac_create_debugfs_nodes(mci);
return 0;
@@ -1019,9 +618,6 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
#ifdef CONFIG_EDAC_DEBUG
edac_debugfs_remove_recursive(mci->debugfs);
#endif
#ifdef CONFIG_EDAC_LEGACY_SYSFS
edac_delete_csrow_objects(mci);
#endif
mci_for_each_dimm(mci, dimm) {
if (!device_is_registered(&dimm->dev))

drivers/edac/ghes_edac.c

@@ -15,6 +15,7 @@
#include "edac_module.h"
#include <ras/ras_event.h>
#include <linux/notifier.h>
#include <linux/string.h>
#define OTHER_DETAIL_LEN 400
@@ -332,7 +333,7 @@ static int ghes_edac_report_mem_error(struct notifier_block *nb,
p = pvt->msg;
p += snprintf(p, sizeof(pvt->msg), "%s", cper_mem_err_type_str(etype));
} else {
strcpy(pvt->msg, "unknown error");
strscpy(pvt->msg, "unknown error");
}
/* Error address */
@@ -357,14 +358,14 @@ static int ghes_edac_report_mem_error(struct notifier_block *nb,
dimm = find_dimm_by_handle(mci, mem_err->mem_dev_handle);
if (dimm) {
e->top_layer = dimm->idx;
strcpy(e->label, dimm->label);
strscpy(e->label, dimm->label);
}
}
if (p > e->location)
*(p - 1) = '\0';
if (!*e->label)
strcpy(e->label, "unknown memory");
strscpy(e->label, "unknown memory");
/* All other fields are mapped on e->other_detail */
p = pvt->other_detail;
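
The two-argument strscpy() used above is the modern form: when the
destination is an array, the macro infers the bound from sizeof() at compile
time, so the NUL-terminated copy cannot overrun. A minimal illustration:

	char label[32];
	const char *src = "unknown memory";

	strscpy(label, "unknown memory");	/* bound inferred: sizeof(label) */
	strscpy(label, src, sizeof(label));	/* explicit-bound form, same effect */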

drivers/edac/i10nm_base.c

@@ -1198,7 +1198,8 @@ static int __init i10nm_init(void)
d->imc[i].num_dimms = cfg->ddr_dimm_num;
}
rc = skx_register_mci(&d->imc[i], d->imc[i].mdev,
rc = skx_register_mci(&d->imc[i], &d->imc[i].mdev->dev,
pci_name(d->imc[i].mdev),
"Intel_10nm Socket", EDAC_MOD_STR,
i10nm_get_dimm_config, cfg);
if (rc < 0)

drivers/edac/ie31200_edac.c

@@ -526,6 +526,7 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, in
ie31200_pvt.priv[mc] = priv;
return 0;
fail_unmap:
put_device(&priv->dev);
iounmap(window);
fail_free:
edac_mc_free(mci);
@@ -598,6 +599,7 @@ static void ie31200_unregister_mcis(void)
mci = priv->mci;
edac_mc_del_mc(mci->pdev);
iounmap(priv->window);
put_device(&priv->dev);
edac_mc_free(mci);
}
}

drivers/edac/igen6_edac.c

@@ -1300,6 +1300,7 @@ static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev
imc->mci = mci;
return 0;
fail3:
put_device(&imc->dev);
mci->pvt_info = NULL;
kfree(mci->ctl_name);
fail2:
@@ -1326,6 +1327,7 @@ static void igen6_unregister_mcis(void)
kfree(mci->ctl_name);
mci->pvt_info = NULL;
edac_mc_free(mci);
put_device(&imc->dev);
iounmap(imc->window);
}
}
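
Both error-handling fixes above (ie31200 and igen6) apply the same
driver-core rule: device_initialize() takes the initial reference, and only
put_device() drops it, including when device_add() fails, and again after
the device is deleted. The canonical shape, sketched with an illustrative
wrapper (the struct and function names here are made up):

	static int register_example(struct example_priv *priv)
	{
		int rc;

		device_initialize(&priv->dev);	/* refcount 1; ->release() armed */

		rc = device_add(&priv->dev);
		if (rc)
			put_device(&priv->dev);	/* error path: drop init ref */

		return rc;
	}

	static void unregister_example(struct example_priv *priv)
	{
		device_del(&priv->dev);		/* unregister from the core */
		put_device(&priv->dev);		/* final put -> ->release() runs */
	}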

drivers/edac/imh_base.c (new file, 602 lines)

@@ -0,0 +1,602 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Driver for Intel(R) servers with Integrated Memory/IO Hub-based memory controller.
* Copyright (c) 2025, Intel Corporation.
*/
#include <linux/kernel.h>
#include <linux/io.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/mce.h>
#include <asm/cpu.h>
#include "edac_module.h"
#include "skx_common.h"
#define IMH_REVISION "v0.0.1"
#define EDAC_MOD_STR "imh_edac"
/* Debug macros */
#define imh_printk(level, fmt, arg...) \
edac_printk(level, "imh", fmt, ##arg)
/* Configuration Agent (Ubox) */
#define MMIO_BASE_H(reg) (((u64)GET_BITFIELD(reg, 0, 29)) << 23)
#define SOCKET_ID(reg) GET_BITFIELD(reg, 0, 3)
/* PUNIT */
#define DDR_IMC_BITMAP(reg) GET_BITFIELD(reg, 23, 30)
/* Memory Controller */
#define ECC_ENABLED(reg) GET_BITFIELD(reg, 2, 2)
#define DIMM_POPULATED(reg) GET_BITFIELD(reg, 15, 15)
/* System Cache Agent (SCA) */
#define TOLM(reg) (((u64)GET_BITFIELD(reg, 16, 31)) << 16)
#define TOHM(reg) (((u64)GET_BITFIELD(reg, 16, 51)) << 16)
/* Home Agent (HA) */
#define NMCACHING(reg) GET_BITFIELD(reg, 8, 8)
/**
* struct local_reg - A register as described in the local package view.
*
* @pkg: (input) The package where the register is located.
* @pbase: (input) The IP MMIO base physical address in the local package view.
* @size: (input) The IP MMIO size.
* @offset: (input) The register offset from the IP MMIO base @pbase.
* @width: (input) The register width in bytes.
* @vbase: (internal) The IP MMIO base virtual address.
* @val: (output) The register value.
*/
struct local_reg {
int pkg;
u64 pbase;
u32 size;
u32 offset;
u8 width;
void __iomem *vbase;
u64 val;
};
#define DEFINE_LOCAL_REG(name, cfg, package, north, ip_name, ip_idx, reg_name) \
struct local_reg name = { \
.pkg = package, \
.pbase = (north ? (cfg)->mmio_base_l_north : \
(cfg)->mmio_base_l_south) + \
(cfg)->ip_name##_base + \
(cfg)->ip_name##_size * (ip_idx), \
.size = (cfg)->ip_name##_size, \
.offset = (cfg)->ip_name##_reg_##reg_name##_offset, \
.width = (cfg)->ip_name##_reg_##reg_name##_width, \
}
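/*
 * Usage illustration: DEFINE_LOCAL_REG(reg, cfg, 0, true, pcu, 0, capid3)
 * declares a struct local_reg for package 0 whose pbase is the north IMH
 * local MMIO base plus cfg->pcu_base (instance 0), whose size is
 * cfg->pcu_size, and whose offset/width come from
 * cfg->pcu_reg_capid3_offset/_width.
 */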
static u64 readx(void __iomem *addr, u8 width)
{
switch (width) {
case 1:
return readb(addr);
case 2:
return readw(addr);
case 4:
return readl(addr);
case 8:
return readq(addr);
default:
imh_printk(KERN_ERR, "Invalid reg 0x%p width %d\n", addr, width);
return 0;
}
}
static void __read_local_reg(void *reg)
{
struct local_reg *r = (struct local_reg *)reg;
r->val = readx(r->vbase + r->offset, r->width);
}
/* Read a local-view register. */
static bool read_local_reg(struct local_reg *reg)
{
int cpu;
/* Get the target CPU in the package @reg->pkg. */
for_each_online_cpu(cpu) {
if (reg->pkg == topology_physical_package_id(cpu))
break;
}
if (cpu >= nr_cpu_ids)
return false;
reg->vbase = ioremap(reg->pbase, reg->size);
if (!reg->vbase) {
imh_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", reg->pbase);
return false;
}
/* Get the target CPU to read the register. */
smp_call_function_single(cpu, __read_local_reg, reg, 1);
iounmap(reg->vbase);
return true;
}
/* Get the bitmap of memory controller instances in package @pkg. */
static u32 get_imc_bitmap(struct res_config *cfg, int pkg, bool north)
{
DEFINE_LOCAL_REG(reg, cfg, pkg, north, pcu, 0, capid3);
if (!read_local_reg(&reg))
return 0;
edac_dbg(2, "Pkg%d %s mc instances bitmap 0x%llx (reg 0x%llx)\n",
pkg, north ? "north" : "south",
DDR_IMC_BITMAP(reg.val), reg.val);
return DDR_IMC_BITMAP(reg.val);
}
static void imc_release(struct device *dev)
{
edac_dbg(2, "imc device %s released\n", dev_name(dev));
kfree(dev);
}
static int __get_ddr_munits(struct res_config *cfg, struct skx_dev *d,
bool north, int lmc)
{
unsigned long size = cfg->ddr_chan_mmio_sz * cfg->ddr_chan_num;
unsigned long bitmap = get_imc_bitmap(cfg, d->pkg, north);
void __iomem *mbase;
struct device *dev;
int i, rc, pmc;
u64 base;
for_each_set_bit(i, &bitmap, sizeof(bitmap) * 8) {
base = north ? d->mmio_base_h_north : d->mmio_base_h_south;
base += cfg->ddr_imc_base + size * i;
edac_dbg(2, "Pkg%d mc%d mmio base 0x%llx size 0x%lx\n",
d->pkg, lmc, base, size);
/* Set up the imc MMIO. */
mbase = ioremap(base, size);
if (!mbase) {
imh_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", base);
return -ENOMEM;
}
d->imc[lmc].mbase = mbase;
d->imc[lmc].lmc = lmc;
/* Create the imc device instance. */
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return -ENOMEM;
dev->release = imc_release;
device_initialize(dev);
rc = dev_set_name(dev, "0x%llx", base);
if (rc) {
imh_printk(KERN_ERR, "Failed to set dev name\n");
put_device(dev);
return rc;
}
d->imc[lmc].dev = dev;
/* Set up the imc index mapping. */
pmc = north ? i : 8 + i;
skx_set_mc_mapping(d, pmc, lmc);
lmc++;
}
return lmc;
}
static bool get_ddr_munits(struct res_config *cfg, struct skx_dev *d)
{
int lmc = __get_ddr_munits(cfg, d, true, 0);
if (lmc < 0)
return false;
lmc = __get_ddr_munits(cfg, d, false, lmc);
if (lmc <= 0)
return false;
return true;
}
static bool get_socket_id(struct res_config *cfg, struct skx_dev *d)
{
DEFINE_LOCAL_REG(reg, cfg, d->pkg, true, ubox, 0, socket_id);
u8 src_id;
int i;
if (!read_local_reg(&reg))
return false;
src_id = SOCKET_ID(reg.val);
edac_dbg(2, "socket id 0x%x (reg 0x%llx)\n", src_id, reg.val);
for (i = 0; i < cfg->ddr_imc_num; i++)
d->imc[i].src_id = src_id;
return true;
}
/* Get TOLM (Top Of Low Memory) and TOHM (Top Of High Memory) parameters. */
static bool imh_get_tolm_tohm(struct res_config *cfg, u64 *tolm, u64 *tohm)
{
DEFINE_LOCAL_REG(reg, cfg, 0, true, sca, 0, tolm);
if (!read_local_reg(&reg))
return false;
*tolm = TOLM(reg.val);
edac_dbg(2, "tolm 0x%llx (reg 0x%llx)\n", *tolm, reg.val);
DEFINE_LOCAL_REG(reg2, cfg, 0, true, sca, 0, tohm);
if (!read_local_reg(&reg2))
return false;
*tohm = TOHM(reg2.val);
edac_dbg(2, "tohm 0x%llx (reg 0x%llx)\n", *tohm, reg2.val);
return true;
}
/* Get the system-view MMIO_BASE_H for {north,south}-IMH. */
static int imh_get_all_mmio_base_h(struct res_config *cfg, struct list_head *edac_list)
{
int i, n = topology_max_packages(), imc_num = cfg->ddr_imc_num + cfg->hbm_imc_num;
struct skx_dev *d;
for (i = 0; i < n; i++) {
d = kzalloc(struct_size(d, imc, imc_num), GFP_KERNEL);
if (!d)
return -ENOMEM;
DEFINE_LOCAL_REG(reg, cfg, i, true, ubox, 0, mmio_base);
/* Get MMIO_BASE_H for the north-IMH. */
if (!read_local_reg(&reg) || !reg.val) {
kfree(d);
imh_printk(KERN_ERR, "Pkg%d has no north mmio_base_h\n", i);
return -ENODEV;
}
d->mmio_base_h_north = MMIO_BASE_H(reg.val);
edac_dbg(2, "Pkg%d north mmio_base_h 0x%llx (reg 0x%llx)\n",
i, d->mmio_base_h_north, reg.val);
/* Get MMIO_BASE_H for the south-IMH (optional). */
DEFINE_LOCAL_REG(reg2, cfg, i, false, ubox, 0, mmio_base);
if (read_local_reg(&reg2)) {
d->mmio_base_h_south = MMIO_BASE_H(reg2.val);
edac_dbg(2, "Pkg%d south mmio_base_h 0x%llx (reg 0x%llx)\n",
i, d->mmio_base_h_south, reg2.val);
}
d->pkg = i;
d->num_imc = imc_num;
skx_init_mc_mapping(d);
list_add_tail(&d->list, edac_list);
}
return 0;
}
/* Get the number of per-package memory controllers. */
static int imh_get_imc_num(struct res_config *cfg)
{
int imc_num = hweight32(get_imc_bitmap(cfg, 0, true)) +
hweight32(get_imc_bitmap(cfg, 0, false));
if (!imc_num) {
imh_printk(KERN_ERR, "Invalid mc number\n");
return -ENODEV;
}
if (cfg->ddr_imc_num != imc_num) {
/*
* Update the configuration data to reflect the number of
* present DDR memory controllers.
*/
cfg->ddr_imc_num = imc_num;
edac_dbg(2, "Set ddr mc number %d\n", imc_num);
}
return 0;
}
/* Get all memory controllers' parameters. */
static int imh_get_munits(struct res_config *cfg, struct list_head *edac_list)
{
struct skx_imc *imc;
struct skx_dev *d;
u8 mc = 0;
int i;
list_for_each_entry(d, edac_list, list) {
if (!get_ddr_munits(cfg, d)) {
imh_printk(KERN_ERR, "No mc found\n");
return -ENODEV;
}
if (!get_socket_id(cfg, d)) {
imh_printk(KERN_ERR, "Failed to get socket id\n");
return -ENODEV;
}
for (i = 0; i < cfg->ddr_imc_num; i++) {
imc = &d->imc[i];
if (!imc->mbase)
continue;
imc->chan_mmio_sz = cfg->ddr_chan_mmio_sz;
imc->num_channels = cfg->ddr_chan_num;
imc->num_dimms = cfg->ddr_dimm_num;
imc->mc = mc++;
}
}
return 0;
}
static bool check_2lm_enabled(struct res_config *cfg, struct skx_dev *d, int ha_idx)
{
DEFINE_LOCAL_REG(reg, cfg, d->pkg, true, ha, ha_idx, mode);
if (!read_local_reg(&reg))
return false;
if (!NMCACHING(reg.val))
return false;
edac_dbg(2, "2-level memory configuration (reg 0x%llx, ha idx %d)\n", reg.val, ha_idx);
return true;
}
/* Check whether the system has a 2-level memory configuration. */
static bool imh_2lm_enabled(struct res_config *cfg, struct list_head *head)
{
struct skx_dev *d;
int i;
list_for_each_entry(d, head, list) {
for (i = 0; i < cfg->ddr_imc_num; i++)
if (check_2lm_enabled(cfg, d, i))
return true;
}
return false;
}
/* Helpers to read memory controller registers */
static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width)
{
return readx(imc->mbase + imc->chan_mmio_sz * chan + offset, width);
}
static u32 read_imc_mcmtr(struct res_config *cfg, struct skx_imc *imc, int chan)
{
return (u32)read_imc_reg(imc, chan, cfg->ddr_reg_mcmtr_offset, cfg->ddr_reg_mcmtr_width);
}
static u32 read_imc_dimmmtr(struct res_config *cfg, struct skx_imc *imc, int chan, int dimm)
{
return (u32)read_imc_reg(imc, chan, cfg->ddr_reg_dimmmtr_offset +
cfg->ddr_reg_dimmmtr_width * dimm,
cfg->ddr_reg_dimmmtr_width);
}
static bool ecc_enabled(u32 mcmtr)
{
return (bool)ECC_ENABLED(mcmtr);
}
static bool dimm_populated(u32 dimmmtr)
{
return (bool)DIMM_POPULATED(dimmmtr);
}
/* Get the configuration of each DIMM on memory controller @mci. */
static int imh_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg)
{
struct skx_pvt *pvt = mci->pvt_info;
struct skx_imc *imc = pvt->imc;
struct dimm_info *dimm;
u32 mcmtr, dimmmtr;
int i, j, ndimms;
for (i = 0; i < imc->num_channels; i++) {
if (!imc->mbase)
continue;
mcmtr = read_imc_mcmtr(cfg, imc, i);
for (ndimms = 0, j = 0; j < imc->num_dimms; j++) {
dimmmtr = read_imc_dimmmtr(cfg, imc, i, j);
edac_dbg(1, "mcmtr 0x%x dimmmtr 0x%x (mc%d ch%d dimm%d)\n",
mcmtr, dimmmtr, imc->mc, i, j);
if (!dimm_populated(dimmmtr))
continue;
dimm = edac_get_dimm(mci, i, j, 0);
ndimms += skx_get_dimm_info(dimmmtr, 0, 0, dimm,
imc, i, j, cfg);
}
if (ndimms && !ecc_enabled(mcmtr)) {
imh_printk(KERN_ERR, "ECC is disabled on mc%d ch%d\n",
imc->mc, i);
return -ENODEV;
}
}
return 0;
}
/* Register all memory controllers to the EDAC core. */
static int imh_register_mci(struct res_config *cfg, struct list_head *edac_list)
{
struct skx_imc *imc;
struct skx_dev *d;
int i, rc;
list_for_each_entry(d, edac_list, list) {
for (i = 0; i < cfg->ddr_imc_num; i++) {
imc = &d->imc[i];
if (!imc->mbase)
continue;
rc = skx_register_mci(imc, imc->dev,
dev_name(imc->dev),
"Intel IMH-based Socket",
EDAC_MOD_STR,
imh_get_dimm_config, cfg);
if (rc)
return rc;
}
}
return 0;
}
static struct res_config dmr_cfg = {
.type = DMR,
.support_ddr5 = true,
.mmio_base_l_north = 0xf6800000,
.mmio_base_l_south = 0xf6000000,
.ddr_chan_num = 1,
.ddr_dimm_num = 2,
.ddr_imc_base = 0x39b000,
.ddr_chan_mmio_sz = 0x8000,
.ddr_reg_mcmtr_offset = 0x360,
.ddr_reg_mcmtr_width = 4,
.ddr_reg_dimmmtr_offset = 0x370,
.ddr_reg_dimmmtr_width = 4,
.ubox_base = 0x0,
.ubox_size = 0x2000,
.ubox_reg_mmio_base_offset = 0x580,
.ubox_reg_mmio_base_width = 4,
.ubox_reg_socket_id_offset = 0x1080,
.ubox_reg_socket_id_width = 4,
.pcu_base = 0x3000,
.pcu_size = 0x10000,
.pcu_reg_capid3_offset = 0x290,
.pcu_reg_capid3_width = 4,
.sca_base = 0x24c000,
.sca_size = 0x2500,
.sca_reg_tolm_offset = 0x2100,
.sca_reg_tolm_width = 8,
.sca_reg_tohm_offset = 0x2108,
.sca_reg_tohm_width = 8,
.ha_base = 0x3eb000,
.ha_size = 0x1000,
.ha_reg_mode_offset = 0x4a0,
.ha_reg_mode_width = 4,
};
static const struct x86_cpu_id imh_cpuids[] = {
X86_MATCH_VFM(INTEL_DIAMONDRAPIDS_X, &dmr_cfg),
{}
};
MODULE_DEVICE_TABLE(x86cpu, imh_cpuids);
static struct notifier_block imh_mce_dec = {
.notifier_call = skx_mce_check_error,
.priority = MCE_PRIO_EDAC,
};
static int __init imh_init(void)
{
const struct x86_cpu_id *id;
struct list_head *edac_list;
struct res_config *cfg;
const char *owner;
u64 tolm, tohm;
int rc;
edac_dbg(2, "\n");
if (ghes_get_devices())
return -EBUSY;
owner = edac_get_owner();
if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
return -EBUSY;
if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
return -ENODEV;
id = x86_match_cpu(imh_cpuids);
if (!id)
return -ENODEV;
cfg = (struct res_config *)id->driver_data;
skx_set_res_cfg(cfg);
if (!imh_get_tolm_tohm(cfg, &tolm, &tohm))
return -ENODEV;
skx_set_hi_lo(tolm, tohm);
rc = imh_get_imc_num(cfg);
if (rc < 0)
goto fail;
edac_list = skx_get_edac_list();
rc = imh_get_all_mmio_base_h(cfg, edac_list);
if (rc)
goto fail;
rc = imh_get_munits(cfg, edac_list);
if (rc)
goto fail;
skx_set_mem_cfg(imh_2lm_enabled(cfg, edac_list));
rc = imh_register_mci(cfg, edac_list);
if (rc)
goto fail;
rc = skx_adxl_get();
if (rc)
goto fail;
opstate_init();
mce_register_decode_chain(&imh_mce_dec);
skx_setup_debug("imh_test");
imh_printk(KERN_INFO, "%s\n", IMH_REVISION);
return 0;
fail:
skx_remove();
return rc;
}
static void __exit imh_exit(void)
{
edac_dbg(2, "\n");
skx_teardown_debug();
mce_unregister_decode_chain(&imh_mce_dec);
skx_adxl_put();
skx_remove();
}
module_init(imh_init);
module_exit(imh_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Qiuxu Zhuo");
MODULE_DESCRIPTION("MC Driver for Intel servers using IMH-based memory controller");

drivers/edac/skx_base.c

@@ -662,8 +662,8 @@ static int __init skx_init(void)
d->imc[i].src_id = src_id;
d->imc[i].num_channels = cfg->ddr_chan_num;
d->imc[i].num_dimms = cfg->ddr_dimm_num;
rc = skx_register_mci(&d->imc[i], d->imc[i].chan[0].cdev,
rc = skx_register_mci(&d->imc[i], &d->imc[i].chan[0].cdev->dev,
pci_name(d->imc[i].chan[0].cdev),
"Skylake Socket", EDAC_MOD_STR,
skx_get_dimm_config, cfg);
if (rc < 0)

drivers/edac/skx_common.c

@@ -124,7 +124,7 @@ void skx_adxl_put(void)
}
EXPORT_SYMBOL_GPL(skx_adxl_put);
static void skx_init_mc_mapping(struct skx_dev *d)
void skx_init_mc_mapping(struct skx_dev *d)
{
/*
* By default, the BIOS presents all memory controllers within each
@@ -135,6 +135,7 @@ static void skx_init_mc_mapping(struct skx_dev *d)
for (int i = 0; i < d->num_imc; i++)
d->imc[i].mc_mapping = i;
}
EXPORT_SYMBOL_GPL(skx_init_mc_mapping);
void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc)
{
@@ -384,6 +385,12 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list)
}
EXPORT_SYMBOL_GPL(skx_get_all_bus_mappings);
struct list_head *skx_get_edac_list(void)
{
return &dev_edac_list;
}
EXPORT_SYMBOL_GPL(skx_get_edac_list);
int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm)
{
struct pci_dev *pdev;
@@ -424,6 +431,13 @@ fail:
}
EXPORT_SYMBOL_GPL(skx_get_hi_lo);
void skx_set_hi_lo(u64 tolm, u64 tohm)
{
skx_tolm = tolm;
skx_tohm = tohm;
}
EXPORT_SYMBOL_GPL(skx_set_hi_lo);
static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add,
int minval, int maxval, const char *name)
{
@@ -437,7 +451,7 @@ static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add,
}
#define numrank(reg) skx_get_dimm_attr(reg, 12, 13, 0, 0, 2, "ranks")
#define numrow(reg) skx_get_dimm_attr(reg, 2, 4, 12, 1, 6, "rows")
#define numrow(reg) skx_get_dimm_attr(reg, 2, 4, 12, 1, 7, "rows")
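/* rows = raw field + 12, so raising maxval from 6 to 7 extends the limit from 18 to 19 row bits */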
#define numcol(reg) skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols")
int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
@@ -545,9 +559,9 @@ unknown_size:
}
EXPORT_SYMBOL_GPL(skx_get_nvdimm_info);
int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
const char *ctl_name, const char *mod_str,
get_dimm_config_f get_dimm_config,
int skx_register_mci(struct skx_imc *imc, struct device *dev,
const char *dev_name, const char *ctl_name,
const char *mod_str, get_dimm_config_f get_dimm_config,
struct res_config *cfg)
{
struct mem_ctl_info *mci;
@@ -588,7 +602,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
mci->edac_ctl_cap = EDAC_FLAG_NONE;
mci->edac_cap = EDAC_FLAG_NONE;
mci->mod_name = mod_str;
mci->dev_name = pci_name(pdev);
mci->dev_name = dev_name;
mci->ctl_page_to_phys = NULL;
rc = get_dimm_config(mci, cfg);
@@ -596,7 +610,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
goto fail;
/* Record ptr to the generic device */
mci->pdev = &pdev->dev;
mci->pdev = dev;
/* Add this new MC control structure to EDAC's list of MCs */
if (unlikely(edac_mc_add_mc(mci))) {
@@ -810,6 +824,9 @@ void skx_remove(void)
if (d->imc[i].mbase)
iounmap(d->imc[i].mbase);
if (d->imc[i].dev)
put_device(d->imc[i].dev);
for (j = 0; j < d->imc[i].num_channels; j++) {
if (d->imc[i].chan[j].cdev)
pci_dev_put(d->imc[i].chan[j].cdev);
@@ -833,7 +850,7 @@ EXPORT_SYMBOL_GPL(skx_remove);
/*
* Debug feature.
* Exercise the address decode logic by writing an address to
* /sys/kernel/debug/edac/{skx,i10nm}_test/addr.
* /sys/kernel/debug/edac/{skx,i10nm,imh}_test/addr.
*/
static struct dentry *skx_test;

drivers/edac/skx_common.h

@@ -121,20 +121,33 @@ struct reg_rrl {
* memory controllers on the die.
*/
struct skx_dev {
struct list_head list;
/* {skx,i10nm}_edac */
u8 bus[4];
int seg;
struct pci_dev *sad_all;
struct pci_dev *util_all;
struct pci_dev *uracu; /* for i10nm CPU */
struct pci_dev *pcu_cr3; /* for HBM memory detection */
struct pci_dev *uracu;
struct pci_dev *pcu_cr3;
u32 mcroute;
/* imh_edac */
/* System-view MMIO base physical addresses. */
u64 mmio_base_h_north;
u64 mmio_base_h_south;
int pkg;
int num_imc;
struct list_head list;
struct skx_imc {
/* i10nm_edac */
struct pci_dev *mdev;
/* imh_edac */
struct device *dev;
struct mem_ctl_info *mci;
struct pci_dev *mdev; /* for i10nm CPU */
void __iomem *mbase; /* for i10nm CPU */
int chan_mmio_sz; /* for i10nm CPU */
void __iomem *mbase;
int chan_mmio_sz;
int num_channels; /* channels per memory controller */
int num_dimms; /* dimms per channel */
bool hbm_mc;
@@ -178,7 +191,8 @@ enum type {
SKX,
I10NM,
SPR,
GNR
GNR,
DMR,
};
enum {
@@ -237,10 +251,6 @@ struct pci_bdf {
struct res_config {
enum type type;
/* Configuration agent device ID */
unsigned int decs_did;
/* Default bus number configuration register offset */
int busno_cfg_offset;
/* DDR memory controllers per socket */
int ddr_imc_num;
/* DDR channels per DDR memory controller */
@@ -258,23 +268,57 @@ struct res_config {
/* Per HBM channel memory-mapped I/O size */
int hbm_chan_mmio_sz;
bool support_ddr5;
/* SAD device BDF */
struct pci_bdf sad_all_bdf;
/* PCU device BDF */
struct pci_bdf pcu_cr3_bdf;
/* UTIL device BDF */
struct pci_bdf util_all_bdf;
/* URACU device BDF */
struct pci_bdf uracu_bdf;
/* DDR mdev device BDF */
struct pci_bdf ddr_mdev_bdf;
/* HBM mdev device BDF */
struct pci_bdf hbm_mdev_bdf;
int sad_all_offset;
/* RRL register sets per DDR channel */
struct reg_rrl *reg_rrl_ddr;
/* RRL register sets per HBM channel */
struct reg_rrl *reg_rrl_hbm[2];
union {
/* {skx,i10nm}_edac */
struct {
/* Configuration agent device ID */
unsigned int decs_did;
/* Default bus number configuration register offset */
int busno_cfg_offset;
struct pci_bdf sad_all_bdf;
struct pci_bdf pcu_cr3_bdf;
struct pci_bdf util_all_bdf;
struct pci_bdf uracu_bdf;
struct pci_bdf ddr_mdev_bdf;
struct pci_bdf hbm_mdev_bdf;
int sad_all_offset;
};
/* imh_edac */
struct {
/* MMIO base physical address in local package view */
u64 mmio_base_l_north;
u64 mmio_base_l_south;
u64 ddr_imc_base;
u64 ddr_reg_mcmtr_offset;
u8 ddr_reg_mcmtr_width;
u64 ddr_reg_dimmmtr_offset;
u8 ddr_reg_dimmmtr_width;
u64 ubox_base;
u32 ubox_size;
u32 ubox_reg_mmio_base_offset;
u8 ubox_reg_mmio_base_width;
u32 ubox_reg_socket_id_offset;
u8 ubox_reg_socket_id_width;
u64 pcu_base;
u32 pcu_size;
u32 pcu_reg_capid3_offset;
u8 pcu_reg_capid3_width;
u64 sca_base;
u32 sca_size;
u32 sca_reg_tolm_offset;
u8 sca_reg_tolm_width;
u32 sca_reg_tohm_offset;
u8 sca_reg_tohm_width;
u64 ha_base;
u32 ha_size;
u32 ha_reg_mode_offset;
u8 ha_reg_mode_width;
};
};
};
typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
@@ -287,13 +331,17 @@ void skx_adxl_put(void);
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
void skx_set_mem_cfg(bool mem_cfg_2lm);
void skx_set_res_cfg(struct res_config *cfg);
void skx_init_mc_mapping(struct skx_dev *d);
void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc);
int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list);
struct list_head *skx_get_edac_list(void);
int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm);
void skx_set_hi_lo(u64 tolm, u64 tohm);
int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
struct skx_imc *imc, int chan, int dimmno,
@@ -302,7 +350,7 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
int chan, int dimmno, const char *mod_str);
int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
int skx_register_mci(struct skx_imc *imc, struct device *dev, const char *dev_name,
const char *ctl_name, const char *mod_str,
get_dimm_config_f get_dimm_config,
struct res_config *cfg);