Patchwork [4/4] EDAC, i10nm_edac: Add EDAC driver for Intel 10nm server processors

login
register
mail settings
Submitter Luck, Tony
Date Jan. 25, 2019, 7:59 p.m.
Message ID <20190125195902.17109-5-tony.luck@intel.com>
Download mbox | patch
Permalink /patch/710123/
State New
Headers show

Comments

Luck, Tony - Jan. 25, 2019, 7:59 p.m.
From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>

This driver supports the integrated memory controllers of Intel 10nm
series server processors. It gets the memory capacity and topology
information by reading registers in PCI configuration space and
memory-mapped I/O. It decodes the memory error address to the
platform-specific address by using the ACPI Address Translation (ADXL)
Device Specific Method (DSM).

Uses skx_comm_edac.c for most of the work.

Co-developed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/edac/Kconfig      |  12 ++
 drivers/edac/Makefile     |   3 +
 drivers/edac/i10nm_base.c | 279 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 294 insertions(+)
 create mode 100644 drivers/edac/i10nm_base.c

Patch

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index e286b5b99003..2239b3409d41 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -241,6 +241,18 @@  config EDAC_SKX
 	  system has non-volatile DIMMs you should also manually
 	  select CONFIG_ACPI_NFIT.
 
+config EDAC_I10NM
+	tristate "Intel 10nm server Integrated MC"
+	depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
+	depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_I10NM can't be y
+	select DMI
+	select ACPI_ADXL if ACPI
+	help
+	  Support for error detection and correction on the Intel
+	  10nm server Integrated Memory Controllers. If your
+	  system has non-volatile DIMMs you should also manually
+	  select CONFIG_ACPI_NFIT.
+
 config EDAC_PND2
 	tristate "Intel Pondicherry2"
 	depends on PCI && X86_64 && X86_MCE_INTEL
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 1764b7ed05db..e15115e02036 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -60,6 +60,9 @@  obj-$(CONFIG_EDAC_LAYERSCAPE)		+= layerscape_edac_mod.o
 skx_edac-y				:= skx_comm_edac.o skx_base.o
 obj-$(CONFIG_EDAC_SKX)			+= skx_edac.o
 
+i10nm_edac-y				:= skx_comm_edac.o i10nm_base.o
+obj-$(CONFIG_EDAC_I10NM)		+= i10nm_edac.o
+
 obj-$(CONFIG_EDAC_MV64X60)		+= mv64x60_edac.o
 obj-$(CONFIG_EDAC_CELL)			+= cell_edac.o
 obj-$(CONFIG_EDAC_PPC4XX)		+= ppc4xx_edac.o
diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
new file mode 100644
index 000000000000..74cb80f9fc88
--- /dev/null
+++ b/drivers/edac/i10nm_base.c
@@ -0,0 +1,279 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for Intel(R) 10nm server memory controller.
+ * Copyright (c) 2019, Intel Corporation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <linux/acpi.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+#include <asm/mce.h>
+#include "edac_module.h"
+#include "skx_comm_edac.h"
+
+#define I10NM_REVISION	"v0.0.3"
+#define EDAC_MOD_STR	"i10nm_edac"
+
+/* Debug macros */
+#define i10nm_printk(level, fmt, arg...)	\
+	edac_printk(level, "i10nm", fmt, ##arg)
+
+/*
+ * PCI config-space reads from the d->uracu device: the socket-level BAR
+ * register is at offset 0xd0 and the BAR register for memory controller
+ * 'i' is at 0xd8 + i * 4. Both macros evaluate to the return value of
+ * pci_read_config_dword() and store the register contents in 'reg'.
+ */
+#define I10NM_GET_SCK_BAR(d, reg)		\
+	pci_read_config_dword((d)->uracu, 0xd0, &(reg))
+#define I10NM_GET_IMC_BAR(d, i, reg)	\
+	pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg))
+/*
+ * 32-bit reads from the controller's mapped region (m->mbase). The
+ * callers in this file pass the channel index as 'i' (stride 0x4000) and
+ * the DIMM index as 'j' (stride 4).
+ */
+#define I10NM_GET_DIMMMTR(m, i, j)	\
+	(*(u32 *)((m)->mbase + 0x2080c + (i) * 0x4000 + (j) * 4))
+#define I10NM_GET_MCDDRTCFG(m, i, j)	\
+	(*(u32 *)((m)->mbase + 0x20970 + (i) * 0x4000 + (j) * 4))
+
+/*
+ * Decode the BAR register values read above.
+ * NOTE(review): GET_BITFIELD() is defined outside this file; presumably
+ * GET_BITFIELD(v, lo, hi) extracts bits lo..hi inclusive - confirm.
+ *  - socket MMIO base:  field [0, 28] shifted left by 23
+ *  - IMC MMIO offset:   field [0, 10] shifted left by 12
+ *  - IMC MMIO size:     (field [13, 23] - field [0, 10] + 1) shifted
+ *                       left by 12
+ */
+#define I10NM_GET_SCK_MMIO_BASE(reg)	(GET_BITFIELD(reg, 0, 28) << 23)
+#define I10NM_GET_IMC_MMIO_OFFSET(reg)	(GET_BITFIELD(reg, 0, 10) << 12)
+#define I10NM_GET_IMC_MMIO_SIZE(reg)	((GET_BITFIELD(reg, 13, 23) - \
+					 GET_BITFIELD(reg, 0, 10) + 1) << 12)
+
+/*
+ * List of struct skx_dev entries walked by this driver; the pointer is
+ * set by skx_get_all_bus_mappings() in i10nm_init().
+ */
+static struct list_head *i10nm_edac_list;
+
+/*
+ * Look up and enable the PCI device at dom:bus:dev.fun.
+ *
+ * Returns the device on success, or NULL if the device does not exist or
+ * cannot be enabled. On failure no reference to the device is held.
+ *
+ * NOTE(review): pci_get_domain_bus_and_slot() already returns the device
+ * with its reference count raised, so the pci_dev_get() below leaves the
+ * caller holding two references. It is kept to preserve the existing
+ * success-path behavior; confirm against the pci_dev_put() calls in the
+ * teardown code, which is not part of this file.
+ */
+static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
+					   unsigned int dev, unsigned int fun)
+{
+	struct pci_dev *pdev;
+
+	pdev = pci_get_domain_bus_and_slot(dom, bus, PCI_DEVFN(dev, fun));
+	if (!pdev) {
+		edac_dbg(2, "No device %02x:%02x.%x\n",
+			 bus, dev, fun);
+		return NULL;
+	}
+
+	if (unlikely(pci_enable_device(pdev) < 0)) {
+		edac_dbg(2, "Failed to enable device %02x:%02x.%x\n",
+			 bus, dev, fun);
+		/* Drop the lookup reference; the caller only sees NULL. */
+		pci_dev_put(pdev);
+		return NULL;
+	}
+
+	pci_dev_get(pdev);
+
+	return pdev;
+}
+
+/*
+ * For every socket on i10nm_edac_list, look up the PCI devices the driver
+ * needs and map the MMIO region of each memory controller.
+ *
+ * Per socket this fills in d->util_all, d->uracu and, for each memory
+ * controller that is present, d->imc[i].mdev and d->imc[i].mbase. The
+ * first controller (i == 0) is mandatory; missing later ones are skipped.
+ *
+ * Returns 0 on success or -ENODEV on the first failure. Nothing acquired
+ * so far is released here; i10nm_init() calls skx_remove() when this
+ * function fails.
+ */
+static int i10nm_get_all_munits(void)
+{
+	struct pci_dev *mdev;
+	void __iomem *mbase;
+	unsigned long size;
+	struct skx_dev *d;
+	int i, j = 0;	/* j only numbers the sockets in the debug output */
+	u32 reg, off;
+	u64 base;
+
+	list_for_each_entry(d, i10nm_edac_list, list) {
+		d->util_all = pci_get_dev_wrapper(d->seg, d->bus[1], 29, 1);
+		if (!d->util_all)
+			return -ENODEV;
+
+		d->uracu = pci_get_dev_wrapper(d->seg, d->bus[0], 0, 1);
+		if (!d->uracu)
+			return -ENODEV;
+
+		if (I10NM_GET_SCK_BAR(d, reg)) {
+			i10nm_printk(KERN_ERR, "Failed to get socket bar\n");
+			return -ENODEV;
+		}
+
+		/* All IMC regions of this socket are offsets from this base. */
+		base = I10NM_GET_SCK_MMIO_BASE(reg);
+		edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
+			 j++, base, reg);
+
+		for (i = 0; i < I10NM_NUM_IMC; i++) {
+			mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
+						   12 + i, 0);
+			if (i == 0 && !mdev) {
+				i10nm_printk(KERN_ERR, "No IMC found\n");
+				return -ENODEV;
+			}
+			if (!mdev)
+				continue;
+
+			d->imc[i].mdev = mdev;
+
+			if (I10NM_GET_IMC_BAR(d, i, reg)) {
+				i10nm_printk(KERN_ERR, "Failed to get mc bar\n");
+				return -ENODEV;
+			}
+
+			off  = I10NM_GET_IMC_MMIO_OFFSET(reg);
+			size = I10NM_GET_IMC_MMIO_SIZE(reg);
+			edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n",
+				 i, base + off, size, reg);
+
+			mbase = ioremap(base + off, size);
+			if (!mbase) {
+				i10nm_printk(KERN_ERR, "Failed to ioremap 0x%llx\n",
+					     base + off);
+				return -ENODEV;
+			}
+
+			d->imc[i].mbase = mbase;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * CPU models this driver binds to, matched with x86_match_cpu() in
+ * i10nm_init() and exported as the module's x86cpu device table.
+ */
+static const struct x86_cpu_id i10nm_cpuids[] = {
+	{
+		.vendor		= X86_VENDOR_INTEL,
+		.family		= 6,
+		.model		= INTEL_FAM6_ATOM_TREMONT_X,
+		.feature	= 0,
+		.driver_data	= 0,
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);
+
+/*
+ * Report whether ECC is enabled on channel 'chan' of memory controller
+ * 'imc'.
+ *
+ * Reads the 32-bit mcmtr value at offset 0x20ef8 + chan * 0x4000 inside
+ * the controller's mapped region and returns true when the
+ * GET_BITFIELD(mcmtr, 2, 2) field (presumably bit 2) is non-zero.
+ *
+ * NOTE(review): mbase comes from ioremap(); an MMIO accessor such as
+ * readl() may be preferable to a plain pointer dereference - confirm.
+ */
+static bool i10nm_check_ecc(struct skx_imc *imc, int chan)
+{
+	u32 *mcmtr_reg = (u32 *)(imc->mbase + 0x20ef8 + chan * 0x4000);
+	u32 val = *mcmtr_reg;
+
+	edac_dbg(1, "ch%d mcmtr reg %x\n", chan, val);
+
+	if (GET_BITFIELD(val, 2, 2))
+		return true;
+
+	return false;
+}
+
+/*
+ * Fill in the EDAC DIMM information for the memory controller behind
+ * 'mci' (mci->pvt_info->imc).
+ *
+ * For every channel and DIMM slot the DIMMMTR and MCDDRTCFG values are
+ * read; a present DIMM is described via skx_get_dimm_info(), otherwise a
+ * present NVDIMM via skx_get_nvdimm_info(). A channel that has at least
+ * one DIMM must have ECC enabled on that same channel.
+ *
+ * Returns 0 on success (including when the controller has no mapped
+ * registers) or -ENODEV if a populated channel has ECC disabled.
+ */
+static int i10nm_get_dimm_config(struct mem_ctl_info *mci)
+{
+	struct skx_pvt *pvt = mci->pvt_info;
+	struct skx_imc *imc = pvt->imc;
+	struct dimm_info *dimm;
+	u32 mtr, mcddrtcfg;
+	int i, j, ndimms;
+
+	/* Without a mapped register window there is nothing to read. */
+	if (!imc->mbase)
+		return 0;
+
+	for (i = 0; i < I10NM_NUM_CHANNELS; i++) {
+		ndimms = 0;
+		for (j = 0; j < I10NM_NUM_DIMMS; j++) {
+			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+					     mci->n_layers, i, j, 0);
+			mtr = I10NM_GET_DIMMMTR(imc, i, j);
+			mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i, j);
+			edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n",
+				 mtr, mcddrtcfg, imc->mc, i, j);
+
+			if (IS_DIMM_PRESENT(mtr))
+				ndimms += skx_get_dimm_info(mtr, 0, dimm,
+							    imc, i, j);
+			else if (IS_NVDIMM_PRESENT(mcddrtcfg, j))
+				ndimms += skx_get_nvdimm_info(dimm, imc, i, j,
+							      EDAC_MOD_STR);
+		}
+		/*
+		 * The ECC status register is per channel, so check the
+		 * channel that was just scanned rather than always
+		 * channel 0.
+		 */
+		if (ndimms && !i10nm_check_ecc(imc, i)) {
+			i10nm_printk(KERN_ERR, "ECC is disabled on imc %d\n",
+				     imc->mc);
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Notifier hooked into the MCE decode chain by i10nm_init(); errors are
+ * handed to skx_mce_check_error() at EDAC priority.
+ */
+static struct notifier_block i10nm_mce_dec = {
+	.priority	= MCE_PRIO_EDAC,
+	.notifier_call	= skx_mce_check_error,
+};
+
+/*
+ * Module entry point: check that the CPU is supported, discover the
+ * memory controllers and register one EDAC MC instance for each
+ * controller that has a PCI device.
+ *
+ * Returns 0 on success, -EBUSY if edac_get_owner() reports an owner other
+ * than this module, -ENODEV if the CPU does not match or no memory
+ * controller is found, or the error code of the helper that failed.
+ */
+static int __init i10nm_init(void)
+{
+	u8 mc = 0, src_id = 0, node_id = 0;
+	const struct x86_cpu_id *id;
+	const char *owner;
+	struct skx_dev *d;
+	int rc, i, off[3] = {0xd0, 0xc8, 0xcc};
+	u64 tolm, tohm;
+
+	edac_dbg(2, "\n");
+
+	/* Refuse to load when a different EDAC driver is already the owner. */
+	owner = edac_get_owner();
+	if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
+		return -EBUSY;
+
+	id = x86_match_cpu(i10nm_cpuids);
+	if (!id)
+		return -ENODEV;
+
+	/*
+	 * NOTE(review): presumably reads the low/high memory limits from
+	 * PCI device 0x09a2 using the register offsets in off[] - confirm in
+	 * the shared skx code. tolm/tohm are not used again in this function.
+	 */
+	rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm);
+	if (rc)
+		return rc;
+
+	rc = skx_get_all_bus_mappings(0x3452, 0xcc, I10NM, &i10nm_edac_list);
+	if (rc < 0)
+		goto fail;
+	/* Zero is treated as "nothing found"; no cleanup is run for it. */
+	if (rc == 0) {
+		i10nm_printk(KERN_ERR, "No memory controllers found\n");
+		return -ENODEV;
+	}
+
+	rc = i10nm_get_all_munits();
+	if (rc < 0)
+		goto fail;
+
+	list_for_each_entry(d, i10nm_edac_list, list) {
+		rc = skx_get_src_id(d, &src_id);
+		if (rc < 0)
+			goto fail;
+
+		rc = skx_get_node_id(d, &node_id);
+		if (rc < 0)
+			goto fail;
+
+		edac_dbg(2, "src_id = %d node_id = %d\n", src_id, node_id);
+		for (i = 0; i < I10NM_NUM_IMC; i++) {
+			/* Controllers without a PCI device were skipped earlier. */
+			if (!d->imc[i].mdev)
+				continue;
+
+			/* mc counts across all sockets; lmc is the index within this one. */
+			d->imc[i].mc  = mc++;
+			d->imc[i].lmc = i;
+			d->imc[i].src_id  = src_id;
+			d->imc[i].node_id = node_id;
+
+			rc = skx_register_mci(&d->imc[i], d->imc[i].mdev,
+					      "Intel_10nm Socket", EDAC_MOD_STR,
+					      i10nm_get_dimm_config);
+			if (rc < 0)
+				goto fail;
+		}
+	}
+
+	rc = skx_adxl_get();
+	if (rc)
+		goto fail;
+
+	opstate_init();
+	mce_register_decode_chain(&i10nm_mce_dec);
+	setup_skx_debug("i10nm_test");
+
+	i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION);
+
+	return 0;
+fail:
+	/*
+	 * Common error exit. NOTE(review): skx_remove() is defined outside
+	 * this file; presumably it releases everything set up so far.
+	 */
+	skx_remove();
+	return rc;
+}
+
+/*
+ * Module exit: undo the steps of i10nm_init() in reverse order - debug
+ * setup, MCE decode-chain registration, the ADXL reference, and finally
+ * the shared skx state via skx_remove().
+ */
+static void __exit i10nm_exit(void)
+{
+	edac_dbg(2, "\n");
+	teardown_skx_debug();
+	mce_unregister_decode_chain(&i10nm_mce_dec);
+	skx_adxl_put();
+	skx_remove();
+}
+
+module_init(i10nm_init);
+module_exit(i10nm_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors");