Patchwork [v3,3/6] EDAC/amd64: Support more than two Unified Memory Controllers

login
register
mail settings
Submitter Yazen Ghannam
Date Feb. 28, 2019, 3:36 p.m.
Message ID <20190228153558.127292-3-Yazen.Ghannam@amd.com>
Download mbox | patch
Permalink /patch/738291/
State New
Headers show

Comments

Yazen Ghannam - Feb. 28, 2019, 3:36 p.m.
From: Yazen Ghannam <yazen.ghannam@amd.com>

The first few models of Family 17h all had 2 Unified Memory Controllers
per Die, so this was treated as a fixed value. However, future systems
may have more Unified Memory Controllers per Die.

Related to this, the channel number and base address of a Unified Memory
Controller were found by matching on fixed, known values. However, on
current and future systems, both the instance ID used to identify the
channel and the base address of a Unified Memory Controller follow the
pattern 0xYXXXXX, where Y is the channel number. So matching on
hardcoded values is not necessary.

Set the number of Unified Memory Controllers at driver init time based
on the Family/Model. Also, update the functions that find the channel
number and base address of a Unified Memory Controller to support more
than two.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
Link:
https://lkml.kernel.org/r/20190226172532.12924-2-Yazen.Ghannam@amd.com

v2->v3:
* Apply V2 Patch 3 before V2 Patch 2.

v1->v2:
* Fix tone in commit message.
* Clarify pattern used for finding channel numbers.
* Remove macro for looping over number of UMCs.
* Move function to find number of UMCs to single driver init function.
* Rename function that finds the number of UMCs.
* Add comments for new variables and functions.
* Move function to find number of UMCs out of header.

 drivers/edac/amd64_edac.c | 44 ++++++++++++++++++++++++---------------
 drivers/edac/amd64_edac.h | 10 ++++++---
 2 files changed, 34 insertions(+), 20 deletions(-)
Borislav Petkov - March 23, 2019, 12:15 p.m.
On Thu, Feb 28, 2019 at 03:36:10PM +0000, Ghannam, Yazen wrote:
> diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
> index de8dbb0b42b5..40e63cea2d81 100644
> --- a/drivers/edac/amd64_edac.h
> +++ b/drivers/edac/amd64_edac.h
> @@ -274,7 +274,11 @@
>  
>  #define UMC_SDP_INIT			BIT(31)
>  
> -#define NUM_UMCS			2
> +/*
> + * Number of Unified Memory Controllers
> + * Set during driver init based on family/model.
> + */
> +static u8 num_umcs;

Please don't do silly things like that:

In file included from drivers/edac/amd64_edac_inj.c:2:0:
drivers/edac/amd64_edac.h:279:11: warning: ‘num_umcs’ defined but not used [-Wunused-variable]
 static u8 num_umcs;
           ^~~~~~~~

I fixed it up now.
Yazen Ghannam - March 23, 2019, 5:22 p.m.
> -----Original Message-----

> From: linux-edac-owner@vger.kernel.org <linux-edac-owner@vger.kernel.org> On Behalf Of Borislav Petkov

> Sent: Saturday, March 23, 2019 7:16 AM

> To: Ghannam, Yazen <Yazen.Ghannam@amd.com>

> Cc: linux-edac@vger.kernel.org; linux-kernel@vger.kernel.org

> Subject: Re: [PATCH v3 3/6] EDAC/amd64: Support more than two Unified Memory Controllers

> 

> On Thu, Feb 28, 2019 at 03:36:10PM +0000, Ghannam, Yazen wrote:

> > diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h

> > index de8dbb0b42b5..40e63cea2d81 100644

> > --- a/drivers/edac/amd64_edac.h

> > +++ b/drivers/edac/amd64_edac.h

> > @@ -274,7 +274,11 @@

> >

> >  #define UMC_SDP_INIT			BIT(31)

> >

> > -#define NUM_UMCS			2

> > +/*

> > + * Number of Unified Memory Controllers

> > + * Set during driver init based on family/model.

> > + */

> > +static u8 num_umcs;

> 

> Please don't do silly things like that:

> 

> In file included from drivers/edac/amd64_edac_inj.c:2:0:

> drivers/edac/amd64_edac.h:279:11: warning: ‘num_umcs’ defined but not used [-Wunused-variable]

>  static u8 num_umcs;

>            ^~~~~~~~

> 

> I fixed it up now.

> 


Thanks. Sorry I missed that.

-Yazen

Patch

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index e4fd459d807a..25416c608908 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -450,7 +450,7 @@  static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
 	for (i = 0; i < pvt->csels[dct].m_cnt; i++)
 
 #define for_each_umc(i) \
-	for (i = 0; i < NUM_UMCS; i++)
+	for (i = 0; i < num_umcs; i++)
 
 /*
  * @input_addr is an InputAddr associated with the node given by mci. Return the
@@ -2476,18 +2476,14 @@  static inline void decode_bus_error(int node_id, struct mce *m)
  * To find the UMC channel represented by this bank we need to match on its
  * instance_id. The instance_id of a bank is held in the lower 32 bits of its
  * IPID.
+ *
+ * Currently, we can derive the channel number by looking at the 6th nibble in
+ * the instance_id. For example, instance_id=0xYXXXXX where Y is the channel
+ * number.
  */
-static int find_umc_channel(struct amd64_pvt *pvt, struct mce *m)
+static int find_umc_channel(struct mce *m)
 {
-	u32 umc_instance_id[] = {0x50f00, 0x150f00};
-	u32 instance_id = m->ipid & GENMASK(31, 0);
-	int i, channel = -1;
-
-	for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++)
-		if (umc_instance_id[i] == instance_id)
-			channel = i;
-
-	return channel;
+	return (m->ipid & GENMASK(31, 0)) >> 20;
 }
 
 static void decode_umc_error(int node_id, struct mce *m)
@@ -2509,11 +2505,7 @@  static void decode_umc_error(int node_id, struct mce *m)
 	if (m->status & MCI_STATUS_DEFERRED)
 		ecc_type = 3;
 
-	err.channel = find_umc_channel(pvt, m);
-	if (err.channel < 0) {
-		err.err_code = ERR_CHANNEL;
-		goto log_error;
-	}
+	err.channel = find_umc_channel(m);
 
 	if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
 		err.err_code = ERR_NORM_ADDR;
@@ -3252,6 +3244,22 @@  static const struct attribute_group *amd64_edac_attr_groups[] = {
 	NULL
 };
 
+/* Set the number of Unified Memory Controllers in the system. */
+static void compute_num_umcs(void)
+{
+	u8 model = boot_cpu_data.x86_model;
+
+	if (boot_cpu_data.x86 < 0x17)
+		return;
+
+	if (model >= 0x30 && model <= 0x3f)
+		num_umcs = 8;
+	else
+		num_umcs = 2;
+
+	edac_dbg(1, "Number of UMCs: %x", num_umcs);
+}
+
 static int init_one_instance(unsigned int nid)
 {
 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
@@ -3276,7 +3284,7 @@  static int init_one_instance(unsigned int nid)
 		goto err_free;
 
 	if (pvt->fam >= 0x17) {
-		pvt->umc = kcalloc(NUM_UMCS, sizeof(struct amd64_umc), GFP_KERNEL);
+		pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL);
 		if (!pvt->umc) {
 			ret = -ENOMEM;
 			goto err_free;
@@ -3497,6 +3505,8 @@  static int __init amd64_edac_init(void)
 	if (!msrs)
 		goto err_free;
 
+	compute_num_umcs();
+
 	for (i = 0; i < amd_nb_num(); i++) {
 		err = probe_one_instance(i);
 		if (err) {
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index de8dbb0b42b5..40e63cea2d81 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -274,7 +274,11 @@ 
 
 #define UMC_SDP_INIT			BIT(31)
 
-#define NUM_UMCS			2
+/*
+ * Number of Unified Memory Controllers
+ * Set during driver init based on family/model.
+ */
+static u8 num_umcs;
 
 enum amd_families {
 	K8_CPUS = 0,
@@ -399,8 +403,8 @@  struct err_info {
 
 static inline u32 get_umc_base(u8 channel)
 {
-	/* ch0: 0x50000, ch1: 0x150000 */
-	return 0x50000 + (!!channel << 20);
+	/* chY: 0xY50000 */
+	return 0x50000 + (channel << 20);
 }
 
 static inline u64 get_dram_base(struct amd64_pvt *pvt, u8 i)