Patchwork [v8,0/3] x86/boot/KASLR: Parse ACPI table and limit kaslr in immovable memory

login
register
mail settings
Submitter Masayoshi Mizuma
Date Oct. 22, 2018, 3:42 p.m.
Message ID <20181022154204.kagmdb55jtoez4ca@gabell>
Download mbox | patch
Permalink /patch/641655/
State New
Headers show

Comments

Masayoshi Mizuma - Oct. 22, 2018, 3:42 p.m.
Hi Boris,

On Tue, Oct 16, 2018 at 09:59:02PM +0200, Borislav Petkov wrote:
> On Tue, Oct 16, 2018 at 03:54:30PM -0400, Masayoshi Mizuma wrote:
> > Ah, sorry, I misunderstood your suggetion...
> > In parse_setup_data(), the data is picked up from boot_params,
> 
> Yes, this is the pointer to the setup_data linked list head. See
> 
> Documentation/ABI/testing/sysfs-kernel-boot_params
> Documentation/x86/boot.txt
> 
> for more information and basically grep the tree for examples.

I'm trying to store the SRAT info and pass it to kernel_randomize_memory(),
looks like add_e820ext()/parse_setup_data().

Is the approach useful only EFI environment? I'm not sure how I
allocate memory to store the data in legacy bios environment...
On EFI, I can use efi_call_early(allocate_pool, EFI_LOADER_DATA, ...).

Am I missing something? I would appreciate if you could help my
understanding.

Following patch is a prototype for EFI enviromnent.
---
 arch/x86/boot/compressed/acpitb.c     | 23 ++++++++++-
 arch/x86/boot/compressed/eboot.c      | 36 +++++++++++++++++
 arch/x86/include/uapi/asm/bootparam.h |  1 +
 arch/x86/mm/kaslr.c                   | 58 ++++++++++++++++++++++++++-
 4 files changed, 116 insertions(+), 2 deletions(-)

--

Thanks!
Masa
Chao Fan - Oct. 23, 2018, 2:48 a.m.
On Mon, Oct 22, 2018 at 11:42:05AM -0400, Masayoshi Mizuma wrote:
>Hi Boris,

Hi Mizuma-san,

I have several questions:

>+static void store_possible_addr(unsigned long long possible)
>+{
>+	struct setup_data *data;
>+
>+	data = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
I suggest you add check:

	if (!data) {
		debug_putstr("No setup_data found.\n");
		return;
	}

>+	while (data) {
>+		if (data->type == SETUP_KASLR) {
>+			*(unsigned long long *)data->data = possible;
>+			return;
>+		}
>+		data = (struct setup_data *)(unsigned long)data->next;
>+	}
>+}
>+
> /*
>  * According to ACPI table, filter the immvoable memory regions
>  * and store them in immovable_mem[].
>@@ -319,6 +333,7 @@ void get_immovable_mem(void)
> 	struct acpi_subtable_header *table;
> 	struct acpi_srat_mem_affinity *ma;
> 	unsigned long table_end;
>+	unsigned long long possible_addr, max_possible_addr = 0;
> 	int i = 0;
>
> 	if (!cmdline_find_option_bool("movable_node") ||
>@@ -338,7 +353,12 @@ void get_immovable_mem(void)
> 		       sizeof(struct acpi_subtable_header) < table_end) {
> 		if (table->type == ACPI_SRAT_TYPE_MEMORY_AFFINITY) {
> 			ma = (struct acpi_srat_mem_affinity *)table;
>-			if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
>+
>+			if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
>+				possible_addr = ma->base_address + ma->length;
>+				if (possible_addr > max_possible_addr)
>+					max_possible_addr = possible_addr;
>+			} else {
> 				immovable_mem[i].start = ma->base_address;
> 				immovable_mem[i].size = ma->length;
> 				i++;
>@@ -351,4 +371,5 @@ void get_immovable_mem(void)
> 			((unsigned long)table + table->length);
> 	}
> 	num_immovable_mem = i;
>+	store_possible_addr(max_possible_addr);
> }
>diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
>index 1458b17..9b95fba 100644
>--- a/arch/x86/boot/compressed/eboot.c
>+++ b/arch/x86/boot/compressed/eboot.c
>@@ -192,6 +192,40 @@ static void setup_efi_pci(struct boot_params *params)
> 	efi_call_early(free_pool, pci_handle);
> }
>
>+#ifdef CONFIG_RANDOMIZE_MEMORY
>+static void setup_kaslr(struct boot_params *params)
>+{
>+	struct setup_data *kaslr_data = NULL;
>+	struct setup_data *data;
>+	unsigned long size;
>+	efi_status_t status;
>+
>+	size = sizeof(struct setup_data) + sizeof(unsigned long long);
>+
>+	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
>+			size, (void **)&kaslr_data);
>+	if (status != EFI_SUCCESS) {
>+		efi_printk(sys_table, "Failed to allocate memory for 'kaslr_data'\n");
>+		return;
>+	}
>+
>+	kaslr_data->type = SETUP_KASLR;
>+	kaslr_data->next = 0;
>+	kaslr_data->len = size;
>+
>+	data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
>+	if (data)
>+		data->next = (unsigned long)kaslr_data;
Why just put the kaslr_data in data->next. You can't make sure
data->next was NULL.
>+	else {
If data is NULL, go to this else{}, so these two lines below work?
>+		while (data->next)
>+			data = (struct setup_data *)(unsigned long)data->next;
>+		data->next = (unsigned long)kaslr_data;
>+	}
If my understanding is not wrong, it should be:

	data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
	if (!data)
		params->hdr.setup_data = (unsigned long)kaslr_data;
	else {
		while (data->next)
			data = (struct setup_data *)(unsigned long)data->next;
		data->next = (unsigned long)kaslr_data;
	}

If I misunderstand something, please tell me.

Thanks,
Chao Fan
Masayoshi Mizuma - Oct. 24, 2018, 7:21 p.m.
On Tue, Oct 23, 2018 at 10:48:02AM +0800, Chao Fan wrote:
> On Mon, Oct 22, 2018 at 11:42:05AM -0400, Masayoshi Mizuma wrote:
> >Hi Boris,
> 
> Hi Mizuma-san,
> 
> I have several questions:

Thank you for your comments! I think your suggestions are
right.
However, the prototype patch works EFI environment only.
The memory hot-plug affinity in SRAT and KASLR are also available
on legacy BIOS environment, so I need to get the patch useful
for legacy BIOS as well, but I have no idea to add such things...
If you have ideas, could you let me know?

Probably I should have another idea, for example,
add the SRAT parsing code, looks like you are adding to
arch/x86/boot/compressed/acpitb.c, to arch/x86/mm/kaslr.c.

Thanks,
Masa

> 
> >+static void store_possible_addr(unsigned long long possible)
> >+{
> >+	struct setup_data *data;
> >+
> >+	data = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
> I suggest you add check:
> 
> 	if (!data) {
> 		debug_putstr("No setup_data found.\n");
> 		return;
> 	}
> 
> >+	while (data) {
> >+		if (data->type == SETUP_KASLR) {
> >+			*(unsigned long long *)data->data = possible;
> >+			return;
> >+		}
> >+		data = (struct setup_data *)(unsigned long)data->next;
> >+	}
> >+}
> >+
> > /*
> >  * According to ACPI table, filter the immvoable memory regions
> >  * and store them in immovable_mem[].
> >@@ -319,6 +333,7 @@ void get_immovable_mem(void)
> > 	struct acpi_subtable_header *table;
> > 	struct acpi_srat_mem_affinity *ma;
> > 	unsigned long table_end;
> >+	unsigned long long possible_addr, max_possible_addr = 0;
> > 	int i = 0;
> >
> > 	if (!cmdline_find_option_bool("movable_node") ||
> >@@ -338,7 +353,12 @@ void get_immovable_mem(void)
> > 		       sizeof(struct acpi_subtable_header) < table_end) {
> > 		if (table->type == ACPI_SRAT_TYPE_MEMORY_AFFINITY) {
> > 			ma = (struct acpi_srat_mem_affinity *)table;
> >-			if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
> >+
> >+			if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
> >+				possible_addr = ma->base_address + ma->length;
> >+				if (possible_addr > max_possible_addr)
> >+					max_possible_addr = possible_addr;
> >+			} else {
> > 				immovable_mem[i].start = ma->base_address;
> > 				immovable_mem[i].size = ma->length;
> > 				i++;
> >@@ -351,4 +371,5 @@ void get_immovable_mem(void)
> > 			((unsigned long)table + table->length);
> > 	}
> > 	num_immovable_mem = i;
> >+	store_possible_addr(max_possible_addr);
> > }
> >diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
> >index 1458b17..9b95fba 100644
> >--- a/arch/x86/boot/compressed/eboot.c
> >+++ b/arch/x86/boot/compressed/eboot.c
> >@@ -192,6 +192,40 @@ static void setup_efi_pci(struct boot_params *params)
> > 	efi_call_early(free_pool, pci_handle);
> > }
> >
> >+#ifdef CONFIG_RANDOMIZE_MEMORY
> >+static void setup_kaslr(struct boot_params *params)
> >+{
> >+	struct setup_data *kaslr_data = NULL;
> >+	struct setup_data *data;
> >+	unsigned long size;
> >+	efi_status_t status;
> >+
> >+	size = sizeof(struct setup_data) + sizeof(unsigned long long);
> >+
> >+	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
> >+			size, (void **)&kaslr_data);
> >+	if (status != EFI_SUCCESS) {
> >+		efi_printk(sys_table, "Failed to allocate memory for 'kaslr_data'\n");
> >+		return;
> >+	}
> >+
> >+	kaslr_data->type = SETUP_KASLR;
> >+	kaslr_data->next = 0;
> >+	kaslr_data->len = size;
> >+
> >+	data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
> >+	if (data)
> >+		data->next = (unsigned long)kaslr_data;
> Why just put the kaslr_data in data->next. You can't make sure
> data->next was NULL.
> >+	else {
> If data is NULL, go to this else{}, so these two lines below work?
> >+		while (data->next)
> >+			data = (struct setup_data *)(unsigned long)data->next;
> >+		data->next = (unsigned long)kaslr_data;
> >+	}
> If my understanding is not wrong, it should be:
> 
> 	data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
> 	if (!data)
> 		params->hdr.setup_data = (unsigned long)kaslr_data;
> 	else {
> 		while (data->next)
> 			data = (struct setup_data *)(unsigned long)data->next;
> 		data->next = (unsigned long)kaslr_data;
> 	}
> 
> If I misunderstand something, please tell me.
> 
> Thanks,
> Chao Fan
> 
>
Chao Fan - Oct. 25, 2018, 1:22 a.m.
On Wed, Oct 24, 2018 at 03:21:36PM -0400, Masayoshi Mizuma wrote:
>On Tue, Oct 23, 2018 at 10:48:02AM +0800, Chao Fan wrote:
>> On Mon, Oct 22, 2018 at 11:42:05AM -0400, Masayoshi Mizuma wrote:
>> >Hi Boris,
>> 
>> Hi Mizuma-san,
>> 
>> I have several questions:
>
>Thank you for your comments! I think your suggestions are
>right.
>However, the prototype patch works EFI environment only.

Yes, I agree. But I think this method is much better than
adding code to arch/x86/mm/kaslr.c.

>The memory hot-plug affinity in SRAT and KASLR are also available
>on legacy BIOS environment, so I need to get the patch useful
>for legacy BIOS as well, but I have no idea to add such things...
>If you have ideas, could you let me know?

I have no idea. I will work on it, try to help figure out the BIOS issue.

Thanks,
Chao Fan

>
>Probably I should have another idea, for example,
>add the SRAT parsing code, looks like you are adding to
>arch/x86/boot/compressed/acpitb.c, to arch/x86/mm/kaslr.c.
>
>Thanks,
>Masa
>
>> 
>> >+static void store_possible_addr(unsigned long long possible)
>> >+{
>> >+	struct setup_data *data;
>> >+
>> >+	data = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
>> I suggest you add check:
>> 
>> 	if (!data) {
>> 		debug_putstr("No setup_data found.\n");
>> 		return;
>> 	}
>> 
>> >+	while (data) {
>> >+		if (data->type == SETUP_KASLR) {
>> >+			*(unsigned long long *)data->data = possible;
>> >+			return;
>> >+		}
>> >+		data = (struct setup_data *)(unsigned long)data->next;
>> >+	}
>> >+}
>> >+
>> > /*
>> >  * According to ACPI table, filter the immvoable memory regions
>> >  * and store them in immovable_mem[].
>> >@@ -319,6 +333,7 @@ void get_immovable_mem(void)
>> > 	struct acpi_subtable_header *table;
>> > 	struct acpi_srat_mem_affinity *ma;
>> > 	unsigned long table_end;
>> >+	unsigned long long possible_addr, max_possible_addr = 0;
>> > 	int i = 0;
>> >
>> > 	if (!cmdline_find_option_bool("movable_node") ||
>> >@@ -338,7 +353,12 @@ void get_immovable_mem(void)
>> > 		       sizeof(struct acpi_subtable_header) < table_end) {
>> > 		if (table->type == ACPI_SRAT_TYPE_MEMORY_AFFINITY) {
>> > 			ma = (struct acpi_srat_mem_affinity *)table;
>> >-			if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
>> >+
>> >+			if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
>> >+				possible_addr = ma->base_address + ma->length;
>> >+				if (possible_addr > max_possible_addr)
>> >+					max_possible_addr = possible_addr;
>> >+			} else {
>> > 				immovable_mem[i].start = ma->base_address;
>> > 				immovable_mem[i].size = ma->length;
>> > 				i++;
>> >@@ -351,4 +371,5 @@ void get_immovable_mem(void)
>> > 			((unsigned long)table + table->length);
>> > 	}
>> > 	num_immovable_mem = i;
>> >+	store_possible_addr(max_possible_addr);
>> > }
>> >diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
>> >index 1458b17..9b95fba 100644
>> >--- a/arch/x86/boot/compressed/eboot.c
>> >+++ b/arch/x86/boot/compressed/eboot.c
>> >@@ -192,6 +192,40 @@ static void setup_efi_pci(struct boot_params *params)
>> > 	efi_call_early(free_pool, pci_handle);
>> > }
>> >
>> >+#ifdef CONFIG_RANDOMIZE_MEMORY
>> >+static void setup_kaslr(struct boot_params *params)
>> >+{
>> >+	struct setup_data *kaslr_data = NULL;
>> >+	struct setup_data *data;
>> >+	unsigned long size;
>> >+	efi_status_t status;
>> >+
>> >+	size = sizeof(struct setup_data) + sizeof(unsigned long long);
>> >+
>> >+	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
>> >+			size, (void **)&kaslr_data);
>> >+	if (status != EFI_SUCCESS) {
>> >+		efi_printk(sys_table, "Failed to allocate memory for 'kaslr_data'\n");
>> >+		return;
>> >+	}
>> >+
>> >+	kaslr_data->type = SETUP_KASLR;
>> >+	kaslr_data->next = 0;
>> >+	kaslr_data->len = size;
>> >+
>> >+	data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
>> >+	if (data)
>> >+		data->next = (unsigned long)kaslr_data;
>> Why just put the kaslr_data in data->next. You can't make sure
>> data->next was NULL.
>> >+	else {
>> If data is NULL, go to this else{}, so these two lines below work?
>> >+		while (data->next)
>> >+			data = (struct setup_data *)(unsigned long)data->next;
>> >+		data->next = (unsigned long)kaslr_data;
>> >+	}
>> If my understanding is not wrong, it should be:
>> 
>> 	data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
>> 	if (!data)
>> 		params->hdr.setup_data = (unsigned long)kaslr_data;
>> 	else {
>> 		while (data->next)
>> 			data = (struct setup_data *)(unsigned long)data->next;
>> 		data->next = (unsigned long)kaslr_data;
>> 	}
>> 
>> If I misunderstand something, please tell me.
>> 
>> Thanks,
>> Chao Fan
>> 
>> 
>
>
Borislav Petkov - Oct. 25, 2018, 10:33 a.m.
On Mon, Oct 22, 2018 at 11:42:05AM -0400, Masayoshi Mizuma wrote:
> I'm trying to store the SRAT info and pass it to kernel_randomize_memory(),
> looks like add_e820ext()/parse_setup_data().
> 
> Is the approach useful only EFI environment? I'm not sure how I

Does it matter for non-EFI even?

I mean, you want this code only for your use case - when you have
movable memory and you're doing kexec, yes?

And those machines are all EFI boxes I'd assume...
Masayoshi Mizuma - Oct. 25, 2018, 1:40 p.m.
On Thu, Oct 25, 2018 at 12:33:45PM +0200, Borislav Petkov wrote:
> On Mon, Oct 22, 2018 at 11:42:05AM -0400, Masayoshi Mizuma wrote:
> > I'm trying to store the SRAT info and pass it to kernel_randomize_memory(),
> > looks like add_e820ext()/parse_setup_data().
> > 
> > Is the approach useful only EFI environment? I'm not sure how I
> 
> Does it matter for non-EFI even?
> 
> I mean, you want this code only for your use case - when you have
> movable memory and you're doing kexec, yes?
> 
> And those machines are all EFI boxes I'd assume...

My actual use case is for EFI boxes, however, I think it's better to useful
for legacy BIOS as well because memory hot-plug affinity in SRAT and KASLR
are available on legacy BIOS.
Actually, we can create such environment in qemu.

I have another idea to solve this issue. Adding a SRAT parsing code
to arch/x86/mm/kaslr.c. It is useful for both EFI and BIOS and
also we don't need a new kernel parameter...
Dose the idea make sense?

Thanks,
Masa
Borislav Petkov - Nov. 6, 2018, 12:10 p.m.
On Thu, Oct 25, 2018 at 09:40:51AM -0400, Masayoshi Mizuma wrote:
> My actual use case is for EFI boxes, however, I think it's better to useful
> for legacy BIOS as well because memory hot-plug affinity in SRAT and KASLR
> are available on legacy BIOS.
> Actually, we can create such environment in qemu.

Ah, right, qemu. :)

> I have another idea to solve this issue. Adding a SRAT parsing code
> to arch/x86/mm/kaslr.c. It is useful for both EFI and BIOS and
> also we don't need a new kernel parameter...
> Dose the idea make sense?

The more automatic stuff we do and we don't have to involve the user,
the better.

However, lemme look at Chao's current patchset first - we should not go
nuts by putting SRAT parsing everywhere :)
Baoquan He - Nov. 6, 2018, 2:07 p.m.
On 11/06/18 at 01:10pm, Borislav Petkov wrote:
> > I have another idea to solve this issue. Adding a SRAT parsing code
> > to arch/x86/mm/kaslr.c. It is useful for both EFI and BIOS and
> > also we don't need a new kernel parameter...
> > Dose the idea make sense?
> 
> The more automatic stuff we do and we don't have to involve the user,
> the better.
> 
> However, lemme look at Chao's current patchset first - we should not go
> nuts by putting SRAT parsing everywhere :)

arch/x86/mm/ident_map.c is a good example, it's shared between
arch/x86/boot/compressed and arch/x86/mm/init_64.c
Borislav Petkov - Nov. 6, 2018, 6:45 p.m.
On Thu, Oct 25, 2018 at 09:40:51AM -0400, Masayoshi Mizuma wrote:
> I have another idea to solve this issue. Adding a SRAT parsing code
> to arch/x86/mm/kaslr.c. It is useful for both EFI and BIOS and
> also we don't need a new kernel parameter...
> Dose the idea make sense?

Ok, having swapped the whole thing back into my brain, forget what I
said earlier today.

Didn't we talk about passing info with setup_data to the later kernel
stage? You even had a patch:

https://lkml.kernel.org/r/20181022154204.kagmdb55jtoez4ca@gabell

So what is that "idea" again about adding SRAT parsing code to
arch/x86/mm/kaslr.c?!?!

The intent of passing info with setup_data is to *avoid* parsing SRAT
yet another time and duplicating that code one more time.

IOW:

* The first place that needs SRAT parsing, does the parsing - i.e., the
compressed stage.

* Later stages get information passed to them with setup_data. No second
parsing.

Ok?
Masayoshi Mizuma - Nov. 6, 2018, 7:36 p.m.
On Tue, Nov 06, 2018 at 07:45:19PM +0100, Borislav Petkov wrote:
> Ok, having swapped the whole thing back into my brain, forget what I
> said earlier today.
> 
> Didn't we talk about passing info with setup_data to the later kernel
> stage? You even had a patch:
> 
> https://lkml.kernel.org/r/20181022154204.kagmdb55jtoez4ca@gabell
> 
> So what is that "idea" again about adding SRAT parsing code to
> arch/x86/mm/kaslr.c?!?!
> 
> The intent of passing info with setup_data is to *avoid* parsing SRAT
> yet another time and duplicating that code one more time.
> 
> IOW:
> 
> * The first place that needs SRAT parsing, does the parsing - i.e., the
> compressed stage.
> 
> * Later stages get information passed to them with setup_data. No second
> parsing.
> 
> Ok?

Yes, I think I can see what you are saying. However, I'm not sure how
I use the setup_data in legacy BIOS environment. So, I said the another
idea which adding the SRAT parsing code to arch/x86/mm/kaslr.c as well.
Yes, as you said, that is not so good...

I would appreciate if you could help to use setup_data or something to
pass the information to later kernel stage in BIOS environment.

Thanks,
Masa
Borislav Petkov - Nov. 6, 2018, 8:45 p.m.
On Tue, Nov 06, 2018 at 02:36:38PM -0500, Masayoshi Mizuma wrote:
> Yes, I think I can see what you are saying. However, I'm not sure how
> I use the setup_data in legacy BIOS environment.

What is "legacy BIOS environment" and what does that have to do with
setup_data?
Masayoshi Mizuma - Nov. 6, 2018, 10:21 p.m.
On Tue, Nov 06, 2018 at 09:45:11PM +0100, Borislav Petkov wrote:
> On Tue, Nov 06, 2018 at 02:36:38PM -0500, Masayoshi Mizuma wrote:
> > Yes, I think I can see what you are saying. However, I'm not sure how
> > I use the setup_data in legacy BIOS environment.
> 
> What is "legacy BIOS environment" and what does that have to do with
> setup_data?

My proposed patch [1] is useful only for EFI environment. The patch
allocates a setup_date structure by efi_call_early() and store the
KASLR data into the memory area.

+static void setup_kaslr(struct boot_params *params)
+{
+	struct setup_data *kaslr_data = NULL;
+	struct setup_data *data;
+	unsigned long size;
+	efi_status_t status;
+
+	size = sizeof(struct setup_data) + sizeof(unsigned long long);
+
+	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
+			size, (void **)&kaslr_data);

I'm not sure how I allocate such memory on no EFI environment.
Am I missing something...?

[1] https://lkml.kernel.org/r/20181022154204.kagmdb55jtoez4ca@gabell

Thanks,
Masa
Chao Fan - Nov. 7, 2018, 1:21 a.m.
On Tue, Nov 06, 2018 at 10:07:31PM +0800, Baoquan He wrote:
>On 11/06/18 at 01:10pm, Borislav Petkov wrote:
>> > I have another idea to solve this issue. Adding a SRAT parsing code
>> > to arch/x86/mm/kaslr.c. It is useful for both EFI and BIOS and
>> > also we don't need a new kernel parameter...
>> > Dose the idea make sense?
>> 
>> The more automatic stuff we do and we don't have to involve the user,
>> the better.
>> 
>> However, lemme look at Chao's current patchset first - we should not go
>> nuts by putting SRAT parsing everywhere :)
>
>arch/x86/mm/ident_map.c is a good example, it's shared between
>arch/x86/boot/compressed and arch/x86/mm/init_64.c

Thanks to Baoquan, I think we can try this idea.
How about you, Masa?

Thanks,
Chao Fan

>
>
Borislav Petkov - Nov. 8, 2018, 10:51 a.m.
On Tue, Nov 06, 2018 at 05:21:34PM -0500, Masayoshi Mizuma wrote:
> On Tue, Nov 06, 2018 at 09:45:11PM +0100, Borislav Petkov wrote:
> > On Tue, Nov 06, 2018 at 02:36:38PM -0500, Masayoshi Mizuma wrote:
> > > Yes, I think I can see what you are saying. However, I'm not sure how
> > > I use the setup_data in legacy BIOS environment.
> > 
> > What is "legacy BIOS environment" and what does that have to do with
> > setup_data?
> 
> My proposed patch [1] is useful only for EFI environment. The patch
> allocates a setup_date structure by efi_call_early() and store the
> KASLR data into the memory area.
> 
> +static void setup_kaslr(struct boot_params *params)
> +{
> +	struct setup_data *kaslr_data = NULL;
> +	struct setup_data *data;
> +	unsigned long size;
> +	efi_status_t status;
> +
> +	size = sizeof(struct setup_data) + sizeof(unsigned long long);
> +
> +	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
> +			size, (void **)&kaslr_data);
> 
> I'm not sure how I allocate such memory on no EFI environment.

A global definition which doesn't need allocation?

Maybe hpa would have another, better idea...
Borislav Petkov - Nov. 10, 2018, 10:54 a.m.
On Thu, Nov 08, 2018 at 11:51:29AM +0100, Borislav Petkov wrote:
> A global definition which doesn't need allocation?
> 
> Maybe hpa would have another, better idea...

...and he has: just put that address in a new field in struct
boot_params by converting one of the padding arrays there.

Don't forget to document it in Documentation/x86/zero-page.txt

This way you don't need any of the allocation fun or to use setup_data
at all.

HTH.
Masayoshi Mizuma - Nov. 11, 2018, 1:45 p.m.
On Sat, Nov 10, 2018 at 11:54:22AM +0100, Borislav Petkov wrote:
> On Thu, Nov 08, 2018 at 11:51:29AM +0100, Borislav Petkov wrote:
> > A global definition which doesn't need allocation?
> > 
> > Maybe hpa would have another, better idea...
> 
> ...and he has: just put that address in a new field in struct
> boot_params by converting one of the padding arrays there.
> 
> Don't forget to document it in Documentation/x86/zero-page.txt
> 
> This way you don't need any of the allocation fun or to use setup_data
> at all.

Thanks!
I have the prototype patch to use boot_params [1].
I will try to brush up it.

[1] https://lore.kernel.org/lkml/20181016151353.punyk7exekut2543@gabell

Thanks,
Masa

Patch

diff --git a/arch/x86/boot/compressed/acpitb.c b/arch/x86/boot/compressed/acpitb.c
index d119663..b79560c 100644
--- a/arch/x86/boot/compressed/acpitb.c
+++ b/arch/x86/boot/compressed/acpitb.c
@@ -309,6 +309,20 @@  static struct acpi_table_header *get_acpi_srat_table(void)
 	return NULL;
 }

+static void store_possible_addr(unsigned long long possible)
+{
+	struct setup_data *data;
+
+	data = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
+	while (data) {
+		if (data->type == SETUP_KASLR) {
+			*(unsigned long long *)data->data = possible;
+			return;
+		}
+		data = (struct setup_data *)(unsigned long)data->next;
+	}
+}
+
 /*
  * According to ACPI table, filter the immvoable memory regions
  * and store them in immovable_mem[].
@@ -319,6 +333,7 @@  void get_immovable_mem(void)
 	struct acpi_subtable_header *table;
 	struct acpi_srat_mem_affinity *ma;
 	unsigned long table_end;
+	unsigned long long possible_addr, max_possible_addr = 0;
 	int i = 0;

 	if (!cmdline_find_option_bool("movable_node") ||
@@ -338,7 +353,12 @@  void get_immovable_mem(void)
 		       sizeof(struct acpi_subtable_header) < table_end) {
 		if (table->type == ACPI_SRAT_TYPE_MEMORY_AFFINITY) {
 			ma = (struct acpi_srat_mem_affinity *)table;
-			if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
+
+			if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
+				possible_addr = ma->base_address + ma->length;
+				if (possible_addr > max_possible_addr)
+					max_possible_addr = possible_addr;
+			} else {
 				immovable_mem[i].start = ma->base_address;
 				immovable_mem[i].size = ma->length;
 				i++;
@@ -351,4 +371,5 @@  void get_immovable_mem(void)
 			((unsigned long)table + table->length);
 	}
 	num_immovable_mem = i;
+	store_possible_addr(max_possible_addr);
 }
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 1458b17..9b95fba 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -192,6 +192,40 @@  static void setup_efi_pci(struct boot_params *params)
 	efi_call_early(free_pool, pci_handle);
 }

+#ifdef CONFIG_RANDOMIZE_MEMORY
+static void setup_kaslr(struct boot_params *params)
+{
+	struct setup_data *kaslr_data = NULL;
+	struct setup_data *data;
+	unsigned long size;
+	efi_status_t status;
+
+	size = sizeof(struct setup_data) + sizeof(unsigned long long);
+
+	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
+			size, (void **)&kaslr_data);
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to allocate memory for 'kaslr_data'\n");
+		return;
+	}
+
+	kaslr_data->type = SETUP_KASLR;
+	kaslr_data->next = 0;
+	kaslr_data->len = size;
+
+	data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
+	if (data)
+		data->next = (unsigned long)kaslr_data;
+	else {
+		while (data->next)
+			data = (struct setup_data *)(unsigned long)data->next;
+		data->next = (unsigned long)kaslr_data;
+	}
+}
+#else
+static void setup_kaslr(struct boot_params *params) {}
+#endif
+
 static void retrieve_apple_device_properties(struct boot_params *boot_params)
 {
 	efi_guid_t guid = APPLE_PROPERTIES_PROTOCOL_GUID;
@@ -770,6 +804,8 @@  efi_main(struct efi_config *c, struct boot_params *boot_params)

 	setup_efi_pci(boot_params);

+	setup_kaslr(boot_params);
+
 	setup_quirks(boot_params);

 	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index a06cbf0..0a44d83 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -10,6 +10,7 @@ 
 #define SETUP_EFI			4
 #define SETUP_APPLE_PROPERTIES		5
 #define SETUP_JAILHOUSE			6
+#define SETUP_KASLR			7

 /* ram_size flags */
 #define RAMDISK_IMAGE_START_MASK	0x07FF
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 61db77b..6f91cf4 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -28,6 +28,7 @@ 
 #include <asm/pgtable.h>
 #include <asm/setup.h>
 #include <asm/kaslr.h>
+#include <linux/io.h>

 #include "mm_internal.h"

@@ -69,6 +70,61 @@  static inline bool kaslr_memory_enabled(void)
 	return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN);
 }

+#ifdef CONFIG_MEMORY_HOTPLUG
+static unsigned long long __init get_max_possible_addr(void)
+{
+	struct setup_data *data;
+	u64 pa_data;
+	unsigned long long max = 0;
+
+	pa_data = boot_params.hdr.setup_data;
+	while (pa_data) {
+		data = early_memremap(pa_data, sizeof(*data));
+		if (!data)
+			return 0;
+
+		if (data->type == SETUP_KASLR) {
+			max = *(unsigned long long *)data->data;
+			early_memunmap(data, sizeof(*data));
+			return max;
+		}
+		pa_data = data->next;
+		early_memunmap(data, sizeof(*data));
+	}
+
+	return max;
+}
+
+static unsigned int __init kaslr_padding(void)
+{
+	unsigned long long max_possible_addr;
+	unsigned long long max_possible_phys, max_actual_phys, threshold;
+	unsigned int rand_mem_physical_padding =
+				CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
+
+	max_possible_addr = get_max_possible_addr();
+	if (!max_possible_addr)
+		goto out;
+
+	max_actual_phys = roundup(PFN_PHYS(max_pfn), 1ULL << TB_SHIFT);
+	max_possible_phys = roundup(max_possible_addr,
+					1ULL << TB_SHIFT);
+	threshold = max_actual_phys +
+		((unsigned long long)rand_mem_physical_padding << TB_SHIFT);
+
+	if (max_possible_phys > threshold)
+		rand_mem_physical_padding =
+			(max_possible_phys - max_actual_phys) >> TB_SHIFT;
+out:
+	return rand_mem_physical_padding;
+}
+#else
+static unsigned int __init kaslr_padding(void)
+{
+	return CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
+}
+#endif
+
 /* Initialize base and padding for each memory region randomized with KASLR */
 void __init kernel_randomize_memory(void)
 {
@@ -102,7 +158,7 @@  void __init kernel_randomize_memory(void)
 	 */
 	BUG_ON(kaslr_regions[0].base != &page_offset_base);
 	memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) +
-		CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
+		kaslr_padding();

 	/* Adapt phyiscal memory region size based on available memory */
 	if (memory_tb < kaslr_regions[0].size_tb)