Login | Register For Free | Help
Search for: (Advanced)

Mailing List Archive: Linux: Kernel

[PATCH 1/2] pci: release that leaf bridge' resource that is not big -v11

 

 

Linux kernel RSS feed   Index | Next | Previous | View Threaded


yinghai at kernel

Nov 14, 2009, 12:50 AM

Post #1 of 14 (414 views)
Permalink
[PATCH 1/2] pci: release that leaf bridge' resource that is not big -v11

v2: Jesse doesn't like it is in find_free_bus_resource...
try to move out of pci_bus_size_bridges loop.
v3: add pci_setup_bridge calling after pci_bridge_release_not_used_res.
only clear release those res for x86.
v4: Bjorn want to release use dev instead of bus.
v5: Kenji pointed out it will have problem with several level bridge.
so let only handle leaf bridge.
v6: address Kenji's request (new pci_bus_release...). and change applying order
move back release to pci_assign_unassigned_resource
v7: change functions name pci_bus_release_unused_bridge_res according to Jesse
v8: address Eric's concern: only overwrite a leaf bridge resource that is not big
enough. This needs to be done in two steps: the first step records the failed res
and doesn't touch bridge res that was programmed by firmware; the second step
tries to release the bridge resource that was too small at first.
v9: refresh to be applied after bjorn's patch, and remove trick about save
size and restore resource second try.
v11: add pci=try=N (default 5) to set how many tries are made; more tries change more bridges

Signed-off-by: Yinghai Lu <yinghai [at] kernel>

---
drivers/pci/pci.c | 5
drivers/pci/pci.h | 2
drivers/pci/setup-bus.c | 304 +++++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 292 insertions(+), 19 deletions(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -27,7 +27,49 @@
#include <linux/slab.h>
#include "pci.h"

-static void pbus_assign_resources_sorted(const struct pci_bus *bus)
+
+static void add_to_failed_list(struct resource_list *head, struct pci_dev *dev,
+ struct resource *res)
+{
+ struct resource_list *list = head;
+ struct resource_list *ln = list->next;
+ struct resource_list *tmp;
+
+ tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp) {
+ pr_warning("add_to_failed_list: kmalloc() failed!\n");
+ return;
+ }
+
+ tmp->next = ln;
+ tmp->res = res;
+ tmp->dev = dev;
+ list->next = tmp;
+}
+
+static void free_failed_list(struct resource_list *head)
+{
+ struct resource_list *list, *tmp;
+ struct resource *res;
+ /*
+ * Try to release leaf bridge's resources that there is no child
+ * under it
+ */
+ for (list = head->next; list;) {
+ res = list->res;
+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+
+ head->next = NULL;
+}
+
+static void pbus_assign_resources_sorted(const struct pci_bus *bus,
+ struct resource_list *fail_head)
{
struct pci_dev *dev;
struct resource *res;
@@ -58,9 +100,17 @@ static void pbus_assign_resources_sorted
res = list->res;
idx = res - &list->dev->resource[0];
if (pci_assign_resource(list->dev, idx)) {
- res->start = 0;
- res->end = 0;
- res->flags = 0;
+ if (fail_head && !pci_is_root_bus(list->dev->bus)) {
+ /*
+ * device need to keep flags and size
+ * for next try
+ */
+ add_to_failed_list(fail_head, list->dev, res);
+ } else {
+ res->start = 0;
+ res->end = 0;
+ res->flags = 0;
+ }
}
tmp = list;
list = list->next;
@@ -134,19 +184,12 @@ EXPORT_SYMBOL(pci_setup_cardbus);
config space writes, so it's quite possible that an I/O window of
the bridge will have some undesirable address (e.g. 0) after the
first write. Ditto 64-bit prefetchable MMIO. */
-static void pci_setup_bridge(struct pci_bus *bus)
+static void pci_setup_bridge_io(struct pci_bus *bus)
{
struct pci_dev *bridge = bus->self;
struct resource *res;
struct pci_bus_region region;
- u32 l, bu, lu, io_upper16;
- int pref_mem64;
-
- if (pci_is_enabled(bridge))
- return;
-
- dev_info(&bridge->dev, "PCI bridge to [bus %02x-%02x]\n",
- bus->secondary, bus->subordinate);
+ u32 l, io_upper16;

/* Set up the top and bottom of the PCI I/O segment for this bus. */
res = bus->resource[0];
@@ -172,7 +215,13 @@ static void pci_setup_bridge(struct pci_
pci_write_config_dword(bridge, PCI_IO_BASE, l);
/* Update upper 16 bits of I/O base/limit. */
pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, io_upper16);
-
+}
+static void pci_setup_bridge_mmio(struct pci_bus *bus)
+{
+ struct pci_dev *bridge = bus->self;
+ struct resource *res;
+ struct pci_bus_region region;
+ u32 l;
/* Set up the top and bottom of the PCI Memory segment
for this bus. */
res = bus->resource[1];
@@ -187,6 +236,14 @@ static void pci_setup_bridge(struct pci_
dev_info(&bridge->dev, " bridge window [mem disabled]\n");
}
pci_write_config_dword(bridge, PCI_MEMORY_BASE, l);
+}
+static void pci_setup_bridge_mmio_pref(struct pci_bus *bus)
+{
+ struct pci_dev *bridge = bus->self;
+ struct resource *res;
+ struct pci_bus_region region;
+ u32 l, bu, lu;
+ int pref_mem64;

/* Clear out the upper 32 bits of PREF limit.
If PCI_PREF_BASE_UPPER32 was non-zero, this temporarily
@@ -219,10 +276,37 @@ static void pci_setup_bridge(struct pci_
pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu);
pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu);
}
+}
+static void __pci_setup_bridge(struct pci_bus *bus, unsigned long type)
+{
+ struct pci_dev *bridge = bus->self;
+
+ if (pci_is_enabled(bridge))
+ return;
+
+ dev_info(&bridge->dev, "PCI bridge to [bus %02x-%02x]\n",
+ bus->secondary, bus->subordinate);
+
+ if (type & IORESOURCE_IO)
+ pci_setup_bridge_io(bus);
+
+ if (type & IORESOURCE_MEM)
+ pci_setup_bridge_mmio(bus);
+
+ if (type & IORESOURCE_PREFETCH)
+ pci_setup_bridge_mmio_pref(bus);

pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus->bridge_ctl);
}

+static void pci_setup_bridge(struct pci_bus *bus)
+{
+ unsigned long type = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+
+ __pci_setup_bridge(bus, type);
+}
+
/* Check whether the bridge supports optional I/O and
prefetchable memory ranges. If not, the respective
base/limit registers must be read-only and read as 0. */
@@ -543,19 +627,20 @@ void __ref pci_bus_size_bridges(struct p
}
EXPORT_SYMBOL(pci_bus_size_bridges);

-void __ref pci_bus_assign_resources(const struct pci_bus *bus)
+static void __ref __pci_bus_assign_resources(const struct pci_bus *bus,
+ struct resource_list *fail_head)
{
struct pci_bus *b;
struct pci_dev *dev;

- pbus_assign_resources_sorted(bus);
+ pbus_assign_resources_sorted(bus, fail_head);

list_for_each_entry(dev, &bus->devices, bus_list) {
b = dev->subordinate;
if (!b)
continue;

- pci_bus_assign_resources(b);
+ __pci_bus_assign_resources(b, fail_head);

switch (dev->class >> 8) {
case PCI_CLASS_BRIDGE_PCI:
@@ -573,15 +658,130 @@ void __ref pci_bus_assign_resources(cons
}
}
}
+
+void __ref pci_bus_assign_resources(const struct pci_bus *bus)
+{
+ __pci_bus_assign_resources(bus, NULL);
+}
EXPORT_SYMBOL(pci_bus_assign_resources);

+static void release_children_resource(struct resource *r)
+{
+ struct resource *p;
+ resource_size_t size;
+
+ p = r->child;
+ while (p) {
+ release_children_resource(p);
+ release_resource(p);
+ printk(KERN_DEBUG "PCI: release child resource %pRt\n", p);
+ /* need to restore size, and keep flags */
+ size = resource_size(p);
+ p->start = 0;
+ p->end = size - 1;
+ p = r->child;
+ }
+}
+
+static void pci_bridge_release_unused_res(struct pci_bus *bus,
+ unsigned long type)
+{
+ int idx;
+ bool changed = false;
+ struct pci_dev *dev;
+ struct resource *r;
+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+
+ /* for pci bridges res only */
+ dev = bus->self;
+ for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_BRIDGE_RESOURCES + 3;
+ idx++) {
+ r = &dev->resource[idx];
+ if ((r->flags & type_mask) != type)
+ continue;
+ if (!r->parent)
+ continue;
+ /*
+ * if there are children under that, we should release them
+ * all
+ */
+ release_children_resource(r);
+ if (!release_resource(r)) {
+ dev_printk(KERN_DEBUG, &dev->dev,
+ "resource %d %pRt released\n", idx, r);
+ r->flags = 0;
+ changed = true;
+ }
+ }
+
+ if (changed) {
+ if (type & IORESOURCE_PREFETCH) {
+ /* avoiding touch the one without PREF */
+ type = IORESOURCE_PREFETCH;
+ }
+ __pci_setup_bridge(bus, type);
+ }
+}
+
+/*
+ * Try to release PCI bridge resources, starting from leaf bridges,
+ * so that we can allocate a bigger one later.
+ * check_leaf:
+ * 1: only release this bridge, and only if it is a leaf bridge
+ * 0: release this bridge and all bridges below it (third try and later)
+ */
+static void __ref pci_bus_release_unused_bridge_res(struct pci_bus *bus,
+ unsigned long type,
+ int check_leaf)
+{
+ struct pci_dev *dev;
+ bool is_leaf_bridge = true;
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ struct pci_bus *b = dev->subordinate;
+ if (!b)
+ continue;
+
+ switch (dev->class >> 8) {
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ is_leaf_bridge = false;
+ break;
+
+ case PCI_CLASS_BRIDGE_PCI:
+ default:
+ is_leaf_bridge = false;
+ if (!check_leaf)
+ pci_bus_release_unused_bridge_res(b, type,
+ check_leaf);
+ break;
+ }
+ }
+
+ /* The root bus? */
+ if (!bus->self)
+ return;
+
+ switch (bus->self->class >> 8) {
+ case PCI_CLASS_BRIDGE_CARDBUS:
+ break;
+
+ case PCI_CLASS_BRIDGE_PCI:
+ default:
+ if ((check_leaf && is_leaf_bridge) || !check_leaf)
+ pci_bridge_release_unused_res(bus, type);
+ break;
+ }
+}
+
static void pci_bus_dump_res(struct pci_bus *bus)
{
int i;

for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
struct resource *res = bus->resource[i];
- if (!res || !res->end)
+
+ if (!res || !res->end || !res->flags)
continue;

dev_printk(KERN_DEBUG, &bus->dev, "resource %d %pR\n", i, res);
@@ -605,10 +805,25 @@ static void pci_bus_dump_resources(struc
}
}

+/*
+ * first try will not touch pci bridge res
+ * second try will clear small leaf bridge res
+ * third try will clear related bridge: some aggressive
+ */
+/* assume we only have 4 level bridges, so only try 5 times */
+int pci_try_num = 5;
void __init
pci_assign_unassigned_resources(void)
{
struct pci_bus *bus;
+ int tried_times = 0;
+ int check_leaf = 1;
+ struct resource_list head, *list, *tmp;
+ unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
+ IORESOURCE_PREFETCH;
+ unsigned long failed_type;
+again:
+ head.next = NULL;

/* Depth first, calculate sizes and alignments of all
subordinate buses. */
@@ -617,7 +832,58 @@ pci_assign_unassigned_resources(void)
}
/* Depth last, allocate resources and update the hardware. */
list_for_each_entry(bus, &pci_root_buses, node) {
- pci_bus_assign_resources(bus);
+ __pci_bus_assign_resources(bus, &head);
+ }
+ tried_times++;
+
+ /* any device complain? */
+ if (!head.next)
+ goto enable_and_dump;
+ failed_type = 0;
+ for (list = head.next; list;) {
+ unsigned long flags = list->res->flags;
+
+ failed_type |= flags;
+ list = list->next;
+ }
+ /*
+ * io port are tight, don't try extra
+ * or if reach the limit, don't want to try more
+ */
+ failed_type &= type_mask;
+ if ((failed_type == IORESOURCE_IO) || (tried_times >= pci_try_num)) {
+ free_failed_list(&head);
+ goto enable_and_dump;
+ }
+
+ printk(KERN_DEBUG "PCI: No. %d try to assign unassigned res\n",
+ tried_times + 1);
+
+ /*
+ * Try to release leaf bridge's resources that doesn't fit resource of
+ * child device under that bridge
+ */
+ /* third times and later will not check if it is leaf */
+ if ((tried_times + 1) > 2)
+ check_leaf = 0;
+ for (list = head.next; list;) {
+ unsigned long flags = list->res->flags;
+
+ bus = list->dev->bus;
+ if (list->dev->subordinate)
+ list->res->flags = 0;
+ pci_bus_release_unused_bridge_res(bus, flags & type_mask,
+ check_leaf);
+ tmp = list;
+ list = list->next;
+ kfree(tmp);
+ }
+
+ goto again;
+
+enable_and_dump:
+ /* Depth last, update the hardware. */
+ list_for_each_entry(bus, &pci_root_buses, node) {
pci_enable_bridges(bus);
}

Index: linux-2.6/drivers/pci/pci.c
===================================================================
--- linux-2.6.orig/drivers/pci/pci.c
+++ linux-2.6/drivers/pci/pci.c
@@ -2779,6 +2779,11 @@ static int __init pci_setup(char *str)
pci_no_aer();
} else if (!strcmp(str, "nodomains")) {
pci_no_domains();
+ } else if (!strncmp(str, "try=", 4)) {
+ int try_num = memparse(str + 4, &str);
+
+ if (try_num > 0 && try_num < 10)
+ pci_try_num = try_num;
} else if (!strncmp(str, "cbiosize=", 9)) {
pci_cardbus_io_size = memparse(str + 9, &str);
} else if (!strncmp(str, "cbmemsize=", 10)) {
Index: linux-2.6/drivers/pci/pci.h
===================================================================
--- linux-2.6.orig/drivers/pci/pci.h
+++ linux-2.6/drivers/pci/pci.h
@@ -203,6 +203,8 @@ static inline int pci_ari_enabled(struct
return bus->self && bus->self->ari_enabled;
}

+extern int pci_try_num;
+
#ifdef CONFIG_PCI_QUIRKS
extern int pci_is_reassigndev(struct pci_dev *dev);
resource_size_t pci_specified_resource_alignment(struct pci_dev *dev);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


kaneshige.kenji at jp

Nov 23, 2009, 5:08 PM

Post #2 of 14 (368 views)
Permalink
Re: [PATCH 1/2] pci: release that leaf bridge' resource that is not big -v11 [In reply to]

Hi,

I tried v11 patches. This version seems to fix the problem I
reported against previous version.

I have no objection against the idea of resource allocation
changes for PCI express hotplug slots.

But I still have a concern about changing resource allocation for
anything other than PCI express hotplug slots. For example, some hotplug
controllers other than PCI express can have multiple slots under
the same bus. If some hotplug slots are occupied and the others
are empty at boot time, I think your code tries to shrink the
bus resources allocated by the BIOS for the hotplug slots. That would break
hot-add on the empty slots due to resource allocation
failure.

Thanks,
Kenji Kaneshige



Yinghai Lu wrote:
> v2: Jesse doesn't like it is in find_free_bus_resource...
> try to move out of pci_bus_size_bridges loop.
> v3: add pci_setup_bridge calling after pci_bridge_release_not_used_res.
> only clear release those res for x86.
> v4: Bjorn want to release use dev instead of bus.
> v5: Kenji pointed out it will have problem with several level bridge.
> so let only handle leaf bridge.
> v6: address Kenji's request (new pci_bus_release...). and change applying order
> move back release to pci_assign_unassigned_resource
> v7: change functions name pci_bus_release_unused_bridge_res according to Jesse
> v8: address Eric's concern, only overwrite leaf bridge resource that is not big
> enough need to do it in two steps, and first step recore the failed res,
> and don't touch bridge res that programmed by firmware. second step will
> try to release bridge resource that is too small at first.
> v9: refresh to be applied after bjorn's patch, and remove trick about save
> size and restore resource second try.
> v11:add pci=try=5, about more try to change more bridge
>
> Signed-off-by: Yinghai Lu <yinghai [at] kernel>
>
> ---
> drivers/pci/pci.c | 5
> drivers/pci/pci.h | 2
> drivers/pci/setup-bus.c | 304 +++++++++++++++++++++++++++++++++++++++++++++---
> 3 files changed, 292 insertions(+), 19 deletions(-)
>
> Index: linux-2.6/drivers/pci/setup-bus.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/setup-bus.c
> +++ linux-2.6/drivers/pci/setup-bus.c
> @@ -27,7 +27,49 @@
> #include <linux/slab.h>
> #include "pci.h"
>
> -static void pbus_assign_resources_sorted(const struct pci_bus *bus)
> +
> +static void add_to_failed_list(struct resource_list *head, struct pci_dev *dev,
> + struct resource *res)
> +{
> + struct resource_list *list = head;
> + struct resource_list *ln = list->next;
> + struct resource_list *tmp;
> +
> + tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
> + if (!tmp) {
> + pr_warning("add_to_failed_list: kmalloc() failed!\n");
> + return;
> + }
> +
> + tmp->next = ln;
> + tmp->res = res;
> + tmp->dev = dev;
> + list->next = tmp;
> +}
> +
> +static void free_failed_list(struct resource_list *head)
> +{
> + struct resource_list *list, *tmp;
> + struct resource *res;
> + /*
> + * Try to release leaf bridge's resources that there is no child
> + * under it
> + */
> + for (list = head->next; list;) {
> + res = list->res;
> + res->start = 0;
> + res->end = 0;
> + res->flags = 0;
> + tmp = list;
> + list = list->next;
> + kfree(tmp);
> + }
> +
> + head->next = NULL;
> +}
> +
> +static void pbus_assign_resources_sorted(const struct pci_bus *bus,
> + struct resource_list *fail_head)
> {
> struct pci_dev *dev;
> struct resource *res;
> @@ -58,9 +100,17 @@ static void pbus_assign_resources_sorted
> res = list->res;
> idx = res - &list->dev->resource[0];
> if (pci_assign_resource(list->dev, idx)) {
> - res->start = 0;
> - res->end = 0;
> - res->flags = 0;
> + if (fail_head && !pci_is_root_bus(list->dev->bus)) {
> + /*
> + * device need to keep flags and size
> + * for next try
> + */
> + add_to_failed_list(fail_head, list->dev, res);
> + } else {
> + res->start = 0;
> + res->end = 0;
> + res->flags = 0;
> + }
> }
> tmp = list;
> list = list->next;
> @@ -134,19 +184,12 @@ EXPORT_SYMBOL(pci_setup_cardbus);
> config space writes, so it's quite possible that an I/O window of
> the bridge will have some undesirable address (e.g. 0) after the
> first write. Ditto 64-bit prefetchable MMIO. */
> -static void pci_setup_bridge(struct pci_bus *bus)
> +static void pci_setup_bridge_io(struct pci_bus *bus)
> {
> struct pci_dev *bridge = bus->self;
> struct resource *res;
> struct pci_bus_region region;
> - u32 l, bu, lu, io_upper16;
> - int pref_mem64;
> -
> - if (pci_is_enabled(bridge))
> - return;
> -
> - dev_info(&bridge->dev, "PCI bridge to [bus %02x-%02x]\n",
> - bus->secondary, bus->subordinate);
> + u32 l, io_upper16;
>
> /* Set up the top and bottom of the PCI I/O segment for this bus. */
> res = bus->resource[0];
> @@ -172,7 +215,13 @@ static void pci_setup_bridge(struct pci_
> pci_write_config_dword(bridge, PCI_IO_BASE, l);
> /* Update upper 16 bits of I/O base/limit. */
> pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, io_upper16);
> -
> +}
> +static void pci_setup_bridge_mmio(struct pci_bus *bus)
> +{
> + struct pci_dev *bridge = bus->self;
> + struct resource *res;
> + struct pci_bus_region region;
> + u32 l;
> /* Set up the top and bottom of the PCI Memory segment
> for this bus. */
> res = bus->resource[1];
> @@ -187,6 +236,14 @@ static void pci_setup_bridge(struct pci_
> dev_info(&bridge->dev, " bridge window [mem disabled]\n");
> }
> pci_write_config_dword(bridge, PCI_MEMORY_BASE, l);
> +}
> +static void pci_setup_bridge_mmio_pref(struct pci_bus *bus)
> +{
> + struct pci_dev *bridge = bus->self;
> + struct resource *res;
> + struct pci_bus_region region;
> + u32 l, bu, lu;
> + int pref_mem64;
>
> /* Clear out the upper 32 bits of PREF limit.
> If PCI_PREF_BASE_UPPER32 was non-zero, this temporarily
> @@ -219,10 +276,37 @@ static void pci_setup_bridge(struct pci_
> pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu);
> pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu);
> }
> +}
> +static void __pci_setup_bridge(struct pci_bus *bus, unsigned long type)
> +{
> + struct pci_dev *bridge = bus->self;
> +
> + if (pci_is_enabled(bridge))
> + return;
> +
> + dev_info(&bridge->dev, "PCI bridge to [bus %02x-%02x]\n",
> + bus->secondary, bus->subordinate);
> +
> + if (type & IORESOURCE_IO)
> + pci_setup_bridge_io(bus);
> +
> + if (type & IORESOURCE_MEM)
> + pci_setup_bridge_mmio(bus);
> +
> + if (type & IORESOURCE_PREFETCH)
> + pci_setup_bridge_mmio_pref(bus);
>
> pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus->bridge_ctl);
> }
>
> +static void pci_setup_bridge(struct pci_bus *bus)
> +{
> + unsigned long type = IORESOURCE_IO | IORESOURCE_MEM |
> + IORESOURCE_PREFETCH;
> +
> + __pci_setup_bridge(bus, type);
> +}
> +
> /* Check whether the bridge supports optional I/O and
> prefetchable memory ranges. If not, the respective
> base/limit registers must be read-only and read as 0. */
> @@ -543,19 +627,20 @@ void __ref pci_bus_size_bridges(struct p
> }
> EXPORT_SYMBOL(pci_bus_size_bridges);
>
> -void __ref pci_bus_assign_resources(const struct pci_bus *bus)
> +static void __ref __pci_bus_assign_resources(const struct pci_bus *bus,
> + struct resource_list *fail_head)
> {
> struct pci_bus *b;
> struct pci_dev *dev;
>
> - pbus_assign_resources_sorted(bus);
> + pbus_assign_resources_sorted(bus, fail_head);
>
> list_for_each_entry(dev, &bus->devices, bus_list) {
> b = dev->subordinate;
> if (!b)
> continue;
>
> - pci_bus_assign_resources(b);
> + __pci_bus_assign_resources(b, fail_head);
>
> switch (dev->class >> 8) {
> case PCI_CLASS_BRIDGE_PCI:
> @@ -573,15 +658,130 @@ void __ref pci_bus_assign_resources(cons
> }
> }
> }
> +
> +void __ref pci_bus_assign_resources(const struct pci_bus *bus)
> +{
> + __pci_bus_assign_resources(bus, NULL);
> +}
> EXPORT_SYMBOL(pci_bus_assign_resources);
>
> +static void release_children_resource(struct resource *r)
> +{
> + struct resource *p;
> + resource_size_t size;
> +
> + p = r->child;
> + while (p) {
> + release_children_resource(p);
> + release_resource(p);
> + printk(KERN_DEBUG "PCI: release child resource %pRt\n", p);
> + /* need to restore size, and keep flags */
> + size = resource_size(p);
> + p->start = 0;
> + p->end = size - 1;
> + p = r->child;
> + }
> +}
> +
> +static void pci_bridge_release_unused_res(struct pci_bus *bus,
> + unsigned long type)
> +{
> + int idx;
> + bool changed = false;
> + struct pci_dev *dev;
> + struct resource *r;
> + unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
> + IORESOURCE_PREFETCH;
> +
> + /* for pci bridges res only */
> + dev = bus->self;
> + for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_BRIDGE_RESOURCES + 3;
> + idx++) {
> + r = &dev->resource[idx];
> + if ((r->flags & type_mask) != type)
> + continue;
> + if (!r->parent)
> + continue;
> + /*
> + * if there are children under that, we should release them
> + * all
> + */
> + release_children_resource(r);
> + if (!release_resource(r)) {
> + dev_printk(KERN_DEBUG, &dev->dev,
> + "resource %d %pRt released\n", idx, r);
> + r->flags = 0;
> + changed = true;
> + }
> + }
> +
> + if (changed) {
> + if (type & IORESOURCE_PREFETCH) {
> + /* avoiding touch the one without PREF */
> + type = IORESOURCE_PREFETCH;
> + }
> + __pci_setup_bridge(bus, type);
> + }
> +}
> +
> +/*
> + * try to release pci bridge resources that is from leaf bridge,
> + * so we can allocate big new one later
> + * check:
> + * 0: only release the bridge and only the bridge is leaf
> + * 1: release all down side bridge for third shoot
> + */
> +static void __ref pci_bus_release_unused_bridge_res(struct pci_bus *bus,
> + unsigned long type,
> + int check_leaf)
> +{
> + struct pci_dev *dev;
> + bool is_leaf_bridge = true;
> +
> + list_for_each_entry(dev, &bus->devices, bus_list) {
> + struct pci_bus *b = dev->subordinate;
> + if (!b)
> + continue;
> +
> + switch (dev->class >> 8) {
> + case PCI_CLASS_BRIDGE_CARDBUS:
> + is_leaf_bridge = false;
> + break;
> +
> + case PCI_CLASS_BRIDGE_PCI:
> + default:
> + is_leaf_bridge = false;
> + if (!check_leaf)
> + pci_bus_release_unused_bridge_res(b, type,
> + check_leaf);
> + break;
> + }
> + }
> +
> + /* The root bus? */
> + if (!bus->self)
> + return;
> +
> + switch (bus->self->class >> 8) {
> + case PCI_CLASS_BRIDGE_CARDBUS:
> + break;
> +
> + case PCI_CLASS_BRIDGE_PCI:
> + default:
> + if ((check_leaf && is_leaf_bridge) || !check_leaf)
> + pci_bridge_release_unused_res(bus, type);
> + break;
> + }
> +}
> +
> static void pci_bus_dump_res(struct pci_bus *bus)
> {
> int i;
>
> for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
> struct resource *res = bus->resource[i];
> - if (!res || !res->end)
> +
> + if (!res || !res->end || !res->flags)
> continue;
>
> dev_printk(KERN_DEBUG, &bus->dev, "resource %d %pR\n", i, res);
> @@ -605,10 +805,25 @@ static void pci_bus_dump_resources(struc
> }
> }
>
> +/*
> + * first try will not touch pci bridge res
> + * second try will clear small leaf bridge res
> + * third try will clear related bridge: some aggressive
> + */
> +/* assume we only have 4 level bridges, so only try 5 times */
> +int pci_try_num = 5;
> void __init
> pci_assign_unassigned_resources(void)
> {
> struct pci_bus *bus;
> + int tried_times = 0;
> + int check_leaf = 1;
> + struct resource_list head, *list, *tmp;
> + unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
> + IORESOURCE_PREFETCH;
> + unsigned long failed_type;
> +again:
> + head.next = NULL;
>
> /* Depth first, calculate sizes and alignments of all
> subordinate buses. */
> @@ -617,7 +832,58 @@ pci_assign_unassigned_resources(void)
> }
> /* Depth last, allocate resources and update the hardware. */
> list_for_each_entry(bus, &pci_root_buses, node) {
> - pci_bus_assign_resources(bus);
> + __pci_bus_assign_resources(bus, &head);
> + }
> + tried_times++;
> +
> + /* any device complain? */
> + if (!head.next)
> + goto enable_and_dump;
> + failed_type = 0;
> + for (list = head.next; list;) {
> + unsigned long flags = list->res->flags;
> +
> + failed_type |= flags;
> + list = list->next;
> + }
> + /*
> + * io port are tight, don't try extra
> + * or if reach the limit, don't want to try more
> + */
> + failed_type &= type_mask;
> + if ((failed_type == IORESOURCE_IO) || (tried_times >= pci_try_num)) {
> + free_failed_list(&head);
> + goto enable_and_dump;
> + }
> +
> + printk(KERN_DEBUG "PCI: No. %d try to assign unassigned res\n",
> + tried_times + 1);
> +
> + /*
> + * Try to release leaf bridge's resources that doesn't fit resource of
> + * child device under that bridge
> + */
> + /* third times and later will not check if it is leaf */
> + if ((tried_times + 1) > 2)
> + check_leaf = 0;
> + for (list = head.next; list;) {
> + unsigned long flags = list->res->flags;
> +
> + bus = list->dev->bus;
> + if (list->dev->subordinate)
> + list->res->flags = 0;
> + pci_bus_release_unused_bridge_res(bus, flags & type_mask,
> + check_leaf);
> + tmp = list;
> + list = list->next;
> + kfree(tmp);
> + }
> +
> + goto again;
> +
> +enable_and_dump:
> + /* Depth last, update the hardware. */
> + list_for_each_entry(bus, &pci_root_buses, node) {
> pci_enable_bridges(bus);
> }
>
> Index: linux-2.6/drivers/pci/pci.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/pci.c
> +++ linux-2.6/drivers/pci/pci.c
> @@ -2779,6 +2779,11 @@ static int __init pci_setup(char *str)
> pci_no_aer();
> } else if (!strcmp(str, "nodomains")) {
> pci_no_domains();
> + } else if (!strncmp(str, "try=", 4)) {
> + int try_num = memparse(str + 4, &str);
> +
> + if (try_num > 0 && try_num < 10)
> + pci_try_num = try_num;
> } else if (!strncmp(str, "cbiosize=", 9)) {
> pci_cardbus_io_size = memparse(str + 9, &str);
> } else if (!strncmp(str, "cbmemsize=", 10)) {
> Index: linux-2.6/drivers/pci/pci.h
> ===================================================================
> --- linux-2.6.orig/drivers/pci/pci.h
> +++ linux-2.6/drivers/pci/pci.h
> @@ -203,6 +203,8 @@ static inline int pci_ari_enabled(struct
> return bus->self && bus->self->ari_enabled;
> }
>
> +extern int pci_try_num;
> +
> #ifdef CONFIG_PCI_QUIRKS
> extern int pci_is_reassigndev(struct pci_dev *dev);
> resource_size_t pci_specified_resource_alignment(struct pci_dev *dev);
>
>


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


yinghai at kernel

Nov 23, 2009, 5:14 PM

Post #3 of 14 (369 views)
Permalink
Re: [PATCH 1/2] pci: release that leaf bridge' resource that is not big -v11 [In reply to]

Kenji Kaneshige wrote:
> Hi,
>
> I tried v11 patches. This version seems to fix the problem I
> reported against previous version.
>
> I have no objection against the idea of resource allocation
> changes for PCI express hotplug slots.

thanks

>
> But I still have concern about changing resource allocation for
> other than PCI express hotplug slots. For example, some hotplug
> controller other than PCI express can have multiple slots under
> the same bus. If some hotplug slots are occupied and the others
> are empty at the boot time, I think your code try to shrink the
> bus resources for hotplug slots allocated by BIOS. It would break
> the hot-add on the empty slots due to the resource allocation
> failure.

no,
it will not touch bridge resources that were already assigned by the BIOS, except
when some bridge resource is not big enough; then it tries to get a bigger one for them.

YH
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


kaneshige.kenji at jp

Nov 23, 2009, 5:51 PM

Post #4 of 14 (370 views)
Permalink
Re: [PATCH 1/2] pci: release that leaf bridge' resource that is not big -v11 [In reply to]

Yinghai Lu wrote:
> Kenji Kaneshige wrote:
>> Hi,
>>
>> I tried v11 patches. This version seems to fix the problem I
>> reported against previous version.
>>
>> I have no objection against the idea of resource allocation
>> changes for PCI express hotplug slots.
>
> thanks
>
>> But I still have concern about changing resource allocation for
>> other than PCI express hotplug slots. For example, some hotplug
>> controller other than PCI express can have multiple slots under
>> the same bus. If some hotplug slots are occupied and the others
>> are empty at the boot time, I think your code try to shrink the
>> bus resources for hotplug slots allocated by BIOS. It would break
>> the hot-add on the empty slots due to the resource allocation
>> failure.
>
> no,
> it will not touch bridge resources that already assigned by BIOS except
> some bridge resource is not big enough. and get big one for them.
>

Ok, I understood that if the BIOS assigns enough resources to the
bridge, it has no impact.

One question. I thought your patch shrinks the bridge resource to
allocate enough resource for sibling bridge. But it actually doesn't.
Right?

It would be appreciated if you could update the patch description to explain
the problem and how the patch fixes/improves it.

Thanks,
Kenji Kaneshige

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


yinghai at kernel

Nov 23, 2009, 6:32 PM

Post #5 of 14 (362 views)
Permalink
Re: [PATCH 1/2] pci: release that leaf bridge' resource that is not big -v11 [In reply to]

Kenji Kaneshige wrote:
> Yinghai Lu wrote:
>> Kenji Kaneshige wrote:
>>> Hi,
>>>
>>> I tried v11 patches. This version seems to fix the problem I
>>> reported against previous version.
>>>
>>> I have no objection against the idea of resource allocation
>>> changes for PCI express hotplug slots.
>>
>> thanks
>>
>>> But I still have concern about changing resource allocation for
>>> other than PCI express hotplug slots. For example, some hotplug
>>> controller other than PCI express can have multiple slots under
>>> the same bus. If some hotplug slots are occupied and the others
>>> are empty at the boot time, I think your code try to shrink the
>>> bus resources for hotplug slots allocated by BIOS. It would break
>>> the hot-add on the empty slots due to the resource allocation
>>> failure.
>>
>> no,
>> it will not touch bridge resources that already assigned by BIOS except
>> some bridge resource is not big enough. and get big one for them.
>>
>
> Ok, I understood that if the BIOS assigns enough resources to the
> bridge, it has no impact.
>
> One question. I thought your patch shrinks the bridge resource to
> allocate enough resource for sibling bridge. But it actually doesn't.
> Right?

ok, i got it. will change pci_try_num default to 1.

>
> It would be appreciated if you update the patch description about
> the problem and how to fix/improbe it.

sure.

YH
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


yinghai at kernel

Nov 24, 2009, 3:18 PM

Post #6 of 14 (358 views)
Permalink
Re: [PATCH 1/2] pci: release that leaf bridge' resource that is not big -v11 [In reply to]

Kenji Kaneshige wrote:
> Yinghai Lu wrote:
>> Kenji Kaneshige wrote:
>>> Hi,
>>>
>>> I tried v11 patches. This version seems to fix the problem I
>>> reported against previous version.
>>>
>>> I have no objection against the idea of resource allocation
>>> changes for PCI express hotplug slots.
>>
>> thanks
>>
>>> But I still have concern about changing resource allocation for
>>> other than PCI express hotplug slots. For example, some hotplug
>>> controller other than PCI express can have multiple slots under
>>> the same bus. If some hotplug slots are occupied and the others
>>> are empty at the boot time, I think your code try to shrink the
>>> bus resources for hotplug slots allocated by BIOS. It would break
>>> the hot-add on the empty slots due to the resource allocation
>>> failure.
>>
>> no,
>> it will not touch bridge resources that already assigned by BIOS except
>> some bridge resource is not big enough. and get big one for them.
>>
>
> Ok, I understood that if the BIOS assigns enough resources to the
> bridge, it has no impact.
>
> One question. I thought your patch shrinks the bridge resource to
> allocate enough resource for sibling bridge. But it actually doesn't.
> Right?

please check if this one could fix the shrinking bridge resource problem...

[PATCH] pci: don't shrink bridge resources

When we clear a leaf bridge resource and try to get a bigger one, we could end up shrinking the bridge if
there are resources under it.

Check against the old resource size to make sure we never request less than the bridge already had.

Signed-off-by: Yinghai Lu <yinghai [at] kernel>

---
drivers/pci/setup-bus.c | 22 ++++++++++++----------
1 file changed, 12 insertions(+), 10 deletions(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -432,7 +432,7 @@ static void pbus_size_io(struct pci_bus
{
struct pci_dev *dev;
struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
- unsigned long size = 0, size1 = 0;
+ unsigned long size = 0, size1 = 0, old_size;

if (!b_res)
return;
@@ -457,17 +457,18 @@ static void pbus_size_io(struct pci_bus
}
if (size < min_size)
size = min_size;
+ old_size = resource_size(b_res);
+ if (old_size == 1)
+ old_size = 0;
/* To be fixed in 2.5: we should have sort of HAVE_ISA
flag in the struct pci_bus. */
#if defined(CONFIG_ISA) || defined(CONFIG_EISA)
size = (size & 0xff) + ((size & ~0xffUL) << 2);
#endif
size = ALIGN(size + size1, 4096);
+ if (size < old_size)
+ size = old_size;
if (!size) {
- if (b_res->start || b_res->end)
- dev_info(&bus->self->dev, "disabling bridge window "
- "%pR to [bus %02x-%02x] (unused)\n", b_res,
- bus->secondary, bus->subordinate);
b_res->flags = 0;
return;
}
@@ -483,7 +484,7 @@ static int pbus_size_mem(struct pci_bus
unsigned long type, resource_size_t min_size)
{
struct pci_dev *dev;
- resource_size_t min_align, align, size;
+ resource_size_t min_align, align, size, old_size;
resource_size_t aligns[12]; /* Alignments from 1Mb to 2Gb */
int order, max_order;
struct resource *b_res = find_free_bus_resource(bus, type);
@@ -533,6 +534,11 @@ static int pbus_size_mem(struct pci_bus
}
if (size < min_size)
size = min_size;
+ old_size = resource_size(b_res);
+ if (old_size == 1)
+ old_size = 0;
+ if (size < old_size)
+ size = old_size;

align = 0;
min_align = 0;
@@ -549,10 +555,6 @@ static int pbus_size_mem(struct pci_bus
}
size = ALIGN(size, min_align);
if (!size) {
- if (b_res->start || b_res->end)
- dev_info(&bus->self->dev, "disabling bridge window "
- "%pR to [bus %02x-%02x] (unused)\n", b_res,
- bus->secondary, bus->subordinate);
b_res->flags = 0;
return 1;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


kaneshige.kenji at jp

Nov 25, 2009, 3:24 AM

Post #7 of 14 (356 views)
Permalink
Re: [PATCH 1/2] pci: release that leaf bridge' resource that is not big -v11 [In reply to]

Hi Yinghai,

I would like to reconfirm what is the problem you're trying to solve
before reviewing and testing the additional patch. To be honest, your
patch looks more and more complicated and it is becoming difficult
for me to review and test it.

By the way, if your problem is that BIOS doesn't assign the resource
to the parent bridge (root port or switch downstream port) of PCIe
hotplug slots, I guess we can improve it in a simpler way. I made some
sample patches (not enough testing). Please take a look. Patches are

- [PATCH 1/2] pciehp: remove redundancy in bridge resource allocation
- [PATCH 2/2] pciehp: add support for bridge resource reallocation

Thanks,
Kenji Kaneshige


Yinghai Lu wrote:
> Kenji Kaneshige wrote:
>> Yinghai Lu wrote:
>>> Kenji Kaneshige wrote:
>>>> Hi,
>>>>
>>>> I tried v11 patches. This version seems to fix the problem I
>>>> reported against previous version.
>>>>
>>>> I have no objection against the idea of resource allocation
>>>> changes for PCI express hotplug slots.
>>> thanks
>>>
>>>> But I still have concern about changing resource allocation for
>>>> other than PCI express hotplug slots. For example, some hotplug
>>>> controller other than PCI express can have multiple slots under
>>>> the same bus. If some hotplug slots are occupied and the others
>>>> are empty at the boot time, I think your code try to shrink the
>>>> bus resources for hotplug slots allocated by BIOS. It would break
>>>> the hot-add on the empty slots due to the resource allocation
>>>> failure.
>>> no,
>>> it will not touch bridge resources that already assigned by BIOS except
>>> some bridge resource is not big enough. and get big one for them.
>>>
>> Ok, I understood that if the BIOS assigns enough resources to the
>> bridge, it has no impact.
>>
>> One question. I thought your patch shrinks the bridge resource to
>> allocate enough resource for sibling bridge. But it actually doesn't.
>> Right?
>
> please check if this one could fix the shrinking bridge resource problem...
>
> [PATCH] pci: don't shrink bridge resources
>
> when we are clearing leaf bridge resource and try to get big one, we could shrink the bridge if
> there resource under it.
>
> let check with old resource size and make sure we are trying to get big one.
>
> Signed-off-by: Yinghai Lu <yinghai [at] kernel>
>
> ---
> drivers/pci/setup-bus.c | 22 ++++++++++++----------
> 1 file changed, 12 insertions(+), 10 deletions(-)
>
> Index: linux-2.6/drivers/pci/setup-bus.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/setup-bus.c
> +++ linux-2.6/drivers/pci/setup-bus.c
> @@ -432,7 +432,7 @@ static void pbus_size_io(struct pci_bus
> {
> struct pci_dev *dev;
> struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
> - unsigned long size = 0, size1 = 0;
> + unsigned long size = 0, size1 = 0, old_size;
>
> if (!b_res)
> return;
> @@ -457,17 +457,18 @@ static void pbus_size_io(struct pci_bus
> }
> if (size < min_size)
> size = min_size;
> + old_size = resource_size(b_res);
> + if (old_size == 1)
> + old_size = 0;
> /* To be fixed in 2.5: we should have sort of HAVE_ISA
> flag in the struct pci_bus. */
> #if defined(CONFIG_ISA) || defined(CONFIG_EISA)
> size = (size & 0xff) + ((size & ~0xffUL) << 2);
> #endif
> size = ALIGN(size + size1, 4096);
> + if (size < old_size)
> + size = old_size;
> if (!size) {
> - if (b_res->start || b_res->end)
> - dev_info(&bus->self->dev, "disabling bridge window "
> - "%pR to [bus %02x-%02x] (unused)\n", b_res,
> - bus->secondary, bus->subordinate);
> b_res->flags = 0;
> return;
> }
> @@ -483,7 +484,7 @@ static int pbus_size_mem(struct pci_bus
> unsigned long type, resource_size_t min_size)
> {
> struct pci_dev *dev;
> - resource_size_t min_align, align, size;
> + resource_size_t min_align, align, size, old_size;
> resource_size_t aligns[12]; /* Alignments from 1Mb to 2Gb */
> int order, max_order;
> struct resource *b_res = find_free_bus_resource(bus, type);
> @@ -533,6 +534,11 @@ static int pbus_size_mem(struct pci_bus
> }
> if (size < min_size)
> size = min_size;
> + old_size = resource_size(b_res);
> + if (old_size == 1)
> + old_size = 0;
> + if (size < old_size)
> + size = old_size;
>
> align = 0;
> min_align = 0;
> @@ -549,10 +555,6 @@ static int pbus_size_mem(struct pci_bus
> }
> size = ALIGN(size, min_align);
> if (!size) {
> - if (b_res->start || b_res->end)
> - dev_info(&bus->self->dev, "disabling bridge window "
> - "%pR to [bus %02x-%02x] (unused)\n", b_res,
> - bus->secondary, bus->subordinate);
> b_res->flags = 0;
> return 1;
> }
>
>


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


yinghai at kernel

Nov 25, 2009, 9:44 AM

Post #8 of 14 (355 views)
Permalink
Re: [PATCH 1/2] pci: release that leaf bridge' resource that is not big -v11 [In reply to]

Kenji Kaneshige wrote:
> Hi Yinghai,
>
> I would like to reconfirm what is the problem you're trying to solve
> before reviewing and testing the additional patch. To be honest, your
> patch looks more and more complicated and it is becoming difficult
> for me to review and test it.

the real problem:
1. boot time:

BIOS separates the IO range between several IOHs, and on some slots, BIOS assigns the resource to the bridge, but stops
assigning resources to the device under that bridge, because the device needs a big resource.

so patch1 is trying to
a. run pci assign unassigned and record the failed device resources.
b. clear the BIOS assigned resource of the parent bridge of the failed device
c. go back and call pci assign unassigned again
d. if it still fails, go up more bridges, and clear and try again.

2. hotplug:
BIOS separates the IO range between several IOHs, and on some slots, BIOS assigns the resource to every bridge. (8M)
but when inserting a card that needs a big resource, the card can not get the resource, because the kernel will not touch the
bridge resource.

so patch2 is trying to
a. assign resources to the devices in that slot, and record the failed devices
b. if some failed, clear specifically the io port of the bridge, or the mmio of the bridge, or the mmio pref of the bridge.
c. try to assign the parent bridge of the slot.

so it will keep the original resources assigned by BIOS if possible.

and you have tested patch1 and patch2 in V11, but said patch1 may have a shrinking resource problem.
patch3 addresses the case where patch1 could shrink the resource of a non-pcie hotplug bridge...


>
> By the way, if your problem is that BIOS doesn't assign the resource
> to the parent bridge (root port or switch downstream port) of PCIe
> hotplug slots, I guess we can improve it with simpler way. I made a
> sample patches (no enough testing). Please take a look. Patches are
>
> - [PATCH 1/2] pciehp: remove redundancy in bridge resource allocation
> - [PATCH 2/2] pciehp: add support for bridge resource reallocation

that is like some earlier version of patch2 (release them all at first) but with a pciehp_realloc parameter.
it could be useful in some cases where the current patch2 (try and increase) could use up all the space.
i would like to have that after patch2.

YH
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


kaneshige.kenji at jp

Nov 25, 2009, 10:43 PM

Post #9 of 14 (355 views)
Permalink
Re: [PATCH 1/2] pci: release that leaf bridge' resource that is not big -v11 [In reply to]

Yinghai Lu wrote:
> Kenji Kaneshige wrote:
>> Hi Yinghai,
>>
>> I would like to reconfirm what is the problem you're trying to solve
>> before reviewing and testing the additional patch. To be honest, your
>> patch looks more and more complicated and it is becoming difficult
>> for me to review and test it.
>
> the real problem:
> 1. boot time:
>
> BIOS separate IO range between several IOHs, and on some slots, BIOS assign the resource to the bridge, but stop
> assigning resource to the device under that bridge, because the device need big resource.
>
> so patch1 is trying to
> a. pci assign unassign and record the failed device resource.
> b. clear the BIOS assigned resource of the parent bridge of fail device
> c. go back and call pci assign unsigned
> d. if it still fail, will go up more bridges. and clear and try again.
>
> 2. hotplug:
> BIOS separate IO range between several IOHs, and on some slots, BIOS assign the resource to every bridge. (8M)
> but when insert one card that big resource, the card can not get resource. because kernel will not touch the
> bridge resource.
>
> so patch2 is trying to
> a. assign resource to devices with that slot. and record fail devices
> b. if there is some failed, will clear sepcifically io port of bridge, or mmio of bridge, or mmio pref of bridge.
> c. try to assign the parent bridge of the slot.
>
> so it will keep original assigned resource by BIOS if possible.
>
> and you have tested patch1 and patch2 in V11, but said patch1 may have shrinking resource problem.
> the patch3 is addressing the patch1 that could shrinking resource for non-pcie hotplug bridge...
>

Thank you for the explanation. The patch3 seems to solve my concern.

Your patch only touches the leaf bridge at the 2nd try of resource
assignment. IIRC, this behavior is to prevent shrinking bridge
resources. Am I correct? I'm not sure, but I think we don't need this
behavior now that we have another mechanism to prevent
bridge resources from shrinking.

Thanks,
Kenji Kaneshige



>
>> By the way, if your problem is that BIOS doesn't assign the resource
>> to the parent bridge (root port or switch downstream port) of PCIe
>> hotplug slots, I guess we can improve it with simpler way. I made a
>> sample patches (no enough testing). Please take a look. Patches are
>>
>> - [PATCH 1/2] pciehp: remove redundancy in bridge resource allocation
>> - [PATCH 2/2] pciehp: add support for bridge resource reallocation
>
> like some earlier version of patch2 (release it them all at first) but have pciehp_realloc parameter.
> could be useful in some case when current patch2 (try and increase) could use up all space.
> i like to have that after patch2.
>
> YH
>
>


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


yinghai at kernel

Nov 25, 2009, 11:30 PM

Post #10 of 14 (354 views)
Permalink
Re: [PATCH 1/2] pci: release that leaf bridge' resource that is not big -v11 [In reply to]

On Nov 25, 2009, at 10:43 PM, Kenji Kaneshige <kaneshige.kenji [at] jp
> wrote:

> Yinghai Lu wrote:
>> Kenji Kaneshige wrote:
>>> Hi Yinghai,
>>>
>>> I would like to reconfirm what is the problem you're trying to solve
>>> before reviewing and testing the additional patch. To be honest,
>>> your
>>> patch looks more and more complicated and it is becoming difficult
>>> for me to review and test it.
>> the real problem:
>> 1. boot time:
>> BIOS separate IO range between several IOHs, and on some slots,
>> BIOS assign the resource to the bridge, but stop
>> assigning resource to the device under that bridge, because the
>> device need big resource.
>> so patch1 is trying to a. pci assign unassign and record the failed
>> device resource.
>> b. clear the BIOS assigned resource of the parent bridge of fail
>> device
>> c. go back and call pci assign unsigned
>> d. if it still fail, will go up more bridges. and clear and try
>> again.
>> 2. hotplug:
>> BIOS separate IO range between several IOHs, and on some slots,
>> BIOS assign the resource to every bridge. (8M)
>> but when insert one card that big resource, the card can not get
>> resource. because kernel will not touch the bridge resource.
>> so patch2 is trying to
>> a. assign resource to devices with that slot. and record fail devices
>> b. if there is some failed, will clear sepcifically io port of
>> bridge, or mmio of bridge, or mmio pref of bridge.
>> c. try to assign the parent bridge of the slot.
>> so it will keep original assigned resource by BIOS if possible.
>> and you have tested patch1 and patch2 in V11, but said patch1 may
>> have shrinking resource problem.
>> the patch3 is addressing the patch1 that could shrinking resource
>> for non-pcie hotplug bridge...
>>
>
> Thank you for the explanation. The patch3 seems to solve my concern.
>
> Your patch only touch the leaf bridge at the 2nd try of resource
> assignment. IIRC, this behavior is to prevent from shrinking bridge
> resources. Am I correct? I'm not sure but I think we don't need this
> behavior because now that we have another mechanism to prevent
> from shrinking bridge resource.
>>>

The third patch will only try to increase the bridge resource.

The try number still defaults to 5; it could help in other cases.

Can you send me the whole boot log?

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


kaneshige.kenji at jp

Nov 26, 2009, 11:12 PM

Post #11 of 14 (353 views)
Permalink
Re: [PATCH 1/2] pci: release that leaf bridge' resource that is not big -v11 [In reply to]

Yinghai wrote:
>
>
>
>
> On Nov 25, 2009, at 10:43 PM, Kenji Kaneshige
> <kaneshige.kenji [at] jp> wrote:
>
>> Yinghai Lu wrote:
>>> Kenji Kaneshige wrote:
>>>> Hi Yinghai,
>>>>
>>>> I would like to reconfirm what is the problem you're trying to solve
>>>> before reviewing and testing the additional patch. To be honest, your
>>>> patch looks more and more complicated and it is becoming difficult
>>>> for me to review and test it.
>>> the real problem:
>>> 1. boot time:
>>> BIOS separate IO range between several IOHs, and on some slots, BIOS
>>> assign the resource to the bridge, but stop
>>> assigning resource to the device under that bridge, because the
>>> device need big resource.
>>> so patch1 is trying to a. pci assign unassign and record the failed
>>> device resource.
>>> b. clear the BIOS assigned resource of the parent bridge of fail device
>>> c. go back and call pci assign unsigned
>>> d. if it still fail, will go up more bridges. and clear and try again.
>>> 2. hotplug:
>>> BIOS separate IO range between several IOHs, and on some slots, BIOS
>>> assign the resource to every bridge. (8M)
>>> but when insert one card that big resource, the card can not get
>>> resource. because kernel will not touch the bridge resource.
>>> so patch2 is trying to
>>> a. assign resource to devices with that slot. and record fail devices
>>> b. if there is some failed, will clear sepcifically io port of
>>> bridge, or mmio of bridge, or mmio pref of bridge.
>>> c. try to assign the parent bridge of the slot.
>>> so it will keep original assigned resource by BIOS if possible.
>>> and you have tested patch1 and patch2 in V11, but said patch1 may
>>> have shrinking resource problem.
>>> the patch3 is addressing the patch1 that could shrinking resource for
>>> non-pcie hotplug bridge...
>>>
>>
>> Thank you for the explanation. The patch3 seems to solve my concern.
>>
>> Your patch only touch the leaf bridge at the 2nd try of resource
>> assignment. IIRC, this behavior is to prevent from shrinking bridge
>> resources. Am I correct? I'm not sure but I think we don't need this
>> behavior because now that we have another mechanism to prevent
>> from shrinking bridge resource.
>>>>
>
> Third patch will only try to increase the bridge res
>
> Try num still default to 5 , it could help other case
>
> Can you send me whole bootlog ?
>

Bad news...

My system doesn't boot (hangup) with your latest set of patches.
Fusion MPT SAS driver initialization failed on some devices.
Please see below

...
Fusion MPT SAS Host driver 3.04.12
mptsas 0000:09:00.0: PCI INT A -> GSI 18 (level, low) -> IRQ 18
mptsas 0000:09:00.0: BAR 1: can't reserve [mem 0x00510000-0x00513fff 64bit]
mptbase: ioc0: ERROR - pci_request_selected_regions() with MEM failed
mptsas 0000:0a:00.0: PCI INT A -> GSI 19 (level, low) -> IRQ 19
mptsas 0000:0a:00.0: BAR 1: can't reserve [mem 0x00610000-0x00613fff 64bit]
mptbase: ioc1: ERROR - pci_request_selected_regions() with MEM failed
...


This problem disappears when I revert patch 6/9, and my system
can boot. But I found that igb driver initialization failed on some
devices even in this case. Please see below

...
igb 0000:08:00.0: BAR 0: can't reserve [mem 0x00100000-0x0011ffff]
igb 0000:08:00.0: PCI INT A disabled
igb: probe of 0000:08:00.0 failed with error -16
igb 0000:08:00.1: PCI INT B -> GSI 18 (level, low) -> IRQ 18
igb 0000:08:00.1: BAR 0: can't reserve [mem 0x00140000-0x0015ffff]
igb 0000:08:00.1: PCI INT B disabled
igb: probe of 0000:08:00.1 failed with error -16
...

I'll send the whole boot log in both cases privately later.

Thanks,
Kenji Kaneshige

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


yinghai at kernel

Nov 26, 2009, 11:52 PM

Post #12 of 14 (355 views)
Permalink
Re: [PATCH 1/2] pci: release that leaf bridge' resource that is not big -v11 [In reply to]

>>
>
> Bad news...
>
> My system doesn't boot (hangup) with your latest set of patches.
> Fusion MPT SAS driver initialization failed on some devices.
> Please see below
>
> ...
> Fusion MPT SAS Host driver 3.04.12
> mptsas 0000:09:00.0: PCI INT A -> GSI 18 (level, low) -> IRQ 18
> mptsas 0000:09:00.0: BAR 1: can't reserve [mem 0x00510000-0x00513fff 64bit]
> mptbase: ioc0: ERROR - pci_request_selected_regions() with MEM failed
> mptsas 0000:0a:00.0: PCI INT A -> GSI 19 (level, low) -> IRQ 19
> mptsas 0000:0a:00.0: BAR 1: can't reserve [mem 0x00610000-0x00613fff 64bit]
> mptbase: ioc1: ERROR - pci_request_selected_regions() with MEM failed
> ...
>
>
> This problem disappear when I revert the patch 6/9, and my system
> can boot. But I found igb driver initialization failed on some
> devices even in this case. Please see below
>
> ...
> igb 0000:08:00.0: BAR 0: can't reserve [mem 0x00100000-0x0011ffff]
> igb 0000:08:00.0: PCI INT A disabled
> igb: probe of 0000:08:00.0 failed with error -16
> igb 0000:08:00.1: PCI INT B -> GSI 18 (level, low) -> IRQ 18
> igb 0000:08:00.1: BAR 0: can't reserve [mem 0x00140000-0x0015ffff]
> igb 0000:08:00.1: PCI INT B disabled
> igb: probe of 0000:08:00.1 failed with error -16
> ...

that looks weird; without patch 6/9 it should only have the pci bridge res shrink problem from your last test?

maybe we need to keep that feature disabled by default.

please try

---
drivers/pci/setup-bus.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -918,8 +918,7 @@ static void pci_bus_dump_resources(struc
* second try will clear small leaf bridge res
* third try will clear related bridge: some aggressive
*/
-/* assume we only have 4 level bridges, so only try 5 times */
-int pci_try_num = 5;
+int pci_try_num = 1;
void __init
pci_assign_unassigned_resources(void)
{
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


kaneshige.kenji at jp

Nov 27, 2009, 12:26 AM

Post #13 of 14 (353 views)
Permalink
Re: [PATCH 1/2] pci: release that leaf bridge' resource that is not big -v11 [In reply to]

Yinghai Lu wrote:
>> Bad news...
>>
>> My system doesn't boot (hangup) with your latest set of patches.
>> Fusion MPT SAS driver initialization failed on some devices.
>> Please see below
>>
>> ...
>> Fusion MPT SAS Host driver 3.04.12
>> mptsas 0000:09:00.0: PCI INT A -> GSI 18 (level, low) -> IRQ 18
>> mptsas 0000:09:00.0: BAR 1: can't reserve [mem 0x00510000-0x00513fff 64bit]
>> mptbase: ioc0: ERROR - pci_request_selected_regions() with MEM failed
>> mptsas 0000:0a:00.0: PCI INT A -> GSI 19 (level, low) -> IRQ 19
>> mptsas 0000:0a:00.0: BAR 1: can't reserve [mem 0x00610000-0x00613fff 64bit]
>> mptbase: ioc1: ERROR - pci_request_selected_regions() with MEM failed
>> ...
>>
>>
>> This problem disappear when I revert the patch 6/9, and my system
>> can boot. But I found igb driver initialization failed on some
>> devices even in this case. Please see below
>>
>> ...
>> igb 0000:08:00.0: BAR 0: can't reserve [mem 0x00100000-0x0011ffff]
>> igb 0000:08:00.0: PCI INT A disabled
>> igb: probe of 0000:08:00.0 failed with error -16
>> igb 0000:08:00.1: PCI INT B -> GSI 18 (level, low) -> IRQ 18
>> igb 0000:08:00.1: BAR 0: can't reserve [mem 0x00140000-0x0015ffff]
>> igb 0000:08:00.1: PCI INT B disabled
>> igb: probe of 0000:08:00.1 failed with error -16
>> ...
>
> that looks werid, without patch 6/9 should only have pci bridge res shrink problem with your last test?
>

I don't know why. I might have overlooked it at the previous
test.

> maybe we need keep that feature default to be disabled.

Will do it next week.

Thanks,
Kenji Kaneshige


>
> please try
>
> ---
> drivers/pci/setup-bus.c | 3 +--
> 1 file changed, 1 insertion(+), 2 deletions(-)
>
> Index: linux-2.6/drivers/pci/setup-bus.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/setup-bus.c
> +++ linux-2.6/drivers/pci/setup-bus.c
> @@ -918,8 +918,7 @@ static void pci_bus_dump_resources(struc
> * second try will clear small leaf bridge res
> * third try will clear related bridge: some aggressive
> */
> -/* assume we only have 4 level bridges, so only try 5 times */
> -int pci_try_num = 5;
> +int pci_try_num = 1;
> void __init
> pci_assign_unassigned_resources(void)
> {
>
>


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


yinghai at kernel

Nov 27, 2009, 3:13 PM

Post #14 of 14 (350 views)
Permalink
Re: [PATCH 1/2] pci: release that leaf bridge' resource that is not big -v11 [In reply to]

Kenji Kaneshige wrote:
> Yinghai Lu wrote:
>>> Bad news...
>>>
>>> My system doesn't boot (hangup) with your latest set of patches.
>>> Fusion MPT SAS driver initialization failed on some devices.
>>> Please see below
>>>
>>> ...
>>> Fusion MPT SAS Host driver 3.04.12
>>> mptsas 0000:09:00.0: PCI INT A -> GSI 18 (level, low) -> IRQ 18
>>> mptsas 0000:09:00.0: BAR 1: can't reserve [mem 0x00510000-0x00513fff
>>> 64bit]
>>> mptbase: ioc0: ERROR - pci_request_selected_regions() with MEM failed
>>> mptsas 0000:0a:00.0: PCI INT A -> GSI 19 (level, low) -> IRQ 19
>>> mptsas 0000:0a:00.0: BAR 1: can't reserve [mem 0x00610000-0x00613fff
>>> 64bit]
>>> mptbase: ioc1: ERROR - pci_request_selected_regions() with MEM failed
>>> ...
>>>
>>>
>>> This problem disappear when I revert the patch 6/9, and my system
>>> can boot. But I found igb driver initialization failed on some
>>> devices even in this case. Please see below
>>>
>>> ...
>>> igb 0000:08:00.0: BAR 0: can't reserve [mem 0x00100000-0x0011ffff]
>>> igb 0000:08:00.0: PCI INT A disabled
>>> igb: probe of 0000:08:00.0 failed with error -16
>>> igb 0000:08:00.1: PCI INT B -> GSI 18 (level, low) -> IRQ 18
>>> igb 0000:08:00.1: BAR 0: can't reserve [mem 0x00140000-0x0015ffff]
>>> igb 0000:08:00.1: PCI INT B disabled
>>> igb: probe of 0000:08:00.1 failed with error -16
>>> ...
>>
>> that looks werid, without patch 6/9 should only have pci bridge res
>> shrink problem with your last test?
>>
>
> I don't know why. I might have overlooked it at the previous
> test.
>
>> maybe we need keep that feature default to be disabled.
>
> Will do it next week.


>
> Thanks,
> Kenji Kaneshige
>
>
>>
>> please try
>>
>> ---
>> drivers/pci/setup-bus.c | 3 +--
>> 1 file changed, 1 insertion(+), 2 deletions(-)
>>
>> Index: linux-2.6/drivers/pci/setup-bus.c
>> ===================================================================
>> --- linux-2.6.orig/drivers/pci/setup-bus.c
>> +++ linux-2.6/drivers/pci/setup-bus.c
>> @@ -918,8 +918,7 @@ static void pci_bus_dump_resources(struc
>> * second try will clear small leaf bridge res
>> * third try will clear related bridge: some aggressive
>> */
>> -/* assume we only have 4 level bridges, so only try 5 times */
>> -int pci_try_num = 5;
>> +int pci_try_num = 1;
>> void __init
>> pci_assign_unassigned_resources(void)
>> {
>>
>>
>

don't try it. i found the cause.

even changing pci_try_num to 1 will not help.

...

will have another patch.

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Linux kernel RSS feed   Index | Next | Previous | View Threaded
 
 


Interested in having your list archived? Contact Gossamer Threads
 
  Web Applications & Managed Hosting Powered by Gossamer Threads Inc.