Login | Register For Free | Help
Search for: (Advanced)

Mailing List Archive: Linux: Kernel

[PATCH v8 3/5] virtio_balloon: introduce migration primitives to balloon pages

 

 

Linux kernel RSS feed   Index | Next | Previous | View Threaded


aquini at redhat

Aug 21, 2012, 5:47 AM

Post #1 of 5 (46 views)
Permalink
[PATCH v8 3/5] virtio_balloon: introduce migration primitives to balloon pages

Memory fragmentation introduced by ballooning might reduce significantly
the number of 2MB contiguous memory blocks that can be used within a guest,
thus imposing performance penalties associated with the reduced number of
transparent huge pages that could be used by the guest workload.

Besides making balloon pages movable at allocation time and introducing
the necessary primitives to perform balloon page migration/compaction,
this patch also introduces the following locking scheme to provide the
proper synchronization and protection for struct virtio_balloon elements
against concurrent accesses due to parallel operations introduced by
memory compaction / page migration.
- balloon_lock (mutex) : synchronizes the access demand to elements of
struct virtio_balloon and its queue operations;
- pages_lock (spinlock): special protection to balloon pages list against
concurrent list handling operations;
- virtio_baloon->pages list handling sync by RCU operations;

Signed-off-by: Rafael Aquini <aquini [at] redhat>
---
drivers/virtio/virtio_balloon.c | 210 +++++++++++++++++++++++++++++++++++++---
1 file changed, 199 insertions(+), 11 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 0908e60..bda7bb0 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -27,6 +27,7 @@
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <linux/balloon_compaction.h>

/*
* Balloon device works in 4K page units. So each page is pointed to by
@@ -35,6 +36,12 @@
*/
#define VIRTIO_BALLOON_PAGES_PER_PAGE (PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)

+/* flags used to hint compaction procedures about the balloon device status */
+enum balloon_status_flags {
+ BALLOON_REMOVAL = 0, /* balloon device is under removal steps */
+ BALLOON_OK, /* balloon device is up and running */
+};
+
struct virtio_balloon
{
struct virtio_device *vdev;
@@ -46,11 +53,24 @@ struct virtio_balloon
/* The thread servicing the balloon. */
struct task_struct *thread;

+ /* balloon special page->mapping */
+ struct address_space *mapping;
+
+ /* Synchronize access/update to this struct virtio_balloon elements */
+ struct mutex balloon_lock;
+
/* Waiting for host to ack the pages we released. */
wait_queue_head_t acked;

/* Number of balloon pages we've told the Host we're not using. */
unsigned int num_pages;
+
+ /* balloon device status flag */
+ unsigned short balloon_status;
+
+ /* Protect 'pages' list against concurrent handling */
+ spinlock_t pages_lock;
+
/*
* The pages we've told the Host we're not using.
* Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE
@@ -122,13 +142,17 @@ static void set_page_pfns(u32 pfns[], struct page *page)

static void fill_balloon(struct virtio_balloon *vb, size_t num)
{
+ /* Get the proper GFP alloc mask from vb->mapping flags */
+ gfp_t vb_gfp_mask = mapping_gfp_mask(vb->mapping);
+
/* We can only do one array worth at a time. */
num = min(num, ARRAY_SIZE(vb->pfns));

+ mutex_lock(&vb->balloon_lock);
for (vb->num_pfns = 0; vb->num_pfns < num;
vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
- struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY |
- __GFP_NOMEMALLOC | __GFP_NOWARN);
+ struct page *page = alloc_page(vb_gfp_mask | __GFP_NORETRY |
+ __GFP_NOWARN | __GFP_NOMEMALLOC);
if (!page) {
if (printk_ratelimit())
dev_printk(KERN_INFO, &vb->vdev->dev,
@@ -141,7 +165,10 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
set_page_pfns(vb->pfns + vb->num_pfns, page);
vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
totalram_pages--;
- list_add(&page->lru, &vb->pages);
+ spin_lock(&vb->pages_lock);
+ list_add_rcu(&page->lru, &vb->pages);
+ assign_balloon_mapping(page, vb->mapping);
+ spin_unlock(&vb->pages_lock);
}

/* Didn't get any? Oh well. */
@@ -149,6 +176,7 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
return;

tell_host(vb, vb->inflate_vq);
+ mutex_unlock(&vb->balloon_lock);
}

static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
@@ -169,21 +197,48 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
/* We can only do one array worth at a time. */
num = min(num, ARRAY_SIZE(vb->pfns));

+ mutex_lock(&vb->balloon_lock);
for (vb->num_pfns = 0; vb->num_pfns < num;
vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
- page = list_first_entry(&vb->pages, struct page, lru);
- list_del(&page->lru);
+ /*
+ * We can race against 'virtballoon_isolatepage()' and end up
+ * stumbling across a _temporarily_ empty 'pages' list.
+ */
+ spin_lock(&vb->pages_lock);
+ page = list_first_or_null_rcu(&vb->pages, struct page, lru);
+ if (!page) {
+ spin_unlock(&vb->pages_lock);
+ break;
+ }
+ /*
+ * It is safe now to drop page->mapping and delete this page
+ * from balloon page list, since we are grabbing 'pages_lock'
+ * which prevents 'virtballoon_isolatepage()' from acting.
+ */
+ clear_balloon_mapping(page);
+ list_del_rcu(&page->lru);
+ spin_unlock(&vb->pages_lock);
set_page_pfns(vb->pfns + vb->num_pfns, page);
vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
}
+ /*
+ * Syncrhonize RCU grace period and wait for all RCU read critical side
+ * sections to finish before proceeding with page release steps.
+ * This avoids compaction/migration callback races against balloon
+ * device removal steps.
+ */
+ synchronize_rcu();

/*
* Note that if
* virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
* is true, we *have* to do it in this order
*/
- tell_host(vb, vb->deflate_vq);
- release_pages_by_pfn(vb->pfns, vb->num_pfns);
+ if (vb->num_pfns > 0) {
+ tell_host(vb, vb->deflate_vq);
+ release_pages_by_pfn(vb->pfns, vb->num_pfns);
+ }
+ mutex_unlock(&vb->balloon_lock);
}

static inline void update_stat(struct virtio_balloon *vb, int idx,
@@ -239,6 +294,7 @@ static void stats_handle_request(struct virtio_balloon *vb)
struct scatterlist sg;
unsigned int len;

+ mutex_lock(&vb->balloon_lock);
vb->need_stats_update = 0;
update_balloon_stats(vb);

@@ -249,6 +305,7 @@ static void stats_handle_request(struct virtio_balloon *vb)
if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0)
BUG();
virtqueue_kick(vq);
+ mutex_unlock(&vb->balloon_lock);
}

static void virtballoon_changed(struct virtio_device *vdev)
@@ -261,22 +318,27 @@ static void virtballoon_changed(struct virtio_device *vdev)
static inline s64 towards_target(struct virtio_balloon *vb)
{
__le32 v;
- s64 target;
+ s64 target, actual;

+ mutex_lock(&vb->balloon_lock);
+ actual = vb->num_pages;
vb->vdev->config->get(vb->vdev,
offsetof(struct virtio_balloon_config, num_pages),
&v, sizeof(v));
target = le32_to_cpu(v);
- return target - vb->num_pages;
+ mutex_unlock(&vb->balloon_lock);
+ return target - actual;
}

static void update_balloon_size(struct virtio_balloon *vb)
{
- __le32 actual = cpu_to_le32(vb->num_pages);
-
+ __le32 actual;
+ mutex_lock(&vb->balloon_lock);
+ actual = cpu_to_le32(vb->num_pages);
vb->vdev->config->set(vb->vdev,
offsetof(struct virtio_balloon_config, actual),
&actual, sizeof(actual));
+ mutex_unlock(&vb->balloon_lock);
}

static int balloon(void *_vballoon)
@@ -339,9 +401,117 @@ static int init_vqs(struct virtio_balloon *vb)
return 0;
}

+#ifdef CONFIG_BALLOON_COMPACTION
+/*
+ * Populate balloon_mapping->a_ops callback method to perform the balloon
+ * page migration task.
+ *
+ * After a ballooned page gets isolated by compaction procedures, this is the
+ * function that performs the page migration on behalf of move_to_new_page(),
+ * when the last calls (page)->mapping->a_ops->migratepage.
+ *
+ * Page migration for virtio balloon is done in a simple swap fashion which
+ * follows these two steps:
+ * 1) insert newpage into vb->pages list and update the host about it;
+ * 2) update the host about the removed old page from vb->pages list;
+ */
+int virtballoon_migratepage(struct address_space *mapping,
+ struct page *newpage, struct page *page, enum migrate_mode mode)
+{
+ struct virtio_balloon *vb = (void *)__page_balloon_device(page);
+
+ /* at this point, besides very unlikely, a NULL *vb is a serious bug */
+ BUG_ON(!vb);
+
+ /*
+ * Skip page migration step if the memory balloon device is under its
+ * removal procedure, to avoid racing against module unload.
+ *
+ * If there still are isolated (balloon) pages under migration lists,
+ * 'virtballoon_putbackpage()' will take care of them properly, before
+ * the module unload finishes.
+ */
+ if (vb->balloon_status == BALLOON_REMOVAL)
+ return -EAGAIN;
+
+ mutex_lock(&vb->balloon_lock);
+
+ /* balloon's page migration 1st step */
+ vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
+ spin_lock(&vb->pages_lock);
+ list_add_rcu(&newpage->lru, &vb->pages);
+ spin_unlock(&vb->pages_lock);
+ set_page_pfns(vb->pfns, newpage);
+ tell_host(vb, vb->inflate_vq);
+
+ /* balloon's page migration 2nd step */
+ vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
+ set_page_pfns(vb->pfns, page);
+ tell_host(vb, vb->deflate_vq);
+
+ mutex_unlock(&vb->balloon_lock);
+ return 0;
+}
+
+/*
+ * Populate balloon_mapping->a_ops callback method to help compaction on
+ * isolating a page from the balloon page list for posterior migration.
+ */
+int virtballoon_isolatepage(struct page *page)
+{
+ struct virtio_balloon *vb = (void *)__page_balloon_device(page);
+ int ret = 0;
+ /*
+ * If we stumble across a NULL *vb here, it means this page has been
+ * already released by 'leak_balloon()'.
+ *
+ * We also skip the page isolation step if the memory balloon device is
+ * under its removal procedure, to avoid racing against module unload.
+ */
+ if (vb && (vb->balloon_status != BALLOON_REMOVAL)) {
+ spin_lock(&vb->pages_lock);
+ /*
+ * virtballoon_isolatepage() can race against leak_balloon(),
+ * and (wrongly) isolate a page that is about to be freed.
+ * Test page->mapping under pages_lock to close that window.
+ */
+ if (rcu_access_pointer(page->mapping) == vb->mapping) {
+ /* It is safe to isolate this page, now */
+ list_del_rcu(&page->lru);
+ ret = 1;
+ }
+ spin_unlock(&vb->pages_lock);
+ }
+ return ret;
+}
+
+/*
+ * Populate balloon_mapping->a_ops callback method to help compaction on
+ * re-inserting a not-migrated isolated page into the balloon page list.
+ */
+void virtballoon_putbackpage(struct page *page)
+{
+ struct virtio_balloon *vb = (void *)__page_balloon_device(page);
+
+ /* at this point, besides very unlikely, a NULL *vb is a serious bug */
+ BUG_ON(!vb);
+
+ spin_lock(&vb->pages_lock);
+ list_add_rcu(&page->lru, &vb->pages);
+ spin_unlock(&vb->pages_lock);
+}
+#endif /* CONFIG_BALLOON_COMPACTION */
+
+/* define the balloon_mapping->a_ops callbacks to allow compaction/migration */
+static DEFINE_BALLOON_MAPPING_AOPS(virtio_balloon_aops,
+ virtballoon_migratepage,
+ virtballoon_isolatepage,
+ virtballoon_putbackpage);
+
static int virtballoon_probe(struct virtio_device *vdev)
{
struct virtio_balloon *vb;
+ struct address_space *vb_mapping;
int err;

vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
@@ -351,12 +521,24 @@ static int virtballoon_probe(struct virtio_device *vdev)
}

INIT_LIST_HEAD(&vb->pages);
+ spin_lock_init(&vb->pages_lock);
+ mutex_init(&vb->balloon_lock);
+
vb->num_pages = 0;
init_waitqueue_head(&vb->config_change);
init_waitqueue_head(&vb->acked);
vb->vdev = vdev;
vb->need_stats_update = 0;

+ /* Allocate a special page->mapping for this balloon device */
+ vb_mapping = alloc_balloon_mapping((void *)vb, &virtio_balloon_aops);
+ if (!vb_mapping) {
+ err = -ENOMEM;
+ goto out_free_vb;
+ }
+ /* Store the page->mapping reference for this balloon device */
+ vb->mapping = vb_mapping;
+
err = init_vqs(vb);
if (err)
goto out_free_vb;
@@ -367,12 +549,14 @@ static int virtballoon_probe(struct virtio_device *vdev)
goto out_del_vqs;
}

+ vb->balloon_status = BALLOON_OK;
return 0;

out_del_vqs:
vdev->config->del_vqs(vdev);
out_free_vb:
kfree(vb);
+ kfree(vb_mapping);
out:
return err;
}
@@ -394,8 +578,10 @@ static void __devexit virtballoon_remove(struct virtio_device *vdev)
{
struct virtio_balloon *vb = vdev->priv;

+ vb->balloon_status = BALLOON_REMOVAL;
kthread_stop(vb->thread);
remove_common(vb);
+ kfree(vb->mapping);
kfree(vb);
}

@@ -409,6 +595,7 @@ static int virtballoon_freeze(struct virtio_device *vdev)
* function is called.
*/

+ vb->balloon_status = BALLOON_REMOVAL;
remove_common(vb);
return 0;
}
@@ -424,6 +611,7 @@ static int virtballoon_restore(struct virtio_device *vdev)

fill_balloon(vb, towards_target(vb));
update_balloon_size(vb);
+ vb->balloon_status = BALLOON_OK;
return 0;
}
#endif
--
1.7.11.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


mst at redhat

Aug 21, 2012, 7:40 AM

Post #2 of 5 (40 views)
Permalink
Re: [PATCH v8 3/5] virtio_balloon: introduce migration primitives to balloon pages [In reply to]

On Tue, Aug 21, 2012 at 09:47:46AM -0300, Rafael Aquini wrote:
> Memory fragmentation introduced by ballooning might reduce significantly
> the number of 2MB contiguous memory blocks that can be used within a guest,
> thus imposing performance penalties associated with the reduced number of
> transparent huge pages that could be used by the guest workload.
>
> Besides making balloon pages movable at allocation time and introducing
> the necessary primitives to perform balloon page migration/compaction,
> this patch also introduces the following locking scheme to provide the
> proper synchronization and protection for struct virtio_balloon elements
> against concurrent accesses due to parallel operations introduced by
> memory compaction / page migration.
> - balloon_lock (mutex) : synchronizes the access demand to elements of
> struct virtio_balloon and its queue operations;
> - pages_lock (spinlock): special protection to balloon pages list against
> concurrent list handling operations;
> - virtio_baloon->pages list handling sync by RCU operations;
>
> Signed-off-by: Rafael Aquini <aquini [at] redhat>
> ---
> drivers/virtio/virtio_balloon.c | 210 +++++++++++++++++++++++++++++++++++++---
> 1 file changed, 199 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 0908e60..bda7bb0 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -27,6 +27,7 @@
> #include <linux/delay.h>
> #include <linux/slab.h>
> #include <linux/module.h>
> +#include <linux/balloon_compaction.h>
>
> /*
> * Balloon device works in 4K page units. So each page is pointed to by
> @@ -35,6 +36,12 @@
> */
> #define VIRTIO_BALLOON_PAGES_PER_PAGE (PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
>
> +/* flags used to hint compaction procedures about the balloon device status */
> +enum balloon_status_flags {
> + BALLOON_REMOVAL = 0, /* balloon device is under removal steps */
> + BALLOON_OK, /* balloon device is up and running */
> +};
> +
> struct virtio_balloon
> {
> struct virtio_device *vdev;
> @@ -46,11 +53,24 @@ struct virtio_balloon
> /* The thread servicing the balloon. */
> struct task_struct *thread;
>
> + /* balloon special page->mapping */
> + struct address_space *mapping;
> +
> + /* Synchronize access/update to this struct virtio_balloon elements */
> + struct mutex balloon_lock;
> +
> /* Waiting for host to ack the pages we released. */
> wait_queue_head_t acked;
>
> /* Number of balloon pages we've told the Host we're not using. */
> unsigned int num_pages;
> +
> + /* balloon device status flag */
> + unsigned short balloon_status;
> +
> + /* Protect 'pages' list against concurrent handling */
> + spinlock_t pages_lock;
> +
> /*
> * The pages we've told the Host we're not using.
> * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE
> @@ -122,13 +142,17 @@ static void set_page_pfns(u32 pfns[], struct page *page)
>
> static void fill_balloon(struct virtio_balloon *vb, size_t num)
> {
> + /* Get the proper GFP alloc mask from vb->mapping flags */
> + gfp_t vb_gfp_mask = mapping_gfp_mask(vb->mapping);
> +
> /* We can only do one array worth at a time. */
> num = min(num, ARRAY_SIZE(vb->pfns));
>
> + mutex_lock(&vb->balloon_lock);
> for (vb->num_pfns = 0; vb->num_pfns < num;
> vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
> - struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY |
> - __GFP_NOMEMALLOC | __GFP_NOWARN);
> + struct page *page = alloc_page(vb_gfp_mask | __GFP_NORETRY |
> + __GFP_NOWARN | __GFP_NOMEMALLOC);
> if (!page) {
> if (printk_ratelimit())
> dev_printk(KERN_INFO, &vb->vdev->dev,
> @@ -141,7 +165,10 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
> set_page_pfns(vb->pfns + vb->num_pfns, page);
> vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
> totalram_pages--;
> - list_add(&page->lru, &vb->pages);
> + spin_lock(&vb->pages_lock);
> + list_add_rcu(&page->lru, &vb->pages);
> + assign_balloon_mapping(page, vb->mapping);
> + spin_unlock(&vb->pages_lock);
> }
>
> /* Didn't get any? Oh well. */
> @@ -149,6 +176,7 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
> return;
>
> tell_host(vb, vb->inflate_vq);
> + mutex_unlock(&vb->balloon_lock);
> }
>
> static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
> @@ -169,21 +197,48 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
> /* We can only do one array worth at a time. */
> num = min(num, ARRAY_SIZE(vb->pfns));
>
> + mutex_lock(&vb->balloon_lock);
> for (vb->num_pfns = 0; vb->num_pfns < num;
> vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
> - page = list_first_entry(&vb->pages, struct page, lru);
> - list_del(&page->lru);
> + /*
> + * We can race against 'virtballoon_isolatepage()' and end up
> + * stumbling across a _temporarily_ empty 'pages' list.
> + */
> + spin_lock(&vb->pages_lock);
> + page = list_first_or_null_rcu(&vb->pages, struct page, lru);

Why is list_first_or_null_rcu called outside
RCU critical section here?

> + if (!page) {
> + spin_unlock(&vb->pages_lock);
> + break;
> + }
> + /*
> + * It is safe now to drop page->mapping and delete this page
> + * from balloon page list, since we are grabbing 'pages_lock'
> + * which prevents 'virtballoon_isolatepage()' from acting.
> + */
> + clear_balloon_mapping(page);
> + list_del_rcu(&page->lru);
> + spin_unlock(&vb->pages_lock);
> set_page_pfns(vb->pfns + vb->num_pfns, page);
> vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
> }
> + /*
> + * Syncrhonize RCU grace period and wait for all RCU read critical side
> + * sections to finish before proceeding with page release steps.
> + * This avoids compaction/migration callback races against balloon
> + * device removal steps.
> + */
> + synchronize_rcu();
>
> /*
> * Note that if
> * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
> * is true, we *have* to do it in this order
> */
> - tell_host(vb, vb->deflate_vq);
> - release_pages_by_pfn(vb->pfns, vb->num_pfns);
> + if (vb->num_pfns > 0) {
> + tell_host(vb, vb->deflate_vq);
> + release_pages_by_pfn(vb->pfns, vb->num_pfns);
> + }
> + mutex_unlock(&vb->balloon_lock);
> }
>
> static inline void update_stat(struct virtio_balloon *vb, int idx,
> @@ -239,6 +294,7 @@ static void stats_handle_request(struct virtio_balloon *vb)
> struct scatterlist sg;
> unsigned int len;
>
> + mutex_lock(&vb->balloon_lock);
> vb->need_stats_update = 0;
> update_balloon_stats(vb);
>
> @@ -249,6 +305,7 @@ static void stats_handle_request(struct virtio_balloon *vb)
> if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0)
> BUG();
> virtqueue_kick(vq);
> + mutex_unlock(&vb->balloon_lock);
> }
>
> static void virtballoon_changed(struct virtio_device *vdev)
> @@ -261,22 +318,27 @@ static void virtballoon_changed(struct virtio_device *vdev)
> static inline s64 towards_target(struct virtio_balloon *vb)
> {
> __le32 v;
> - s64 target;
> + s64 target, actual;
>
> + mutex_lock(&vb->balloon_lock);
> + actual = vb->num_pages;
> vb->vdev->config->get(vb->vdev,
> offsetof(struct virtio_balloon_config, num_pages),
> &v, sizeof(v));
> target = le32_to_cpu(v);
> - return target - vb->num_pages;
> + mutex_unlock(&vb->balloon_lock);
> + return target - actual;
> }
>
> static void update_balloon_size(struct virtio_balloon *vb)
> {
> - __le32 actual = cpu_to_le32(vb->num_pages);
> -
> + __le32 actual;
> + mutex_lock(&vb->balloon_lock);
> + actual = cpu_to_le32(vb->num_pages);
> vb->vdev->config->set(vb->vdev,
> offsetof(struct virtio_balloon_config, actual),
> &actual, sizeof(actual));
> + mutex_unlock(&vb->balloon_lock);
> }
>
> static int balloon(void *_vballoon)
> @@ -339,9 +401,117 @@ static int init_vqs(struct virtio_balloon *vb)
> return 0;
> }
>
> +#ifdef CONFIG_BALLOON_COMPACTION
> +/*
> + * Populate balloon_mapping->a_ops callback method to perform the balloon
> + * page migration task.
> + *
> + * After a ballooned page gets isolated by compaction procedures, this is the
> + * function that performs the page migration on behalf of move_to_new_page(),
> + * when the last calls (page)->mapping->a_ops->migratepage.
> + *
> + * Page migration for virtio balloon is done in a simple swap fashion which
> + * follows these two steps:
> + * 1) insert newpage into vb->pages list and update the host about it;
> + * 2) update the host about the removed old page from vb->pages list;
> + */
> +int virtballoon_migratepage(struct address_space *mapping,
> + struct page *newpage, struct page *page, enum migrate_mode mode)
> +{
> + struct virtio_balloon *vb = (void *)__page_balloon_device(page);

Don't cast to void *.

> +
> + /* at this point, besides very unlikely, a NULL *vb is a serious bug */

A bit of a strange comment. Either it's unlikely or a bug :)
Besides, vb is checked for NULL value, not *vb.

> + BUG_ON(!vb);
> +
> + /*
> + * Skip page migration step if the memory balloon device is under its
> + * removal procedure, to avoid racing against module unload.
> + *

What kind of race does this fix? Why does not
leaking all pages fix it?

> + * If there still are isolated (balloon) pages under migration lists,
> + * 'virtballoon_putbackpage()' will take care of them properly, before
> + * the module unload finishes.
> + */
> + if (vb->balloon_status == BALLOON_REMOVAL)
> + return -EAGAIN;
> +
> + mutex_lock(&vb->balloon_lock);
> +
> + /* balloon's page migration 1st step */
> + vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
> + spin_lock(&vb->pages_lock);
> + list_add_rcu(&newpage->lru, &vb->pages);
> + spin_unlock(&vb->pages_lock);
> + set_page_pfns(vb->pfns, newpage);
> + tell_host(vb, vb->inflate_vq);
> +
> + /* balloon's page migration 2nd step */
> + vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
> + set_page_pfns(vb->pfns, page);
> + tell_host(vb, vb->deflate_vq);
> +
> + mutex_unlock(&vb->balloon_lock);
> + return 0;
> +}
> +
> +/*
> + * Populate balloon_mapping->a_ops callback method to help compaction on
> + * isolating a page from the balloon page list for posterior migration.
> + */
> +int virtballoon_isolatepage(struct page *page)
> +{
> + struct virtio_balloon *vb = (void *)__page_balloon_device(page);

Cast here looks wrong.

> + int ret = 0;
> + /*
> + * If we stumble across a NULL *vb here, it means this page has been
> + * already released by 'leak_balloon()'.
> + *
> + * We also skip the page isolation step if the memory balloon device is
> + * under its removal procedure, to avoid racing against module unload.
> + */

What kind of race do you have in mind here?
Doesn't leaking all pages in module removal address it?

> + if (vb


How can vb be NULL? Pls document.

> && (vb->balloon_status != BALLOON_REMOVAL)) {

Read of balloon_status needs some kind of memory barrier I think.

> + spin_lock(&vb->pages_lock);
> + /*
> + * virtballoon_isolatepage() can race against leak_balloon(),
> + * and (wrongly) isolate a page that is about to be freed.
> + * Test page->mapping under pages_lock to close that window.
> + */
> + if (rcu_access_pointer(page->mapping) == vb->mapping) {
> + /* It is safe to isolate this page, now */
> + list_del_rcu(&page->lru);
> + ret = 1;
> + }
> + spin_unlock(&vb->pages_lock);
> + }
> + return ret;
> +}
> +
> +/*
> + * Populate balloon_mapping->a_ops callback method to help compaction on
> + * re-inserting a not-migrated isolated page into the balloon page list.
> + */
> +void virtballoon_putbackpage(struct page *page)
> +{
> + struct virtio_balloon *vb = (void *)__page_balloon_device(page);
> +
> + /* at this point, besides very unlikely, a NULL *vb is a serious bug */

A bit of a strange comment. Either it's unlikely or a bug :)
Besides, vb is checked for NULL value, not *vb.

> + BUG_ON(!vb);
> +
> + spin_lock(&vb->pages_lock);
> + list_add_rcu(&page->lru, &vb->pages);
> + spin_unlock(&vb->pages_lock);
> +}
> +#endif /* CONFIG_BALLOON_COMPACTION */
> +
> +/* define the balloon_mapping->a_ops callbacks to allow compaction/migration */
> +static DEFINE_BALLOON_MAPPING_AOPS(virtio_balloon_aops,
> + virtballoon_migratepage,
> + virtballoon_isolatepage,
> + virtballoon_putbackpage);
> +
> static int virtballoon_probe(struct virtio_device *vdev)
> {
> struct virtio_balloon *vb;
> + struct address_space *vb_mapping;
> int err;
>
> vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
> @@ -351,12 +521,24 @@ static int virtballoon_probe(struct virtio_device *vdev)
> }
>
> INIT_LIST_HEAD(&vb->pages);
> + spin_lock_init(&vb->pages_lock);
> + mutex_init(&vb->balloon_lock);
> +
> vb->num_pages = 0;
> init_waitqueue_head(&vb->config_change);
> init_waitqueue_head(&vb->acked);
> vb->vdev = vdev;
> vb->need_stats_update = 0;
>
> + /* Allocate a special page->mapping for this balloon device */

This comment is not helpful, pls remove.

> + vb_mapping = alloc_balloon_mapping((void *)vb, &virtio_balloon_aops);

Cast to void * is not needed.

> + if (!vb_mapping) {
> + err = -ENOMEM;
> + goto out_free_vb;
> + }
> + /* Store the page->mapping reference for this balloon device */
> + vb->mapping = vb_mapping;

This comment is not helpful, pls remove.

> +
> err = init_vqs(vb);
> if (err)
> goto out_free_vb;
> @@ -367,12 +549,14 @@ static int virtballoon_probe(struct virtio_device *vdev)
> goto out_del_vqs;
> }
>
> + vb->balloon_status = BALLOON_OK;
> return 0;
>
> out_del_vqs:
> vdev->config->del_vqs(vdev);
> out_free_vb:
> kfree(vb);
> + kfree(vb_mapping);

I think it's better to wrap free for vb mapping too.

> out:
> return err;
> }
> @@ -394,8 +578,10 @@ static void __devexit virtballoon_remove(struct virtio_device *vdev)
> {
> struct virtio_balloon *vb = vdev->priv;
>
> + vb->balloon_status = BALLOON_REMOVAL;

This needs some kind of barrier.

> kthread_stop(vb->thread);
> remove_common(vb);
> + kfree(vb->mapping);
> kfree(vb);
> }
>
> @@ -409,6 +595,7 @@ static int virtballoon_freeze(struct virtio_device *vdev)
> * function is called.
> */
>
> + vb->balloon_status = BALLOON_REMOVAL;

Here module is not going away so why change status?

> remove_common(vb);
> return 0;
> }
> @@ -424,6 +611,7 @@ static int virtballoon_restore(struct virtio_device *vdev)
>
> fill_balloon(vb, towards_target(vb));
> update_balloon_size(vb);
> + vb->balloon_status = BALLOON_OK;

Isn't this too late to set status here? We filled the balloon ...


> return 0;
> }
> #endif
> --
> 1.7.11.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


mst at redhat

Aug 21, 2012, 7:57 AM

Post #3 of 5 (42 views)
Permalink
Re: [PATCH v8 3/5] virtio_balloon: introduce migration primitives to balloon pages [In reply to]

On Tue, Aug 21, 2012 at 09:47:46AM -0300, Rafael Aquini wrote:
> Memory fragmentation introduced by ballooning might reduce significantly
> the number of 2MB contiguous memory blocks that can be used within a guest,
> thus imposing performance penalties associated with the reduced number of
> transparent huge pages that could be used by the guest workload.
>
> Besides making balloon pages movable at allocation time and introducing
> the necessary primitives to perform balloon page migration/compaction,
> this patch also introduces the following locking scheme to provide the
> proper synchronization and protection for struct virtio_balloon elements
> against concurrent accesses due to parallel operations introduced by
> memory compaction / page migration.
> - balloon_lock (mutex) : synchronizes the access demand to elements of
> struct virtio_balloon and its queue operations;
> - pages_lock (spinlock): special protection to balloon pages list against
> concurrent list handling operations;
> - virtio_baloon->pages list handling sync by RCU operations;
>
> Signed-off-by: Rafael Aquini <aquini [at] redhat>

Sorry about sending multiple comments to same patch.
I just thought of some more questions.

> ---
> drivers/virtio/virtio_balloon.c | 210 +++++++++++++++++++++++++++++++++++++---
> 1 file changed, 199 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 0908e60..bda7bb0 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -27,6 +27,7 @@
> #include <linux/delay.h>
> #include <linux/slab.h>
> #include <linux/module.h>
> +#include <linux/balloon_compaction.h>
>
> /*
> * Balloon device works in 4K page units. So each page is pointed to by
> @@ -35,6 +36,12 @@
> */
> #define VIRTIO_BALLOON_PAGES_PER_PAGE (PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
>
> +/* flags used to hint compaction procedures about the balloon device status */
> +enum balloon_status_flags {
> + BALLOON_REMOVAL = 0, /* balloon device is under removal steps */
> + BALLOON_OK, /* balloon device is up and running */
> +};
> +
> struct virtio_balloon
> {
> struct virtio_device *vdev;
> @@ -46,11 +53,24 @@ struct virtio_balloon
> /* The thread servicing the balloon. */
> struct task_struct *thread;
>
> + /* balloon special page->mapping */
> + struct address_space *mapping;
> +
> + /* Synchronize access/update to this struct virtio_balloon elements */
> + struct mutex balloon_lock;
> +
> /* Waiting for host to ack the pages we released. */
> wait_queue_head_t acked;
>
> /* Number of balloon pages we've told the Host we're not using. */
> unsigned int num_pages;
> +
> + /* balloon device status flag */
> + unsigned short balloon_status;
> +
> + /* Protect 'pages' list against concurrent handling */
> + spinlock_t pages_lock;
> +
> /*
> * The pages we've told the Host we're not using.
> * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE
> @@ -122,13 +142,17 @@ static void set_page_pfns(u32 pfns[], struct page *page)
>
> static void fill_balloon(struct virtio_balloon *vb, size_t num)
> {
> + /* Get the proper GFP alloc mask from vb->mapping flags */
> + gfp_t vb_gfp_mask = mapping_gfp_mask(vb->mapping);
> +
> /* We can only do one array worth at a time. */
> num = min(num, ARRAY_SIZE(vb->pfns));
>
> + mutex_lock(&vb->balloon_lock);
> for (vb->num_pfns = 0; vb->num_pfns < num;
> vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
> - struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY |
> - __GFP_NOMEMALLOC | __GFP_NOWARN);
> + struct page *page = alloc_page(vb_gfp_mask | __GFP_NORETRY |
> + __GFP_NOWARN | __GFP_NOMEMALLOC);
> if (!page) {
> if (printk_ratelimit())
> dev_printk(KERN_INFO, &vb->vdev->dev,
> @@ -141,7 +165,10 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
> set_page_pfns(vb->pfns + vb->num_pfns, page);
> vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
> totalram_pages--;
> - list_add(&page->lru, &vb->pages);
> + spin_lock(&vb->pages_lock);
> + list_add_rcu(&page->lru, &vb->pages);
> + assign_balloon_mapping(page, vb->mapping);
> + spin_unlock(&vb->pages_lock);
> }
>
> /* Didn't get any? Oh well. */
> @@ -149,6 +176,7 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
> return;
>
> tell_host(vb, vb->inflate_vq);
> + mutex_unlock(&vb->balloon_lock);
> }
>
> static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
> @@ -169,21 +197,48 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
> /* We can only do one array worth at a time. */
> num = min(num, ARRAY_SIZE(vb->pfns));
>
> + mutex_lock(&vb->balloon_lock);
> for (vb->num_pfns = 0; vb->num_pfns < num;
> vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
> - page = list_first_entry(&vb->pages, struct page, lru);
> - list_del(&page->lru);
> + /*
> + * We can race against 'virtballoon_isolatepage()' and end up
> + * stumbling across a _temporarily_ empty 'pages' list.
> + */
> + spin_lock(&vb->pages_lock);
> + page = list_first_or_null_rcu(&vb->pages, struct page, lru);
> + if (!page) {
> + spin_unlock(&vb->pages_lock);
> + break;

Now what happens? putback will return the pages here but you have exited
the loop.

> + }
> + /*
> + * It is safe now to drop page->mapping and delete this page
> + * from balloon page list, since we are grabbing 'pages_lock'
> + * which prevents 'virtballoon_isolatepage()' from acting.
> + */
> + clear_balloon_mapping(page);
> + list_del_rcu(&page->lru);
> + spin_unlock(&vb->pages_lock);
> set_page_pfns(vb->pfns + vb->num_pfns, page);
> vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
> }
> + /*
> + * Syncrhonize

Typo.

> RCU grace period and wait for all RCU read critical side
> + * sections to finish before proceeding with page release steps.
> + * This avoids compaction/migration callback races against balloon
> + * device removal steps.
> + */
> + synchronize_rcu();
>
> /*
> * Note that if
> * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
> * is true, we *have* to do it in this order
> */
> - tell_host(vb, vb->deflate_vq);
> - release_pages_by_pfn(vb->pfns, vb->num_pfns);
> + if (vb->num_pfns > 0) {
> + tell_host(vb, vb->deflate_vq);
> + release_pages_by_pfn(vb->pfns, vb->num_pfns);
> + }
> + mutex_unlock(&vb->balloon_lock);
> }
>
> static inline void update_stat(struct virtio_balloon *vb, int idx,
> @@ -239,6 +294,7 @@ static void stats_handle_request(struct virtio_balloon *vb)
> struct scatterlist sg;
> unsigned int len;
>
> + mutex_lock(&vb->balloon_lock);
> vb->need_stats_update = 0;
> update_balloon_stats(vb);
>
> @@ -249,6 +305,7 @@ static void stats_handle_request(struct virtio_balloon *vb)
> if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0)
> BUG();
> virtqueue_kick(vq);
> + mutex_unlock(&vb->balloon_lock);
> }
>
> static void virtballoon_changed(struct virtio_device *vdev)
> @@ -261,22 +318,27 @@ static void virtballoon_changed(struct virtio_device *vdev)
> static inline s64 towards_target(struct virtio_balloon *vb)
> {
> __le32 v;
> - s64 target;
> + s64 target, actual;
>
> + mutex_lock(&vb->balloon_lock);
> + actual = vb->num_pages;
> vb->vdev->config->get(vb->vdev,
> offsetof(struct virtio_balloon_config, num_pages),
> &v, sizeof(v));
> target = le32_to_cpu(v);
> - return target - vb->num_pages;
> + mutex_unlock(&vb->balloon_lock);
> + return target - actual;
> }
>
> static void update_balloon_size(struct virtio_balloon *vb)
> {
> - __le32 actual = cpu_to_le32(vb->num_pages);
> -
> + __le32 actual;
> + mutex_lock(&vb->balloon_lock);
> + actual = cpu_to_le32(vb->num_pages);
> vb->vdev->config->set(vb->vdev,
> offsetof(struct virtio_balloon_config, actual),
> &actual, sizeof(actual));
> + mutex_unlock(&vb->balloon_lock);
> }
>
> static int balloon(void *_vballoon)
> @@ -339,9 +401,117 @@ static int init_vqs(struct virtio_balloon *vb)
> return 0;
> }
>
> +#ifdef CONFIG_BALLOON_COMPACTION
> +/*
> + * Populate balloon_mapping->a_ops callback method to perform the balloon
> + * page migration task.
> + *
> + * After a ballooned page gets isolated by compaction procedures, this is the
> + * function that performs the page migration on behalf of move_to_new_page(),
> + * when the last calls (page)->mapping->a_ops->migratepage.
> + *
> + * Page migration for virtio balloon is done in a simple swap fashion which
> + * follows these two steps:
> + * 1) insert newpage into vb->pages list and update the host about it;
> + * 2) update the host about the removed old page from vb->pages list;
> + */
> +int virtballoon_migratepage(struct address_space *mapping,
> + struct page *newpage, struct page *page, enum migrate_mode mode)
> +{
> + struct virtio_balloon *vb = (void *)__page_balloon_device(page);
> +
> + /* at this point, besides very unlikely, a NULL *vb is a serious bug */
> + BUG_ON(!vb);
> +
> + /*
> + * Skip page migration step if the memory balloon device is under its
> + * removal procedure, to avoid racing against module unload.
> + *
> + * If there still are isolated (balloon) pages under migration lists,
> + * 'virtballoon_putbackpage()' will take care of them properly, before
> + * the module unload finishes.
> + */
> + if (vb->balloon_status == BALLOON_REMOVAL)
> + return -EAGAIN;
> +
> + mutex_lock(&vb->balloon_lock);
> +
> + /* balloon's page migration 1st step */
> + vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
> + spin_lock(&vb->pages_lock);
> + list_add_rcu(&newpage->lru, &vb->pages);
> + spin_unlock(&vb->pages_lock);
> + set_page_pfns(vb->pfns, newpage);
> + tell_host(vb, vb->inflate_vq);
> +
> + /* balloon's page migration 2nd step */
> + vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
> + set_page_pfns(vb->pfns, page);
> + tell_host(vb, vb->deflate_vq);
> +
> + mutex_unlock(&vb->balloon_lock);
> + return 0;
> +}
> +
> +/*
> + * Populate balloon_mapping->a_ops callback method to help compaction on
> + * isolating a page from the balloon page list for posterior migration.

What does posterior migration mean?

> + */
> +int virtballoon_isolatepage(struct page *page)
> +{

Nothing at all in this function seems to be virtio balloon
specific. Can it move to mm core?

> + struct virtio_balloon *vb = (void *)__page_balloon_device(page);
> + int ret = 0;
> + /*
> + * If we stumble across a NULL *vb here, it means this page has been
> + * already released by 'leak_balloon()'.
> + *
> + * We also skip the page isolation step if the memory balloon device is
> + * under its removal procedure, to avoid racing against module unload.
> + */
> + if (vb && (vb->balloon_status != BALLOON_REMOVAL)) {
> + spin_lock(&vb->pages_lock);
> + /*
> + * virtballoon_isolatepage() can race against leak_balloon(),
> + * and (wrongly) isolate a page that is about to be freed.
> + * Test page->mapping under pages_lock to close that window.
> + */

Looks strange. If so could vb have gone away completely?
And if so would not this trigger use after free?
Can we take some lock when leaking page to prevent this, instead?
page_lock comes to mind ...

> + if (rcu_access_pointer(page->mapping) == vb->mapping) {
> + /* It is safe to isolate this page, now */
> + list_del_rcu(&page->lru);
> + ret = 1;
> + }
> + spin_unlock(&vb->pages_lock);
> + }
> + return ret;
> +}
> +
> +/*
> + * Populate balloon_mapping->a_ops callback method to help compaction on
> + * re-inserting a not-migrated isolated page into the balloon page list.
> + */
> +void virtballoon_putbackpage(struct page *page)
> +{
> + struct virtio_balloon *vb = (void *)__page_balloon_device(page);
> +
> + /* at this point, besides very unlikely, a NULL *vb is a serious bug */
> + BUG_ON(!vb);
> +
> + spin_lock(&vb->pages_lock);
> + list_add_rcu(&page->lru, &vb->pages);
> + spin_unlock(&vb->pages_lock);
> +}
> +#endif /* CONFIG_BALLOON_COMPACTION */
> +
> +/* define the balloon_mapping->a_ops callbacks to allow compaction/migration */
> +static DEFINE_BALLOON_MAPPING_AOPS(virtio_balloon_aops,
> + virtballoon_migratepage,
> + virtballoon_isolatepage,
> + virtballoon_putbackpage);
> +
> static int virtballoon_probe(struct virtio_device *vdev)
> {
> struct virtio_balloon *vb;
> + struct address_space *vb_mapping;
> int err;
>
> vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
> @@ -351,12 +521,24 @@ static int virtballoon_probe(struct virtio_device *vdev)
> }
>
> INIT_LIST_HEAD(&vb->pages);
> + spin_lock_init(&vb->pages_lock);
> + mutex_init(&vb->balloon_lock);
> +
> vb->num_pages = 0;
> init_waitqueue_head(&vb->config_change);
> init_waitqueue_head(&vb->acked);
> vb->vdev = vdev;
> vb->need_stats_update = 0;
>
> + /* Allocate a special page->mapping for this balloon device */
> + vb_mapping = alloc_balloon_mapping((void *)vb, &virtio_balloon_aops);
> + if (!vb_mapping) {
> + err = -ENOMEM;
> + goto out_free_vb;
> + }
> + /* Store the page->mapping reference for this balloon device */
> + vb->mapping = vb_mapping;
> +
> err = init_vqs(vb);
> if (err)
> goto out_free_vb;
> @@ -367,12 +549,14 @@ static int virtballoon_probe(struct virtio_device *vdev)
> goto out_del_vqs;
> }
>
> + vb->balloon_status = BALLOON_OK;
> return 0;
>
> out_del_vqs:
> vdev->config->del_vqs(vdev);
> out_free_vb:
> kfree(vb);
> + kfree(vb_mapping);
> out:
> return err;
> }
> @@ -394,8 +578,10 @@ static void __devexit virtballoon_remove(struct virtio_device *vdev)
> {
> struct virtio_balloon *vb = vdev->priv;
>
> + vb->balloon_status = BALLOON_REMOVAL;
> kthread_stop(vb->thread);
> remove_common(vb);
> + kfree(vb->mapping);
> kfree(vb);
> }
>
> @@ -409,6 +595,7 @@ static int virtballoon_freeze(struct virtio_device *vdev)
> * function is called.
> */
>
> + vb->balloon_status = BALLOON_REMOVAL;
> remove_common(vb);
> return 0;
> }
> @@ -424,6 +611,7 @@ static int virtballoon_restore(struct virtio_device *vdev)
>
> fill_balloon(vb, towards_target(vb));
> update_balloon_size(vb);
> + vb->balloon_status = BALLOON_OK;
> return 0;
> }
> #endif
> --
> 1.7.11.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


peterz at infradead

Aug 21, 2012, 8:34 AM

Post #4 of 5 (38 views)
Permalink
Re: [PATCH v8 3/5] virtio_balloon: introduce migration primitives to balloon pages [In reply to]

On Tue, 2012-08-21 at 17:40 +0300, Michael S. Tsirkin wrote:
> > + vb->balloon_status = BALLOON_REMOVAL;
>
> This needs some kind of barrier.
>
> > kthread_stop(vb->thread);

kthread_stop() implies an smp_wmb() [ because it needs to do a wakeup ].
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


peterz at infradead

Aug 21, 2012, 8:37 AM

Post #5 of 5 (37 views)
Permalink
Re: [PATCH v8 3/5] virtio_balloon: introduce migration primitives to balloon pages [In reply to]

On Tue, 2012-08-21 at 17:40 +0300, Michael S. Tsirkin wrote:
> > + spin_lock(&vb->pages_lock);
> > + page = list_first_or_null_rcu(&vb->pages, struct page, lru);
>
> Why is list_first_or_null_rcu called outside
> RCU critical section here?

It looks like vb->pages_lock is the exclusive (or modification)
counterpart to the rcu-read-lock in this particular case, so it should
be fine.

Although for that same reason, it seems superfluous to use the RCU list
method since we're exclusive with list manipulations anyway.

> > + if (!page) {
> > + spin_unlock(&vb->pages_lock);
> > + break;
> > + }
> > + /*
> > + * It is safe now to drop page->mapping and delete this page
> > + * from balloon page list, since we are grabbing 'pages_lock'
> > + * which prevents 'virtballoon_isolatepage()' from acting.
> > + */
> > + clear_balloon_mapping(page);
> > + list_del_rcu(&page->lru);
> > + spin_unlock(&vb->pages_lock);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo [at] vger
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Linux kernel RSS feed   Index | Next | Previous | View Threaded
 
 


Interested in having your list archived? Contact Gossamer Threads
 
  Web Applications & Managed Hosting Powered by Gossamer Threads Inc.