Login | Register For Free | Help
Search for: (Advanced)

Mailing List Archive: Xen: Devel

[PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl

 

 

Xen devel RSS feed   Index | Next | Previous | View Threaded


david.vrabel at citrix

Aug 29, 2012, 6:15 AM

Post #1 of 19 (297 views)
Permalink
[PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl

From: David Vrabel <david.vrabel [at] citrix>

PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
field for reporting the error code for every frame that could not be
mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.

Signed-off-by: David Vrabel <david.vrabel [at] citrix>
---
drivers/xen/privcmd.c | 78 +++++++++++++++++++++++++++++++++++++-----------
include/xen/privcmd.h | 21 ++++++++++++-
2 files changed, 80 insertions(+), 19 deletions(-)

diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index ccee0f1..ddd32cf 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -248,18 +248,23 @@ struct mmap_batch_state {
struct vm_area_struct *vma;
int err;

- xen_pfn_t __user *user;
+ xen_pfn_t __user *user_mfn;
+ int __user *user_err;
};

static int mmap_batch_fn(void *data, void *state)
{
xen_pfn_t *mfnp = data;
+ int *err = data;
struct mmap_batch_state *st = state;
+ int ret;

- if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
- st->vma->vm_page_prot, st->domain) < 0) {
- *mfnp |= 0xf0000000U;
- st->err++;
+ ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
+ st->vma->vm_page_prot, st->domain);
+ if (ret < 0) {
+ *err = ret;
+ if (st->err == 0 || st->err == -ENOENT)
+ st->err = ret;
}
st->va += PAGE_SIZE;

@@ -268,18 +273,30 @@ static int mmap_batch_fn(void *data, void *state)

static int mmap_return_errors(void *data, void *state)
{
- xen_pfn_t *mfnp = data;
+ int *err = data;
struct mmap_batch_state *st = state;
+ int ret;
+
+ if (st->user_err)
+ return __put_user(*err, st->user_err++);
+ else {
+ xen_pfn_t mfn;

- return put_user(*mfnp, st->user++);
+ ret = __get_user(mfn, st->user_mfn);
+ if (ret < 0)
+ return ret;
+
+ mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR;
+ return __put_user(mfn, st->user_mfn++);
+ }
}

static struct vm_operations_struct privcmd_vm_ops;

-static long privcmd_ioctl_mmap_batch(void __user *udata)
+static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
{
int ret;
- struct privcmd_mmapbatch m;
+ struct privcmd_mmapbatch_v2 m;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long nr_pages;
@@ -289,15 +306,32 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
if (!xen_initial_domain())
return -EPERM;

- if (copy_from_user(&m, udata, sizeof(m)))
- return -EFAULT;
+ switch (version) {
+ case 1:
+ if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
+ return -EFAULT;
+ /* Returns per-frame error in m.arr. */
+ m.err = NULL;
+ if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
+ return -EFAULT;
+ break;
+ case 2:
+ if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
+ return -EFAULT;
+ /* Returns per-frame error code in m.err. */
+ if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
+ return -EFAULT;
+ break;
+ default:
+ return -EINVAL;
+ }

nr_pages = m.num;
if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
return -EINVAL;

ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
- m.arr);
+ (xen_pfn_t *)m.arr);

if (ret || list_empty(&pagelist))
goto out;
@@ -325,12 +359,16 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)

up_write(&mm->mmap_sem);

- if (state.err > 0) {
- state.user = m.arr;
+ if (state.err) {
+ state.user_mfn = (xen_pfn_t *)m.arr;
+ state.user_err = m.err;
ret = traverse_pages(m.num, sizeof(xen_pfn_t),
- &pagelist,
- mmap_return_errors, &state);
- }
+ &pagelist,
+ mmap_return_errors, &state);
+ if (!ret)
+ ret = state.err;
+ } else if (m.err)
+ __clear_user(m.err, m.num * sizeof(*m.err));

out:
free_page_list(&pagelist);
@@ -354,7 +392,11 @@ static long privcmd_ioctl(struct file *file,
break;

case IOCTL_PRIVCMD_MMAPBATCH:
- ret = privcmd_ioctl_mmap_batch(udata);
+ ret = privcmd_ioctl_mmap_batch(udata, 1);
+ break;
+
+ case IOCTL_PRIVCMD_MMAPBATCH_V2:
+ ret = privcmd_ioctl_mmap_batch(udata, 2);
break;

default:
diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
index 17857fb..37e5255 100644
--- a/include/xen/privcmd.h
+++ b/include/xen/privcmd.h
@@ -59,13 +59,30 @@ struct privcmd_mmapbatch {
int num; /* number of pages to populate */
domid_t dom; /* target domain */
__u64 addr; /* virtual address */
- xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
+ xen_pfn_t __user *arr; /* array of mfns - or'd with
+ PRIVCMD_MMAPBATCH_MFN_ERROR on err */
+};
+
+#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
+
+struct privcmd_mmapbatch_v2 {
+ unsigned int num; /* number of pages to populate */
+ domid_t dom; /* target domain */
+ __u64 addr; /* virtual address */
+ const xen_pfn_t __user *arr; /* array of mfns */
+ int __user *err; /* array of error codes */
};

/*
* @cmd: IOCTL_PRIVCMD_HYPERCALL
* @arg: &privcmd_hypercall_t
* Return: Value returned from execution of the specified hypercall.
+ *
+ * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
+ * @arg: &struct privcmd_mmapbatch_v2
+ * Return: 0 if all pages were mapped successfully. -ENOENT if all
+ * failed mappings returned -ENOENT, otherwise the error code of the
+ * first failed mapping.
*/
#define IOCTL_PRIVCMD_HYPERCALL \
_IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
@@ -73,5 +90,7 @@ struct privcmd_mmapbatch {
_IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
#define IOCTL_PRIVCMD_MMAPBATCH \
_IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
+#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
+ _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))

#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
--
1.7.2.5


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


andreslc at gridcentric

Aug 29, 2012, 9:14 AM

Post #2 of 19 (249 views)
Permalink
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl [In reply to]

On Aug 29, 2012, at 9:15 AM, David Vrabel wrote:

> From: David Vrabel <david.vrabel [at] citrix>
>
> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
> field for reporting the error code for every frame that could not be
> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
>
> Signed-off-by: David Vrabel <david.vrabel [at] citrix>
> ---
> drivers/xen/privcmd.c | 78 +++++++++++++++++++++++++++++++++++++-----------
> include/xen/privcmd.h | 21 ++++++++++++-
> 2 files changed, 80 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> index ccee0f1..ddd32cf 100644
> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -248,18 +248,23 @@ struct mmap_batch_state {
> struct vm_area_struct *vma;
> int err;
>
> - xen_pfn_t __user *user;
> + xen_pfn_t __user *user_mfn;
> + int __user *user_err;
> };
>
> static int mmap_batch_fn(void *data, void *state)
> {
> xen_pfn_t *mfnp = data;
> + int *err = data;
Am I missing something, or is there aliasing here? Both mfnp and err point to data?
> struct mmap_batch_state *st = state;
> + int ret;
>
> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> - st->vma->vm_page_prot, st->domain) < 0) {
> - *mfnp |= 0xf0000000U;
> - st->err++;
> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> + st->vma->vm_page_prot, st->domain);
> + if (ret < 0) {
> + *err = ret;
> + if (st->err == 0 || st->err == -ENOENT)
> + st->err = ret;
This will unset -ENOENT if a frame after an ENOENT error fails differently.

> }
> st->va += PAGE_SIZE;
>
> @@ -268,18 +273,30 @@ static int mmap_batch_fn(void *data, void *state)
>
> static int mmap_return_errors(void *data, void *state)
> {
> - xen_pfn_t *mfnp = data;
> + int *err = data;
> struct mmap_batch_state *st = state;
> + int ret;
> +
> + if (st->user_err)
> + return __put_user(*err, st->user_err++);
> + else {
> + xen_pfn_t mfn;
>
> - return put_user(*mfnp, st->user++);
> + ret = __get_user(mfn, st->user_mfn);
> + if (ret < 0)
> + return ret;
> +
> + mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR;
> + return __put_user(mfn, st->user_mfn++);
> + }
> }
>
> static struct vm_operations_struct privcmd_vm_ops;
>
> -static long privcmd_ioctl_mmap_batch(void __user *udata)
> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
> {
> int ret;
> - struct privcmd_mmapbatch m;
> + struct privcmd_mmapbatch_v2 m;
> struct mm_struct *mm = current->mm;
> struct vm_area_struct *vma;
> unsigned long nr_pages;
> @@ -289,15 +306,32 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
> if (!xen_initial_domain())
> return -EPERM;
>
> - if (copy_from_user(&m, udata, sizeof(m)))
> - return -EFAULT;
> + switch (version) {
> + case 1:
> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
> + return -EFAULT;
> + /* Returns per-frame error in m.arr. */
> + m.err = NULL;
> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
> + return -EFAULT;
> + break;
> + case 2:
> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
> + return -EFAULT;
> + /* Returns per-frame error code in m.err. */
> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
> + return -EFAULT;
> + break;
> + default:
> + return -EINVAL;
> + }
>
> nr_pages = m.num;
> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
> return -EINVAL;
>
> ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
> - m.arr);
> + (xen_pfn_t *)m.arr);
>
> if (ret || list_empty(&pagelist))
> goto out;
> @@ -325,12 +359,16 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>
> up_write(&mm->mmap_sem);
>
> - if (state.err > 0) {
> - state.user = m.arr;
> + if (state.err) {
> + state.user_mfn = (xen_pfn_t *)m.arr;
> + state.user_err = m.err;
> ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> - &pagelist,
> - mmap_return_errors, &state);
> - }
> + &pagelist,
> + mmap_return_errors, &state);
The callback now maps data to err (instead of mfnp) … but I see no change to gather_array other than a cast … am I missing something?

Thanks
Andres
> + if (!ret)
> + ret = state.err;
> + } else if (m.err)
> + __clear_user(m.err, m.num * sizeof(*m.err));
>
> out:
> free_page_list(&pagelist);
> @@ -354,7 +392,11 @@ static long privcmd_ioctl(struct file *file,
> break;
>
> case IOCTL_PRIVCMD_MMAPBATCH:
> - ret = privcmd_ioctl_mmap_batch(udata);
> + ret = privcmd_ioctl_mmap_batch(udata, 1);
> + break;
> +
> + case IOCTL_PRIVCMD_MMAPBATCH_V2:
> + ret = privcmd_ioctl_mmap_batch(udata, 2);
> break;
>
> default:
> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
> index 17857fb..37e5255 100644
> --- a/include/xen/privcmd.h
> +++ b/include/xen/privcmd.h
> @@ -59,13 +59,30 @@ struct privcmd_mmapbatch {
> int num; /* number of pages to populate */
> domid_t dom; /* target domain */
> __u64 addr; /* virtual address */
> - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
> + xen_pfn_t __user *arr; /* array of mfns - or'd with
> + PRIVCMD_MMAPBATCH_MFN_ERROR on err */
> +};
> +
> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
> +
> +struct privcmd_mmapbatch_v2 {
> + unsigned int num; /* number of pages to populate */
> + domid_t dom; /* target domain */
> + __u64 addr; /* virtual address */
> + const xen_pfn_t __user *arr; /* array of mfns */
> + int __user *err; /* array of error codes */
> };
>
> /*
> * @cmd: IOCTL_PRIVCMD_HYPERCALL
> * @arg: &privcmd_hypercall_t
> * Return: Value returned from execution of the specified hypercall.
> + *
> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
> + * @arg: &struct privcmd_mmapbatch_v2
> + * Return: 0 if all pages were mapped successfully. -ENOENT if all
> + * failed mappings returned -ENOENT, otherwise the error code of the
> + * first failed mapping.
> */
> #define IOCTL_PRIVCMD_HYPERCALL \
> _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
> @@ -73,5 +90,7 @@ struct privcmd_mmapbatch {
> _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
> #define IOCTL_PRIVCMD_MMAPBATCH \
> _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
> +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
> + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
>
> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
> --
> 1.7.2.5
>


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


david.vrabel at citrix

Aug 29, 2012, 9:36 AM

Post #3 of 19 (246 views)
Permalink
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl [In reply to]

On 29/08/12 17:14, Andres Lagar-Cavilla wrote:
>
> On Aug 29, 2012, at 9:15 AM, David Vrabel wrote:
>
>> From: David Vrabel <david.vrabel [at] citrix>
>>
>> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
>> field for reporting the error code for every frame that could not be
>> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
[...]
>> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
>> index ccee0f1..ddd32cf 100644
>> --- a/drivers/xen/privcmd.c
>> +++ b/drivers/xen/privcmd.c
>> @@ -248,18 +248,23 @@ struct mmap_batch_state {
>> struct vm_area_struct *vma;
>> int err;
>>
>> - xen_pfn_t __user *user;
>> + xen_pfn_t __user *user_mfn;
>> + int __user *user_err;
>> };
>>
>> static int mmap_batch_fn(void *data, void *state)
>> {
>> xen_pfn_t *mfnp = data;
>> + int *err = data;
> Am I missing something or is there an aliasing here? Both mfnp and err point to data?

There is deliberate aliasing here. We use the mfn array to save the
error codes for later processing.

>> struct mmap_batch_state *st = state;
>> + int ret;
>>
>> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
>> - st->vma->vm_page_prot, st->domain) < 0) {
>> - *mfnp |= 0xf0000000U;
>> - st->err++;
>> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
>> + st->vma->vm_page_prot, st->domain);
>> + if (ret < 0) {
>> + *err = ret;
>> + if (st->err == 0 || st->err == -ENOENT)
>> + st->err = ret;
> This will unset -ENOENT if a frame after an ENOENT error fails differently.

I thought that was what the original implementation did but it seems it
does not.

>> @@ -325,12 +359,16 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>>
>> up_write(&mm->mmap_sem);
>>
>> - if (state.err > 0) {
>> - state.user = m.arr;
>> + if (state.err) {
>> + state.user_mfn = (xen_pfn_t *)m.arr;
>> + state.user_err = m.err;
>> ret = traverse_pages(m.num, sizeof(xen_pfn_t),
>> - &pagelist,
>> - mmap_return_errors, &state);
>> - }
>> + &pagelist,
>> + mmap_return_errors, &state);

> The callback now maps data to err (instead of mfnp … but I see no
> change to the gather_array other than a cast …am I missing something?

The kernel mfn and the err array are aliased.

I could have made gather_array() allow the kernel array to have larger
elements than the user array but that looked like too much work.

David

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


andreslc at gridcentric

Aug 29, 2012, 11:10 AM

Post #4 of 19 (245 views)
Permalink
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl [In reply to]

On Aug 29, 2012, at 12:36 PM, David Vrabel wrote:

> On 29/08/12 17:14, Andres Lagar-Cavilla wrote:
>>
>> On Aug 29, 2012, at 9:15 AM, David Vrabel wrote:
>>
>>> From: David Vrabel <david.vrabel [at] citrix>
>>>
>>> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
>>> field for reporting the error code for every frame that could not be
>>> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
> [...]
>>> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
>>> index ccee0f1..ddd32cf 100644
>>> --- a/drivers/xen/privcmd.c
>>> +++ b/drivers/xen/privcmd.c
>>> @@ -248,18 +248,23 @@ struct mmap_batch_state {
>>> struct vm_area_struct *vma;
>>> int err;
>>>
>>> - xen_pfn_t __user *user;
>>> + xen_pfn_t __user *user_mfn;
>>> + int __user *user_err;
>>> };
>>>
>>> static int mmap_batch_fn(void *data, void *state)
>>> {
>>> xen_pfn_t *mfnp = data;
>>> + int *err = data;
>> Am I missing something or is there an aliasing here? Both mfnp and err point to data?
>
> There is deliberate aliasing here. We use the mfn array to save the
> error codes for later processing.

May I suggest a comment to clarify this here? Are xen_pfn_t and int the same size in both bitnesses? The very fact that I raise the question is an argument against this black-magic aliasing. Imho.

An explicit union for each slot in the *data, or passing both arrays to the callback, looks better to me.

>
>>> struct mmap_batch_state *st = state;
>>> + int ret;
>>>
>>> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
>>> - st->vma->vm_page_prot, st->domain) < 0) {
>>> - *mfnp |= 0xf0000000U;
>>> - st->err++;
>>> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
>>> + st->vma->vm_page_prot, st->domain);
>>> + if (ret < 0) {
>>> + *err = ret;
>>> + if (st->err == 0 || st->err == -ENOENT)
>>> + st->err = ret;
>> This will unset -ENOENT if a frame after an ENOENT error fails differently.
>
> I thought that was what the original implementation did but it seems it
> does not
I think the best way to do this is:

if ((ret == -ENOENT) && (st->err == 0))
st->err = -ENOENT;

Then st->err is -ENOENT if there was at least one individual -ENOENT, or zero otherwise. Which is the expectation of libxc (barring an EFAULT or some other higher-level whole-operation error).

Andres

> .
>
>>> @@ -325,12 +359,16 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>>>
>>> up_write(&mm->mmap_sem);
>>>
>>> - if (state.err > 0) {
>>> - state.user = m.arr;
>>> + if (state.err) {
>>> + state.user_mfn = (xen_pfn_t *)m.arr;
>>> + state.user_err = m.err;
>>> ret = traverse_pages(m.num, sizeof(xen_pfn_t),
>>> - &pagelist,
>>> - mmap_return_errors, &state);
>>> - }
>>> + &pagelist,
>>> + mmap_return_errors, &state);
>
>> The callback now maps data to err (instead of mfnp … but I see no
>> change to the gather_array other than a cast …am I missing something?
>
> The kernel mfn and the err array are aliased.
>
> I could have made gather_array() allow the kernel array to have larger
> elements than the user array but that looked like too much work.
>
> David


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


david.vrabel at citrix

Aug 30, 2012, 5:58 AM

Post #5 of 19 (251 views)
Permalink
[PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl [In reply to]

From: David Vrabel <david.vrabel [at] citrix>

PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
field for reporting the error code for every frame that could not be
mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.

Signed-off-by: David Vrabel <david.vrabel [at] citrix>
---
drivers/xen/privcmd.c | 99 +++++++++++++++++++++++++++++++++++++++---------
include/xen/privcmd.h | 23 +++++++++++-
2 files changed, 102 insertions(+), 20 deletions(-)

diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index ccee0f1..c0e89e7 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
*/
static int gather_array(struct list_head *pagelist,
unsigned nelem, size_t size,
- void __user *data)
+ const void __user *data)
{
unsigned pageidx;
void *pagedata;
@@ -248,18 +248,37 @@ struct mmap_batch_state {
struct vm_area_struct *vma;
int err;

- xen_pfn_t __user *user;
+ xen_pfn_t __user *user_mfn;
+ int __user *user_err;
};

static int mmap_batch_fn(void *data, void *state)
{
xen_pfn_t *mfnp = data;
struct mmap_batch_state *st = state;
+ int ret;

- if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
- st->vma->vm_page_prot, st->domain) < 0) {
- *mfnp |= 0xf0000000U;
- st->err++;
+ ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
+ st->vma->vm_page_prot, st->domain);
+ if (ret < 0) {
+ /*
+ * Error reporting is a mess but userspace relies on
+ * it behaving this way.
+ *
+ * V2 needs to a) return the result of each frame's
+ * remap; and b) return -ENOENT if any frame failed
+ * with -ENOENT.
+ *
+ * In this first pass the error code is saved by
+ * overwriting the mfn and an error is indicated in
+ * st->err.
+ *
+ * The second pass by mmap_return_errors() will write
+ * the error codes to user space and get the right
+ * ioctl return value.
+ */
+ *(int *)mfnp = ret;
+ st->err = ret;
}
st->va += PAGE_SIZE;

@@ -270,16 +289,33 @@ static int mmap_return_errors(void *data, void *state)
{
xen_pfn_t *mfnp = data;
struct mmap_batch_state *st = state;
+ int ret;
+
+ if (st->user_err) {
+ int err = *(int *)mfnp;
+
+ if (err == -ENOENT)
+ st->err = err;

- return put_user(*mfnp, st->user++);
+ return __put_user(err, st->user_err++);
+ } else {
+ xen_pfn_t mfn;
+
+ ret = __get_user(mfn, st->user_mfn);
+ if (ret < 0)
+ return ret;
+
+ mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR;
+ return __put_user(mfn, st->user_mfn++);
+ }
}

static struct vm_operations_struct privcmd_vm_ops;

-static long privcmd_ioctl_mmap_batch(void __user *udata)
+static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
{
int ret;
- struct privcmd_mmapbatch m;
+ struct privcmd_mmapbatch_v2 m;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long nr_pages;
@@ -289,15 +325,31 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
if (!xen_initial_domain())
return -EPERM;

- if (copy_from_user(&m, udata, sizeof(m)))
- return -EFAULT;
+ switch (version) {
+ case 1:
+ if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
+ return -EFAULT;
+ /* Returns per-frame error in m.arr. */
+ m.err = NULL;
+ if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
+ return -EFAULT;
+ break;
+ case 2:
+ if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
+ return -EFAULT;
+ /* Returns per-frame error code in m.err. */
+ if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
+ return -EFAULT;
+ break;
+ default:
+ return -EINVAL;
+ }

nr_pages = m.num;
if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
return -EINVAL;

- ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
- m.arr);
+ ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

if (ret || list_empty(&pagelist))
goto out;
@@ -325,12 +377,17 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)

up_write(&mm->mmap_sem);

- if (state.err > 0) {
- state.user = m.arr;
+ if (state.err) {
+ state.err = 0;
+ state.user_mfn = (xen_pfn_t *)m.arr;
+ state.user_err = m.err;
ret = traverse_pages(m.num, sizeof(xen_pfn_t),
- &pagelist,
- mmap_return_errors, &state);
- }
+ &pagelist,
+ mmap_return_errors, &state);
+ if (ret >= 0)
+ ret = state.err;
+ } else if (m.err)
+ __clear_user(m.err, m.num * sizeof(*m.err));

out:
free_page_list(&pagelist);
@@ -354,7 +411,11 @@ static long privcmd_ioctl(struct file *file,
break;

case IOCTL_PRIVCMD_MMAPBATCH:
- ret = privcmd_ioctl_mmap_batch(udata);
+ ret = privcmd_ioctl_mmap_batch(udata, 1);
+ break;
+
+ case IOCTL_PRIVCMD_MMAPBATCH_V2:
+ ret = privcmd_ioctl_mmap_batch(udata, 2);
break;

default:
diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
index 17857fb..f60d75c 100644
--- a/include/xen/privcmd.h
+++ b/include/xen/privcmd.h
@@ -59,13 +59,32 @@ struct privcmd_mmapbatch {
int num; /* number of pages to populate */
domid_t dom; /* target domain */
__u64 addr; /* virtual address */
- xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
+ xen_pfn_t __user *arr; /* array of mfns - or'd with
+ PRIVCMD_MMAPBATCH_MFN_ERROR on err */
+};
+
+#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
+
+struct privcmd_mmapbatch_v2 {
+ unsigned int num; /* number of pages to populate */
+ domid_t dom; /* target domain */
+ __u64 addr; /* virtual address */
+ const xen_pfn_t __user *arr; /* array of mfns */
+ int __user *err; /* array of error codes */
};

/*
* @cmd: IOCTL_PRIVCMD_HYPERCALL
* @arg: &privcmd_hypercall_t
* Return: Value returned from execution of the specified hypercall.
+ *
+ * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
+ * @arg: &struct privcmd_mmapbatch_v2
+ * Return: 0 on success (i.e., arg->err contains valid error codes for
+ * each frame). On an error other than a failed frame remap, -1 is
+ * returned and errno is set to EINVAL, EFAULT etc. As an exception,
+ * if the operation was otherwise successful but any frame failed with
+ * -ENOENT, then -1 is returned and errno is set to ENOENT.
*/
#define IOCTL_PRIVCMD_HYPERCALL \
_IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
@@ -73,5 +92,7 @@ struct privcmd_mmapbatch {
_IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
#define IOCTL_PRIVCMD_MMAPBATCH \
_IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
+#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
+ _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))

#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
--
1.7.2.5


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


andreslc at gridcentric

Aug 30, 2012, 9:41 AM

Post #6 of 19 (242 views)
Permalink
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl [In reply to]

David,
The patch looks functionally ok, but I still have two lingering concerns:
- the hideous casting of mfn into err
- why not signal paged out frames for V1

Rather than keep writing English, I wrote some C :)

And took the liberty to include your signed-off. David & Konrad, let me know what you think, and once we settle on either version we can move into unit testing this.

Thanks
Andres

commit 3c0c619f11a26b7bc3f12a1c477cf969c25de231
Author: Andres Lagar-Cavilla <andres [at] lagarcavilla>
Date: Thu Aug 30 12:23:33 2012 -0400

xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl

PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
field for reporting the error code for every frame that could not be
mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.

Also expand PRIVCMD_MMAPBATCH to return appropriate error-encoding top nibble
in the mfn array.

Signed-off-by: David Vrabel <david.vrabel [at] citrix>
Signed-off-by: Andres Lagar-Cavilla <andres [at] lagarcavilla>

diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 85226cb..6562e29 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
*/
static int gather_array(struct list_head *pagelist,
unsigned nelem, size_t size,
- void __user *data)
+ const void __user *data)
{
unsigned pageidx;
void *pagedata;
@@ -246,20 +246,54 @@ struct mmap_batch_state {
domid_t domain;
unsigned long va;
struct vm_area_struct *vma;
+ /* A tristate:
+ * 0 for no errors
+ * 1 if at least one error has happened (and no
+ * -ENOENT errors have happened)
+ * -ENOENT if at least 1 -ENOENT has happened.
+ */
int err;

- xen_pfn_t __user *user;
+ xen_pfn_t __user *user_mfn;
+ int __user *user_err;
};

static int mmap_batch_fn(void *data, void *state)
{
xen_pfn_t *mfnp = data;
struct mmap_batch_state *st = state;
+ int ret;
+
+ ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
+ st->vma->vm_page_prot, st->domain);
+ if (ret < 0) {
+ /*
+ * V2 provides a user-space (pre-checked for access) user_err
+ * pointer, in which we store the individual map error codes.
+ *
+ * V1 encodes the error codes in the 32bit top nibble of the
+ * mfn (with its known limitations vis-a-vis 64 bit callers).
+ *
+ * In either case, global state.err is zero unless one or more
+ * individual maps fail with -ENOENT, in which case it is -ENOENT.
+ *
+ */
+ if (st->user_err)
+ BUG_ON(__put_user(ret, st->user_err++));
+ else {
+ xen_pfn_t nibble = (ret == -ENOENT) ?
+ PRIVCMD_MMAPBATCH_PAGED_ERROR :
+ PRIVCMD_MMAPBATCH_MFN_ERROR;
+ *mfnp |= nibble;
+ }

- if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
- st->vma->vm_page_prot, st->domain) < 0) {
- *mfnp |= 0xf0000000U;
- st->err++;
+ if (ret == -ENOENT)
+ st->err = -ENOENT;
+ else {
+ /* Record that at least one error has happened. */
+ if (st->err == 0)
+ st->err = 1;
+ }
}
st->va += PAGE_SIZE;

@@ -271,15 +305,18 @@ static int mmap_return_errors(void *data, void *state)
xen_pfn_t *mfnp = data;
struct mmap_batch_state *st = state;

- return put_user(*mfnp, st->user++);
+ if (st->user_err == NULL)
+ return __put_user(*mfnp, st->user_mfn++);
+
+ return 0;
}

static struct vm_operations_struct privcmd_vm_ops;

-static long privcmd_ioctl_mmap_batch(void __user *udata)
+static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
{
int ret;
- struct privcmd_mmapbatch m;
+ struct privcmd_mmapbatch_v2 m;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long nr_pages;
@@ -289,15 +326,31 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
if (!xen_initial_domain())
return -EPERM;

- if (copy_from_user(&m, udata, sizeof(m)))
- return -EFAULT;
+ switch (version) {
+ case 1:
+ if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
+ return -EFAULT;
+ /* Returns per-frame error in m.arr. */
+ m.err = NULL;
+ if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
+ return -EFAULT;
+ break;
+ case 2:
+ if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
+ return -EFAULT;
+ /* Returns per-frame error code in m.err. */
+ if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
+ return -EFAULT;
+ break;
+ default:
+ return -EINVAL;
+ }

nr_pages = m.num;
if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
return -EINVAL;

- ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
- m.arr);
+ ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

if (ret || list_empty(&pagelist))
goto out;
@@ -315,22 +368,34 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
goto out;
}

- state.domain = m.dom;
- state.vma = vma;
- state.va = m.addr;
- state.err = 0;
+ state.domain = m.dom;
+ state.vma = vma;
+ state.va = m.addr;
+ state.err = 0;
+ state.user_err = m.err;

- ret = traverse_pages(m.num, sizeof(xen_pfn_t),
- &pagelist, mmap_batch_fn, &state);
+ /* mmap_batch_fn guarantees ret == 0 */
+ BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
+ &pagelist, mmap_batch_fn, &state));

up_write(&mm->mmap_sem);

- if (state.err > 0) {
- state.user = m.arr;
- ret = traverse_pages(m.num, sizeof(xen_pfn_t),
- &pagelist,
- mmap_return_errors, &state);
- }
+ if (state.err) {
+ if (state.err == -ENOENT)
+ ret = -ENOENT;
+ /* V1 still needs to write back nibbles. */
+ if (m.err == NULL)
+ {
+ int efault;
+ state.user_mfn = (xen_pfn_t *)m.arr;
+ efault = traverse_pages(m.num, sizeof(xen_pfn_t),
+ &pagelist,
+ mmap_return_errors, &state);
+ if (efault)
+ ret = efault;
+ }
+ } else if (m.err)
+ __clear_user(m.err, m.num * sizeof(*m.err));

out:
free_page_list(&pagelist);
@@ -354,7 +419,11 @@ static long privcmd_ioctl(struct file *file,
break;

case IOCTL_PRIVCMD_MMAPBATCH:
- ret = privcmd_ioctl_mmap_batch(udata);
+ ret = privcmd_ioctl_mmap_batch(udata, 1);
+ break;
+
+ case IOCTL_PRIVCMD_MMAPBATCH_V2:
+ ret = privcmd_ioctl_mmap_batch(udata, 2);
break;

default:
diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
index 45c1aa1..a853168 100644
--- a/include/xen/privcmd.h
+++ b/include/xen/privcmd.h
@@ -58,13 +58,33 @@ struct privcmd_mmapbatch {
int num; /* number of pages to populate */
domid_t dom; /* target domain */
__u64 addr; /* virtual address */
- xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
+ xen_pfn_t __user *arr; /* array of mfns - or'd with
+ PRIVCMD_MMAPBATCH_*_ERROR on err */
+};
+
+#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
+#define PRIVCMD_MMAPBATCH_PAGED_ERROR 0x80000000U
+
+struct privcmd_mmapbatch_v2 {
+ unsigned int num; /* number of pages to populate */
+ domid_t dom; /* target domain */
+ __u64 addr; /* virtual address */
+ const xen_pfn_t __user *arr; /* array of mfns */
+ int __user *err; /* array of error codes */
};

/*
* @cmd: IOCTL_PRIVCMD_HYPERCALL
* @arg: &privcmd_hypercall_t
* Return: Value returned from execution of the specified hypercall.
+ *
+ * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
+ * @arg: &struct privcmd_mmapbatch_v2
+ * Return: 0 on success (i.e., arg->err contains valid error codes for
+ * each frame). On an error other than a failed frame remap, -1 is
+ * returned and errno is set to EINVAL, EFAULT etc. As an exception,
+ * if the operation was otherwise successful but any frame failed with
+ * -ENOENT, then -1 is returned and errno is set to ENOENT.
*/
#define IOCTL_PRIVCMD_HYPERCALL \
_IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
@@ -72,5 +92,7 @@ struct privcmd_mmapbatch {
_IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
#define IOCTL_PRIVCMD_MMAPBATCH \
_IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
+#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
+ _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))

#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */

On Aug 30, 2012, at 8:58 AM, David Vrabel wrote:

> From: David Vrabel <david.vrabel [at] citrix>
>
> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
> field for reporting the error code for every frame that could not be
> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
>
> Signed-off-by: David Vrabel <david.vrabel [at] citrix>
> ---
> drivers/xen/privcmd.c | 99 +++++++++++++++++++++++++++++++++++++++---------
> include/xen/privcmd.h | 23 +++++++++++-
> 2 files changed, 102 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> index ccee0f1..c0e89e7 100644
> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
> */
> static int gather_array(struct list_head *pagelist,
> unsigned nelem, size_t size,
> - void __user *data)
> + const void __user *data)
> {
> unsigned pageidx;
> void *pagedata;
> @@ -248,18 +248,37 @@ struct mmap_batch_state {
> struct vm_area_struct *vma;
> int err;
>
> - xen_pfn_t __user *user;
> + xen_pfn_t __user *user_mfn;
> + int __user *user_err;
> };
>
> static int mmap_batch_fn(void *data, void *state)
> {
> xen_pfn_t *mfnp = data;
> struct mmap_batch_state *st = state;
> + int ret;
>
> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> - st->vma->vm_page_prot, st->domain) < 0) {
> - *mfnp |= 0xf0000000U;
> - st->err++;
> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> + st->vma->vm_page_prot, st->domain);
> + if (ret < 0) {
> + /*
> + * Error reporting is a mess but userspace relies on
> + * it behaving this way.
> + *
> + * V2 needs to a) return the result of each frame's
> + * remap; and b) return -ENOENT if any frame failed
> + * with -ENOENT.
> + *
> + * In this first pass the error code is saved by
> + * overwriting the mfn and an error is indicated in
> + * st->err.
> + *
> + * The second pass by mmap_return_errors() will write
> + * the error codes to user space and get the right
> + * ioctl return value.
> + */
> + *(int *)mfnp = ret;
> + st->err = ret;
> }
> st->va += PAGE_SIZE;
>
> @@ -270,16 +289,33 @@ static int mmap_return_errors(void *data, void *state)
> {
> xen_pfn_t *mfnp = data;
> struct mmap_batch_state *st = state;
> + int ret;
> +
> + if (st->user_err) {
> + int err = *(int *)mfnp;
> +
> + if (err == -ENOENT)
> + st->err = err;
>
> - return put_user(*mfnp, st->user++);
> + return __put_user(err, st->user_err++);
> + } else {
> + xen_pfn_t mfn;
> +
> + ret = __get_user(mfn, st->user_mfn);
> + if (ret < 0)
> + return ret;
> +
> + mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR;
> + return __put_user(mfn, st->user_mfn++);
> + }
> }
>
> static struct vm_operations_struct privcmd_vm_ops;
>
> -static long privcmd_ioctl_mmap_batch(void __user *udata)
> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
> {
> int ret;
> - struct privcmd_mmapbatch m;
> + struct privcmd_mmapbatch_v2 m;
> struct mm_struct *mm = current->mm;
> struct vm_area_struct *vma;
> unsigned long nr_pages;
> @@ -289,15 +325,31 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
> if (!xen_initial_domain())
> return -EPERM;
>
> - if (copy_from_user(&m, udata, sizeof(m)))
> - return -EFAULT;
> + switch (version) {
> + case 1:
> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
> + return -EFAULT;
> + /* Returns per-frame error in m.arr. */
> + m.err = NULL;
> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
> + return -EFAULT;
> + break;
> + case 2:
> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
> + return -EFAULT;
> + /* Returns per-frame error code in m.err. */
> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
> + return -EFAULT;
> + break;
> + default:
> + return -EINVAL;
> + }
>
> nr_pages = m.num;
> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
> return -EINVAL;
>
> - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
> - m.arr);
> + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
>
> if (ret || list_empty(&pagelist))
> goto out;
> @@ -325,12 +377,17 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>
> up_write(&mm->mmap_sem);
>
> - if (state.err > 0) {
> - state.user = m.arr;
> + if (state.err) {
> + state.err = 0;
> + state.user_mfn = (xen_pfn_t *)m.arr;
> + state.user_err = m.err;
> ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> - &pagelist,
> - mmap_return_errors, &state);
> - }
> + &pagelist,
> + mmap_return_errors, &state);
> + if (ret >= 0)
> + ret = state.err;
> + } else if (m.err)
> + __clear_user(m.err, m.num * sizeof(*m.err));
>
> out:
> free_page_list(&pagelist);
> @@ -354,7 +411,11 @@ static long privcmd_ioctl(struct file *file,
> break;
>
> case IOCTL_PRIVCMD_MMAPBATCH:
> - ret = privcmd_ioctl_mmap_batch(udata);
> + ret = privcmd_ioctl_mmap_batch(udata, 1);
> + break;
> +
> + case IOCTL_PRIVCMD_MMAPBATCH_V2:
> + ret = privcmd_ioctl_mmap_batch(udata, 2);
> break;
>
> default:
> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
> index 17857fb..f60d75c 100644
> --- a/include/xen/privcmd.h
> +++ b/include/xen/privcmd.h
> @@ -59,13 +59,32 @@ struct privcmd_mmapbatch {
> int num; /* number of pages to populate */
> domid_t dom; /* target domain */
> __u64 addr; /* virtual address */
> - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
> + xen_pfn_t __user *arr; /* array of mfns - or'd with
> + PRIVCMD_MMAPBATCH_MFN_ERROR on err */
> +};
> +
> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
> +
> +struct privcmd_mmapbatch_v2 {
> + unsigned int num; /* number of pages to populate */
> + domid_t dom; /* target domain */
> + __u64 addr; /* virtual address */
> + const xen_pfn_t __user *arr; /* array of mfns */
> + int __user *err; /* array of error codes */
> };
>
> /*
> * @cmd: IOCTL_PRIVCMD_HYPERCALL
> * @arg: &privcmd_hypercall_t
> * Return: Value returned from execution of the specified hypercall.
> + *
> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
> + * @arg: &struct privcmd_mmapbatch_v2
> + * Return: 0 on success (i.e., arg->err contains valid error codes for
> + * each frame). On an error other than a failed frame remap, -1 is
> + * returned and errno is set to EINVAL, EFAULT etc. As an exception,
> + * if the operation was otherwise successful but any frame failed with
> + * -ENOENT, then -1 is returned and errno is set to ENOENT.
> */
> #define IOCTL_PRIVCMD_HYPERCALL \
> _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
> @@ -73,5 +92,7 @@ struct privcmd_mmapbatch {
> _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
> #define IOCTL_PRIVCMD_MMAPBATCH \
> _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
> +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
> + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
>
> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
> --
> 1.7.2.5
>


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


david.vrabel at citrix

Aug 30, 2012, 10:04 AM

Post #7 of 19 (242 views)
Permalink
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl [In reply to]

On 30/08/12 17:41, Andres Lagar-Cavilla wrote:
> David,
> The patch looks functionally ok, but I still have two lingering concerns:
> - the hideous casting of mfn into err

I considered a couple of other approaches (unions, extending
gather_array() to add gaps for the int return). They were all worse.

I also tried your proposal here but it doesn't work. See below.

> - why not signal paged out frames for V1

Because V1 is broken on 64bit and there doesn't seem to be any point in
changing it given that libxc won't call it if V2 is present.

> Rather than keep writing English, I wrote some C :)
>
> And took the liberty to include your signed-off. David & Konrad, let
> me know what you think, and once we settle on either version we can move
> into unit testing this.
[...]
> static int mmap_batch_fn(void *data, void *state)
> {
> xen_pfn_t *mfnp = data;
> struct mmap_batch_state *st = state;
> + int ret;
> +
> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> + st->vma->vm_page_prot, st->domain);
> + if (ret < 0) {
> + /*
> + * V2 provides a user-space (pre-checked for access) user_err
> + * pointer, in which we store the individual map error codes.
> + *
> + * V1 encodes the error codes in the 32bit top nibble of the
> + * mfn (with its known limitations vis-a-vis 64 bit callers).
> + *
> + * In either case, global state.err is zero unless one or more
> + * individual maps fail with -ENOENT, in which case it is -ENOENT.
> + *
> + */
> + if (st->user_err)
> + BUG_ON(__put_user(ret, st->user_err++));

You can't access user space pages here while holding
current->mm->mmap_sem. I tried this and it would sometimes deadlock in
the page fault handler.

access_ok() only checks if the pointer is in the user space virtual
address space - not that a valid mapping exists and is writable. So
BUG_ON(__put_user()) should not be done.

David

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


andreslc at gridcentric

Aug 30, 2012, 11:29 AM

Post #8 of 19 (243 views)
Permalink
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl [In reply to]

On Aug 30, 2012, at 1:04 PM, David Vrabel wrote:

> On 30/08/12 17:41, Andres Lagar-Cavilla wrote:
>> David,
>> The patch looks functionally ok, but I still have two lingering concerns:
>> - the hideous casting of mfn into err
>
> I considered a couple of other approaches (unions, extending
> gather_array() to add gaps for the int return). They were all worse.
>
> I also tried your proposal here but it doesn't work. See below.
>
>> - why not signal paged out frames for V1
>
> Because V1 is broken on 64bit and there doesn't seem to be any point in
> changing it given that libxc won't call it if V2 is present.
>
>> Rather than keep writing English, I wrote some C :)
>>
>> And took the liberty to include your signed-off. David & Konrad, let
>> me know what you think, and once we settle on either version we can move
>> into unit testing this.
> [...]
>> static int mmap_batch_fn(void *data, void *state)
>> {
>> xen_pfn_t *mfnp = data;
>> struct mmap_batch_state *st = state;
>> + int ret;
>> +
>> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
>> + st->vma->vm_page_prot, st->domain);
>> + if (ret < 0) {
>> + /*
>> + * V2 provides a user-space (pre-checked for access) user_err
>> + * pointer, in which we store the individual map error codes.
>> + *
>> + * V1 encodes the error codes in the 32bit top nibble of the
>> + * mfn (with its known limitations vis-a-vis 64 bit callers).
>> + *
>> + * In either case, global state.err is zero unless one or more
>> + * individual maps fail with -ENOENT, in which case it is -ENOENT.
>> + *
>> + */
>> + if (st->user_err)
>> + BUG_ON(__put_user(ret, st->user_err++));
>
> You can't access user space pages here while holding
> current->mm->mmap_sem. I tried this and it would sometimes deadlock in
> the page fault handler.
>
> access_ok() only checks if the pointer is in the user space virtual
> address space - not that a valid mapping exists and is writable. So
> BUG_ON(__put_user()) should not be done.

Very true. Thanks for the pointer. Clearly the reason for the gather_array/traverse_pages structure.
Re-posting my version
Andres
>
> David


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


andreslc at gridcentric

Aug 30, 2012, 11:32 AM

Post #9 of 19 (244 views)
Permalink
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl [In reply to]

Second repost of my version, heavily based on David's.

Complementary to this patch, on the xen tree I intend to add PRIVCMD_MMAPBATCH_*_ERROR into the libxc header files, and remove XEN_DOMCTL_PFINFO_PAGEDTAB from domctl.h

Please review. Thanks
Andres

commit 3f40e8d79b7e032527ee207a97499ddbc81ca12b
Author: Andres Lagar-Cavilla <andres [at] lagarcavilla>
Date: Thu Aug 30 12:23:33 2012 -0400

xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl

PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
field for reporting the error code for every frame that could not be
mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.

Also expand PRIVCMD_MMAPBATCH to return appropriate error-encoding top nibble
in the mfn array.

Signed-off-by: David Vrabel <david.vrabel [at] citrix>
Signed-off-by: Andres Lagar-Cavilla <andres [at] lagarcavilla>

diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 85226cb..5a03dc1 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
*/
static int gather_array(struct list_head *pagelist,
unsigned nelem, size_t size,
- void __user *data)
+ const void __user *data)
{
unsigned pageidx;
void *pagedata;
@@ -246,20 +246,42 @@ struct mmap_batch_state {
domid_t domain;
unsigned long va;
struct vm_area_struct *vma;
- int err;
-
- xen_pfn_t __user *user;
+ /* A tristate:
+ * 0 for no errors
+ * 1 if at least one error has happened (and no
+ * -ENOENT errors have happened)
+ * -ENOENT if at least 1 -ENOENT has happened.
+ */
+ int global_error;
+ /* An array for individual errors */
+ int *err;
+
+ /* User-space pointers to store errors in the second pass. */
+ xen_pfn_t __user *user_mfn;
+ int __user *user_err;
};

static int mmap_batch_fn(void *data, void *state)
{
xen_pfn_t *mfnp = data;
struct mmap_batch_state *st = state;
+ int ret;

- if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
- st->vma->vm_page_prot, st->domain) < 0) {
- *mfnp |= 0xf0000000U;
- st->err++;
+ ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
+ st->vma->vm_page_prot, st->domain);
+
+ /* Store error code for second pass. */
+ *(st->err++) = ret;
+
+ /* And see if it affects the global global_error. */
+ if (ret < 0) {
+ if (ret == -ENOENT)
+ st->global_error = -ENOENT;
+ else {
+ /* Record that at least one error has happened. */
+ if (st->global_error == 0)
+ st->global_error = 1;
+ }
}
st->va += PAGE_SIZE;

@@ -270,37 +292,76 @@ static int mmap_return_errors(void *data, void *state)
{
xen_pfn_t *mfnp = data;
struct mmap_batch_state *st = state;
-
- return put_user(*mfnp, st->user++);
+ int err = *(st->err++);
+
+ /*
+ * V2 provides a user-space user_err pointer, in which we store the
+ * individual map error codes.
+ */
+ if (st->user_err)
+ return __put_user(err, st->user_err++);
+
+ /*
+ * V1 encodes the error codes in the 32bit top nibble of the
+ * mfn (with its known limitations vis-a-vis 64 bit callers).
+ */
+ *mfnp |= (err == -ENOENT) ?
+ PRIVCMD_MMAPBATCH_PAGED_ERROR :
+ PRIVCMD_MMAPBATCH_MFN_ERROR;
+ return __put_user(*mfnp, st->user_mfn++);
}

static struct vm_operations_struct privcmd_vm_ops;

-static long privcmd_ioctl_mmap_batch(void __user *udata)
+static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
{
int ret;
- struct privcmd_mmapbatch m;
+ struct privcmd_mmapbatch_v2 m;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long nr_pages;
LIST_HEAD(pagelist);
+ int *err_array;
struct mmap_batch_state state;

if (!xen_initial_domain())
return -EPERM;

- if (copy_from_user(&m, udata, sizeof(m)))
- return -EFAULT;
+ switch (version) {
+ case 1:
+ if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
+ return -EFAULT;
+ /* Returns per-frame error in m.arr. */
+ m.err = NULL;
+ if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
+ return -EFAULT;
+ break;
+ case 2:
+ if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
+ return -EFAULT;
+ /* Returns per-frame error code in m.err. */
+ if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
+ return -EFAULT;
+ break;
+ default:
+ return -EINVAL;
+ }

nr_pages = m.num;
if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
return -EINVAL;

- ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
- m.arr);
+ ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

if (ret || list_empty(&pagelist))
- goto out;
+ goto out_no_err_array;
+
+ err_array = kmalloc_array(m.num, sizeof(int), GFP_KERNEL);
+ if (err_array == NULL)
+ {
+ ret = -ENOMEM;
+ goto out_no_err_array;
+ }

down_write(&mm->mmap_sem);

@@ -315,24 +376,38 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
goto out;
}

- state.domain = m.dom;
- state.vma = vma;
- state.va = m.addr;
- state.err = 0;
+ state.domain = m.dom;
+ state.vma = vma;
+ state.va = m.addr;
+ state.global_error = 0;
+ state.err = err_array;

- ret = traverse_pages(m.num, sizeof(xen_pfn_t),
- &pagelist, mmap_batch_fn, &state);
+ /* mmap_batch_fn guarantees ret == 0 */
+ BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
+ &pagelist, mmap_batch_fn, &state));

up_write(&mm->mmap_sem);

- if (state.err > 0) {
- state.user = m.arr;
- ret = traverse_pages(m.num, sizeof(xen_pfn_t),
- &pagelist,
- mmap_return_errors, &state);
- }
+ if (state.global_error) {
+ int efault;
+
+ if (state.global_error == -ENOENT)
+ ret = -ENOENT;
+
+ /* Write back errors in second pass. */
+ state.user_mfn = (xen_pfn_t *)m.arr;
+ state.user_err = m.err;
+ state.err = err_array;
+ efault = traverse_pages(m.num, sizeof(xen_pfn_t),
+ &pagelist, mmap_return_errors, &state);
+ if (efault)
+ ret = efault;
+ } else if (m.err)
+ ret = __clear_user(m.err, m.num * sizeof(*m.err));

out:
+ kfree(err_array);
+out_no_err_array:
free_page_list(&pagelist);

return ret;
@@ -354,7 +429,11 @@ static long privcmd_ioctl(struct file *file,
break;

case IOCTL_PRIVCMD_MMAPBATCH:
- ret = privcmd_ioctl_mmap_batch(udata);
+ ret = privcmd_ioctl_mmap_batch(udata, 1);
+ break;
+
+ case IOCTL_PRIVCMD_MMAPBATCH_V2:
+ ret = privcmd_ioctl_mmap_batch(udata, 2);
break;

default:
diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
index 45c1aa1..a853168 100644
--- a/include/xen/privcmd.h
+++ b/include/xen/privcmd.h
@@ -58,13 +58,33 @@ struct privcmd_mmapbatch {
int num; /* number of pages to populate */
domid_t dom; /* target domain */
__u64 addr; /* virtual address */
- xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
+ xen_pfn_t __user *arr; /* array of mfns - or'd with
+ PRIVCMD_MMAPBATCH_*_ERROR on err */
+};
+
+#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
+#define PRIVCMD_MMAPBATCH_PAGED_ERROR 0x80000000U
+
+struct privcmd_mmapbatch_v2 {
+ unsigned int num; /* number of pages to populate */
+ domid_t dom; /* target domain */
+ __u64 addr; /* virtual address */
+ const xen_pfn_t __user *arr; /* array of mfns */
+ int __user *err; /* array of error codes */
};

/*
* @cmd: IOCTL_PRIVCMD_HYPERCALL
* @arg: &privcmd_hypercall_t
* Return: Value returned from execution of the specified hypercall.
+ *
+ * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
+ * @arg: &struct privcmd_mmapbatch_v2
+ * Return: 0 on success (i.e., arg->err contains valid error codes for
+ * each frame). On an error other than a failed frame remap, -1 is
+ * returned and errno is set to EINVAL, EFAULT etc. As an exception,
+ * if the operation was otherwise successful but any frame failed with
+ * -ENOENT, then -1 is returned and errno is set to ENOENT.
*/
#define IOCTL_PRIVCMD_HYPERCALL \
_IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
@@ -72,5 +92,7 @@ struct privcmd_mmapbatch {
_IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
#define IOCTL_PRIVCMD_MMAPBATCH \
_IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
+#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
+ _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))

#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */

On Aug 30, 2012, at 8:58 AM, David Vrabel wrote:

> From: David Vrabel <david.vrabel [at] citrix>
>
> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
> field for reporting the error code for every frame that could not be
> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
>
> Signed-off-by: David Vrabel <david.vrabel [at] citrix>
> ---
> drivers/xen/privcmd.c | 99 +++++++++++++++++++++++++++++++++++++++---------
> include/xen/privcmd.h | 23 +++++++++++-
> 2 files changed, 102 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> index ccee0f1..c0e89e7 100644
> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
> */
> static int gather_array(struct list_head *pagelist,
> unsigned nelem, size_t size,
> - void __user *data)
> + const void __user *data)
> {
> unsigned pageidx;
> void *pagedata;
> @@ -248,18 +248,37 @@ struct mmap_batch_state {
> struct vm_area_struct *vma;
> int err;
>
> - xen_pfn_t __user *user;
> + xen_pfn_t __user *user_mfn;
> + int __user *user_err;
> };
>
> static int mmap_batch_fn(void *data, void *state)
> {
> xen_pfn_t *mfnp = data;
> struct mmap_batch_state *st = state;
> + int ret;
>
> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> - st->vma->vm_page_prot, st->domain) < 0) {
> - *mfnp |= 0xf0000000U;
> - st->err++;
> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> + st->vma->vm_page_prot, st->domain);
> + if (ret < 0) {
> + /*
> + * Error reporting is a mess but userspace relies on
> + * it behaving this way.
> + *
> + * V2 needs to a) return the result of each frame's
> + * remap; and b) return -ENOENT if any frame failed
> + * with -ENOENT.
> + *
> + * In this first pass the error code is saved by
> + * overwriting the mfn and an error is indicated in
> + * st->err.
> + *
> + * The second pass by mmap_return_errors() will write
> + * the error codes to user space and get the right
> + * ioctl return value.
> + */
> + *(int *)mfnp = ret;
> + st->err = ret;
> }
> st->va += PAGE_SIZE;
>
> @@ -270,16 +289,33 @@ static int mmap_return_errors(void *data, void *state)
> {
> xen_pfn_t *mfnp = data;
> struct mmap_batch_state *st = state;
> + int ret;
> +
> + if (st->user_err) {
> + int err = *(int *)mfnp;
> +
> + if (err == -ENOENT)
> + st->err = err;
>
> - return put_user(*mfnp, st->user++);
> + return __put_user(err, st->user_err++);
> + } else {
> + xen_pfn_t mfn;
> +
> + ret = __get_user(mfn, st->user_mfn);
> + if (ret < 0)
> + return ret;
> +
> + mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR;
> + return __put_user(mfn, st->user_mfn++);
> + }
> }
>
> static struct vm_operations_struct privcmd_vm_ops;
>
> -static long privcmd_ioctl_mmap_batch(void __user *udata)
> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
> {
> int ret;
> - struct privcmd_mmapbatch m;
> + struct privcmd_mmapbatch_v2 m;
> struct mm_struct *mm = current->mm;
> struct vm_area_struct *vma;
> unsigned long nr_pages;
> @@ -289,15 +325,31 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
> if (!xen_initial_domain())
> return -EPERM;
>
> - if (copy_from_user(&m, udata, sizeof(m)))
> - return -EFAULT;
> + switch (version) {
> + case 1:
> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
> + return -EFAULT;
> + /* Returns per-frame error in m.arr. */
> + m.err = NULL;
> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
> + return -EFAULT;
> + break;
> + case 2:
> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
> + return -EFAULT;
> + /* Returns per-frame error code in m.err. */
> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
> + return -EFAULT;
> + break;
> + default:
> + return -EINVAL;
> + }
>
> nr_pages = m.num;
> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
> return -EINVAL;
>
> - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
> - m.arr);
> + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
>
> if (ret || list_empty(&pagelist))
> goto out;
> @@ -325,12 +377,17 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>
> up_write(&mm->mmap_sem);
>
> - if (state.err > 0) {
> - state.user = m.arr;
> + if (state.err) {
> + state.err = 0;
> + state.user_mfn = (xen_pfn_t *)m.arr;
> + state.user_err = m.err;
> ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> - &pagelist,
> - mmap_return_errors, &state);
> - }
> + &pagelist,
> + mmap_return_errors, &state);
> + if (ret >= 0)
> + ret = state.err;
> + } else if (m.err)
> + __clear_user(m.err, m.num * sizeof(*m.err));
>
> out:
> free_page_list(&pagelist);
> @@ -354,7 +411,11 @@ static long privcmd_ioctl(struct file *file,
> break;
>
> case IOCTL_PRIVCMD_MMAPBATCH:
> - ret = privcmd_ioctl_mmap_batch(udata);
> + ret = privcmd_ioctl_mmap_batch(udata, 1);
> + break;
> +
> + case IOCTL_PRIVCMD_MMAPBATCH_V2:
> + ret = privcmd_ioctl_mmap_batch(udata, 2);
> break;
>
> default:
> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
> index 17857fb..f60d75c 100644
> --- a/include/xen/privcmd.h
> +++ b/include/xen/privcmd.h
> @@ -59,13 +59,32 @@ struct privcmd_mmapbatch {
> int num; /* number of pages to populate */
> domid_t dom; /* target domain */
> __u64 addr; /* virtual address */
> - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
> + xen_pfn_t __user *arr; /* array of mfns - or'd with
> + PRIVCMD_MMAPBATCH_MFN_ERROR on err */
> +};
> +
> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
> +
> +struct privcmd_mmapbatch_v2 {
> + unsigned int num; /* number of pages to populate */
> + domid_t dom; /* target domain */
> + __u64 addr; /* virtual address */
> + const xen_pfn_t __user *arr; /* array of mfns */
> + int __user *err; /* array of error codes */
> };
>
> /*
> * @cmd: IOCTL_PRIVCMD_HYPERCALL
> * @arg: &privcmd_hypercall_t
> * Return: Value returned from execution of the specified hypercall.
> + *
> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
> + * @arg: &struct privcmd_mmapbatch_v2
> + * Return: 0 on success (i.e., arg->err contains valid error codes for
> + * each frame). On an error other than a failed frame remap, -1 is
> + * returned and errno is set to EINVAL, EFAULT etc. As an exception,
> + * if the operation was otherwise successful but any frame failed with
> + * -ENOENT, then -1 is returned and errno is set to ENOENT.
> */
> #define IOCTL_PRIVCMD_HYPERCALL \
> _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
> @@ -73,5 +92,7 @@ struct privcmd_mmapbatch {
> _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
> #define IOCTL_PRIVCMD_MMAPBATCH \
> _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
> +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
> + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
>
> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
> --
> 1.7.2.5
>


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


Ian.Campbell at citrix

Aug 31, 2012, 12:02 AM

Post #10 of 19 (248 views)
Permalink
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl [In reply to]

On Thu, 2012-08-30 at 19:29 +0100, Andres Lagar-Cavilla wrote:
> On Aug 30, 2012, at 1:04 PM, David Vrabel wrote:
> > You can't access user space pages here while holding
> > current->mm->mmap_sem. I tried this and it would sometimes deadlock in
> > the page fault handler.
> >
> > access_ok() only checks if the pointer is in the user space virtual
> > address space - not that a valid mapping exists and is writable. So
> > BUG_ON(__put_user()) should not be done.
>
> Very true. Thanks for the pointer. Clearly the reason for the gather_array/traverse_pages structure.

/me has flashbacks to the mammoth debugging session which led to
http://xenbits.xen.org/hg/linux-2.6.18-xen.hg/rev/043dc7488c11.

Ian.



_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


david.vrabel at citrix

Aug 31, 2012, 6:08 AM

Post #11 of 19 (249 views)
Permalink
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl [In reply to]

On 30/08/12 19:32, Andres Lagar-Cavilla wrote:
> Second repost of my version, heavily based on David's.

Doing another allocation that may be larger than a page (and its
associated additional error paths) seems to me to be a hammer to crack
the (admittedly a bit wonky) casting nut.

That said, it's up to Konrad which version he prefers.

There are also some minor improvements you could make if you respin this
patch.

> Complementary to this patch, on the xen tree I intend to add
> PRIVCMD_MMAPBATCH_*_ERROR into the libxc header files, and remove
> XEN_DOMCTL_PFINFO_PAGEDTAB from domctl.h

Yes, a good idea. There's no correspondence between the ioctl's error
reporting values and the DOMCTL_PFINFO flags.

> commit 3f40e8d79b7e032527ee207a97499ddbc81ca12b
> Author: Andres Lagar-Cavilla <andres [at] lagarcavilla>
> Date: Thu Aug 30 12:23:33 2012 -0400
>
> xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
>
> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
> field for reporting the error code for every frame that could not be
> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
>
> Also expand PRIVCMD_MMAPBATCH to return appropriate error-encoding top nibble
> in the mfn array.
>
> Signed-off-by: David Vrabel <david.vrabel [at] citrix>
> Signed-off-by: Andres Lagar-Cavilla <andres [at] lagarcavilla>
[...]
> -static long privcmd_ioctl_mmap_batch(void __user *udata)
> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
> {
> int ret;
> - struct privcmd_mmapbatch m;
> + struct privcmd_mmapbatch_v2 m;
> struct mm_struct *mm = current->mm;
> struct vm_area_struct *vma;
> unsigned long nr_pages;
> LIST_HEAD(pagelist);
> + int *err_array;

int *err_array = NULL;

and you could avoid the additional jump label as kfree(NULL) is safe.

> struct mmap_batch_state state;
>
> if (!xen_initial_domain())
> return -EPERM;
>
> - if (copy_from_user(&m, udata, sizeof(m)))
> - return -EFAULT;
> + switch (version) {
> + case 1:
> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
> + return -EFAULT;
> + /* Returns per-frame error in m.arr. */
> + m.err = NULL;
> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
> + return -EFAULT;
> + break;
> + case 2:
> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
> + return -EFAULT;
> + /* Returns per-frame error code in m.err. */
> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
> + return -EFAULT;
> + break;
> + default:
> + return -EINVAL;
> + }
>
> nr_pages = m.num;
> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
> return -EINVAL;
>
> - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
> - m.arr);
> + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
>
> if (ret || list_empty(&pagelist))
> - goto out;
> + goto out_no_err_array;
> +
> + err_array = kmalloc_array(m.num, sizeof(int), GFP_KERNEL);

kcalloc() (see below).

> + if (err_array == NULL)
> + {

Style: if (err_array == NULL) {

> + if (state.global_error) {
> + int efault;
> +
> + if (state.global_error == -ENOENT)
> + ret = -ENOENT;
> +
> + /* Write back errors in second pass. */
> + state.user_mfn = (xen_pfn_t *)m.arr;
> + state.user_err = m.err;
> + state.err = err_array;
> + efault = traverse_pages(m.num, sizeof(xen_pfn_t),
> + &pagelist, mmap_return_errors, &state);
> + if (efault)
> + ret = efault;
> + } else if (m.err)
> + ret = __clear_user(m.err, m.num * sizeof(*m.err));

Since you have an array of errors already there's no need to iterate
through all the MFNs again for V2. A simple copy_to_user() is
sufficient provided err_array was zeroed with kcalloc().

if (m.err)
ret = __copy_to_user(m.err, err_array, m.num * sizeof(*m.err));
else {
/* Write back errors in second pass. */
state.user_mfn = (xen_pfn_t *)m.arr;
state.user_err = m.err;
state.err = err_array;
ret = traverse_pages(m.num, sizeof(xen_pfn_t),
&pagelist, mmap_return_errors, &state);
}

if (ret == 0 && state.global_error == -ENOENT)
ret = -ENOENT;

David

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


andreslc at gridcentric

Aug 31, 2012, 6:13 AM

Post #12 of 19 (245 views)
Permalink
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl [In reply to]

On Aug 31, 2012, at 9:08 AM, David Vrabel wrote:

> On 30/08/12 19:32, Andres Lagar-Cavilla wrote:
>> Second repost of my version, heavily based on David's.
>
> Doing another allocation that may be larger than a page (and its
> associated additional error paths) seems to me to be a hammer to crack
> the (admittedly bit wonky) casting nut.
>
> That said, it's up to Konrad which version he prefers.

Yeah absolutely.

>
> There are also some minor improvements you could make if you respin this
> patch.
>
>> Complementary to this patch, on the xen tree I intend to add
>> PRIVCMD_MMAPBATCH_*_ERROR into the libxc header files, and remove
>> XEN_DOMCTL_PFINFO_PAGEDTAB from domctl.h
>
> Yes, a good idea. There's no correspondence between the ioctl's error
> reporting values and the DOMCTL_PFINFO flags.
>
>> commit 3f40e8d79b7e032527ee207a97499ddbc81ca12b
>> Author: Andres Lagar-Cavilla <andres [at] lagarcavilla>
>> Date: Thu Aug 30 12:23:33 2012 -0400
>>
>> xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
>>
>> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
>> field for reporting the error code for every frame that could not be
>> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
>>
>> Also expand PRIVCMD_MMAPBATCH to return appropriate error-encoding top nibble
>> in the mfn array.
>>
>> Signed-off-by: David Vrabel <david.vrabel [at] citrix>
>> Signed-off-by: Andres Lagar-Cavilla <andres [at] lagarcavilla>
> [...]
>> -static long privcmd_ioctl_mmap_batch(void __user *udata)
>> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
>> {
>> int ret;
>> - struct privcmd_mmapbatch m;
>> + struct privcmd_mmapbatch_v2 m;
>> struct mm_struct *mm = current->mm;
>> struct vm_area_struct *vma;
>> unsigned long nr_pages;
>> LIST_HEAD(pagelist);
>> + int *err_array;
>
> int *err_array = NULL;
>
> and you could avoid the additional jump label as kfree(NULL) is safe.

Didn't know, great.

>
>> struct mmap_batch_state state;
>>
>> if (!xen_initial_domain())
>> return -EPERM;
>>
>> - if (copy_from_user(&m, udata, sizeof(m)))
>> - return -EFAULT;
>> + switch (version) {
>> + case 1:
>> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
>> + return -EFAULT;
>> + /* Returns per-frame error in m.arr. */
>> + m.err = NULL;
>> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
>> + return -EFAULT;
>> + break;
>> + case 2:
>> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
>> + return -EFAULT;
>> + /* Returns per-frame error code in m.err. */
>> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
>> + return -EFAULT;
>> + break;
>> + default:
>> + return -EINVAL;
>> + }
>>
>> nr_pages = m.num;
>> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
>> return -EINVAL;
>>
>> - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
>> - m.arr);
>> + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
>>
>> if (ret || list_empty(&pagelist))
>> - goto out;
>> + goto out_no_err_array;
>> +
>> + err_array = kmalloc_array(m.num, sizeof(int), GFP_KERNEL);
>
> kcalloc() (see below).
>
>> + if (err_array == NULL)
>> + {
>
> Style: if (err_array == NULL) {
>
>> + if (state.global_error) {
>> + int efault;
>> +
>> + if (state.global_error == -ENOENT)
>> + ret = -ENOENT;
>> +
>> + /* Write back errors in second pass. */
>> + state.user_mfn = (xen_pfn_t *)m.arr;
>> + state.user_err = m.err;
>> + state.err = err_array;
>> + efault = traverse_pages(m.num, sizeof(xen_pfn_t),
>> + &pagelist, mmap_return_errors, &state);
>> + if (efault)
>> + ret = efault;
>> + } else if (m.err)
>> + ret = __clear_user(m.err, m.num * sizeof(*m.err));
>
> Since you have an array of errors already there's no need to iterate
> through all the MFNs again for V2. A simple copy_to_user() is
> sufficient provided err_array was zeroed with kcalloc().
I can kcalloc for extra paranoia, but the code will set all error slots, and is guaranteed to not jump over anything. I +1 the shortcut you outline below. Re-spin coming.
Andres
>
> if (m.err)
> ret = __copy_to_user(m.err, err_array, m.num * sizeof(*m.err));
> else {
> /* Write back errors in second pass. */
> state.user_mfn = (xen_pfn_t *)m.arr;
> state.user_err = m.err;
> state.err = err_array;
> ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> &pagelist, mmap_return_errors, &state);
> }
>
> if (ret == 0 && state.global_error == -ENOENT)
> ret = -ENOENT;
>
> David


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


andreslc at gridcentric

Aug 31, 2012, 6:59 AM

Post #13 of 19 (244 views)
Permalink
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl [In reply to]

Re-spin of alternative patch after David's feedback.
Thanks
Andres

commit ab351a5cef1797935b083c2f6e72800a8949c515
Author: Andres Lagar-Cavilla <andres [at] lagarcavilla>
Date: Thu Aug 30 12:23:33 2012 -0400

xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl

PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
field for reporting the error code for every frame that could not be
mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.

Also expand PRIVCMD_MMAPBATCH to return appropriate error-encoding top nibble
in the mfn array.

Signed-off-by: David Vrabel <david.vrabel [at] citrix>
Signed-off-by: Andres Lagar-Cavilla <andres [at] lagarcavilla>

diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 85226cb..5386f20 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
*/
static int gather_array(struct list_head *pagelist,
unsigned nelem, size_t size,
- void __user *data)
+ const void __user *data)
{
unsigned pageidx;
void *pagedata;
@@ -246,61 +246,117 @@ struct mmap_batch_state {
domid_t domain;
unsigned long va;
struct vm_area_struct *vma;
- int err;
-
- xen_pfn_t __user *user;
+ /* A tristate:
+ * 0 for no errors
+ * 1 if at least one error has happened (and no
+ * -ENOENT errors have happened)
+ * -ENOENT if at least 1 -ENOENT has happened.
+ */
+ int global_error;
+ /* An array for individual errors */
+ int *err;
+
+ /* User-space mfn array to store errors in the second pass for V1. */
+ xen_pfn_t __user *user_mfn;
};

static int mmap_batch_fn(void *data, void *state)
{
xen_pfn_t *mfnp = data;
struct mmap_batch_state *st = state;
+ int ret;

- if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
- st->vma->vm_page_prot, st->domain) < 0) {
- *mfnp |= 0xf0000000U;
- st->err++;
+ ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
+ st->vma->vm_page_prot, st->domain);
+
+ /* Store error code for second pass. */
+ *(st->err++) = ret;
+
+ /* And see if it affects the global_error. */
+ if (ret < 0) {
+ if (ret == -ENOENT)
+ st->global_error = -ENOENT;
+ else {
+ /* Record that at least one error has happened. */
+ if (st->global_error == 0)
+ st->global_error = 1;
+ }
}
st->va += PAGE_SIZE;

return 0;
}

-static int mmap_return_errors(void *data, void *state)
+static int mmap_return_errors_v1(void *data, void *state)
{
xen_pfn_t *mfnp = data;
struct mmap_batch_state *st = state;
-
- return put_user(*mfnp, st->user++);
+ int err = *(st->err++);
+
+ /*
+ * V1 encodes the error codes in the 32bit top nibble of the
+ * mfn (with its known limitations vis-a-vis 64 bit callers).
+ */
+ *mfnp |= (err == -ENOENT) ?
+ PRIVCMD_MMAPBATCH_PAGED_ERROR :
+ PRIVCMD_MMAPBATCH_MFN_ERROR;
+ return __put_user(*mfnp, st->user_mfn++);
}

static struct vm_operations_struct privcmd_vm_ops;

-static long privcmd_ioctl_mmap_batch(void __user *udata)
+static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
{
int ret;
- struct privcmd_mmapbatch m;
+ struct privcmd_mmapbatch_v2 m;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long nr_pages;
LIST_HEAD(pagelist);
+ int *err_array = NULL;
struct mmap_batch_state state;

if (!xen_initial_domain())
return -EPERM;

- if (copy_from_user(&m, udata, sizeof(m)))
- return -EFAULT;
+ switch (version) {
+ case 1:
+ if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
+ return -EFAULT;
+ /* Returns per-frame error in m.arr. */
+ m.err = NULL;
+ if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
+ return -EFAULT;
+ break;
+ case 2:
+ if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
+ return -EFAULT;
+ /* Returns per-frame error code in m.err. */
+ if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
+ return -EFAULT;
+ break;
+ default:
+ return -EINVAL;
+ }

nr_pages = m.num;
if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
return -EINVAL;

- ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
- m.arr);
+ ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
+
+ if (ret)
+ goto out;
+ if (list_empty(&pagelist)) {
+ ret = -EINVAL;
+ goto out;
+ }

- if (ret || list_empty(&pagelist))
+ err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL);
+ if (err_array == NULL) {
+ ret = -ENOMEM;
goto out;
+ }

down_write(&mm->mmap_sem);

@@ -315,24 +371,34 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
goto out;
}

- state.domain = m.dom;
- state.vma = vma;
- state.va = m.addr;
- state.err = 0;
+ state.domain = m.dom;
+ state.vma = vma;
+ state.va = m.addr;
+ state.global_error = 0;
+ state.err = err_array;

- ret = traverse_pages(m.num, sizeof(xen_pfn_t),
- &pagelist, mmap_batch_fn, &state);
+ /* mmap_batch_fn guarantees ret == 0 */
+ BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
+ &pagelist, mmap_batch_fn, &state));

up_write(&mm->mmap_sem);

- if (state.err > 0) {
- state.user = m.arr;
+ if (state.global_error && (version == 1)) {
+ /* Write back errors in second pass. */
+ state.user_mfn = (xen_pfn_t *)m.arr;
+ state.err = err_array;
ret = traverse_pages(m.num, sizeof(xen_pfn_t),
- &pagelist,
- mmap_return_errors, &state);
- }
+ &pagelist, mmap_return_errors_v1, &state);
+ } else
+ ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
+
+ /* If we have not had any EFAULT-like global errors then set the global
+ * error to -ENOENT if necessary. */
+ if ((ret == 0) && (state.global_error == -ENOENT))
+ ret = -ENOENT;

out:
+ kfree(err_array);
free_page_list(&pagelist);

return ret;
@@ -354,7 +420,11 @@ static long privcmd_ioctl(struct file *file,
break;

case IOCTL_PRIVCMD_MMAPBATCH:
- ret = privcmd_ioctl_mmap_batch(udata);
+ ret = privcmd_ioctl_mmap_batch(udata, 1);
+ break;
+
+ case IOCTL_PRIVCMD_MMAPBATCH_V2:
+ ret = privcmd_ioctl_mmap_batch(udata, 2);
break;

default:
diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
index 45c1aa1..a853168 100644
--- a/include/xen/privcmd.h
+++ b/include/xen/privcmd.h
@@ -58,13 +58,33 @@ struct privcmd_mmapbatch {
int num; /* number of pages to populate */
domid_t dom; /* target domain */
__u64 addr; /* virtual address */
- xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
+ xen_pfn_t __user *arr; /* array of mfns - or'd with
+ PRIVCMD_MMAPBATCH_*_ERROR on err */
+};
+
+#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
+#define PRIVCMD_MMAPBATCH_PAGED_ERROR 0x80000000U
+
+struct privcmd_mmapbatch_v2 {
+ unsigned int num; /* number of pages to populate */
+ domid_t dom; /* target domain */
+ __u64 addr; /* virtual address */
+ const xen_pfn_t __user *arr; /* array of mfns */
+ int __user *err; /* array of error codes */
};

/*
* @cmd: IOCTL_PRIVCMD_HYPERCALL
* @arg: &privcmd_hypercall_t
* Return: Value returned from execution of the specified hypercall.
+ *
+ * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
+ * @arg: &struct privcmd_mmapbatch_v2
+ * Return: 0 on success (i.e., arg->err contains valid error codes for
+ * each frame). On an error other than a failed frame remap, -1 is
+ * returned and errno is set to EINVAL, EFAULT etc. As an exception,
+ * if the operation was otherwise successful but any frame failed with
+ * -ENOENT, then -1 is returned and errno is set to ENOENT.
*/
#define IOCTL_PRIVCMD_HYPERCALL \
_IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
@@ -72,5 +92,7 @@ struct privcmd_mmapbatch {
_IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
#define IOCTL_PRIVCMD_MMAPBATCH \
_IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
+#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
+ _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))

#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */

On Aug 30, 2012, at 8:58 AM, David Vrabel wrote:

> From: David Vrabel <david.vrabel [at] citrix>
>
> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
> field for reporting the error code for every frame that could not be
> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
>
> Signed-off-by: David Vrabel <david.vrabel [at] citrix>
> ---
> drivers/xen/privcmd.c | 99 +++++++++++++++++++++++++++++++++++++++---------
> include/xen/privcmd.h | 23 +++++++++++-
> 2 files changed, 102 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> index ccee0f1..c0e89e7 100644
> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
> */
> static int gather_array(struct list_head *pagelist,
> unsigned nelem, size_t size,
> - void __user *data)
> + const void __user *data)
> {
> unsigned pageidx;
> void *pagedata;
> @@ -248,18 +248,37 @@ struct mmap_batch_state {
> struct vm_area_struct *vma;
> int err;
>
> - xen_pfn_t __user *user;
> + xen_pfn_t __user *user_mfn;
> + int __user *user_err;
> };
>
> static int mmap_batch_fn(void *data, void *state)
> {
> xen_pfn_t *mfnp = data;
> struct mmap_batch_state *st = state;
> + int ret;
>
> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> - st->vma->vm_page_prot, st->domain) < 0) {
> - *mfnp |= 0xf0000000U;
> - st->err++;
> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> + st->vma->vm_page_prot, st->domain);
> + if (ret < 0) {
> + /*
> + * Error reporting is a mess but userspace relies on
> + * it behaving this way.
> + *
> + * V2 needs to a) return the result of each frame's
> + * remap; and b) return -ENOENT if any frame failed
> + * with -ENOENT.
> + *
> + * In this first pass the error code is saved by
> + * overwriting the mfn and an error is indicated in
> + * st->err.
> + *
> + * The second pass by mmap_return_errors() will write
> + * the error codes to user space and get the right
> + * ioctl return value.
> + */
> + *(int *)mfnp = ret;
> + st->err = ret;
> }
> st->va += PAGE_SIZE;
>
> @@ -270,16 +289,33 @@ static int mmap_return_errors(void *data, void *state)
> {
> xen_pfn_t *mfnp = data;
> struct mmap_batch_state *st = state;
> + int ret;
> +
> + if (st->user_err) {
> + int err = *(int *)mfnp;
> +
> + if (err == -ENOENT)
> + st->err = err;
>
> - return put_user(*mfnp, st->user++);
> + return __put_user(err, st->user_err++);
> + } else {
> + xen_pfn_t mfn;
> +
> + ret = __get_user(mfn, st->user_mfn);
> + if (ret < 0)
> + return ret;
> +
> + mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR;
> + return __put_user(mfn, st->user_mfn++);
> + }
> }
>
> static struct vm_operations_struct privcmd_vm_ops;
>
> -static long privcmd_ioctl_mmap_batch(void __user *udata)
> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
> {
> int ret;
> - struct privcmd_mmapbatch m;
> + struct privcmd_mmapbatch_v2 m;
> struct mm_struct *mm = current->mm;
> struct vm_area_struct *vma;
> unsigned long nr_pages;
> @@ -289,15 +325,31 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
> if (!xen_initial_domain())
> return -EPERM;
>
> - if (copy_from_user(&m, udata, sizeof(m)))
> - return -EFAULT;
> + switch (version) {
> + case 1:
> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
> + return -EFAULT;
> + /* Returns per-frame error in m.arr. */
> + m.err = NULL;
> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
> + return -EFAULT;
> + break;
> + case 2:
> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
> + return -EFAULT;
> + /* Returns per-frame error code in m.err. */
> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
> + return -EFAULT;
> + break;
> + default:
> + return -EINVAL;
> + }
>
> nr_pages = m.num;
> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
> return -EINVAL;
>
> - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
> - m.arr);
> + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
>
> if (ret || list_empty(&pagelist))
> goto out;
> @@ -325,12 +377,17 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>
> up_write(&mm->mmap_sem);
>
> - if (state.err > 0) {
> - state.user = m.arr;
> + if (state.err) {
> + state.err = 0;
> + state.user_mfn = (xen_pfn_t *)m.arr;
> + state.user_err = m.err;
> ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> - &pagelist,
> - mmap_return_errors, &state);
> - }
> + &pagelist,
> + mmap_return_errors, &state);
> + if (ret >= 0)
> + ret = state.err;
> + } else if (m.err)
> + __clear_user(m.err, m.num * sizeof(*m.err));
>
> out:
> free_page_list(&pagelist);
> @@ -354,7 +411,11 @@ static long privcmd_ioctl(struct file *file,
> break;
>
> case IOCTL_PRIVCMD_MMAPBATCH:
> - ret = privcmd_ioctl_mmap_batch(udata);
> + ret = privcmd_ioctl_mmap_batch(udata, 1);
> + break;
> +
> + case IOCTL_PRIVCMD_MMAPBATCH_V2:
> + ret = privcmd_ioctl_mmap_batch(udata, 2);
> break;
>
> default:
> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
> index 17857fb..f60d75c 100644
> --- a/include/xen/privcmd.h
> +++ b/include/xen/privcmd.h
> @@ -59,13 +59,32 @@ struct privcmd_mmapbatch {
> int num; /* number of pages to populate */
> domid_t dom; /* target domain */
> __u64 addr; /* virtual address */
> - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
> + xen_pfn_t __user *arr; /* array of mfns - or'd with
> + PRIVCMD_MMAPBATCH_MFN_ERROR on err */
> +};
> +
> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
> +
> +struct privcmd_mmapbatch_v2 {
> + unsigned int num; /* number of pages to populate */
> + domid_t dom; /* target domain */
> + __u64 addr; /* virtual address */
> + const xen_pfn_t __user *arr; /* array of mfns */
> + int __user *err; /* array of error codes */
> };
>
> /*
> * @cmd: IOCTL_PRIVCMD_HYPERCALL
> * @arg: &privcmd_hypercall_t
> * Return: Value returned from execution of the specified hypercall.
> + *
> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
> + * @arg: &struct privcmd_mmapbatch_v2
> + * Return: 0 on success (i.e., arg->err contains valid error codes for
> + * each frame). On an error other than a failed frame remap, -1 is
> + * returned and errno is set to EINVAL, EFAULT etc. As an exception,
> + * if the operation was otherwise successful but any frame failed with
> + * -ENOENT, then -1 is returned and errno is set to ENOENT.
> */
> #define IOCTL_PRIVCMD_HYPERCALL \
> _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
> @@ -73,5 +92,7 @@ struct privcmd_mmapbatch {
> _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
> #define IOCTL_PRIVCMD_MMAPBATCH \
> _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
> +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
> + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
>
> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
> --
> 1.7.2.5
>


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


konrad.wilk at oracle

Sep 5, 2012, 9:17 AM

Post #14 of 19 (234 views)
Permalink
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl [In reply to]

On Fri, Aug 31, 2012 at 09:13:44AM -0400, Andres Lagar-Cavilla wrote:
>
> On Aug 31, 2012, at 9:08 AM, David Vrabel wrote:
>
> > On 30/08/12 19:32, Andres Lagar-Cavilla wrote:
> >> Second repost of my version, heavily based on David's.
> >
> > Doing another allocation that may be larger than a page (and its
> > associated additional error paths) seems to me to be a hammer to crack
> > the (admittedly bit wonky) casting nut.
> >
> > That said, it's up to Konrad which version he prefers.
>
> Yeah absolutely.

This one (with the comments) looks nicer.
>
> >
> > There are also some minor improvements you could make if you respin this
> > patch.
> >
> >> Complementary to this patch, on the xen tree I intend to add
> >> PRIVCMD_MMAPBATCH_*_ERROR into the libxc header files, and remove
> >> XEN_DOMCTL_PFINFO_PAGEDTAB from domctl.h
> >
> > Yes, a good idea. There's no correspondence between the ioctl's error
> > reporting values and the DOMCTL_PFINFO flags.
> >
> >> commit 3f40e8d79b7e032527ee207a97499ddbc81ca12b
> >> Author: Andres Lagar-Cavilla <andres [at] lagarcavilla>
> >> Date: Thu Aug 30 12:23:33 2012 -0400
> >>
> >> xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
> >>
> >> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
> >> field for reporting the error code for every frame that could not be
> >> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
> >>
> >> Also expand PRIVCMD_MMAPBATCH to return appropriate error-encoding top nibble
> >> in the mfn array.
> >>
> >> Signed-off-by: David Vrabel <david.vrabel [at] citrix>
> >> Signed-off-by: Andres Lagar-Cavilla <andres [at] lagarcavilla>
> > [...]
> >> -static long privcmd_ioctl_mmap_batch(void __user *udata)
> >> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
> >> {
> >> int ret;
> >> - struct privcmd_mmapbatch m;
> >> + struct privcmd_mmapbatch_v2 m;
> >> struct mm_struct *mm = current->mm;
> >> struct vm_area_struct *vma;
> >> unsigned long nr_pages;
> >> LIST_HEAD(pagelist);
> >> + int *err_array;
> >
> > int *err_array = NULL;
> >
> > and you could avoid the additional jump label as kfree(NULL) is safe.
>
> Didn't know, great.
>
> >
> >> struct mmap_batch_state state;
> >>
> >> if (!xen_initial_domain())
> >> return -EPERM;
> >>
> >> - if (copy_from_user(&m, udata, sizeof(m)))
> >> - return -EFAULT;
> >> + switch (version) {
> >> + case 1:
> >> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
> >> + return -EFAULT;
> >> + /* Returns per-frame error in m.arr. */
> >> + m.err = NULL;
> >> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
> >> + return -EFAULT;
> >> + break;
> >> + case 2:
> >> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
> >> + return -EFAULT;
> >> + /* Returns per-frame error code in m.err. */
> >> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
> >> + return -EFAULT;
> >> + break;
> >> + default:
> >> + return -EINVAL;
> >> + }
> >>
> >> nr_pages = m.num;
> >> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
> >> return -EINVAL;
> >>
> >> - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
> >> - m.arr);
> >> + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
> >>
> >> if (ret || list_empty(&pagelist))
> >> - goto out;
> >> + goto out_no_err_array;
> >> +
> >> + err_array = kmalloc_array(m.num, sizeof(int), GFP_KERNEL);
> >
> > kcalloc() (see below).
> >
> >> + if (err_array == NULL)
> >> + {
> >
> > Style: if (err_array == NULL) {
> >
> >> + if (state.global_error) {
> >> + int efault;
> >> +
> >> + if (state.global_error == -ENOENT)
> >> + ret = -ENOENT;
> >> +
> >> + /* Write back errors in second pass. */
> >> + state.user_mfn = (xen_pfn_t *)m.arr;
> >> + state.user_err = m.err;
> >> + state.err = err_array;
> >> + efault = traverse_pages(m.num, sizeof(xen_pfn_t),
> >> + &pagelist, mmap_return_errors, &state);
> >> + if (efault)
> >> + ret = efault;
> >> + } else if (m.err)
> >> + ret = __clear_user(m.err, m.num * sizeof(*m.err));
> >
> > Since you have an array of errors already there's no need to iterate
> > through all the MFNs again for V2. A simple copy_to_user() is
> > sufficient provided err_array was zeroed with kcalloc().
> I can kcalloc for extra paranoia, but the code will set all error slots, and is guaranteed to not jump over anything. I +1 the shortcut you outline below. Re-spin coming.
> Andres
> >
> > if (m.err)
> > ret = __copy_to_user(m.err, err_array, m.num * sizeof(*m.err));
> > else {
> > /* Write back errors in second pass. */
> > state.user_mfn = (xen_pfn_t *)m.arr;
> > state.user_err = m.err;
> > state.err = err_array;
> > ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> > &pagelist, mmap_return_errors, &state);
> > }
> >
> > if (ret == 0 && state.global_error == -ENOENT)
> > ret = -ENOENT;
> >
> > David

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


konrad.wilk at oracle

Sep 5, 2012, 9:21 AM

Post #15 of 19 (231 views)
Permalink
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl [In reply to]

On Fri, Aug 31, 2012 at 09:59:30AM -0400, Andres Lagar-Cavilla wrote:
> Re-spin of alternative patch after David's feedback.
> Thanks
> Andres

Applied. Fixed some whitespace issues.
>
> commit ab351a5cef1797935b083c2f6e72800a8949c515
> Author: Andres Lagar-Cavilla <andres [at] lagarcavilla>
> Date: Thu Aug 30 12:23:33 2012 -0400
>
> xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
>
> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
> field for reporting the error code for every frame that could not be
> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
>
> Also expand PRIVCMD_MMAPBATCH to return appropriate error-encoding top nibble
> in the mfn array.
>
> Signed-off-by: David Vrabel <david.vrabel [at] citrix>
> Signed-off-by: Andres Lagar-Cavilla <andres [at] lagarcavilla>
>
> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> index 85226cb..5386f20 100644
> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
> */
> static int gather_array(struct list_head *pagelist,
> unsigned nelem, size_t size,
> - void __user *data)
> + const void __user *data)
> {
> unsigned pageidx;
> void *pagedata;
> @@ -246,61 +246,117 @@ struct mmap_batch_state {
> domid_t domain;
> unsigned long va;
> struct vm_area_struct *vma;
> - int err;
> -
> - xen_pfn_t __user *user;
> + /* A tristate:
> + * 0 for no errors
> + * 1 if at least one error has happened (and no
> + * -ENOENT errors have happened)
> + * -ENOENT if at least 1 -ENOENT has happened.
> + */
> + int global_error;
> + /* An array for individual errors */
> + int *err;
> +
> + /* User-space mfn array to store errors in the second pass for V1. */
> + xen_pfn_t __user *user_mfn;
> };
>
> static int mmap_batch_fn(void *data, void *state)
> {
> xen_pfn_t *mfnp = data;
> struct mmap_batch_state *st = state;
> + int ret;
>
> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> - st->vma->vm_page_prot, st->domain) < 0) {
> - *mfnp |= 0xf0000000U;
> - st->err++;
> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> + st->vma->vm_page_prot, st->domain);
> +
> + /* Store error code for second pass. */
> + *(st->err++) = ret;
> +
> + /* And see if it affects the global_error. */
> + if (ret < 0) {
> + if (ret == -ENOENT)
> + st->global_error = -ENOENT;
> + else {
> + /* Record that at least one error has happened. */
> + if (st->global_error == 0)
> + st->global_error = 1;
> + }
> }
> st->va += PAGE_SIZE;
>
> return 0;
> }
>
> -static int mmap_return_errors(void *data, void *state)
> +static int mmap_return_errors_v1(void *data, void *state)
> {
> xen_pfn_t *mfnp = data;
> struct mmap_batch_state *st = state;
> -
> - return put_user(*mfnp, st->user++);
> + int err = *(st->err++);
> +
> + /*
> + * V1 encodes the error codes in the 32bit top nibble of the
> + * mfn (with its known limitations vis-a-vis 64 bit callers).
> + */
> + *mfnp |= (err == -ENOENT) ?
> + PRIVCMD_MMAPBATCH_PAGED_ERROR :
> + PRIVCMD_MMAPBATCH_MFN_ERROR;
> + return __put_user(*mfnp, st->user_mfn++);
> }
>
> static struct vm_operations_struct privcmd_vm_ops;
>
> -static long privcmd_ioctl_mmap_batch(void __user *udata)
> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
> {
> int ret;
> - struct privcmd_mmapbatch m;
> + struct privcmd_mmapbatch_v2 m;
> struct mm_struct *mm = current->mm;
> struct vm_area_struct *vma;
> unsigned long nr_pages;
> LIST_HEAD(pagelist);
> + int *err_array = NULL;
> struct mmap_batch_state state;
>
> if (!xen_initial_domain())
> return -EPERM;
>
> - if (copy_from_user(&m, udata, sizeof(m)))
> - return -EFAULT;
> + switch (version) {
> + case 1:
> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
> + return -EFAULT;
> + /* Returns per-frame error in m.arr. */
> + m.err = NULL;
> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
> + return -EFAULT;
> + break;
> + case 2:
> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
> + return -EFAULT;
> + /* Returns per-frame error code in m.err. */
> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
> + return -EFAULT;
> + break;
> + default:
> + return -EINVAL;
> + }
>
> nr_pages = m.num;
> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
> return -EINVAL;
>
> - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
> - m.arr);
> + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
> +
> + if (ret)
> + goto out;
> + if (list_empty(&pagelist)) {
> + ret = -EINVAL;
> + goto out;
> + }
>
> - if (ret || list_empty(&pagelist))
> + err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL);
> + if (err_array == NULL) {
> + ret = -ENOMEM;
> goto out;
> + }
>
> down_write(&mm->mmap_sem);
>
> @@ -315,24 +371,34 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
> goto out;
> }
>
> - state.domain = m.dom;
> - state.vma = vma;
> - state.va = m.addr;
> - state.err = 0;
> + state.domain = m.dom;
> + state.vma = vma;
> + state.va = m.addr;
> + state.global_error = 0;
> + state.err = err_array;
>
> - ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> - &pagelist, mmap_batch_fn, &state);
> + /* mmap_batch_fn guarantees ret == 0 */
> + BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
> + &pagelist, mmap_batch_fn, &state));
>
> up_write(&mm->mmap_sem);
>
> - if (state.err > 0) {
> - state.user = m.arr;
> + if (state.global_error && (version == 1)) {
> + /* Write back errors in second pass. */
> + state.user_mfn = (xen_pfn_t *)m.arr;
> + state.err = err_array;
> ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> - &pagelist,
> - mmap_return_errors, &state);
> - }
> + &pagelist, mmap_return_errors_v1, &state);
> + } else
> + ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
> +
> + /* If we have not had any EFAULT-like global errors then set the global
> + * error to -ENOENT if necessary. */
> + if ((ret == 0) && (state.global_error == -ENOENT))
> + ret = -ENOENT;
>
> out:
> + kfree(err_array);
> free_page_list(&pagelist);
>
> return ret;
> @@ -354,7 +420,11 @@ static long privcmd_ioctl(struct file *file,
> break;
>
> case IOCTL_PRIVCMD_MMAPBATCH:
> - ret = privcmd_ioctl_mmap_batch(udata);
> + ret = privcmd_ioctl_mmap_batch(udata, 1);
> + break;
> +
> + case IOCTL_PRIVCMD_MMAPBATCH_V2:
> + ret = privcmd_ioctl_mmap_batch(udata, 2);
> break;
>
> default:
> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
> index 45c1aa1..a853168 100644
> --- a/include/xen/privcmd.h
> +++ b/include/xen/privcmd.h
> @@ -58,13 +58,33 @@ struct privcmd_mmapbatch {
> int num; /* number of pages to populate */
> domid_t dom; /* target domain */
> __u64 addr; /* virtual address */
> - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
> + xen_pfn_t __user *arr; /* array of mfns - or'd with
> + PRIVCMD_MMAPBATCH_*_ERROR on err */
> +};
> +
> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
> +#define PRIVCMD_MMAPBATCH_PAGED_ERROR 0x80000000U
> +
> +struct privcmd_mmapbatch_v2 {
> + unsigned int num; /* number of pages to populate */
> + domid_t dom; /* target domain */
> + __u64 addr; /* virtual address */
> + const xen_pfn_t __user *arr; /* array of mfns */
> + int __user *err; /* array of error codes */
> };
>
> /*
> * @cmd: IOCTL_PRIVCMD_HYPERCALL
> * @arg: &privcmd_hypercall_t
> * Return: Value returned from execution of the specified hypercall.
> + *
> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
> + * @arg: &struct privcmd_mmapbatch_v2
> + * Return: 0 on success (i.e., arg->err contains valid error codes for
> + * each frame). On an error other than a failed frame remap, -1 is
> + * returned and errno is set to EINVAL, EFAULT etc. As an exception,
> + * if the operation was otherwise successful but any frame failed with
> + * -ENOENT, then -1 is returned and errno is set to ENOENT.
> */
> #define IOCTL_PRIVCMD_HYPERCALL \
> _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
> @@ -72,5 +92,7 @@ struct privcmd_mmapbatch {
> _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
> #define IOCTL_PRIVCMD_MMAPBATCH \
> _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
> +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
> + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
>
> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
>
> On Aug 30, 2012, at 8:58 AM, David Vrabel wrote:
>
> > From: David Vrabel <david.vrabel [at] citrix>
> >
> > PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
> > field for reporting the error code for every frame that could not be
> > mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
> >
> > Signed-off-by: David Vrabel <david.vrabel [at] citrix>
> > ---
> > drivers/xen/privcmd.c | 99 +++++++++++++++++++++++++++++++++++++++---------
> > include/xen/privcmd.h | 23 +++++++++++-
> > 2 files changed, 102 insertions(+), 20 deletions(-)
> >
> > diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> > index ccee0f1..c0e89e7 100644
> > --- a/drivers/xen/privcmd.c
> > +++ b/drivers/xen/privcmd.c
> > @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
> > */
> > static int gather_array(struct list_head *pagelist,
> > unsigned nelem, size_t size,
> > - void __user *data)
> > + const void __user *data)
> > {
> > unsigned pageidx;
> > void *pagedata;
> > @@ -248,18 +248,37 @@ struct mmap_batch_state {
> > struct vm_area_struct *vma;
> > int err;
> >
> > - xen_pfn_t __user *user;
> > + xen_pfn_t __user *user_mfn;
> > + int __user *user_err;
> > };
> >
> > static int mmap_batch_fn(void *data, void *state)
> > {
> > xen_pfn_t *mfnp = data;
> > struct mmap_batch_state *st = state;
> > + int ret;
> >
> > - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> > - st->vma->vm_page_prot, st->domain) < 0) {
> > - *mfnp |= 0xf0000000U;
> > - st->err++;
> > + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> > + st->vma->vm_page_prot, st->domain);
> > + if (ret < 0) {
> > + /*
> > + * Error reporting is a mess but userspace relies on
> > + * it behaving this way.
> > + *
> > + * V2 needs to a) return the result of each frame's
> > + * remap; and b) return -ENOENT if any frame failed
> > + * with -ENOENT.
> > + *
> > + * In this first pass the error code is saved by
> > + * overwriting the mfn and an error is indicated in
> > + * st->err.
> > + *
> > + * The second pass by mmap_return_errors() will write
> > + * the error codes to user space and get the right
> > + * ioctl return value.
> > + */
> > + *(int *)mfnp = ret;
> > + st->err = ret;
> > }
> > st->va += PAGE_SIZE;
> >
> > @@ -270,16 +289,33 @@ static int mmap_return_errors(void *data, void *state)
> > {
> > xen_pfn_t *mfnp = data;
> > struct mmap_batch_state *st = state;
> > + int ret;
> > +
> > + if (st->user_err) {
> > + int err = *(int *)mfnp;
> > +
> > + if (err == -ENOENT)
> > + st->err = err;
> >
> > - return put_user(*mfnp, st->user++);
> > + return __put_user(err, st->user_err++);
> > + } else {
> > + xen_pfn_t mfn;
> > +
> > + ret = __get_user(mfn, st->user_mfn);
> > + if (ret < 0)
> > + return ret;
> > +
> > + mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR;
> > + return __put_user(mfn, st->user_mfn++);
> > + }
> > }
> >
> > static struct vm_operations_struct privcmd_vm_ops;
> >
> > -static long privcmd_ioctl_mmap_batch(void __user *udata)
> > +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
> > {
> > int ret;
> > - struct privcmd_mmapbatch m;
> > + struct privcmd_mmapbatch_v2 m;
> > struct mm_struct *mm = current->mm;
> > struct vm_area_struct *vma;
> > unsigned long nr_pages;
> > @@ -289,15 +325,31 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
> > if (!xen_initial_domain())
> > return -EPERM;
> >
> > - if (copy_from_user(&m, udata, sizeof(m)))
> > - return -EFAULT;
> > + switch (version) {
> > + case 1:
> > + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
> > + return -EFAULT;
> > + /* Returns per-frame error in m.arr. */
> > + m.err = NULL;
> > + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
> > + return -EFAULT;
> > + break;
> > + case 2:
> > + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
> > + return -EFAULT;
> > + /* Returns per-frame error code in m.err. */
> > + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
> > + return -EFAULT;
> > + break;
> > + default:
> > + return -EINVAL;
> > + }
> >
> > nr_pages = m.num;
> > if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
> > return -EINVAL;
> >
> > - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
> > - m.arr);
> > + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
> >
> > if (ret || list_empty(&pagelist))
> > goto out;
> > @@ -325,12 +377,17 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
> >
> > up_write(&mm->mmap_sem);
> >
> > - if (state.err > 0) {
> > - state.user = m.arr;
> > + if (state.err) {
> > + state.err = 0;
> > + state.user_mfn = (xen_pfn_t *)m.arr;
> > + state.user_err = m.err;
> > ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> > - &pagelist,
> > - mmap_return_errors, &state);
> > - }
> > + &pagelist,
> > + mmap_return_errors, &state);
> > + if (ret >= 0)
> > + ret = state.err;
> > + } else if (m.err)
> > + __clear_user(m.err, m.num * sizeof(*m.err));
> >
> > out:
> > free_page_list(&pagelist);
> > @@ -354,7 +411,11 @@ static long privcmd_ioctl(struct file *file,
> > break;
> >
> > case IOCTL_PRIVCMD_MMAPBATCH:
> > - ret = privcmd_ioctl_mmap_batch(udata);
> > + ret = privcmd_ioctl_mmap_batch(udata, 1);
> > + break;
> > +
> > + case IOCTL_PRIVCMD_MMAPBATCH_V2:
> > + ret = privcmd_ioctl_mmap_batch(udata, 2);
> > break;
> >
> > default:
> > diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
> > index 17857fb..f60d75c 100644
> > --- a/include/xen/privcmd.h
> > +++ b/include/xen/privcmd.h
> > @@ -59,13 +59,32 @@ struct privcmd_mmapbatch {
> > int num; /* number of pages to populate */
> > domid_t dom; /* target domain */
> > __u64 addr; /* virtual address */
> > - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
> > + xen_pfn_t __user *arr; /* array of mfns - or'd with
> > + PRIVCMD_MMAPBATCH_MFN_ERROR on err */
> > +};
> > +
> > +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
> > +
> > +struct privcmd_mmapbatch_v2 {
> > + unsigned int num; /* number of pages to populate */
> > + domid_t dom; /* target domain */
> > + __u64 addr; /* virtual address */
> > + const xen_pfn_t __user *arr; /* array of mfns */
> > + int __user *err; /* array of error codes */
> > };
> >
> > /*
> > * @cmd: IOCTL_PRIVCMD_HYPERCALL
> > * @arg: &privcmd_hypercall_t
> > * Return: Value returned from execution of the specified hypercall.
> > + *
> > + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
> > + * @arg: &struct privcmd_mmapbatch_v2
> > + * Return: 0 on success (i.e., arg->err contains valid error codes for
> > + * each frame). On an error other than a failed frame remap, -1 is
> > + * returned and errno is set to EINVAL, EFAULT etc. As an exception,
> > + * if the operation was otherwise successful but any frame failed with
> > + * -ENOENT, then -1 is returned and errno is set to ENOENT.
> > */
> > #define IOCTL_PRIVCMD_HYPERCALL \
> > _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
> > @@ -73,5 +92,7 @@ struct privcmd_mmapbatch {
> > _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
> > #define IOCTL_PRIVCMD_MMAPBATCH \
> > _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
> > +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
> > + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
> >
> > #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
> > --
> > 1.7.2.5
> >

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


andreslc at gridcentric

Sep 5, 2012, 10:09 AM

Post #16 of 19 (231 views)
Permalink
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl [In reply to]

Super. To which branch are you applying these now? (n00b question but have to ask)
Andres
On Sep 5, 2012, at 12:21 PM, Konrad Rzeszutek Wilk wrote:

> On Fri, Aug 31, 2012 at 09:59:30AM -0400, Andres Lagar-Cavilla wrote:
>> Re-spin of alternative patch after David's feedback.
>> Thanks
>> Andres
>
> applied. fixed some whitespace issues.
>>
>> commit ab351a5cef1797935b083c2f6e72800a8949c515
>> Author: Andres Lagar-Cavilla <andres [at] lagarcavilla>
>> Date: Thu Aug 30 12:23:33 2012 -0400
>>
>> xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
>>
>> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
>> field for reporting the error code for every frame that could not be
>> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
>>
>> Also expand PRIVCMD_MMAPBATCH to return appropriate error-encoding top nibble
>> in the mfn array.
>>
>> Signed-off-by: David Vrabel <david.vrabel [at] citrix>
>> Signed-off-by: Andres Lagar-Cavilla <andres [at] lagarcavilla>
>>
>> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
>> index 85226cb..5386f20 100644
>> --- a/drivers/xen/privcmd.c
>> +++ b/drivers/xen/privcmd.c
>> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
>> */
>> static int gather_array(struct list_head *pagelist,
>> unsigned nelem, size_t size,
>> - void __user *data)
>> + const void __user *data)
>> {
>> unsigned pageidx;
>> void *pagedata;
>> @@ -246,61 +246,117 @@ struct mmap_batch_state {
>> domid_t domain;
>> unsigned long va;
>> struct vm_area_struct *vma;
>> - int err;
>> -
>> - xen_pfn_t __user *user;
>> + /* A tristate:
>> + * 0 for no errors
>> + * 1 if at least one error has happened (and no
>> + * -ENOENT errors have happened)
>> + * -ENOENT if at least 1 -ENOENT has happened.
>> + */
>> + int global_error;
>> + /* An array for individual errors */
>> + int *err;
>> +
>> + /* User-space mfn array to store errors in the second pass for V1. */
>> + xen_pfn_t __user *user_mfn;
>> };
>>
>> static int mmap_batch_fn(void *data, void *state)
>> {
>> xen_pfn_t *mfnp = data;
>> struct mmap_batch_state *st = state;
>> + int ret;
>>
>> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
>> - st->vma->vm_page_prot, st->domain) < 0) {
>> - *mfnp |= 0xf0000000U;
>> - st->err++;
>> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
>> + st->vma->vm_page_prot, st->domain);
>> +
>> + /* Store error code for second pass. */
>> + *(st->err++) = ret;
>> +
>> + /* And see if it affects the global_error. */
>> + if (ret < 0) {
>> + if (ret == -ENOENT)
>> + st->global_error = -ENOENT;
>> + else {
>> + /* Record that at least one error has happened. */
>> + if (st->global_error == 0)
>> + st->global_error = 1;
>> + }
>> }
>> st->va += PAGE_SIZE;
>>
>> return 0;
>> }
>>
>> -static int mmap_return_errors(void *data, void *state)
>> +static int mmap_return_errors_v1(void *data, void *state)
>> {
>> xen_pfn_t *mfnp = data;
>> struct mmap_batch_state *st = state;
>> -
>> - return put_user(*mfnp, st->user++);
>> + int err = *(st->err++);
>> +
>> + /*
>> + * V1 encodes the error codes in the 32bit top nibble of the
>> + * mfn (with its known limitations vis-a-vis 64 bit callers).
>> + */
>> + *mfnp |= (err == -ENOENT) ?
>> + PRIVCMD_MMAPBATCH_PAGED_ERROR :
>> + PRIVCMD_MMAPBATCH_MFN_ERROR;
>> + return __put_user(*mfnp, st->user_mfn++);
>> }
>>
>> static struct vm_operations_struct privcmd_vm_ops;
>>
>> -static long privcmd_ioctl_mmap_batch(void __user *udata)
>> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
>> {
>> int ret;
>> - struct privcmd_mmapbatch m;
>> + struct privcmd_mmapbatch_v2 m;
>> struct mm_struct *mm = current->mm;
>> struct vm_area_struct *vma;
>> unsigned long nr_pages;
>> LIST_HEAD(pagelist);
>> + int *err_array = NULL;
>> struct mmap_batch_state state;
>>
>> if (!xen_initial_domain())
>> return -EPERM;
>>
>> - if (copy_from_user(&m, udata, sizeof(m)))
>> - return -EFAULT;
>> + switch (version) {
>> + case 1:
>> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
>> + return -EFAULT;
>> + /* Returns per-frame error in m.arr. */
>> + m.err = NULL;
>> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
>> + return -EFAULT;
>> + break;
>> + case 2:
>> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
>> + return -EFAULT;
>> + /* Returns per-frame error code in m.err. */
>> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
>> + return -EFAULT;
>> + break;
>> + default:
>> + return -EINVAL;
>> + }
>>
>> nr_pages = m.num;
>> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
>> return -EINVAL;
>>
>> - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
>> - m.arr);
>> + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
>> +
>> + if (ret)
>> + goto out;
>> + if (list_empty(&pagelist)) {
>> + ret = -EINVAL;
>> + goto out;
>> + }
>>
>> - if (ret || list_empty(&pagelist))
>> + err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL);
>> + if (err_array == NULL) {
>> + ret = -ENOMEM;
>> goto out;
>> + }
>>
>> down_write(&mm->mmap_sem);
>>
>> @@ -315,24 +371,34 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>> goto out;
>> }
>>
>> - state.domain = m.dom;
>> - state.vma = vma;
>> - state.va = m.addr;
>> - state.err = 0;
>> + state.domain = m.dom;
>> + state.vma = vma;
>> + state.va = m.addr;
>> + state.global_error = 0;
>> + state.err = err_array;
>>
>> - ret = traverse_pages(m.num, sizeof(xen_pfn_t),
>> - &pagelist, mmap_batch_fn, &state);
>> + /* mmap_batch_fn guarantees ret == 0 */
>> + BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
>> + &pagelist, mmap_batch_fn, &state));
>>
>> up_write(&mm->mmap_sem);
>>
>> - if (state.err > 0) {
>> - state.user = m.arr;
>> + if (state.global_error && (version == 1)) {
>> + /* Write back errors in second pass. */
>> + state.user_mfn = (xen_pfn_t *)m.arr;
>> + state.err = err_array;
>> ret = traverse_pages(m.num, sizeof(xen_pfn_t),
>> - &pagelist,
>> - mmap_return_errors, &state);
>> - }
>> + &pagelist, mmap_return_errors_v1, &state);
>> + } else
>> + ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
>> +
>> + /* If we have not had any EFAULT-like global errors then set the global
>> + * error to -ENOENT if necessary. */
>> + if ((ret == 0) && (state.global_error == -ENOENT))
>> + ret = -ENOENT;
>>
>> out:
>> + kfree(err_array);
>> free_page_list(&pagelist);
>>
>> return ret;
>> @@ -354,7 +420,11 @@ static long privcmd_ioctl(struct file *file,
>> break;
>>
>> case IOCTL_PRIVCMD_MMAPBATCH:
>> - ret = privcmd_ioctl_mmap_batch(udata);
>> + ret = privcmd_ioctl_mmap_batch(udata, 1);
>> + break;
>> +
>> + case IOCTL_PRIVCMD_MMAPBATCH_V2:
>> + ret = privcmd_ioctl_mmap_batch(udata, 2);
>> break;
>>
>> default:
>> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
>> index 45c1aa1..a853168 100644
>> --- a/include/xen/privcmd.h
>> +++ b/include/xen/privcmd.h
>> @@ -58,13 +58,33 @@ struct privcmd_mmapbatch {
>> int num; /* number of pages to populate */
>> domid_t dom; /* target domain */
>> __u64 addr; /* virtual address */
>> - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
>> + xen_pfn_t __user *arr; /* array of mfns - or'd with
>> + PRIVCMD_MMAPBATCH_*_ERROR on err */
>> +};
>> +
>> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
>> +#define PRIVCMD_MMAPBATCH_PAGED_ERROR 0x80000000U
>> +
>> +struct privcmd_mmapbatch_v2 {
>> + unsigned int num; /* number of pages to populate */
>> + domid_t dom; /* target domain */
>> + __u64 addr; /* virtual address */
>> + const xen_pfn_t __user *arr; /* array of mfns */
>> + int __user *err; /* array of error codes */
>> };
>>
>> /*
>> * @cmd: IOCTL_PRIVCMD_HYPERCALL
>> * @arg: &privcmd_hypercall_t
>> * Return: Value returned from execution of the specified hypercall.
>> + *
>> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
>> + * @arg: &struct privcmd_mmapbatch_v2
>> + * Return: 0 on success (i.e., arg->err contains valid error codes for
>> + * each frame). On an error other than a failed frame remap, -1 is
>> + * returned and errno is set to EINVAL, EFAULT etc. As an exception,
>> + * if the operation was otherwise successful but any frame failed with
>> + * -ENOENT, then -1 is returned and errno is set to ENOENT.
>> */
>> #define IOCTL_PRIVCMD_HYPERCALL \
>> _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
>> @@ -72,5 +92,7 @@ struct privcmd_mmapbatch {
>> _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
>> #define IOCTL_PRIVCMD_MMAPBATCH \
>> _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
>> +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
>> + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
>>
>> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
>>
>> On Aug 30, 2012, at 8:58 AM, David Vrabel wrote:
>>
>>> From: David Vrabel <david.vrabel [at] citrix>
>>>
>>> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
>>> field for reporting the error code for every frame that could not be
>>> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
>>>
>>> Signed-off-by: David Vrabel <david.vrabel [at] citrix>
>>> ---
>>> drivers/xen/privcmd.c | 99 +++++++++++++++++++++++++++++++++++++++---------
>>> include/xen/privcmd.h | 23 +++++++++++-
>>> 2 files changed, 102 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
>>> index ccee0f1..c0e89e7 100644
>>> --- a/drivers/xen/privcmd.c
>>> +++ b/drivers/xen/privcmd.c
>>> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
>>> */
>>> static int gather_array(struct list_head *pagelist,
>>> unsigned nelem, size_t size,
>>> - void __user *data)
>>> + const void __user *data)
>>> {
>>> unsigned pageidx;
>>> void *pagedata;
>>> @@ -248,18 +248,37 @@ struct mmap_batch_state {
>>> struct vm_area_struct *vma;
>>> int err;
>>>
>>> - xen_pfn_t __user *user;
>>> + xen_pfn_t __user *user_mfn;
>>> + int __user *user_err;
>>> };
>>>
>>> static int mmap_batch_fn(void *data, void *state)
>>> {
>>> xen_pfn_t *mfnp = data;
>>> struct mmap_batch_state *st = state;
>>> + int ret;
>>>
>>> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
>>> - st->vma->vm_page_prot, st->domain) < 0) {
>>> - *mfnp |= 0xf0000000U;
>>> - st->err++;
>>> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
>>> + st->vma->vm_page_prot, st->domain);
>>> + if (ret < 0) {
>>> + /*
>>> + * Error reporting is a mess but userspace relies on
>>> + * it behaving this way.
>>> + *
>>> + * V2 needs to a) return the result of each frame's
>>> + * remap; and b) return -ENOENT if any frame failed
>>> + * with -ENOENT.
>>> + *
>>> + * In this first pass the error code is saved by
>>> + * overwriting the mfn and an error is indicated in
>>> + * st->err.
>>> + *
>>> + * The second pass by mmap_return_errors() will write
>>> + * the error codes to user space and get the right
>>> + * ioctl return value.
>>> + */
>>> + *(int *)mfnp = ret;
>>> + st->err = ret;
>>> }
>>> st->va += PAGE_SIZE;
>>>
>>> @@ -270,16 +289,33 @@ static int mmap_return_errors(void *data, void *state)
>>> {
>>> xen_pfn_t *mfnp = data;
>>> struct mmap_batch_state *st = state;
>>> + int ret;
>>> +
>>> + if (st->user_err) {
>>> + int err = *(int *)mfnp;
>>> +
>>> + if (err == -ENOENT)
>>> + st->err = err;
>>>
>>> - return put_user(*mfnp, st->user++);
>>> + return __put_user(err, st->user_err++);
>>> + } else {
>>> + xen_pfn_t mfn;
>>> +
>>> + ret = __get_user(mfn, st->user_mfn);
>>> + if (ret < 0)
>>> + return ret;
>>> +
>>> + mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR;
>>> + return __put_user(mfn, st->user_mfn++);
>>> + }
>>> }
>>>
>>> static struct vm_operations_struct privcmd_vm_ops;
>>>
>>> -static long privcmd_ioctl_mmap_batch(void __user *udata)
>>> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
>>> {
>>> int ret;
>>> - struct privcmd_mmapbatch m;
>>> + struct privcmd_mmapbatch_v2 m;
>>> struct mm_struct *mm = current->mm;
>>> struct vm_area_struct *vma;
>>> unsigned long nr_pages;
>>> @@ -289,15 +325,31 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>>> if (!xen_initial_domain())
>>> return -EPERM;
>>>
>>> - if (copy_from_user(&m, udata, sizeof(m)))
>>> - return -EFAULT;
>>> + switch (version) {
>>> + case 1:
>>> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
>>> + return -EFAULT;
>>> + /* Returns per-frame error in m.arr. */
>>> + m.err = NULL;
>>> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
>>> + return -EFAULT;
>>> + break;
>>> + case 2:
>>> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
>>> + return -EFAULT;
>>> + /* Returns per-frame error code in m.err. */
>>> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
>>> + return -EFAULT;
>>> + break;
>>> + default:
>>> + return -EINVAL;
>>> + }
>>>
>>> nr_pages = m.num;
>>> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
>>> return -EINVAL;
>>>
>>> - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
>>> - m.arr);
>>> + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
>>>
>>> if (ret || list_empty(&pagelist))
>>> goto out;
>>> @@ -325,12 +377,17 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>>>
>>> up_write(&mm->mmap_sem);
>>>
>>> - if (state.err > 0) {
>>> - state.user = m.arr;
>>> + if (state.err) {
>>> + state.err = 0;
>>> + state.user_mfn = (xen_pfn_t *)m.arr;
>>> + state.user_err = m.err;
>>> ret = traverse_pages(m.num, sizeof(xen_pfn_t),
>>> - &pagelist,
>>> - mmap_return_errors, &state);
>>> - }
>>> + &pagelist,
>>> + mmap_return_errors, &state);
>>> + if (ret >= 0)
>>> + ret = state.err;
>>> + } else if (m.err)
>>> + __clear_user(m.err, m.num * sizeof(*m.err));
>>>
>>> out:
>>> free_page_list(&pagelist);
>>> @@ -354,7 +411,11 @@ static long privcmd_ioctl(struct file *file,
>>> break;
>>>
>>> case IOCTL_PRIVCMD_MMAPBATCH:
>>> - ret = privcmd_ioctl_mmap_batch(udata);
>>> + ret = privcmd_ioctl_mmap_batch(udata, 1);
>>> + break;
>>> +
>>> + case IOCTL_PRIVCMD_MMAPBATCH_V2:
>>> + ret = privcmd_ioctl_mmap_batch(udata, 2);
>>> break;
>>>
>>> default:
>>> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
>>> index 17857fb..f60d75c 100644
>>> --- a/include/xen/privcmd.h
>>> +++ b/include/xen/privcmd.h
>>> @@ -59,13 +59,32 @@ struct privcmd_mmapbatch {
>>> int num; /* number of pages to populate */
>>> domid_t dom; /* target domain */
>>> __u64 addr; /* virtual address */
>>> - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
>>> + xen_pfn_t __user *arr; /* array of mfns - or'd with
>>> + PRIVCMD_MMAPBATCH_MFN_ERROR on err */
>>> +};
>>> +
>>> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
>>> +
>>> +struct privcmd_mmapbatch_v2 {
>>> + unsigned int num; /* number of pages to populate */
>>> + domid_t dom; /* target domain */
>>> + __u64 addr; /* virtual address */
>>> + const xen_pfn_t __user *arr; /* array of mfns */
>>> + int __user *err; /* array of error codes */
>>> };
>>>
>>> /*
>>> * @cmd: IOCTL_PRIVCMD_HYPERCALL
>>> * @arg: &privcmd_hypercall_t
>>> * Return: Value returned from execution of the specified hypercall.
>>> + *
>>> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
>>> + * @arg: &struct privcmd_mmapbatch_v2
>>> + * Return: 0 on success (i.e., arg->err contains valid error codes for
>>> + * each frame). On an error other than a failed frame remap, -1 is
>>> + * returned and errno is set to EINVAL, EFAULT etc. As an exception,
>>> + * if the operation was otherwise successful but any frame failed with
>>> + * -ENOENT, then -1 is returned and errno is set to ENOENT.
>>> */
>>> #define IOCTL_PRIVCMD_HYPERCALL \
>>> _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
>>> @@ -73,5 +92,7 @@ struct privcmd_mmapbatch {
>>> _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
>>> #define IOCTL_PRIVCMD_MMAPBATCH \
>>> _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
>>> +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
>>> + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
>>>
>>> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
>>> --
>>> 1.7.2.5
>>>


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


konrad.wilk at oracle

Sep 5, 2012, 10:40 AM

Post #17 of 19 (229 views)
Permalink
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl [In reply to]

On Wed, Sep 05, 2012 at 01:09:32PM -0400, Andres Lagar-Cavilla wrote:
> Super. To which branch are you applying these now? (n00b question but have to ask)

They will show up on stable/for-linus-3.7 once the overnight tests pass.

> Andres
> On Sep 5, 2012, at 12:21 PM, Konrad Rzeszutek Wilk wrote:
>
> > On Fri, Aug 31, 2012 at 09:59:30AM -0400, Andres Lagar-Cavilla wrote:
> >> Re-spin of alternative patch after David's feedback.
> >> Thanks
> >> Andres
> >
> > applied. fixed some whitespace issues.
> >>
> >> commit ab351a5cef1797935b083c2f6e72800a8949c515
> >> Author: Andres Lagar-Cavilla <andres [at] lagarcavilla>
> >> Date: Thu Aug 30 12:23:33 2012 -0400
> >>
> >> xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
> >>
> >> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
> >> field for reporting the error code for every frame that could not be
> >> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
> >>
> >> Also expand PRIVCMD_MMAPBATCH to return appropriate error-encoding top nibble
> >> in the mfn array.
> >>
> >> Signed-off-by: David Vrabel <david.vrabel [at] citrix>
> >> Signed-off-by: Andres Lagar-Cavilla <andres [at] lagarcavilla>
> >>
> >> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> >> index 85226cb..5386f20 100644
> >> --- a/drivers/xen/privcmd.c
> >> +++ b/drivers/xen/privcmd.c
> >> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
> >> */
> >> static int gather_array(struct list_head *pagelist,
> >> unsigned nelem, size_t size,
> >> - void __user *data)
> >> + const void __user *data)
> >> {
> >> unsigned pageidx;
> >> void *pagedata;
> >> @@ -246,61 +246,117 @@ struct mmap_batch_state {
> >> domid_t domain;
> >> unsigned long va;
> >> struct vm_area_struct *vma;
> >> - int err;
> >> -
> >> - xen_pfn_t __user *user;
> >> + /* A tristate:
> >> + * 0 for no errors
> >> + * 1 if at least one error has happened (and no
> >> + * -ENOENT errors have happened)
> >> + * -ENOENT if at least 1 -ENOENT has happened.
> >> + */
> >> + int global_error;
> >> + /* An array for individual errors */
> >> + int *err;
> >> +
> >> + /* User-space mfn array to store errors in the second pass for V1. */
> >> + xen_pfn_t __user *user_mfn;
> >> };
> >>
> >> static int mmap_batch_fn(void *data, void *state)
> >> {
> >> xen_pfn_t *mfnp = data;
> >> struct mmap_batch_state *st = state;
> >> + int ret;
> >>
> >> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> >> - st->vma->vm_page_prot, st->domain) < 0) {
> >> - *mfnp |= 0xf0000000U;
> >> - st->err++;
> >> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> >> + st->vma->vm_page_prot, st->domain);
> >> +
> >> + /* Store error code for second pass. */
> >> + *(st->err++) = ret;
> >> +
> >> + /* And see if it affects the global_error. */
> >> + if (ret < 0) {
> >> + if (ret == -ENOENT)
> >> + st->global_error = -ENOENT;
> >> + else {
> >> + /* Record that at least one error has happened. */
> >> + if (st->global_error == 0)
> >> + st->global_error = 1;
> >> + }
> >> }
> >> st->va += PAGE_SIZE;
> >>
> >> return 0;
> >> }
> >>
> >> -static int mmap_return_errors(void *data, void *state)
> >> +static int mmap_return_errors_v1(void *data, void *state)
> >> {
> >> xen_pfn_t *mfnp = data;
> >> struct mmap_batch_state *st = state;
> >> -
> >> - return put_user(*mfnp, st->user++);
> >> + int err = *(st->err++);
> >> +
> >> + /*
> >> + * V1 encodes the error codes in the 32bit top nibble of the
> >> + * mfn (with its known limitations vis-a-vis 64 bit callers).
> >> + */
> >> + *mfnp |= (err == -ENOENT) ?
> >> + PRIVCMD_MMAPBATCH_PAGED_ERROR :
> >> + PRIVCMD_MMAPBATCH_MFN_ERROR;
> >> + return __put_user(*mfnp, st->user_mfn++);
> >> }
> >>
> >> static struct vm_operations_struct privcmd_vm_ops;
> >>
> >> -static long privcmd_ioctl_mmap_batch(void __user *udata)
> >> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
> >> {
> >> int ret;
> >> - struct privcmd_mmapbatch m;
> >> + struct privcmd_mmapbatch_v2 m;
> >> struct mm_struct *mm = current->mm;
> >> struct vm_area_struct *vma;
> >> unsigned long nr_pages;
> >> LIST_HEAD(pagelist);
> >> + int *err_array = NULL;
> >> struct mmap_batch_state state;
> >>
> >> if (!xen_initial_domain())
> >> return -EPERM;
> >>
> >> - if (copy_from_user(&m, udata, sizeof(m)))
> >> - return -EFAULT;
> >> + switch (version) {
> >> + case 1:
> >> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
> >> + return -EFAULT;
> >> + /* Returns per-frame error in m.arr. */
> >> + m.err = NULL;
> >> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
> >> + return -EFAULT;
> >> + break;
> >> + case 2:
> >> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
> >> + return -EFAULT;
> >> + /* Returns per-frame error code in m.err. */
> >> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
> >> + return -EFAULT;
> >> + break;
> >> + default:
> >> + return -EINVAL;
> >> + }
> >>
> >> nr_pages = m.num;
> >> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
> >> return -EINVAL;
> >>
> >> - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
> >> - m.arr);
> >> + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
> >> +
> >> + if (ret)
> >> + goto out;
> >> + if (list_empty(&pagelist)) {
> >> + ret = -EINVAL;
> >> + goto out;
> >> + }
> >>
> >> - if (ret || list_empty(&pagelist))
> >> + err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL);
> >> + if (err_array == NULL) {
> >> + ret = -ENOMEM;
> >> goto out;
> >> + }
> >>
> >> down_write(&mm->mmap_sem);
> >>
> >> @@ -315,24 +371,34 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
> >> goto out;
> >> }
> >>
> >> - state.domain = m.dom;
> >> - state.vma = vma;
> >> - state.va = m.addr;
> >> - state.err = 0;
> >> + state.domain = m.dom;
> >> + state.vma = vma;
> >> + state.va = m.addr;
> >> + state.global_error = 0;
> >> + state.err = err_array;
> >>
> >> - ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> >> - &pagelist, mmap_batch_fn, &state);
> >> + /* mmap_batch_fn guarantees ret == 0 */
> >> + BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
> >> + &pagelist, mmap_batch_fn, &state));
> >>
> >> up_write(&mm->mmap_sem);
> >>
> >> - if (state.err > 0) {
> >> - state.user = m.arr;
> >> + if (state.global_error && (version == 1)) {
> >> + /* Write back errors in second pass. */
> >> + state.user_mfn = (xen_pfn_t *)m.arr;
> >> + state.err = err_array;
> >> ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> >> - &pagelist,
> >> - mmap_return_errors, &state);
> >> - }
> >> + &pagelist, mmap_return_errors_v1, &state);
> >> + } else
> >> + ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
> >> +
> >> + /* If we have not had any EFAULT-like global errors then set the global
> >> + * error to -ENOENT if necessary. */
> >> + if ((ret == 0) && (state.global_error == -ENOENT))
> >> + ret = -ENOENT;
> >>
> >> out:
> >> + kfree(err_array);
> >> free_page_list(&pagelist);
> >>
> >> return ret;
> >> @@ -354,7 +420,11 @@ static long privcmd_ioctl(struct file *file,
> >> break;
> >>
> >> case IOCTL_PRIVCMD_MMAPBATCH:
> >> - ret = privcmd_ioctl_mmap_batch(udata);
> >> + ret = privcmd_ioctl_mmap_batch(udata, 1);
> >> + break;
> >> +
> >> + case IOCTL_PRIVCMD_MMAPBATCH_V2:
> >> + ret = privcmd_ioctl_mmap_batch(udata, 2);
> >> break;
> >>
> >> default:
> >> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
> >> index 45c1aa1..a853168 100644
> >> --- a/include/xen/privcmd.h
> >> +++ b/include/xen/privcmd.h
> >> @@ -58,13 +58,33 @@ struct privcmd_mmapbatch {
> >> int num; /* number of pages to populate */
> >> domid_t dom; /* target domain */
> >> __u64 addr; /* virtual address */
> >> - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
> >> + xen_pfn_t __user *arr; /* array of mfns - or'd with
> >> + PRIVCMD_MMAPBATCH_*_ERROR on err */
> >> +};
> >> +
> >> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
> >> +#define PRIVCMD_MMAPBATCH_PAGED_ERROR 0x80000000U
> >> +
> >> +struct privcmd_mmapbatch_v2 {
> >> + unsigned int num; /* number of pages to populate */
> >> + domid_t dom; /* target domain */
> >> + __u64 addr; /* virtual address */
> >> + const xen_pfn_t __user *arr; /* array of mfns */
> >> + int __user *err; /* array of error codes */
> >> };
> >>
> >> /*
> >> * @cmd: IOCTL_PRIVCMD_HYPERCALL
> >> * @arg: &privcmd_hypercall_t
> >> * Return: Value returned from execution of the specified hypercall.
> >> + *
> >> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
> >> + * @arg: &struct privcmd_mmapbatch_v2
> >> + * Return: 0 on success (i.e., arg->err contains valid error codes for
> >> + * each frame). On an error other than a failed frame remap, -1 is
> >> + * returned and errno is set to EINVAL, EFAULT etc. As an exception,
> >> + * if the operation was otherwise successful but any frame failed with
> >> + * -ENOENT, then -1 is returned and errno is set to ENOENT.
> >> */
> >> #define IOCTL_PRIVCMD_HYPERCALL \
> >> _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
> >> @@ -72,5 +92,7 @@ struct privcmd_mmapbatch {
> >> _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
> >> #define IOCTL_PRIVCMD_MMAPBATCH \
> >> _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
> >> +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
> >> + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
> >>
> >> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
> >>
> >> On Aug 30, 2012, at 8:58 AM, David Vrabel wrote:
> >>
> >>> From: David Vrabel <david.vrabel [at] citrix>
> >>>
> >>> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
> >>> field for reporting the error code for every frame that could not be
> >>> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
> >>>
> >>> Signed-off-by: David Vrabel <david.vrabel [at] citrix>
> >>> ---
> >>> drivers/xen/privcmd.c | 99 +++++++++++++++++++++++++++++++++++++++---------
> >>> include/xen/privcmd.h | 23 +++++++++++-
> >>> 2 files changed, 102 insertions(+), 20 deletions(-)
> >>>
> >>> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> >>> index ccee0f1..c0e89e7 100644
> >>> --- a/drivers/xen/privcmd.c
> >>> +++ b/drivers/xen/privcmd.c
> >>> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
> >>> */
> >>> static int gather_array(struct list_head *pagelist,
> >>> unsigned nelem, size_t size,
> >>> - void __user *data)
> >>> + const void __user *data)
> >>> {
> >>> unsigned pageidx;
> >>> void *pagedata;
> >>> @@ -248,18 +248,37 @@ struct mmap_batch_state {
> >>> struct vm_area_struct *vma;
> >>> int err;
> >>>
> >>> - xen_pfn_t __user *user;
> >>> + xen_pfn_t __user *user_mfn;
> >>> + int __user *user_err;
> >>> };
> >>>
> >>> static int mmap_batch_fn(void *data, void *state)
> >>> {
> >>> xen_pfn_t *mfnp = data;
> >>> struct mmap_batch_state *st = state;
> >>> + int ret;
> >>>
> >>> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> >>> - st->vma->vm_page_prot, st->domain) < 0) {
> >>> - *mfnp |= 0xf0000000U;
> >>> - st->err++;
> >>> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> >>> + st->vma->vm_page_prot, st->domain);
> >>> + if (ret < 0) {
> >>> + /*
> >>> + * Error reporting is a mess but userspace relies on
> >>> + * it behaving this way.
> >>> + *
> >>> + * V2 needs to a) return the result of each frame's
> >>> + * remap; and b) return -ENOENT if any frame failed
> >>> + * with -ENOENT.
> >>> + *
> >>> + * In this first pass the error code is saved by
> >>> + * overwriting the mfn and an error is indicated in
> >>> + * st->err.
> >>> + *
> >>> + * The second pass by mmap_return_errors() will write
> >>> + * the error codes to user space and get the right
> >>> + * ioctl return value.
> >>> + */
> >>> + *(int *)mfnp = ret;
> >>> + st->err = ret;
> >>> }
> >>> st->va += PAGE_SIZE;
> >>>
> >>> @@ -270,16 +289,33 @@ static int mmap_return_errors(void *data, void *state)
> >>> {
> >>> xen_pfn_t *mfnp = data;
> >>> struct mmap_batch_state *st = state;
> >>> + int ret;
> >>> +
> >>> + if (st->user_err) {
> >>> + int err = *(int *)mfnp;
> >>> +
> >>> + if (err == -ENOENT)
> >>> + st->err = err;
> >>>
> >>> - return put_user(*mfnp, st->user++);
> >>> + return __put_user(err, st->user_err++);
> >>> + } else {
> >>> + xen_pfn_t mfn;
> >>> +
> >>> + ret = __get_user(mfn, st->user_mfn);
> >>> + if (ret < 0)
> >>> + return ret;
> >>> +
> >>> + mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR;
> >>> + return __put_user(mfn, st->user_mfn++);
> >>> + }
> >>> }
> >>>
> >>> static struct vm_operations_struct privcmd_vm_ops;
> >>>
> >>> -static long privcmd_ioctl_mmap_batch(void __user *udata)
> >>> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
> >>> {
> >>> int ret;
> >>> - struct privcmd_mmapbatch m;
> >>> + struct privcmd_mmapbatch_v2 m;
> >>> struct mm_struct *mm = current->mm;
> >>> struct vm_area_struct *vma;
> >>> unsigned long nr_pages;
> >>> @@ -289,15 +325,31 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
> >>> if (!xen_initial_domain())
> >>> return -EPERM;
> >>>
> >>> - if (copy_from_user(&m, udata, sizeof(m)))
> >>> - return -EFAULT;
> >>> + switch (version) {
> >>> + case 1:
> >>> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
> >>> + return -EFAULT;
> >>> + /* Returns per-frame error in m.arr. */
> >>> + m.err = NULL;
> >>> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
> >>> + return -EFAULT;
> >>> + break;
> >>> + case 2:
> >>> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
> >>> + return -EFAULT;
> >>> + /* Returns per-frame error code in m.err. */
> >>> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
> >>> + return -EFAULT;
> >>> + break;
> >>> + default:
> >>> + return -EINVAL;
> >>> + }
> >>>
> >>> nr_pages = m.num;
> >>> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
> >>> return -EINVAL;
> >>>
> >>> - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
> >>> - m.arr);
> >>> + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
> >>>
> >>> if (ret || list_empty(&pagelist))
> >>> goto out;
> >>> @@ -325,12 +377,17 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
> >>>
> >>> up_write(&mm->mmap_sem);
> >>>
> >>> - if (state.err > 0) {
> >>> - state.user = m.arr;
> >>> + if (state.err) {
> >>> + state.err = 0;
> >>> + state.user_mfn = (xen_pfn_t *)m.arr;
> >>> + state.user_err = m.err;
> >>> ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> >>> - &pagelist,
> >>> - mmap_return_errors, &state);
> >>> - }
> >>> + &pagelist,
> >>> + mmap_return_errors, &state);
> >>> + if (ret >= 0)
> >>> + ret = state.err;
> >>> + } else if (m.err)
> >>> + __clear_user(m.err, m.num * sizeof(*m.err));
> >>>
> >>> out:
> >>> free_page_list(&pagelist);
> >>> @@ -354,7 +411,11 @@ static long privcmd_ioctl(struct file *file,
> >>> break;
> >>>
> >>> case IOCTL_PRIVCMD_MMAPBATCH:
> >>> - ret = privcmd_ioctl_mmap_batch(udata);
> >>> + ret = privcmd_ioctl_mmap_batch(udata, 1);
> >>> + break;
> >>> +
> >>> + case IOCTL_PRIVCMD_MMAPBATCH_V2:
> >>> + ret = privcmd_ioctl_mmap_batch(udata, 2);
> >>> break;
> >>>
> >>> default:
> >>> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
> >>> index 17857fb..f60d75c 100644
> >>> --- a/include/xen/privcmd.h
> >>> +++ b/include/xen/privcmd.h
> >>> @@ -59,13 +59,32 @@ struct privcmd_mmapbatch {
> >>> int num; /* number of pages to populate */
> >>> domid_t dom; /* target domain */
> >>> __u64 addr; /* virtual address */
> >>> - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
> >>> + xen_pfn_t __user *arr; /* array of mfns - or'd with
> >>> + PRIVCMD_MMAPBATCH_MFN_ERROR on err */
> >>> +};
> >>> +
> >>> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
> >>> +
> >>> +struct privcmd_mmapbatch_v2 {
> >>> + unsigned int num; /* number of pages to populate */
> >>> + domid_t dom; /* target domain */
> >>> + __u64 addr; /* virtual address */
> >>> + const xen_pfn_t __user *arr; /* array of mfns */
> >>> + int __user *err; /* array of error codes */
> >>> };
> >>>
> >>> /*
> >>> * @cmd: IOCTL_PRIVCMD_HYPERCALL
> >>> * @arg: &privcmd_hypercall_t
> >>> * Return: Value returned from execution of the specified hypercall.
> >>> + *
> >>> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
> >>> + * @arg: &struct privcmd_mmapbatch_v2
> >>> + * Return: 0 on success (i.e., arg->err contains valid error codes for
> >>> + * each frame). On an error other than a failed frame remap, -1 is
> >>> + * returned and errno is set to EINVAL, EFAULT etc. As an exception,
> >>> + * if the operation was otherwise successful but any frame failed with
> >>> + * -ENOENT, then -1 is returned and errno is set to ENOENT.
> >>> */
> >>> #define IOCTL_PRIVCMD_HYPERCALL \
> >>> _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
> >>> @@ -73,5 +92,7 @@ struct privcmd_mmapbatch {
> >>> _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
> >>> #define IOCTL_PRIVCMD_MMAPBATCH \
> >>> _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
> >>> +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
> >>> + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
> >>>
> >>> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
> >>> --
> >>> 1.7.2.5
> >>>

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


andreslc at gridcentric

Sep 6, 2012, 6:41 AM

Post #18 of 19 (232 views)
Permalink
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl [In reply to]

On Sep 5, 2012, at 1:40 PM, Konrad Rzeszutek Wilk wrote:

> On Wed, Sep 05, 2012 at 01:09:32PM -0400, Andres Lagar-Cavilla wrote:
>> Super. To which branch are you applying these now? (n00b question but have to ask)
>
> They will show up on stable/for-linus-3.7 once the overnight tests pass.

I would be very surprised if this passed your nightlies. This is because the following hunk is necessary:

diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 5386f20..e4dfa3b 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -389,7 +389,7 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
state.err = err_array;
ret = traverse_pages(m.num, sizeof(xen_pfn_t),
&pagelist, mmap_return_errors_v1, &state);
- } else
+ } else if (version == 2)
ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));

/* If we have not had any EFAULT-like global errors then set the global

I can either resubmit the original patch with this squashed in, or send this stand-alone. Nightlies may have passed if your libxc never exercises v1 in favor of v2. But this is broken for v1 as it will unconditionally attempt a copy to user on a NULL target and this sets rc to EFAULT.

Andres

>
>> Andres
>> On Sep 5, 2012, at 12:21 PM, Konrad Rzeszutek Wilk wrote:
>>
>>> On Fri, Aug 31, 2012 at 09:59:30AM -0400, Andres Lagar-Cavilla wrote:
>>>> Re-spin of alternative patch after David's feedback.
>>>> Thanks
>>>> Andres
>>>
>>> applied. fixed some whitespace issues.
>>>>
>>>> commit ab351a5cef1797935b083c2f6e72800a8949c515
>>>> Author: Andres Lagar-Cavilla <andres [at] lagarcavilla>
>>>> Date: Thu Aug 30 12:23:33 2012 -0400
>>>>
>>>> xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
>>>>
>>>> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
>>>> field for reporting the error code for every frame that could not be
>>>> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
>>>>
>>>> Also expand PRIVCMD_MMAPBATCH to return appropriate error-encoding top nibble
>>>> in the mfn array.
>>>>
>>>> Signed-off-by: David Vrabel <david.vrabel [at] citrix>
>>>> Signed-off-by: Andres Lagar-Cavilla <andres [at] lagarcavilla>
>>>>
>>>> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
>>>> index 85226cb..5386f20 100644
>>>> --- a/drivers/xen/privcmd.c
>>>> +++ b/drivers/xen/privcmd.c
>>>> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
>>>> */
>>>> static int gather_array(struct list_head *pagelist,
>>>> unsigned nelem, size_t size,
>>>> - void __user *data)
>>>> + const void __user *data)
>>>> {
>>>> unsigned pageidx;
>>>> void *pagedata;
>>>> @@ -246,61 +246,117 @@ struct mmap_batch_state {
>>>> domid_t domain;
>>>> unsigned long va;
>>>> struct vm_area_struct *vma;
>>>> - int err;
>>>> -
>>>> - xen_pfn_t __user *user;
>>>> + /* A tristate:
>>>> + * 0 for no errors
>>>> + * 1 if at least one error has happened (and no
>>>> + * -ENOENT errors have happened)
>>>> + * -ENOENT if at least 1 -ENOENT has happened.
>>>> + */
>>>> + int global_error;
>>>> + /* An array for individual errors */
>>>> + int *err;
>>>> +
>>>> + /* User-space mfn array to store errors in the second pass for V1. */
>>>> + xen_pfn_t __user *user_mfn;
>>>> };
>>>>
>>>> static int mmap_batch_fn(void *data, void *state)
>>>> {
>>>> xen_pfn_t *mfnp = data;
>>>> struct mmap_batch_state *st = state;
>>>> + int ret;
>>>>
>>>> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
>>>> - st->vma->vm_page_prot, st->domain) < 0) {
>>>> - *mfnp |= 0xf0000000U;
>>>> - st->err++;
>>>> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
>>>> + st->vma->vm_page_prot, st->domain);
>>>> +
>>>> + /* Store error code for second pass. */
>>>> + *(st->err++) = ret;
>>>> +
>>>> + /* And see if it affects the global_error. */
>>>> + if (ret < 0) {
>>>> + if (ret == -ENOENT)
>>>> + st->global_error = -ENOENT;
>>>> + else {
>>>> + /* Record that at least one error has happened. */
>>>> + if (st->global_error == 0)
>>>> + st->global_error = 1;
>>>> + }
>>>> }
>>>> st->va += PAGE_SIZE;
>>>>
>>>> return 0;
>>>> }
>>>>
>>>> -static int mmap_return_errors(void *data, void *state)
>>>> +static int mmap_return_errors_v1(void *data, void *state)
>>>> {
>>>> xen_pfn_t *mfnp = data;
>>>> struct mmap_batch_state *st = state;
>>>> -
>>>> - return put_user(*mfnp, st->user++);
>>>> + int err = *(st->err++);
>>>> +
>>>> + /*
>>>> + * V1 encodes the error codes in the 32bit top nibble of the
>>>> + * mfn (with its known limitations vis-a-vis 64 bit callers).
>>>> + */
>>>> + *mfnp |= (err == -ENOENT) ?
>>>> + PRIVCMD_MMAPBATCH_PAGED_ERROR :
>>>> + PRIVCMD_MMAPBATCH_MFN_ERROR;
>>>> + return __put_user(*mfnp, st->user_mfn++);
>>>> }
>>>>
>>>> static struct vm_operations_struct privcmd_vm_ops;
>>>>
>>>> -static long privcmd_ioctl_mmap_batch(void __user *udata)
>>>> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
>>>> {
>>>> int ret;
>>>> - struct privcmd_mmapbatch m;
>>>> + struct privcmd_mmapbatch_v2 m;
>>>> struct mm_struct *mm = current->mm;
>>>> struct vm_area_struct *vma;
>>>> unsigned long nr_pages;
>>>> LIST_HEAD(pagelist);
>>>> + int *err_array = NULL;
>>>> struct mmap_batch_state state;
>>>>
>>>> if (!xen_initial_domain())
>>>> return -EPERM;
>>>>
>>>> - if (copy_from_user(&m, udata, sizeof(m)))
>>>> - return -EFAULT;
>>>> + switch (version) {
>>>> + case 1:
>>>> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
>>>> + return -EFAULT;
>>>> + /* Returns per-frame error in m.arr. */
>>>> + m.err = NULL;
>>>> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
>>>> + return -EFAULT;
>>>> + break;
>>>> + case 2:
>>>> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
>>>> + return -EFAULT;
>>>> + /* Returns per-frame error code in m.err. */
>>>> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
>>>> + return -EFAULT;
>>>> + break;
>>>> + default:
>>>> + return -EINVAL;
>>>> + }
>>>>
>>>> nr_pages = m.num;
>>>> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
>>>> return -EINVAL;
>>>>
>>>> - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
>>>> - m.arr);
>>>> + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
>>>> +
>>>> + if (ret)
>>>> + goto out;
>>>> + if (list_empty(&pagelist)) {
>>>> + ret = -EINVAL;
>>>> + goto out;
>>>> + }
>>>>
>>>> - if (ret || list_empty(&pagelist))
>>>> + err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL);
>>>> + if (err_array == NULL) {
>>>> + ret = -ENOMEM;
>>>> goto out;
>>>> + }
>>>>
>>>> down_write(&mm->mmap_sem);
>>>>
>>>> @@ -315,24 +371,34 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>>>> goto out;
>>>> }
>>>>
>>>> - state.domain = m.dom;
>>>> - state.vma = vma;
>>>> - state.va = m.addr;
>>>> - state.err = 0;
>>>> + state.domain = m.dom;
>>>> + state.vma = vma;
>>>> + state.va = m.addr;
>>>> + state.global_error = 0;
>>>> + state.err = err_array;
>>>>
>>>> - ret = traverse_pages(m.num, sizeof(xen_pfn_t),
>>>> - &pagelist, mmap_batch_fn, &state);
>>>> + /* mmap_batch_fn guarantees ret == 0 */
>>>> + BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
>>>> + &pagelist, mmap_batch_fn, &state));
>>>>
>>>> up_write(&mm->mmap_sem);
>>>>
>>>> - if (state.err > 0) {
>>>> - state.user = m.arr;
>>>> + if (state.global_error && (version == 1)) {
>>>> + /* Write back errors in second pass. */
>>>> + state.user_mfn = (xen_pfn_t *)m.arr;
>>>> + state.err = err_array;
>>>> ret = traverse_pages(m.num, sizeof(xen_pfn_t),
>>>> - &pagelist,
>>>> - mmap_return_errors, &state);
>>>> - }
>>>> + &pagelist, mmap_return_errors_v1, &state);
>>>> + } else
>>>> + ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
>>>> +
>>>> + /* If we have not had any EFAULT-like global errors then set the global
>>>> + * error to -ENOENT if necessary. */
>>>> + if ((ret == 0) && (state.global_error == -ENOENT))
>>>> + ret = -ENOENT;
>>>>
>>>> out:
>>>> + kfree(err_array);
>>>> free_page_list(&pagelist);
>>>>
>>>> return ret;
>>>> @@ -354,7 +420,11 @@ static long privcmd_ioctl(struct file *file,
>>>> break;
>>>>
>>>> case IOCTL_PRIVCMD_MMAPBATCH:
>>>> - ret = privcmd_ioctl_mmap_batch(udata);
>>>> + ret = privcmd_ioctl_mmap_batch(udata, 1);
>>>> + break;
>>>> +
>>>> + case IOCTL_PRIVCMD_MMAPBATCH_V2:
>>>> + ret = privcmd_ioctl_mmap_batch(udata, 2);
>>>> break;
>>>>
>>>> default:
>>>> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
>>>> index 45c1aa1..a853168 100644
>>>> --- a/include/xen/privcmd.h
>>>> +++ b/include/xen/privcmd.h
>>>> @@ -58,13 +58,33 @@ struct privcmd_mmapbatch {
>>>> int num; /* number of pages to populate */
>>>> domid_t dom; /* target domain */
>>>> __u64 addr; /* virtual address */
>>>> - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
>>>> + xen_pfn_t __user *arr; /* array of mfns - or'd with
>>>> + PRIVCMD_MMAPBATCH_*_ERROR on err */
>>>> +};
>>>> +
>>>> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
>>>> +#define PRIVCMD_MMAPBATCH_PAGED_ERROR 0x80000000U
>>>> +
>>>> +struct privcmd_mmapbatch_v2 {
>>>> + unsigned int num; /* number of pages to populate */
>>>> + domid_t dom; /* target domain */
>>>> + __u64 addr; /* virtual address */
>>>> + const xen_pfn_t __user *arr; /* array of mfns */
>>>> + int __user *err; /* array of error codes */
>>>> };
>>>>
>>>> /*
>>>> * @cmd: IOCTL_PRIVCMD_HYPERCALL
>>>> * @arg: &privcmd_hypercall_t
>>>> * Return: Value returned from execution of the specified hypercall.
>>>> + *
>>>> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
>>>> + * @arg: &struct privcmd_mmapbatch_v2
>>>> + * Return: 0 on success (i.e., arg->err contains valid error codes for
>>>> + * each frame). On an error other than a failed frame remap, -1 is
>>>> + * returned and errno is set to EINVAL, EFAULT etc. As an exception,
>>>> + * if the operation was otherwise successful but any frame failed with
>>>> + * -ENOENT, then -1 is returned and errno is set to ENOENT.
>>>> */
>>>> #define IOCTL_PRIVCMD_HYPERCALL \
>>>> _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
>>>> @@ -72,5 +92,7 @@ struct privcmd_mmapbatch {
>>>> _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
>>>> #define IOCTL_PRIVCMD_MMAPBATCH \
>>>> _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
>>>> +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
>>>> + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
>>>>
>>>> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
>>>>
>>>> On Aug 30, 2012, at 8:58 AM, David Vrabel wrote:
>>>>
>>>>> From: David Vrabel <david.vrabel [at] citrix>
>>>>>
>>>>> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
>>>>> field for reporting the error code for every frame that could not be
>>>>> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
>>>>>
>>>>> Signed-off-by: David Vrabel <david.vrabel [at] citrix>
>>>>> ---
>>>>> drivers/xen/privcmd.c | 99 +++++++++++++++++++++++++++++++++++++++---------
>>>>> include/xen/privcmd.h | 23 +++++++++++-
>>>>> 2 files changed, 102 insertions(+), 20 deletions(-)
>>>>>
>>>>> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
>>>>> index ccee0f1..c0e89e7 100644
>>>>> --- a/drivers/xen/privcmd.c
>>>>> +++ b/drivers/xen/privcmd.c
>>>>> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
>>>>> */
>>>>> static int gather_array(struct list_head *pagelist,
>>>>> unsigned nelem, size_t size,
>>>>> - void __user *data)
>>>>> + const void __user *data)
>>>>> {
>>>>> unsigned pageidx;
>>>>> void *pagedata;
>>>>> @@ -248,18 +248,37 @@ struct mmap_batch_state {
>>>>> struct vm_area_struct *vma;
>>>>> int err;
>>>>>
>>>>> - xen_pfn_t __user *user;
>>>>> + xen_pfn_t __user *user_mfn;
>>>>> + int __user *user_err;
>>>>> };
>>>>>
>>>>> static int mmap_batch_fn(void *data, void *state)
>>>>> {
>>>>> xen_pfn_t *mfnp = data;
>>>>> struct mmap_batch_state *st = state;
>>>>> + int ret;
>>>>>
>>>>> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
>>>>> - st->vma->vm_page_prot, st->domain) < 0) {
>>>>> - *mfnp |= 0xf0000000U;
>>>>> - st->err++;
>>>>> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
>>>>> + st->vma->vm_page_prot, st->domain);
>>>>> + if (ret < 0) {
>>>>> + /*
>>>>> + * Error reporting is a mess but userspace relies on
>>>>> + * it behaving this way.
>>>>> + *
>>>>> + * V2 needs to a) return the result of each frame's
>>>>> + * remap; and b) return -ENOENT if any frame failed
>>>>> + * with -ENOENT.
>>>>> + *
>>>>> + * In this first pass the error code is saved by
>>>>> + * overwriting the mfn and an error is indicated in
>>>>> + * st->err.
>>>>> + *
>>>>> + * The second pass by mmap_return_errors() will write
>>>>> + * the error codes to user space and get the right
>>>>> + * ioctl return value.
>>>>> + */
>>>>> + *(int *)mfnp = ret;
>>>>> + st->err = ret;
>>>>> }
>>>>> st->va += PAGE_SIZE;
>>>>>
>>>>> @@ -270,16 +289,33 @@ static int mmap_return_errors(void *data, void *state)
>>>>> {
>>>>> xen_pfn_t *mfnp = data;
>>>>> struct mmap_batch_state *st = state;
>>>>> + int ret;
>>>>> +
>>>>> + if (st->user_err) {
>>>>> + int err = *(int *)mfnp;
>>>>> +
>>>>> + if (err == -ENOENT)
>>>>> + st->err = err;
>>>>>
>>>>> - return put_user(*mfnp, st->user++);
>>>>> + return __put_user(err, st->user_err++);
>>>>> + } else {
>>>>> + xen_pfn_t mfn;
>>>>> +
>>>>> + ret = __get_user(mfn, st->user_mfn);
>>>>> + if (ret < 0)
>>>>> + return ret;
>>>>> +
>>>>> + mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR;
>>>>> + return __put_user(mfn, st->user_mfn++);
>>>>> + }
>>>>> }
>>>>>
>>>>> static struct vm_operations_struct privcmd_vm_ops;
>>>>>
>>>>> -static long privcmd_ioctl_mmap_batch(void __user *udata)
>>>>> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
>>>>> {
>>>>> int ret;
>>>>> - struct privcmd_mmapbatch m;
>>>>> + struct privcmd_mmapbatch_v2 m;
>>>>> struct mm_struct *mm = current->mm;
>>>>> struct vm_area_struct *vma;
>>>>> unsigned long nr_pages;
>>>>> @@ -289,15 +325,31 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>>>>> if (!xen_initial_domain())
>>>>> return -EPERM;
>>>>>
>>>>> - if (copy_from_user(&m, udata, sizeof(m)))
>>>>> - return -EFAULT;
>>>>> + switch (version) {
>>>>> + case 1:
>>>>> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
>>>>> + return -EFAULT;
>>>>> + /* Returns per-frame error in m.arr. */
>>>>> + m.err = NULL;
>>>>> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
>>>>> + return -EFAULT;
>>>>> + break;
>>>>> + case 2:
>>>>> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
>>>>> + return -EFAULT;
>>>>> + /* Returns per-frame error code in m.err. */
>>>>> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
>>>>> + return -EFAULT;
>>>>> + break;
>>>>> + default:
>>>>> + return -EINVAL;
>>>>> + }
>>>>>
>>>>> nr_pages = m.num;
>>>>> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
>>>>> return -EINVAL;
>>>>>
>>>>> - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
>>>>> - m.arr);
>>>>> + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
>>>>>
>>>>> if (ret || list_empty(&pagelist))
>>>>> goto out;
>>>>> @@ -325,12 +377,17 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
>>>>>
>>>>> up_write(&mm->mmap_sem);
>>>>>
>>>>> - if (state.err > 0) {
>>>>> - state.user = m.arr;
>>>>> + if (state.err) {
>>>>> + state.err = 0;
>>>>> + state.user_mfn = (xen_pfn_t *)m.arr;
>>>>> + state.user_err = m.err;
>>>>> ret = traverse_pages(m.num, sizeof(xen_pfn_t),
>>>>> - &pagelist,
>>>>> - mmap_return_errors, &state);
>>>>> - }
>>>>> + &pagelist,
>>>>> + mmap_return_errors, &state);
>>>>> + if (ret >= 0)
>>>>> + ret = state.err;
>>>>> + } else if (m.err)
>>>>> + __clear_user(m.err, m.num * sizeof(*m.err));
>>>>>
>>>>> out:
>>>>> free_page_list(&pagelist);
>>>>> @@ -354,7 +411,11 @@ static long privcmd_ioctl(struct file *file,
>>>>> break;
>>>>>
>>>>> case IOCTL_PRIVCMD_MMAPBATCH:
>>>>> - ret = privcmd_ioctl_mmap_batch(udata);
>>>>> + ret = privcmd_ioctl_mmap_batch(udata, 1);
>>>>> + break;
>>>>> +
>>>>> + case IOCTL_PRIVCMD_MMAPBATCH_V2:
>>>>> + ret = privcmd_ioctl_mmap_batch(udata, 2);
>>>>> break;
>>>>>
>>>>> default:
>>>>> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
>>>>> index 17857fb..f60d75c 100644
>>>>> --- a/include/xen/privcmd.h
>>>>> +++ b/include/xen/privcmd.h
>>>>> @@ -59,13 +59,32 @@ struct privcmd_mmapbatch {
>>>>> int num; /* number of pages to populate */
>>>>> domid_t dom; /* target domain */
>>>>> __u64 addr; /* virtual address */
>>>>> - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
>>>>> + xen_pfn_t __user *arr; /* array of mfns - or'd with
>>>>> + PRIVCMD_MMAPBATCH_MFN_ERROR on err */
>>>>> +};
>>>>> +
>>>>> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
>>>>> +
>>>>> +struct privcmd_mmapbatch_v2 {
>>>>> + unsigned int num; /* number of pages to populate */
>>>>> + domid_t dom; /* target domain */
>>>>> + __u64 addr; /* virtual address */
>>>>> + const xen_pfn_t __user *arr; /* array of mfns */
>>>>> + int __user *err; /* array of error codes */
>>>>> };
>>>>>
>>>>> /*
>>>>> * @cmd: IOCTL_PRIVCMD_HYPERCALL
>>>>> * @arg: &privcmd_hypercall_t
>>>>> * Return: Value returned from execution of the specified hypercall.
>>>>> + *
>>>>> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
>>>>> + * @arg: &struct privcmd_mmapbatch_v2
>>>>> + * Return: 0 on success (i.e., arg->err contains valid error codes for
>>>>> + * each frame). On an error other than a failed frame remap, -1 is
>>>>> + * returned and errno is set to EINVAL, EFAULT etc. As an exception,
>>>>> + * if the operation was otherwise successful but any frame failed with
>>>>> + * -ENOENT, then -1 is returned and errno is set to ENOENT.
>>>>> */
>>>>> #define IOCTL_PRIVCMD_HYPERCALL \
>>>>> _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
>>>>> @@ -73,5 +92,7 @@ struct privcmd_mmapbatch {
>>>>> _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
>>>>> #define IOCTL_PRIVCMD_MMAPBATCH \
>>>>> _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
>>>>> +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
>>>>> + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
>>>>>
>>>>> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
>>>>> --
>>>>> 1.7.2.5
>>>>>


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


konrad.wilk at oracle

Sep 6, 2012, 9:20 AM

Post #19 of 19 (229 views)
Permalink
Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl [In reply to]

On Thu, Sep 06, 2012 at 09:41:44AM -0400, Andres Lagar-Cavilla wrote:
> On Sep 5, 2012, at 1:40 PM, Konrad Rzeszutek Wilk wrote:
>
> > On Wed, Sep 05, 2012 at 01:09:32PM -0400, Andres Lagar-Cavilla wrote:
> >> Super. To which branch are you applying these now? (n00b question but have to ask)
> >
> > They will show up on stable/for-linus-3.7 once the overnight tests pass.
>
> I would be very surprised if this passed your nightlies. This is because the following hunk is necessary:

It did :-) I guess the Xen 4.1 isn't using this that much.
>
> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> index 5386f20..e4dfa3b 100644
> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -389,7 +389,7 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
> state.err = err_array;
> ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> &pagelist, mmap_return_errors_v1, &state);
> - } else
> + } else if (version == 2)
> ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
>
> /* If we have not had any EFAULT-like global errors then set the global
>
> I can either resubmit the original patch with this squashed in, or send this stand-alone. Nightlies may have passed if your libxc never exercises v1 in favor of v2. But this is broken for v1 as it will unconditionally attempt a copy to user on a NULL target and this sets rc to EFAULT.
>

Send it stand alone pls.

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel

Xen devel RSS feed   Index | Next | Previous | View Threaded
 
 


Interested in having your list archived? Contact Gossamer Threads
 
  Web Applications & Managed Hosting Powered by Gossamer Threads Inc.