Login | Register For Free | Help
Search for: (Advanced)

Mailing List Archive: Xen: Devel

[PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor

 

 

First page Previous page 1 2 Next page Last page  View All Xen devel RSS feed   Index | Next | Previous | View Threaded


stefano.stabellini at eu

Feb 23, 2012, 9:48 AM

Post #1 of 36 (462 views)
Permalink
[PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor

We need a register to pass the hypercall number because we might not
know it at compile time and HVC only takes an immediate argument.

Among the available registers r12 seems to be the best choice because it
is defined as "intra-procedure call scratch register".

Use the ISS to pass an hypervisor specific tag.

Signed-off-by: Stefano Stabellini <stefano.stabellini [at] eu>
CC: kvm [at] vger
---
arch/arm/include/asm/xen/hypercall.h | 87 +++++++++++++++++++---------------
1 files changed, 48 insertions(+), 39 deletions(-)

diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h
index 404e63f0..04eba1c 100644
--- a/arch/arm/include/asm/xen/hypercall.h
+++ b/arch/arm/include/asm/xen/hypercall.h
@@ -33,13 +33,17 @@
#ifndef _ASM_ARM_XEN_HYPERCALL_H
#define _ASM_ARM_XEN_HYPERCALL_H

-#define __HVC_IMM(name) "( " #name " & 0xf) + " \
- "((" #name " << 4) & 0xfff00)"
+#include <xen/interface/xen.h>
+#include <asm/errno.h>

-#define ____HYPERCALL(name) ".word 0xe1400070 + " __HVC_IMM(name)
-#define __HYPERCALL(name) ____HYPERCALL(__HYPERVISOR_##name)
+#define XEN_HYPERCALL_TAG "0XEA1"
+
+#define __HVC_IMM(tag) "( " tag " & 0xf) + " \
+ "((" tag " << 4) & 0xfff00)"
+#define __HYPERCALL ".word 0xe1400070 + " __HVC_IMM(XEN_HYPERCALL_TAG)

#define __HYPERCALL_RETREG "r0"
+#define __HYPERCALL_NUMBER "r12"
#define __HYPERCALL_ARG1REG "r0"
#define __HYPERCALL_ARG2REG "r1"
#define __HYPERCALL_ARG3REG "r2"
@@ -48,30 +52,32 @@

#define __HYPERCALL_DECLS \
register unsigned long __res asm(__HYPERCALL_RETREG); \
+ register unsigned long __num asm(__HYPERCALL_NUMBER) = __num; \
register unsigned long __arg1 asm(__HYPERCALL_ARG1REG) = __arg1; \
register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \
register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \
register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \
register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5;

-#define __HYPERCALL_0PARAM "=r" (__res)
+#define __HYPERCALL_0PARAM "=r" (__res), "+r" (__num)
#define __HYPERCALL_1PARAM __HYPERCALL_0PARAM, "+r" (__arg1)
#define __HYPERCALL_2PARAM __HYPERCALL_1PARAM, "+r" (__arg2)
#define __HYPERCALL_3PARAM __HYPERCALL_2PARAM, "+r" (__arg3)
#define __HYPERCALL_4PARAM __HYPERCALL_3PARAM, "+r" (__arg4)
#define __HYPERCALL_5PARAM __HYPERCALL_4PARAM, "+r" (__arg5)

-#define __HYPERCALL_0ARG()
-#define __HYPERCALL_1ARG(a1) \
- __HYPERCALL_0ARG() __arg1 = (unsigned long)(a1);
-#define __HYPERCALL_2ARG(a1,a2) \
- __HYPERCALL_1ARG(a1) __arg2 = (unsigned long)(a2);
-#define __HYPERCALL_3ARG(a1,a2,a3) \
- __HYPERCALL_2ARG(a1,a2) __arg3 = (unsigned long)(a3);
-#define __HYPERCALL_4ARG(a1,a2,a3,a4) \
- __HYPERCALL_3ARG(a1,a2,a3) __arg4 = (unsigned long)(a4);
-#define __HYPERCALL_5ARG(a1,a2,a3,a4,a5) \
- __HYPERCALL_4ARG(a1,a2,a3,a4) __arg5 = (unsigned long)(a5);
+#define __HYPERCALL_0ARG(hypercall) \
+ __num = (unsigned long)hypercall;
+#define __HYPERCALL_1ARG(hypercall,a1) \
+ __HYPERCALL_0ARG(hypercall) __arg1 = (unsigned long)(a1);
+#define __HYPERCALL_2ARG(hypercall,a1,a2) \
+ __HYPERCALL_1ARG(hypercall,a1) __arg2 = (unsigned long)(a2);
+#define __HYPERCALL_3ARG(hypercall,a1,a2,a3) \
+ __HYPERCALL_2ARG(hypercall,a1,a2) __arg3 = (unsigned long)(a3);
+#define __HYPERCALL_4ARG(hypercall,a1,a2,a3,a4) \
+ __HYPERCALL_3ARG(hypercall,a1,a2,a3) __arg4 = (unsigned long)(a4);
+#define __HYPERCALL_5ARG(hypercall,a1,a2,a3,a4,a5) \
+ __HYPERCALL_4ARG(hypercall,a1,a2,a3,a4) __arg5 = (unsigned long)(a5);

#define __HYPERCALL_CLOBBER5 "memory"
#define __HYPERCALL_CLOBBER4 __HYPERCALL_CLOBBER5, __HYPERCALL_ARG5REG
@@ -80,102 +86,105 @@
#define __HYPERCALL_CLOBBER1 __HYPERCALL_CLOBBER2, __HYPERCALL_ARG2REG
#define __HYPERCALL_CLOBBER0 __HYPERCALL_CLOBBER1, __HYPERCALL_ARG1REG

-#define _hypercall0(type, name) \
+#define _hypercall0(type, hypercall) \
({ \
__HYPERCALL_DECLS; \
- __HYPERCALL_0ARG(); \
- asm volatile (__HYPERCALL(name) \
+ __HYPERCALL_0ARG(hypercall); \
+ asm volatile (__HYPERCALL \
: __HYPERCALL_0PARAM \
: \
: __HYPERCALL_CLOBBER0); \
(type)__res; \
})

-#define _hypercall1(type, name, a1) \
+#define _hypercall1(type, hypercall, a1) \
({ \
__HYPERCALL_DECLS; \
- __HYPERCALL_1ARG(a1); \
- asm volatile (__HYPERCALL(name) \
+ __HYPERCALL_1ARG(hypercall, a1); \
+ asm volatile (__HYPERCALL \
: __HYPERCALL_1PARAM \
: \
: __HYPERCALL_CLOBBER1); \
(type)__res; \
})

-#define _hypercall2(type, name, a1, a2) \
+#define _hypercall2(type, hypercall, a1, a2) \
({ \
__HYPERCALL_DECLS; \
- __HYPERCALL_2ARG(a1, a2); \
- asm volatile (__HYPERCALL(name) \
+ __HYPERCALL_2ARG(hypercall, a1, a2); \
+ asm volatile (__HYPERCALL \
: __HYPERCALL_2PARAM \
: \
: __HYPERCALL_CLOBBER2); \
(type)__res; \
})

-#define _hypercall3(type, name, a1, a2, a3) \
+#define _hypercall3(type, hypercall, a1, a2, a3) \
({ \
__HYPERCALL_DECLS; \
- __HYPERCALL_3ARG(a1, a2, a3); \
- asm volatile (__HYPERCALL(name) \
+ __HYPERCALL_3ARG(hypercall, a1, a2, a3); \
+ asm volatile (__HYPERCALL \
: __HYPERCALL_3PARAM \
: \
: __HYPERCALL_CLOBBER3); \
(type)__res; \
})

-#define _hypercall4(type, name, a1, a2, a3, a4) \
+#define _hypercall4(type, hypercall, a1, a2, a3, a4) \
({ \
__HYPERCALL_DECLS; \
- __HYPERCALL_4ARG(a1, a2, a3, a4); \
- asm volatile (__HYPERCALL(name) \
+ __HYPERCALL_4ARG(hypercall, a1, a2, a3, a4); \
+ asm volatile (__HYPERCALL \
: __HYPERCALL_4PARAM \
: \
: __HYPERCALL_CLOBBER4); \
(type)__res; \
})

-#define _hypercall5(type, name, a1, a2, a3, a4, a5) \
+#define _hypercall5(type, hypercall, a1, a2, a3, a4, a5) \
({ \
__HYPERCALL_DECLS; \
- __HYPERCALL_5ARG(a1, a2, a3, a4, a5); \
- asm volatile (__HYPERCALL(name) \
+ __HYPERCALL_5ARG(hypercall, a1, a2, a3, a4, a5); \
+ asm volatile (__HYPERCALL \
: __HYPERCALL_5PARAM \
: \
: __HYPERCALL_CLOBBER5); \
(type)__res; \
})

+#define HYPERCALL(name) \
+ (__HYPERVISOR_##name)
+
/* -- Hypercall definitions go below -- */

static inline int
HYPERVISOR_xen_version(int cmd, void *arg)
{
- return _hypercall2(int, xen_version, cmd, arg);
+ return _hypercall2(int, HYPERCALL(xen_version), cmd, arg);
}

static inline int
HYPERVISOR_console_io(int cmd, int count, char *str)
{
- return _hypercall3(int, console_io, cmd, count, str);
+ return _hypercall3(int, HYPERCALL(console_io), cmd, count, str);
}

static inline int
HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
{
- return _hypercall3(int, grant_table_op, cmd, uop, count);
+ return _hypercall3(int, HYPERCALL(grant_table_op), cmd, uop, count);
}

static inline int
HYPERVISOR_sched_op(int cmd, void *arg)
{
- return _hypercall2(int, sched_op, cmd, arg);
+ return _hypercall2(int, HYPERCALL(sched_op), cmd, arg);
}

static inline int
HYPERVISOR_event_channel_op(int cmd, void *arg)
{
- return _hypercall2(int, event_channel_op, cmd, arg);
+ return _hypercall2(int, HYPERCALL(event_channel_op), cmd, arg);
}

#endif /* _ASM_ARM_XEN_HYPERCALL_H */
--
1.7.2.5


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


Ian.Campbell at citrix

Feb 27, 2012, 8:27 AM

Post #2 of 36 (452 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Thu, 2012-02-23 at 17:48 +0000, Stefano Stabellini wrote:
> We need a register to pass the hypercall number because we might not
> know it at compile time and HVC only takes an immediate argument.
>
> Among the available registers r12 seems to be the best choice because it
> is defined as "intra-procedure call scratch register".

R12 is not accessible from the 16 bit "T1" Thumb encoding of mov
immediate (which can only target r0..r7).

Since we support only ARMv7+ there are "T2" and "T3" encodings available
which do allow direct mov of an immediate into R12, but are 32 bit Thumb
instructions.

Should we use r7 instead to maximise instruction density for Thumb code?

Ian.


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


dave.martin at linaro

Feb 27, 2012, 9:53 AM

Post #3 of 36 (452 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Thu, Feb 23, 2012 at 05:48:22PM +0000, Stefano Stabellini wrote:
> We need a register to pass the hypercall number because we might not
> know it at compile time and HVC only takes an immediate argument.
>
> Among the available registers r12 seems to be the best choice because it
> is defined as "intra-procedure call scratch register".

This would be massively simplified if you didn't try to inline the HVC.
Does it really need to be inline?

>
> Use the ISS to pass an hypervisor specific tag.
>
> Signed-off-by: Stefano Stabellini <stefano.stabellini [at] eu>
> CC: kvm [at] vger
> ---
> arch/arm/include/asm/xen/hypercall.h | 87 +++++++++++++++++++---------------
> 1 files changed, 48 insertions(+), 39 deletions(-)
>
> diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h
> index 404e63f0..04eba1c 100644
> --- a/arch/arm/include/asm/xen/hypercall.h
> +++ b/arch/arm/include/asm/xen/hypercall.h
> @@ -33,13 +33,17 @@
> #ifndef _ASM_ARM_XEN_HYPERCALL_H
> #define _ASM_ARM_XEN_HYPERCALL_H
>
> -#define __HVC_IMM(name) "( " #name " & 0xf) + " \
> - "((" #name " << 4) & 0xfff00)"
> +#include <xen/interface/xen.h>
> +#include <asm/errno.h>
>
> -#define ____HYPERCALL(name) ".word 0xe1400070 + " __HVC_IMM(name)
> -#define __HYPERCALL(name) ____HYPERCALL(__HYPERVISOR_##name)
> +#define XEN_HYPERCALL_TAG "0XEA1"
> +
> +#define __HVC_IMM(tag) "( " tag " & 0xf) + " \
> + "((" tag " << 4) & 0xfff00)"
> +#define __HYPERCALL ".word 0xe1400070 + " __HVC_IMM(XEN_HYPERCALL_TAG)

Please, do not do this. It won't work in Thumb, where the encodings are
different.

It is reasonable to expect anyone building Xen to have reasonably new
tools, you you can justifiably use

AFLAGS_thisfile.o := -Wa,-march=armv7-a+virt

in the Makefile and just use the hvc instruction directly.


Of course, this is only practical if the HVC invocation is not inlined.
If we can't avoid macro-ising HVC, we should do it globally, not locally
to the Xen code. That way we at least keep all the horror in one place.

Cheers
---Dave

>
> #define __HYPERCALL_RETREG "r0"
> +#define __HYPERCALL_NUMBER "r12"
> #define __HYPERCALL_ARG1REG "r0"
> #define __HYPERCALL_ARG2REG "r1"
> #define __HYPERCALL_ARG3REG "r2"
> @@ -48,30 +52,32 @@
>
> #define __HYPERCALL_DECLS \
> register unsigned long __res asm(__HYPERCALL_RETREG); \
> + register unsigned long __num asm(__HYPERCALL_NUMBER) = __num; \
> register unsigned long __arg1 asm(__HYPERCALL_ARG1REG) = __arg1; \
> register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \
> register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \
> register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \
> register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5;
>
> -#define __HYPERCALL_0PARAM "=r" (__res)
> +#define __HYPERCALL_0PARAM "=r" (__res), "+r" (__num)
> #define __HYPERCALL_1PARAM __HYPERCALL_0PARAM, "+r" (__arg1)
> #define __HYPERCALL_2PARAM __HYPERCALL_1PARAM, "+r" (__arg2)
> #define __HYPERCALL_3PARAM __HYPERCALL_2PARAM, "+r" (__arg3)
> #define __HYPERCALL_4PARAM __HYPERCALL_3PARAM, "+r" (__arg4)
> #define __HYPERCALL_5PARAM __HYPERCALL_4PARAM, "+r" (__arg5)
>
> -#define __HYPERCALL_0ARG()
> -#define __HYPERCALL_1ARG(a1) \
> - __HYPERCALL_0ARG() __arg1 = (unsigned long)(a1);
> -#define __HYPERCALL_2ARG(a1,a2) \
> - __HYPERCALL_1ARG(a1) __arg2 = (unsigned long)(a2);
> -#define __HYPERCALL_3ARG(a1,a2,a3) \
> - __HYPERCALL_2ARG(a1,a2) __arg3 = (unsigned long)(a3);
> -#define __HYPERCALL_4ARG(a1,a2,a3,a4) \
> - __HYPERCALL_3ARG(a1,a2,a3) __arg4 = (unsigned long)(a4);
> -#define __HYPERCALL_5ARG(a1,a2,a3,a4,a5) \
> - __HYPERCALL_4ARG(a1,a2,a3,a4) __arg5 = (unsigned long)(a5);
> +#define __HYPERCALL_0ARG(hypercall) \
> + __num = (unsigned long)hypercall;
> +#define __HYPERCALL_1ARG(hypercall,a1) \
> + __HYPERCALL_0ARG(hypercall) __arg1 = (unsigned long)(a1);
> +#define __HYPERCALL_2ARG(hypercall,a1,a2) \
> + __HYPERCALL_1ARG(hypercall,a1) __arg2 = (unsigned long)(a2);
> +#define __HYPERCALL_3ARG(hypercall,a1,a2,a3) \
> + __HYPERCALL_2ARG(hypercall,a1,a2) __arg3 = (unsigned long)(a3);
> +#define __HYPERCALL_4ARG(hypercall,a1,a2,a3,a4) \
> + __HYPERCALL_3ARG(hypercall,a1,a2,a3) __arg4 = (unsigned long)(a4);
> +#define __HYPERCALL_5ARG(hypercall,a1,a2,a3,a4,a5) \
> + __HYPERCALL_4ARG(hypercall,a1,a2,a3,a4) __arg5 = (unsigned long)(a5);
>
> #define __HYPERCALL_CLOBBER5 "memory"
> #define __HYPERCALL_CLOBBER4 __HYPERCALL_CLOBBER5, __HYPERCALL_ARG5REG
> @@ -80,102 +86,105 @@
> #define __HYPERCALL_CLOBBER1 __HYPERCALL_CLOBBER2, __HYPERCALL_ARG2REG
> #define __HYPERCALL_CLOBBER0 __HYPERCALL_CLOBBER1, __HYPERCALL_ARG1REG
>
> -#define _hypercall0(type, name) \
> +#define _hypercall0(type, hypercall) \
> ({ \
> __HYPERCALL_DECLS; \
> - __HYPERCALL_0ARG(); \
> - asm volatile (__HYPERCALL(name) \
> + __HYPERCALL_0ARG(hypercall); \
> + asm volatile (__HYPERCALL \
> : __HYPERCALL_0PARAM \
> : \
> : __HYPERCALL_CLOBBER0); \
> (type)__res; \
> })
>
> -#define _hypercall1(type, name, a1) \
> +#define _hypercall1(type, hypercall, a1) \
> ({ \
> __HYPERCALL_DECLS; \
> - __HYPERCALL_1ARG(a1); \
> - asm volatile (__HYPERCALL(name) \
> + __HYPERCALL_1ARG(hypercall, a1); \
> + asm volatile (__HYPERCALL \
> : __HYPERCALL_1PARAM \
> : \
> : __HYPERCALL_CLOBBER1); \
> (type)__res; \
> })
>
> -#define _hypercall2(type, name, a1, a2) \
> +#define _hypercall2(type, hypercall, a1, a2) \
> ({ \
> __HYPERCALL_DECLS; \
> - __HYPERCALL_2ARG(a1, a2); \
> - asm volatile (__HYPERCALL(name) \
> + __HYPERCALL_2ARG(hypercall, a1, a2); \
> + asm volatile (__HYPERCALL \
> : __HYPERCALL_2PARAM \
> : \
> : __HYPERCALL_CLOBBER2); \
> (type)__res; \
> })
>
> -#define _hypercall3(type, name, a1, a2, a3) \
> +#define _hypercall3(type, hypercall, a1, a2, a3) \
> ({ \
> __HYPERCALL_DECLS; \
> - __HYPERCALL_3ARG(a1, a2, a3); \
> - asm volatile (__HYPERCALL(name) \
> + __HYPERCALL_3ARG(hypercall, a1, a2, a3); \
> + asm volatile (__HYPERCALL \
> : __HYPERCALL_3PARAM \
> : \
> : __HYPERCALL_CLOBBER3); \
> (type)__res; \
> })
>
> -#define _hypercall4(type, name, a1, a2, a3, a4) \
> +#define _hypercall4(type, hypercall, a1, a2, a3, a4) \
> ({ \
> __HYPERCALL_DECLS; \
> - __HYPERCALL_4ARG(a1, a2, a3, a4); \
> - asm volatile (__HYPERCALL(name) \
> + __HYPERCALL_4ARG(hypercall, a1, a2, a3, a4); \
> + asm volatile (__HYPERCALL \
> : __HYPERCALL_4PARAM \
> : \
> : __HYPERCALL_CLOBBER4); \
> (type)__res; \
> })
>
> -#define _hypercall5(type, name, a1, a2, a3, a4, a5) \
> +#define _hypercall5(type, hypercall, a1, a2, a3, a4, a5) \
> ({ \
> __HYPERCALL_DECLS; \
> - __HYPERCALL_5ARG(a1, a2, a3, a4, a5); \
> - asm volatile (__HYPERCALL(name) \
> + __HYPERCALL_5ARG(hypercall, a1, a2, a3, a4, a5); \
> + asm volatile (__HYPERCALL \
> : __HYPERCALL_5PARAM \
> : \
> : __HYPERCALL_CLOBBER5); \
> (type)__res; \
> })
>
> +#define HYPERCALL(name) \
> + (__HYPERVISOR_##name)
> +
> /* -- Hypercall definitions go below -- */
>
> static inline int
> HYPERVISOR_xen_version(int cmd, void *arg)
> {
> - return _hypercall2(int, xen_version, cmd, arg);
> + return _hypercall2(int, HYPERCALL(xen_version), cmd, arg);
> }
>
> static inline int
> HYPERVISOR_console_io(int cmd, int count, char *str)
> {
> - return _hypercall3(int, console_io, cmd, count, str);
> + return _hypercall3(int, HYPERCALL(console_io), cmd, count, str);
> }
>
> static inline int
> HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
> {
> - return _hypercall3(int, grant_table_op, cmd, uop, count);
> + return _hypercall3(int, HYPERCALL(grant_table_op), cmd, uop, count);
> }
>
> static inline int
> HYPERVISOR_sched_op(int cmd, void *arg)
> {
> - return _hypercall2(int, sched_op, cmd, arg);
> + return _hypercall2(int, HYPERCALL(sched_op), cmd, arg);
> }
>
> static inline int
> HYPERVISOR_event_channel_op(int cmd, void *arg)
> {
> - return _hypercall2(int, event_channel_op, cmd, arg);
> + return _hypercall2(int, HYPERCALL(event_channel_op), cmd, arg);
> }
>
> #endif /* _ASM_ARM_XEN_HYPERCALL_H */
> --
> 1.7.2.5
>
>
> _______________________________________________
> linaro-dev mailing list
> linaro-dev [at] lists
> http://lists.linaro.org/mailman/listinfo/linaro-dev

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


dave.martin at linaro

Feb 27, 2012, 10:03 AM

Post #4 of 36 (458 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Mon, Feb 27, 2012 at 04:27:23PM +0000, Ian Campbell wrote:
> On Thu, 2012-02-23 at 17:48 +0000, Stefano Stabellini wrote:
> > We need a register to pass the hypercall number because we might not
> > know it at compile time and HVC only takes an immediate argument.
> >
> > Among the available registers r12 seems to be the best choice because it
> > is defined as "intra-procedure call scratch register".
>
> R12 is not accessible from the 16 bit "T1" Thumb encoding of mov
> immediate (which can only target r0..r7).

This is untrue. The important instructions, like MOV Rd, Rn can access
all the regs. But anyway, there is no such thing as a Thumb-1 kernel,
so we won't really care.

> Since we support only ARMv7+ there are "T2" and "T3" encodings available
> which do allow direct mov of an immediate into R12, but are 32 bit Thumb
> instructions.
>
> Should we use r7 instead to maximise instruction density for Thumb code?

The difference seems trivial when put into context, even if you code a
special Thumb version of the code to maximise density (the Thumb-2 code
which gets built from assembler in the kernel is very suboptimal in
size, but there simply isn't a high proportion of asm code in the kernel
anyway.) I wouldn't consider the ARM/Thumb differences as an important
factor when deciding on a register.

One argument for _not_ using r12 for this purpose is that it is then
harder to put a generic "HVC" function (analogous to the "syscall"
syscall) out-of-line, since r12 could get destroyed by the call.

If you don't think you will ever care about putting HVC out of line
though, it may not matter.

Cheers
---Dave

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


Ian.Campbell at citrix

Feb 27, 2012, 11:33 AM

Post #5 of 36 (454 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Mon, 2012-02-27 at 18:03 +0000, Dave Martin wrote:
> On Mon, Feb 27, 2012 at 04:27:23PM +0000, Ian Campbell wrote:
> > On Thu, 2012-02-23 at 17:48 +0000, Stefano Stabellini wrote:
> > > We need a register to pass the hypercall number because we might not
> > > know it at compile time and HVC only takes an immediate argument.
> > >
> > > Among the available registers r12 seems to be the best choice because it
> > > is defined as "intra-procedure call scratch register".
> >
> > R12 is not accessible from the 16 bit "T1" Thumb encoding of mov
> > immediate (which can only target r0..r7).
>
> This is untrue. The important instructions, like MOV Rd, Rn can access
> all the regs. But anyway, there is no such thing as a Thumb-1 kernel,
> so we won't really care.

I did say "mov immediate", which is the one which matters when loading a
constant hypercall number (the common case). AFAIK the "mov Rd, #imm" T1
encoding cannot access all registers.

The "mov rd,rn" form only helps for syscall(2) like functions, which are
unusual, at least for Xen, although as Stefano says, they do exist.

> > Since we support only ARMv7+ there are "T2" and "T3" encodings available
> > which do allow direct mov of an immediate into R12, but are 32 bit Thumb
> > instructions.
> >
> > Should we use r7 instead to maximise instruction density for Thumb code?
>
> The difference seems trivial when put into context, even if you code a
> special Thumb version of the code to maximise density (the Thumb-2 code
> which gets built from assembler in the kernel is very suboptimal in
> size, but there simply isn't a high proportion of asm code in the kernel
> anyway.) I wouldn't consider the ARM/Thumb differences as an important
> factor when deciding on a register.

OK, that's useful information. thanks.

> One argument for _not_ using r12 for this purpose is that it is then
> harder to put a generic "HVC" function (analogous to the "syscall"
> syscall) out-of-line, since r12 could get destroyed by the call.

For an out of line syscall(2) wouldn't the syscall number either be in a
standard C calling convention argument register or on the stack when the
function was called, since it is just a normal argument at that point?
As you point out it cannot be passed in r12 (and could never be, due to
the clobbering).

The syscall function itself would have to move the arguments and syscall
nr etc around before issuing the syscall.

I think the same is true of a similar hypercall(2)

> If you don't think you will ever care about putting HVC out of line
> though, it may not matter.

Ian.


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


Ian.Campbell at citrix

Feb 27, 2012, 11:48 AM

Post #6 of 36 (456 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Mon, 2012-02-27 at 17:53 +0000, Dave Martin wrote:
> On Thu, Feb 23, 2012 at 05:48:22PM +0000, Stefano Stabellini wrote:
> > We need a register to pass the hypercall number because we might not
> > know it at compile time and HVC only takes an immediate argument.
> >
> > Among the available registers r12 seems to be the best choice because it
> > is defined as "intra-procedure call scratch register".
>
> This would be massively simplified if you didn't try to inline the HVC.
> Does it really need to be inline?
>
> > +#define __HYPERCALL ".word 0xe1400070 + " __HVC_IMM(XEN_HYPERCALL_TAG)
>
> Please, do not do this. It won't work in Thumb, where the encodings are
> different.
>
> It is reasonable to expect anyone building Xen to have reasonably new
> tools, you you can justifiably use
>
> AFLAGS_thisfile.o := -Wa,-march=armv7-a+virt
>
> in the Makefile and just use the hvc instruction directly.

Our aim is for guest kernel binaries not to be specific to Xen -- i.e.
they should be able to run on baremetal and other hypervisors as well.
The differences should only be in the device-tree passed to the kernel.

> Of course, this is only practical if the HVC invocation is not inlined.

I suppose we could make the stub functions out of line, we just copied
what Xen does on x86.

The only thing which springs to mind is that 5 argument hypercalls will
end up pushing the fifth argument to the stack only to pop it back into
r4 for the hypercall and IIRC it also needs to preserve r4 (callee saved
reg) which is going to involve some small amount of code to move stuff
around too.

So by inlining the functions we avoid some thunking because the compiler
would know exactly what was happening at the hypercall site.

We don't currently have any 6 argument hypercalls but the same would
extend there.

> If we can't avoid macro-ising HVC, we should do it globally, not locally
> to the Xen code. That way we at least keep all the horror in one place.

That sounds like a good idea to me.

Given that Stefano is proposing to make the ISS a (per-hypervisor)
constant we could consider just defining the Thumb and non-Thumb
constants instead of doing all the construction with the __HVC_IMM stuff
-- that would remove a big bit of the macroization.

Ian.


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


peter.maydell at linaro

Feb 27, 2012, 1:05 PM

Post #7 of 36 (456 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On 27 February 2012 16:27, Ian Campbell <Ian.Campbell [at] citrix> wrote:
> R12 is not accessible from the 16 bit "T1" Thumb encoding of mov
> immediate (which can only target r0..r7).
>
> Since we support only ARMv7+ there are "T2" and "T3" encodings available
> which do allow direct mov of an immediate into R12, but are 32 bit Thumb
> instructions.
>
> Should we use r7 instead to maximise instruction density for Thumb code?

r7 is (used by gcc as) the Thumb frame pointer; I don't know if this
makes it worth avoiding in this context.

-- PMM

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


dave.martin at linaro

Feb 28, 2012, 1:46 AM

Post #8 of 36 (453 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Mon, Feb 27, 2012 at 07:48:45PM +0000, Ian Campbell wrote:
> On Mon, 2012-02-27 at 17:53 +0000, Dave Martin wrote:
> > On Thu, Feb 23, 2012 at 05:48:22PM +0000, Stefano Stabellini wrote:
> > > We need a register to pass the hypercall number because we might not
> > > know it at compile time and HVC only takes an immediate argument.
> > >
> > > Among the available registers r12 seems to be the best choice because it
> > > is defined as "intra-procedure call scratch register".
> >
> > This would be massively simplified if you didn't try to inline the HVC.
> > Does it really need to be inline?
> >
> > > +#define __HYPERCALL ".word 0xe1400070 + " __HVC_IMM(XEN_HYPERCALL_TAG)
> >
> > Please, do not do this. It won't work in Thumb, where the encodings are
> > different.
> >
> > It is reasonable to expect anyone building Xen to have reasonably new
> > tools, you you can justifiably use
> >
> > AFLAGS_thisfile.o := -Wa,-march=armv7-a+virt
> >
> > in the Makefile and just use the hvc instruction directly.
>
> Our aim is for guest kernel binaries not to be specific to Xen -- i.e.
> they should be able to run on baremetal and other hypervisors as well.
> The differences should only be in the device-tree passed to the kernel.
>
> > Of course, this is only practical if the HVC invocation is not inlined.
>
> I suppose we could make the stub functions out of line, we just copied
> what Xen does on x86.
>
> The only thing which springs to mind is that 5 argument hypercalls will
> end up pushing the fifth argument to the stack only to pop it back into
> r4 for the hypercall and IIRC it also needs to preserve r4 (callee saved
> reg) which is going to involve some small amount of code to move stuff
> around too.
>
> So by inlining the functions we avoid some thunking because the compiler
> would know exactly what was happening at the hypercall site.

True ...

>
> We don't currently have any 6 argument hypercalls but the same would
> extend there.
>
> > If we can't avoid macro-ising HVC, we should do it globally, not locally
> > to the Xen code. That way we at least keep all the horror in one place.
>
> That sounds like a good idea to me.
>
> Given that Stefano is proposing to make the ISS a (per-hypervisor)
> constant we could consider just defining the Thumb and non-Thumb
> constants instead of doing all the construction with the __HVC_IMM stuff
> -- that would remove a big bit of the macroization.

It's not quite as simple as that -- emitting instructions using data
directives is not endianness safe, and even in the cases where .long gives
the right result for ARM, it gives the wrong result for 32-bit Thumb
instructions if the opcode is given in human-readable order.

I was trying to solve the same problem for the kvm guys with some global
macros -- I'm aiming to get a patch posted soon, so I'll make sure
you're on CC.

Cheers
---Dave

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


Ian.Campbell at citrix

Feb 28, 2012, 2:07 AM

Post #9 of 36 (454 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Tue, 2012-02-28 at 09:46 +0000, Dave Martin wrote:
> On Mon, Feb 27, 2012 at 07:48:45PM +0000, Ian Campbell wrote:
> > Given that Stefano is proposing to make the ISS a (per-hypervisor)
> > constant we could consider just defining the Thumb and non-Thumb
> > constants instead of doing all the construction with the __HVC_IMM stuff
> > -- that would remove a big bit of the macroization.
>
> It's not quite as simple as that -- emitting instructions using data
> directives is not endianness safe, and even in the cases where .long gives
> the right result for ARM, it gives the wrong result for 32-bit Thumb
> instructions if the opcode is given in human-readable order.

Urk, yes,..

> I was trying to solve the same problem for the kvm guys with some global
> macros -- I'm aiming to get a patch posted soon, so I'll make sure
> you're on CC.

Awesome, thanks!

Ian.


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


Ian.Campbell at citrix

Feb 28, 2012, 2:12 AM

Post #10 of 36 (455 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Mon, 2012-02-27 at 21:05 +0000, Peter Maydell wrote:
> On 27 February 2012 16:27, Ian Campbell <Ian.Campbell [at] citrix> wrote:
> > R12 is not accessible from the 16 bit "T1" Thumb encoding of mov
> > immediate (which can only target r0..r7).
> >
> > Since we support only ARMv7+ there are "T2" and "T3" encodings available
> > which do allow direct mov of an immediate into R12, but are 32 bit Thumb
> > instructions.
> >
> > Should we use r7 instead to maximise instruction density for Thumb code?
>
> r7 is (used by gcc as) the Thumb frame pointer; I don't know if this
> makes it worth avoiding in this context.

I think it does.

It actually sounds as if using r12 is fine here, the impact on code
density should be pretty small -- there aren't really all that many call
sites which involve hypercalls.

By way of an example I measured an x86 kernel which should be using more
hypercalls due to pv paging etc and found that 0.014% of the lines in
"objdump -d" contained a call to the hypercall_page. (I know not all
lines of objdump -d output are instructions but it's a reasonable approx
IMHO).

So I think using 3 16 bit instructions slots instead of 2 won't make
much impact in practice.

Thanks,
Ian.


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


dave.martin at linaro

Feb 28, 2012, 2:20 AM

Post #11 of 36 (453 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Mon, Feb 27, 2012 at 07:33:39PM +0000, Ian Campbell wrote:
> On Mon, 2012-02-27 at 18:03 +0000, Dave Martin wrote:
> > On Mon, Feb 27, 2012 at 04:27:23PM +0000, Ian Campbell wrote:
> > > On Thu, 2012-02-23 at 17:48 +0000, Stefano Stabellini wrote:
> > > > We need a register to pass the hypercall number because we might not
> > > > know it at compile time and HVC only takes an immediate argument.
> > > >
> > > > Among the available registers r12 seems to be the best choice because it
> > > > is defined as "intra-procedure call scratch register".
> > >
> > > R12 is not accessible from the 16 bit "T1" Thumb encoding of mov
> > > immediate (which can only target r0..r7).
> >
> > This is untrue. The important instructions, like MOV Rd, Rn can access
> > all the regs. But anyway, there is no such thing as a Thumb-1 kernel,
> > so we won't really care.
>
> I did say "mov immediate", which is the one which matters when loading a
> constant hypercall number (the common case). AFAIK the "mov Rd, #imm" T1
> encoding cannot access all registers.
>
> The "mov rd,rn" form only helps for syscall(2) like functions, which are
> unusual, at least for Xen, although as Stefano says, they do exist.

Apologies -- looks like I misread you here. I agree, but it's probably
a minor issue nonetheless.

>
> > > Since we support only ARMv7+ there are "T2" and "T3" encodings available
> > > which do allow direct mov of an immediate into R12, but are 32 bit Thumb
> > > instructions.
> > >
> > > Should we use r7 instead to maximise instruction density for Thumb code?
> >
> > The difference seems trivial when put into context, even if you code a
> > special Thumb version of the code to maximise density (the Thumb-2 code
> > which gets built from assembler in the kernel is very suboptimal in
> > size, but there simply isn't a high proportion of asm code in the kernel
> > anyway.) I wouldn't consider the ARM/Thumb differences as an important
> > factor when deciding on a register.
>
> OK, that's useful information. thanks.
>
> > One argument for _not_ using r12 for this purpose is that it is then
> > harder to put a generic "HVC" function (analogous to the "syscall"
> > syscall) out-of-line, since r12 could get destroyed by the call.
>
> For an out of line syscall(2) wouldn't the syscall number either be in a
> standard C calling convention argument register or on the stack when the
> function was called, since it is just a normal argument at that point?
> As you point out it cannot be passed in r12 (and could never be, due to
> the clobbering).
>
> The syscall function itself would have to move the arguments and syscall
> nr etc around before issuing the syscall.
>
> I think the same is true of a similar hypercall(2)
>
> > If you don't think you will ever care about putting HVC out of line
> > though, it may not matter.

If you have both inline and out-of-line hypercalls, it's hard to ensure
that you never have to shuffle the registers in either case.

Shuffling can be reduced but only at the expense of strange argument
ordering in some cases when calling from C -- the complexity is probably
not worth it. Linux doesn't bother for its own syscalls.

Note that even in assembler, a branch from one section to a label in
another section may cause r12 to get destroyed, so you will need to be
careful about how you code the hypervisor trap handler. However, this
is not different from coding exception handlers in general, so I don't
know that it constitutes a conclusive argument on its own.

My instinctive preference would therefore be for r7 (which also seems to
be good enough for Linux syscalls) -- but it really depends how many
arguments you expect to need to support.

Cheers
---Dave

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


Ian.Campbell at citrix

Feb 28, 2012, 2:48 AM

Post #12 of 36 (455 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Tue, 2012-02-28 at 10:20 +0000, Dave Martin wrote:
> On Mon, Feb 27, 2012 at 07:33:39PM +0000, Ian Campbell wrote:
> > On Mon, 2012-02-27 at 18:03 +0000, Dave Martin wrote:
> > > > Since we support only ARMv7+ there are "T2" and "T3" encodings available
> > > > which do allow direct mov of an immediate into R12, but are 32 bit Thumb
> > > > instructions.
> > > >
> > > > Should we use r7 instead to maximise instruction density for Thumb code?
> > >
> > > The difference seems trivial when put into context, even if you code a
> > > special Thumb version of the code to maximise density (the Thumb-2 code
> > > which gets built from assembler in the kernel is very suboptimal in
> > > size, but there simply isn't a high proportion of asm code in the kernel
> > > anyway.) I wouldn't consider the ARM/Thumb differences as an important
> > > factor when deciding on a register.
> >
> > OK, that's useful information. thanks.
> >
> > > One argument for _not_ using r12 for this purpose is that it is then
> > > harder to put a generic "HVC" function (analogous to the "syscall"
> > > syscall) out-of-line, since r12 could get destroyed by the call.
> >
> > For an out of line syscall(2) wouldn't the syscall number either be in a
> > standard C calling convention argument register or on the stack when the
> > function was called, since it is just a normal argument at that point?
> > As you point out it cannot be passed in r12 (and could never be, due to
> > the clobbering).
> >
> > The syscall function itself would have to move the arguments and syscall
> > nr etc around before issuing the syscall.
> >
> > I think the same is true of a similar hypercall(2)
> >
> > > If you don't think you will ever care about putting HVC out of line
> > > though, it may not matter.
>
> If you have both inline and out-of-line hypercalls, it's hard to ensure
> that you never have to shuffle the registers in either case.

Agreed.

I think we want to optimise for the inline case since those are the
majority.

The only non-inline case is the special "privcmd ioctl" which is the
mechanism that allows the Xen toolstack to make hypercalls. It's
somewhat akin to syscall(2). By the time you get to it you will already
have done a system call for the ioctl, pulled the arguments from the
ioctl argument structure etc, plus such hypercalls are not really
performance critical.

> Shuffling can be reduced but only at the expense of strange argument
> ordering in some cases when calling from C -- the complexity is probably
> not worth it. Linux doesn't bother for its own syscalls.
>
> Note that even in assembler, a branch from one section to a label in
> another section may cause r12 to get destroyed, so you will need to be
> careful about how you code the hypervisor trap handler. However, this
> is not different from coding exception handlers in general, so I don't
> know that it constitutes a conclusive argument on its own.

We are happy to arrange that this doesn't occur on our trap entry paths,
at least until the guest register state has been saved. Currently the
hypercall dispatcher is in C and gets r12 from the on-stack saved state.
We will likely eventually optimise the hypercall path directly in ASM
and in that case we are happy to take steps to ensure we don't clobber
r12 before we need it.

> My instinctive preference would therefore be for r7 (which also seems to
> be good enough for Linux syscalls) -- but it really depends how many
> arguments you expect to need to support.

Apparently r7 is the frame pointer for gcc in thumb mode which I think
is a good reason to avoid it.

We currently have some 5 argument hypercalls and there have been
occasional suggestions for interfaces which use 6 -- although none of
them have come to reality.

Ian.


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


stefano.stabellini at eu

Feb 28, 2012, 4:21 AM

Post #13 of 36 (452 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Tue, 28 Feb 2012, Dave Martin wrote:
> > Given that Stefano is proposing to make the ISS a (per-hypervisor)
> > constant we could consider just defining the Thumb and non-Thumb
> > constants instead of doing all the construction with the __HVC_IMM stuff
> > -- that would remove a big bit of the macroization.
>
> It's not quite as simple as that -- emitting instructions using data
> directives is not endianness safe, and even in the cases where .long gives
> the right result for ARM, it gives the wrong result for 32-bit Thumb
> instructions if the opcode is given in human-readable order.
>
> I was trying to solve the same problem for the kvm guys with some global
> macros -- I'm aiming to get a patch posted soon, so I'll make sure
> you're on CC.

That would be great, thanks!

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


stefano.stabellini at eu

Feb 28, 2012, 4:23 AM

Post #14 of 36 (454 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Tue, 28 Feb 2012, Ian Campbell wrote:
> On Tue, 2012-02-28 at 10:20 +0000, Dave Martin wrote:
> > On Mon, Feb 27, 2012 at 07:33:39PM +0000, Ian Campbell wrote:
> > > On Mon, 2012-02-27 at 18:03 +0000, Dave Martin wrote:
> > > > > Since we support only ARMv7+ there are "T2" and "T3" encodings available
> > > > > which do allow direct mov of an immediate into R12, but are 32 bit Thumb
> > > > > instructions.
> > > > >
> > > > > Should we use r7 instead to maximise instruction density for Thumb code?
> > > >
> > > > The difference seems trivial when put into context, even if you code a
> > > > special Thumb version of the code to maximise density (the Thumb-2 code
> > > > which gets built from assembler in the kernel is very suboptimal in
> > > > size, but there simply isn't a high proportion of asm code in the kernel
> > > > anyway.) I wouldn't consider the ARM/Thumb differences as an important
> > > > factor when deciding on a register.
> > >
> > > OK, that's useful information. thanks.
> > >
> > > > One argument for _not_ using r12 for this purpose is that it is then
> > > > harder to put a generic "HVC" function (analogous to the "syscall"
> > > > syscall) out-of-line, since r12 could get destroyed by the call.
> > >
> > > For an out of line syscall(2) wouldn't the syscall number either be in a
> > > standard C calling convention argument register or on the stack when the
> > > function was called, since it is just a normal argument at that point?
> > > As you point out it cannot be passed in r12 (and could never be, due to
> > > the clobbering).
> > >
> > > The syscall function itself would have to move the arguments and syscall
> > > nr etc around before issuing the syscall.
> > >
> > > I think the same is true of a similar hypercall(2)
> > >
> > > > If you don't think you will ever care about putting HVC out of line
> > > > though, it may not matter.
> >
> > If you have both inline and out-of-line hypercalls, it's hard to ensure
> > that you never have to shuffle the registers in either case.
>
> Agreed.
>
> I think we want to optimise for the inline case since those are the
> majority.

They are not just the majority, all of them are static inline at the
moment, even on x86 (where the number of hypercalls is much higher).

So yes, we should optimize for the inline case.


> The only non-inline case is the special "privcmd ioctl" which is the
> mechanism that allows the Xen toolstack to make hypercalls. It's
> somewhat akin to syscall(2). By the time you get to it you will already
> have done a system call for the ioctl, pulled the arguments from the
> ioctl argument structure etc, plus such hypercalls are not really
> performance critical.

Even the privcmd hypercall (privcmd_call) is a static inline function,
it is just that at the moment there is only one caller :)


> > Shuffling can be reduced but only at the expense of strange argument
> > ordering in some cases when calling from C -- the complexity is probably
> > not worth it. Linux doesn't bother for its own syscalls.
> >
> > Note that even in assembler, a branch from one section to a label in
> > another section may cause r12 to get destroyed, so you will need to be
> > careful about how you code the hypervisor trap handler. However, this
> > is not different from coding exception handlers in general, so I don't
> > know that it constitutes a conclusive argument on its own.
>
> We are happy to arrange that this doesn't occur on our trap entry paths,
> at least until the guest register state has been saved. Currently the
> hypercall dispatcher is in C and gets r12 from the on-stack saved state.
> We will likely eventually optimise the hypercall path directly in ASM
> and in that case we are happy to take steps to ensure we don't clobber
> r12 before we need it.

Yes, I don't think this should be an issue.


> > My instinctive preference would therefore be for r7 (which also seems to
> > be good enough for Linux syscalls) -- but it really depends how many
> > arguments you expect to need to support.
>
> Apparently r7 is the frame pointer for gcc in thumb mode which I think
> is a good reason to avoid it.
>
> We currently have some 5 argument hypercalls and there have been
> occasional suggestions for interfaces which use 6 -- although none of
> them have come to reality.

I don't have a very strong opinion on which register we should use, but
I would like to avoid r7 if it is already actively used by gcc.

The fact that r12 can be destroyed so easily is actually a good argument
for using it because it means it is less likely to contain useful data
that needs to be saved/restored by gcc.

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


dave.martin at linaro

Feb 29, 2012, 1:34 AM

Post #15 of 36 (456 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Tue, Feb 28, 2012 at 12:28:29PM +0000, Stefano Stabellini wrote:
> On Tue, 28 Feb 2012, Ian Campbell wrote:
> > On Tue, 2012-02-28 at 10:20 +0000, Dave Martin wrote:
> > > On Mon, Feb 27, 2012 at 07:33:39PM +0000, Ian Campbell wrote:
> > > > On Mon, 2012-02-27 at 18:03 +0000, Dave Martin wrote:
> > > > > > Since we support only ARMv7+ there are "T2" and "T3" encodings available
> > > > > > which do allow direct mov of an immediate into R12, but are 32 bit Thumb
> > > > > > instructions.
> > > > > >
> > > > > > Should we use r7 instead to maximise instruction density for Thumb code?
> > > > >
> > > > > The difference seems trivial when put into context, even if you code a
> > > > > special Thumb version of the code to maximise density (the Thumb-2 code
> > > > > which gets built from assembler in the kernel is very suboptimal in
> > > > > size, but there simply isn't a high proportion of asm code in the kernel
> > > > > anyway.) I wouldn't consider the ARM/Thumb differences as an important
> > > > > factor when deciding on a register.
> > > >
> > > > OK, that's useful information. thanks.
> > > >
> > > > > One argument for _not_ using r12 for this purpose is that it is then
> > > > > harder to put a generic "HVC" function (analogous to the "syscall"
> > > > > syscall) out-of-line, since r12 could get destroyed by the call.
> > > >
> > > > For an out of line syscall(2) wouldn't the syscall number either be in a
> > > > standard C calling convention argument register or on the stack when the
> > > > function was called, since it is just a normal argument at that point?
> > > > As you point out it cannot be passed in r12 (and could never be, due to
> > > > the clobbering).
> > > >
> > > > The syscall function itself would have to move the arguments and syscall
> > > > nr etc around before issuing the syscall.
> > > >
> > > > I think the same is true of a similar hypercall(2)
> > > >
> > > > > If you don't think you will ever care about putting HVC out of line
> > > > > though, it may not matter.
> > >
> > > If you have both inline and out-of-line hypercalls, it's hard to ensure
> > > that you never have to shuffle the registers in either case.
> >
> > Agreed.
> >
> > I think we want to optimise for the inline case since those are the
> > majority.
>
> They are not just the majority, all of them are static inline at the
> moment, even on x86 (where the number of hypercalls is much higher).
>
> So yes, we should optimize for the inline case.
>
>
> > The only non-inline case is the special "privcmd ioctl" which is the
> > mechanism that allows the Xen toolstack to make hypercalls. It's
> > somewhat akin to syscall(2). By the time you get to it you will already
> > have done a system call for the ioctl, pulled the arguments from the
> > ioctl argument structure etc, plus such hypercalls are not really
> > performance critical.
>
> Even the privcmd hypercall (privcmd_call) is a static inline function,
> it is just that at the moment there is only one caller :)
>
>
> > > Shuffling can be reduced but only at the expense of strange argument
> > > ordering in some cases when calling from C -- the complexity is probably
> > > not worth it. Linux doesn't bother for its own syscalls.
> > >
> > > Note that even in assembler, a branch from one section to a label in
> > > another section may cause r12 to get destroyed, so you will need to be
> > > careful about how you code the hypervisor trap handler. However, this
> > > is not different from coding exception handlers in general, so I don't
> > > know that it constitutes a conclusive argument on its own.
> >
> > We are happy to arrange that this doesn't occur on our trap entry paths,
> > at least until the guest register state has been saved. Currently the
> > hypercall dispatcher is in C and gets r12 from the on-stack saved state.
> > We will likely eventually optimise the hypercall path directly in ASM
> > and in that case we are happy to take steps to ensure we don't clobber
> > r12 before we need it.
>
> Yes, I don't think this should be an issue.

Fair enough.

> > > My instinctive preference would therefore be for r7 (which also seems to
> > > be good enough for Linux syscalls) -- but it really depends how many
> > > arguments you expect to need to support.
> >
> > Apparently r7 is the frame pointer for gcc in thumb mode which I think
> > is a good reason to avoid it.
> >
> > We currently have some 5 argument hypercalls and there have been
> > occasional suggestions for interfaces which use 6 -- although none of
> > them have come to reality.
>
> I don't have a very strong opinion on which register we should use, but
> I would like to avoid r7 if it is already actively used by gcc.

But there is no framepointer for Thumb-2 code (?)

> The fact that r12 can be destroyed so easily is actually a good argument
> for using it because it means it is less likely to contain useful data
> that needs to be saved/restored by gcc.

That's a fair point.

Cheers
---Dave

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


Ian.Campbell at citrix

Feb 29, 2012, 1:56 AM

Post #16 of 36 (454 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Wed, 2012-02-29 at 09:34 +0000, Dave Martin wrote:
> On Tue, Feb 28, 2012 at 12:28:29PM +0000, Stefano Stabellini wrote:

> > I don't have a very strong opinion on which register we should use, but
> > I would like to avoid r7 if it is already actively used by gcc.
>
> But there is no framepointer for Thumb-2 code (?)

Peter Maydell suggested there was:
> r7 is (used by gcc as) the Thumb frame pointer; I don't know if this
> makes it worth avoiding in this context.

Sounds like it might be a gcc-ism, possibly a non-default option?

Anyway, I think r12 will be fine for our purposes so the point is rather
moot.

Ian.


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


dave.martin at linaro

Feb 29, 2012, 3:47 AM

Post #17 of 36 (450 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Wed, Feb 29, 2012 at 09:56:02AM +0000, Ian Campbell wrote:
> On Wed, 2012-02-29 at 09:34 +0000, Dave Martin wrote:
> > On Tue, Feb 28, 2012 at 12:28:29PM +0000, Stefano Stabellini wrote:
>
> > > I don't have a very strong opinion on which register we should use, but
> > > I would like to avoid r7 if it is already actively used by gcc.
> >
> > But there is no framepointer for Thumb-2 code (?)
>
> Peter Maydell suggested there was:
> > r7 is (used by gcc as) the Thumb frame pointer; I don't know if this
> > makes it worth avoiding in this context.
>
> Sounds like it might be a gcc-ism, possibly a non-default option?

I seem to remember discussions about some cruft in gcc related to this.
gcc actually barfs at you if you try to allocate r7 to inline asm
without -fomit-frame-pointer. That use for r7 really relates to the
legacy ABI, so this may be a bug.

> Anyway, I think r12 will be fine for our purposes so the point is rather
> moot.

Yes, it sounds like it. If that r7 issue is a gcc bug, this would avoid
it.

If you leave the job of putting the right constant into r12 up to gcc,
it should generate reasonable for you without having to code it
explicitly anyway:

register int hvc_num asm("r12") = 0xDEADBEEF;

asm volatile (
"hvc 0"
:: "r" (hvc_num)
)

Cheers
---Dave

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


dave.martin at linaro

Feb 29, 2012, 4:58 AM

Post #18 of 36 (455 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Wed, Feb 29, 2012 at 09:56:02AM +0000, Ian Campbell wrote:
> On Wed, 2012-02-29 at 09:34 +0000, Dave Martin wrote:
> > On Tue, Feb 28, 2012 at 12:28:29PM +0000, Stefano Stabellini wrote:
>
> > > I don't have a very strong opinion on which register we should use, but
> > > I would like to avoid r7 if it is already actively used by gcc.
> >
> > But there is no framepointer for Thumb-2 code (?)
>
> Peter Maydell suggested there was:
> > r7 is (used by gcc as) the Thumb frame pointer; I don't know if this
> > makes it worth avoiding in this context.
>
> Sounds like it might be a gcc-ism, possibly a non-default option?
>
> Anyway, I think r12 will be fine for our purposes so the point is rather
> moot.

Just had a chat with some tools guys -- apparently, when passing register
arguments to gcc inline asms there really isn't a guarantee that those
variables will be in the expected registers on entry to the inline asm.

If gcc reorders other function calls or other code around the inline asm
(which it can do, except under certain controlled situations), then
intervening code can clobber any registers in general.

Or, to summarise another way, there is no way to control which register
is used to pass something to an inline asm in general (often we get away
with this, and there are a lot of inline asms in the kernel that assume
it works, but the more you inline the more likely you are to get nasty
surprises). There is no workaroud, except on some architectures where
special asm constraints allow specific individual registers to be
specified for operands (i386 for example).

If you need a specific register, this means that you must set up that
register explicitly inside the asm if you want a guarantee that the
code will work:

asm volatile (
"movw r12, %[hvc_num]\n\t"
...
"hvc #0"
:: [hvc_num] "i" (NUMBER) : "r12"
);

Of course, if you need to set up more than about 5 or 6 registers in
this way, the doubled register footprint means that the compiler will
have to start spilling stuff to the stack.


This is the kind of problem which goes away when out-of-lining the
hvc wrapper behind a C function interface, since the ABI then provides
guarantees about how values are mershaled into and out of that code.


Notwithstanding the above, even if we do make theoretically unsound
(but often true) assumptions about inline asms, ARM will be no worse
than other arches in this respect.


Other than serving as a reminder that inline asm is a deep can of
worms, this doesn't really give us a neat solution...

---Dave

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


Ian.Campbell at citrix

Feb 29, 2012, 6:44 AM

Post #19 of 36 (452 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Wed, 2012-02-29 at 12:58 +0000, Dave Martin wrote:
> On Wed, Feb 29, 2012 at 09:56:02AM +0000, Ian Campbell wrote:
> > On Wed, 2012-02-29 at 09:34 +0000, Dave Martin wrote:
> > > On Tue, Feb 28, 2012 at 12:28:29PM +0000, Stefano Stabellini wrote:
> >
> > > > I don't have a very strong opinion on which register we should use, but
> > > > I would like to avoid r7 if it is already actively used by gcc.
> > >
> > > But there is no framepointer for Thumb-2 code (?)
> >
> > Peter Maydell suggested there was:
> > > r7 is (used by gcc as) the Thumb frame pointer; I don't know if this
> > > makes it worth avoiding in this context.
> >
> > Sounds like it might be a gcc-ism, possibly a non-default option?
> >
> > Anyway, I think r12 will be fine for our purposes so the point is rather
> > moot.
>
> Just had a chat with some tools guys -- apparently, when passing register
> arguments to gcc inline asms there really isn't a guarantee that those
> variables will be in the expected registers on entry to the inline asm.
>
> If gcc reorders other function calls or other code around the inline asm
> (which it can do, except under certain controlled situations), then
> intervening code can clobber any registers in general.
>
> Or, to summarise another way, there is no way to control which register
> is used to pass something to an inline asm in general (often we get away
> with this, and there are a lot of inline asms in the kernel that assume
> it works, but the more you inline the more likely you are to get nasty
> surprises). There is no workaroud, except on some architectures where
> special asm constraints allow specific individual registers to be
> specified for operands (i386 for example).

I had assumed I just couldn't find the right syntax. Useful to know that
I couldn't find it because it doesn't exist!

> If you need a specific register, this means that you must set up that
> register explicitly inside the asm if you want a guarantee that the
> code will work:
>
> asm volatile (
> "movw r12, %[hvc_num]\n\t"

Is gcc (or gas?) smart enough to optimise this away if it turns out that
%[hvc_num] == r12?

> ...
> "hvc #0"
> :: [hvc_num] "i" (NUMBER) : "r12"
> );
>
> Of course, if you need to set up more than about 5 or 6 registers in
> this way, the doubled register footprint means that the compiler will
> have to start spilling stuff to the stack.
>
>
> This is the kind of problem which goes away when out-of-lining the
> hvc wrapper behind a C function interface, since the ABI then provides
> guarantees about how values are mershaled into and out of that code.

I don't think anything would stop gcc from clobbering an argument
register right on function entry (e..g it might move r0 to r8 and
clobber r0, for whatever reason), so that they are no longer where you
expect them to be when you hit the asm. Unlikely perhaps but no more so
than the other issues you've raised?

Or did you mean out-of-line as in "written in a .S file" as well as out
of line?

> Notwithstanding the above, even if we do make theoretically unsound
> (but often true) assumptions about inline asms, ARM will be no worse
> than other arches in this respect.

This is true.

> Other than serving as a reminder that inline asm is a deep can of
> worms, this doesn't really give us a neat solution...

How are system calls implemented on the userspace side? I confess I
don't know what the ARM syscall ABI looks like -- is it all registers or
is some of it on the stack? It sounds like the solution ought to be
pretty similar though.

Ian.


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


stefano.stabellini at eu

Feb 29, 2012, 6:52 AM

Post #20 of 36 (459 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Wed, 29 Feb 2012, Dave Martin wrote:
> On Wed, Feb 29, 2012 at 09:56:02AM +0000, Ian Campbell wrote:
> > On Wed, 2012-02-29 at 09:34 +0000, Dave Martin wrote:
> > > On Tue, Feb 28, 2012 at 12:28:29PM +0000, Stefano Stabellini wrote:
> >
> > > > I don't have a very strong opinion on which register we should use, but
> > > > I would like to avoid r7 if it is already actively used by gcc.
> > >
> > > But there is no framepointer for Thumb-2 code (?)
> >
> > Peter Maydell suggested there was:
> > > r7 is (used by gcc as) the Thumb frame pointer; I don't know if this
> > > makes it worth avoiding in this context.
> >
> > Sounds like it might be a gcc-ism, possibly a non-default option?
> >
> > Anyway, I think r12 will be fine for our purposes so the point is rather
> > moot.
>
> Just had a chat with some tools guys -- apparently, when passing register
> arguments to gcc inline asms there really isn't a guarantee that those
> variables will be in the expected registers on entry to the inline asm.
>
> If gcc reorders other function calls or other code around the inline asm
> (which it can do, except under certain controlled situations), then
> intervening code can clobber any registers in general.
>
> Or, to summarise another way, there is no way to control which register
> is used to pass something to an inline asm in general (often we get away
> with this, and there are a lot of inline asms in the kernel that assume
> it works, but the more you inline the more likely you are to get nasty
> surprises). There is no workaroud, except on some architectures where
> special asm constraints allow specific individual registers to be
> specified for operands (i386 for example).
>
> If you need a specific register, this means that you must set up that
> register explicitly inside the asm if you want a guarantee that the
> code will work:
>
> asm volatile (
> "movw r12, %[hvc_num]\n\t"
> ...
> "hvc #0"
> :: [hvc_num] "i" (NUMBER) : "r12"
> );
>

OK, we can arrange the hypercall code to be like that.
Also with your patch series it would be "_hvc" because of the .macro,
right?



> This is the kind of problem which goes away when out-of-lining the
> hvc wrapper behind a C function interface, since the ABI then provides
> guarantees about how values are mershaled into and out of that code.

Do you mean implementing the entire HYPERVISOR_example_op in assembly
and calling it from C?
Because I guess that gcc would still be free to mess with the registers
between the C function entry point and any inline assembly code.

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


dave.martin at linaro

Mar 1, 2012, 1:35 AM

Post #21 of 36 (455 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Wed, Feb 29, 2012 at 02:44:24PM +0000, Ian Campbell wrote:
> On Wed, 2012-02-29 at 12:58 +0000, Dave Martin wrote:
> > On Wed, Feb 29, 2012 at 09:56:02AM +0000, Ian Campbell wrote:
> > > On Wed, 2012-02-29 at 09:34 +0000, Dave Martin wrote:
> > > > On Tue, Feb 28, 2012 at 12:28:29PM +0000, Stefano Stabellini wrote:
> > >
> > > > > I don't have a very strong opinion on which register we should use, but
> > > > > I would like to avoid r7 if it is already actively used by gcc.
> > > >
> > > > But there is no framepointer for Thumb-2 code (?)
> > >
> > > Peter Maydell suggested there was:
> > > > r7 is (used by gcc as) the Thumb frame pointer; I don't know if this
> > > > makes it worth avoiding in this context.
> > >
> > > Sounds like it might be a gcc-ism, possibly a non-default option?
> > >
> > > Anyway, I think r12 will be fine for our purposes so the point is rather
> > > moot.
> >
> > Just had a chat with some tools guys -- apparently, when passing register
> > arguments to gcc inline asms there really isn't a guarantee that those
> > variables will be in the expected registers on entry to the inline asm.
> >
> > If gcc reorders other function calls or other code around the inline asm
> > (which it can do, except under certain controlled situations), then
> > intervening code can clobber any registers in general.
> >
> > Or, to summarise another way, there is no way to control which register
> > is used to pass something to an inline asm in general (often we get away
> > with this, and there are a lot of inline asms in the kernel that assume
> > it works, but the more you inline the more likely you are to get nasty
> > surprises). There is no workaroud, except on some architectures where
> > special asm constraints allow specific individual registers to be
> > specified for operands (i386 for example).
>
> I had assumed I just couldn't find the right syntax. Useful to know that
> I couldn't find it because it doesn't exist!
>
> > If you need a specific register, this means that you must set up that
> > register explicitly inside the asm if you want a guarantee that the
> > code will work:
> >
> > asm volatile (
> > "movw r12, %[hvc_num]\n\t"
>
> Is gcc (or gas?) smart enough to optimise this away if it turns out that
> %[hvc_num] == r12?

No, unfortunately. Except for the information defined by the constraints,
the inline asm block is completely opaque to the compiler (except for
pasting in operands -- which is a string operation done with no knowledge
of what the text means for the assembler).

>
> > ...
> > "hvc #0"
> > :: [hvc_num] "i" (NUMBER) : "r12"
> > );
> >
> > Of course, if you need to set up more than about 5 or 6 registers in
> > this way, the doubled register footprint means that the compiler will
> > have to start spilling stuff to the stack.
> >
> >
> > This is the kind of problem which goes away when out-of-lining the
> > hvc wrapper behind a C function interface, since the ABI then provides
> > guarantees about how values are mershaled into and out of that code.
>
> I don't think anything would stop gcc from clobbering an argument
> register right on function entry (e..g it might move r0 to r8 and
> clobber r0, for whatever reason), so that they are no longer where you
> expect them to be when you hit the asm. Unlikely perhaps but no more so
> than the other issues you've raised?
>
> Or did you mean out-of-line as in "written in a .S file" as well as out
> of line?

Yes. Some toolchains have a concept of out-of-line assembler functions
in a .c file, but gcc doesn't -- the asm is always inline in its
immediate context, even if the containing function won't be inlined.

However, the compiler would have to be applying pretty creative
optimizations to break cases cases where an inlinable function contains,
say, nothing except for declarations, the asm() and a return statement.

I feel that the kernel implicitly relies on such things working in too
many places for breakage of that assumption to go unnoticed.

>
> > Notwithstanding the above, even if we do make theoretically unsound
> > (but often true) assumptions about inline asms, ARM will be no worse
> > than other arches in this respect.
>
> This is true.
>
> > Other than serving as a reminder that inline asm is a deep can of
> > worms, this doesn't really give us a neat solution...
>
> How are system calls implemented on the userspace side? I confess I
> don't know what the ARM syscall ABI looks like -- is it all registers or
> is some of it on the stack? It sounds like the solution ought to be
> pretty similar though.

I _believe_ it's now out of line in most cases.

I'm not sure I totally understand it all, though:

http://www.eglibc.org/cgi-bin/viewvc.cgi/trunk/ports/sysdeps/unix/sysv/linux/arm/eabi/

There is an internal inline syscall wrapper INTERNAL_SYSCALL_RAW(), but
I can't see where it is used. For Thumb code it actually just munges
registers around and calls an out-of-line function.

If I grep the disassembly of a recent EABI libc, there appear to be only
207 svc call sites, and most of them look like they are out-of-linux
wrappers, generated from the DO_CALL macro in
ports/sysdeps/unix/sysv/linux/arm/eabi/sysdep.h

That's based on a hasty reading of the code though... I'm not very
familiar with the way libc works. (Disassembling stripped arm binaries
can also be a bit unrelieable.)

It's also worth nothing that the inline asm sycall macros which used
to exist in userspace <asm/unistd.h> are gone (at least for EABI).

Cheers
---Dave

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


dave.martin at linaro

Mar 1, 2012, 1:51 AM

Post #22 of 36 (453 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Wed, Feb 29, 2012 at 02:52:38PM +0000, Stefano Stabellini wrote:
> On Wed, 29 Feb 2012, Dave Martin wrote:
> > On Wed, Feb 29, 2012 at 09:56:02AM +0000, Ian Campbell wrote:
> > > On Wed, 2012-02-29 at 09:34 +0000, Dave Martin wrote:
> > > > On Tue, Feb 28, 2012 at 12:28:29PM +0000, Stefano Stabellini wrote:
> > >
> > > > > I don't have a very strong opinion on which register we should use, but
> > > > > I would like to avoid r7 if it is already actively used by gcc.
> > > >
> > > > But there is no framepointer for Thumb-2 code (?)
> > >
> > > Peter Maydell suggested there was:
> > > > r7 is (used by gcc as) the Thumb frame pointer; I don't know if this
> > > > makes it worth avoiding in this context.
> > >
> > > Sounds like it might be a gcc-ism, possibly a non-default option?
> > >
> > > Anyway, I think r12 will be fine for our purposes so the point is rather
> > > moot.
> >
> > Just had a chat with some tools guys -- apparently, when passing register
> > arguments to gcc inline asms there really isn't a guarantee that those
> > variables will be in the expected registers on entry to the inline asm.
> >
> > If gcc reorders other function calls or other code around the inline asm
> > (which it can do, except under certain controlled situations), then
> > intervening code can clobber any registers in general.
> >
> > Or, to summarise another way, there is no way to control which register
> > is used to pass something to an inline asm in general (often we get away
> > with this, and there are a lot of inline asms in the kernel that assume
> > it works, but the more you inline the more likely you are to get nasty
> > surprises). There is no workaroud, except on some architectures where
> > special asm constraints allow specific individual registers to be
> > specified for operands (i386 for example).
> >
> > If you need a specific register, this means that you must set up that
> > register explicitly inside the asm if you want a guarantee that the
> > code will work:
> >
> > asm volatile (
> > "movw r12, %[hvc_num]\n\t"
> > ...
> > "hvc #0"
> > :: [hvc_num] "i" (NUMBER) : "r12"
> > );
> >
>
> OK, we can arrange the hypercall code to be like that.
> Also with your patch series it would be "_hvc" because of the .macro,
> right?

Yes, but I would avoid making too many assumptions about the final form
of that patch -- it looks like there's significant work to do there,
since I made some unsafe assumptions about how the tools work...

We might end up with a magic #define after all.

> > This is the kind of problem which goes away when out-of-lining the
> > hvc wrapper behind a C function interface, since the ABI then provides
> > guarantees about how values are mershaled into and out of that code.
>
> Do you mean implementing the entire HYPERVISOR_example_op in assembly
> and calling it from C?
> Because I guess that gcc would still be free to mess with the registers
> between the C function entry point and any inline assembly code.

gcc can arrange for the relevant things to be already in r0-r3 and the
relevant stack slots before branching to a function just as for inline
asm. The only differences are that the compiler cannot choose which
registers to use, and the branch cannot be optimised away by the compiler
(the CPU may be able to optimise the branch away at runtime of course,
but that's another story...)

What libc appears to do is wrap each syscall in a separate function.
This means that it's not necessary to shuffle all the arguments by
one position when invoking the actual syscall. (The generic "syscall"
function does of course need to shuffle the arguments so as to
displace the syscall number from the first argument to r7 --
but that's hard to avoid without inlining.)

For example:

00090b50 <shmdt>:
90b50: e52d7004 push {r7} ; (str r7, [sp, #-4]!)
90b54: e59f7010 ldr r7, [pc, #16] ; 90b6c <shmdt+0x1c>
90b58: ef000000 svc 0x00000000
90b5c: e49d7004 pop {r7} ; (ldr r7, [sp], #4)
90b60: e3700a01 cmn r0, #4096 ; 0x1000
...

Syscalls with more than 4 args still need to load the extra ones
from the stack, of course:

00090090 <getsockopt>:
90090: e92d0090 push {r4, r7}
90094: e59d4008 ldr r4, [sp, #8]
90098: e59f7010 ldr r7, [pc, #16] ; 900b0 <getsockopt+0x20>
9009c: ef000000 svc 0x00000000
...


I don't know whether that makes sense for a hypervisor... it partly
depends on how many different hypercalls there are.

By all means implement it both ways and measure the performance
difference, if possible.

Cheers
---Dave

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


linux at arm

Mar 1, 2012, 2:10 AM

Post #23 of 36 (455 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Wed, Feb 29, 2012 at 12:58:26PM +0000, Dave Martin wrote:
> On Wed, Feb 29, 2012 at 09:56:02AM +0000, Ian Campbell wrote:
> > On Wed, 2012-02-29 at 09:34 +0000, Dave Martin wrote:
> > > On Tue, Feb 28, 2012 at 12:28:29PM +0000, Stefano Stabellini wrote:
> >
> > > > I don't have a very strong opinion on which register we should use, but
> > > > I would like to avoid r7 if it is already actively used by gcc.
> > >
> > > But there is no framepointer for Thumb-2 code (?)
> >
> > Peter Maydell suggested there was:
> > > r7 is (used by gcc as) the Thumb frame pointer; I don't know if this
> > > makes it worth avoiding in this context.
> >
> > Sounds like it might be a gcc-ism, possibly a non-default option?
> >
> > Anyway, I think r12 will be fine for our purposes so the point is rather
> > moot.
>
> Just had a chat with some tools guys -- apparently, when passing register
> arguments to gcc inline asms there really isn't a guarantee that those
> variables will be in the expected registers on entry to the inline asm.

The best you can do is:

register unsigned int foo asm("r7") = value;

asm("blah %0" : : "r" (foo));

and ensure that your assembly checks that %0 _is_ r7 and produces a build
error if not. See the __asmeq() macro in asm/system.h to find out how to
do that.

This feature has been missing from ARM GCC for quite a long time - it's
used extensively on x86 GCC, where they have one register class per
register, so they can do stuff like:

asm("blah %0" : : "a" (value));

and be guaranteed that %0 will be eax.

> If you need a specific register, this means that you must set up that
> register explicitly inside the asm if you want a guarantee that the
> code will work:
>
> asm volatile (
> "movw r12, %[hvc_num]\n\t"
> ...
> "hvc #0"
> :: [hvc_num] "i" (NUMBER) : "r12"
> );
>
> Of course, if you need to set up more than about 5 or 6 registers in
> this way, the doubled register footprint means that the compiler will
> have to start spilling stuff to the stack.

No it won't - it will barf instead - think about it. If you're clobbering
r0 - r5, but need to pass in six values in registers, gcc can't use r0-r5
for that, so it must use the remaining registers. It gets rather unhappy
with that, and starts erroring out (iirc 'too many reloads' or some similar
error.) I've been there.

If you want to do it that way, your only option is to store them to memory
and pass the address of the block into the assembly, and reload them there.
Which is extremely sucky and inefficient.

Practically, the register variable plus asm() does seem to work, and seems
to work for virtually all gcc versions out there (there have been the odd
buggy version, but those bugs appear to get fixed.)


_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


linux at arm

Mar 1, 2012, 2:12 AM

Post #24 of 36 (452 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Wed, Feb 29, 2012 at 02:44:24PM +0000, Ian Campbell wrote:
> > If you need a specific register, this means that you must set up that
> > register explicitly inside the asm if you want a guarantee that the
> > code will work:
> >
> > asm volatile (
> > "movw r12, %[hvc_num]\n\t"
>
> Is gcc (or gas?) smart enough to optimise this away if it turns out that
> %[hvc_num] == r12?

No, and it won't do, because %[hvc_num] is specified in these operands:

> > ...
> > "hvc #0"
> > :: [hvc_num] "i" (NUMBER) : "r12"

to be an integer, not a register.

> How are system calls implemented on the userspace side? I confess I
> don't know what the ARM syscall ABI looks like -- is it all registers or
> is some of it on the stack? It sounds like the solution ought to be
> pretty similar though.

All registers. We have a few which take a pointer to an in memory array,
but those are for some old multiplexed syscalls.

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel


dave.martin at linaro

Mar 1, 2012, 2:27 AM

Post #25 of 36 (455 views)
Permalink
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor [In reply to]

On Thu, Mar 01, 2012 at 10:10:29AM +0000, Russell King - ARM Linux wrote:
> On Wed, Feb 29, 2012 at 12:58:26PM +0000, Dave Martin wrote:
> > On Wed, Feb 29, 2012 at 09:56:02AM +0000, Ian Campbell wrote:
> > > On Wed, 2012-02-29 at 09:34 +0000, Dave Martin wrote:
> > > > On Tue, Feb 28, 2012 at 12:28:29PM +0000, Stefano Stabellini wrote:
> > >
> > > > > I don't have a very strong opinion on which register we should use, but
> > > > > I would like to avoid r7 if it is already actively used by gcc.
> > > >
> > > > But there is no framepointer for Thumb-2 code (?)
> > >
> > > Peter Maydell suggested there was:
> > > > r7 is (used by gcc as) the Thumb frame pointer; I don't know if this
> > > > makes it worth avoiding in this context.
> > >
> > > Sounds like it might be a gcc-ism, possibly a non-default option?
> > >
> > > Anyway, I think r12 will be fine for our purposes so the point is rather
> > > moot.
> >
> > Just had a chat with some tools guys -- apparently, when passing register
> > arguments to gcc inline asms there really isn't a guarantee that those
> > variables will be in the expected registers on entry to the inline asm.
>
> The best you can do is:
>
> register unsigned int foo asm("r7") = value;
>
> asm("blah %0" : : "r" (foo));
>
> and ensure that your assembly checks that %0 _is_ r7 and produces a build
> error if not. See the __asmeq() macro in asm/system.h to find out how to
> do that.
>
> This feature has been missing from ARM GCC for quite a long time - it's
> used extensively on x86 GCC, where they have one register class per
> register, so they can do stuff like:
>
> asm("blah %0" : : "a" (value));
>
> and be guaranteed that %0 will be eax.
>
> > If you need a specific register, this means that you must set up that
> > register explicitly inside the asm if you want a guarantee that the
> > code will work:
> >
> > asm volatile (
> > "movw r12, %[hvc_num]\n\t"
> > ...
> > "hvc #0"
> > :: [hvc_num] "i" (NUMBER) : "r12"
> > );
> >
> > Of course, if you need to set up more than about 5 or 6 registers in
> > this way, the doubled register footprint means that the compiler will
> > have to start spilling stuff to the stack.
>
> No it won't - it will barf instead - think about it. If you're clobbering
> r0 - r5, but need to pass in six values in registers, gcc can't use r0-r5
> for that, so it must use the remaining registers. It gets rather unhappy
> with that, and starts erroring out (iirc 'too many reloads' or some similar
> error.) I've been there.

You're right about that -- I didn't pursue my line of thought to the end,
there. I have see the behaviour you describe.

> If you want to do it that way, your only option is to store them to memory
> and pass the address of the block into the assembly, and reload them there.
> Which is extremely sucky and inefficient.
>
> Practically, the register variable plus asm() does seem to work, and seems
> to work for virtually all gcc versions out there (there have been the odd
> buggy version, but those bugs appear to get fixed.)

That is inconvenient for us, but it's a not a bug. The ability for asm
contraints to be able to gcc to put things in specific registers (as with
the gcc "abcd" constraints for i386) would be nice, but as you point out,
this capability is simply not supported by gcc right now for ARM -- the
compiler guys seem to be pretty opposed to it, so I can't say I anticiapte
this being supported in the near future.

So, where there's a compelling reason to inline these things, we can use
the existing techniques if we're alert to the risks. But in cases where
there isn't a compelling reason, aren't we just inviting fragility
unnecessarily?

Cheers
---Dave

_______________________________________________
Xen-devel mailing list
Xen-devel [at] lists
http://lists.xen.org/xen-devel

First page Previous page 1 2 Next page Last page  View All Xen devel RSS feed   Index | Next | Previous | View Threaded
 
 


Interested in having your list archived? Contact Gossamer Threads
 
  Web Applications & Managed Hosting Powered by Gossamer Threads Inc.