Search
j0ke.net Open Build Service
>
Projects
>
home:netmax
:
openssl
>
openssl
> VIA_padlock_support_on_64systems.patch
Sign Up
|
Log In
Username
Password
Cancel
Overview
Repositories
Revisions
Requests
Users
Advanced
Attributes
Meta
File VIA_padlock_support_on_64systems.patch of Package openssl
Index: openssl-1.0.1c/engines/e_padlock.c =================================================================== --- openssl-1.0.1c.orig/engines/e_padlock.c +++ openssl-1.0.1c/engines/e_padlock.c @@ -101,7 +101,10 @@ compiler choice is limited to GCC and Microsoft C. */ #undef COMPILE_HW_PADLOCK #if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM) -# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \ +# if (defined(__GNUC__) && __GNUC__>=2 && \ + (defined(__i386__) || defined(__i386) || \ + defined(__x86_64__) || defined(__x86_64)) \ + ) || \ (defined(_MSC_VER) && defined(_M_IX86)) # define COMPILE_HW_PADLOCK # endif @@ -304,6 +307,7 @@ static volatile struct padlock_cipher_da * ======================================================= */ #if defined(__GNUC__) && __GNUC__>=2 +#if defined(__i386__) || defined(__i386) /* * As for excessive "push %ebx"/"pop %ebx" found all over. * When generating position-independent code GCC won't let @@ -383,21 +387,6 @@ padlock_available(void) return padlock_use_ace + padlock_use_rng; } -#ifndef OPENSSL_NO_AES -/* Our own htonl()/ntohl() */ -static inline void -padlock_bswapl(AES_KEY *ks) -{ - size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]); - unsigned int *key = ks->rd_key; - - while (i--) { - asm volatile ("bswapl %0" : "+r"(*key)); - key++; - } -} -#endif - /* Force key reload from memory to the CPU microcode. Loading EFLAGS from the stack clears EFLAGS[30] which does the trick. */ @@ -456,11 +445,130 @@ static inline void *name(size_t cnt, \ return iv; \ } + +#endif + +#elif defined(__x86_64__) || defined(__x86_64) + +/* Load supported features of the CPU to see if + the PadLock is available. */ + static int +padlock_available(void) +{ + char vendor_string[16]; + unsigned int eax, edx; + + /* Are we running on the Centaur (VIA) CPU? 
*/ + eax = 0x00000000; + vendor_string[12] = 0; + asm volatile ( + "cpuid\n" + "movl %%ebx,(%1)\n" + "movl %%edx,4(%1)\n" + "movl %%ecx,8(%1)\n" + : "+a"(eax) : "r"(vendor_string) : "rbx", "rcx", "rdx"); + if (strcmp(vendor_string, "CentaurHauls") != 0) + return 0; + + /* Check for Centaur Extended Feature Flags presence */ + eax = 0xC0000000; + asm volatile ("cpuid" + : "+a"(eax) : : "rbx", "rcx", "rdx"); + if (eax < 0xC0000001) + return 0; + + /* Read the Centaur Extended Feature Flags */ + eax = 0xC0000001; + asm volatile ("cpuid" + : "+a"(eax), "=d"(edx) : : "rbx", "rcx"); + + /* Fill up some flags */ + padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6)); + padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2)); + + return padlock_use_ace + padlock_use_rng; +} + +/* Force key reload from memory to the CPU microcode. + Loading EFLAGS from the stack clears EFLAGS[30] + which does the trick. */ + static inline void +padlock_reload_key(void) +{ + asm volatile ("pushfq; popfq"); +} + +#ifndef OPENSSL_NO_AES +/* + * This is heuristic key context tracing. At first one + * believes that one should use atomic swap instructions, + * but it's not actually necessary. Point is that if + * padlock_saved_context was changed by another thread + * after we've read it and before we compare it with cdata, + * our key *shall* be reloaded upon thread context switch + * and we are therefore set in either case... + */ + static inline void +padlock_verify_context(struct padlock_cipher_data *cdata) +{ + asm volatile ( + "pushfq\n" + " btl $30,(%%rsp)\n" + " jnc 1f\n" + " cmpq %2,%1\n" + " je 1f\n" + " popfq\n" + " subq $8,%%rsp\n" + "1: addq $8,%%rsp\n" + " movq %2,%0" + :"+m"(padlock_saved_context) + : "r"(padlock_saved_context), "r"(cdata) : "cc"); +} + +/* Template for padlock_xcrypt_* modes */ +/* BIG FAT WARNING: + * The offsets used with 'leaq' instructions + * describe items of the 'padlock_cipher_data' + * structure. 
+ */ +#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \ + static inline void *name(size_t cnt, \ + struct padlock_cipher_data *cdata, \ + void *out, const void *inp) \ +{ void *iv; \ + asm volatile ( "leaq 16(%0),%%rdx\n" \ + " leaq 32(%0),%%rbx\n" \ + rep_xcrypt "\n" \ + : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \ + : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \ + : "rbx", "rdx", "cc", "memory"); \ + return iv; \ +} +#endif + +#endif /* cpu */ + +#ifndef OPENSSL_NO_AES + + /* Generate all functions with appropriate opcodes */ PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptecb */ PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcbc */ PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptcfb */ PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") /* rep xcryptofb */ + +/* Our own htonl()/ntohl() */ +static inline void +padlock_bswapl(AES_KEY *ks) +{ + size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]); + unsigned int *key = ks->rd_key; + + while (i--) { + asm volatile ("bswapl %0" : "+r"(*key)); + key++; + } +} #endif /* The RNG call itself */ @@ -491,8 +599,8 @@ padlock_xstore(void *addr, unsigned int static inline unsigned char * padlock_memcpy(void *dst,const void *src,size_t n) { - long *d=dst; - const long *s=src; + size_t *d=dst; + const size_t *s=src; n /= sizeof(*d); do { *d++ = *s++; } while (--n); Index: openssl-1.0.1c/engines/e_padlock.c =================================================================== --- openssl-1.0.1c.orig/engines/e_padlock.c +++ openssl-1.0.1c/engines/e_padlock.c @@ -457,30 +457,33 @@ padlock_available(void) { char vendor_string[16]; unsigned int eax, edx; + size_t scratch; /* Are we running on the Centaur (VIA) CPU? 
*/ eax = 0x00000000; vendor_string[12] = 0; asm volatile ( + "movq %%rbx,%1\n" "cpuid\n" - "movl %%ebx,(%1)\n" - "movl %%edx,4(%1)\n" - "movl %%ecx,8(%1)\n" - : "+a"(eax) : "r"(vendor_string) : "rbx", "rcx", "rdx"); + "movl %%ebx,(%2)\n" + "movl %%edx,4(%2)\n" + "movl %%ecx,8(%2)\n" + "movq %1,%%rbx" + : "+a"(eax), "=&r"(scratch) : "r"(vendor_string) : "rcx", "rdx"); if (strcmp(vendor_string, "CentaurHauls") != 0) return 0; /* Check for Centaur Extended Feature Flags presence */ eax = 0xC0000000; - asm volatile ("cpuid" - : "+a"(eax) : : "rbx", "rcx", "rdx"); + asm volatile ("movq %%rbx,%1; cpuid; movq %1,%%rbx" + : "+a"(eax), "=&r"(scratch) : : "rcx", "rdx"); if (eax < 0xC0000001) return 0; /* Read the Centaur Extended Feature Flags */ eax = 0xC0000001; - asm volatile ("cpuid" - : "+a"(eax), "=d"(edx) : : "rbx", "rcx"); + asm volatile ("movq %%rbx,%2; cpuid; movq %2,%%rbx" + : "+a"(eax), "=d"(edx), "=&r"(scratch) : : "rcx"); /* Fill up some flags */ padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6)); @@ -536,12 +539,15 @@ padlock_verify_context(struct padlock_ci struct padlock_cipher_data *cdata, \ void *out, const void *inp) \ { void *iv; \ - asm volatile ( "leaq 16(%0),%%rdx\n" \ + size_t scratch; \ + asm volatile ( "movq %%rbx,%4\n" \ + " leaq 16(%0),%%rdx\n" \ " leaq 32(%0),%%rbx\n" \ rep_xcrypt "\n" \ - : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \ + " movq %4,%%rbx" \ + : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp), "=&r"(scratch) \ : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \ - : "rbx", "rdx", "cc", "memory"); \ + : "rdx", "cc", "memory"); \ return iv; \ } #endif