summaryrefslogtreecommitdiffstats
path: root/lib/libcrypto/rc4/rc4_skey.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/libcrypto/rc4/rc4_skey.c')
-rw-r--r--lib/libcrypto/rc4/rc4_skey.c55
1 files changed, 43 insertions, 12 deletions
diff --git a/lib/libcrypto/rc4/rc4_skey.c b/lib/libcrypto/rc4/rc4_skey.c
index 60510624fd8..46b77ec3216 100644
--- a/lib/libcrypto/rc4/rc4_skey.c
+++ b/lib/libcrypto/rc4/rc4_skey.c
@@ -57,12 +57,10 @@
*/
#include <openssl/rc4.h>
-#include <openssl/crypto.h>
-#include <openssl/fips.h>
#include "rc4_locl.h"
#include <openssl/opensslv.h>
-const char *RC4_version="RC4" OPENSSL_VERSION_PTEXT;
+const char RC4_version[]="RC4" OPENSSL_VERSION_PTEXT;
const char *RC4_options(void)
{
@@ -87,7 +85,7 @@ const char *RC4_options(void)
* Date: Wed, 14 Sep 1994 06:35:31 GMT
*/
-FIPS_NON_FIPS_VCIPHER_Init(RC4)
+void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data)
{
register RC4_INT tmp;
register int id1,id2;
@@ -95,26 +93,59 @@ FIPS_NON_FIPS_VCIPHER_Init(RC4)
unsigned int i;
d= &(key->data[0]);
-
- for (i=0; i<256; i++)
- d[i]=i;
key->x = 0;
key->y = 0;
id1=id2=0;
-#define SK_LOOP(n) { \
+#define SK_LOOP(d,n) { \
tmp=d[(n)]; \
id2 = (data[id1] + tmp + id2) & 0xff; \
if (++id1 == len) id1=0; \
d[(n)]=d[id2]; \
d[id2]=tmp; }
+#if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM)
+# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
+ defined(__INTEL__) || \
+ defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64)
+ if (sizeof(RC4_INT) > 1) {
+ /*
+ * Unlike all other x86 [and x86_64] implementations,
+ * Intel P4 core [including EM64T] was found to perform
+ * poorly with wider RC4_INT. Performance improvement
+ * for IA-32 hand-coded assembler turned out to be 2.8x
+ * if re-coded for RC4_CHAR! It's however inappropriate
+ * to just switch to RC4_CHAR for x86[_64], as non-P4
+ * implementations suffer from significant performance
+ * losses then, e.g. PIII exhibits >2x deterioration,
+ * and so does Opteron. In order to assure optimal
+ * all-round performance, we detect P4 at run-time by
+ * checking upon reserved bit 20 in CPU capability
+ * vector and set up compressed key schedule, which is
+ * recognized by correspondingly updated assembler
+ * module... Bit 20 is set up by OPENSSL_ia32_cpuid.
+ *
+ * <appro@fy.chalmers.se>
+ */
+ if (OPENSSL_ia32cap_P & (1<<20)) {
+ unsigned char *cp=(unsigned char *)d;
+
+ for (i=0;i<256;i++) cp[i]=i;
+ for (i=0;i<256;i++) SK_LOOP(cp,i);
+ /* mark schedule as compressed! */
+ d[256/sizeof(RC4_INT)]=-1;
+ return;
+ }
+ }
+# endif
+#endif
+ for (i=0; i < 256; i++) d[i]=i;
for (i=0; i < 256; i+=4)
{
- SK_LOOP(i+0);
- SK_LOOP(i+1);
- SK_LOOP(i+2);
- SK_LOOP(i+3);
+ SK_LOOP(d,i+0);
+ SK_LOOP(d,i+1);
+ SK_LOOP(d,i+2);
+ SK_LOOP(d,i+3);
}
}