From: hama Date: Sat, 28 Feb 2015 06:46:18 +0000 (+0100) Subject: MD5 optimizations X-Git-Url: https://gitweb.hamatoma.de/?a=commitdiff_plain;h=64b8bf671f8f3ac958e10272fe9e4ba7adf6cf57;p=crepublib MD5 optimizations --- diff --git a/base/ReTestUnit.cpp b/base/ReTestUnit.cpp index db5a4d1..bc90647 100644 --- a/base/ReTestUnit.cpp +++ b/base/ReTestUnit.cpp @@ -247,13 +247,11 @@ void ReTestUnit::createTestDir(){ m_tempDir.ensureLastChar(OS_SEPARATOR_CHAR); m_tempDir.append("retestunit", -1); struct stat info; - #ifdef __WIN32__ - #define ALLPERMS 0 - #endif int error = 0; if (lstat(m_tempDir.str(), &info) != 0) - if (_mkdir(m_tempDir.str(), ALLPERMS) != 0) - error = GetLastError(); + if (_mkdir(m_tempDir.str(), ALLPERMS) != 0){ + error = getLastOSError(); + } ReDirectory::deleteTree(m_tempDir.str(), false); m_tempDir.ensureLastChar(OS_SEPARATOR_CHAR); } @@ -320,7 +318,7 @@ const char* ReTestUnit::testDir(){ void ReTestUnit::createFile(const char* filename, const char* content){ FILE* fp = fopen(filename, "w"); if (fp == NULL){ - int error = GetLastError(); + int error = getLastOSError(); logF(true, "createFile(%d): %s", error, filename); } else if (content != NULL){ fwrite(content, strlen(content), 1, fp); diff --git a/base/rebase.hpp b/base/rebase.hpp index 61f67fc..3a6e606 100644 --- a/base/rebase.hpp +++ b/base/rebase.hpp @@ -20,6 +20,8 @@ #include #include #include +//#define __LITTLE_ENDIAN__ +#define __BIG_ENDIAN__ #if defined __linux__ @@ -45,6 +47,9 @@ # define _rmdir(path) rmdir(path) # define OS_SEPARATOR_CHAR '/' # define OS_SEPARATOR "/" +inline int getLastOSError(){ + return errno; +} #elif defined __WIN32__ # include # include @@ -63,6 +68,9 @@ # define ALLPERMS 0 //# define _mkdir(name, mode) (!CreateDirectory(name, NULL)) # define _mkdir(name, mode) _mkdir(name) + inline int getLastOSError(){ + return GetLastError(); + } #endif #define RE_TESTUNIT diff --git a/cunit/cuReMD5.cpp b/cunit/cuReMD5.cpp index aa484e4..2df9386 100644 --- a/cunit/cuReMD5.cpp +++ b/cunit/cuReMD5.cpp @@ -116,7 +116,12 @@ private: int duration = milliSecSince(start); if (duration == 0) duration = 1; - printf("size: %.1f MiByte count: %d rate: %.2f MiB/sec duration: %s\n", + printf("%s endian size: %.1f MiByte count: %d rate: %.2f MiB/sec duration: %s\n", +#if defined __LITTLE_ENDIAN__ + "little", +#else + "big", +#endif max / 1024.0 / 1024, passes, max / 1024.0 / 1024 * 1000 * passes / duration, ReByteBuffer("").appendMilliSec(duration).str()); diff --git a/cunit/testall.cpp b/cunit/testall.cpp index 37b07fb..f88d3d2 100644 --- a/cunit/testall.cpp +++ b/cunit/testall.cpp @@ -77,7 +77,7 @@ void testMath(){ void testAll(){ try { - testOs(); + testMath(); if (s_testAll){ testBase(); testString(); diff --git a/math/ReMD5.cpp b/math/ReMD5.cpp index 375a91c..89d43dc 100644 --- a/math/ReMD5.cpp +++ b/math/ReMD5.cpp @@ -156,7 +156,7 @@ const ReByteBuffer& ReMD5::hexDigest(){ /** * Processes a 512 bit block ("chunk"). */ -void ReMD5::processChunk(const uint8_t block[64]){ +void ReMD5::processChunk2(const uint8_t block[64]){ uint32_t M[16]; // break chunk into sixteen 32-bit words M[j], 0 ≤ j ≤ 15 #ifdef __LITTLE_ENDIAN__ @@ -192,24 +192,24 @@ void ReMD5::processChunk(const uint8_t block[64]){ printf("%2d: A: %08x B: %08x C: %08x D%08x\n", i, A, B, C, D); #endif if (i < 16){ +# define F1(B, C, D, sh) F = (B & C) | (~ B & D); g = sh // F := (B and C) or ((not B) and D) - F = (B & C) | (~ B & D); - g = i; + F1(B, C, D, i); } else if (i < 32){ // F := (D and B) or (C and (not D)) - F = (D & B) | (C & ~ D); // g := (5×i + 1) mod 16 - g = (5*i + 1) % 16; +# define F2(B, C, D, sh) F = (D & B) | (C & ~ D); g = sh + F2(B, C, D, (5*i + 1) % 16); } else if (i < 48){ // F := B xor C xor D - F = (B ^ C) ^ D; // g := (3×i + 5) mod 16 - g = (3*i + 5) % 16; +# define F3(B, C, D, sh) F = (B ^ C) ^ D; g = sh + F3(B, C, D, (3*i + 5) % 16); } else { // F := C xor (B or (not D)) - F = C ^ (B | ~ D); +# define F4(B, C, D, sh) F = C ^ (B | ~ D); g = sh // g := (7×i) mod 16 - g = (7*i) % 16; + F4(B, C, D, (7*i) % 16); } #if defined TRACE_MD5 if (i > 60) @@ -231,7 +231,274 @@ void ReMD5::processChunk(const uint8_t block[64]){ m_c0 += C; m_d0 += D; } +/** ---------------------- + */ +inline uint32_t F(uint32_t x, uint32_t y, uint32_t z) { + return x&y | ~x&z; +} + +inline uint32_t G(uint32_t x, uint32_t y, uint32_t z) { + return x&z | y&~z; +} + +inline uint32_t H(uint32_t x, uint32_t y, uint32_t z) { + return x^y^z; +} + +inline uint32_t I(uint32_t x, uint32_t y, uint32_t z) { + return y ^ (x | ~z); +} + +inline uint32_t rotate_left(uint32_t x, int n) { + return (x << n) | (x >> (32-n)); +} + +inline void FF(uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac) { +//#define TRACE_MD5 +#if defined TRACE_MD5 + printf("%2d: A: %08x B: %08x C: %08x D%08x\n", s_ix++ % 16, a, b, c, d); + printf(" K[%2d]: %08x M[?]: %08x shift: %02d\n", + s_ix - 1, ac, x, s); +#endif + a = rotate_left(a+ F(b,c,d) + x + ac, s) + b; +} + +inline void GG(uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac) { +#if defined TRACE_MD5 + printf("%2d: A: %08x B: %08x C: %08x D%08x\n", s_ix++ % 16, a, b, c, d); + printf(" K[%2d]: %08x M[?]: %08x shift: %02d\n", + s_ix - 1, ac, x, s); +#endif + a = rotate_left(a + G(b,c,d) + x + ac, s) + b; +} +inline void HH(uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac) { +#if defined TRACE_MD5 + printf("%2d: A: %08x B: %08x C: %08x D%08x\n", s_ix++ % 16, a, b, c, d); + printf(" K[%2d]: %08x M[?]: %08x shift: %02d\n", + s_ix - 1, ac, x, s); +#endif + a = rotate_left(a + H(b,c,d) + x + ac, s) + b; +} + +inline void II(uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac) { +#if defined TRACE_MD5 + printf("%2d: A: %08x B: %08x C: %08x D%08x\n", s_ix++ % 16, a, b, c, d); + printf(" K[%2d]: %08x M[?]: %08x shift: %02d\n", + s_ix - 1, ac, x, s); +#endif + a = rotate_left(a + I(b,c,d) + x + ac, s) + b; +} +/** + * Processes a 512 bit block ("chunk"). + */ +void ReMD5::processChunk(const uint8_t block[64]){ + uint32_t M[16]; + // break chunk into sixteen 32-bit words M[j], 0 ≤ j ≤ 15 +#ifdef __LITTLE_ENDIAN__ + for (int ix = 0; ix < 16; ix++) + memcpy(&M[ix], block + ix * 4, 4); +#elif defined __BIG_ENDIAN__ + for (int ix = 0; ix < 16; ix++){ + uint32_t x = block[3]; + for (int jj = 2; jj >= 0; jj--){ + x = (x << 8) + block[jj]; + } + M[ix] = x; + block += 4; + } +#else +# error "missing __LITTLE_ENDIAN__ or __BIG_ENDIAN__" +#endif + //Initialize hash value for this chunk: + uint32_t A = m_a0; + uint32_t B = m_b0; + uint32_t C = m_c0; + uint32_t D = m_d0; + //Main loop: + + int F, g; + // B := B + leftrotate((A + F + K[i] + M[g]), s[i]) +# define F20(B, C, D, n) F = (B & C) | (~ B & D); g = n +// FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */ +// F(uint32_t x, uint32_t y, uint32_t z) { +// return x&y | ~x&z; +// void MD5::FF(uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac) { +// a = rotate_left(a+ F(b,c,d) + x + ac, s) + b; +// D := C; +// C := B; +// B := B + leftrotate((A + F + K[i] + M[g]), s[i]) +// A := D(old) +// (D, C, B, A) = (C, B, B + leftrotate((A + F + K[i] + M[g]), s[i]), D) +// (A, B, C, D) = (D, B + leftrotate((A + F + K[i] + M[g]), s[i]), B, A) +# define X0(A, B, C, D, n) A = rotate_left(A + F0(A, B, C, D, n)) +# define F21(B, C, D, n) F = (D & B) | (C & ~ D); g = (5*n + 1) % 16 +# define F22(B, C, D, n) F = (B ^ C) ^ D; g = (5*n + 1) % 16 +# define F23(B, C, D, n) F = C ^ (B | ~ D); g = (3*n + 5) % 16 +#if 0 + F20(B, C, D, 0); + F20(B, C, D, 1); + F20(B, C, D, 2); + F20(B, C, D, 3); + F20(B, C, D, 4); + F20(B, C, D, 5); + F20(B, C, D, 6); + F20(B, C, D, 7); + F20(B, C, D, 8); + F20(B, C, D, 9); + F20(B, C, D, 10); + F20(B, C, D, 11); + F20(B, C, D, 12); + F20(B, C, D, 13); + F20(B, C, D, 14); + F20(B, C, D, 15); + + F21(B, C, D, 16); + F21(B, C, D, 17); + F21(B, C, D, 18); + F21(B, C, D, 19); + F21(B, C, D, 20); + F21(B, C, D, 21); + F21(B, C, D, 22); + F21(B, C, D, 23); + F21(B, C, D, 24); + F21(B, C, D, 25); + F21(B, C, D, 26); + F21(B, C, D, 27); + F21(B, C, D, 28); + F21(B, C, D, 29); + F21(B, C, D, 30); + F21(B, C, D, 31); + + F22(B, C, D, 32); + F22(B, C, D, 33); + F22(B, C, D, 34); + F22(B, C, D, 35); + F22(B, C, D, 36); + F22(B, C, D, 37); + F22(B, C, D, 38); + F22(B, C, D, 39); + F22(B, C, D, 40); + F22(B, C, D, 41); + F22(B, C, D, 42); + F22(B, C, D, 43); + F22(B, C, D, 44); + F22(B, C, D, 45); + F22(B, C, D, 46); + F22(B, C, D, 47); + + F23(B, C, D, 48); + F23(B, C, D, 49); + F23(B, C, D, 50); + F23(B, C, D, 51); + F23(B, C, D, 52); + F23(B, C, D, 53); + F23(B, C, D, 54); + F23(B, C, D, 55); + F23(B, C, D, 56); + F23(B, C, D, 57); + F23(B, C, D, 58); + F23(B, C, D, 59); + F23(B, C, D, 60); + F23(B, C, D, 61); + F23(B, C, D, 62); + F23(B, C, D, 63); +#endif +#define S11 7 +#define S12 12 +#define S13 17 +#define S14 22 +#define S21 5 +#define S22 9 +#define S23 14 +#define S24 20 +#define S31 4 +#define S32 11 +#define S33 16 +#define S34 23 +#define S41 6 +#define S42 10 +#define S43 15 +#define S44 21 + + /* Round 1 */ + FF (A, B, C, D, M[ 0], S11, 0xd76aa478); /* 1 */ + FF (D, A, B, C, M[ 1], S12, 0xe8c7b756); /* 2 */ + FF (C, D, A, B, M[ 2], S13, 0x242070db); /* 3 */ + FF (B, C, D, A, M[ 3], S14, 0xc1bdceee); /* 4 */ + FF (A, B, C, D, M[ 4], S11, 0xf57c0faf); /* 5 */ + FF (D, A, B, C, M[ 5], S12, 0x4787c62a); /* 6 */ + FF (C, D, A, B, M[ 6], S13, 0xa8304613); /* 7 */ + FF (B, C, D, A, M[ 7], S14, 0xfd469501); /* 8 */ + FF (A, B, C, D, M[ 8], S11, 0x698098d8); /* 9 */ + FF (D, A, B, C, M[ 9], S12, 0x8b44f7af); /* 10 */ + FF (C, D, A, B, M[10], S13, 0xffff5bb1); /* 11 */ + FF (B, C, D, A, M[11], S14, 0x895cd7be); /* 12 */ + FF (A, B, C, D, M[12], S11, 0x6b901122); /* 13 */ + FF (D, A, B, C, M[13], S12, 0xfd987193); /* 14 */ + FF (C, D, A, B, M[14], S13, 0xa679438e); /* 15 */ + FF (B, C, D, A, M[15], S14, 0x49b40821); /* 16 */ + + /* Round 2 */ + GG (A, B, C, D, M[ 1], S21, 0xf61e2562); /* 17 */ + GG (D, A, B, C, M[ 6], S22, 0xc040b340); /* 18 */ + GG (C, D, A, B, M[11], S23, 0x265e5a51); /* 19 */ + GG (B, C, D, A, M[ 0], S24, 0xe9b6c7aa); /* 20 */ + GG (A, B, C, D, M[ 5], S21, 0xd62f105d); /* 21 */ + GG (D, A, B, C, M[10], S22, 0x2441453); /* 22 */ + GG (C, D, A, B, M[15], S23, 0xd8a1e681); /* 23 */ + GG (B, C, D, A, M[ 4], S24, 0xe7d3fbc8); /* 24 */ + GG (A, B, C, D, M[ 9], S21, 0x21e1cde6); /* 25 */ + GG (D, A, B, C, M[14], S22, 0xc33707d6); /* 26 */ + GG (C, D, A, B, M[ 3], S23, 0xf4d50d87); /* 27 */ + GG (B, C, D, A, M[ 8], S24, 0x455a14ed); /* 28 */ + GG (A, B, C, D, M[13], S21, 0xa9e3e905); /* 29 */ + GG (D, A, B, C, M[ 2], S22, 0xfcefa3f8); /* 30 */ + GG (C, D, A, B, M[ 7], S23, 0x676f02d9); /* 31 */ + GG (B, C, D, A, M[12], S24, 0x8d2a4c8a); /* 32 */ + + /* Round 3 */ + HH (A, B, C, D, M[ 5], S31, 0xfffa3942); /* 33 */ + HH (D, A, B, C, M[ 8], S32, 0x8771f681); /* 34 */ + HH (C, D, A, B, M[11], S33, 0x6d9d6122); /* 35 */ + HH (B, C, D, A, M[14], S34, 0xfde5380c); /* 36 */ + HH (A, B, C, D, M[ 1], S31, 0xa4beea44); /* 37 */ + HH (D, A, B, C, M[ 4], S32, 0x4bdecfa9); /* 38 */ + HH (C, D, A, B, M[ 7], S33, 0xf6bb4b60); /* 39 */ + HH (B, C, D, A, M[10], S34, 0xbebfbc70); /* 40 */ + HH (A, B, C, D, M[13], S31, 0x289b7ec6); /* 41 */ + HH (D, A, B, C, M[ 0], S32, 0xeaa127fa); /* 42 */ + HH (C, D, A, B, M[ 3], S33, 0xd4ef3085); /* 43 */ + HH (B, C, D, A, M[ 6], S34, 0x4881d05); /* 44 */ + HH (A, B, C, D, M[ 9], S31, 0xd9d4d039); /* 45 */ + HH (D, A, B, C, M[12], S32, 0xe6db99e5); /* 46 */ + HH (C, D, A, B, M[15], S33, 0x1fa27cf8); /* 47 */ + HH (B, C, D, A, M[ 2], S34, 0xc4ac5665); /* 48 */ + + /* Round 4 */ + II (A, B, C, D, M[ 0], S41, 0xf4292244); /* 49 */ + II (D, A, B, C, M[ 7], S42, 0x432aff97); /* 50 */ + II (C, D, A, B, M[14], S43, 0xab9423a7); /* 51 */ + II (B, C, D, A, M[ 5], S44, 0xfc93a039); /* 52 */ + II (A, B, C, D, M[12], S41, 0x655b59c3); /* 53 */ + II (D, A, B, C, M[ 3], S42, 0x8f0ccc92); /* 54 */ + II (C, D, A, B, M[10], S43, 0xffeff47d); /* 55 */ + II (B, C, D, A, M[ 1], S44, 0x85845dd1); /* 56 */ + II (A, B, C, D, M[ 8], S41, 0x6fa87e4f); /* 57 */ + II (D, A, B, C, M[15], S42, 0xfe2ce6e0); /* 58 */ + II (C, D, A, B, M[ 6], S43, 0xa3014314); /* 59 */ + II (B, C, D, A, M[13], S44, 0x4e0811a1); /* 60 */ + II (A, B, C, D, M[ 4], S41, 0xf7537e82); /* 61 */ + II (D, A, B, C, M[11], S42, 0xbd3af235); /* 62 */ + II (C, D, A, B, M[ 2], S43, 0x2ad7d2bb); /* 63 */ + II (B, C, D, A, M[ 9], S44, 0xeb86d391); /* 64 */ + + //Add this chunk's hash to result so far: + m_a0 += A; + m_b0 += B; + m_c0 += C; + m_d0 += D; +} /** * Prepares the instance for a new checksum. */ diff --git a/math/ReMD5.hpp b/math/ReMD5.hpp index 1294bc3..e02bb35 100644 --- a/math/ReMD5.hpp +++ b/math/ReMD5.hpp @@ -23,6 +23,7 @@ public: const ReByteBuffer& hexDigest(); void update(const uint8_t* block, int blockLength); void processChunk(const uint8_t block[64]); + void processChunk2(const uint8_t block[64]); void reset(); private: void finalize();