From d1c98ea383a3c3e15690c3609ce47d1e037ce048 Mon Sep 17 00:00:00 2001 From: Hamatoma Date: Sat, 28 Feb 2015 18:55:32 +0100 Subject: [PATCH] MD5 optimization works with VS10 too --- base/ReByteBuffer.hpp | 42 +-- cunit/cuReMD5.cpp | 2 +- math/ReMD5.cpp | 833 +++++++++++++++++++++--------------------- 3 files changed, 445 insertions(+), 432 deletions(-) diff --git a/base/ReByteBuffer.hpp b/base/ReByteBuffer.hpp index ac2fd52..63363f6 100644 --- a/base/ReByteBuffer.hpp +++ b/base/ReByteBuffer.hpp @@ -11,7 +11,7 @@ #define REBYTEBUFFER_H_ #define PRIMARY_BUFFER_SIZE 513 - +#define INLINE /** @brief An efficient dynamic memory buffer. * * Implements a very efficient dynamic byte sequence. @@ -51,7 +51,7 @@ public: * @param aChar character to append * @return *this (for chaining) */ - inline ReByteBuffer& appendChar(char aChar){ + INLINE ReByteBuffer& appendChar(char aChar){ setLength(m_length + 1); m_buffer[m_length - 1] = aChar; return *this; @@ -61,7 +61,7 @@ public: * @param count number of times to append * @return *this (for chaining) */ - inline ReByteBuffer& appendChar(char aChar, int count){ + INLINE ReByteBuffer& appendChar(char aChar, int count){ if (count > 0){ size_t oldLength = m_length; setLength(m_length + count); @@ -85,27 +85,27 @@ public: * @param index The index of the wanted byte. * @return 0: Wrong index. Otherwise: The byte from the wanted position. */ - inline Byte at(size_t index) const { + INLINE Byte at(size_t index) const { return index >= m_length ? 0 : m_buffer[index]; } int atoi(size_t start = 0, int end = -1) const; /** @brief Returns the buffer (permitting modifying). * @return The internal used buffer. */ - inline Byte* buffer() const{ + INLINE Byte* buffer() const{ return m_buffer; } /**@brief Returns the current size of the internal buffer. * @return The current size of the internal buffer. */ - inline size_t capacity() const{ + INLINE size_t capacity() const{ return m_capacity; } int count(const char* toCount, size_t lengthToCount = -1); /**@brief Returns the minimum allocation unit. * @return The minimum of bytes to allocate. */ - inline size_t delta() const{ + INLINE size_t delta() const{ return m_delta; } bool endsWith(const Byte* tail, size_t tailLength = -1, @@ -115,7 +115,7 @@ public: * @param aChar the character which will be the last * @return *this (for chaining) */ - inline ReByteBuffer& ensureLastChar(char aChar){ + INLINE ReByteBuffer& ensureLastChar(char aChar){ if (lastChar() != aChar) appendChar(aChar); return *this; @@ -126,7 +126,7 @@ public: * @param ignoreCase true: case insensitive comparison * @return true: the buffer's contents are equal */ - inline bool equals(const char* data, size_t length, bool ignoreCase = false) const{ + INLINE bool equals(const char* data, size_t length, bool ignoreCase = false) const{ if (length == (size_t) -1) length = strlen(data); bool rc = length == m_length @@ -139,7 +139,7 @@ public: * @param ignoreCase true: case insensitive comparison * @return true: the buffer's contents are equal */ - inline bool equals(const ReByteBuffer& buffer, bool ignoreCase = false) const{ + INLINE bool equals(const ReByteBuffer& buffer, bool ignoreCase = false) const{ bool rc = buffer.length() == m_length && (! ignoreCase && _memcmp(buffer.str(), m_buffer, m_length) == 0 || ignoreCase && _strnicmp(buffer.str(), m_buffer, m_length) == 0); @@ -152,7 +152,7 @@ public: * @param start The first index for searching. * @return -1: not found. Otherwise: The index of the first occurrence. */ - inline int indexOf(Byte toFind, int start = 0) const { + INLINE int indexOf(Byte toFind, int start = 0) const { while ((size_t) start < m_length) if (m_buffer[start++] == toFind) return start - 1; @@ -167,27 +167,27 @@ public: * * @return true: Success. false: ix out of range. */ - inline bool insert(size_t ix, const Byte* source, size_t length){ + INLINE bool insert(size_t ix, const Byte* source, size_t length){ return splice(ix, 0, source, length); } /** Returns the last character. * @return '\0': empty buffer
* otherwise: the last character */ - inline char lastChar() const { + INLINE char lastChar() const { return m_length == 0 ? '\0' : m_buffer[m_length - 1]; } /**@brief Returns the length of the buffer (the count of used bytes). * @return The count of the allocated bytes in the internal buffer. */ - inline size_t length() const{ + INLINE size_t length() const{ return m_length; } /** Sets the length to a lower value. * @param decrement the count to reduce the length * @return The count of the allocated bytes in the internal buffer. */ - inline ReByteBuffer& reduceLength(int decrement = 1){ + INLINE ReByteBuffer& reduceLength(int decrement = 1){ if (decrement > 0 && m_length > 0){ if (decrement > (int) m_length) decrement = m_length; @@ -203,7 +203,7 @@ public: * * @return true: Success. false: ix out of range. */ - inline bool remove(size_t ix, size_t deletedLength){ + INLINE bool remove(size_t ix, size_t deletedLength){ return splice(ix, deletedLength, NULL, 0); } ReByteBuffer& replaceAll(const Byte* toFind, size_t toFindLength, @@ -212,7 +212,7 @@ public: * @param aChar character to remove * @return *this (for chaining) */ - inline ReByteBuffer& removeLastChar(char aChar){ + INLINE ReByteBuffer& removeLastChar(char aChar){ if (m_length > 0 && m_buffer[m_length - 1] == aChar) setLength(m_length - 1); return *this; @@ -222,7 +222,7 @@ public: * @param start The first index for searching. If < 0: relative to the end. * @return -1: not found. Otherwise: The index of the last occurrence. */ - inline int rindexOf(Byte toFind, int start = -1) const { + INLINE int rindexOf(Byte toFind, int start = -1) const { if (start < 0) start = m_length + start; while (start >= 0) @@ -237,14 +237,14 @@ public: * @param length The length of source. * @return *this (for chaining). */ - inline ReByteBuffer& set(const Byte* source, size_t length){ + INLINE ReByteBuffer& set(const Byte* source, size_t length){ return setLength(0).append(source, length); } /** @brief Sets the content of the buffer by copying a byte sequence. * @param source the source to copy. * @return *this (for chaining). */ - inline ReByteBuffer& set(const ReByteBuffer& source){ + INLINE ReByteBuffer& set(const ReByteBuffer& source){ return setLength(0).append(source); } void setDelta(int delta); @@ -258,7 +258,7 @@ public: * This is exactly the same result as from getBuffer(). * @return The internal used buffer. */ - inline const char* str() const{ + INLINE const char* str() const{ return (const char*) m_buffer; } protected: diff --git a/cunit/cuReMD5.cpp b/cunit/cuReMD5.cpp index 04fb2bd..36af4cc 100644 --- a/cunit/cuReMD5.cpp +++ b/cunit/cuReMD5.cpp @@ -92,7 +92,7 @@ private: buffer.setLength(max - 1); buffer.fill('x', max - 1); ReMD5 md5; - int passes = 2; + int passes = 20; int64_t start = timer(); for (int ii = 0; ii < passes; ii++){ md5.reset(); diff --git a/math/ReMD5.cpp b/math/ReMD5.cpp index 00ed16e..4bc855b 100644 --- a/math/ReMD5.cpp +++ b/math/ReMD5.cpp @@ -1,351 +1,364 @@ -/* - * ReMD5.cpp - * - * Created on: 30.01.2015 - * - */ - -#include "base/rebase.hpp" -#include "math/remath.hpp" - -#define __LITTLE_ENDIAN__ - -const int ReMD5::m_s[64] = { - 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, - 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, - 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, - 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21 -}; -static int s_ix = 0; -// for x in [1..64] : int(2**32 * sin(x)) -const uint32_t ReMD5::m_K[64] = { - 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, - 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, - 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, - 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, - 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, - 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, - 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, - 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, - 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, - 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, - 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, - 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, - 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, - 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, - 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, - 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 -}; - -/** - * Constructor. - */ -ReMD5::ReMD5() : - //m_digest - // m_hexDigest; - // m_waiting[64]; - m_lengthWaiting(0), - m_length(0), - m_a0(0x67452301), - m_b0(0xefcdab89), - m_c0(0x98badcfe), - m_d0(0x10325476), - m_finalized(false) -{ -} - -/** - * Destructor. - */ -ReMD5::~ReMD5() { -} - -/** - * Returns the binary digest value. - * - * @return the binary digest (16 byte array) - */ -const uint8_t* ReMD5::digest(){ - if (! m_finalized){ - finalize(); - } - return m_digest; -} - -/** - * Finalizes the digest. - * - * Handles the rest block (< 64 byte) and append a special tail: a "1" bit - * and the length of the total input length (in bits). - * - * @param block the rest input (incomplete chunk) - * @param blockLength the length of the block: 0..63 - */ -void ReMD5::finalize(){ - uint8_t* block = m_waiting; - int blockLength = m_lengthWaiting; - // append "1" bit to message - // Notice: the input bytes are considered as bits strings, - // where the first bit is the most significant bit of the byte. - block[blockLength++] = 0x80; - //Pre-processing: padding with zeros - //append "0" bit until message length in bits ≡ 448 (mod 512) - // fill the rest of the chunk with '\0'. - // the last 8 bytes is set to the length in bits (length in bytes * 8) - int restLength = (m_length + 1) % 64; - // 0 -> 56, 1 -> 55, 2 -> 54, ... 55 -> 1, 56 -> 0, - // 57 -> 63, 58 -> 62, ... 63 -> 57 - int zeros = restLength <= 56 ? 56 - restLength : 120 - restLength; - memset(block + blockLength, 0, zeros); - blockLength += zeros; - //append original length in bits mod (2 pow 64) to message - uint64_t lengthBits = 8LL * m_length; -#if defined __LITTLE_ENDIAN__ - memcpy(block + blockLength, &lengthBits, 8); - blockLength += 8; -#else - block[blockLength++] = lengthBits; - lengthBits >>= 8; - block[blockLength++] = lengthBits; - lengthBits >>= 8; - block[blockLength++] = lengthBits; - lengthBits >>= 8; - block[blockLength++] = lengthBits; - lengthBits >>= 8; - block[blockLength++] = lengthBits; - lengthBits >>= 8; - block[blockLength++] = lengthBits; - lengthBits >>= 8; - block[blockLength++] = lengthBits; - lengthBits >>= 8; - block[blockLength++] = lengthBits; -#endif - processChunk(block); - if (blockLength > 64) - processChunk(block + 64); -#if defined __LITTLE_ENDIAN__ - memcpy(m_digest, &m_a0, 4); - memcpy(m_digest + 4, &m_b0, 4); - memcpy(m_digest + 8, &m_c0, 4); - memcpy(m_digest + 12, &m_d0, 4); -#else -#define oneWord(word, ix) m_digest[ix] = word; word >>= 8; \ - m_digest[ix + 1] = word; word >>= 8; m_digest[ix + 2] = word; word >>= 8; \ - m_digest[ix + 3] = word - oneWord(m_a0, 0); - oneWord(m_b0, 4); - oneWord(m_c0, 8); - oneWord(m_d0, 12); -#endif -} - -/** - * Returns the binary digest value. - * - * @return the binary digest (16 byte array) - */ -const ReByteBuffer& ReMD5::hexDigest(){ - if (m_hexDigest.length() == 0){ - digest(); - for (int ix = 0; ix < 16; ix++){ - m_hexDigest.appendInt(m_digest[ix], "%02x"); - } - } - return m_hexDigest; -} - -/** - * Processes a 512 bit block ("chunk"). - */ -void ReMD5::processChunk2(const uint8_t block[64]){ - uint32_t M[16]; - // break chunk into sixteen 32-bit words M[j], 0 ≤ j ≤ 15 -#ifdef __LITTLE_ENDIAN__ - for (int ix = 0; ix < 16; ix++){ - memcpy(&M[ix], block + ix * 4, 4); - //M[ix] = * (uint32_t*) (block + ix*4); - } -#elif defined __BIG_ENDIAN__ - for (int ix = 0; ix < 16; ix++){ - uint32_t x = block[3]; - for (int jj = 2; jj >= 0; jj--){ - x = (x << 8) + block[jj]; - } - M[ix] = x; - block += 4; - } -#else -# error "missing __LITTLE_ENDIAN__ or __BIG_ENDIAN__" -#endif - //Initialize hash value for this chunk: - uint32_t A = m_a0; - uint32_t B = m_b0; - uint32_t C = m_c0; - uint32_t D = m_d0; - //Main loop: - - int F, g; -//#define TRACE_MD5 -#if defined TRACE_MD5 - printf("neu: (%s)\n", block); -#endif - for (int i = 0; i < 64; i++){ -#if defined TRACE_MD5 - if (i < 8) - printf("%2d: A: %08x B: %08x C: %08x D%08x\n", i, A, B, C, D); -#endif - if (i < 16){ -# define F1(B, C, D) ((B & C) | (~ B & D)) - // F := (B and C) or ((not B) and D) - F = F1(B, C, D); - g = i; - } else if (i < 32){ - // F := (D and B) or (C and (not D)) - // g := (5×i + 1) mod 16 -# define F2(B, C, D) ((D & B) | (C & ~ D)) - F = F2(B, C, D); - g = (5*i + 1) % 16; - } else if (i < 48){ - // F := B xor C xor D - // g := (3×i + 5) mod 16 -# define F3(B, C, D) ((B ^ C) ^ D) - F = F3(B, C, D); - g = (3*i + 5) % 16; - } else { - // F := C xor (B or (not D)) -# define F4(B, C, D) (C ^ (B | ~ D)) - // g := (7×i) mod 16 - F = F4(B, C, D); - g = (7*i) % 16; - } -#if defined TRACE_MD5 - if (i < 8) - printf(" K[%2d]: %08x M[%2d]: %08x shift: %02d\n", - i, m_K[i], g, M[g], m_s[i]); -#endif - uint32_t dTemp = D; - D = C; - C = B; - // B := B + leftrotate((A + F + K[i] + M[g]), s[i]) - uint32_t x = (A + F + m_K[i] + M[g]); - int shift = m_s[i]; - B += (x << shift) | (x >> (32 - shift)); - A = dTemp; - } - //Add this chunk's hash to result so far: - m_a0 += A; - m_b0 += B; - m_c0 += C; - m_d0 += D; -} -/** ---------------------- - */ - -inline uint32_t rotate_left(uint32_t x, int n) { - return (x << n) | (x >> (32-n)); -} - -inline void X1(uint32_t &var, uint32_t x, uint32_t y, uint32_t z, - uint32_t data, uint32_t aConst, uint32_t shift) { -//#define TRACE_MD5 -#if defined TRACE_MD5 - printf("%2d: A: %08x B: %08x C: %08x D%08x\n", s_ix++ % 16, var, x, y, z); - printf(" K[%2d]: %08x M[?]: %08x shift: %02d\n", - s_ix - 1, aConst, data, shift); -#endif - var = rotate_left(var+ F1(x, y, z) + data + aConst, shift) + x; -} - -inline void X2(uint32_t& var, uint32_t x, uint32_t y, uint32_t z, - uint32_t data, uint32_t aConst, uint32_t shift) { -#if defined TRACE_MD5 - printf("%2d: A: %08x B: %08x C: %08x D%08x\n", s_ix++ % 16, var, x, y, z); - printf(" K[%2d]: %08x M[?]: %08x shift: %02d\n", - s_ix - 1, aConst, data, shift); -#endif - var = rotate_left(var + F2(x, y, z) + data + aConst, shift) + x; -} - -inline void X3(uint32_t& var, uint32_t x, uint32_t y, uint32_t z, - uint32_t data, uint32_t aConst, uint32_t shift) { -#if defined TRACE_MD5 - printf("%2d: A: %08x B: %08x C: %08x D%08x\n", s_ix++ % 16, var, x, y, z); - printf(" K[%2d]: %08x M[?]: %08x shift: %02d\n", - s_ix - 1, aConst, data, shift); -#endif - var = rotate_left(var + F3(x, y, z) + data + aConst, shift) + x; -} - -inline void X4(uint32_t& var, uint32_t x, uint32_t y, uint32_t z, - uint32_t data, uint32_t aConst, uint32_t shift) { -#if defined TRACE_MD5 - printf("%2d: A: %08x B: %08x C: %08x D%08x\n", s_ix++ % 16, var, x, y, z); - printf(" K[%2d]: %08x M[?]: %08x shift: %02d\n", - s_ix - 1, aConst, data, shift); -#endif - var = rotate_left(var + F4(x, y, z) + data + aConst, shift) + x; -} -/** - * Processes a 512 bit block ("chunk"). - */ -void ReMD5::processChunk(const uint8_t block[64]){ - uint32_t M[16]; - // break chunk into sixteen 32-bit words M[j], 0 ≤ j ≤ 15 -#ifdef __LITTLE_ENDIAN__ - for (int ix = 0; ix < 16; ix++){ - //memcpy(&M[ix], block + ix * 4, 4); - M[ix] = * (uint32_t*) (block + ix*4); - } -#elif defined __BIG_ENDIAN__ - for (int ix = 0; ix < 16; ix++){ - uint32_t x = block[3]; - for (int jj = 2; jj >= 0; jj--){ - x = (x << 8) + block[jj]; - } - M[ix] = x; - block += 4; - } -#else -# error "missing __LITTLE_ENDIAN__ or __BIG_ENDIAN__" -#endif - //Initialize hash value for this chunk: - uint32_t A = m_a0; - uint32_t B = m_b0; - uint32_t C = m_c0; - uint32_t D = m_d0; -#if 0 - // B := B + leftrotate((A + F + K[i] + M[g]), s[i]) - // D := C; - // C := B; - // B := B + leftrotate((A + F + K[i] + M[g]), s[i]) - // A := D(old) - // (D, C, B, A) = (C, B, B + leftrotate((A + F + K[i] + M[g]), s[i]), D) - // ==> (A, B, C, D) = (D, B + leftrotate((A + F + K[i] + M[g]), s[i]), B, A) - // The unrolled loop: - //i = g = 0; - (A, B, C, D) = (D, B + leftrotate((A + F1(B, C, D) + K[0] + M[0]), s[0]), B, A) - // only one var must be calculated, the other 3 are exchanged only. - //i = g = 1; - (A, B, C, D) = (D, B + leftrotate((A + F1(B, C, D) + K[1] + M[1]), s[1]), B, A) - //i = g = 2; - (A, B, C, D) = (D, B + leftrotate((A + F1(B, C, D) + K[2] + M[2]), s[2]), B, A) - //i = g = 3; - (A, B, C, D) = (D, B + leftrotate((A + F1(B, C, D) + K[3] + M[3]), s[3]), B, A) - // in each of the 4 statements another variable (of A, B, C and D) will be calculated - // so we do not exchange in each step, we calculate in the end position - // we define a function to do this: - void X1(uint32_t &var, uint32_t x, uint32_t y, uint32_t z, uint32_t data, uint32_t shift, uint32_t aConst){ - var = rotate_left(var+ F1(x, y, z) + data + aConst, shift) + x; - } - // note: the input parameter of of X1 must respect the exchange: - // A -> D -> C -> B ... - X1(A, B, C, D, M[0], K[0], s[0]); - X1(D, A, B, C, M[1], K[1], s[1]); - X1(C, D, A, B, M[2], K[2], s[2]); - X1(B, C, D, A, M[3], K[3], s[3]); - // ... -#endif - /* Round 1 */ +/* + * ReMD5.cpp + * + * Created on: 30.01.2015 + * + */ + +#include "base/rebase.hpp" +#include "math/remath.hpp" + +#define __LITTLE_ENDIAN__ + +const int ReMD5::m_s[64] = { + 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, + 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, + 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, + 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21 +}; +static int s_ix = 0; +// for x in [1..64] : int(2**32 * sin(x)) +const uint32_t ReMD5::m_K[64] = { + 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, + 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, + 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, + 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, + 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, + 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, + 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, + 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, + 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, + 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, + 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, + 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, + 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, + 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, + 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, + 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 +}; + +/** + * Constructor. + */ +ReMD5::ReMD5() : + //m_digest + // m_hexDigest; + // m_waiting[64]; + m_lengthWaiting(0), + m_length(0), + m_a0(0x67452301), + m_b0(0xefcdab89), + m_c0(0x98badcfe), + m_d0(0x10325476), + m_finalized(false) +{ +} + +/** + * Destructor. + */ +ReMD5::~ReMD5() { +} + +/** + * Returns the binary digest value. + * + * @return the binary digest (16 byte array) + */ +const uint8_t* ReMD5::digest(){ + if (! m_finalized){ + finalize(); + } + return m_digest; +} + +/** + * Finalizes the digest. + * + * Handles the rest block (< 64 byte) and append a special tail: a "1" bit + * and the length of the total input length (in bits). + * + * @param block the rest input (incomplete chunk) + * @param blockLength the length of the block: 0..63 + */ +void ReMD5::finalize(){ + uint8_t* block = m_waiting; + int blockLength = m_lengthWaiting; + // append "1" bit to message + // Notice: the input bytes are considered as bits strings, + // where the first bit is the most significant bit of the byte. + block[blockLength++] = 0x80; + //Pre-processing: padding with zeros + //append "0" bit until message length in bits ≡ 448 (mod 512) + // fill the rest of the chunk with '\0'. + // the last 8 bytes is set to the length in bits (length in bytes * 8) + int restLength = (m_length + 1) % 64; + // 0 -> 56, 1 -> 55, 2 -> 54, ... 55 -> 1, 56 -> 0, + // 57 -> 63, 58 -> 62, ... 63 -> 57 + int zeros = restLength <= 56 ? 56 - restLength : 120 - restLength; + memset(block + blockLength, 0, zeros); + blockLength += zeros; + //append original length in bits mod (2 pow 64) to message + uint64_t lengthBits = 8LL * m_length; +#if defined __LITTLE_ENDIAN__ + memcpy(block + blockLength, &lengthBits, 8); + blockLength += 8; +#else + block[blockLength++] = lengthBits; + lengthBits >>= 8; + block[blockLength++] = lengthBits; + lengthBits >>= 8; + block[blockLength++] = lengthBits; + lengthBits >>= 8; + block[blockLength++] = lengthBits; + lengthBits >>= 8; + block[blockLength++] = lengthBits; + lengthBits >>= 8; + block[blockLength++] = lengthBits; + lengthBits >>= 8; + block[blockLength++] = lengthBits; + lengthBits >>= 8; + block[blockLength++] = lengthBits; +#endif + processChunk(block); + if (blockLength > 64) + processChunk(block + 64); +#if defined __LITTLE_ENDIAN__ + memcpy(m_digest, &m_a0, 4); + memcpy(m_digest + 4, &m_b0, 4); + memcpy(m_digest + 8, &m_c0, 4); + memcpy(m_digest + 12, &m_d0, 4); +#else +#define oneWord(word, ix) m_digest[ix] = word; word >>= 8; \ + m_digest[ix + 1] = word; word >>= 8; m_digest[ix + 2] = word; word >>= 8; \ + m_digest[ix + 3] = word + oneWord(m_a0, 0); + oneWord(m_b0, 4); + oneWord(m_c0, 8); + oneWord(m_d0, 12); +#endif +} + +/** + * Returns the binary digest value. + * + * @return the binary digest (16 byte array) + */ +const ReByteBuffer& ReMD5::hexDigest(){ + if (m_hexDigest.length() == 0){ + digest(); + for (int ix = 0; ix < 16; ix++){ + m_hexDigest.appendInt(m_digest[ix], "%02x"); + } + } + return m_hexDigest; +} + +/** + * Processes a 512 bit block ("chunk"). + * + * This method is a direct programming of the algorithm described in wikipedia. + */ +void ReMD5::processChunk2(const uint8_t block[64]){ + uint32_t M[16]; + // break chunk into sixteen 32-bit words M[j], 0 ≤ j ≤ 15 + for (int ix = 0; ix < 16; ix++){ + uint32_t x = block[3]; + for (int jj = 2; jj >= 0; jj--){ + x = (x << 8) + block[jj]; + } + M[ix] = x; + block += 4; + } + //Initialize hash value for this chunk: + uint32_t A = m_a0; + uint32_t B = m_b0; + uint32_t C = m_c0; + uint32_t D = m_d0; + //Main loop: + + int F, g; +//#define TRACE_MD5 +#if defined TRACE_MD5 + printf("neu: (%s)\n", block); +#endif + for (int i = 0; i < 64; i++){ +#if defined TRACE_MD5 + if (i < 8) + printf("%2d: A: %08x B: %08x C: %08x D%08x\n", i, A, B, C, D); +#endif + if (i < 16){ +# define F1(B, C, D) ((B & C) | (~ B & D)) + // F := (B and C) or ((not B) and D) + F = F1(B, C, D); + g = i; + } else if (i < 32){ + // F := (D and B) or (C and (not D)) + // g := (5×i + 1) mod 16 +# define F2(B, C, D) ((D & B) | (C & ~ D)) + F = F2(B, C, D); + g = (5*i + 1) % 16; + } else if (i < 48){ + // F := B xor C xor D + // g := (3×i + 5) mod 16 +# define F3(B, C, D) ((B ^ C) ^ D) + F = F3(B, C, D); + g = (3*i + 5) % 16; + } else { + // F := C xor (B or (not D)) +# define F4(B, C, D) (C ^ (B | ~ D)) + // g := (7×i) mod 16 + F = F4(B, C, D); + g = (7*i) % 16; + } +#if defined TRACE_MD5 + if (i < 8) + printf(" K[%2d]: %08x M[%2d]: %08x shift: %02d\n", + i, m_K[i], g, M[g], m_s[i]); +#endif + uint32_t dTemp = D; + D = C; + C = B; + // B := B + leftrotate((A + F + K[i] + M[g]), s[i]) + uint32_t x = (A + F + m_K[i] + M[g]); + int shift = m_s[i]; + B += (x << shift) | (x >> (32 - shift)); + A = dTemp; + } + //Add this chunk's hash to result so far: + m_a0 += A; + m_b0 += B; + m_c0 += C; + m_d0 += D; +} +/** ---------------------- + */ +#if defined OPTIMIZER_WORKS_GREAT +inline void rotate_left_and_add(uint32_t& rc, uint32_t data, int shift, uint32_t term) { + rc = ((data << shift) | (data >> (32-shift))) + term; +} +inline void X1(uint32_t &var, uint32_t x, uint32_t y, uint32_t z, + uint32_t data, uint32_t aConst, uint32_t shift) { +//#define TRACE_MD5 +#if defined TRACE_MD5 + printf("%2d: A: %08x B: %08x C: %08x D%08x\n", s_ix++ % 16, var, x, y, z); + printf(" K[%2d]: %08x M[?]: %08x shift: %02d\n", + s_ix - 1, aConst, data, shift); +#endif + rotate_left_and_add(var, var + F1(x, y, z) + data + aConst, shift, x); +} +inline void X2(uint32_t& var, uint32_t x, uint32_t y, uint32_t z, + uint32_t data, uint32_t aConst, uint32_t shift) { +#if defined TRACE_MD5 + printf("%2d: A: %08x B: %08x C: %08x D%08x\n", s_ix++ % 16, var, x, y, z); + printf(" K[%2d]: %08x M[?]: %08x shift: %02d\n", + s_ix - 1, aConst, data, shift); +#endif + rotate_left_and_add(var, var + F2(x, y, z) + data + aConst, shift, x); +} + +inline void X3(uint32_t& var, uint32_t x, uint32_t y, uint32_t z, + uint32_t data, uint32_t aConst, uint32_t shift) { +#if defined TRACE_MD5 + printf("%2d: A: %08x B: %08x C: %08x D%08x\n", s_ix++ % 16, var, x, y, z); + printf(" K[%2d]: %08x M[?]: %08x shift: %02d\n", + s_ix - 1, aConst, data, shift); +#endif + rotate_left_and_add(var, var + F3(x, y, z) + data + aConst, shift, x); +} + +inline void X4(uint32_t& var, uint32_t x, uint32_t y, uint32_t z, + uint32_t data, uint32_t aConst, uint32_t shift) { +#if defined TRACE_MD5 + printf("%2d: A: %08x B: %08x C: %08x D%08x\n", s_ix++ % 16, var, x, y, z); + printf(" K[%2d]: %08x M[?]: %08x shift: %02d\n", + s_ix - 1, aConst, data, shift); +#endif + rotate_left_and_add(var, var + F4(x, y, z) + data + aConst, shift, x); +} +#else +#define rotate_left_and_add(var, data, shift, term) { \ + uint32_t val = data; \ + var = ((val << shift) | (val >> (32-shift))) + term; \ +} +#define X1(var, x, y, z, data, aConst, shift) \ + rotate_left_and_add(var, var + F1(x, y, z) + data + aConst, shift, x) +#define X2(var, x, y, z, data, aConst, shift) \ + rotate_left_and_add(var, var + F2(x, y, z) + data + aConst, shift, x) +#define X3(var, x, y, z, data, aConst, shift) \ + rotate_left_and_add(var, var + F3(x, y, z) + data + aConst, shift, x) +#define X4(var, x, y, z, data, aConst, shift) \ + rotate_left_and_add(var, var + F4(x, y, z) + data + aConst, shift, x) +#endif /* OPTIMIZER_WORKS_GREAT */ + +/** + * Processes a 512 bit block ("chunk"). + * + * This is a optimized version, derived from the method above. + * We unroll the loop, this brings speed with factor 2. + *
+ * B := B + leftrotate((A + F + K[i] + M[g]), s[i])
+ *	D := C;
+ *	C := B;
+ *  B := B + leftrotate((A + F + K[i] + M[g]), s[i])
+ *  A := D(old)
+ * (D, C, B, A) = (C, B, B + leftrotate((A + F + K[i] + M[g]), s[i]), D)
+ * ==> (A, B, C, D) = (D, B + leftrotate((A + F + K[i] + M[g]), s[i]), B, A)
+ * The unrolled loop:
+ * i = g = 0;
+ * (A, B, C, D) = (D, B + leftrotate((A +  F1(B, C, D) + K[0] + M[0]), s[0]), B, A)
+ * only one var must be calculated, the other 3 are exchanged only.
+ * i = g = 1;
+ * (A, B, C, D) = (D, B + leftrotate((A +  F1(B, C, D) + K[1] + M[1]), s[1]), B, A)
+ * i = g = 2;
+ * (A, B, C, D) = (D, B + leftrotate((A +  F1(B, C, D) + K[2] + M[2]), s[2]), B, A)
+ * i = g = 3;
+ * (A, B, C, D) = (D, B + leftrotate((A +  F1(B, C, D) + K[3] + M[3]), s[3]), B, A)
+ * in each of the 4 statements another variable (of A, B, C and D) will be calculated
+ * so we do not exchange in each step, we calculate in the end position
+ * we define a function to do this:
+ * void X1(uint32_t &var, uint32_t x, uint32_t y, uint32_t z, uint32_t data, uint32_t shift, uint32_t aConst){
+ *	 var = rotate_left(var+ F1(x, y, z) + data + aConst, shift) + x;
+ * }
+ * Note: the input parameter of X1 must respect the exchange:
+ * A -> D -> C -> B ...
+ * X1(A, B, C, D,  M[0], K[0], s[0]);
+ * X1(D, A, B, C,  M[1], K[1], s[1]);
+ * X1(C, D, A, B,  M[2], K[2], s[2]);
+ * X1(B, C, D, A,  M[3], K[3], s[3]);
+ * ...
+ * 
+ */ +void ReMD5::processChunk(const uint8_t block[64]){ + uint32_t M[16]; + // break chunk into sixteen 32-bit words M[j], 0 ≤ j ≤ 15 +#ifdef __LITTLE_ENDIAN__ + for (int ix = 0; ix < 16; ix++){ + //memcpy(&M[ix], block + ix * 4, 4); + M[ix] = * (uint32_t*) (block + ix*4); + } +#elif defined __BIG_ENDIAN__ + for (int ix = 0; ix < 16; ix++){ + uint32_t x = block[3]; + for (int jj = 2; jj >= 0; jj--){ + x = (x << 8) + block[jj]; + } + M[ix] = x; + block += 4; + } +#else +# error "missing __LITTLE_ENDIAN__ or __BIG_ENDIAN__" +#endif + //Initialize hash value for this chunk: + uint32_t A = m_a0; + uint32_t B = m_b0; + uint32_t C = m_c0; + uint32_t D = m_d0; +#if defined NeverAndNeverAndNeverAgain + // Derivation of the optimization: + +#endif + /* Round 1 */ X1(A, B, C, D, M[ 0], 0xd76aa478, 7); X1(D, A, B, C, M[ 1], 0xe8c7b756, 12); X1(C, D, A, B, M[ 2], 0x242070db, 17); @@ -369,7 +382,7 @@ void ReMD5::processChunk(const uint8_t block[64]){ X2 (C, D, A, B, M[11], 0x265e5a51, 14); X2 (B, C, D, A, M[ 0], 0xe9b6c7aa, 20); X2 (A, B, C, D, M[ 5], 0xd62f105d, 5); - X2 (D, A, B, C, M[10], 0x2441453, 9); + X2 (D, A, B, C, M[10], 0x02441453, 9); X2 (C, D, A, B, M[15], 0xd8a1e681, 14); X2 (B, C, D, A, M[ 4], 0xe7d3fbc8, 20); X2 (A, B, C, D, M[ 9], 0x21e1cde6, 5); @@ -393,7 +406,7 @@ void ReMD5::processChunk(const uint8_t block[64]){ X3 (A, B, C, D, M[13], 0x289b7ec6, 4); X3 (D, A, B, C, M[ 0], 0xeaa127fa, 11); X3 (C, D, A, B, M[ 3], 0xd4ef3085, 16); - X3 (B, C, D, A, M[ 6], 0x4881d05, 23); + X3 (B, C, D, A, M[ 6], 0x04881d05, 23); X3 (A, B, C, D, M[ 9], 0xd9d4d039, 4); X3 (D, A, B, C, M[12], 0xe6db99e5, 11); X3 (C, D, A, B, M[15], 0x1fa27cf8, 16); @@ -416,63 +429,63 @@ void ReMD5::processChunk(const uint8_t block[64]){ X4 (D, A, B, C, M[11], 0xbd3af235, 10); X4 (C, D, A, B, M[ 2], 0x2ad7d2bb, 15); X4 (B, C, D, A, M[ 9], 0xeb86d391, 21); - - //Add this chunk's hash to result so far: - m_a0 += A; - m_b0 += B; - m_c0 += C; - m_d0 += D; -} -/** - * Prepares the instance for a new checksum. - */ -void ReMD5::reset(){ - m_a0 = 0x67452301; - m_b0 = 0xefcdab89; - m_c0 = 0x98badcfe; - m_d0 = 0x10325476; - memset(m_digest, 0, sizeof m_digest); - memset(m_waiting, 0, sizeof m_waiting); - m_lengthWaiting = 0; - m_length = 0; - m_hexDigest.setLength(0); - m_finalized = false; -} -/** - * Processes a 64 byte block. - * - * @param block a block which should be added to the digest - * @param blockLength the length of block - */ -void ReMD5::update(const uint8_t* block, int blockLength){ - if (blockLength == -1) - blockLength = strlen((const char*) block); - // process the "waiting" input (incomplete chunk): - m_length += blockLength; - if (m_lengthWaiting > 0){ - int rest = 64 - m_lengthWaiting; - if (rest > blockLength) - rest = blockLength; - memcpy(m_waiting + m_lengthWaiting, block, rest); - blockLength -= rest; - block += rest; - m_lengthWaiting += rest; - // Is the chunk complete? - if (m_lengthWaiting == 64){ - processChunk(m_waiting); - m_lengthWaiting = 0; - } - } - // process full 512 bit chunks (64 byte blocks): - for (int ix = blockLength / 64; ix > 0; ix--){ - processChunk(block); - block += 64; - } - blockLength %= 64; - if (blockLength != 0){ - assert(m_lengthWaiting == 0); - memcpy(m_waiting, block, blockLength); - m_lengthWaiting = blockLength; - } -} - + + //Add this chunk's hash to result so far: + m_a0 += A; + m_b0 += B; + m_c0 += C; + m_d0 += D; +} +/** + * Prepares the instance for a new checksum. + */ +void ReMD5::reset(){ + m_a0 = 0x67452301; + m_b0 = 0xefcdab89; + m_c0 = 0x98badcfe; + m_d0 = 0x10325476; + memset(m_digest, 0, sizeof m_digest); + memset(m_waiting, 0, sizeof m_waiting); + m_lengthWaiting = 0; + m_length = 0; + m_hexDigest.setLength(0); + m_finalized = false; +} +/** + * Processes a 64 byte block. + * + * @param block a block which should be added to the digest + * @param blockLength the length of block + */ +void ReMD5::update(const uint8_t* block, int blockLength){ + if (blockLength == -1) + blockLength = strlen((const char*) block); + // process the "waiting" input (incomplete chunk): + m_length += blockLength; + if (m_lengthWaiting > 0){ + int rest = 64 - m_lengthWaiting; + if (rest > blockLength) + rest = blockLength; + memcpy(m_waiting + m_lengthWaiting, block, rest); + blockLength -= rest; + block += rest; + m_lengthWaiting += rest; + // Is the chunk complete? + if (m_lengthWaiting == 64){ + processChunk(m_waiting); + m_lengthWaiting = 0; + } + } + // process full 512 bit chunks (64 byte blocks): + for (int ix = blockLength / 64; ix > 0; ix--){ + processChunk(block); + block += 64; + } + blockLength %= 64; + if (blockLength != 0){ + assert(m_lengthWaiting == 0); + memcpy(m_waiting, block, blockLength); + m_lengthWaiting = blockLength; + } +} + -- 2.39.5