From 8c477fee84c895a22dfa17ffb926c4f1136c36cf Mon Sep 17 00:00:00 2001 From: hama Date: Mon, 12 Jan 2015 00:54:41 +0100 Subject: [PATCH] Refactoring: ReSeqList is now efficient (sorting). --- base/ReByteBuffer.cpp | 4 +- base/ReHashList.cpp | 157 +++++------ base/ReHashList.hpp | 23 +- base/ReProgramArgs.cpp | 5 +- base/ReProgramArgs.hpp | 4 +- base/ReSeqList.cpp | 587 ++++++++++++++++++++++++++++++++++++--- base/ReSeqList.hpp | 89 ++++-- base/ReStringList.cpp | 6 +- base/ReStringList.hpp | 2 +- base/ReTestUnit.cpp | 15 + base/ReTestUnit.hpp | 2 + base/baselocations.hpp | 1 + cunit/cuReCString.cpp | 4 - cunit/cuReSeqList.cpp | 121 +++++++- cunit/cuReStringList.cpp | 2 - cunit/cuReTraverser.cpp | 2 +- cunit/testall.cpp | 8 +- os/ReDirTools.cpp | 76 +++-- os/ReDirTools.hpp | 29 +- 19 files changed, 921 insertions(+), 216 deletions(-) diff --git a/base/ReByteBuffer.cpp b/base/ReByteBuffer.cpp index 3525de7..7dd45b2 100644 --- a/base/ReByteBuffer.cpp +++ b/base/ReByteBuffer.cpp @@ -278,13 +278,13 @@ int ReByteBuffer::firstDifference(const Byte* source, size_t length, int start, if (start < 0) rc = 0; else if (start < (int) m_length){ - if (length == -1) + if (length == (size_t) -1) length = strlen(source); int count = length > m_length - start ? m_length - start : length; const Byte* ptr = m_buffer + start; for (int ix = 0; rc < 0 && ix < count; ix++){ if ((! ignoreCase && *ptr++ != *source++) - || ignoreCase && (tolower(*ptr++) != tolower(*source++))) + || (ignoreCase && (tolower(*ptr++) != tolower(*source++)))) rc = start + ix; } if (rc < 0 && length > m_length - start) diff --git a/base/ReHashList.cpp b/base/ReHashList.cpp index 61dc9db..4f5aba1 100644 --- a/base/ReHashList.cpp +++ b/base/ReHashList.cpp @@ -9,73 +9,37 @@ /** Constructor. */ -ReHashList::ReHashList() - : +ReHashList::ReHashList(int keyTagSize, int contentLengthSize, int keyLengthSize) : m_keys(), m_values() { + m_keys.setSizes(keyTagSize, keyLengthSize); + m_keys.setSizes(0, contentLengthSize); } /** Destructor */ ReHashList::~ReHashList() { } -/** @brief Puts a key value pair into the hashlist. - * - * @param key The key byte sequence. - * @param keyLength The length of key. - * @param value The value byte sequence. - * @param valueLength The length of value. +/** @brief Deletes all entries in the list. */ -void ReHashList::put(const Byte* key, size_t keyLength, - const Byte* value, size_t valueLength){ - if (keyLength == (size_t) -1) - keyLength = strlen(key) + 1; - if (valueLength == (size_t) -1) - valueLength = strlen(value) + 1; - int ix = find(key, keyLength); - bool storeValue = false; - if (ix < 0){ - ReSeqList::Index pos = m_values.length(); - storeValue = true; - m_keys.add(-1, key, keyLength, pos); - } else { - Sequence* seq = m_keys.getInfo(ix); - // m_tag contains the index into m_values. - Byte* ptr = m_values.buffer() + seq->m_tag; - size_t valLength = * (size_t *) ptr; - // Can we take the old storage? - if (valLength >= valueLength){ - // Yes, it is enough space: - * (size_t *) ptr = valueLength; - ptr += sizeof (size_t); - memcpy(ptr, value, valueLength); - } else { - storeValue = true; - seq->m_tag = m_values.length(); - } - } - if (storeValue){ - m_values.append((Byte*) &valueLength, sizeof valueLength); - m_values.append(value, valueLength); - } +void ReHashList::clear(){ + m_keys.clear(); + m_values.clear(); } -/** @brief Puts a key value pair into the hashlist. + +/** @brief Returns the index of key. * - * @param key The key. This is a C string. - * @param value The value. This is a C string. - */ -void ReHashList::put(const char* key, const char* value){ - put(key, -1, value, -1); -} -/** @brief Puts a key value pair into the hashlist. + * @param key The byte sequence of the key. + * @param length The length of key. * - * @param key The key. - * @param value The value. + * @return: -1: The key was not found. Otherwise: The index of the key in the key sequence array. */ -void ReHashList::put(const ReByteBuffer& key, const ReByteBuffer& value){ - put(key.str(), key.length(), value.str(), value.length()); +int ReHashList::find(const Byte* key, size_t length) const{ + int rc = m_keys.find(key, length, NULL); + return rc; } + /** @brief Returns the value of a key value pair. * * @param key The key byte sequence. @@ -87,19 +51,21 @@ void ReHashList::put(const ReByteBuffer& key, const ReByteBuffer& value){ bool ReHashList::get(const Byte* key, size_t keyLength, ReByteBuffer& value) const{ if (keyLength == (size_t) -1) - keyLength = strlen(key) + 1; + keyLength = strlen(key); int ix = find(key, keyLength); bool rc = ix >= 0; +#if 0 if (rc){ ReSeqList::Sequence* seq = m_keys.getInfo(ix); // m_tag contains the index into m_values: - const Byte* ptr = m_values.str() + seq->m_tag; + const Byte* ptr = reinterpret_cast(m_values.str() + seq->m_tag); // m_values contains : size_t valLength = * (size_t*) ptr; ptr += sizeof (size_t); value.set(ptr, valLength); } +#endif return rc; } /** @brief Returns the value of a key value pair. @@ -114,12 +80,6 @@ bool ReHashList::get(const ReByteBuffer& key, bool rc = get(key.str(), key.length(), value); return rc; } -/** @brief Deletes all entries in the list. - */ -void ReHashList::clear(){ - m_keys.clear(); - m_values.setLength(0); -} /** @brief Implements an iterator. * @@ -131,7 +91,7 @@ void ReHashList::clear(){ * * @param true: An item was found. false: No more items. */ -bool ReHashList::next(size_t& position, ReByteBuffer* key, ReByteBuffer* value){ +bool ReHashList::next(size_t& position, ReByteBuffer* key, ReByteBuffer* value) const{ bool rc = position < m_keys.count(); if (rc){ ReSeqList::Sequence* seq = m_keys.getInfo(position++); @@ -141,37 +101,74 @@ bool ReHashList::next(size_t& position, ReByteBuffer* key, ReByteBuffer* value){ memcpy(key->buffer(), ptr, seq->m_length); } if (value != NULL){ - const Byte* ptr = m_values.str() + seq->m_tag; +#if 0 + const Byte* ptr = reinterpret_cast(m_values.str() + seq->m_tag); size_t length = * (size_t*) ptr; ptr += sizeof (size_t); value->setLength(length); memcpy(value->buffer(), ptr, length); +#endif } } return rc; } - -/** @brief Returns the index of key. - * - * @param key The byte sequence of the key. - * @param length The length of key. +/** @brief Puts a key value pair into the hashlist. * - * @return: -1: The key was not found. Otherwise: The index of the key in the key sequence array. + * @param key The key byte sequence. + * @param keyLength The length of key. + * @param value The value byte sequence. + * @param valueLength The length of value. */ -int ReHashList::find(const Byte* key, size_t length) const{ - if (length == (size_t) -1) - length = strlen(key) + 1; - int rc = -1; - int count = m_keys.count(); - for (int ii = 0; rc < 0 && ii < count; ii++){ - ReSeqList::Sequence* seq = m_keys.getInfo(ii); - if (seq->m_length == length){ - const Byte* ptr = m_keys.getContent() + seq->m_index; - if (_memcmp(ptr, key, length) == 0) - rc = ii; +void ReHashList::put(const Byte* key, size_t keyLength, + const Byte* value, size_t valueLength){ +#if 0 + if (keyLength == (size_t) -1) + keyLength = strlen(key); + if (valueLength == (size_t) -1) + valueLength = strlen(value); + int ix = find(key, keyLength); + bool storeValue = false; + if (ix < 0){ + ReSeqList::Index pos = m_values.length(); + storeValue = true; + m_keys.add(-1, key, keyLength, pos); + } else { + Sequence* seq = m_keys.getInfo(ix); + // m_tag contains the index into m_values. + Byte* ptr = m_values.buffer() + seq->m_tag; + size_t valLength = * (size_t *) ptr; + // Can we take the old storage? + if (valLength >= valueLength){ + // Yes, it is enough space: + * (size_t *) ptr = valueLength; + ptr += sizeof (size_t); + memcpy(ptr, value, valueLength); + } else { + storeValue = true; + seq->m_tag = m_values.length(); } } - return rc; + if (storeValue){ + m_values.append((Byte*) &valueLength, sizeof valueLength); + m_values.append(value, valueLength); + } +#endif +} +/** @brief Puts a key value pair into the hashlist. + * + * @param key The key. This is a C string. + * @param value The value. This is a C string. + */ +void ReHashList::put(const char* key, const char* value){ + put(key, -1, value, -1); +} +/** @brief Puts a key value pair into the hashlist. + * + * @param key The key. + * @param value The value. + */ +void ReHashList::put(const ReByteBuffer& key, const ReByteBuffer& value){ + put(key.str(), key.length(), value.str(), value.length()); } diff --git a/base/ReHashList.hpp b/base/ReHashList.hpp index d91d79b..88371d5 100644 --- a/base/ReHashList.hpp +++ b/base/ReHashList.hpp @@ -8,23 +8,24 @@ #ifndef REHASHLIST_H_ #define REHASHLIST_H_ -/** @brief A (very) simple associative array. - *

An instance stores key value pairs.

- *

Keys and values are byte sequences. This includes c strings. - *

+/** @brief A simple associative array. + * + * An instance stores key value pairs. + * + * Keys and values are byte sequences. This includes c strings. */ class ReHashList { public: typedef char Byte; typedef ReSeqList::Sequence Sequence; public: - ReHashList(); + ReHashList(int keyTagSize = 1, int contentLengthSize = 1, int keyLengthSize = 1); virtual ~ReHashList(); public: void clear(); bool get(const Byte* key, size_t keyLength, ReByteBuffer& value) const; bool get(const ReByteBuffer& key, ReByteBuffer& value) const; - bool next(size_t& position, ReByteBuffer* key, ReByteBuffer* val); + bool next(size_t& position, ReByteBuffer* key, ReByteBuffer* val) const; void put(const Byte* key, size_t keyLength, const Byte* value, size_t valueLength); void put(const char* key, const char* value); void put(const ReByteBuffer& key, const ReByteBuffer& value); @@ -32,11 +33,11 @@ protected: int find(const Byte* key, size_t length) const; protected: - //@ Containing an array of keys. - ReSeqList m_keys; - //@ Containing the values. The tag of m_key is the index - //@of the start position in m_values. - ReByteBuffer m_values; + //@ Containing an array of keys. + ReSeqList m_keys; + //@ Containing the values. The tag of m_key is the index + //@ of the start position in m_values. + ReSeqList m_values; }; #endif /* REHASHLIST_H_ */ diff --git a/base/ReProgramArgs.cpp b/base/ReProgramArgs.cpp index d40573b..57d15bc 100644 --- a/base/ReProgramArgs.cpp +++ b/base/ReProgramArgs.cpp @@ -539,7 +539,8 @@ void ReProgramArgs::setLastError(const char* message){ m_lastError.set(message, -1); } -void ReProgramArgs::help(const char* message, bool issueLastError, ReStringList& lines){ +void ReProgramArgs::help(const char* message, bool issueLastError, + ReStringList& lines) const{ lines.append(m_usage); lines.append(""); @@ -607,7 +608,7 @@ void ReProgramArgs::help(const char* message, bool issueLastError, ReStringList& } } -void ReProgramArgs::help(const char* message, bool issueLastError, FILE* stream){ +void ReProgramArgs::help(const char* message, bool issueLastError, FILE* stream) const{ ReStringList lines; help(message, issueLastError, lines); for(size_t ii = 0; ii < lines.count(); ii++){ diff --git a/base/ReProgramArgs.hpp b/base/ReProgramArgs.hpp index d7c22b7..dbb1b10 100644 --- a/base/ReProgramArgs.hpp +++ b/base/ReProgramArgs.hpp @@ -71,8 +71,8 @@ public: void init(int argc, const char* argv[]); void setLastError(const char* message); - void help(const char* message, bool issueLastError, ReStringList& lines); - void help(const char* message, bool issueLastError, FILE* stream); + void help(const char* message, bool issueLastError, ReStringList& lines) const; + void help(const char* message, bool issueLastError, FILE* stream) const; void setUsage(const char* usage[]); private: diff --git a/base/ReSeqList.cpp b/base/ReSeqList.cpp index 4e8a019..c10e5bf 100644 --- a/base/ReSeqList.cpp +++ b/base/ReSeqList.cpp @@ -7,6 +7,41 @@ #include "base/rebase.hpp" +enum RELOC_SEQLIST { + LC_SET_SIZES_1 = LC_SEQLIST + 1, // 50201 + LC_SET_SIZES_2, // 50202 + LC_SET_SIZES_3, // 50203 +}; +/** + * Implementation: + * ReSeqList is a storage for byte sequences. + * Each stored element is accessible by its index. + * + * A Sequence is a tuple (m_index, m_length, m_tag). + *
    + *
  • m_index is the index in a content buffer
  • + *
  • m_length is the length of byte sequence in the content
  • + *
  • m_tag is a additional info used by the list user, not in the container + * If the lengths of the byte sequences are equal there is no need for saving + * the length. In this case the length is stored one time (in + * m_commonSize). + * The tag can be omitted if there is no need. This is controlled by + * m_sequSize. + * + * ReSeqList contains a content buffer (m_content) and a table of + * content (m_list). + *
      + *
    • m_content stores a sequences of byte sequences. No separator is used. + * The start and the length of the byte sequences are stored in m_list.
    • + *
    • m_list is a potentially sorted array of Sequence blocks.
    • + *
    + * Each new element is stored at the end of m_content, even if is a replacement + * for a existing element. Also deletion of an element does not free the space + * in m_content. But there is a method pack(), which removes the gaps. + * The lost space is available in m_lost. + */ + /** @brief Constructor. * * @param deltaList If there is not enough space in the list (array) @@ -18,7 +53,15 @@ ReSeqList::ReSeqList(size_t deltaList, int deltaBuffer) : m_content(deltaBuffer), m_list(deltaList), - m_lost(0) + m_lost(0), + m_entrySize(sizeof(Index) + 1 + 8), + m_commonSize(INDIVIDUAL_SIZE), + m_sizeOfTag(sizeof (void*)), + m_sizeOfLength(1), + m_offsetOfTag(sizeof(Index) + 1), + m_offsetOfLength(sizeof(Index)), + m_sorted(false), + m_ignoreCase(false) { } /** @brief Destructor. @@ -33,9 +76,16 @@ ReSeqList::ReSeqList(const ReSeqList& source) : m_content(source.m_content), m_list(source.m_list), - m_lost(source.m_lost) + m_lost(source.m_lost), + m_entrySize(source.m_entrySize), + m_commonSize(source.m_commonSize), + m_sizeOfTag(source.m_sizeOfTag), + m_sizeOfLength(source.m_sizeOfLength), + m_offsetOfTag(source.m_offsetOfTag), + m_offsetOfLength(source.m_offsetOfLength), + m_sorted(source.m_sorted), + m_ignoreCase(false) { - } /** @brief Assignment operator. * @@ -47,9 +97,17 @@ ReSeqList& ReSeqList::operator = (const ReSeqList& source){ m_content = source.m_content; m_list = source.m_list; m_lost = source.m_lost; + m_entrySize = source.m_entrySize; + m_commonSize = source.m_commonSize; + m_sizeOfTag = source.m_sizeOfTag; + m_sizeOfLength = source.m_sizeOfLength; + m_offsetOfTag = source.m_offsetOfTag; + m_offsetOfLength = source.m_offsetOfLength; + m_sorted = source.m_sorted; + m_ignoreCase = source.m_ignoreCase; return *this; } -/** @brief Adds a byte sequence to the list. +/** @brief Adds an element to the list. * * @param index The index of the new entry. If greater than the list length it will be appended. * @param source The pointer of the byte sequence to insert. @@ -59,18 +117,150 @@ ReSeqList& ReSeqList::operator = (const ReSeqList& source){ void ReSeqList::add(Index index, const Byte* source, size_t sourceLength, Tag tag){ Sequence seq; if (sourceLength == (size_t) -1) - sourceLength = strlen(source) + 1; - seq.m_index = m_content.length(); - seq.m_length = sourceLength; - seq.m_tag = tag; + sourceLength = strlen(source); + setSequence(&seq, m_content.length(), sourceLength, tag); m_content.append(source, sourceLength); - if (index >= count()){ - m_list.append((Byte*) &seq, sizeof seq); - }else{ - m_list.insert(index * sizeof(Sequence), (Byte*) &seq, sizeof seq); + if (m_sorted){ + //@ToDo + assert(false); + } else { + if (index >= count()){ + m_list.append((Byte*) &seq, m_entrySize); + }else{ + m_list.insert(index * m_entrySize, (Byte*) &seq, m_entrySize); + } + } +} + +/** + * @brief Searches the an element with binary search. + * + * @param toFind the value which should be found + * @param length -1 or the length of toFind + * @param index OUT: the index usable for insert: + * index is the smallest value with content[index] >= toFind + * @param tag OUT: the tag of the element (only if it was found).
    + * May be NULL + * @return -1: the key has been found. + */ +bool ReSeqList::binarySearch(const Byte* toFind, int length, Index& index, + Tag* tag) const +{ + assert(m_sorted); + if (length < 0) + length = strlen(toFind); + Index rc = (Index) -1; + int lbound = 0; + int theCount = count(); + int ubound = theCount; + // binary search over the sorted vector: + while(lbound <= ubound){ + int half = (ubound + lbound) / 2; + const Sequence* seq = reinterpret_cast( + m_list.str() + half * m_entrySize); + int currentLength = getLength(seq); + int minLength = currentLength < length ? currentLength : length; + const char* current = m_content.str() + seq->m_index; + int compareRc = m_ignoreCase + ? _memicmp(toFind, current, minLength) + : _memcmp(toFind, current, minLength); + if (compareRc == 0 && currentLength != length) + compareRc = currentLength > length ? -1 : 1; + if (compareRc < 0) + ubound = half - 1; + else if (compareRc > 0) + lbound = half + 1; + else { + rc = true; + index = half; + break; + } + } + if (! rc) + index = ubound; + return rc; +} + +/** @brief Deletes all entries in the list. + */ +void ReSeqList::clear(){ + m_content.setLength(0); + m_list.setLength(0); +} + +/** + * Compares two elements. + * + * @param index1 the index of the first element + * @param index2 the index of the second element + * @return 0: element1 == element2
    + * < 0: element1 < element2
    + * > 0: element1 > element2 + */ +int ReSeqList::compare(Index index1, Index index2){ + Sequence* seq1 = getInfo(index1); + Sequence* seq2 = getInfo(index2); + size_t length1 = getLength(seq1); + size_t length2 = getLength(seq2); + int minLength = length1 < length2 ? length1 : length2; + int rc = m_ignoreCase + ? _memicmp(m_content.str() + seq1->m_index, + m_content.str() + seq2->m_index, minLength) + : _memcmp(m_content.str() + seq1->m_index, + m_content.str() + seq2->m_index, minLength); + if (rc == 0 && length1 != length2) + rc = length1 < length2 ? -1 : 1; + return rc; +} +/** + * Writes the content to a stream. + * + * @param fp target file pointer + */ +void ReSeqList::dump(FILE* fp) const{ + ReByteBuffer buffer; + Tag tag; + for (int ix = 0; ix < count(); ix++){ + get(ix, buffer, &tag); + fprintf(fp, "%d: (%lld) [%d] %s\n", ix, (int64_t) tag, buffer.length(), + buffer.str()); } } -/** @brief Returns a byte sequence from the list. + +/** @brief Returns the index of a stored byte sequence. + * + * @param toFind the byte sequence to find + * @param length -1 or the length of toFind. + * + * @return: -1: The key was not found. Otherwise: The index of the key in the key sequence array. + */ +ReSeqList::Index ReSeqList::find(const Byte* toFind, size_t length, + Tag* tag) const{ + if (length == (size_t) -1) + length = strlen(toFind); + int rc = -1; + int theCount = count(); + for (int ix = 0; ix < theCount; ix++){ + const ReSeqList::Sequence* seq = getInfo(ix); + Tag currentTag; + size_t currentLength = getLengthAndTag(seq, currentTag); + if (currentLength == length){ + const Byte* ptr = reinterpret_cast(m_content.str() + + seq->m_index); + int comparison = m_ignoreCase ? _memicmp(ptr, toFind, length) + : _memcmp(ptr, toFind, length); + if (comparison == 0){ + rc = ix; + if (tag != NULL) + *tag = currentTag; + break; + } + } + } + return rc; +} + +/** @brief Returns an element from the list. * * @param index The index of the sequence in the list. * @param value Out: The stored sequence will be copied here. @@ -82,14 +272,135 @@ void ReSeqList::add(Index index, const Byte* source, size_t sourceLength, Tag ta bool ReSeqList::get(Index index, ReByteBuffer& value, Tag* tag) const{ bool rc = false; if (index < count()){ - Sequence* seq = ((Sequence*)m_list.buffer()) + index; - value.set(m_content.buffer() + seq->m_index, seq->m_length); - if (tag != NULL) - *tag = seq->m_tag; + const Sequence* seq = getInfo(index); + size_t length = tag == NULL ? getLength(seq) : getLengthAndTag(seq, *tag); + value.set(m_content.str() + seq->m_index, length); rc = true; } return rc; } +/** @brief Returns the byte sequence length of the element. + * + * @return the length of the element described in the Sequence seq + */ +size_t ReSeqList::getLength(const Sequence* seq) const{ + size_t rc; + if (m_commonSize != INDIVIDUAL_SIZE) + rc = m_commonSize; + else { + const uint8_t* ptr = reinterpret_cast(seq) + m_offsetOfLength; + switch (m_sizeOfLength){ + case 1: + rc = *ptr; + break; + case 2: + rc = ptr[0] + (ptr[1] << 8); + break; + case 3: + rc = ptr[0] + (ptr[1] << 8) + (ptr[1] << 16); + break; + case 4: + rc = ptr[0] + (ptr[1] << 8) + (ptr[2] << 16) + (ptr[3] << 24); + break; + case 5: + rc = ptr[0] + (ptr[1] << 8) + (ptr[2] << 16) + (ptr[3] << 24) + + ((int64_t)ptr[4] << 32); + break; + default: + assert(false); + rc = 0; + break; + } + } + return rc; +} +/** @brief Returns the byte sequence length and the tag of an element. + * + * @param tag OUT: the tag of the element. + * @return the byte sequence length of the element + */ +size_t ReSeqList::getLengthAndTag(const Sequence* seq, Tag& tag) const{ + size_t rc; + if (m_commonSize != INDIVIDUAL_SIZE) + rc = m_commonSize; + else { + const uint8_t* ptr = reinterpret_cast(seq) + m_offsetOfLength; + switch (m_sizeOfLength){ + case 1: + rc = *ptr; + break; + case 2: + rc = ptr[0] + (ptr[1] << 8); + break; + case 3: + rc = ptr[0] + (ptr[1] << 8) + (ptr[1] << 16); + break; + case 4: + rc = ptr[0] + (ptr[1] << 8) + (ptr[2] << 16) + (ptr[3] << 24); + break; + case 5: + rc = ptr[0] | (ptr[1] << 8) | (ptr[2] << 16) | (ptr[3] << 24) + | ((uint64_t)ptr[4] << 32); + break; + default: + assert(false); + rc = 0; + break; + } + } + const uint8_t* ptr = reinterpret_cast(seq) + m_offsetOfTag; + switch (m_sizeOfTag){ + case 0: + tag = 0; + break; + case 1: + tag = *ptr; + break; + case 2: + tag = ptr[0] + (ptr[1] << 8); + break; + case 3: + tag = ptr[0] + (ptr[1] << 8) + (ptr[2] << 16); + break; + case 4: + tag = ptr[0] + (ptr[1] << 8) + (ptr[2] << 16) + (ptr[3] << 24); + break; + case 5: + tag = Tag(ptr[0] | (ptr[1] << 8) | (ptr[2] << 16) | (uint64_t(ptr[3]) << 24) + | (uint64_t(ptr[4]) << 32)); + break; + case 8: + tag = Tag(ptr[0] | (ptr[1] << 8) | (ptr[2] << 16) | (uint64_t(ptr[3]) << 24) + | (uint64_t(ptr[4]) << 32) | (uint64_t(ptr[5]) << 40) + | (uint64_t(ptr[6]) << 48) | (uint64_t(ptr[7]) << 56)); + break; + default: + assert(false); + tag = 0; + break; + } + return rc; +} + +/** @brief Removes an element given by its index. + * + * @param index The index of the entry to remove. + */ +void ReSeqList::remove(Index index){ + if (index <= count()){ + Sequence* seq = getInfo(index); + size_t currentLength = getLength(seq); + // Is this the last entry in m_content? + if (seq->m_index + currentLength >= m_content.length()){ + // We can free the content: + m_content.setLength(seq->m_index); + } else { + m_lost += seq->m_length; + } + // Remove the entry from the list: + m_list.remove(index * m_entrySize, m_entrySize); + } +} /** @brief Replaces the byte sequence in the list. * * @param index The index of the sequence to replace. @@ -97,50 +408,244 @@ bool ReSeqList::get(Index index, ReByteBuffer& value, Tag* tag) const{ * @param sourceLength The length of the new value. * @param tag An additional info associated to the source. */ -void ReSeqList::set(Index index, const Byte* source, size_t sourceLength, Tag tag){ +void ReSeqList::set(Index index, const Byte* source, + size_t sourceLength, Tag tag){ if (index >= count()) add(index, source, sourceLength, tag); else { if (sourceLength == (size_t) -1) sourceLength = strlen(source) + 1; Sequence* seq = getInfo(index); - seq->m_tag = tag; - if (seq->m_length >= sourceLength){ + size_t currentLength = getLength(seq); + size_t indexContent; + if (currentLength >= sourceLength){ // Use the existing space: - memcpy(m_content.buffer() + seq->m_index, source, sourceLength); - m_lost += seq->m_length - sourceLength; + indexContent = seq->m_index; + memcpy(m_content.buffer() + indexContent, source, sourceLength); + m_lost += currentLength - sourceLength; } else { // New space must be allocated: - m_lost += seq->m_length; - seq->m_index = m_content.length(); + m_lost += currentLength; + indexContent = m_content.length(); m_content.append(source, sourceLength); } + setSequence(seq, indexContent, sourceLength, tag); } } -/** @brief Removes an element given by its index. +/** @brief Sets the Sequence of an element. * - * @param index The index of the entry to remove. + * @param seq the target sequence + * @param index the index in m_content + * @param length the length of the content + * @param tag the tag of the element */ -void ReSeqList::remove(Index index){ - if (index <= count()){ - Sequence* seq = getInfo(index); - // Is this the last entry in m_content? - if (seq->m_index + seq->m_length >= m_content.length()){ - // We can free the content: - m_content.setLength(seq->m_index); - } else { - m_lost += seq->m_length; +void ReSeqList::setSequence(Sequence* seq, Index index, size_t length, Tag tag){ + seq->m_index = index; + if (m_commonSize == INDIVIDUAL_SIZE){ + uint8_t* ptr = reinterpret_cast(seq) + m_offsetOfLength; + switch (m_sizeOfLength){ + case 1: + ptr[0] = length & 0xff; + break; + case 2: + ptr[0] = length & 0xff; + ptr[1] = (length >> 8) & 0xff; + break; + case 3: + ptr[0] = length & 0xff; + ptr[1] = (length >> 8) & 0xff; + ptr[2] = (length >> 16) & 0xff; + break; + case 4: + ptr[0] = length & 0xff; + ptr[1] = (length >> 8) & 0xff; + ptr[2] = (length >> 16) & 0xff; + ptr[3] = (length >> 24) & 0xff; + break; + case 5: + ptr[0] = length & 0xff; + ptr[1] = (length >> 8) & 0xff; + ptr[2] = (length >> 16) & 0xff; + ptr[3] = (length >> 24) & 0xff; + ptr[4] = (length >> 32) & 0xff; + break; + default: + assert(false); + break; + } + } + if (m_sizeOfTag > 0){ + uint8_t* ptr = reinterpret_cast(seq) + m_offsetOfTag; + switch (m_sizeOfTag){ + case 1: + ptr[0] = tag & 0xff; + break; + case 2: + ptr[0] = length & 0xff; + ptr[1] = (tag >> 8) & 0xff; + break; + case 3: + ptr[0] = tag & 0xff; + ptr[1] = (tag >> 8) & 0xff; + ptr[2] = (tag >> 16) & 0xff; + break; + case 4: + ptr[0] = tag & 0xff; + ptr[1] = (tag >> 8) & 0xff; + ptr[2] = (tag >> 16) & 0xff; + ptr[3] = (tag >> 24) & 0xff; + break; + case 5: + ptr[0] = tag & 0xff; + ptr[1] = (tag >> 8) & 0xff; + ptr[2] = (tag >> 16) & 0xff; + ptr[3] = (tag >> 24) & 0xff; + ptr[4] = (tag >> 32) & 0xff; + break; + case 8: + ptr[0] = tag & 0xff; + ptr[1] = (tag >> 8) & 0xff; + ptr[2] = (tag >> 16) & 0xff; + ptr[3] = (tag >> 24) & 0xff; + ptr[4] = (tag >> 32) & 0xff; + ptr[5] = (tag >> 40) & 0xff; + ptr[6] = (tag >> 48) & 0xff; + ptr[7] = (tag >> 56) & 0xff; + break; + default: + assert(false); + break; } - // Remove the entry from the list: - m_list.remove(index * sizeof (Sequence), sizeof (Sequence)); } } -/** @brief Deletes all entries in the list. +/** @brief Sets the switch for case sensitivity. + * + * @param onNotOff true: the comparison will be case insensitive */ -void ReSeqList::clear(){ - m_content.setLength(0); - m_list.setLength(0); +void ReSeqList::setIgnoreCase(bool onNotOff){ + if (m_ignoreCase != onNotOff){ + m_ignoreCase = onNotOff; + if (m_sorted) + sort(); + } } +/** @brief Sets the switch for sorting. + * + * @param onNotOff true: the list will be sorted + */ +void ReSeqList::setSorted(bool onNotOff){ + if (m_sorted != onNotOff){ + m_sorted = onNotOff; + if (m_sorted) + sort(); + } +} + +/** + * Sets the length of the stored items tag and length (in Sequence). + * + * @param sizeOfTag 0: no tag stored. + * Otherwise: length in byte: 1,2,3,4,5 or 8 + * @param sizeOfLength 0: no length stored (constant length, stored in + * m_commonSize). + * Otherwise: length in byte: 1, 2, 3, 4 or 5 + * @param constantSize 0 or the size of the sequence if the length is not + * indiviually stored.
    + * If > 0 sizeOfLengthmust be 0! + */ +void ReSeqList::setSizes(int sizeOfTag, int sizeOfLength, int constantSize){ + switch(m_sizeOfLength = sizeOfLength){ + case 0: + case 1: + case 2: + case 3: + case 4: + case 5: + break; + default: + globalLogger()->sayF(LOG_ERROR | CAT_LIB, LC_SET_SIZES_2, + i18n("Invalid length length: $1 (instead of 0,1,2,4,5)")).arg(sizeOfTag).end(); + m_sizeOfLength = 8; + break; + } + switch(m_sizeOfTag = sizeOfTag){ + case 0: + case 1: + case 2: + case 3: + case 4: + case 8: + break; + default: + globalLogger()->sayF(LOG_ERROR | CAT_LIB, LC_SET_SIZES_1, + i18n("Invalid tag length: $1 (instead of 0,1,2,3,4,8)")).arg(sizeOfTag).end(); + m_sizeOfTag = 8; + break; + } + m_offsetOfLength = sizeof(Index); + m_offsetOfTag = sizeof(Index) + m_sizeOfLength; + if (sizeOfLength > 0 && constantSize > 0){ + globalLogger()->sayF(LOG_ERROR | CAT_LIB, LC_SET_SIZES_1, + i18n("collision of sizeOfLength $1 and constantSize $2")) + .arg(sizeOfLength).arg(constantSize).end(); + constantSize = 0; + } + m_commonSize = constantSize; +} + +/** @brief Sorts the list. + * Note: the comparison is controlled by m_ignoreCase. + */ +void ReSeqList::sort(){ + // Build the heap in array so that largest value is at the root: + int theCount = (int) count(); + for (int start = (theCount - 2) / 2; start >= 0; start--) { + // (shift down the node at index 'start' to the proper place such + // that all nodes below the start index are in heap order) + shiftDown(start, theCount); + } + // The following loop maintains the invariants that array[0:end] is a heap + // and every element beyond end is greater than everything before it + // (so array[end:count] is in sorted order) + for (int end = theCount - 1; end > 0; end--) { + // swap [end] with [0]: + Sequence seq; + Byte* ptrEnd = m_list.buffer() + end * m_entrySize; + memcpy(&seq, ptrEnd, m_entrySize); + memcpy(ptrEnd, m_list.buffer(), m_entrySize); + memcpy(m_list.buffer(), &seq, m_entrySize); + shiftDown(0, end); + } +} +/** + * Correct the order in the heap. + * + * @param start the lower bound of the interval to inspect + * @param endEnd the upper bound of the interval to inspect + */ +void ReSeqList::shiftDown(int start, int end){ + int root = start; + Sequence seq; + // while the root has at least one child: + while (root * 2 + 1 < end ) { + // left child: + int child = 2 * root + 1; + if (child + 1 < end && compare(child, child + 1) < 0) + child++; + if (compare(root, child) >= 0) + break; + else + { + // swap [child] with [root] + Byte* ptrRoot = m_list.buffer() + root * m_entrySize; + Byte* ptrChild = m_list.buffer() + child * m_entrySize; + memcpy(&seq, ptrRoot, m_entrySize); + memcpy(ptrRoot, ptrChild, m_entrySize); + memcpy(ptrChild, &seq, m_entrySize); + root = child; + } + } +} diff --git a/base/ReSeqList.hpp b/base/ReSeqList.hpp index afc3351..3cdbd8f 100644 --- a/base/ReSeqList.hpp +++ b/base/ReSeqList.hpp @@ -8,18 +8,16 @@ #ifndef RESEQLIST_H_ #define RESEQLIST_H_ -/** @brief This class implements a dynamic (selfgrowing) array of byte sequences. - *

    A byte sequence is an array of byte. +#define INDIVIDUAL_SIZE (size_t(-1)) +/** @brief This class implements a dynamic (self growing) array of elements. + * + * An element is a tuple of a byte sequences and a tag. + * A tag is an integer value which is interpreted only by the user + * of the list. The list knows nothing abbout it. + * + * A byte sequence is an array of byte. * The byte sequences may have different lengths. * This implies the handling of C string arrays too. - *

    There is room for storing a so called tag with any sequence. - * This can be used for any purpose. - *

    - *

    This is a very simple implementation: use it when: - *

    • The needed size is known nearly.
    • - *
    • The array elements increment rarely their length.
    • - *
    - *

    */ class ReSeqList { public: @@ -28,7 +26,7 @@ public: typedef int64_t Tag; typedef struct { Index m_index; - size_t m_length; + uint64_t m_length; Tag m_tag; } Sequence; public: @@ -37,17 +35,39 @@ public: ReSeqList(const ReSeqList& source); ReSeqList& operator = (const ReSeqList& source); public: - void clear(); void add(Index index, const Byte* source, size_t sourceLength, Tag tag = 0); - bool get(Index index, ReByteBuffer& value, Tag* tag = NULL) const; - void remove(Index index); - void set(Index index, const Byte* source, size_t sourceLength, Tag tag); + bool binarySearch(const Byte* toFind, int length, Index& index, + Tag* tag = NULL) const; + void clear(); + int compare(Index index1, Index index2); /** @brief Returns the count of defined entries in the list. * @return The number of defined entries in the list (array). */ inline Index count() const { - return m_list.length() / sizeof (Sequence); + return m_list.length() / m_entrySize; } + void dump(FILE* fp) const; + Index find(const Byte* toFind, size_t length, Tag* tag = NULL) const; + bool get(Index index, ReByteBuffer& value, Tag* tag = NULL) const; + /** Returns whether the list is sorted automatically. + * Note: comparison is controlled by m_ignoreCase. + * @return true the byte sequences will be sorted + */ + bool isSorted() const { + return m_sorted; + } + /** Returns whether the comparisons inside the list are case insensitive. + * @return true the comparisons inside the list are case insensitive + */ + bool ignoreCase() const { + return m_ignoreCase; + } + void remove(Index index); + void set(Index index, const Byte* source, size_t sourceLength, Tag tag); + void setSizes(int sizeOfTag, int sizeOfLength, int constantLength = 0); + void setSorted(bool onNotOff); + void setIgnoreCase(bool onNotOff); + void sort(); protected: /** @brief Returns a pointer of the content buffer. * @return A pointer of the first byte of the content buffer. @@ -61,18 +81,37 @@ protected: * @return The pointer of the entry. */ inline Sequence* getInfo(Index index) const { - return &((Sequence*) m_list.buffer())[index]; + return reinterpret_cast(m_list.buffer() + m_entrySize * index); } - + size_t getLength(const Sequence* seq) const; + size_t getLengthAndTag(const Sequence* seq, Tag& tag) const; + void setSequence(Sequence* seq, Index index, size_t length, Tag tag); + void shiftDown(int from, int to); protected: - // friend class ReSeqList; - //@ Contains the sequences itself. - ReByteBuffer m_content; - //@ Contains an array of Sequences. - ReByteBuffer m_list; - //@ If strings have been replaced the space in m_content is still allocated. - //@ This is the sum of lost space. + //@ Contains the sequences itself. + ReByteBuffer m_content; + //@ Contains an array of Sequences. + ReByteBuffer m_list; + //@ If strings have been replaced/deleted the space in m_content is still allocated. + //@ This is the sum of lost space. size_t m_lost; + //@ The length of a Sequence block: sizeof(seq->m_index)+m_lengthTag+m_lengthLength + size_t m_entrySize; + //@ -1: The Sequence block contains the individual length. + //@ 0: no content will be stored + //@ Otherwise: the common length of all elements in m_content + size_t m_commonSize; + //@ length of the tag: 0,1,2,4,8 + size_t m_sizeOfTag; + //@ length of the length element in Sequence: 0,1,2,4 + size_t m_sizeOfLength; + //@ offset of the tag (from start of Sequence) + size_t m_offsetOfTag; + //@ offset of the length (from start of Sequence) + size_t m_offsetOfLength; + bool m_sorted; + //@ true: the comparison will be case insensitive + bool m_ignoreCase; }; #endif /* RESEQLIST_H_ */ diff --git a/base/ReStringList.cpp b/base/ReStringList.cpp index 7c28aa0..71fb641 100644 --- a/base/ReStringList.cpp +++ b/base/ReStringList.cpp @@ -49,7 +49,7 @@ ReStringList& ReStringList::append(const ReByteBuffer& source, Tag tagOf){ * @param source The new stringlist. * @return the instance itself (for chaining) */ -ReStringList& ReStringList::append(ReStringList& source){ +ReStringList& ReStringList::append(const ReStringList& source){ for (size_t ii = 0; ii < source.count(); ii++) add(-1, source.strOf(ii), source.sizeOf(ii), source.tagOf(ii)); return *this; @@ -72,7 +72,7 @@ void ReStringList::insert(Index index, const char* source, Tag tagOf){ * @param tagOf The tagOf of the replace element. */ void ReStringList::replace(Index index, const char* source, Tag tagOf){ - set(index, source, strlen(source) + 1, tagOf); + set(index, source, strlen(source), tagOf); } /** @brief Replaces a string in the internal array. * @@ -84,7 +84,7 @@ void ReStringList::replace(Index index, const char* source, Tag tagOf){ void ReStringList::replaceString(Index index, const char* source){ if (index < count()){ Sequence* seq = getInfo(index); - set(index, source, strlen(source) + 1, seq->m_tag); + set(index, source, strlen(source), seq->m_tag); } } /** @brief Replaces a tagOf in the internal array. diff --git a/base/ReStringList.hpp b/base/ReStringList.hpp index f68d622..a12b2db 100644 --- a/base/ReStringList.hpp +++ b/base/ReStringList.hpp @@ -35,7 +35,7 @@ public: public: ReStringList& append(const char* source, Tag tag = 0); ReStringList& append(const ReByteBuffer& source, Tag tag = 0); - ReStringList& append(ReStringList& source); + ReStringList& append(const ReStringList& source); bool equal(const ReStringList& toCompare) const; int firstDiff(const ReStringList& toCompare) const; Index indexOf(const char* toFind, bool ignoreCase = false, Index start = 0) const; diff --git a/base/ReTestUnit.cpp b/base/ReTestUnit.cpp index 0c4180d..7d23938 100644 --- a/base/ReTestUnit.cpp +++ b/base/ReTestUnit.cpp @@ -122,6 +122,21 @@ void ReTestUnit::assertEqual(const char* expected, const char* current, int line expected, current == NULL ? "" : current); } } + +/** @brief Compares two string values. If not equal this will be logged. + * + * @param expected the expected value + * @param current the current value + * @param lineNo the line number of the test (for the error message) + */ +void ReTestUnit::assertEqualIgnoreCase(const char* expected, const char* current, int lineNo){ + if (current == NULL || _stricmp(expected, current) != 0){ + logF(true, i18n("%s-%d: expected / current: length: %d / %d\n%.512s\n%.512s"), + m_sourceFile.str(), lineNo, strlen(expected), current == NULL ? 0 : strlen(current), + expected, current == NULL ? "" : current); + } +} + /** @brief Checks whether a file exists. If not this will be logged. * * @param name the filename diff --git a/base/ReTestUnit.hpp b/base/ReTestUnit.hpp index eef2520..2b1d187 100644 --- a/base/ReTestUnit.hpp +++ b/base/ReTestUnit.hpp @@ -23,6 +23,7 @@ public: void assertEqual(int expected, int current, int lineNo); void assertEqual(unsigned int expected, unsigned int current, int lineNo); void assertEqual(const char* expected, const char* current, int lineNo); + void assertEqualIgnoreCase(const char* expected, const char* current, int lineNo); void assertEqualFiles(const char* name1, const char* name2, int lineNo); void assertFileExists(const char* name, int lineNo); void assertFalse(bool conditon, int lineNo); @@ -48,6 +49,7 @@ protected: #define checkN(ptr) assertNull((void*) ptr, __LINE__) #define checkNN(ptr) assertNotNull((void*) ptr, __LINE__) #define checkEqu(exp, cur) assertEqual(exp, cur, __LINE__) +#define checkIEqu(exp, cur) assertEqualIgnoreCase(exp, cur, __LINE__) #define checkFileExists(fn) assertFileExists(fn, __LINE__) #define checkDirExists(fn) assertDirExists(fn, __LINE__) #define checkFileEqu(f1, f2) assertEqualFiles(f1, f2, __LINE__) diff --git a/base/baselocations.hpp b/base/baselocations.hpp index 83b39ad..b96a8a6 100644 --- a/base/baselocations.hpp +++ b/base/baselocations.hpp @@ -13,6 +13,7 @@ enum RELOC_LIB { LC_CONFIGFILE = 50000, LC_DIRTOOLS = 50100, + LC_SEQLIST = 50200, }; enum RELOC_UDPCONNECTION { LC_UDPCONNECTION_CONSTRUCT = 50101, diff --git a/cunit/cuReCString.cpp b/cunit/cuReCString.cpp index ac4da6d..931a2c4 100644 --- a/cunit/cuReCString.cpp +++ b/cunit/cuReCString.cpp @@ -19,7 +19,6 @@ private: } void testReplaceSubstring(){ char buffer[256]; -#if 0 strcpy(buffer, "12.ab"); replaceSubstring(buffer, sizeof buffer, 0, "xy"); checkEqu("xy12.ab", buffer); @@ -31,10 +30,8 @@ private: replaceSubstring(buffer, 5, 0, "x"); replaceSubstring(buffer, 6, 0, "x"); checkEqu("xw.ab", buffer); -#endif } void testMemicmp(){ -#if 0 checkEqu(0, _memicmp((void*) "abcd", (void*) "abcx", 3u)); checkEqu(0, _memicmp("aBcd", "AbCx", 3)); @@ -46,7 +43,6 @@ private: checkEqu(1, _memicmp("bbcd", "abcx", 3)); checkEqu(-1, _memicmp("ABcd", "bbcx", 3)); -#endif } }; extern void testReCString(void); diff --git a/cunit/cuReSeqList.cpp b/cunit/cuReSeqList.cpp index e09f42c..2397b9d 100644 --- a/cunit/cuReSeqList.cpp +++ b/cunit/cuReSeqList.cpp @@ -7,27 +7,140 @@ public: } private: void run(){ + testSorted(); + testIgnoreCase(); + testFind(); + testAdd(); testBase(); testRemove(); } + void checkElement(ReSeqList& list, ReSeqList::Index index, const char* value, ReSeqList::Tag expectedTag){ + ReByteBuffer buffer; + ReSeqList::Tag tag; + checkT(list.get(index, buffer, &tag)); + checkIEqu(value, buffer.str()); + checkEqu(expectedTag, tag); + } + void testSorted(){ + ReSeqList list; + list.setIgnoreCase(true); + ReByteBuffer value, expectedValue; + ReSeqList::Tag tag = 0; + ReSeqList::Tag expectedTag = 0; + list.add(-1, "bcd", -1, 200); + list.add(-1, "abcd", -1, 100); + list.add(-1, "abc", -1, 300); + list.add(-1, "abc", -1, 350); + list.add(-1, "cde", -1, 400); + log(false, "unsorted:"); + list.dump(stdout); + list.setSorted(true); + log(false, "sorted:"); + list.dump(stdout); + checkElement(list, 0U, "AbC", 350); + checkElement(list, 1U, "AbC", 300); + checkElement(list, 2U, "AbCd", 100); + checkElement(list, 3U, "bCd", 200); + checkElement(list, 4U, "cde", 400); + ReSeqList::Index index; + checkT(list.binarySearch("Abc", -1, index, &tag)); + checkEqu(0u, index); + checkT(list.binarySearch("AbcD", -1, index, &tag)); + checkEqu(2u, index); + checkT(list.binarySearch("bCd", -1, index, &tag)); + checkEqu(3u, index); + checkT(list.binarySearch("cde", -1, index, &tag)); + checkEqu(4u, index); + } + void testIgnoreCase(){ + ReSeqList list; + list.setIgnoreCase(true); + ReByteBuffer value, expectedValue; + ReSeqList::Tag tag = 0; + ReSeqList::Tag expectedTag = 0; + list.add(-1, "bcd", -1, 200); + list.add(0, "abc", -1, 100); + list.dump(stdout); + checkEqu(0U, list.find("AbC", -1)); + checkEqu(1U, list.find("BCD", -1)); + } + void testAdd(){ + ReSeqList list; + ReByteBuffer value, expectedValue; + ReSeqList::Tag tag = 0; + ReSeqList::Tag expectedTag = 0; + size_t maxIx = 64; + for (size_t ix = 0; ix < maxIx; ix++){ + expectedTag = (1 << ix); + expectedValue.append("x", 1); + list.add(-1, expectedValue.str(), -1, expectedTag); + checkEqu(ix + 1, list.count()); + checkT(list.get(ix, value, &tag)); + checkEqu(expectedValue.str(), value.str()); + if (expectedTag != tag) + checkEqu(expectedTag, tag); + } + expectedValue.setLength(0); + for (size_t ix = 0; ix < maxIx; ix++){ + expectedTag = (1 << ix); + expectedValue.append("x", 1); + checkT(list.get(ix, value, &tag)); + checkEqu(expectedValue.str(), value.str()); + checkEqu(expectedTag, tag); + } + + } + void testFind(){ + ReSeqList list; + ReByteBuffer value, expectedValue; + ReSeqList::Tag tag = 0; + ReSeqList::Tag expectedTag = 0; + size_t maxIx = 13; + for (size_t ix = 0; ix < maxIx; ix++){ + expectedTag = 10*ix; + expectedValue.append("x", 1); + list.add(-1, expectedValue.str(), -1, expectedTag); + checkEqu(ix + 1, list.count()); + checkT(list.get(ix, value, &tag)); + checkEqu(expectedValue.str(), value.str()); + if (expectedTag != tag) + checkEqu(expectedTag, tag); + } + list.dump(stdout); + expectedValue.setLength(0); + for (size_t ix = 0; ix < maxIx; ix++){ + expectedTag = -1; + expectedValue.append("x", 1); + if (ix >= 4) + expectedTag = -1; + checkEqu(ix, list.find(expectedValue.str(), expectedValue.length(), &expectedTag)); + checkEqu(10*ix, expectedTag); + } + } void testBase(){ ReSeqList list; ReByteBuffer value; ReSeqList::Tag tag = 0; - list.add(-1, "123", -1, 100); + list.add(-1, "xxx", -1, 1llu << 31); + checkT(list.get(0, value, &tag)); + checkEqu(1ll << 31, tag); + checkEqu("xxx", value.str()); + + list.clear(); + list.add(-1, "123", -1, 0x3344556633445566ll); checkEqu(1u, list.count()); checkT(list.get(0, value, &tag)); checkEqu("123", value.str()); - checkEqu((int64_t) 100, tag); + checkEqu((int64_t) 0x3344556633445566ll, tag); - list.add(-1, "ab", -1, 200); + list.add(-1, "ab", -1, 0x345678abcdef3344ll); checkEqu(2u, list.count()); checkT(list.get(0, value)); checkEqu("123", value.str()); checkT(list.get(1, value, &tag)); checkEqu("ab", value.str()); - checkEqu(200ll, tag); + checkEqu(0x345678abcdef3344ll, tag); list.add(0, "xyz", -1, 300); checkEqu(3u, list.count()); diff --git a/cunit/cuReStringList.cpp b/cunit/cuReStringList.cpp index d38920f..a4baf00 100644 --- a/cunit/cuReStringList.cpp +++ b/cunit/cuReStringList.cpp @@ -7,14 +7,12 @@ public: } private: void run(){ -#if 0 testAppendByteBuffer(); testBase(); testReplace(); testJoin(); testEqu(); testFile(); -#endif } void testAppendByteBuffer(){ ReStringList list; diff --git a/cunit/cuReTraverser.cpp b/cunit/cuReTraverser.cpp index de0ccab..9314d9f 100644 --- a/cunit/cuReTraverser.cpp +++ b/cunit/cuReTraverser.cpp @@ -132,7 +132,7 @@ private: "-D5", "-d1", "-z1k", "-Z2M", "-p;*;-*~" }; ReDirEntryFilter_t filter; - opts.programArgs().init(sizeof argv / sizeof argv[0], argv); + opts.programArgsChangeable().init(sizeof argv / sizeof argv[0], argv); opts.setFilterFromProgramArgs(filter); // local time: +3600 const int DIFF = 3600; diff --git a/cunit/testall.cpp b/cunit/testall.cpp index ab66b06..f2921f4 100644 --- a/cunit/testall.cpp +++ b/cunit/testall.cpp @@ -11,8 +11,12 @@ #endif void testBase(){ + extern void testReHashList(void); + //testReHashList(); + extern void testReSeqList(); + testReSeqList(); extern void testReTestUnit(); - //testReTestUnit(); + testReTestUnit(); extern void testReByteBuffer(); testReByteBuffer(); extern void testReSeqList(void); @@ -59,9 +63,9 @@ void testOs(){ void testAll(){ try { + testBase(); testString(); testOs(); - testBase(); } catch (ReException e){ fprintf(stderr, "testBase.cpp: unexpected exception: %s\n", e.getMessage()); } diff --git a/os/ReDirTools.cpp b/os/ReDirTools.cpp index f364a31..db2032e 100644 --- a/os/ReDirTools.cpp +++ b/os/ReDirTools.cpp @@ -106,14 +106,16 @@ ReDirOptions::ReDirOptions(const char* usage[], const char* examples[]) : m_nodePatterns(), m_pathPatterns(), m_compoundUsage(NULL), - m_countCompoundUsage(0) + m_countCompoundUsage(0), + m_output(stdout) { } /** * Destructor. */ ReDirOptions::~ReDirOptions(){ - delete[] m_compoundUsage; + close(); + delete[] m_compoundUsage; } /** * Initializes the compound usage message array. @@ -150,6 +152,7 @@ void ReDirOptions::addCompoundUsage(const char** usage){ * Adds the standard filter options. */ void ReDirOptions::addStandardFilterOptions(){ + // standard short options: D d O o P p q T t y Z z m_programArgs.addInt("maxdepth", i18n("the depth of the subdirectory is lower or equal \n" "0: search is done only in the base directory"), @@ -158,7 +161,11 @@ void ReDirOptions::addStandardFilterOptions(){ i18n("the depth of the subdirectory is greater or equal \n" "0: search is done in all subdirectories"), 'd', "min-depth", 512); - m_programArgs.addString("older", + m_programArgs.addString("output", + i18n("the name of the output file.\n" + "The output will written to this file instead of stdout"), + 'O', "output-file", false, NULL); + m_programArgs.addString("older", i18n("the modification date is older than \n" " is a date (e.g. 2015.02.17) or number followed by an unit\n" "units: m(inutes) h(hours), d(days). Default: m(inutes)\n" @@ -172,7 +179,7 @@ void ReDirOptions::addStandardFilterOptions(){ "A directory will be entered if at least one of the positive patterns\n" "and none of the 'not patterns' matches\n" "examples: ';*;-*/.git/' ',*/cache/,*/temp/"), - 'P', "path-pattern", false, NULL); + 'P', "path-pattern", false, NULL); m_programArgs.addString("nodepattern", i18n("a list of patterns for the basename (name without path) separated by ';'\n" "Each pattern can contain '*' as wildcard\n" @@ -442,6 +449,12 @@ void ReDirOptions::setFilterFromProgramArgs(ReDirEntryFilter_t& filter){ filter.m_pathPatterns = &m_pathPatterns; } filter.m_traceInterval = m_programArgs.getInt("trace"); + if (m_programArgs.getString("output", buffer)[0] != '\0'){ + if ( (m_output = fopen(buffer.str(), "w")) == NULL){ + help("cannot open output file", buffer.str()); + m_output = stdout; + } + } } /** * Prints a help message, the error message and exits. @@ -450,7 +463,7 @@ void ReDirOptions::setFilterFromProgramArgs(ReDirEntryFilter_t& filter){ * @param message2 an additional message */ -void ReDirOptions::help(const char* errorMessage, const char* message2 ){ +void ReDirOptions::help(const char* errorMessage, const char* message2) const{ ReByteBuffer msg; if (errorMessage != 0) msg.append(errorMessage, -1); @@ -475,6 +488,13 @@ void ReDirOptions::checkStandardFilterOptions(){ checkPatternList(m_programArgs.getString("pathpattern", buffer)); } +void ReDirOptions::close(){ + if (m_output != stdout){ + fclose(m_output); + m_output = stdout; + } +} + /** * Constructor. */ @@ -584,14 +604,14 @@ void ReDirStatistic::run(int argc, const char* argv[]){ ReByteBuffer buffer; for (size_t ix = 0; ix < list.count(); ix++){ buffer.set(list.strOf(ix), list.strLengthOf(ix)); - printf("%s\n", buffer.str()); + fprintf(m_output, "%s\n", buffer.str()); } if (! m_programArgs.getBool("quiet")){ int duration = int(time(NULL) - start); - printf("Duration: "); + fprintf(m_output, "Duration: "); if (duration >= 3600) - printf("%d:", duration / 3600); - printf("%02d:%02d\n", duration % 3600 / 60, duration % 60); + fprintf(m_output, "%d:", duration / 3600); + fprintf(m_output, "%02d:%02d\n", duration % 3600 / 60, duration % 60); } } catch (ReOptionException& exc) { m_programArgs.help(exc.getMessage(), false, stdout); @@ -624,8 +644,6 @@ const ReStringList& ReDirStatistic::calculate(const char* base, int level, while( (entry = traverser.rawNextFile(currentDepth))){ //@todo ReByteBuffer dummy(entry->m_path); dummy.append(entry->node()); - int passNo = entry->m_passNo; - const char* dummyStr = dummy.str(); if (currentDepth <= level && ! entry->m_path.equals(current->m_path)){ if (currentDepth <= topOfStack){ // close the entries of the stack above the new top level: @@ -812,9 +830,9 @@ void ReDirList::list(int argc, const char* argv[]){ } if (! printOneFile(entry)){ if (shortOutput) - printf("%s%s\n", entry->m_path.str(), entry->node()); + fprintf(m_output, "%s%s\n", entry->m_path.str(), entry->node()); else - printf("%s %12.6f %s %02x %s%s\n", + fprintf(m_output, "%s %12.6f %s %02x %s%s\n", entry->rightsAsString(bufferRights), entry->fileSize() / 1E6, entry->filetimeAsString(bufferTime), @@ -825,7 +843,7 @@ void ReDirList::list(int argc, const char* argv[]){ } if (verbose){ int duration = int(time(NULL) - start); - printf ("+++ %d dirs and %d file(s) with %.6f MByte in %02d:%02d sec\n", + fprintf(m_output, "+++ %d dirs and %d file(s) with %.6f MByte in %02d:%02d sec\n", dirs, files, sumSizes / 1E6, duration / 60, duration % 60); } } catch(ReOptionException& exc){ @@ -839,6 +857,7 @@ void ReDirList::list(int argc, const char* argv[]){ ReDirBatch::ReDirBatch() : ReDirOptions(s_batchUsage, s_batchExamples) { + // standard short options: D d O o P p q T t y Z z m_programArgs.addString("first", i18n("defines the first line of the output"), '1', "first-line", true, @@ -972,7 +991,7 @@ void ReDirBatch::createBatch(int argc, const char* argv[]){ } else { replaceMakros(arguments.str(), entry, delim, line); } - printf("%s\n", line.str()); + fprintf(m_output, "%s\n", line.str()); } } if (verbose){ @@ -982,7 +1001,7 @@ void ReDirBatch::createBatch(int argc, const char* argv[]){ #elif defined __WIN32__ const char* prefix = "rem"; #endif - printf ("%s %d dir(s) and %d file(s) with %.6f MByte in %02d:%02d sec\n", + fprintf(m_output, "%s %d dir(s) and %d file(s) with %.6f MByte in %02d:%02d sec\n", prefix, dirs, files, sumSizes / 1E6, duration / 60, duration % 60); } } catch(ReOptionException& exc){ @@ -1012,25 +1031,26 @@ ReDirSync::ReDirSync() : ReDirOptions(s_syncUsage, s_syncExamples), m_buffer() { + // standard short options: D d O o P p q T t y Z z m_buffer.ensureSize(4u*1024u*1024u); m_programArgs.addBool("add", i18n("copies only files which does not exist on the target"), 'a', "add", false); m_programArgs.addBool("dry", i18n("does nothing, but says what should be done"), - 'd', "dry", false); + 'Y', "dry", false); m_programArgs.addInt("timediff", i18n("filetime difference is considered to be equal\n" "if the difference is less than this value (in seconds)"), - 'D', "time-delta", 2); + 'I', "time-delta", 2); m_programArgs.addBool("ignoredate", - i18n("the modification is recognized only by the different size (not time)"), + i18n("the modification is recognized only by the different size (not time)"), 'i', "ignore-time", false); m_programArgs.addBool("chatter", - i18n("comments the action of each file"), + i18n("comments the action of each file"), 'h', "chatter", false); m_programArgs.addBool("mustexist", - i18n("files which don't exist on the target will not be copied"), + i18n("files which don't exist on the target will not be copied"), 'm', "must-exist", false); addStandardFilterOptions(); } @@ -1093,7 +1113,7 @@ bool ReDirSync::copyFile(const char* source, time_t modified, int64_t size, fseek(fpTarget, 0, SEEK_SET); while(size > 0){ size_t blockSize = buffer.capacity(); - if (blockSize > size) + if ((int) blockSize > size) blockSize = size; if (fread(buffer.buffer(), blockSize, 1, fpSource) != 1){ if (logger != NULL) @@ -1194,23 +1214,23 @@ void ReDirSync::synchronize(int argc, const char* argv[]){ bool exists = stat(targetFile.str(), &info) == 0; if (! exists && ! mustExist){ if (chatterMode) - printf("-ignored: %s does not exist\n", targetRelativePath); + fprintf(m_output, "-ignored: %s does not exist\n", targetRelativePath); continue; } if (exists && addOnly){ if (chatterMode) - printf("~ignored: %s exists\n", targetRelativePath); + fprintf(m_output, "~ignored: %s exists\n", targetRelativePath); continue; } if (ignoreDate && entry->fileSize() == info.st_size){ if (chatterMode) - printf("_ignored: %s same size\n", targetRelativePath); + fprintf(m_output, "_ignored: %s same size\n", targetRelativePath); continue; } if (! ignoreDate && entry->filetimeToTime(entry->modified()) - info.st_mtime > maxFileTimeDiff) { if (chatterMode) - printf("=ignored: %s same time\n", targetRelativePath); + fprintf(m_output, "=ignored: %s same time\n", targetRelativePath); continue; } else { if (entry->isDirectory()) @@ -1220,7 +1240,7 @@ void ReDirSync::synchronize(int argc, const char* argv[]){ sumSizes += entry->fileSize(); } if (verbose || chatterMode || dry) - printf("%c%s%s\n", exists ? '!' : '+', targetRelativePath, + fprintf(m_output, "%c%s%s\n", exists ? '!' : '+', targetRelativePath, dry ? " would be copied" : ""); if (! dry) copyFile(entry, target.str()); @@ -1229,7 +1249,7 @@ void ReDirSync::synchronize(int argc, const char* argv[]){ } if (verbose){ int duration = int(time(NULL) - start); - printf ("=== %d dir(s) and %d file(s) with %.6f MByte in %02d:%02d sec\n", + fprintf(m_output, "=== %d dir(s) and %d file(s) with %.6f MByte in %02d:%02d sec\n", dirs, files, sumSizes / 1E6, duration / 60, duration % 60); } } catch(ReOptionException& exc){ diff --git a/os/ReDirTools.hpp b/os/ReDirTools.hpp index 6b6fe66..12b571a 100644 --- a/os/ReDirTools.hpp +++ b/os/ReDirTools.hpp @@ -13,26 +13,39 @@ public: ReDirOptions(const char* usage[], const char* example[]); ~ReDirOptions(); public: + void addCompoundUsage(const char** usage); void addStandardFilterOptions(); + time_t checkDate(const char* value); + const char* checkPatternList(const char* value); + int64_t checkSize(const char* value); void checkStandardFilterOptions(); - void initCompoundUsage(size_t size); - void addCompoundUsage(const char** usage); + void close(); + ReDirStatus_t::Type_t checkType(const char* value); + /** Returns the compound usage message. + * @return the compound usage message + */ const char** compoundUsage() const { return m_compoundUsage; } - ReProgramArgs& programArgs() + void initCompoundUsage(size_t size); + void help(const char* errorMessage, const char* message2 = NULL) const; + /** Returns the program arguments. + * @return the program arguments + */ + const ReProgramArgs& programArgs() const + { return m_programArgs; } + /** Returns the program arguments. + * @return the program arguments + */ + ReProgramArgs& programArgsChangeable() { return m_programArgs; } - time_t checkDate(const char* value); - int64_t checkSize(const char* value); - ReDirStatus_t::Type_t checkType(const char* value); - const char* checkPatternList(const char* value); void setFilterFromProgramArgs(ReDirEntryFilter_t& filter); - void help(const char* errorMessage, const char* message2 = NULL); protected: ReProgramArgs m_programArgs; RePatternList m_nodePatterns; RePatternList m_pathPatterns; const char** m_compoundUsage; int m_countCompoundUsage; + FILE* m_output; }; class ReDirBatch : public ReDirOptions { -- 2.39.5