From 8c477fee84c895a22dfa17ffb926c4f1136c36cf Mon Sep 17 00:00:00 2001
From: hama
Date: Mon, 12 Jan 2015 00:54:41 +0100
Subject: [PATCH] Refactoring: ReSeqList is now efficient (sorting).
---
base/ReByteBuffer.cpp | 4 +-
base/ReHashList.cpp | 157 +++++------
base/ReHashList.hpp | 23 +-
base/ReProgramArgs.cpp | 5 +-
base/ReProgramArgs.hpp | 4 +-
base/ReSeqList.cpp | 587 ++++++++++++++++++++++++++++++++++++---
base/ReSeqList.hpp | 89 ++++--
base/ReStringList.cpp | 6 +-
base/ReStringList.hpp | 2 +-
base/ReTestUnit.cpp | 15 +
base/ReTestUnit.hpp | 2 +
base/baselocations.hpp | 1 +
cunit/cuReCString.cpp | 4 -
cunit/cuReSeqList.cpp | 121 +++++++-
cunit/cuReStringList.cpp | 2 -
cunit/cuReTraverser.cpp | 2 +-
cunit/testall.cpp | 8 +-
os/ReDirTools.cpp | 76 +++--
os/ReDirTools.hpp | 29 +-
19 files changed, 921 insertions(+), 216 deletions(-)
diff --git a/base/ReByteBuffer.cpp b/base/ReByteBuffer.cpp
index 3525de7..7dd45b2 100644
--- a/base/ReByteBuffer.cpp
+++ b/base/ReByteBuffer.cpp
@@ -278,13 +278,13 @@ int ReByteBuffer::firstDifference(const Byte* source, size_t length, int start,
if (start < 0)
rc = 0;
else if (start < (int) m_length){
- if (length == -1)
+ if (length == (size_t) -1)
length = strlen(source);
int count = length > m_length - start ? m_length - start : length;
const Byte* ptr = m_buffer + start;
for (int ix = 0; rc < 0 && ix < count; ix++){
if ((! ignoreCase && *ptr++ != *source++)
- || ignoreCase && (tolower(*ptr++) != tolower(*source++)))
+ || (ignoreCase && (tolower(*ptr++) != tolower(*source++))))
rc = start + ix;
}
if (rc < 0 && length > m_length - start)
diff --git a/base/ReHashList.cpp b/base/ReHashList.cpp
index 61dc9db..4f5aba1 100644
--- a/base/ReHashList.cpp
+++ b/base/ReHashList.cpp
@@ -9,73 +9,37 @@
/** Constructor.
*/
-ReHashList::ReHashList()
- :
+ReHashList::ReHashList(int keyTagSize, int contentLengthSize, int keyLengthSize) :
m_keys(),
m_values()
{
+ m_keys.setSizes(keyTagSize, keyLengthSize);
+ m_keys.setSizes(0, contentLengthSize);
}
/** Destructor
*/
ReHashList::~ReHashList() {
}
-/** @brief Puts a key value pair into the hashlist.
- *
- * @param key The key byte sequence.
- * @param keyLength The length of key
.
- * @param value The value byte sequence.
- * @param valueLength The length of value
.
+/** @brief Deletes all entries in the list.
*/
-void ReHashList::put(const Byte* key, size_t keyLength,
- const Byte* value, size_t valueLength){
- if (keyLength == (size_t) -1)
- keyLength = strlen(key) + 1;
- if (valueLength == (size_t) -1)
- valueLength = strlen(value) + 1;
- int ix = find(key, keyLength);
- bool storeValue = false;
- if (ix < 0){
- ReSeqList::Index pos = m_values.length();
- storeValue = true;
- m_keys.add(-1, key, keyLength, pos);
- } else {
- Sequence* seq = m_keys.getInfo(ix);
- // m_tag contains the index into m_values
.
- Byte* ptr = m_values.buffer() + seq->m_tag;
- size_t valLength = * (size_t *) ptr;
- // Can we take the old storage?
- if (valLength >= valueLength){
- // Yes, it is enough space:
- * (size_t *) ptr = valueLength;
- ptr += sizeof (size_t);
- memcpy(ptr, value, valueLength);
- } else {
- storeValue = true;
- seq->m_tag = m_values.length();
- }
- }
- if (storeValue){
- m_values.append((Byte*) &valueLength, sizeof valueLength);
- m_values.append(value, valueLength);
- }
+void ReHashList::clear(){
+ m_keys.clear();
+ m_values.clear();
}
-/** @brief Puts a key value pair into the hashlist.
+
+/** @brief Returns the index of key.
*
- * @param key The key. This is a C string.
- * @param value The value. This is a C string.
- */
-void ReHashList::put(const char* key, const char* value){
- put(key, -1, value, -1);
-}
-/** @brief Puts a key value pair into the hashlist.
+ * @param key The byte sequence of the key.
+ * @param length The length of key
.
*
- * @param key The key.
- * @param value The value.
+ * @return: -1: The key was not found. Otherwise: The index of the key in the key sequence array.
*/
-void ReHashList::put(const ReByteBuffer& key, const ReByteBuffer& value){
- put(key.str(), key.length(), value.str(), value.length());
+int ReHashList::find(const Byte* key, size_t length) const{
+ int rc = m_keys.find(key, length, NULL);
+ return rc;
}
+
/** @brief Returns the value of a key value pair.
*
* @param key The key byte sequence.
@@ -87,19 +51,21 @@ void ReHashList::put(const ReByteBuffer& key, const ReByteBuffer& value){
bool ReHashList::get(const Byte* key, size_t keyLength,
ReByteBuffer& value) const{
if (keyLength == (size_t) -1)
- keyLength = strlen(key) + 1;
+ keyLength = strlen(key);
int ix = find(key, keyLength);
bool rc = ix >= 0;
+#if 0
if (rc){
ReSeqList::Sequence* seq = m_keys.getInfo(ix);
// m_tag contains the index into m_values:
- const Byte* ptr = m_values.str() + seq->m_tag;
+ const Byte* ptr = reinterpret_cast(m_values.str() + seq->m_tag);
// m_values contains :
size_t valLength = * (size_t*) ptr;
ptr += sizeof (size_t);
value.set(ptr, valLength);
}
+#endif
return rc;
}
/** @brief Returns the value of a key value pair.
@@ -114,12 +80,6 @@ bool ReHashList::get(const ReByteBuffer& key,
bool rc = get(key.str(), key.length(), value);
return rc;
}
-/** @brief Deletes all entries in the list.
- */
-void ReHashList::clear(){
- m_keys.clear();
- m_values.setLength(0);
-}
/** @brief Implements an iterator.
*
@@ -131,7 +91,7 @@ void ReHashList::clear(){
*
* @param true: An item was found. false: No more items.
*/
-bool ReHashList::next(size_t& position, ReByteBuffer* key, ReByteBuffer* value){
+bool ReHashList::next(size_t& position, ReByteBuffer* key, ReByteBuffer* value) const{
bool rc = position < m_keys.count();
if (rc){
ReSeqList::Sequence* seq = m_keys.getInfo(position++);
@@ -141,37 +101,74 @@ bool ReHashList::next(size_t& position, ReByteBuffer* key, ReByteBuffer* value){
memcpy(key->buffer(), ptr, seq->m_length);
}
if (value != NULL){
- const Byte* ptr = m_values.str() + seq->m_tag;
+#if 0
+ const Byte* ptr = reinterpret_cast(m_values.str() + seq->m_tag);
size_t length = * (size_t*) ptr;
ptr += sizeof (size_t);
value->setLength(length);
memcpy(value->buffer(), ptr, length);
+#endif
}
}
return rc;
}
-
-/** @brief Returns the index of key.
- *
- * @param key The byte sequence of the key.
- * @param length The length of key
.
+/** @brief Puts a key value pair into the hashlist.
*
- * @return: -1: The key was not found. Otherwise: The index of the key in the key sequence array.
+ * @param key The key byte sequence.
+ * @param keyLength The length of key
.
+ * @param value The value byte sequence.
+ * @param valueLength The length of value
.
*/
-int ReHashList::find(const Byte* key, size_t length) const{
- if (length == (size_t) -1)
- length = strlen(key) + 1;
- int rc = -1;
- int count = m_keys.count();
- for (int ii = 0; rc < 0 && ii < count; ii++){
- ReSeqList::Sequence* seq = m_keys.getInfo(ii);
- if (seq->m_length == length){
- const Byte* ptr = m_keys.getContent() + seq->m_index;
- if (_memcmp(ptr, key, length) == 0)
- rc = ii;
+void ReHashList::put(const Byte* key, size_t keyLength,
+ const Byte* value, size_t valueLength){
+#if 0
+ if (keyLength == (size_t) -1)
+ keyLength = strlen(key);
+ if (valueLength == (size_t) -1)
+ valueLength = strlen(value);
+ int ix = find(key, keyLength);
+ bool storeValue = false;
+ if (ix < 0){
+ ReSeqList::Index pos = m_values.length();
+ storeValue = true;
+ m_keys.add(-1, key, keyLength, pos);
+ } else {
+ Sequence* seq = m_keys.getInfo(ix);
+ // m_tag contains the index into m_values
.
+ Byte* ptr = m_values.buffer() + seq->m_tag;
+ size_t valLength = * (size_t *) ptr;
+ // Can we take the old storage?
+ if (valLength >= valueLength){
+ // Yes, it is enough space:
+ * (size_t *) ptr = valueLength;
+ ptr += sizeof (size_t);
+ memcpy(ptr, value, valueLength);
+ } else {
+ storeValue = true;
+ seq->m_tag = m_values.length();
}
}
- return rc;
+ if (storeValue){
+ m_values.append((Byte*) &valueLength, sizeof valueLength);
+ m_values.append(value, valueLength);
+ }
+#endif
+}
+/** @brief Puts a key value pair into the hashlist.
+ *
+ * @param key The key. This is a C string.
+ * @param value The value. This is a C string.
+ */
+void ReHashList::put(const char* key, const char* value){
+ put(key, -1, value, -1);
+}
+/** @brief Puts a key value pair into the hashlist.
+ *
+ * @param key The key.
+ * @param value The value.
+ */
+void ReHashList::put(const ReByteBuffer& key, const ReByteBuffer& value){
+ put(key.str(), key.length(), value.str(), value.length());
}
diff --git a/base/ReHashList.hpp b/base/ReHashList.hpp
index d91d79b..88371d5 100644
--- a/base/ReHashList.hpp
+++ b/base/ReHashList.hpp
@@ -8,23 +8,24 @@
#ifndef REHASHLIST_H_
#define REHASHLIST_H_
-/** @brief A (very) simple associative array.
- * An instance stores key value pairs.
- * Keys and values are byte sequences. This includes c strings.
- *
+/** @brief A simple associative array.
+ *
+ * An instance stores key value pairs.
+ *
+ * Keys and values are byte sequences. This includes c strings.
*/
class ReHashList {
public:
typedef char Byte;
typedef ReSeqList::Sequence Sequence;
public:
- ReHashList();
+ ReHashList(int keyTagSize = 1, int contentLengthSize = 1, int keyLengthSize = 1);
virtual ~ReHashList();
public:
void clear();
bool get(const Byte* key, size_t keyLength, ReByteBuffer& value) const;
bool get(const ReByteBuffer& key, ReByteBuffer& value) const;
- bool next(size_t& position, ReByteBuffer* key, ReByteBuffer* val);
+ bool next(size_t& position, ReByteBuffer* key, ReByteBuffer* val) const;
void put(const Byte* key, size_t keyLength, const Byte* value, size_t valueLength);
void put(const char* key, const char* value);
void put(const ReByteBuffer& key, const ReByteBuffer& value);
@@ -32,11 +33,11 @@ protected:
int find(const Byte* key, size_t length) const;
protected:
- //@ Containing an array of keys.
- ReSeqList m_keys;
- //@ Containing the values. The tag of m_key
is the index
- //@of the start position in m_values
.
- ReByteBuffer m_values;
+ //@ Containing an array of keys.
+ ReSeqList m_keys;
+ //@ Containing the values. The tag of m_key
is the index
+ //@ of the start position in m_values
.
+ ReSeqList m_values;
};
#endif /* REHASHLIST_H_ */
diff --git a/base/ReProgramArgs.cpp b/base/ReProgramArgs.cpp
index d40573b..57d15bc 100644
--- a/base/ReProgramArgs.cpp
+++ b/base/ReProgramArgs.cpp
@@ -539,7 +539,8 @@ void ReProgramArgs::setLastError(const char* message){
m_lastError.set(message, -1);
}
-void ReProgramArgs::help(const char* message, bool issueLastError, ReStringList& lines){
+void ReProgramArgs::help(const char* message, bool issueLastError,
+ ReStringList& lines) const{
lines.append(m_usage);
lines.append("");
@@ -607,7 +608,7 @@ void ReProgramArgs::help(const char* message, bool issueLastError, ReStringList&
}
}
-void ReProgramArgs::help(const char* message, bool issueLastError, FILE* stream){
+void ReProgramArgs::help(const char* message, bool issueLastError, FILE* stream) const{
ReStringList lines;
help(message, issueLastError, lines);
for(size_t ii = 0; ii < lines.count(); ii++){
diff --git a/base/ReProgramArgs.hpp b/base/ReProgramArgs.hpp
index d7c22b7..dbb1b10 100644
--- a/base/ReProgramArgs.hpp
+++ b/base/ReProgramArgs.hpp
@@ -71,8 +71,8 @@ public:
void init(int argc, const char* argv[]);
void setLastError(const char* message);
- void help(const char* message, bool issueLastError, ReStringList& lines);
- void help(const char* message, bool issueLastError, FILE* stream);
+ void help(const char* message, bool issueLastError, ReStringList& lines) const;
+ void help(const char* message, bool issueLastError, FILE* stream) const;
void setUsage(const char* usage[]);
private:
diff --git a/base/ReSeqList.cpp b/base/ReSeqList.cpp
index 4e8a019..c10e5bf 100644
--- a/base/ReSeqList.cpp
+++ b/base/ReSeqList.cpp
@@ -7,6 +7,41 @@
#include "base/rebase.hpp"
+enum RELOC_SEQLIST {
+ LC_SET_SIZES_1 = LC_SEQLIST + 1, // 50201
+ LC_SET_SIZES_2, // 50202
+ LC_SET_SIZES_3, // 50203
+};
+/**
+ * Implementation:
+ * ReSeqList is a storage for byte sequences.
+ * Each stored element is accessible by its index.
+ *
+ * A Sequence is a tuple (m_index, m_length, m_tag).
+ *
+ * - m_index is the index in a content buffer
+ * - m_length is the length of byte sequence in the content
+ * - m_tag is a additional info used by the list user, not in the container
+ * If the lengths of the byte sequences are equal there is no need for saving
+ * the length. In this case the length is stored one time (in
+ * m_commonSize
).
+ * The tag can be omitted if there is no need. This is controlled by
+ * m_sequSize
.
+ *
+ * ReSeqList contains a content buffer (m_content
) and a table of
+ * content (m_list
).
+ *
+ * - m_content stores a sequences of byte sequences. No separator is used.
+ * The start and the length of the byte sequences are stored in m_list.
+ * - m_list is a potentially sorted array of Sequence blocks.
+ *
+ * Each new element is stored at the end of m_content, even if is a replacement
+ * for a existing element. Also deletion of an element does not free the space
+ * in m_content. But there is a method pack(), which removes the gaps.
+ * The lost space is available in m_lost
.
+ */
+
/** @brief Constructor.
*
* @param deltaList If there is not enough space in the list (array)
@@ -18,7 +53,15 @@ ReSeqList::ReSeqList(size_t deltaList, int deltaBuffer)
:
m_content(deltaBuffer),
m_list(deltaList),
- m_lost(0)
+ m_lost(0),
+ m_entrySize(sizeof(Index) + 1 + 8),
+ m_commonSize(INDIVIDUAL_SIZE),
+ m_sizeOfTag(sizeof (void*)),
+ m_sizeOfLength(1),
+ m_offsetOfTag(sizeof(Index) + 1),
+ m_offsetOfLength(sizeof(Index)),
+ m_sorted(false),
+ m_ignoreCase(false)
{
}
/** @brief Destructor.
@@ -33,9 +76,16 @@ ReSeqList::ReSeqList(const ReSeqList& source)
:
m_content(source.m_content),
m_list(source.m_list),
- m_lost(source.m_lost)
+ m_lost(source.m_lost),
+ m_entrySize(source.m_entrySize),
+ m_commonSize(source.m_commonSize),
+ m_sizeOfTag(source.m_sizeOfTag),
+ m_sizeOfLength(source.m_sizeOfLength),
+ m_offsetOfTag(source.m_offsetOfTag),
+ m_offsetOfLength(source.m_offsetOfLength),
+ m_sorted(source.m_sorted),
+ m_ignoreCase(false)
{
-
}
/** @brief Assignment operator.
*
@@ -47,9 +97,17 @@ ReSeqList& ReSeqList::operator = (const ReSeqList& source){
m_content = source.m_content;
m_list = source.m_list;
m_lost = source.m_lost;
+ m_entrySize = source.m_entrySize;
+ m_commonSize = source.m_commonSize;
+ m_sizeOfTag = source.m_sizeOfTag;
+ m_sizeOfLength = source.m_sizeOfLength;
+ m_offsetOfTag = source.m_offsetOfTag;
+ m_offsetOfLength = source.m_offsetOfLength;
+ m_sorted = source.m_sorted;
+ m_ignoreCase = source.m_ignoreCase;
return *this;
}
-/** @brief Adds a byte sequence to the list.
+/** @brief Adds an element to the list.
*
* @param index The index of the new entry. If greater than the list length it will be appended.
* @param source The pointer of the byte sequence to insert.
@@ -59,18 +117,150 @@ ReSeqList& ReSeqList::operator = (const ReSeqList& source){
void ReSeqList::add(Index index, const Byte* source, size_t sourceLength, Tag tag){
Sequence seq;
if (sourceLength == (size_t) -1)
- sourceLength = strlen(source) + 1;
- seq.m_index = m_content.length();
- seq.m_length = sourceLength;
- seq.m_tag = tag;
+ sourceLength = strlen(source);
+ setSequence(&seq, m_content.length(), sourceLength, tag);
m_content.append(source, sourceLength);
- if (index >= count()){
- m_list.append((Byte*) &seq, sizeof seq);
- }else{
- m_list.insert(index * sizeof(Sequence), (Byte*) &seq, sizeof seq);
+ if (m_sorted){
+ //@ToDo
+ assert(false);
+ } else {
+ if (index >= count()){
+ m_list.append((Byte*) &seq, m_entrySize);
+ }else{
+ m_list.insert(index * m_entrySize, (Byte*) &seq, m_entrySize);
+ }
+ }
+}
+
+/**
+ * @brief Searches the an element with binary search.
+ *
+ * @param toFind the value which should be found
+ * @param length -1 or the length of toFind
+ * @param index OUT: the index usable for insert:
+ * index is the smallest value with content[index] >= toFind
+ * @param tag OUT: the tag of the element (only if it was found).
+ * May be NULL
+ * @return -1: the key has been found.
+ */
+bool ReSeqList::binarySearch(const Byte* toFind, int length, Index& index,
+ Tag* tag) const
+{
+ assert(m_sorted);
+ if (length < 0)
+ length = strlen(toFind);
+ Index rc = (Index) -1;
+ int lbound = 0;
+ int theCount = count();
+ int ubound = theCount;
+ // binary search over the sorted vector:
+ while(lbound <= ubound){
+ int half = (ubound + lbound) / 2;
+ const Sequence* seq = reinterpret_cast(
+ m_list.str() + half * m_entrySize);
+ int currentLength = getLength(seq);
+ int minLength = currentLength < length ? currentLength : length;
+ const char* current = m_content.str() + seq->m_index;
+ int compareRc = m_ignoreCase
+ ? _memicmp(toFind, current, minLength)
+ : _memcmp(toFind, current, minLength);
+ if (compareRc == 0 && currentLength != length)
+ compareRc = currentLength > length ? -1 : 1;
+ if (compareRc < 0)
+ ubound = half - 1;
+ else if (compareRc > 0)
+ lbound = half + 1;
+ else {
+ rc = true;
+ index = half;
+ break;
+ }
+ }
+ if (! rc)
+ index = ubound;
+ return rc;
+}
+
+/** @brief Deletes all entries in the list.
+ */
+void ReSeqList::clear(){
+ m_content.setLength(0);
+ m_list.setLength(0);
+}
+
+/**
+ * Compares two elements.
+ *
+ * @param index1 the index of the first element
+ * @param index2 the index of the second element
+ * @return 0: element1 == element2
+ * < 0: element1 < element2
+ * > 0: element1 > element2
+ */
+int ReSeqList::compare(Index index1, Index index2){
+ Sequence* seq1 = getInfo(index1);
+ Sequence* seq2 = getInfo(index2);
+ size_t length1 = getLength(seq1);
+ size_t length2 = getLength(seq2);
+ int minLength = length1 < length2 ? length1 : length2;
+ int rc = m_ignoreCase
+ ? _memicmp(m_content.str() + seq1->m_index,
+ m_content.str() + seq2->m_index, minLength)
+ : _memcmp(m_content.str() + seq1->m_index,
+ m_content.str() + seq2->m_index, minLength);
+ if (rc == 0 && length1 != length2)
+ rc = length1 < length2 ? -1 : 1;
+ return rc;
+}
+/**
+ * Writes the content to a stream.
+ *
+ * @param fp target file pointer
+ */
+void ReSeqList::dump(FILE* fp) const{
+ ReByteBuffer buffer;
+ Tag tag;
+ for (int ix = 0; ix < count(); ix++){
+ get(ix, buffer, &tag);
+ fprintf(fp, "%d: (%lld) [%d] %s\n", ix, (int64_t) tag, buffer.length(),
+ buffer.str());
}
}
-/** @brief Returns a byte sequence from the list.
+
+/** @brief Returns the index of a stored byte sequence.
+ *
+ * @param toFind the byte sequence to find
+ * @param length -1 or the length of toFind
.
+ *
+ * @return: -1: The key was not found. Otherwise: The index of the key in the key sequence array.
+ */
+ReSeqList::Index ReSeqList::find(const Byte* toFind, size_t length,
+ Tag* tag) const{
+ if (length == (size_t) -1)
+ length = strlen(toFind);
+ int rc = -1;
+ int theCount = count();
+ for (int ix = 0; ix < theCount; ix++){
+ const ReSeqList::Sequence* seq = getInfo(ix);
+ Tag currentTag;
+ size_t currentLength = getLengthAndTag(seq, currentTag);
+ if (currentLength == length){
+ const Byte* ptr = reinterpret_cast(m_content.str()
+ + seq->m_index);
+ int comparison = m_ignoreCase ? _memicmp(ptr, toFind, length)
+ : _memcmp(ptr, toFind, length);
+ if (comparison == 0){
+ rc = ix;
+ if (tag != NULL)
+ *tag = currentTag;
+ break;
+ }
+ }
+ }
+ return rc;
+}
+
+/** @brief Returns an element from the list.
*
* @param index The index of the sequence in the list.
* @param value Out: The stored sequence will be copied here.
@@ -82,14 +272,135 @@ void ReSeqList::add(Index index, const Byte* source, size_t sourceLength, Tag ta
bool ReSeqList::get(Index index, ReByteBuffer& value, Tag* tag) const{
bool rc = false;
if (index < count()){
- Sequence* seq = ((Sequence*)m_list.buffer()) + index;
- value.set(m_content.buffer() + seq->m_index, seq->m_length);
- if (tag != NULL)
- *tag = seq->m_tag;
+ const Sequence* seq = getInfo(index);
+ size_t length = tag == NULL ? getLength(seq) : getLengthAndTag(seq, *tag);
+ value.set(m_content.str() + seq->m_index, length);
rc = true;
}
return rc;
}
+/** @brief Returns the byte sequence length of the element.
+ *
+ * @return the length of the element described in the Sequence seq
+ */
+size_t ReSeqList::getLength(const Sequence* seq) const{
+ size_t rc;
+ if (m_commonSize != INDIVIDUAL_SIZE)
+ rc = m_commonSize;
+ else {
+ const uint8_t* ptr = reinterpret_cast(seq) + m_offsetOfLength;
+ switch (m_sizeOfLength){
+ case 1:
+ rc = *ptr;
+ break;
+ case 2:
+ rc = ptr[0] + (ptr[1] << 8);
+ break;
+ case 3:
+ rc = ptr[0] + (ptr[1] << 8) + (ptr[1] << 16);
+ break;
+ case 4:
+ rc = ptr[0] + (ptr[1] << 8) + (ptr[2] << 16) + (ptr[3] << 24);
+ break;
+ case 5:
+ rc = ptr[0] + (ptr[1] << 8) + (ptr[2] << 16) + (ptr[3] << 24)
+ + ((int64_t)ptr[4] << 32);
+ break;
+ default:
+ assert(false);
+ rc = 0;
+ break;
+ }
+ }
+ return rc;
+}
+/** @brief Returns the byte sequence length and the tag of an element.
+ *
+ * @param tag OUT: the tag of the element.
+ * @return the byte sequence length of the element
+ */
+size_t ReSeqList::getLengthAndTag(const Sequence* seq, Tag& tag) const{
+ size_t rc;
+ if (m_commonSize != INDIVIDUAL_SIZE)
+ rc = m_commonSize;
+ else {
+ const uint8_t* ptr = reinterpret_cast(seq) + m_offsetOfLength;
+ switch (m_sizeOfLength){
+ case 1:
+ rc = *ptr;
+ break;
+ case 2:
+ rc = ptr[0] + (ptr[1] << 8);
+ break;
+ case 3:
+ rc = ptr[0] + (ptr[1] << 8) + (ptr[1] << 16);
+ break;
+ case 4:
+ rc = ptr[0] + (ptr[1] << 8) + (ptr[2] << 16) + (ptr[3] << 24);
+ break;
+ case 5:
+ rc = ptr[0] | (ptr[1] << 8) | (ptr[2] << 16) | (ptr[3] << 24)
+ | ((uint64_t)ptr[4] << 32);
+ break;
+ default:
+ assert(false);
+ rc = 0;
+ break;
+ }
+ }
+ const uint8_t* ptr = reinterpret_cast(seq) + m_offsetOfTag;
+ switch (m_sizeOfTag){
+ case 0:
+ tag = 0;
+ break;
+ case 1:
+ tag = *ptr;
+ break;
+ case 2:
+ tag = ptr[0] + (ptr[1] << 8);
+ break;
+ case 3:
+ tag = ptr[0] + (ptr[1] << 8) + (ptr[2] << 16);
+ break;
+ case 4:
+ tag = ptr[0] + (ptr[1] << 8) + (ptr[2] << 16) + (ptr[3] << 24);
+ break;
+ case 5:
+ tag = Tag(ptr[0] | (ptr[1] << 8) | (ptr[2] << 16) | (uint64_t(ptr[3]) << 24)
+ | (uint64_t(ptr[4]) << 32));
+ break;
+ case 8:
+ tag = Tag(ptr[0] | (ptr[1] << 8) | (ptr[2] << 16) | (uint64_t(ptr[3]) << 24)
+ | (uint64_t(ptr[4]) << 32) | (uint64_t(ptr[5]) << 40)
+ | (uint64_t(ptr[6]) << 48) | (uint64_t(ptr[7]) << 56));
+ break;
+ default:
+ assert(false);
+ tag = 0;
+ break;
+ }
+ return rc;
+}
+
+/** @brief Removes an element given by its index.
+ *
+ * @param index The index of the entry to remove.
+ */
+void ReSeqList::remove(Index index){
+ if (index <= count()){
+ Sequence* seq = getInfo(index);
+ size_t currentLength = getLength(seq);
+ // Is this the last entry in m_content?
+ if (seq->m_index + currentLength >= m_content.length()){
+ // We can free the content:
+ m_content.setLength(seq->m_index);
+ } else {
+ m_lost += seq->m_length;
+ }
+ // Remove the entry from the list:
+ m_list.remove(index * m_entrySize, m_entrySize);
+ }
+}
/** @brief Replaces the byte sequence in the list.
*
* @param index The index of the sequence to replace.
@@ -97,50 +408,244 @@ bool ReSeqList::get(Index index, ReByteBuffer& value, Tag* tag) const{
* @param sourceLength The length of the new value.
* @param tag An additional info associated to the source.
*/
-void ReSeqList::set(Index index, const Byte* source, size_t sourceLength, Tag tag){
+void ReSeqList::set(Index index, const Byte* source,
+ size_t sourceLength, Tag tag){
if (index >= count())
add(index, source, sourceLength, tag);
else {
if (sourceLength == (size_t) -1)
sourceLength = strlen(source) + 1;
Sequence* seq = getInfo(index);
- seq->m_tag = tag;
- if (seq->m_length >= sourceLength){
+ size_t currentLength = getLength(seq);
+ size_t indexContent;
+ if (currentLength >= sourceLength){
// Use the existing space:
- memcpy(m_content.buffer() + seq->m_index, source, sourceLength);
- m_lost += seq->m_length - sourceLength;
+ indexContent = seq->m_index;
+ memcpy(m_content.buffer() + indexContent, source, sourceLength);
+ m_lost += currentLength - sourceLength;
} else {
// New space must be allocated:
- m_lost += seq->m_length;
- seq->m_index = m_content.length();
+ m_lost += currentLength;
+ indexContent = m_content.length();
m_content.append(source, sourceLength);
}
+ setSequence(seq, indexContent, sourceLength, tag);
}
}
-/** @brief Removes an element given by its index.
+/** @brief Sets the Sequence of an element.
*
- * @param index The index of the entry to remove.
+ * @param seq the target sequence
+ * @param index the index in m_content
+ * @param length the length of the content
+ * @param tag the tag of the element
*/
-void ReSeqList::remove(Index index){
- if (index <= count()){
- Sequence* seq = getInfo(index);
- // Is this the last entry in m_content?
- if (seq->m_index + seq->m_length >= m_content.length()){
- // We can free the content:
- m_content.setLength(seq->m_index);
- } else {
- m_lost += seq->m_length;
+void ReSeqList::setSequence(Sequence* seq, Index index, size_t length, Tag tag){
+ seq->m_index = index;
+ if (m_commonSize == INDIVIDUAL_SIZE){
+ uint8_t* ptr = reinterpret_cast(seq) + m_offsetOfLength;
+ switch (m_sizeOfLength){
+ case 1:
+ ptr[0] = length & 0xff;
+ break;
+ case 2:
+ ptr[0] = length & 0xff;
+ ptr[1] = (length >> 8) & 0xff;
+ break;
+ case 3:
+ ptr[0] = length & 0xff;
+ ptr[1] = (length >> 8) & 0xff;
+ ptr[2] = (length >> 16) & 0xff;
+ break;
+ case 4:
+ ptr[0] = length & 0xff;
+ ptr[1] = (length >> 8) & 0xff;
+ ptr[2] = (length >> 16) & 0xff;
+ ptr[3] = (length >> 24) & 0xff;
+ break;
+ case 5:
+ ptr[0] = length & 0xff;
+ ptr[1] = (length >> 8) & 0xff;
+ ptr[2] = (length >> 16) & 0xff;
+ ptr[3] = (length >> 24) & 0xff;
+ ptr[4] = (length >> 32) & 0xff;
+ break;
+ default:
+ assert(false);
+ break;
+ }
+ }
+ if (m_sizeOfTag > 0){
+ uint8_t* ptr = reinterpret_cast(seq) + m_offsetOfTag;
+ switch (m_sizeOfTag){
+ case 1:
+ ptr[0] = tag & 0xff;
+ break;
+ case 2:
+ ptr[0] = length & 0xff;
+ ptr[1] = (tag >> 8) & 0xff;
+ break;
+ case 3:
+ ptr[0] = tag & 0xff;
+ ptr[1] = (tag >> 8) & 0xff;
+ ptr[2] = (tag >> 16) & 0xff;
+ break;
+ case 4:
+ ptr[0] = tag & 0xff;
+ ptr[1] = (tag >> 8) & 0xff;
+ ptr[2] = (tag >> 16) & 0xff;
+ ptr[3] = (tag >> 24) & 0xff;
+ break;
+ case 5:
+ ptr[0] = tag & 0xff;
+ ptr[1] = (tag >> 8) & 0xff;
+ ptr[2] = (tag >> 16) & 0xff;
+ ptr[3] = (tag >> 24) & 0xff;
+ ptr[4] = (tag >> 32) & 0xff;
+ break;
+ case 8:
+ ptr[0] = tag & 0xff;
+ ptr[1] = (tag >> 8) & 0xff;
+ ptr[2] = (tag >> 16) & 0xff;
+ ptr[3] = (tag >> 24) & 0xff;
+ ptr[4] = (tag >> 32) & 0xff;
+ ptr[5] = (tag >> 40) & 0xff;
+ ptr[6] = (tag >> 48) & 0xff;
+ ptr[7] = (tag >> 56) & 0xff;
+ break;
+ default:
+ assert(false);
+ break;
}
- // Remove the entry from the list:
- m_list.remove(index * sizeof (Sequence), sizeof (Sequence));
}
}
-/** @brief Deletes all entries in the list.
+/** @brief Sets the switch for case sensitivity.
+ *
+ * @param onNotOff true
: the comparison will be case insensitive
*/
-void ReSeqList::clear(){
- m_content.setLength(0);
- m_list.setLength(0);
+void ReSeqList::setIgnoreCase(bool onNotOff){
+ if (m_ignoreCase != onNotOff){
+ m_ignoreCase = onNotOff;
+ if (m_sorted)
+ sort();
+ }
}
+/** @brief Sets the switch for sorting.
+ *
+ * @param onNotOff true
: the list will be sorted
+ */
+void ReSeqList::setSorted(bool onNotOff){
+ if (m_sorted != onNotOff){
+ m_sorted = onNotOff;
+ if (m_sorted)
+ sort();
+ }
+}
+
+/**
+ * Sets the length of the stored items tag and length (in Sequence).
+ *
+ * @param sizeOfTag 0: no tag stored.
+ * Otherwise: length in byte: 1,2,3,4,5 or 8
+ * @param sizeOfLength 0: no length stored (constant length, stored in
+ * m_commonSize).
+ * Otherwise: length in byte: 1, 2, 3, 4 or 5
+ * @param constantSize 0 or the size of the sequence if the length is not
+ * indiviually stored.
+ * If > 0 sizeOfLength
must be 0!
+ */
+void ReSeqList::setSizes(int sizeOfTag, int sizeOfLength, int constantSize){
+ switch(m_sizeOfLength = sizeOfLength){
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ break;
+ default:
+ globalLogger()->sayF(LOG_ERROR | CAT_LIB, LC_SET_SIZES_2,
+ i18n("Invalid length length: $1 (instead of 0,1,2,4,5)")).arg(sizeOfTag).end();
+ m_sizeOfLength = 8;
+ break;
+ }
+ switch(m_sizeOfTag = sizeOfTag){
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 8:
+ break;
+ default:
+ globalLogger()->sayF(LOG_ERROR | CAT_LIB, LC_SET_SIZES_1,
+ i18n("Invalid tag length: $1 (instead of 0,1,2,3,4,8)")).arg(sizeOfTag).end();
+ m_sizeOfTag = 8;
+ break;
+ }
+ m_offsetOfLength = sizeof(Index);
+ m_offsetOfTag = sizeof(Index) + m_sizeOfLength;
+ if (sizeOfLength > 0 && constantSize > 0){
+ globalLogger()->sayF(LOG_ERROR | CAT_LIB, LC_SET_SIZES_1,
+ i18n("collision of sizeOfLength $1 and constantSize $2"))
+ .arg(sizeOfLength).arg(constantSize).end();
+ constantSize = 0;
+ }
+ m_commonSize = constantSize;
+}
+
+/** @brief Sorts the list.
+ * Note: the comparison is controlled by m_ignoreCase
.
+ */
+void ReSeqList::sort(){
+ // Build the heap in array so that largest value is at the root:
+ int theCount = (int) count();
+ for (int start = (theCount - 2) / 2; start >= 0; start--) {
+ // (shift down the node at index 'start' to the proper place such
+ // that all nodes below the start index are in heap order)
+ shiftDown(start, theCount);
+ }
+ // The following loop maintains the invariants that array[0:end] is a heap
+ // and every element beyond end is greater than everything before it
+ // (so array[end:count] is in sorted order)
+ for (int end = theCount - 1; end > 0; end--) {
+ // swap [end] with [0]:
+ Sequence seq;
+ Byte* ptrEnd = m_list.buffer() + end * m_entrySize;
+ memcpy(&seq, ptrEnd, m_entrySize);
+ memcpy(ptrEnd, m_list.buffer(), m_entrySize);
+ memcpy(m_list.buffer(), &seq, m_entrySize);
+ shiftDown(0, end);
+ }
+}
+/**
+ * Correct the order in the heap.
+ *
+ * @param start the lower bound of the interval to inspect
+ * @param endEnd the upper bound of the interval to inspect
+ */
+void ReSeqList::shiftDown(int start, int end){
+ int root = start;
+ Sequence seq;
+ // while the root has at least one child:
+ while (root * 2 + 1 < end ) {
+ // left child:
+ int child = 2 * root + 1;
+ if (child + 1 < end && compare(child, child + 1) < 0)
+ child++;
+ if (compare(root, child) >= 0)
+ break;
+ else
+ {
+ // swap [child] with [root]
+ Byte* ptrRoot = m_list.buffer() + root * m_entrySize;
+ Byte* ptrChild = m_list.buffer() + child * m_entrySize;
+ memcpy(&seq, ptrRoot, m_entrySize);
+ memcpy(ptrRoot, ptrChild, m_entrySize);
+ memcpy(ptrChild, &seq, m_entrySize);
+ root = child;
+ }
+ }
+}
diff --git a/base/ReSeqList.hpp b/base/ReSeqList.hpp
index afc3351..3cdbd8f 100644
--- a/base/ReSeqList.hpp
+++ b/base/ReSeqList.hpp
@@ -8,18 +8,16 @@
#ifndef RESEQLIST_H_
#define RESEQLIST_H_
-/** @brief This class implements a dynamic (selfgrowing) array of byte sequences.
- * A byte sequence is an array of byte.
+#define INDIVIDUAL_SIZE (size_t(-1))
+/** @brief This class implements a dynamic (self growing) array of elements.
+ *
+ * An element is a tuple of a byte sequences and a tag.
+ * A tag is an integer value which is interpreted only by the user
+ * of the list. The list knows nothing abbout it.
+ *
+ * A byte sequence is an array of byte.
* The byte sequences may have different lengths.
* This implies the handling of C string arrays too.
- *
There is room for storing a so called tag with any sequence.
- * This can be used for any purpose.
- *
- * This is a very simple implementation: use it when:
- *
- The needed size is known nearly.
- * - The array elements increment rarely their length.
- *
- *
*/
class ReSeqList {
public:
@@ -28,7 +26,7 @@ public:
typedef int64_t Tag;
typedef struct {
Index m_index;
- size_t m_length;
+ uint64_t m_length;
Tag m_tag;
} Sequence;
public:
@@ -37,17 +35,39 @@ public:
ReSeqList(const ReSeqList& source);
ReSeqList& operator = (const ReSeqList& source);
public:
- void clear();
void add(Index index, const Byte* source, size_t sourceLength, Tag tag = 0);
- bool get(Index index, ReByteBuffer& value, Tag* tag = NULL) const;
- void remove(Index index);
- void set(Index index, const Byte* source, size_t sourceLength, Tag tag);
+ bool binarySearch(const Byte* toFind, int length, Index& index,
+ Tag* tag = NULL) const;
+ void clear();
+ int compare(Index index1, Index index2);
/** @brief Returns the count of defined entries in the list.
* @return The number of defined entries in the list (array).
*/
inline Index count() const {
- return m_list.length() / sizeof (Sequence);
+ return m_list.length() / m_entrySize;
}
+ void dump(FILE* fp) const;
+ Index find(const Byte* toFind, size_t length, Tag* tag = NULL) const;
+ bool get(Index index, ReByteBuffer& value, Tag* tag = NULL) const;
+ /** Returns whether the list is sorted automatically.
+ * Note: comparison is controlled by m_ignoreCase
.
+ * @return true
the byte sequences will be sorted
+ */
+ bool isSorted() const {
+ return m_sorted;
+ }
+ /** Returns whether the comparisons inside the list are case insensitive.
+ * @return true
the comparisons inside the list are case insensitive
+ */
+ bool ignoreCase() const {
+ return m_ignoreCase;
+ }
+ void remove(Index index);
+ void set(Index index, const Byte* source, size_t sourceLength, Tag tag);
+ void setSizes(int sizeOfTag, int sizeOfLength, int constantLength = 0);
+ void setSorted(bool onNotOff);
+ void setIgnoreCase(bool onNotOff);
+ void sort();
protected:
/** @brief Returns a pointer of the content buffer.
* @return A pointer of the first byte of the content buffer.
@@ -61,18 +81,37 @@ protected:
* @return The pointer of the entry.
*/
inline Sequence* getInfo(Index index) const {
- return &((Sequence*) m_list.buffer())[index];
+ return reinterpret_cast(m_list.buffer() + m_entrySize * index);
}
-
+ size_t getLength(const Sequence* seq) const;
+ size_t getLengthAndTag(const Sequence* seq, Tag& tag) const;
+ void setSequence(Sequence* seq, Index index, size_t length, Tag tag);
+ void shiftDown(int from, int to);
protected:
- // friend class ReSeqList;
- //@ Contains the sequences itself.
- ReByteBuffer m_content;
- //@ Contains an array of Sequence
s.
- ReByteBuffer m_list;
- //@ If strings have been replaced the space in m_content is still allocated.
- //@ This is the sum of lost space.
+ //@ Contains the sequences itself.
+ ReByteBuffer m_content;
+ //@ Contains an array of Sequence
s.
+ ReByteBuffer m_list;
+ //@ If strings have been replaced/deleted the space in m_content is still allocated.
+ //@ This is the sum of lost space.
size_t m_lost;
+ //@ The length of a Sequence block: sizeof(seq->m_index)+m_lengthTag+m_lengthLength
+ size_t m_entrySize;
+ //@ -1: The Sequence block contains the individual length.
+ //@ 0: no content will be stored
+ //@ Otherwise: the common length of all elements in m_content
+ size_t m_commonSize;
+ //@ length of the tag: 0,1,2,4,8
+ size_t m_sizeOfTag;
+ //@ length of the length element in Sequence: 0,1,2,4
+ size_t m_sizeOfLength;
+ //@ offset of the tag (from start of Sequence)
+ size_t m_offsetOfTag;
+ //@ offset of the length (from start of Sequence)
+ size_t m_offsetOfLength;
+ bool m_sorted;
+ //@ true: the comparison will be case insensitive
+ bool m_ignoreCase;
};
#endif /* RESEQLIST_H_ */
diff --git a/base/ReStringList.cpp b/base/ReStringList.cpp
index 7c28aa0..71fb641 100644
--- a/base/ReStringList.cpp
+++ b/base/ReStringList.cpp
@@ -49,7 +49,7 @@ ReStringList& ReStringList::append(const ReByteBuffer& source, Tag tagOf){
* @param source The new stringlist.
* @return the instance itself (for chaining)
*/
-ReStringList& ReStringList::append(ReStringList& source){
+ReStringList& ReStringList::append(const ReStringList& source){
for (size_t ii = 0; ii < source.count(); ii++)
add(-1, source.strOf(ii), source.sizeOf(ii), source.tagOf(ii));
return *this;
@@ -72,7 +72,7 @@ void ReStringList::insert(Index index, const char* source, Tag tagOf){
* @param tagOf The tagOf of the replace element.
*/
void ReStringList::replace(Index index, const char* source, Tag tagOf){
- set(index, source, strlen(source) + 1, tagOf);
+ set(index, source, strlen(source), tagOf);
}
/** @brief Replaces a string in the internal array.
*
@@ -84,7 +84,7 @@ void ReStringList::replace(Index index, const char* source, Tag tagOf){
void ReStringList::replaceString(Index index, const char* source){
if (index < count()){
Sequence* seq = getInfo(index);
- set(index, source, strlen(source) + 1, seq->m_tag);
+ set(index, source, strlen(source), seq->m_tag);
}
}
/** @brief Replaces a tagOf in the internal array.
diff --git a/base/ReStringList.hpp b/base/ReStringList.hpp
index f68d622..a12b2db 100644
--- a/base/ReStringList.hpp
+++ b/base/ReStringList.hpp
@@ -35,7 +35,7 @@ public:
public:
ReStringList& append(const char* source, Tag tag = 0);
ReStringList& append(const ReByteBuffer& source, Tag tag = 0);
- ReStringList& append(ReStringList& source);
+ ReStringList& append(const ReStringList& source);
bool equal(const ReStringList& toCompare) const;
int firstDiff(const ReStringList& toCompare) const;
Index indexOf(const char* toFind, bool ignoreCase = false, Index start = 0) const;
diff --git a/base/ReTestUnit.cpp b/base/ReTestUnit.cpp
index 0c4180d..7d23938 100644
--- a/base/ReTestUnit.cpp
+++ b/base/ReTestUnit.cpp
@@ -122,6 +122,21 @@ void ReTestUnit::assertEqual(const char* expected, const char* current, int line
expected, current == NULL ? "" : current);
}
}
+
+/** @brief Compares two string values. If not equal this will be logged.
+ *
+ * @param expected the expected value
+ * @param current the current value
+ * @param lineNo the line number of the test (for the error message)
+ */
+void ReTestUnit::assertEqualIgnoreCase(const char* expected, const char* current, int lineNo){
+ if (current == NULL || _stricmp(expected, current) != 0){
+ logF(true, i18n("%s-%d: expected / current: length: %d / %d\n%.512s\n%.512s"),
+ m_sourceFile.str(), lineNo, strlen(expected), current == NULL ? 0 : strlen(current),
+ expected, current == NULL ? "" : current);
+ }
+}
+
/** @brief Checks whether a file exists. If not this will be logged.
*
* @param name the filename
diff --git a/base/ReTestUnit.hpp b/base/ReTestUnit.hpp
index eef2520..2b1d187 100644
--- a/base/ReTestUnit.hpp
+++ b/base/ReTestUnit.hpp
@@ -23,6 +23,7 @@ public:
void assertEqual(int expected, int current, int lineNo);
void assertEqual(unsigned int expected, unsigned int current, int lineNo);
void assertEqual(const char* expected, const char* current, int lineNo);
+ void assertEqualIgnoreCase(const char* expected, const char* current, int lineNo);
void assertEqualFiles(const char* name1, const char* name2, int lineNo);
void assertFileExists(const char* name, int lineNo);
void assertFalse(bool conditon, int lineNo);
@@ -48,6 +49,7 @@ protected:
#define checkN(ptr) assertNull((void*) ptr, __LINE__)
#define checkNN(ptr) assertNotNull((void*) ptr, __LINE__)
#define checkEqu(exp, cur) assertEqual(exp, cur, __LINE__)
+#define checkIEqu(exp, cur) assertEqualIgnoreCase(exp, cur, __LINE__)
#define checkFileExists(fn) assertFileExists(fn, __LINE__)
#define checkDirExists(fn) assertDirExists(fn, __LINE__)
#define checkFileEqu(f1, f2) assertEqualFiles(f1, f2, __LINE__)
diff --git a/base/baselocations.hpp b/base/baselocations.hpp
index 83b39ad..b96a8a6 100644
--- a/base/baselocations.hpp
+++ b/base/baselocations.hpp
@@ -13,6 +13,7 @@
enum RELOC_LIB {
LC_CONFIGFILE = 50000,
LC_DIRTOOLS = 50100,
+ LC_SEQLIST = 50200,
};
enum RELOC_UDPCONNECTION {
LC_UDPCONNECTION_CONSTRUCT = 50101,
diff --git a/cunit/cuReCString.cpp b/cunit/cuReCString.cpp
index ac4da6d..931a2c4 100644
--- a/cunit/cuReCString.cpp
+++ b/cunit/cuReCString.cpp
@@ -19,7 +19,6 @@ private:
}
void testReplaceSubstring(){
char buffer[256];
-#if 0
strcpy(buffer, "12.ab");
replaceSubstring(buffer, sizeof buffer, 0, "xy");
checkEqu("xy12.ab", buffer);
@@ -31,10 +30,8 @@ private:
replaceSubstring(buffer, 5, 0, "x");
replaceSubstring(buffer, 6, 0, "x");
checkEqu("xw.ab", buffer);
-#endif
}
void testMemicmp(){
-#if 0
checkEqu(0, _memicmp((void*) "abcd", (void*) "abcx", 3u));
checkEqu(0, _memicmp("aBcd", "AbCx", 3));
@@ -46,7 +43,6 @@ private:
checkEqu(1, _memicmp("bbcd", "abcx", 3));
checkEqu(-1, _memicmp("ABcd", "bbcx", 3));
-#endif
}
};
extern void testReCString(void);
diff --git a/cunit/cuReSeqList.cpp b/cunit/cuReSeqList.cpp
index e09f42c..2397b9d 100644
--- a/cunit/cuReSeqList.cpp
+++ b/cunit/cuReSeqList.cpp
@@ -7,27 +7,140 @@ public:
}
private:
void run(){
+ testSorted();
+ testIgnoreCase();
+ testFind();
+ testAdd();
testBase();
testRemove();
}
+ void checkElement(ReSeqList& list, ReSeqList::Index index, const char* value, ReSeqList::Tag expectedTag){
+ ReByteBuffer buffer;
+ ReSeqList::Tag tag;
+ checkT(list.get(index, buffer, &tag));
+ checkIEqu(value, buffer.str());
+ checkEqu(expectedTag, tag);
+ }
+ void testSorted(){
+ ReSeqList list;
+ list.setIgnoreCase(true);
+ ReByteBuffer value, expectedValue;
+ ReSeqList::Tag tag = 0;
+ ReSeqList::Tag expectedTag = 0;
+ list.add(-1, "bcd", -1, 200);
+ list.add(-1, "abcd", -1, 100);
+ list.add(-1, "abc", -1, 300);
+ list.add(-1, "abc", -1, 350);
+ list.add(-1, "cde", -1, 400);
+ log(false, "unsorted:");
+ list.dump(stdout);
+ list.setSorted(true);
+ log(false, "sorted:");
+ list.dump(stdout);
+ checkElement(list, 0U, "AbC", 350);
+ checkElement(list, 1U, "AbC", 300);
+ checkElement(list, 2U, "AbCd", 100);
+ checkElement(list, 3U, "bCd", 200);
+ checkElement(list, 4U, "cde", 400);
+ ReSeqList::Index index;
+ checkT(list.binarySearch("Abc", -1, index, &tag));
+ checkEqu(0u, index);
+ checkT(list.binarySearch("AbcD", -1, index, &tag));
+ checkEqu(2u, index);
+ checkT(list.binarySearch("bCd", -1, index, &tag));
+ checkEqu(3u, index);
+ checkT(list.binarySearch("cde", -1, index, &tag));
+ checkEqu(4u, index);
+ }
+ void testIgnoreCase(){
+ ReSeqList list;
+ list.setIgnoreCase(true);
+ ReByteBuffer value, expectedValue;
+ ReSeqList::Tag tag = 0;
+ ReSeqList::Tag expectedTag = 0;
+ list.add(-1, "bcd", -1, 200);
+ list.add(0, "abc", -1, 100);
+ list.dump(stdout);
+ checkEqu(0U, list.find("AbC", -1));
+ checkEqu(1U, list.find("BCD", -1));
+ }
+ void testAdd(){
+ ReSeqList list;
+ ReByteBuffer value, expectedValue;
+ ReSeqList::Tag tag = 0;
+ ReSeqList::Tag expectedTag = 0;
+ size_t maxIx = 64;
+ for (size_t ix = 0; ix < maxIx; ix++){
+ expectedTag = (1 << ix);
+ expectedValue.append("x", 1);
+ list.add(-1, expectedValue.str(), -1, expectedTag);
+ checkEqu(ix + 1, list.count());
+ checkT(list.get(ix, value, &tag));
+ checkEqu(expectedValue.str(), value.str());
+ if (expectedTag != tag)
+ checkEqu(expectedTag, tag);
+ }
+ expectedValue.setLength(0);
+ for (size_t ix = 0; ix < maxIx; ix++){
+ expectedTag = (1 << ix);
+ expectedValue.append("x", 1);
+ checkT(list.get(ix, value, &tag));
+ checkEqu(expectedValue.str(), value.str());
+ checkEqu(expectedTag, tag);
+ }
+
+ }
+ void testFind(){
+ ReSeqList list;
+ ReByteBuffer value, expectedValue;
+ ReSeqList::Tag tag = 0;
+ ReSeqList::Tag expectedTag = 0;
+ size_t maxIx = 13;
+ for (size_t ix = 0; ix < maxIx; ix++){
+ expectedTag = 10*ix;
+ expectedValue.append("x", 1);
+ list.add(-1, expectedValue.str(), -1, expectedTag);
+ checkEqu(ix + 1, list.count());
+ checkT(list.get(ix, value, &tag));
+ checkEqu(expectedValue.str(), value.str());
+ if (expectedTag != tag)
+ checkEqu(expectedTag, tag);
+ }
+ list.dump(stdout);
+ expectedValue.setLength(0);
+ for (size_t ix = 0; ix < maxIx; ix++){
+ expectedTag = -1;
+ expectedValue.append("x", 1);
+ if (ix >= 4)
+ expectedTag = -1;
+ checkEqu(ix, list.find(expectedValue.str(), expectedValue.length(), &expectedTag));
+ checkEqu(10*ix, expectedTag);
+ }
+ }
void testBase(){
ReSeqList list;
ReByteBuffer value;
ReSeqList::Tag tag = 0;
- list.add(-1, "123", -1, 100);
+ list.add(-1, "xxx", -1, 1llu << 31);
+ checkT(list.get(0, value, &tag));
+ checkEqu(1ll << 31, tag);
+ checkEqu("xxx", value.str());
+
+ list.clear();
+ list.add(-1, "123", -1, 0x3344556633445566ll);
checkEqu(1u, list.count());
checkT(list.get(0, value, &tag));
checkEqu("123", value.str());
- checkEqu((int64_t) 100, tag);
+ checkEqu((int64_t) 0x3344556633445566ll, tag);
- list.add(-1, "ab", -1, 200);
+ list.add(-1, "ab", -1, 0x345678abcdef3344ll);
checkEqu(2u, list.count());
checkT(list.get(0, value));
checkEqu("123", value.str());
checkT(list.get(1, value, &tag));
checkEqu("ab", value.str());
- checkEqu(200ll, tag);
+ checkEqu(0x345678abcdef3344ll, tag);
list.add(0, "xyz", -1, 300);
checkEqu(3u, list.count());
diff --git a/cunit/cuReStringList.cpp b/cunit/cuReStringList.cpp
index d38920f..a4baf00 100644
--- a/cunit/cuReStringList.cpp
+++ b/cunit/cuReStringList.cpp
@@ -7,14 +7,12 @@ public:
}
private:
void run(){
-#if 0
testAppendByteBuffer();
testBase();
testReplace();
testJoin();
testEqu();
testFile();
-#endif
}
void testAppendByteBuffer(){
ReStringList list;
diff --git a/cunit/cuReTraverser.cpp b/cunit/cuReTraverser.cpp
index de0ccab..9314d9f 100644
--- a/cunit/cuReTraverser.cpp
+++ b/cunit/cuReTraverser.cpp
@@ -132,7 +132,7 @@ private:
"-D5", "-d1", "-z1k", "-Z2M", "-p;*;-*~"
};
ReDirEntryFilter_t filter;
- opts.programArgs().init(sizeof argv / sizeof argv[0], argv);
+ opts.programArgsChangeable().init(sizeof argv / sizeof argv[0], argv);
opts.setFilterFromProgramArgs(filter);
// local time: +3600
const int DIFF = 3600;
diff --git a/cunit/testall.cpp b/cunit/testall.cpp
index ab66b06..f2921f4 100644
--- a/cunit/testall.cpp
+++ b/cunit/testall.cpp
@@ -11,8 +11,12 @@
#endif
void testBase(){
+ extern void testReHashList(void);
+ //testReHashList();
+ extern void testReSeqList();
+ testReSeqList();
extern void testReTestUnit();
- //testReTestUnit();
+ testReTestUnit();
extern void testReByteBuffer();
testReByteBuffer();
extern void testReSeqList(void);
@@ -59,9 +63,9 @@ void testOs(){
void testAll(){
try
{
+ testBase();
testString();
testOs();
- testBase();
} catch (ReException e){
fprintf(stderr, "testBase.cpp: unexpected exception: %s\n", e.getMessage());
}
diff --git a/os/ReDirTools.cpp b/os/ReDirTools.cpp
index f364a31..db2032e 100644
--- a/os/ReDirTools.cpp
+++ b/os/ReDirTools.cpp
@@ -106,14 +106,16 @@ ReDirOptions::ReDirOptions(const char* usage[], const char* examples[]) :
m_nodePatterns(),
m_pathPatterns(),
m_compoundUsage(NULL),
- m_countCompoundUsage(0)
+ m_countCompoundUsage(0),
+ m_output(stdout)
{
}
/**
* Destructor.
*/
ReDirOptions::~ReDirOptions(){
- delete[] m_compoundUsage;
+ close();
+ delete[] m_compoundUsage;
}
/**
* Initializes the compound usage message array.
@@ -150,6 +152,7 @@ void ReDirOptions::addCompoundUsage(const char** usage){
* Adds the standard filter options.
*/
void ReDirOptions::addStandardFilterOptions(){
+ // standard short options: D d O o P p q T t y Z z
m_programArgs.addInt("maxdepth",
i18n("the depth of the subdirectory is lower or equal \n"
"0: search is done only in the base directory"),
@@ -158,7 +161,11 @@ void ReDirOptions::addStandardFilterOptions(){
i18n("the depth of the subdirectory is greater or equal \n"
"0: search is done in all subdirectories"),
'd', "min-depth", 512);
- m_programArgs.addString("older",
+ m_programArgs.addString("output",
+ i18n("the name of the output file.\n"
+ "The output will written to this file instead of stdout"),
+ 'O', "output-file", false, NULL);
+ m_programArgs.addString("older",
i18n("the modification date is older than \n"
" is a date (e.g. 2015.02.17) or number followed by an unit\n"
"units: m(inutes) h(hours), d(days). Default: m(inutes)\n"
@@ -172,7 +179,7 @@ void ReDirOptions::addStandardFilterOptions(){
"A directory will be entered if at least one of the positive patterns\n"
"and none of the 'not patterns' matches\n"
"examples: ';*;-*/.git/' ',*/cache/,*/temp/"),
- 'P', "path-pattern", false, NULL);
+ 'P', "path-pattern", false, NULL);
m_programArgs.addString("nodepattern",
i18n("a list of patterns for the basename (name without path) separated by ';'\n"
"Each pattern can contain '*' as wildcard\n"
@@ -442,6 +449,12 @@ void ReDirOptions::setFilterFromProgramArgs(ReDirEntryFilter_t& filter){
filter.m_pathPatterns = &m_pathPatterns;
}
filter.m_traceInterval = m_programArgs.getInt("trace");
+ if (m_programArgs.getString("output", buffer)[0] != '\0'){
+ if ( (m_output = fopen(buffer.str(), "w")) == NULL){
+ help("cannot open output file", buffer.str());
+ m_output = stdout;
+ }
+ }
}
/**
* Prints a help message, the error message and exits.
@@ -450,7 +463,7 @@ void ReDirOptions::setFilterFromProgramArgs(ReDirEntryFilter_t& filter){
* @param message2 an additional message
*/
-void ReDirOptions::help(const char* errorMessage, const char* message2 ){
+void ReDirOptions::help(const char* errorMessage, const char* message2) const{
ReByteBuffer msg;
if (errorMessage != 0)
msg.append(errorMessage, -1);
@@ -475,6 +488,13 @@ void ReDirOptions::checkStandardFilterOptions(){
checkPatternList(m_programArgs.getString("pathpattern", buffer));
}
+void ReDirOptions::close(){
+ if (m_output != stdout){
+ fclose(m_output);
+ m_output = stdout;
+ }
+}
+
/**
* Constructor.
*/
@@ -584,14 +604,14 @@ void ReDirStatistic::run(int argc, const char* argv[]){
ReByteBuffer buffer;
for (size_t ix = 0; ix < list.count(); ix++){
buffer.set(list.strOf(ix), list.strLengthOf(ix));
- printf("%s\n", buffer.str());
+ fprintf(m_output, "%s\n", buffer.str());
}
if (! m_programArgs.getBool("quiet")){
int duration = int(time(NULL) - start);
- printf("Duration: ");
+ fprintf(m_output, "Duration: ");
if (duration >= 3600)
- printf("%d:", duration / 3600);
- printf("%02d:%02d\n", duration % 3600 / 60, duration % 60);
+ fprintf(m_output, "%d:", duration / 3600);
+ fprintf(m_output, "%02d:%02d\n", duration % 3600 / 60, duration % 60);
}
} catch (ReOptionException& exc) {
m_programArgs.help(exc.getMessage(), false, stdout);
@@ -624,8 +644,6 @@ const ReStringList& ReDirStatistic::calculate(const char* base, int level,
while( (entry = traverser.rawNextFile(currentDepth))){
//@todo
ReByteBuffer dummy(entry->m_path); dummy.append(entry->node());
- int passNo = entry->m_passNo;
- const char* dummyStr = dummy.str();
if (currentDepth <= level && ! entry->m_path.equals(current->m_path)){
if (currentDepth <= topOfStack){
// close the entries of the stack above the new top level:
@@ -812,9 +830,9 @@ void ReDirList::list(int argc, const char* argv[]){
}
if (! printOneFile(entry)){
if (shortOutput)
- printf("%s%s\n", entry->m_path.str(), entry->node());
+ fprintf(m_output, "%s%s\n", entry->m_path.str(), entry->node());
else
- printf("%s %12.6f %s %02x %s%s\n",
+ fprintf(m_output, "%s %12.6f %s %02x %s%s\n",
entry->rightsAsString(bufferRights),
entry->fileSize() / 1E6,
entry->filetimeAsString(bufferTime),
@@ -825,7 +843,7 @@ void ReDirList::list(int argc, const char* argv[]){
}
if (verbose){
int duration = int(time(NULL) - start);
- printf ("+++ %d dirs and %d file(s) with %.6f MByte in %02d:%02d sec\n",
+ fprintf(m_output, "+++ %d dirs and %d file(s) with %.6f MByte in %02d:%02d sec\n",
dirs, files, sumSizes / 1E6, duration / 60, duration % 60);
}
} catch(ReOptionException& exc){
@@ -839,6 +857,7 @@ void ReDirList::list(int argc, const char* argv[]){
ReDirBatch::ReDirBatch() :
ReDirOptions(s_batchUsage, s_batchExamples)
{
+ // standard short options: D d O o P p q T t y Z z
m_programArgs.addString("first",
i18n("defines the first line of the output"),
'1', "first-line", true,
@@ -972,7 +991,7 @@ void ReDirBatch::createBatch(int argc, const char* argv[]){
} else {
replaceMakros(arguments.str(), entry, delim, line);
}
- printf("%s\n", line.str());
+ fprintf(m_output, "%s\n", line.str());
}
}
if (verbose){
@@ -982,7 +1001,7 @@ void ReDirBatch::createBatch(int argc, const char* argv[]){
#elif defined __WIN32__
const char* prefix = "rem";
#endif
- printf ("%s %d dir(s) and %d file(s) with %.6f MByte in %02d:%02d sec\n",
+ fprintf(m_output, "%s %d dir(s) and %d file(s) with %.6f MByte in %02d:%02d sec\n",
prefix, dirs, files, sumSizes / 1E6, duration / 60, duration % 60);
}
} catch(ReOptionException& exc){
@@ -1012,25 +1031,26 @@ ReDirSync::ReDirSync() :
ReDirOptions(s_syncUsage, s_syncExamples),
m_buffer()
{
+ // standard short options: D d O o P p q T t y Z z
m_buffer.ensureSize(4u*1024u*1024u);
m_programArgs.addBool("add",
i18n("copies only files which does not exist on the target"),
'a', "add", false);
m_programArgs.addBool("dry",
i18n("does nothing, but says what should be done"),
- 'd', "dry", false);
+ 'Y', "dry", false);
m_programArgs.addInt("timediff",
i18n("filetime difference is considered to be equal\n"
"if the difference is less than this value (in seconds)"),
- 'D', "time-delta", 2);
+ 'I', "time-delta", 2);
m_programArgs.addBool("ignoredate",
- i18n("the modification is recognized only by the different size (not time)"),
+ i18n("the modification is recognized only by the different size (not time)"),
'i', "ignore-time", false);
m_programArgs.addBool("chatter",
- i18n("comments the action of each file"),
+ i18n("comments the action of each file"),
'h', "chatter", false);
m_programArgs.addBool("mustexist",
- i18n("files which don't exist on the target will not be copied"),
+ i18n("files which don't exist on the target will not be copied"),
'm', "must-exist", false);
addStandardFilterOptions();
}
@@ -1093,7 +1113,7 @@ bool ReDirSync::copyFile(const char* source, time_t modified, int64_t size,
fseek(fpTarget, 0, SEEK_SET);
while(size > 0){
size_t blockSize = buffer.capacity();
- if (blockSize > size)
+ if ((int) blockSize > size)
blockSize = size;
if (fread(buffer.buffer(), blockSize, 1, fpSource) != 1){
if (logger != NULL)
@@ -1194,23 +1214,23 @@ void ReDirSync::synchronize(int argc, const char* argv[]){
bool exists = stat(targetFile.str(), &info) == 0;
if (! exists && ! mustExist){
if (chatterMode)
- printf("-ignored: %s does not exist\n", targetRelativePath);
+ fprintf(m_output, "-ignored: %s does not exist\n", targetRelativePath);
continue;
}
if (exists && addOnly){
if (chatterMode)
- printf("~ignored: %s exists\n", targetRelativePath);
+ fprintf(m_output, "~ignored: %s exists\n", targetRelativePath);
continue;
}
if (ignoreDate && entry->fileSize() == info.st_size){
if (chatterMode)
- printf("_ignored: %s same size\n", targetRelativePath);
+ fprintf(m_output, "_ignored: %s same size\n", targetRelativePath);
continue;
}
if (! ignoreDate && entry->filetimeToTime(entry->modified()) - info.st_mtime
> maxFileTimeDiff) {
if (chatterMode)
- printf("=ignored: %s same time\n", targetRelativePath);
+ fprintf(m_output, "=ignored: %s same time\n", targetRelativePath);
continue;
} else {
if (entry->isDirectory())
@@ -1220,7 +1240,7 @@ void ReDirSync::synchronize(int argc, const char* argv[]){
sumSizes += entry->fileSize();
}
if (verbose || chatterMode || dry)
- printf("%c%s%s\n", exists ? '!' : '+', targetRelativePath,
+ fprintf(m_output, "%c%s%s\n", exists ? '!' : '+', targetRelativePath,
dry ? " would be copied" : "");
if (! dry)
copyFile(entry, target.str());
@@ -1229,7 +1249,7 @@ void ReDirSync::synchronize(int argc, const char* argv[]){
}
if (verbose){
int duration = int(time(NULL) - start);
- printf ("=== %d dir(s) and %d file(s) with %.6f MByte in %02d:%02d sec\n",
+ fprintf(m_output, "=== %d dir(s) and %d file(s) with %.6f MByte in %02d:%02d sec\n",
dirs, files, sumSizes / 1E6, duration / 60, duration % 60);
}
} catch(ReOptionException& exc){
diff --git a/os/ReDirTools.hpp b/os/ReDirTools.hpp
index 6b6fe66..12b571a 100644
--- a/os/ReDirTools.hpp
+++ b/os/ReDirTools.hpp
@@ -13,26 +13,39 @@ public:
ReDirOptions(const char* usage[], const char* example[]);
~ReDirOptions();
public:
+ void addCompoundUsage(const char** usage);
void addStandardFilterOptions();
+ time_t checkDate(const char* value);
+ const char* checkPatternList(const char* value);
+ int64_t checkSize(const char* value);
void checkStandardFilterOptions();
- void initCompoundUsage(size_t size);
- void addCompoundUsage(const char** usage);
+ void close();
+ ReDirStatus_t::Type_t checkType(const char* value);
+ /** Returns the compound usage message.
+ * @return the compound usage message
+ */
const char** compoundUsage() const
{ return m_compoundUsage; }
- ReProgramArgs& programArgs()
+ void initCompoundUsage(size_t size);
+ void help(const char* errorMessage, const char* message2 = NULL) const;
+ /** Returns the program arguments.
+ * @return the program arguments
+ */
+ const ReProgramArgs& programArgs() const
+ { return m_programArgs; }
+ /** Returns the program arguments.
+ * @return the program arguments
+ */
+ ReProgramArgs& programArgsChangeable()
{ return m_programArgs; }
- time_t checkDate(const char* value);
- int64_t checkSize(const char* value);
- ReDirStatus_t::Type_t checkType(const char* value);
- const char* checkPatternList(const char* value);
void setFilterFromProgramArgs(ReDirEntryFilter_t& filter);
- void help(const char* errorMessage, const char* message2 = NULL);
protected:
ReProgramArgs m_programArgs;
RePatternList m_nodePatterns;
RePatternList m_pathPatterns;
const char** m_compoundUsage;
int m_countCompoundUsage;
+ FILE* m_output;
};
class ReDirBatch : public ReDirOptions {
--
2.39.5