2020-08-11 09:10:23 +00:00
|
|
|
// © 2016 and later: Unicode, Inc. and others.
|
|
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
|
|
|
/*
|
|
|
|
*******************************************************************************
|
|
|
|
* Copyright (C) 2010-2012, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
*******************************************************************************
|
|
|
|
* file name: stringtriebuilder.cpp
|
|
|
|
* encoding: UTF-8
|
|
|
|
* tab size: 8 (not used)
|
|
|
|
* indentation:4
|
|
|
|
*
|
|
|
|
* created on: 2010dec24
|
|
|
|
* created by: Markus W. Scherer
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "utypeinfo.h" // for 'typeid' to work
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#include "unicode/stringtriebuilder.h"
|
|
|
|
#include "uassert.h"
|
|
|
|
#include "uhash.h"
|
|
|
|
|
|
|
|
U_CDECL_BEGIN
|
|
|
|
|
|
|
|
// Hash callback handed to uhash_openSize(): forwards the key's pointer to
// StringTrieBuilder::hashNode().
static int32_t U_CALLCONV
hashStringTrieNode(const UHashTok key) {
    const void *node=key.pointer;
    return icu::StringTrieBuilder::hashNode(node);
}
|
|
|
|
|
|
|
|
// Key-comparison callback handed to uhash_openSize(): forwards both key
// pointers to StringTrieBuilder::equalNodes().
static UBool U_CALLCONV
equalStringTrieNodes(const UHashTok key1, const UHashTok key2) {
    const void *left=key1.pointer;
    const void *right=key2.pointer;
    return icu::StringTrieBuilder::equalNodes(left, right);
}
|
|
|
|
|
|
|
|
U_CDECL_END
|
|
|
|
|
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
2023-05-23 00:05:01 +00:00
|
|
|
// A new builder has no node hash table; createCompactBuilder() opens one on demand.
StringTrieBuilder::StringTrieBuilder()
        : nodes(nullptr) {
}
|
2020-08-11 09:10:23 +00:00
|
|
|
|
|
|
|
// Closes the node hash table, if one is still open, via deleteCompactBuilder().
StringTrieBuilder::~StringTrieBuilder() {
    this->deleteCompactBuilder();
}
|
|
|
|
|
|
|
|
void
|
|
|
|
StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) {
|
|
|
|
if(U_FAILURE(errorCode)) {
|
|
|
|
return;
|
|
|
|
}
|
2023-05-23 00:05:01 +00:00
|
|
|
nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, nullptr,
|
2020-08-11 09:10:23 +00:00
|
|
|
sizeGuess, &errorCode);
|
|
|
|
if(U_SUCCESS(errorCode)) {
|
2023-05-23 00:05:01 +00:00
|
|
|
if(nodes==nullptr) {
|
2020-08-11 09:10:23 +00:00
|
|
|
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
|
|
|
} else {
|
|
|
|
uhash_setKeyDeleter(nodes, uprv_deleteUObject);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
StringTrieBuilder::deleteCompactBuilder() {
|
|
|
|
uhash_close(nodes);
|
2023-05-23 00:05:01 +00:00
|
|
|
nodes=nullptr;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength,
|
|
|
|
UErrorCode &errorCode) {
|
|
|
|
if(buildOption==USTRINGTRIE_BUILD_FAST) {
|
|
|
|
writeNode(0, elementsLength, 0);
|
|
|
|
} else /* USTRINGTRIE_BUILD_SMALL */ {
|
|
|
|
createCompactBuilder(2*elementsLength, errorCode);
|
|
|
|
Node *root=makeNode(0, elementsLength, 0, errorCode);
|
|
|
|
if(U_SUCCESS(errorCode)) {
|
|
|
|
root->markRightEdgesFirst(-1);
|
|
|
|
root->write(*this);
|
|
|
|
}
|
|
|
|
deleteCompactBuilder();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Requires start<limit,
|
|
|
|
// and all strings of the [start..limit[ elements must be sorted and
|
|
|
|
// have a common prefix of length unitIndex.
|
|
|
|
int32_t
|
|
|
|
StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) {
|
2022-10-28 06:11:55 +00:00
|
|
|
UBool hasValue=false;
|
2020-08-11 09:10:23 +00:00
|
|
|
int32_t value=0;
|
|
|
|
int32_t type;
|
|
|
|
if(unitIndex==getElementStringLength(start)) {
|
|
|
|
// An intermediate or final value.
|
|
|
|
value=getElementValue(start++);
|
|
|
|
if(start==limit) {
|
2022-10-28 06:11:55 +00:00
|
|
|
return writeValueAndFinal(value, true); // final-value node
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
2022-10-28 06:11:55 +00:00
|
|
|
hasValue=true;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
|
|
|
// Now all [start..limit[ strings are longer than unitIndex.
|
|
|
|
int32_t minUnit=getElementUnit(start, unitIndex);
|
|
|
|
int32_t maxUnit=getElementUnit(limit-1, unitIndex);
|
|
|
|
if(minUnit==maxUnit) {
|
|
|
|
// Linear-match node: All strings have the same character at unitIndex.
|
|
|
|
int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
|
|
|
|
writeNode(start, limit, lastUnitIndex);
|
|
|
|
// Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
|
|
|
|
int32_t length=lastUnitIndex-unitIndex;
|
|
|
|
int32_t maxLinearMatchLength=getMaxLinearMatchLength();
|
|
|
|
while(length>maxLinearMatchLength) {
|
|
|
|
lastUnitIndex-=maxLinearMatchLength;
|
|
|
|
length-=maxLinearMatchLength;
|
|
|
|
writeElementUnits(start, lastUnitIndex, maxLinearMatchLength);
|
|
|
|
write(getMinLinearMatch()+maxLinearMatchLength-1);
|
|
|
|
}
|
|
|
|
writeElementUnits(start, unitIndex, length);
|
|
|
|
type=getMinLinearMatch()+length-1;
|
|
|
|
} else {
|
|
|
|
// Branch node.
|
|
|
|
int32_t length=countElementUnits(start, limit, unitIndex);
|
|
|
|
// length>=2 because minUnit!=maxUnit.
|
|
|
|
writeBranchSubNode(start, limit, unitIndex, length);
|
|
|
|
if(--length<getMinLinearMatch()) {
|
|
|
|
type=length;
|
|
|
|
} else {
|
|
|
|
write(length);
|
|
|
|
type=0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return writeValueAndType(hasValue, value, type);
|
|
|
|
}
|
|
|
|
|
|
|
|
// start<limit && all strings longer than unitIndex &&
|
|
|
|
// length different units at unitIndex
|
|
|
|
int32_t
|
|
|
|
StringTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) {
|
2023-05-23 00:05:01 +00:00
|
|
|
char16_t middleUnits[kMaxSplitBranchLevels];
|
2020-08-11 09:10:23 +00:00
|
|
|
int32_t lessThan[kMaxSplitBranchLevels];
|
|
|
|
int32_t ltLength=0;
|
|
|
|
while(length>getMaxBranchLinearSubNodeLength()) {
|
|
|
|
// Branch on the middle unit.
|
|
|
|
// First, find the middle unit.
|
|
|
|
int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2);
|
|
|
|
// Encode the less-than branch first.
|
|
|
|
middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit
|
|
|
|
lessThan[ltLength]=writeBranchSubNode(start, i, unitIndex, length/2);
|
|
|
|
++ltLength;
|
|
|
|
// Continue for the greater-or-equal branch.
|
|
|
|
start=i;
|
|
|
|
length=length-length/2;
|
|
|
|
}
|
|
|
|
// For each unit, find its elements array start and whether it has a final value.
|
|
|
|
int32_t starts[kMaxBranchLinearSubNodeLength];
|
|
|
|
UBool isFinal[kMaxBranchLinearSubNodeLength-1];
|
|
|
|
int32_t unitNumber=0;
|
|
|
|
do {
|
|
|
|
int32_t i=starts[unitNumber]=start;
|
2023-05-23 00:05:01 +00:00
|
|
|
char16_t unit=getElementUnit(i++, unitIndex);
|
2020-08-11 09:10:23 +00:00
|
|
|
i=indexOfElementWithNextUnit(i, unitIndex, unit);
|
|
|
|
isFinal[unitNumber]= start==i-1 && unitIndex+1==getElementStringLength(start);
|
|
|
|
start=i;
|
|
|
|
} while(++unitNumber<length-1);
|
|
|
|
// unitNumber==length-1, and the maxUnit elements range is [start..limit[
|
|
|
|
starts[unitNumber]=start;
|
|
|
|
|
|
|
|
// Write the sub-nodes in reverse order: The jump lengths are deltas from
|
|
|
|
// after their own positions, so if we wrote the minUnit sub-node first,
|
|
|
|
// then its jump delta would be larger.
|
|
|
|
// Instead we write the minUnit sub-node last, for a shorter delta.
|
|
|
|
int32_t jumpTargets[kMaxBranchLinearSubNodeLength-1];
|
|
|
|
do {
|
|
|
|
--unitNumber;
|
|
|
|
if(!isFinal[unitNumber]) {
|
|
|
|
jumpTargets[unitNumber]=writeNode(starts[unitNumber], starts[unitNumber+1], unitIndex+1);
|
|
|
|
}
|
|
|
|
} while(unitNumber>0);
|
|
|
|
// The maxUnit sub-node is written as the very last one because we do
|
|
|
|
// not jump for it at all.
|
|
|
|
unitNumber=length-1;
|
|
|
|
writeNode(start, limit, unitIndex+1);
|
|
|
|
int32_t offset=write(getElementUnit(start, unitIndex));
|
|
|
|
// Write the rest of this node's unit-value pairs.
|
|
|
|
while(--unitNumber>=0) {
|
|
|
|
start=starts[unitNumber];
|
|
|
|
int32_t value;
|
|
|
|
if(isFinal[unitNumber]) {
|
|
|
|
// Write the final value for the one string ending with this unit.
|
|
|
|
value=getElementValue(start);
|
|
|
|
} else {
|
|
|
|
// Write the delta to the start position of the sub-node.
|
|
|
|
value=offset-jumpTargets[unitNumber];
|
|
|
|
}
|
|
|
|
writeValueAndFinal(value, isFinal[unitNumber]);
|
|
|
|
offset=write(getElementUnit(start, unitIndex));
|
|
|
|
}
|
|
|
|
// Write the split-branch nodes.
|
|
|
|
while(ltLength>0) {
|
|
|
|
--ltLength;
|
|
|
|
writeDeltaTo(lessThan[ltLength]);
|
|
|
|
offset=write(middleUnits[ltLength]);
|
|
|
|
}
|
|
|
|
return offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Requires start<limit,
|
|
|
|
// and all strings of the [start..limit[ elements must be sorted and
|
|
|
|
// have a common prefix of length unitIndex.
|
|
|
|
StringTrieBuilder::Node *
|
|
|
|
StringTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) {
|
|
|
|
if(U_FAILURE(errorCode)) {
|
2023-05-23 00:05:01 +00:00
|
|
|
return nullptr;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
2022-10-28 06:11:55 +00:00
|
|
|
UBool hasValue=false;
|
2020-08-11 09:10:23 +00:00
|
|
|
int32_t value=0;
|
|
|
|
if(unitIndex==getElementStringLength(start)) {
|
|
|
|
// An intermediate or final value.
|
|
|
|
value=getElementValue(start++);
|
|
|
|
if(start==limit) {
|
|
|
|
return registerFinalValue(value, errorCode);
|
|
|
|
}
|
2022-10-28 06:11:55 +00:00
|
|
|
hasValue=true;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
|
|
|
Node *node;
|
|
|
|
// Now all [start..limit[ strings are longer than unitIndex.
|
|
|
|
int32_t minUnit=getElementUnit(start, unitIndex);
|
|
|
|
int32_t maxUnit=getElementUnit(limit-1, unitIndex);
|
|
|
|
if(minUnit==maxUnit) {
|
|
|
|
// Linear-match node: All strings have the same character at unitIndex.
|
|
|
|
int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
|
|
|
|
Node *nextNode=makeNode(start, limit, lastUnitIndex, errorCode);
|
|
|
|
// Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
|
|
|
|
int32_t length=lastUnitIndex-unitIndex;
|
|
|
|
int32_t maxLinearMatchLength=getMaxLinearMatchLength();
|
|
|
|
while(length>maxLinearMatchLength) {
|
|
|
|
lastUnitIndex-=maxLinearMatchLength;
|
|
|
|
length-=maxLinearMatchLength;
|
|
|
|
node=createLinearMatchNode(start, lastUnitIndex, maxLinearMatchLength, nextNode);
|
|
|
|
nextNode=registerNode(node, errorCode);
|
|
|
|
}
|
|
|
|
node=createLinearMatchNode(start, unitIndex, length, nextNode);
|
|
|
|
} else {
|
|
|
|
// Branch node.
|
|
|
|
int32_t length=countElementUnits(start, limit, unitIndex);
|
|
|
|
// length>=2 because minUnit!=maxUnit.
|
|
|
|
Node *subNode=makeBranchSubNode(start, limit, unitIndex, length, errorCode);
|
|
|
|
node=new BranchHeadNode(length, subNode);
|
|
|
|
}
|
2023-05-23 00:05:01 +00:00
|
|
|
if(hasValue && node!=nullptr) {
|
2020-08-11 09:10:23 +00:00
|
|
|
if(matchNodesCanHaveValues()) {
|
|
|
|
((ValueNode *)node)->setValue(value);
|
|
|
|
} else {
|
|
|
|
node=new IntermediateValueNode(value, registerNode(node, errorCode));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return registerNode(node, errorCode);
|
|
|
|
}
|
|
|
|
|
|
|
|
// start<limit && all strings longer than unitIndex &&
|
|
|
|
// length different units at unitIndex
|
|
|
|
StringTrieBuilder::Node *
|
|
|
|
StringTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
|
|
|
|
int32_t length, UErrorCode &errorCode) {
|
|
|
|
if(U_FAILURE(errorCode)) {
|
2023-05-23 00:05:01 +00:00
|
|
|
return nullptr;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
2023-05-23 00:05:01 +00:00
|
|
|
char16_t middleUnits[kMaxSplitBranchLevels];
|
2020-08-11 09:10:23 +00:00
|
|
|
Node *lessThan[kMaxSplitBranchLevels];
|
|
|
|
int32_t ltLength=0;
|
|
|
|
while(length>getMaxBranchLinearSubNodeLength()) {
|
|
|
|
// Branch on the middle unit.
|
|
|
|
// First, find the middle unit.
|
|
|
|
int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2);
|
|
|
|
// Create the less-than branch.
|
|
|
|
middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit
|
|
|
|
lessThan[ltLength]=makeBranchSubNode(start, i, unitIndex, length/2, errorCode);
|
|
|
|
++ltLength;
|
|
|
|
// Continue for the greater-or-equal branch.
|
|
|
|
start=i;
|
|
|
|
length=length-length/2;
|
|
|
|
}
|
|
|
|
if(U_FAILURE(errorCode)) {
|
2023-05-23 00:05:01 +00:00
|
|
|
return nullptr;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
|
|
|
ListBranchNode *listNode=new ListBranchNode();
|
2023-05-23 00:05:01 +00:00
|
|
|
if(listNode==nullptr) {
|
2020-08-11 09:10:23 +00:00
|
|
|
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
2023-05-23 00:05:01 +00:00
|
|
|
return nullptr;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
|
|
|
// For each unit, find its elements array start and whether it has a final value.
|
|
|
|
int32_t unitNumber=0;
|
|
|
|
do {
|
|
|
|
int32_t i=start;
|
2023-05-23 00:05:01 +00:00
|
|
|
char16_t unit=getElementUnit(i++, unitIndex);
|
2020-08-11 09:10:23 +00:00
|
|
|
i=indexOfElementWithNextUnit(i, unitIndex, unit);
|
|
|
|
if(start==i-1 && unitIndex+1==getElementStringLength(start)) {
|
|
|
|
listNode->add(unit, getElementValue(start));
|
|
|
|
} else {
|
|
|
|
listNode->add(unit, makeNode(start, i, unitIndex+1, errorCode));
|
|
|
|
}
|
|
|
|
start=i;
|
|
|
|
} while(++unitNumber<length-1);
|
|
|
|
// unitNumber==length-1, and the maxUnit elements range is [start..limit[
|
2023-05-23 00:05:01 +00:00
|
|
|
char16_t unit=getElementUnit(start, unitIndex);
|
2020-08-11 09:10:23 +00:00
|
|
|
if(start==limit-1 && unitIndex+1==getElementStringLength(start)) {
|
|
|
|
listNode->add(unit, getElementValue(start));
|
|
|
|
} else {
|
|
|
|
listNode->add(unit, makeNode(start, limit, unitIndex+1, errorCode));
|
|
|
|
}
|
|
|
|
Node *node=registerNode(listNode, errorCode);
|
|
|
|
// Create the split-branch nodes.
|
|
|
|
while(ltLength>0) {
|
|
|
|
--ltLength;
|
|
|
|
node=registerNode(
|
|
|
|
new SplitBranchNode(middleUnits[ltLength], lessThan[ltLength], node), errorCode);
|
|
|
|
}
|
|
|
|
return node;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Registers newNode in the "nodes" hash table, or replaces it with an equal
// node that was registered earlier.
//
// newNode    Candidate node; may be nullptr (treated as an allocation failure).
//            It is deleted here when it is not the node that gets returned.
// errorCode  In/out ICU error code.
// Returns the registered node (newNode or its earlier equivalent),
// or nullptr on failure.
StringTrieBuilder::Node *
StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        delete newNode;
        return nullptr;
    }
    if(newNode==nullptr) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    const UHashElement *existing=uhash_find(nodes, newNode);
    if(existing!=nullptr) {
        // An equal node is already registered: drop the duplicate.
        delete newNode;
        return static_cast<Node *>(existing->key.pointer);
    }
    // If uhash_puti() returns a non-zero value from an equivalent, previously
    // registered node, then uhash_find() failed to find that and we will leak newNode.
#if U_DEBUG
    int32_t oldValue=  // Only in debug mode to avoid a compiler warning about unused oldValue.
#endif
    uhash_puti(nodes, newNode, 1, &errorCode);
    U_ASSERT(oldValue==0);
    if(U_FAILURE(errorCode)) {
        // NOTE(review): createCompactBuilder() installs a key deleter on this
        // table. Confirm that uhash_puti() does not already dispose of the
        // key when it fails; if it does, this delete is a double free.
        delete newNode;
        return nullptr;
    }
    return newNode;
}
|
|
|
|
|
|
|
|
// Returns the registered FinalValueNode for "value", creating and registering
// a new one if no equal node is in the "nodes" hash table yet.
// Returns nullptr when errorCode indicates a failure.
StringTrieBuilder::Node *
StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return nullptr;
    }
    // Look up with a stack-allocated probe so that nothing is heap-allocated
    // when an equal node already exists.
    FinalValueNode key(value);
    const UHashElement *existing=uhash_find(nodes, &key);
    if(existing!=nullptr) {
        return static_cast<Node *>(existing->key.pointer);
    }
    Node *newNode=new FinalValueNode(value);
    if(newNode==nullptr) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    // If uhash_puti() returns a non-zero value from an equivalent, previously
    // registered node, then uhash_find() failed to find that and we will leak newNode.
#if U_DEBUG
    int32_t oldValue=  // Only in debug mode to avoid a compiler warning about unused oldValue.
#endif
    uhash_puti(nodes, newNode, 1, &errorCode);
    U_ASSERT(oldValue==0);
    if(U_FAILURE(errorCode)) {
        // NOTE(review): createCompactBuilder() installs a key deleter on this
        // table. Confirm that uhash_puti() does not already dispose of the
        // key when it fails; if it does, this delete is a double free.
        delete newNode;
        return nullptr;
    }
    return newNode;
}
|
|
|
|
|
|
|
|
// Returns the hashCode() of the Node that the untyped pointer refers to.
// Called by the hash-table callback hashStringTrieNode().
int32_t
StringTrieBuilder::hashNode(const void *node) {
    const Node *typedNode=static_cast<const Node *>(node);
    return typedNode->hashCode();
}
|
|
|
|
|
|
|
|
// Compares the two Nodes that the untyped pointers refer to, using the
// nodes' operator==. Called by the hash-table callback equalStringTrieNodes().
UBool
StringTrieBuilder::equalNodes(const void *left, const void *right) {
    const Node &lhs=*static_cast<const Node *>(left);
    const Node &rhs=*static_cast<const Node *>(right);
    return lhs==rhs;
}
|
|
|
|
|
2021-10-28 06:15:28 +00:00
|
|
|
bool
|
2020-08-11 09:10:23 +00:00
|
|
|
StringTrieBuilder::Node::operator==(const Node &other) const {
|
|
|
|
return this==&other || (typeid(*this)==typeid(other) && hash==other.hash);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Default marking: a node whose offset is still 0 stores edgeNumber in it.
// Always returns edgeNumber unchanged.
int32_t
StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) {
    if(offset!=0) {
        // Already marked (or written): keep the existing offset.
        return edgeNumber;
    }
    offset=edgeNumber;
    return edgeNumber;
}
|
|
|
|
|
2021-10-28 06:15:28 +00:00
|
|
|
bool
|
2020-08-11 09:10:23 +00:00
|
|
|
StringTrieBuilder::FinalValueNode::operator==(const Node &other) const {
|
|
|
|
if(this==&other) {
|
2021-10-28 06:15:28 +00:00
|
|
|
return true;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
|
|
|
if(!Node::operator==(other)) {
|
2021-10-28 06:15:28 +00:00
|
|
|
return false;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
2023-05-23 00:05:01 +00:00
|
|
|
const FinalValueNode &o=static_cast<const FinalValueNode &>(other);
|
2020-08-11 09:10:23 +00:00
|
|
|
return value==o.value;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) {
|
2022-10-28 06:11:55 +00:00
|
|
|
offset=builder.writeValueAndFinal(value, true);
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
|
|
|
|
2021-10-28 06:15:28 +00:00
|
|
|
bool
|
2020-08-11 09:10:23 +00:00
|
|
|
StringTrieBuilder::ValueNode::operator==(const Node &other) const {
|
|
|
|
if(this==&other) {
|
2021-10-28 06:15:28 +00:00
|
|
|
return true;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
|
|
|
if(!Node::operator==(other)) {
|
2021-10-28 06:15:28 +00:00
|
|
|
return false;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
2023-05-23 00:05:01 +00:00
|
|
|
const ValueNode &o=static_cast<const ValueNode &>(other);
|
2020-08-11 09:10:23 +00:00
|
|
|
return hasValue==o.hasValue && (!hasValue || value==o.value);
|
|
|
|
}
|
|
|
|
|
2021-10-28 06:15:28 +00:00
|
|
|
bool
|
2020-08-11 09:10:23 +00:00
|
|
|
StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const {
|
|
|
|
if(this==&other) {
|
2021-10-28 06:15:28 +00:00
|
|
|
return true;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
|
|
|
if(!ValueNode::operator==(other)) {
|
2021-10-28 06:15:28 +00:00
|
|
|
return false;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
2023-05-23 00:05:01 +00:00
|
|
|
const IntermediateValueNode &o=static_cast<const IntermediateValueNode &>(other);
|
2020-08-11 09:10:23 +00:00
|
|
|
return next==o.next;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If this node is not marked yet (offset==0), marks the next node and stores
// the edge number it returns as this node's offset.
// Returns that edge number, or edgeNumber unchanged if already marked.
int32_t
StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) {
    if(offset!=0) {
        return edgeNumber;
    }
    edgeNumber=next->markRightEdgesFirst(edgeNumber);
    offset=edgeNumber;
    return edgeNumber;
}
|
|
|
|
|
|
|
|
void
|
|
|
|
StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) {
|
|
|
|
next->write(builder);
|
2022-10-28 06:11:55 +00:00
|
|
|
offset=builder.writeValueAndFinal(value, false);
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
|
|
|
|
2021-10-28 06:15:28 +00:00
|
|
|
bool
|
2020-08-11 09:10:23 +00:00
|
|
|
StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const {
|
|
|
|
if(this==&other) {
|
2021-10-28 06:15:28 +00:00
|
|
|
return true;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
|
|
|
if(!ValueNode::operator==(other)) {
|
2021-10-28 06:15:28 +00:00
|
|
|
return false;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
2023-05-23 00:05:01 +00:00
|
|
|
const LinearMatchNode &o=static_cast<const LinearMatchNode &>(other);
|
2020-08-11 09:10:23 +00:00
|
|
|
return length==o.length && next==o.next;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If this node is not marked yet (offset==0), marks the next node and stores
// the edge number it returns as this node's offset.
// Returns that edge number, or edgeNumber unchanged if already marked.
int32_t
StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) {
    if(offset!=0) {
        return edgeNumber;
    }
    edgeNumber=next->markRightEdgesFirst(edgeNumber);
    offset=edgeNumber;
    return edgeNumber;
}
|
|
|
|
|
2021-10-28 06:15:28 +00:00
|
|
|
bool
|
2020-08-11 09:10:23 +00:00
|
|
|
StringTrieBuilder::ListBranchNode::operator==(const Node &other) const {
|
|
|
|
if(this==&other) {
|
2021-10-28 06:15:28 +00:00
|
|
|
return true;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
|
|
|
if(!Node::operator==(other)) {
|
2021-10-28 06:15:28 +00:00
|
|
|
return false;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
2023-05-23 00:05:01 +00:00
|
|
|
const ListBranchNode &o=static_cast<const ListBranchNode &>(other);
|
2020-08-11 09:10:23 +00:00
|
|
|
for(int32_t i=0; i<length; ++i) {
|
|
|
|
if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) {
|
2021-10-28 06:15:28 +00:00
|
|
|
return false;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
|
|
|
}
|
2021-10-28 06:15:28 +00:00
|
|
|
return true;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// If this node is not marked yet (offset==0), records edgeNumber as
// firstEdgeNumber and marks the sub-nodes from the last entry to the first.
// The last entry is marked with the incoming edge number; every earlier one
// with the current edge number minus 1. Entries without a sub-node (nullptr)
// are skipped. The final edge number becomes this node's offset.
// Returns that edge number, or edgeNumber unchanged if already marked.
int32_t
StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
    if(offset!=0) {
        return edgeNumber;
    }
    firstEdgeNumber=edgeNumber;
    UBool isRightmost=true;
    int32_t index=length;
    do {
        --index;
        Node *edge=equal[index];
        if(edge!=nullptr) {
            // For all but the rightmost edge, decrement the edge number.
            int32_t step= isRightmost ? 0 : 1;
            edgeNumber=edge->markRightEdgesFirst(edgeNumber-step);
        }
        isRightmost=false;
    } while(index>0);
    offset=edgeNumber;
    return edgeNumber;
}
|
|
|
|
|
|
|
|
void
|
|
|
|
StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) {
|
|
|
|
// Write the sub-nodes in reverse order: The jump lengths are deltas from
|
|
|
|
// after their own positions, so if we wrote the minUnit sub-node first,
|
|
|
|
// then its jump delta would be larger.
|
|
|
|
// Instead we write the minUnit sub-node last, for a shorter delta.
|
|
|
|
int32_t unitNumber=length-1;
|
|
|
|
Node *rightEdge=equal[unitNumber];
|
2023-05-23 00:05:01 +00:00
|
|
|
int32_t rightEdgeNumber= rightEdge==nullptr ? firstEdgeNumber : rightEdge->getOffset();
|
2020-08-11 09:10:23 +00:00
|
|
|
do {
|
|
|
|
--unitNumber;
|
2023-05-23 00:05:01 +00:00
|
|
|
if(equal[unitNumber]!=nullptr) {
|
2020-08-11 09:10:23 +00:00
|
|
|
equal[unitNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder);
|
|
|
|
}
|
|
|
|
} while(unitNumber>0);
|
|
|
|
// The maxUnit sub-node is written as the very last one because we do
|
|
|
|
// not jump for it at all.
|
|
|
|
unitNumber=length-1;
|
2023-05-23 00:05:01 +00:00
|
|
|
if(rightEdge==nullptr) {
|
2022-10-28 06:11:55 +00:00
|
|
|
builder.writeValueAndFinal(values[unitNumber], true);
|
2020-08-11 09:10:23 +00:00
|
|
|
} else {
|
|
|
|
rightEdge->write(builder);
|
|
|
|
}
|
|
|
|
offset=builder.write(units[unitNumber]);
|
|
|
|
// Write the rest of this node's unit-value pairs.
|
|
|
|
while(--unitNumber>=0) {
|
|
|
|
int32_t value;
|
|
|
|
UBool isFinal;
|
2023-05-23 00:05:01 +00:00
|
|
|
if(equal[unitNumber]==nullptr) {
|
2020-08-11 09:10:23 +00:00
|
|
|
// Write the final value for the one string ending with this unit.
|
|
|
|
value=values[unitNumber];
|
2022-10-28 06:11:55 +00:00
|
|
|
isFinal=true;
|
2020-08-11 09:10:23 +00:00
|
|
|
} else {
|
|
|
|
// Write the delta to the start position of the sub-node.
|
|
|
|
U_ASSERT(equal[unitNumber]->getOffset()>0);
|
|
|
|
value=offset-equal[unitNumber]->getOffset();
|
2022-10-28 06:11:55 +00:00
|
|
|
isFinal=false;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
|
|
|
builder.writeValueAndFinal(value, isFinal);
|
|
|
|
offset=builder.write(units[unitNumber]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-10-28 06:15:28 +00:00
|
|
|
bool
|
2020-08-11 09:10:23 +00:00
|
|
|
StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const {
|
|
|
|
if(this==&other) {
|
2021-10-28 06:15:28 +00:00
|
|
|
return true;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
|
|
|
if(!Node::operator==(other)) {
|
2021-10-28 06:15:28 +00:00
|
|
|
return false;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
2023-05-23 00:05:01 +00:00
|
|
|
const SplitBranchNode &o=static_cast<const SplitBranchNode &>(other);
|
2020-08-11 09:10:23 +00:00
|
|
|
return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If this node is not marked yet (offset==0), records edgeNumber as
// firstEdgeNumber, marks the greater-or-equal sub-node with it, then marks
// the less-than sub-node with the resulting edge number minus 1. The final
// edge number becomes this node's offset.
// Returns that edge number, or edgeNumber unchanged if already marked.
int32_t
StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
    if(offset!=0) {
        return edgeNumber;
    }
    firstEdgeNumber=edgeNumber;
    edgeNumber=greaterOrEqual->markRightEdgesFirst(edgeNumber);
    edgeNumber=lessThan->markRightEdgesFirst(edgeNumber-1);
    offset=edgeNumber;
    return edgeNumber;
}
|
|
|
|
|
|
|
|
void
|
|
|
|
StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) {
|
|
|
|
// Encode the less-than branch first.
|
|
|
|
lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder);
|
|
|
|
// Encode the greater-or-equal branch last because we do not jump for it at all.
|
|
|
|
greaterOrEqual->write(builder);
|
|
|
|
// Write this node.
|
|
|
|
U_ASSERT(lessThan->getOffset()>0);
|
|
|
|
builder.writeDeltaTo(lessThan->getOffset()); // less-than
|
|
|
|
offset=builder.write(unit);
|
|
|
|
}
|
|
|
|
|
2021-10-28 06:15:28 +00:00
|
|
|
bool
|
2020-08-11 09:10:23 +00:00
|
|
|
StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const {
|
|
|
|
if(this==&other) {
|
2021-10-28 06:15:28 +00:00
|
|
|
return true;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
|
|
|
if(!ValueNode::operator==(other)) {
|
2021-10-28 06:15:28 +00:00
|
|
|
return false;
|
2020-08-11 09:10:23 +00:00
|
|
|
}
|
2023-05-23 00:05:01 +00:00
|
|
|
const BranchHeadNode &o=static_cast<const BranchHeadNode &>(other);
|
2020-08-11 09:10:23 +00:00
|
|
|
return length==o.length && next==o.next;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If this node is not marked yet (offset==0), marks the next node and stores
// the edge number it returns as this node's offset.
// Returns that edge number, or edgeNumber unchanged if already marked.
int32_t
StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) {
    if(offset!=0) {
        return edgeNumber;
    }
    edgeNumber=next->markRightEdgesFirst(edgeNumber);
    offset=edgeNumber;
    return edgeNumber;
}
|
|
|
|
|
|
|
|
void
|
|
|
|
StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) {
|
|
|
|
next->write(builder);
|
|
|
|
if(length<=builder.getMinLinearMatch()) {
|
|
|
|
offset=builder.writeValueAndType(hasValue, value, length-1);
|
|
|
|
} else {
|
|
|
|
builder.write(length-1);
|
|
|
|
offset=builder.writeValueAndType(hasValue, value, 0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
U_NAMESPACE_END
|