373 lines
12 KiB
C++
373 lines
12 KiB
C++
// © 2016 and later: Unicode, Inc. and others.
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
|
/*
|
|
***************************************************************************
|
|
* Copyright (C) 2002-2016 International Business Machines Corporation *
|
|
* and others. All rights reserved. *
|
|
***************************************************************************
|
|
*/
|
|
|
|
//
|
|
// File: rbbinode.cpp
|
|
//
|
|
// Implementation of class RBBINode, which represents a node in the
|
|
// tree generated when parsing the Rules Based Break Iterator rules.
|
|
//
|
|
// This "Class" is actually closer to a struct.
|
|
// Code using it is expected to directly access fields much of the time.
|
|
//
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#if !UCONFIG_NO_BREAK_ITERATION
|
|
|
|
#include "unicode/unistr.h"
|
|
#include "unicode/uniset.h"
|
|
#include "unicode/uchar.h"
|
|
#include "unicode/parsepos.h"
|
|
|
|
#include "cstr.h"
|
|
#include "uvector.h"
|
|
|
|
#include "rbbirb.h"
|
|
#include "rbbinode.h"
|
|
|
|
#include "uassert.h"
|
|
|
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
#ifdef RBBI_DEBUG
|
|
static int gLastSerial = 0;
|
|
#endif
|
|
|
|
|
|
//-------------------------------------------------------------------------
|
|
//
|
|
// Constructor. Just set the fields to reasonable default values.
|
|
//
|
|
//-------------------------------------------------------------------------
|
|
RBBINode::RBBINode(NodeType t) : UMemory() {
|
|
#ifdef RBBI_DEBUG
|
|
fSerialNum = ++gLastSerial;
|
|
#endif
|
|
fType = t;
|
|
fParent = NULL;
|
|
fLeftChild = NULL;
|
|
fRightChild = NULL;
|
|
fInputSet = NULL;
|
|
fFirstPos = 0;
|
|
fLastPos = 0;
|
|
fNullable = FALSE;
|
|
fLookAheadEnd = FALSE;
|
|
fRuleRoot = FALSE;
|
|
fChainIn = FALSE;
|
|
fVal = 0;
|
|
fPrecedence = precZero;
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
|
|
fLastPosSet = new UVector(status);
|
|
fFollowPos = new UVector(status);
|
|
if (t==opCat) {fPrecedence = precOpCat;}
|
|
else if (t==opOr) {fPrecedence = precOpOr;}
|
|
else if (t==opStart) {fPrecedence = precStart;}
|
|
else if (t==opLParen) {fPrecedence = precLParen;}
|
|
|
|
}
|
|
|
|
|
|
RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
|
|
#ifdef RBBI_DEBUG
|
|
fSerialNum = ++gLastSerial;
|
|
#endif
|
|
fType = other.fType;
|
|
fParent = NULL;
|
|
fLeftChild = NULL;
|
|
fRightChild = NULL;
|
|
fInputSet = other.fInputSet;
|
|
fPrecedence = other.fPrecedence;
|
|
fText = other.fText;
|
|
fFirstPos = other.fFirstPos;
|
|
fLastPos = other.fLastPos;
|
|
fNullable = other.fNullable;
|
|
fVal = other.fVal;
|
|
fRuleRoot = FALSE;
|
|
fChainIn = other.fChainIn;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
|
|
fLastPosSet = new UVector(status);
|
|
fFollowPos = new UVector(status);
|
|
}
|
|
|
|
|
|
//-------------------------------------------------------------------------
|
|
//
|
|
// Destructor. Deletes both this node AND any child nodes,
|
|
// except in the case of variable reference nodes. For
|
|
// these, the l. child points back to the definition, which
|
|
// is common for all references to the variable, meaning
|
|
// it can't be deleted here.
|
|
//
|
|
//-------------------------------------------------------------------------
|
|
RBBINode::~RBBINode() {
|
|
// printf("deleting node %8x serial %4d\n", this, this->fSerialNum);
|
|
delete fInputSet;
|
|
fInputSet = NULL;
|
|
|
|
switch (this->fType) {
|
|
case varRef:
|
|
case setRef:
|
|
// for these node types, multiple instances point to the same "children"
|
|
// Storage ownership of children handled elsewhere. Don't delete here.
|
|
break;
|
|
|
|
default:
|
|
delete fLeftChild;
|
|
fLeftChild = NULL;
|
|
delete fRightChild;
|
|
fRightChild = NULL;
|
|
}
|
|
|
|
|
|
delete fFirstPosSet;
|
|
delete fLastPosSet;
|
|
delete fFollowPos;
|
|
|
|
}
|
|
|
|
|
|
//-------------------------------------------------------------------------
|
|
//
|
|
// cloneTree Make a copy of the subtree rooted at this node.
|
|
// Discard any variable references encountered along the way,
|
|
// and replace with copies of the variable's definitions.
|
|
// Used to replicate the expression underneath variable
|
|
// references in preparation for generating the DFA tables.
|
|
//
|
|
//-------------------------------------------------------------------------
|
|
RBBINode *RBBINode::cloneTree() {
|
|
RBBINode *n;
|
|
|
|
if (fType == RBBINode::varRef) {
|
|
// If the current node is a variable reference, skip over it
|
|
// and clone the definition of the variable instead.
|
|
n = fLeftChild->cloneTree();
|
|
} else if (fType == RBBINode::uset) {
|
|
n = this;
|
|
} else {
|
|
n = new RBBINode(*this);
|
|
// Check for null pointer.
|
|
if (n != NULL) {
|
|
if (fLeftChild != NULL) {
|
|
n->fLeftChild = fLeftChild->cloneTree();
|
|
n->fLeftChild->fParent = n;
|
|
}
|
|
if (fRightChild != NULL) {
|
|
n->fRightChild = fRightChild->cloneTree();
|
|
n->fRightChild->fParent = n;
|
|
}
|
|
}
|
|
}
|
|
return n;
|
|
}
|
|
|
|
|
|
|
|
//-------------------------------------------------------------------------
|
|
//
|
|
// flattenVariables Walk a parse tree, replacing any variable
|
|
// references with a copy of the variable's definition.
|
|
// Aside from variables, the tree is not changed.
|
|
//
|
|
// Return the root of the tree. If the root was not a variable
|
|
// reference, it remains unchanged - the root we started with
|
|
// is the root we return. If, however, the root was a variable
|
|
// reference, the root of the newly cloned replacement tree will
|
|
// be returned, and the original tree deleted.
|
|
//
|
|
// This function works by recursively walking the tree
|
|
// without doing anything until a variable reference is
|
|
// found, then calling cloneTree() at that point. Any
|
|
// nested references are handled by cloneTree(), not here.
|
|
//
|
|
//-------------------------------------------------------------------------
|
|
RBBINode *RBBINode::flattenVariables() {
|
|
if (fType == varRef) {
|
|
RBBINode *retNode = fLeftChild->cloneTree();
|
|
if (retNode != NULL) {
|
|
retNode->fRuleRoot = this->fRuleRoot;
|
|
retNode->fChainIn = this->fChainIn;
|
|
}
|
|
delete this; // TODO: undefined behavior. Fix.
|
|
return retNode;
|
|
}
|
|
|
|
if (fLeftChild != NULL) {
|
|
fLeftChild = fLeftChild->flattenVariables();
|
|
fLeftChild->fParent = this;
|
|
}
|
|
if (fRightChild != NULL) {
|
|
fRightChild = fRightChild->flattenVariables();
|
|
fRightChild->fParent = this;
|
|
}
|
|
return this;
|
|
}
|
|
|
|
|
|
//-------------------------------------------------------------------------
|
|
//
|
|
// flattenSets Walk the parse tree, replacing any nodes of type setRef
|
|
// with a copy of the expression tree for the set. A set's
|
|
// equivalent expression tree is precomputed and saved as
|
|
// the left child of the uset node.
|
|
//
|
|
//-------------------------------------------------------------------------
|
|
void RBBINode::flattenSets() {
|
|
U_ASSERT(fType != setRef);
|
|
|
|
if (fLeftChild != NULL) {
|
|
if (fLeftChild->fType==setRef) {
|
|
RBBINode *setRefNode = fLeftChild;
|
|
RBBINode *usetNode = setRefNode->fLeftChild;
|
|
RBBINode *replTree = usetNode->fLeftChild;
|
|
fLeftChild = replTree->cloneTree();
|
|
fLeftChild->fParent = this;
|
|
delete setRefNode;
|
|
} else {
|
|
fLeftChild->flattenSets();
|
|
}
|
|
}
|
|
|
|
if (fRightChild != NULL) {
|
|
if (fRightChild->fType==setRef) {
|
|
RBBINode *setRefNode = fRightChild;
|
|
RBBINode *usetNode = setRefNode->fLeftChild;
|
|
RBBINode *replTree = usetNode->fLeftChild;
|
|
fRightChild = replTree->cloneTree();
|
|
fRightChild->fParent = this;
|
|
delete setRefNode;
|
|
} else {
|
|
fRightChild->flattenSets();
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//-------------------------------------------------------------------------
|
|
//
|
|
// findNodes() Locate all the nodes of the specified type, starting
|
|
// at the specified root.
|
|
//
|
|
//-------------------------------------------------------------------------
|
|
void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
|
|
/* test for buffer overflows */
|
|
if (U_FAILURE(status)) {
|
|
return;
|
|
}
|
|
if (fType == kind) {
|
|
dest->addElement(this, status);
|
|
}
|
|
if (fLeftChild != NULL) {
|
|
fLeftChild->findNodes(dest, kind, status);
|
|
}
|
|
if (fRightChild != NULL) {
|
|
fRightChild->findNodes(dest, kind, status);
|
|
}
|
|
}
|
|
|
|
|
|
//-------------------------------------------------------------------------
|
|
//
|
|
// print. Print out a single node, for debugging.
|
|
//
|
|
//-------------------------------------------------------------------------
|
|
#ifdef RBBI_DEBUG
|
|
|
|
static int32_t serial(const RBBINode *node) {
|
|
return (node == NULL? -1 : node->fSerialNum);
|
|
}
|
|
|
|
|
|
void RBBINode::printNode(const RBBINode *node) {
|
|
static const char * const nodeTypeNames[] = {
|
|
"setRef",
|
|
"uset",
|
|
"varRef",
|
|
"leafChar",
|
|
"lookAhead",
|
|
"tag",
|
|
"endMark",
|
|
"opStart",
|
|
"opCat",
|
|
"opOr",
|
|
"opStar",
|
|
"opPlus",
|
|
"opQuestion",
|
|
"opBreak",
|
|
"opReverse",
|
|
"opLParen"
|
|
};
|
|
|
|
if (node==NULL) {
|
|
RBBIDebugPrintf("%10p", (void *)node);
|
|
} else {
|
|
RBBIDebugPrintf("%10p %5d %12s %c%c %5d %5d %5d %6d %d ",
|
|
(void *)node, node->fSerialNum, nodeTypeNames[node->fType],
|
|
node->fRuleRoot?'R':' ', node->fChainIn?'C':' ',
|
|
serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent),
|
|
node->fFirstPos, node->fVal);
|
|
if (node->fType == varRef) {
|
|
RBBI_DEBUG_printUnicodeString(node->fText);
|
|
}
|
|
}
|
|
RBBIDebugPrintf("\n");
|
|
}
|
|
#endif
|
|
|
|
|
|
#ifdef RBBI_DEBUG
|
|
U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) {
|
|
RBBIDebugPrintf("%*s", minWidth, CStr(s)());
|
|
}
|
|
#endif
|
|
|
|
|
|
//-------------------------------------------------------------------------
|
|
//
|
|
// print. Print out the tree of nodes rooted at "this"
|
|
//
|
|
//-------------------------------------------------------------------------
|
|
#ifdef RBBI_DEBUG
|
|
void RBBINode::printNodeHeader() {
|
|
RBBIDebugPrintf(" Address serial type LeftChild RightChild Parent position value\n");
|
|
}
|
|
|
|
void RBBINode::printTree(const RBBINode *node, UBool printHeading) {
|
|
if (printHeading) {
|
|
printNodeHeader();
|
|
}
|
|
printNode(node);
|
|
if (node != NULL) {
|
|
// Only dump the definition under a variable reference if asked to.
|
|
// Unconditinally dump children of all other node types.
|
|
if (node->fType != varRef) {
|
|
if (node->fLeftChild != NULL) {
|
|
printTree(node->fLeftChild, FALSE);
|
|
}
|
|
|
|
if (node->fRightChild != NULL) {
|
|
printTree(node->fRightChild, FALSE);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
|
|
|
|
U_NAMESPACE_END
|
|
|
|
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|