godot/thirdparty/icu4c/common/mlbe.h

// © 2022 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#ifndef MLBREAKENGINE_H
#define MLBREAKENGINE_H

#include "hash.h"
#include "unicode/resbund.h"
#include "unicode/uniset.h"
#include "unicode/utext.h"
#include "uvectr32.h"

U_NAMESPACE_BEGIN

#if !UCONFIG_NO_BREAK_ITERATION

/**
 * A machine learning break engine for the phrase breaking in Japanese.
 */
class MlBreakEngine : public UMemory {
   public:
    /**
     * Constructor.
     *
     * @param digitOrOpenPunctuationOrAlphabetSet An UnicodeSet with the digit, open punctuation and
     * alphabet.
     * @param closePunctuationSet An UnicodeSet with close punctuation.
     * @param status Information on any errors encountered.
     */
    MlBreakEngine(const UnicodeSet &digitOrOpenPunctuationOrAlphabetSet,
                  const UnicodeSet &closePunctuationSet, UErrorCode &status);

    /**
     * Virtual destructor.
     */
    virtual ~MlBreakEngine();

   public:
    /**
     * Divide up a range of characters handled by this break engine.
     *
     * @param inText A UText representing the text
     * @param rangeStart The start of the range of the characters
     * @param rangeEnd The end of the range of the characters
     * @param foundBreaks Output of C array of int32_t break positions, or 0
     * @param inString The normalized string of text ranging from rangeStart to rangeEnd
     * @param inputMap The vector storing the native index of inText
     * @param status Information on any errors encountered.
     * @return The number of breaks found
     */
    int32_t divideUpRange(UText *inText, int32_t rangeStart, int32_t rangeEnd,
                          UVector32 &foundBreaks, const UnicodeString &inString,
                          const LocalPointer<UVector32> &inputMap, UErrorCode &status) const;

   private:
    /**
     * Load the machine learning's model file.
     *
     * @param error Information on any errors encountered.
     */
    void loadMLModel(UErrorCode &error);

    /**
     * In the machine learning's model file, specify the name of the key and value to load the
     * corresponding feature and its score.
     *
     * @param rb A ResouceBundle corresponding to the model file.
     * @param keyName The kay name in the model file.
     * @param valueName The value name in the model file.
     * @param model A hashtable to store the pairs of the feature and its score.
     * @param error Information on any errors encountered.
     */
    void initKeyValue(UResourceBundle *rb, const char *keyName, const char *valueName,
                      Hashtable &model, UErrorCode &error);

    /**
     * Initialize the index list from the input string.
     *
     * @param inString A input string to be segmented.
     * @param indexList A code unit index list of inString.
     * @param status Information on any errors encountered.
     * @return The number of code units of the first four characters in inString.
     */
    int32_t initIndexList(const UnicodeString &inString, int32_t *indexList,
                          UErrorCode &status) const;

    /**
     * Evaluate whether the index is a potential breakpoint.
     *
     * @param inString A input string to be segmented.
     * @param indexList A code unit index list of the inString.
     * @param startIdx The start index of the indexList.
     * @param numCodeUnits  The current code unit boundary of the indexList.
     * @param numBreaks The accumulated number of breakpoints.
     * @param boundary A vector including the index of the breakpoint.
     * @param status Information on any errors encountered.
     * @return The number of breakpoints
     */
    int32_t evaluateBreakpoint(const UnicodeString &inString, int32_t *indexList, int32_t startIdx,
                               int32_t numCodeUnits, int32_t numBreaks, UVector32 &boundary,
                               UErrorCode &status) const;

    void printUnicodeString(const UnicodeString &s) const;

    UnicodeSet fDigitOrOpenPunctuationOrAlphabetSet;
    UnicodeSet fClosePunctuationSet;
    Hashtable fModel[13];  // {UW1, UW2, ... UW6, BW1, ... BW3, TW1, TW2, ... TW4} 6+3+4= 13
    int32_t fNegativeSum;
};

#endif

U_NAMESPACE_END

/* MLBREAKENGINE_H */
#endif
Update HarfBuzz, ICU and FreeType HarfBuzz: Update to version 7.3.0 ICU4C: Update to version 73.1 FreeType: Update to version 2.13.0 2023-05-23 00:05:01 +00:00			`// © 2022 and later: Unicode, Inc. and others.`
			`// License & terms of use: http://www.unicode.org/copyright.html`

			`#ifndef MLBREAKENGINE_H`
			`#define MLBREAKENGINE_H`

			`#include "hash.h"`
			`#include "unicode/resbund.h"`
			`#include "unicode/uniset.h"`
			`#include "unicode/utext.h"`
			`#include "uvectr32.h"`

			`U_NAMESPACE_BEGIN`

			`#if !UCONFIG_NO_BREAK_ITERATION`

			`/**`
			`* A machine learning break engine for the phrase breaking in Japanese.`
			`*/`
			`class MlBreakEngine : public UMemory {`
			`public:`
			`/**`
			`* Constructor.`
			`*`
			`* @param digitOrOpenPunctuationOrAlphabetSet An UnicodeSet with the digit, open punctuation and`
			`* alphabet.`
			`* @param closePunctuationSet An UnicodeSet with close punctuation.`
			`* @param status Information on any errors encountered.`
			`*/`
			`MlBreakEngine(const UnicodeSet &digitOrOpenPunctuationOrAlphabetSet,`
			`const UnicodeSet &closePunctuationSet, UErrorCode &status);`

			`/**`
			`* Virtual destructor.`
			`*/`
			`virtual ~MlBreakEngine();`

			`public:`
			`/**`
			`* Divide up a range of characters handled by this break engine.`
			`*`
			`* @param inText A UText representing the text`
			`* @param rangeStart The start of the range of the characters`
			`* @param rangeEnd The end of the range of the characters`
			`* @param foundBreaks Output of C array of int32_t break positions, or 0`
			`* @param inString The normalized string of text ranging from rangeStart to rangeEnd`
			`* @param inputMap The vector storing the native index of inText`
			`* @param status Information on any errors encountered.`
			`* @return The number of breaks found`
			`*/`
			`int32_t divideUpRange(UText *inText, int32_t rangeStart, int32_t rangeEnd,`
			`UVector32 &foundBreaks, const UnicodeString &inString,`
			`const LocalPointer<UVector32> &inputMap, UErrorCode &status) const;`

			`private:`
			`/**`
			`* Load the machine learning's model file.`
			`*`
			`* @param error Information on any errors encountered.`
			`*/`
			`void loadMLModel(UErrorCode &error);`

			`/**`
			`* In the machine learning's model file, specify the name of the key and value to load the`
			`* corresponding feature and its score.`
			`*`
			`* @param rb A ResouceBundle corresponding to the model file.`
			`* @param keyName The kay name in the model file.`
			`* @param valueName The value name in the model file.`
			`* @param model A hashtable to store the pairs of the feature and its score.`
			`* @param error Information on any errors encountered.`
			`*/`
			`void initKeyValue(UResourceBundle rb, const char keyName, const char *valueName,`
			`Hashtable &model, UErrorCode &error);`

			`/**`
			`* Initialize the index list from the input string.`
			`*`
			`* @param inString A input string to be segmented.`
			`* @param indexList A code unit index list of inString.`
			`* @param status Information on any errors encountered.`
			`* @return The number of code units of the first four characters in inString.`
			`*/`
			`int32_t initIndexList(const UnicodeString &inString, int32_t *indexList,`
			`UErrorCode &status) const;`

			`/**`
			`* Evaluate whether the index is a potential breakpoint.`
			`*`
			`* @param inString A input string to be segmented.`
			`* @param indexList A code unit index list of the inString.`
			`* @param startIdx The start index of the indexList.`
			`* @param numCodeUnits The current code unit boundary of the indexList.`
			`* @param numBreaks The accumulated number of breakpoints.`
			`* @param boundary A vector including the index of the breakpoint.`
			`* @param status Information on any errors encountered.`
			`* @return The number of breakpoints`
			`*/`
			`int32_t evaluateBreakpoint(const UnicodeString &inString, int32_t *indexList, int32_t startIdx,`
			`int32_t numCodeUnits, int32_t numBreaks, UVector32 &boundary,`
			`UErrorCode &status) const;`

			`void printUnicodeString(const UnicodeString &s) const;`

			`UnicodeSet fDigitOrOpenPunctuationOrAlphabetSet;`
			`UnicodeSet fClosePunctuationSet;`
			`Hashtable fModel[13]; // {UW1, UW2, ... UW6, BW1, ... BW3, TW1, TW2, ... TW4} 6+3+4= 13`
			`int32_t fNegativeSum;`
			`};`

			`#endif`

			`U_NAMESPACE_END`

			`/* MLBREAKENGINE_H */`
			`#endif`