/* ****************************************************************************** * * © 2016 and later: Unicode, Inc. and others. * License & terms of use: http://www.unicode.org/copyright.html * ****************************************************************************** * file name: ubiditransform.c * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * * created on: 2016jul24 * created by: Lina Kemmel * */ #include "cmemory.h" #include "unicode/ubidi.h" #include "unicode/ustring.h" #include "unicode/ushape.h" #include "unicode/utf16.h" #include "ustr_imp.h" #include "unicode/ubiditransform.h" /* Some convenience defines */ #define LTR UBIDI_LTR #define RTL UBIDI_RTL #define LOGICAL UBIDI_LOGICAL #define VISUAL UBIDI_VISUAL #define SHAPE_LOGICAL U_SHAPE_TEXT_DIRECTION_LOGICAL #define SHAPE_VISUAL U_SHAPE_TEXT_DIRECTION_VISUAL_LTR #define CHECK_LEN(STR, LEN, ERROR) UPRV_BLOCK_MACRO_BEGIN { \ if (LEN == 0) return 0; \ if (LEN < -1) { *(ERROR) = U_ILLEGAL_ARGUMENT_ERROR; return 0; } \ if (LEN == -1) LEN = u_strlen(STR); \ } UPRV_BLOCK_MACRO_END #define MAX_ACTIONS 7 /** * Typedef for a pointer to a function, which performs some operation (such as * reordering, setting "inverse" mode, character mirroring, etc.). Return value * indicates whether the text was changed in the course of this operation or * not. */ typedef UBool (*UBiDiAction)(UBiDiTransform *, UErrorCode *); /** * Structure that holds a predefined reordering scheme, including the following * information: *
UBiDiTransform
structure.
* @param pErrorCode Pointer to the error code value.
*
* @return Whether or not this function modifies the text. Besides the return
* value, the caller should also check U_SUCCESS(*pErrorCode)
.
*/
static UBool
action_resolve(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
{
ubidi_setPara(pTransform->pBidi, pTransform->src, pTransform->srcLength,
pTransform->pActiveScheme->baseLevel, nullptr, pErrorCode);
return false;
}
/**
* Performs basic reordering of text (Logical -> Visual LTR).
*
* @param pTransform Pointer to the UBiDiTransform
structure.
* @param pErrorCode Pointer to the error code value.
*
* @return Whether or not this function modifies the text. Besides the return
* value, the caller should also check U_SUCCESS(*pErrorCode)
.
*/
static UBool
action_reorder(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
{
ubidi_writeReordered(pTransform->pBidi, pTransform->dest, pTransform->destSize,
static_castUBiDi
object.
*
* @param pTransform Pointer to the UBiDiTransform
structure.
* @param pErrorCode Pointer to the error code value.
*
* @return Whether or not this function modifies the text. Besides the return
* value, the caller should also check U_SUCCESS(*pErrorCode)
.
*/
static UBool
action_setInverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
{
(void)pErrorCode;
ubidi_setInverse(pTransform->pBidi, true);
ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_INVERSE_LIKE_DIRECT);
return false;
}
/**
* Sets "runs only" reordering mode indicating a Logical LTR <-> Logical RTL
* transformation.
*
* @param pTransform Pointer to the UBiDiTransform
structure.
* @param pErrorCode Pointer to the error code value.
*
* @return Whether or not this function modifies the text. Besides the return
* value, the caller should also check U_SUCCESS(*pErrorCode)
.
*/
static UBool
action_setRunsOnly(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
{
(void)pErrorCode;
ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_RUNS_ONLY);
return false;
}
/**
* Performs string reverse.
*
* @param pTransform Pointer to the UBiDiTransform
structure.
* @param pErrorCode Pointer to the error code value.
*
* @return Whether or not this function modifies the text. Besides the return
* value, the caller should also check U_SUCCESS(*pErrorCode)
.
*/
static UBool
action_reverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
{
ubidi_writeReverse(pTransform->src, pTransform->srcLength,
pTransform->dest, pTransform->destSize,
UBIDI_REORDER_DEFAULT, pErrorCode);
*pTransform->pDestLength = pTransform->srcLength;
return true;
}
/**
* Applies a new value to the text that serves as input at the current
* processing step. This value is identical to the original one when we begin
* the processing, but usually changes as the transformation progresses.
*
* @param pTransform A pointer to the UBiDiTransform
structure.
* @param newSrc A pointer whose value is to be used as input text.
* @param newLength A length of the new text in char16_t
s.
* @param newSize A new source capacity in char16_t
s.
* @param pErrorCode Pointer to the error code value.
*/
static void
updateSrc(UBiDiTransform *pTransform, const char16_t *newSrc, uint32_t newLength,
uint32_t newSize, UErrorCode *pErrorCode)
{
if (newSize < newLength) {
*pErrorCode = U_BUFFER_OVERFLOW_ERROR;
return;
}
if (newSize > pTransform->srcSize) {
newSize += 50; // allocate slightly more than needed right now
if (pTransform->src != nullptr) {
uprv_free(pTransform->src);
pTransform->src = nullptr;
}
pTransform->src = (char16_t *)uprv_malloc(newSize * sizeof(char16_t));
if (pTransform->src == nullptr) {
*pErrorCode = U_MEMORY_ALLOCATION_ERROR;
//pTransform->srcLength = pTransform->srcSize = 0;
return;
}
pTransform->srcSize = newSize;
}
u_strncpy(pTransform->src, newSrc, newLength);
pTransform->srcLength = u_terminateUChars(pTransform->src,
pTransform->srcSize, newLength, pErrorCode);
}
/**
* Calls a lower level shaping function.
*
* @param pTransform Pointer to the UBiDiTransform
structure.
* @param options Shaping options.
* @param pErrorCode Pointer to the error code value.
*/
static void
doShape(UBiDiTransform *pTransform, uint32_t options, UErrorCode *pErrorCode)
{
*pTransform->pDestLength = u_shapeArabic(pTransform->src,
pTransform->srcLength, pTransform->dest, pTransform->destSize,
options, pErrorCode);
}
/**
* Performs digit and letter shaping.
*
* @param pTransform Pointer to the UBiDiTransform
structure.
* @param pErrorCode Pointer to the error code value.
*
* @return Whether or not this function modifies the text. Besides the return
* value, the caller should also check U_SUCCESS(*pErrorCode)
.
*/
static UBool
action_shapeArabic(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
{
if ((pTransform->letters | pTransform->digits) == 0) {
return false;
}
if (pTransform->pActiveScheme->lettersDir == pTransform->pActiveScheme->digitsDir) {
doShape(pTransform, pTransform->letters | pTransform->digits | pTransform->pActiveScheme->lettersDir,
pErrorCode);
} else {
doShape(pTransform, pTransform->digits | pTransform->pActiveScheme->digitsDir, pErrorCode);
if (U_SUCCESS(*pErrorCode)) {
updateSrc(pTransform, pTransform->dest, *pTransform->pDestLength,
*pTransform->pDestLength, pErrorCode);
doShape(pTransform, pTransform->letters | pTransform->pActiveScheme->lettersDir,
pErrorCode);
}
}
return true;
}
/**
* Performs character mirroring.
*
* @param pTransform Pointer to the UBiDiTransform
structure.
* @param pErrorCode Pointer to the error code value.
*
* @return Whether or not this function modifies the text. Besides the return
* value, the caller should also check U_SUCCESS(*pErrorCode)
.
*/
static UBool
action_mirror(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
{
UChar32 c;
uint32_t i = 0, j = 0;
if (0 == (pTransform->reorderingOptions & UBIDI_DO_MIRRORING)) {
return false;
}
if (pTransform->destSize < pTransform->srcLength) {
*pErrorCode = U_BUFFER_OVERFLOW_ERROR;
return false;
}
do {
UBool isOdd = ubidi_getLevelAt(pTransform->pBidi, i) & 1;
U16_NEXT(pTransform->src, i, pTransform->srcLength, c);
U16_APPEND_UNSAFE(pTransform->dest, j, isOdd ? u_charMirror(c) : c);
} while (i < pTransform->srcLength);
*pTransform->pDestLength = pTransform->srcLength;
pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT;
return true;
}
/**
* All possible reordering schemes.
*
*/
static const ReorderingScheme Schemes[] =
{
/* 0: Logical LTR => Visual LTR */
{LTR, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
{action_shapeArabic, action_resolve, action_reorder, nullptr}},
/* 1: Logical RTL => Visual LTR */
{RTL, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
{action_resolve, action_reorder, action_shapeArabic, nullptr}},
/* 2: Logical LTR => Visual RTL */
{LTR, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
{action_shapeArabic, action_resolve, action_reorder, action_reverse, nullptr}},
/* 3: Logical RTL => Visual RTL */
{RTL, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
{action_resolve, action_reorder, action_shapeArabic, action_reverse, nullptr}},
/* 4: Visual LTR => Logical RTL */
{LTR, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
{action_shapeArabic, action_setInverse, action_resolve, action_reorder, nullptr}},
/* 5: Visual RTL => Logical RTL */
{RTL, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL,
{action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_reorder, nullptr}},
/* 6: Visual LTR => Logical LTR */
{LTR, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
{action_setInverse, action_resolve, action_reorder, action_shapeArabic, nullptr}},
/* 7: Visual RTL => Logical LTR */
{RTL, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
{action_reverse, action_setInverse, action_resolve, action_reorder, action_shapeArabic, nullptr}},
/* 8: Logical LTR => Logical RTL */
{LTR, LOGICAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
{action_shapeArabic, action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, nullptr}},
/* 9: Logical RTL => Logical LTR */
{RTL, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, RTL,
{action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, action_shapeArabic, nullptr}},
/* 10: Visual LTR => Visual RTL */
{LTR, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
{action_shapeArabic, action_setInverse, action_resolve, action_mirror, action_reverse, nullptr}},
/* 11: Visual RTL => Visual LTR */
{RTL, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
{action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_mirror, nullptr}},
/* 12: Logical LTR => Logical LTR */
{LTR, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR,
{action_resolve, action_mirror, action_shapeArabic, nullptr}},
/* 13: Logical RTL => Logical RTL */
{RTL, LOGICAL, RTL, LOGICAL, SHAPE_VISUAL, SHAPE_LOGICAL, RTL,
{action_resolve, action_mirror, action_shapeArabic, nullptr}},
/* 14: Visual LTR => Visual LTR */
{LTR, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
{action_resolve, action_mirror, action_shapeArabic, nullptr}},
/* 15: Visual RTL => Visual RTL */
{RTL, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR,
{action_reverse, action_resolve, action_mirror, action_shapeArabic, action_reverse, nullptr}}
};
static const uint32_t nSchemes = sizeof(Schemes) / sizeof(*Schemes);
/**
* When the direction option is UBIDI_DEFAULT_LTR
or
* UBIDI_DEFAULT_RTL
, resolve the base direction according to that
* of the first strong bidi character.
*/
static void
resolveBaseDirection(const char16_t *text, uint32_t length,
UBiDiLevel *pInLevel, UBiDiLevel *pOutLevel)
{
switch (*pInLevel) {
case UBIDI_DEFAULT_LTR:
case UBIDI_DEFAULT_RTL: {
UBiDiLevel level = static_castReorderingScheme
matching the
* caller-defined scheme.
*
* @return A valid ReorderingScheme
object or nullptr
*/
static const ReorderingScheme*
findMatchingScheme(UBiDiLevel inLevel, UBiDiLevel outLevel,
UBiDiOrder inOrder, UBiDiOrder outOrder)
{
uint32_t i;
for (i = 0; i < nSchemes; i++) {
const ReorderingScheme *pScheme = Schemes + i;
if (inLevel == pScheme->inLevel && outLevel == pScheme->outLevel
&& inOrder == pScheme->inOrder && outOrder == pScheme->outOrder) {
return pScheme;
}
}
return nullptr;
}
U_CAPI uint32_t U_EXPORT2
ubiditransform_transform(UBiDiTransform *pBiDiTransform,
const char16_t *src, int32_t srcLength,
char16_t *dest, int32_t destSize,
UBiDiLevel inParaLevel, UBiDiOrder inOrder,
UBiDiLevel outParaLevel, UBiDiOrder outOrder,
UBiDiMirroring doMirroring, uint32_t shapingOptions,
UErrorCode *pErrorCode)
{
uint32_t destLength = 0;
UBool textChanged = false;
const UBiDiTransform *pOrigTransform = pBiDiTransform;
const UBiDiAction *action = nullptr;
if (U_FAILURE(*pErrorCode)) {
return 0;
}
if (src == nullptr || dest == nullptr) {
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
CHECK_LEN(src, srcLength, pErrorCode);
CHECK_LEN(dest, destSize, pErrorCode);
if (pBiDiTransform == nullptr) {
pBiDiTransform = ubiditransform_open(pErrorCode);
if (U_FAILURE(*pErrorCode)) {
return 0;
}
}
/* Current limitation: in multiple paragraphs will be resolved according
to the 1st paragraph */
resolveBaseDirection(src, srcLength, &inParaLevel, &outParaLevel);
pBiDiTransform->pActiveScheme = findMatchingScheme(inParaLevel, outParaLevel,
inOrder, outOrder);
if (pBiDiTransform->pActiveScheme == nullptr) {
goto cleanup;
}
pBiDiTransform->reorderingOptions = doMirroring ? UBIDI_DO_MIRRORING
: UBIDI_REORDER_DEFAULT;
/* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the text
scheme at the time shaping is invoked. */
shapingOptions &= ~U_SHAPE_TEXT_DIRECTION_MASK;
pBiDiTransform->digits = shapingOptions & ~U_SHAPE_LETTERS_MASK;
pBiDiTransform->letters = shapingOptions & ~U_SHAPE_DIGITS_MASK;
updateSrc(pBiDiTransform, src, srcLength, destSize > srcLength ? destSize : srcLength, pErrorCode);
if (U_FAILURE(*pErrorCode)) {
goto cleanup;
}
if (pBiDiTransform->pBidi == nullptr) {
pBiDiTransform->pBidi = ubidi_openSized(0, 0, pErrorCode);
if (U_FAILURE(*pErrorCode)) {
goto cleanup;
}
}
pBiDiTransform->dest = dest;
pBiDiTransform->destSize = destSize;
pBiDiTransform->pDestLength = &destLength;
/* Checking for U_SUCCESS() within the loop to bail out on first failure. */
for (action = pBiDiTransform->pActiveScheme->actions; *action && U_SUCCESS(*pErrorCode); action++) {
if ((*action)(pBiDiTransform, pErrorCode)) {
if (action + 1) {
updateSrc(pBiDiTransform, pBiDiTransform->dest, *pBiDiTransform->pDestLength,
*pBiDiTransform->pDestLength, pErrorCode);
}
textChanged = true;
}
}
ubidi_setInverse(pBiDiTransform->pBidi, false);
if (!textChanged && U_SUCCESS(*pErrorCode)) {
/* Text was not changed - just copy src to dest */
if (destSize < srcLength) {
*pErrorCode = U_BUFFER_OVERFLOW_ERROR;
} else {
u_strncpy(dest, src, srcLength);
destLength = srcLength;
}
}
cleanup:
if (pOrigTransform != pBiDiTransform) {
ubiditransform_close(pBiDiTransform);
} else {
pBiDiTransform->dest = nullptr;
pBiDiTransform->pDestLength = nullptr;
pBiDiTransform->srcLength = 0;
pBiDiTransform->destSize = 0;
}
return U_FAILURE(*pErrorCode) ? 0 : destLength;
}