// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2002-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  propsvec.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002feb22
*   created by: Markus W. Scherer
*
*   Store bits (Unicode character properties) in bit set vectors.
*/
|
|
|
|
#ifndef __UPROPSVEC_H__
|
|
#define __UPROPSVEC_H__
|
|
|
|
#include "unicode/utypes.h"
|
|
#include "utrie.h"
|
|
#include "utrie2.h"
|
|
|
|
U_CDECL_BEGIN
|
|
|
|
/**
 * Unicode Properties Vectors associated with code point ranges.
 *
 * Rows of uint32_t integers in a contiguous array store
 * the range limits and the properties vectors.
 *
 * Logically, each row has a certain number of uint32_t values,
 * which is set via the upvec_open() "columns" parameter.
 *
 * Internally, two additional columns are stored.
 * In each internal row,
 * row[0] contains the start code point and
 * row[1] contains the limit code point,
 * which is the start of the next range.
 *
 * Initially, there is only one "normal" row for the half-open
 * range [0..0x110000[ (0x110000 itself excluded) with values 0.
 * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP.
 *
 * It would be possible to store only one range boundary per row,
 * but self-contained rows make it possible to sort them later by contents.
 *
 * The type is opaque in this header: only a forward declaration is given,
 * so callers handle it exclusively through UPropsVectors pointers.
 */
struct UPropsVectors;
typedef struct UPropsVectors UPropsVectors;
|
|
|
|
/*
 * Special pseudo code points for storing the initialValue and the errorValue,
 * which are used to initialize a UTrie2 or similar.
 *
 * All of them lie at or above 0x110000, i.e. outside the range
 * [0..0x110000[ that holds the "normal" rows.
 */
/* First pseudo code point used for a special-purpose row. */
#define UPVEC_FIRST_SPECIAL_CP 0x110000
/* Pseudo code point whose row stores the initialValue. */
#define UPVEC_INITIAL_VALUE_CP 0x110000
/* Pseudo code point whose row stores the errorValue. */
#define UPVEC_ERROR_VALUE_CP 0x110001
/* Highest pseudo code point currently in use; might grow over time. */
#define UPVEC_MAX_CP 0x110001
|
|
|
|
/*
 * Special pseudo code point used in upvec_compact() signalling the end of
 * delivering special values and the beginning of delivering real ones.
 * Stable value, unlike UPVEC_MAX_CP which might grow over time.
 */
#define UPVEC_START_REAL_VALUES_CP 0x200000
|
|
|
|
/*
|
|
* Open a UPropsVectors object.
|
|
* @param columns Number of value integers (uint32_t) per row.
|
|
*/
|
|
U_CAPI UPropsVectors * U_EXPORT2
|
|
upvec_open(int32_t columns, UErrorCode *pErrorCode);
|
|
|
|
U_CAPI void U_EXPORT2
|
|
upvec_close(UPropsVectors *pv);
|
|
|
|
/*
|
|
* In rows for code points [start..end], select the column,
|
|
* reset the mask bits and set the value bits (ANDed with the mask).
|
|
*
|
|
* Will set U_NO_WRITE_PERMISSION if called after upvec_compact().
|
|
*/
|
|
U_CAPI void U_EXPORT2
|
|
upvec_setValue(UPropsVectors *pv,
|
|
UChar32 start, UChar32 end,
|
|
int32_t column,
|
|
uint32_t value, uint32_t mask,
|
|
UErrorCode *pErrorCode);
|
|
|
|
/*
|
|
* Logically const but must not be used on the same pv concurrently!
|
|
* Always returns 0 if called after upvec_compact().
|
|
*/
|
|
U_CAPI uint32_t U_EXPORT2
|
|
upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column);
|
|
|
|
/*
|
|
* pRangeStart and pRangeEnd can be NULL.
|
|
* @return NULL if rowIndex out of range and for illegal arguments,
|
|
* or if called after upvec_compact()
|
|
*/
|
|
U_CAPI uint32_t * U_EXPORT2
|
|
upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
|
|
UChar32 *pRangeStart, UChar32 *pRangeEnd);
|
|
|
|
/*
|
|
* Compact the vectors:
|
|
* - modify the memory
|
|
* - keep only unique vectors
|
|
* - store them contiguously from the beginning of the memory
|
|
* - for each (non-unique) row, call the handler function
|
|
*
|
|
* The handler's rowIndex is the index of the row in the compacted
|
|
* memory block.
|
|
* (Therefore, it starts at 0 increases in increments of the columns value.)
|
|
*
|
|
* In a first phase, only special values are delivered (each exactly once),
|
|
* with start==end both equalling a special pseudo code point.
|
|
* Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP
|
|
* where rowIndex is the length of the compacted array,
|
|
* and the row is arbitrary (but not NULL).
|
|
* Then, in the second phase, the handler is called for each row of real values.
|
|
*/
|
|
typedef void U_CALLCONV
|
|
UPVecCompactHandler(void *context,
|
|
UChar32 start, UChar32 end,
|
|
int32_t rowIndex, uint32_t *row, int32_t columns,
|
|
UErrorCode *pErrorCode);
|
|
|
|
U_CAPI void U_EXPORT2
|
|
upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode);
|
|
|
|
/*
|
|
* Get the vectors array after calling upvec_compact().
|
|
* The caller must not modify nor release the returned array.
|
|
* Returns NULL if called before upvec_compact().
|
|
*/
|
|
U_CAPI const uint32_t * U_EXPORT2
|
|
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
|
|
|
|
/*
|
|
* Get a clone of the vectors array after calling upvec_compact().
|
|
* The caller owns the returned array and must uprv_free() it.
|
|
* Returns NULL if called before upvec_compact().
|
|
*/
|
|
U_CAPI uint32_t * U_EXPORT2
|
|
upvec_cloneArray(const UPropsVectors *pv,
|
|
int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode);
|
|
|
|
/*
|
|
* Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted
|
|
* vectors array, and freeze the trie.
|
|
*/
|
|
U_CAPI UTrie2 * U_EXPORT2
|
|
upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode);
|
|
|
|
struct UPVecToUTrie2Context {
|
|
UTrie2 *trie;
|
|
int32_t initialValue;
|
|
int32_t errorValue;
|
|
int32_t maxValue;
|
|
};
|
|
typedef struct UPVecToUTrie2Context UPVecToUTrie2Context;
|
|
|
|
/* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */
|
|
U_CAPI void U_CALLCONV
|
|
upvec_compactToUTrie2Handler(void *context,
|
|
UChar32 start, UChar32 end,
|
|
int32_t rowIndex, uint32_t *row, int32_t columns,
|
|
UErrorCode *pErrorCode);
|
|
|
|
U_CDECL_END
|
|
|
|
#endif
|