ICU 67.1
rbbi.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ***************************************************************************
5 * Copyright (C) 1999-2016 International Business Machines Corporation *
6 * and others. All rights reserved. *
7 ***************************************************************************
8 
9 **********************************************************************
10 * Date Name Description
11 * 10/22/99 alan Creation.
12 * 11/11/99 rgillam Complete port from Java.
13 **********************************************************************
14 */
15 
16 #ifndef RBBI_H
17 #define RBBI_H
18 
19 #include "unicode/utypes.h"
20 
21 #if U_SHOW_CPLUSPLUS_API
22 
28 #if !UCONFIG_NO_BREAK_ITERATION
29 
30 #include "unicode/brkiter.h"
31 #include "unicode/udata.h"
32 #include "unicode/parseerr.h"
33 #include "unicode/schriter.h"
34 
35 U_NAMESPACE_BEGIN
36 
// Forward declarations. This header only refers to these types through
// pointers, so their full definitions are not needed here.
38 class LanguageBreakEngine;
39 struct RBBIDataHeader;
40 class RBBIDataWrapper;
41 class UnhandledEngine;
42 class UStack;
43 
// A subclass of BreakIterator whose behavior is specified using a list of rules.
// (Class header restored: the listing lost this line, leaving the member
// declarations and the closing "};" without an opening declaration.)
55 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator {
56 
57 private:
// NOTE(review): presumably the UText for the text being analyzed
// (cf. getUText / setText) -- confirm against the implementation.
62  UText fText;
63 
// fData is public by default; when U_HIDE_INTERNAL_API is defined the
// "public:" label below is compiled out and fData stays private.
64 #ifndef U_HIDE_INTERNAL_API
65 public:
66 #endif /* U_HIDE_INTERNAL_API */
// The rule data for this BreakIterator instance.
72  RBBIDataWrapper *fData;
73 private:
74 
// NOTE(review): presumably the position reported by current() -- confirm.
79  int32_t fPosition;
80 
// NOTE(review): presumably an index into the rule status data used by
// getRuleStatus / getRuleStatusVec -- confirm.
84  int32_t fRuleStatusIndex;
85 
// Nested helper class; only declared here and held by pointer.
89  class BreakCache;
90  BreakCache *fBreakCache;
91 
// Nested helper class; only declared here and held by pointer.
96  class DictionaryCache;
97  DictionaryCache *fDictionaryCache;
98 
// NOTE(review): presumably the set of LanguageBreakEngine objects consulted
// by getLanguageBreakEngine() -- confirm.
106  UStack *fLanguageBreakEngines;
107 
// NOTE(review): presumably the fallback engine for characters no other
// engine handles -- confirm.
115  UnhandledEngine *fUnhandledBreakEngine;
116 
// NOTE(review): presumably a count of dictionary characters seen -- confirm.
122  uint32_t fDictionaryCharCount;
123 
// NOTE(review): presumably backs getText() / adoptText() -- confirm ownership.
129  CharacterIterator *fCharIter;
130 
// Embedded (by-value) StringCharacterIterator.
// NOTE(review): relationship to fCharIter is not visible here -- confirm.
136  StringCharacterIterator fSCharIter;
137 
// NOTE(review): presumably set when iteration has run off the text -- confirm.
141  UBool fDone;
142 
143  //=======================================================================
144  // constructors
145  //=======================================================================
146 
// Private constructor (declared under the "private:" label above) taking a
// raw RBBIDataHeader. Errors are reported through `status`.
// NOTE(review): ownership of `data` is not visible here -- confirm.
157  RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
158 
// Friends: these classes are granted access to the private members,
// including the private constructor above.
160  friend class RBBIRuleBuilder;
162  friend class BreakIterator;
163 
164 public:
165 
// Default constructor.
171  RuleBasedBreakIterator();
172 
// Copy constructor.
179  RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
180 
// Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
// Parse problems are reported through `parseError`, other failures through
// `status`.
188  RuleBasedBreakIterator( const UnicodeString &rules,
189  UParseError &parseError,
190  UErrorCode &status);
191 
// Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
// `ruleLength` is the length of `compiledRules`.
// NOTE(review): whether the rule bytes are copied or merely referenced is not
// visible here -- confirm before freeing the buffer.
215  RuleBasedBreakIterator(const uint8_t *compiledRules,
216  uint32_t ruleLength,
217  UErrorCode &status);
218 
// Uses the udata interface to create a BreakIterator from a loaded data image.
232  RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
233 
// Destructor.
238  virtual ~RuleBasedBreakIterator();
239 
// Assignment operator.
247  RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
248 
// Equality operator.
256  virtual UBool operator==(const BreakIterator& that) const;
257 
// Inequality operator; defined inline at the end of this header as the
// negation of operator==.
265  inline UBool operator!=(const BreakIterator& that) const;
266 
// Returns a newly-constructed RuleBasedBreakIterator with the same behavior
// as this one.
277  virtual RuleBasedBreakIterator* clone() const;
278 
// Compute a hash code for this BreakIterator.
284  virtual int32_t hashCode(void) const;
285 
// Returns the description used to create this iterator.
291  virtual const UnicodeString& getRules(void) const;
292 
293  //=======================================================================
294  // BreakIterator overrides
295  //=======================================================================
296 
// NOTE(review): presumably returns a CharacterIterator over the text being
// analyzed -- confirm.
322  virtual CharacterIterator& getText(void) const;
323 
324 
// Get a UText for the text being analyzed. Errors are reported via `status`.
// NOTE(review): `fillIn` presumably is an optional UText to reuse -- confirm.
339  virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
340 
// Set the iterator to analyze a new piece of text, supplied as a
// CharacterIterator.
// NOTE(review): the name suggests ownership of `newText` is taken -- confirm.
348  virtual void adoptText(CharacterIterator* newText);
349 
// Set the iterator to analyze a new piece of text.
361  virtual void setText(const UnicodeString& newText);
362 
// Reset the break iterator to operate over the text represented by the UText.
376  virtual void setText(UText *text, UErrorCode &status);
377 
// Sets the current iteration position to the beginning of the text,
// position zero.
383  virtual int32_t first(void);
384 
// Sets the current iteration position to the end of the text.
390  virtual int32_t last(void);
391 
// Advances the iterator either forward or backward the specified number
// of steps.
402  virtual int32_t next(int32_t n);
403 
// Advances the iterator to the next boundary position.
409  virtual int32_t next(void);
410 
// Moves the iterator backwards, to the last boundary preceding this one.
416  virtual int32_t previous(void);
417 
// Sets the iterator to refer to the first boundary position following the
// specified position.
425  virtual int32_t following(int32_t offset);
426 
// Sets the iterator to refer to the last boundary position before the
// specified position.
434  virtual int32_t preceding(int32_t offset);
435 
// Returns true if the specified position is a boundary position.
444  virtual UBool isBoundary(int32_t offset);
445 
// Returns the current iteration position.
454  virtual int32_t current(void) const;
455 
456 
// Return the status tag from the break rule that determined the boundary at
// the current iteration position.
488  virtual int32_t getRuleStatus() const;
489 
// Get the status (tag) values from the break rule(s) that determined the
// boundary at the current iteration position.
// NOTE(review): presumably writes up to `capacity` values into `fillInVec`
// and returns a count -- confirm.
513  virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
514 
// Returns a unique class ID POLYMORPHICALLY.
526  virtual UClassID getDynamicClassID(void) const;
527 
// Returns the class ID for this class.
539  static UClassID U_EXPORT2 getStaticClassID(void);
540 
// Deprecated functionality. Compiled out entirely when
// U_FORCE_HIDE_DEPRECATED_API is defined.
541 #ifndef U_FORCE_HIDE_DEPRECATED_API
568  virtual RuleBasedBreakIterator *createBufferClone(void *stackBuffer,
569  int32_t &BufferSize,
570  UErrorCode &status);
571 #endif // U_FORCE_HIDE_DEPRECATED_API
572 
// Return the binary form of compiled break rules, which can then be used to
// create a new break iterator. `length` is an output parameter.
// NOTE(review): presumably receives the size of the returned data in bytes,
// matching the (compiledRules, ruleLength) constructor -- confirm.
590  virtual const uint8_t *getBinaryRules(uint32_t &length);
591 
// Set the subject text string upon which the break iterator is operating
// without changing any other aspect of the iterator.
618  virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status);
619 
620 private:
621  //=======================================================================
622  // implementation
623  //=======================================================================
// NOTE(review): presumably returns the iterator to its initial state -- confirm.
629  void reset(void);
630 
// Initialization helper; failures are reported through `status`.
// NOTE(review): presumably shared by the constructors -- confirm.
635  void init(UErrorCode &status);
636 
// NOTE(review): presumably backs up from `fromPosition` to a position from
// which forward iteration can safely resume, and returns it -- confirm.
646  int32_t handleSafePrevious(int32_t fromPosition);
647 
// NOTE(review): presumably finds the next boundary from the current position
// and returns it -- confirm.
660  int32_t handleNext();
661 
662 
// Look up the LanguageBreakEngine for code point `c`.
// NOTE(review): behavior when no engine matches is not visible here -- confirm.
669  const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
670 
// The "public:" label sits outside the #ifndef, so access is switched back
// to public unconditionally. The debug helpers below are only declared when
// U_HIDE_INTERNAL_API is not defined.
671  public:
672 #ifndef U_HIDE_INTERNAL_API
// Debugging function only.
677  void dumpCache();
678 
// Debugging function only.
683  void dumpTables();
684 
685 #endif /* U_HIDE_INTERNAL_API */
686 };
687 
688 //------------------------------------------------------------------------------
689 //
690 // Inline Functions Definitions ...
691 //
692 //------------------------------------------------------------------------------
693 
// Inequality is defined as the negation of the virtual operator==.
// (Function header restored: the listing lost this line, leaving an orphaned
// body.)
694 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
695  return !operator==(that);
696 }
697 
698 U_NAMESPACE_END
699 
700 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
701 
702 #endif /* U_SHOW_CPLUSPLUS_API */
703 
704 #endif
C++ API: Break Iterator.
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition: brkiter.h:106
UBool operator!=(const BreakIterator &rhs) const
Returns the complement of the result of operator==.
Definition: brkiter.h:135
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:361
A subclass of BreakIterator whose behavior is specified using a list of rules.
Definition: rbbi.h:55
virtual void setText(UText *text, UErrorCode &status)
Reset the break iterator to operate over the text represented by the UText.
RBBIDataWrapper * fData
The rule data for this BreakIterator instance.
Definition: rbbi.h:72
virtual UText * getUText(UText *fillIn, UErrorCode &status) const
Get a UText for the text being analyzed.
virtual CharacterIterator & getText(void) const
RuleBasedBreakIterator & operator=(const RuleBasedBreakIterator &that)
Assignment operator.
virtual RuleBasedBreakIterator & refreshInputText(UText *input, UErrorCode &status)
Set the subject text string upon which the break iterator is operating without changing any other aspect of the state.
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status)
Get the status (tag) values from the break rule(s) that determined the boundary at the current iteration position.
virtual UClassID getDynamicClassID(void) const
Returns a unique class ID POLYMORPHICALLY.
void dumpTables()
Debugging function only.
virtual RuleBasedBreakIterator * clone() const
Returns a newly-constructed RuleBasedBreakIterator with the same behavior, and iterating over the same text, as this one.
virtual int32_t previous(void)
Moves the iterator backwards, to the last boundary preceding this one.
virtual int32_t current(void) const
Returns the current iteration position.
virtual int32_t next(void)
Advances the iterator to the next boundary position.
void dumpCache()
Debugging function only.
virtual int32_t getRuleStatus() const
Return the status tag from the break rule that determined the boundary at the current iteration position.
RuleBasedBreakIterator()
Default constructor.
virtual void setText(const UnicodeString &newText)
Set the iterator to analyze a new piece of text.
virtual RuleBasedBreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status)
Deprecated functionality.
virtual const uint8_t * getBinaryRules(uint32_t &length)
Return the binary form of compiled break rules, which can then be used to create a new break iterator at some time in the future.
virtual UBool operator==(const BreakIterator &that) const
Equality operator.
static UClassID getStaticClassID(void)
Returns the class ID for this class.
virtual UBool isBoundary(int32_t offset)
Returns true if the specified position is a boundary position.
virtual int32_t preceding(int32_t offset)
Sets the iterator to refer to the last boundary position before the specified position.
RuleBasedBreakIterator(const RuleBasedBreakIterator &that)
Copy constructor.
virtual int32_t next(int32_t n)
Advances the iterator either forward or backward the specified number of steps.
virtual const UnicodeString & getRules(void) const
Returns the description used to create this iterator.
RuleBasedBreakIterator(UDataMemory *image, UErrorCode &status)
This constructor uses the udata interface to create a BreakIterator whose internal tables live in a memory-mapped file.
virtual int32_t following(int32_t offset)
Sets the iterator to refer to the first boundary position following the specified position.
virtual int32_t first(void)
Sets the current iteration position to the beginning of the text, position zero.
virtual int32_t last(void)
Sets the current iteration position to the end of the text.
virtual ~RuleBasedBreakIterator()
Destructor.
RuleBasedBreakIterator(const UnicodeString &rules, UParseError &parseError, UErrorCode &status)
Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
virtual void adoptText(CharacterIterator *newText)
Set the iterator to analyze a new piece of text.
virtual int32_t hashCode(void) const
Compute a hash code for this BreakIterator.
RuleBasedBreakIterator(const uint8_t *compiledRules, uint32_t ruleLength, UErrorCode &status)
Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
A concrete subclass of CharacterIterator that iterates over the characters (code units or code points) in a UnicodeString.
Definition: schriter.h:48
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer/StringBuilder classes.
Definition: unistr.h:295
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
UBool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:347
C API: Parse Error Information.
C++ API: String Character Iterator.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
UText struct.
Definition: utext.h:1328
C API: Data loading interface.
struct UDataMemory UDataMemory
Forward declaration of the data memory type.
Definition: udata.h:158
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:425
int8_t UBool
The ICU boolean type.
Definition: umachine.h:261
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:300