src/wordbreak.c File Reference
Implementation of the word breaking algorithm as described in Unicode Standard Annex 29.  
More...
#include <assert.h>
#include <stddef.h>
#include <string.h>
#include "linebreak.h"
#include "linebreakdef.h"
#include "wordbreak.h"
#include "wordbreakdata.c"
| Defines | 
| #define | ARRAY_LEN(x)   (sizeof(x) / sizeof(x[0])) | 
| #define | IS_WB3ab(cls) | 
| Functions | 
| void | init_wordbreak (void) | 
|  | Initializes the wordbreak internals. 
 | 
| static enum WordBreakClass | get_char_wb_class (utf32_t ch, struct WordBreakProperties *wbp, size_t len) | 
|  | Gets the word breaking class of a character. 
 | 
| static void | set_brks_to (const void *s, char *brks, size_t posStart, size_t posEnd, size_t len, char brkType, get_next_char_t get_next_char) | 
|  | Sets the word break types to a specific value in a range. 
 | 
| static void | set_wordbreaks (const void *s, size_t len, const char *lang, char *brks, get_next_char_t get_next_char) | 
|  | Sets the word breaking information for a generic input string. 
 | 
| void | set_wordbreaks_utf8 (const utf8_t *s, size_t len, const char *lang, char *brks) | 
|  | Sets the word breaking information for a UTF-8 input string. 
 | 
| void | set_wordbreaks_utf16 (const utf16_t *s, size_t len, const char *lang, char *brks) | 
|  | Sets the word breaking information for a UTF-16 input string. 
 | 
| void | set_wordbreaks_utf32 (const utf32_t *s, size_t len, const char *lang, char *brks) | 
|  | Sets the word breaking information for a UTF-32 input string. 
 | 
Detailed Description
Implementation of the word breaking algorithm as described in Unicode Standard Annex 29. 
- Version:
- 2.4, 2013/09/28 
- Author:
- Tom Hacohen 
Define Documentation
      
        
          | #define ARRAY_LEN | ( | x |  | ) | (sizeof(x) / sizeof(x[0])) | 
      
 
 
Function Documentation
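          | static enum WordBreakClass get_char_wb_class | ( | utf32_t | ch, | 
        
          |  |  | struct WordBreakProperties * | wbp, | 
        
          |  |  | size_t | len |  | 
        
          |  | ) |  |  |  [static] | 
      
 
Gets the word breaking class of a character. 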
- Parameters:
- 
  
    |  | ch | character to check |  |  | wbp | pointer to the word breaking properties array |  |  | len | size of the wbp array in number of items |  
 
- Returns:
- the word breaking class if found; WBP_Any otherwise
 
 
      
        
          | void init_wordbreak | ( | void |  | ) |  | 
      
 
Initializes the wordbreak internals. 
It currently does nothing, but it may in the future. 
 
 
      
        
          | static void set_brks_to | ( | const void * | s, | 
        
          |  |  | char * | brks, | 
        
          |  |  | size_t | posStart, | 
        
          |  |  | size_t | posEnd, | 
        
          |  |  | size_t | len, | 
        
          |  |  | char | brkType, | 
        
          |  |  | get_next_char_t | get_next_char |  | 
        
          |  | ) |  |  |  [static] | 
      
 
Sets the word break types to a specific value in a range. 
Positions that fall inside a character are set to WORDBREAK_INSIDEACHAR, and the remaining positions in the range are set to brkType. Assumes brks has already been initialized: cells holding WORDBREAK_NOBREAK mark positions that must not be broken after.
- Parameters:
- 
  
    | [in] | s | input string |  | [out] | brks | breaks array to fill |  | [in] | posStart | start position |  | [in] | posEnd | end position (exclusive) |  | [in] | len | length of the string |  | [in] | brkType | breaks type to use |  | [in] | get_next_char | function to get the next UTF-32 character |  
 
 
 
      
        
          | static void set_wordbreaks | ( | const void * | s, | 
        
          |  |  | size_t | len, | 
        
          |  |  | const char * | lang, | 
        
          |  |  | char * | brks, | 
        
          |  |  | get_next_char_t | get_next_char |  | 
        
          |  | ) |  |  |  [static] | 
      
 
Sets the word breaking information for a generic input string. 
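- Parameters:
- 
  
    | [in] | s | input string |  | [in] | len | length of the input |  | [in] | lang | language of the input |  | [out] | brks | breaks array to fill |  | [in] | get_next_char | function to get the next UTF-32 character |  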
 
 
      
        
          | void set_wordbreaks_utf16 | ( | const utf16_t * | s, | 
        
          |  |  | size_t | len, | 
        
          |  |  | const char * | lang, | 
        
          |  |  | char * | brks |  | 
        
          |  | ) |  |  |  | 
      
 
Sets the word breaking information for a UTF-16 input string. 
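- Parameters:
- 
  
    | [in] | s | input UTF-16 string |  | [in] | len | length of the input |  | [in] | lang | language of the input |  | [out] | brks | breaks array to fill |  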
 
 
      
        
          | void set_wordbreaks_utf32 | ( | const utf32_t * | s, | 
        
          |  |  | size_t | len, | 
        
          |  |  | const char * | lang, | 
        
          |  |  | char * | brks |  | 
        
          |  | ) |  |  |  | 
      
 
Sets the word breaking information for a UTF-32 input string. 
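- Parameters:
- 
  
    | [in] | s | input UTF-32 string |  | [in] | len | length of the input |  | [in] | lang | language of the input |  | [out] | brks | breaks array to fill |  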
 
 
      
        
          | void set_wordbreaks_utf8 | ( | const utf8_t * | s, | 
        
          |  |  | size_t | len, | 
        
          |  |  | const char * | lang, | 
        
          |  |  | char * | brks |  | 
        
          |  | ) |  |  |  | 
      
 
Sets the word breaking information for a UTF-8 input string. 
- Parameters:
-