/*
* UTF-8 Utilities
* Copyright
* (C) 2004 Joseph H. Allen
*
* This file is part of JOE (Joe's Own Editor)
*/
/* UTF-8 Encoder
 *
 * c is the Unicode character to encode.
 * buf is a 7 byte buffer: the UTF-8 coded character is written to it,
 * followed by a 0 terminator.
 *
 * Returns the length of the encoding (not including the terminator).
 */
ptrdiff_t utf8_encode(char *buf,int c);
/* State machine used by the UTF-8 decoder */
struct utf8_sm {
	/* Record of the sequence */
	char buf[8];
	/* Record pointer */
	ptrdiff_t ptr;
	/* Current state: 0 means idle, any other value is the number of
	 * chars left in the sequence */
	int state;
	/* Accumulator for the character */
	int accu;
};
/* UTF-8 Decoder
*
* Returns 0 - 7FFFFFFF: decoded character
* UTF8_ACCEPTED (-257): character accepted, nothing decoded yet.
* UTF8_INCOMPLETE (-258): incomplete sequence
* UTF8_BAD (-259): no sequence started, but character is between 128 - 191, 254 or 255
*/
/* Status codes returned by the UTF-8 decoder in place of a decoded
 * character. The negative values are parenthesized so that each macro
 * always expands to a single, self-contained expression. */
#define UTF8_ACCEPTED (-257)	/* character accepted, nothing decoded yet */
#define UTF8_INCOMPLETE (-258)	/* incomplete sequence */
#define UTF8_BAD (-259)	/* no sequence started, but character is between 128 - 191, 254 or 255 */
/* Feed one byte, c, to the decoder state machine utf8_sm.
 * Returns the decoded character, or one of UTF8_ACCEPTED,
 * UTF8_INCOMPLETE or UTF8_BAD. */
int utf8_decode(struct utf8_sm *utf8_sm,char c);
/* NOTE(review): presumably decodes the first UTF-8 character of the
 * string s; the return convention is not visible here -- confirm against
 * the implementation. */
int utf8_decode_string(const char *s);
/* NOTE(review): presumably decodes one character starting at *p, advancing
 * *p and reducing *plen by the bytes consumed -- confirm against the
 * implementation. */
int utf8_decode_fwrd(const char **p,ptrdiff_t *plen);
/* Initialize the UTF-8 decoder state machine utf8_sm.
 * NOTE(review): presumably leaves it in the idle state (state == 0) --
 * confirm against the implementation. */
void utf8_init(struct utf8_sm *utf8_sm);
/* Forward declaration: the character map is only handled by pointer here,
 * so its full definition is not needed. Without a file-scope declaration,
 * 'struct charmap' named in the parameter list would have prototype scope
 * only, and no caller could pass a compatible pointer. Redeclaring an
 * already-declared struct tag is harmless. */
struct charmap;

/* Get next character from string and advance it, locale dependent.
 *
 * map is the character map used to interpret the string.
 * s points to the string pointer; len points to a length.
 * NOTE(review): presumably *s is advanced and *len reduced by the number
 * of bytes consumed -- confirm against the implementation.
 *
 * Returns the character.
 */
int fwrd_c(struct charmap *map, const char **s, ptrdiff_t *len);
/* UTF-16 encoder
 *
 * c is the Unicode character to encode.
 * buf is a 4 byte buffer that receives the encoding.
 *
 * Returns the length, or UTF16_BAD for an encode error.
 * UTF16_BAD is returned if c is between 0xD800 - 0xDFFF, or > 0x10FFFF, or < 0.
 *
 * NOTE(review): utf16r_encode is presumably the reversed byte order
 * variant of utf16_encode -- confirm against the implementation.
 */
ptrdiff_t utf16_encode(char *buf, int c);
ptrdiff_t utf16r_encode(char *buf, int c);
/* State machine handed to utf16_decode() / utf16r_decode() */
struct utf16_sm {
	/* Decoder state.
	 * NOTE(review): meaning of the values is not documented here --
	 * confirm against the implementation. */
	int state;
};
/* UTF-16 Decoder
*
* Returns 0 - 10FFFF: decoded character
* -257: character accepted, nothing decoded yet.
* -258: incomplete sequence
* -259: no sequence started, but character is between 0xDC00 - 0xDFFF
*/
/* Status codes returned by the UTF-16 decoder in place of a decoded
 * character (UTF16_BAD is also the encoder's error return). The negative
 * values are parenthesized so that each macro always expands to a single,
 * self-contained expression. */
#define UTF16_ACCEPTED (-257)	/* character accepted, nothing decoded yet */
#define UTF16_INCOMPLETE (-258)	/* incomplete sequence */
#define UTF16_BAD (-259)	/* no sequence started, but character is between 0xDC00 - 0xDFFF */
/* Feed one 16-bit unit, c, to the decoder state machine sm.
 * Returns the decoded character, or one of UTF16_ACCEPTED,
 * UTF16_INCOMPLETE or UTF16_BAD.
 * NOTE(review): utf16r_decode presumably expects c in reversed byte
 * order -- confirm against the implementation. */
int utf16_decode(struct utf16_sm *sm, unsigned short c);
int utf16r_decode(struct utf16_sm *sm, unsigned short c);
/* Initialize the UTF-16 decoder state machine sm */
void utf16_init(struct utf16_sm *sm);