/*
* Characters classes
* Copyright
* (C) 2015 Joseph H. Allen
*
* This file is part of JOE (Joe's Own Editor)
*/
#include "types.h"
/* Sort an array of struct intervals */
static int itest(const void *ia, const void *ib)
{
struct interval *a = (struct interval *)ia;
struct interval *b = (struct interval *)ib;
if (a->first > b->first)
return 1;
else if (a->first < b->first)
return -1;
else
return 0;
}
void interval_sort(struct interval *array, ptrdiff_t num)
{
jsort(array, num, SIZEOF(struct interval), itest);
}
/* Test if character ch is in one of the struct intervals in array using binary search.
* Returns index to interval, or -1 if none found. */
ptrdiff_t interval_test(struct interval *array, ptrdiff_t size, int ch)
{
if (size) {
ptrdiff_t min = 0;
ptrdiff_t mid;
ptrdiff_t max = size - 1;
if (ch < array[min].first || ch > array[max].last)
goto no_match;
while (max >= min) {
mid = (min + max) / 2;
if (ch > array[mid].last)
min = mid + 1;
else if (ch < array[mid].first)
max = mid - 1;
else
return mid;
}
}
no_match:
return -1;
}
/* Return true if character ch is in radix tree r */
int rset_lookup(struct Rset *r, int ch)
{
int a = (TOPMASK & (ch >> TOPSHIFT));
int b = (SECONDMASK & (ch >> SECONDSHIFT));
int c = (THIRDMASK & (ch >> THIRDSHIFT));
int d = (LEAFMASK & (ch >> LEAFSHIFT));
if (a >= TOPSIZE)
return 0;
if (a || b) { /* Full lookup for character >= 512 */
int idx = r->top.entry[a];
if (idx != -1) {
idx = r->second.table.b[idx].entry[b];
if (idx != -1) {
idx = r->third.table.c[idx].entry[c];
return ((1 << d) & idx) != 0;
}
}
} else { /* Quick lookup for character < 512 */
int idx = r->mid.entry[c];
return ((1 << d) & idx) != 0;
}
return 0;
}
/* Same as above, but can be be called before rset_opt() */
int rset_lookup_unopt(struct Rset *r, int ch)
{
int a = (TOPMASK & (ch >> TOPSHIFT));
int b = (SECONDMASK & (ch >> SECONDSHIFT));
int c = (THIRDMASK & (ch >> THIRDSHIFT));
int d = (LEAFMASK & (ch >> LEAFSHIFT));
int idx;
if (a >= TOPSIZE)
return 0;
idx = r->top.entry[a];
if (idx != -1) {
idx = r->second.table.b[idx].entry[b];
if (idx != -1) {
idx = r->third.table.c[idx].entry[c];
return ((1 << d) & idx) != 0;
}
}
return 0;
}
void rset_init(struct Rset *r)
{
int x;
for (x = 0; x != TOPSIZE; ++x)
r->top.entry[x] = -1;
r->second.alloc = 0;
r->second.size = 1;
r->second.table.b = (struct Mid *)joe_malloc(r->second.size * SIZEOF(struct Mid));
r->third.alloc = 0;
r->third.size = 1;
r->third.table.c = (struct Mid *)joe_malloc(r->third.size * SIZEOF(struct Mid));
}
void rset_clr(struct Rset *r)
{
joe_free(r->second.table.b);
joe_free(r->third.table.c);
}
static int rset_alloc(struct Level *l, int levelno)
{
int x;
if (l->alloc == l->size) {
l->size *= 2;
switch (levelno) {
case 1: {
l->table.b = (struct Mid *)joe_realloc(l->table.b, l->size * SIZEOF(struct Mid));
break;
} case 2: {
l->table.c = (struct Mid *)joe_realloc(l->table.c, l->size * SIZEOF(struct Mid));
break;
}
}
}
switch (levelno) {
case 1: {
for (x = 0; x != SECONDSIZE; ++x)
l->table.b[l->alloc].entry[x] = -1;
break;
} case 2: {
for (x = 0; x != THIRDSIZE; ++x)
l->table.c[l->alloc].entry[x] = 0;
break;
}
}
return l->alloc++;
}
void rset_add(struct Rset *r, int ch, int che)
{
int a = (TOPMASK & (ch >> TOPSHIFT));
int b = (THIRDMASK & (ch >> SECONDSHIFT));
int c = (SECONDMASK & (ch >> THIRDSHIFT));
int d = (LEAFMASK & (ch >> LEAFSHIFT));
int ib;
int ic;
while (ch <= che) {
if (a >= TOPSIZE)
return;
ib = r->top.entry[a];
if (ib == -1) {
r->top.entry[a] = ib = rset_alloc(&r->second, 1);
}
while (ch <= che) {
ic = r->second.table.b[ib].entry[b];
if (ic == -1) {
r->second.table.b[ib].entry[b] = ic = rset_alloc(&r->third, 2);
}
while (ch <= che) {
r->third.table.c[ic].entry[c] |= (1 << d);
++ch;
if (++d == LEAFSIZE) {
d = 0;
if (++c == THIRDSIZE) {
c = 0;
break;
}
}
}
if (++b == SECONDSIZE) {
b = 0;
break;
}
}
++a;
}
}
/* Optimize radix tree: setup mid */
void rset_opt(struct Rset *r)
{
int x;
int idx;
/* Set up mid table */
for (x = 0; x != THIRDSIZE; ++x)
r->mid.entry[x] = 0;
idx = r->top.entry[0];
if (idx != -1) {
idx = r->second.table.b[idx].entry[0];
if (idx != -1) {
mcpy(r->mid.entry, r->third.table.c[idx].entry, SIZEOF(r->mid.entry));
}
}
}
void rset_set(struct Rset *r, struct interval *array, ptrdiff_t size)
{
ptrdiff_t y;
for (y = 0; y != size; ++y) {
rset_add(r, array[y].first, array[y].last);
/* int x;
for (x = array[y].first; x <= array[y].last; ++x)
rset_add(r, x); */
}
}
void rset_show(struct Rset *r)
{
int first = -2;
int last = -2;
int a;
for (a = 0; a != TOPSIZE; ++a) {
int ib = r->top.entry[a];
if (ib != -1) {
int b;
for (b = 0; b != SECONDSIZE; ++b) {
int ic = r->second.table.b[ib].entry[b];
if (ic != -1) {
int c;
for (c = 0; c != THIRDSIZE; ++c) {
int id = r->third.table.c[ic].entry[c];
int d;
for (d = 0; d != LEAFSIZE; ++d) {
if (id & (1 << d)) {
int ch = (a << TOPSHIFT) + (b << SECONDSHIFT) + (c << THIRDSHIFT) + d;
if (ch == last + 1) {
last = ch;
} else if (first != -2) {
printf(" { 0x%4.4X, 0x%4.4X },\n", first, last);
first = last = ch;
} else {
first = last = ch;
}
}
}
}
}
}
}
}
if (first != -2)
printf(" { 0x%4.4X, 0x%4.4X },\n", first, last);
}
void cclass_init(struct Cclass *m)
{
m->size = 0;
m->len = 0;
m->intervals = 0;
}
void cclass_clr(struct Cclass *m)
{
if (m->intervals)
joe_free(m->intervals);
m->size = 0;
m->len = 0;
m->intervals = 0;
}
/* Delete range from character class at position x
* x is in range 0 to (m->len - 1), inclusive.
*/
static void cclass_del(struct Cclass *m, int x)
{
mmove(m->intervals + x, m->intervals + x + 1, SIZEOF(struct interval) * (m->len - (x + 1)));
--m->len;
}
/* We are about to insert: expand array if necessary */
static void cclass_grow(struct Cclass *m)
{
if (m->len == m->size) {
if (!m->size) {
m->size = 1;
m->intervals = (struct interval *)joe_malloc(SIZEOF(struct interval) * (m->size));
} else {
m->size *= 2;
m->intervals = (struct interval *)joe_realloc(m->intervals, SIZEOF(struct interval) * (m->size));
}
}
}
/* Insert a range into a character class at position x
* x is in range 0 to m->len inclusive.
*/
static void cclass_ins(struct Cclass *m, int x, int first, int last)
{
cclass_grow(m);
mmove(m->intervals + x + 1, m->intervals + x, SIZEOF(struct interval) * (m->len - x));
++m->len;
m->intervals[x].first = first;
m->intervals[x].last = last;
}
/* Add a single range [first,last] into class m. The resulting m->intervals will
* be sorted and consist of non-overlapping, non-adjacent ranges.
*/
void cclass_add(struct Cclass *m, int first, int last)
{
int x;
/* If it's invalid, ignore */
if (last < first)
return;
for (x = 0; x != m->len; ++x) {
if (first > m->intervals[x].last + 1) {
/* intervals[x] is below new range, skip it. */
} else if (m->intervals[x].first > last + 1) {
/* intervals[x] is fully above new range, so insert new one here */
break;
} else if (m->intervals[x].first <= first) {
if (m->intervals[x].last >= last) { /* && m->intervals[x].first <= first */
/* Existing covers new: we're done. */
return;
} else { /* m->intervals[x].last < last && m->intervals[x].first <= first */
/* Enlarge new, delete existing */
first = m->intervals[x].first;
cclass_del(m, x);
--x;
}
} else { /* m->intervals[x].first > first */
if (m->intervals[x].last <= last) { /* && m->intervals[x].first <= first */
/* New fully covers existing, delete existing */
cclass_del(m, x);
--x;
} else { /* m->intervals[x].last > last && m->intervals[x].first > first */
/* Extend existing */
m->intervals[x].first = first;
return;
}
}
}
cclass_ins(m, x, first, last);
}
/* Merge class n into class m */
void cclass_union(struct Cclass *m, struct Cclass *n)
{
int x;
if (n)
for (x = 0; x != n->len; ++x)
cclass_add(m, n->intervals[x].first, n->intervals[x].last);
}
void cclass_merge(struct Cclass *m, struct interval *array, int len)
{
int x;
for (x = 0; x != len; ++x)
cclass_add(m, array[x].first, array[x].last);
}
/* Subtract a single range [first,last] from class m. The resulting m->array will
* be sorted and consist of non-overlapping, non-adjacent ranges.
*/
void cclass_sub(struct Cclass *m, int first, int last)
{
int x;
/* If it's invalid, ignore */
if (last < first)
return;
for (x = 0; x != m->len; ++x) {
if (first > m->intervals[x].last) {
/* intervals[x] is below range, skip it. */
} else if (m->intervals[x].first > last) {
/* intervals[x] is fully above new range, we're done */
break;
} else if (first <= m->intervals[x].first) {
if (last >= m->intervals[x].last) { /* && first <= m->intervals[x].first */
/* Range fully covers entry, delete it */
cclass_del(m, x);
--x;
} else { /* last < m->intervals[x].last && first <= m->intervals[x].first */
/* Range cuts off bottom of entry */
m->intervals[x].first = last + 1;
}
} else { /* first > m->intervals[x].first */
if (last >= m->intervals[x].last) { /* && first > m->intervals[x].first */
/* Range cuts off top of entry */
m->intervals[x].last = first - 1;
} else { /* last < m->intervals[x].last && first > m->intervals[x].first */
/* Range is in middle of entry, split it */
cclass_ins(m, x, m->intervals[x].first, first - 1);
m->intervals[x + 1].first = last + 1;
++x;
}
}
}
}
/* Remove any parts of m which also appear in n */
void cclass_diff(struct Cclass *m, struct Cclass *n)
{
int x;
if (n)
for (x = 0; x != n->len; ++x)
cclass_sub(m, n->intervals[x].first, n->intervals[x].last);
}
/* Compute inverse of class m */
#define UNICODE_LAST 0x10FFFF
void cclass_inv(struct Cclass *m)
{
// printf("\r\nBefore:\n");
// cclass_show(m);
if (m->len && !m->intervals[0].first) {
/* Starts at 0 */
int x;
int last = m->intervals[0].last;
for (x = 0; x != m->len - 1; ++x) {
m->intervals[x].first = last + 1;
m->intervals[x].last = m->intervals[x + 1].first - 1;
last = m->intervals[x + 1].last;
}
if (last == UNICODE_LAST) {
--m->len; /* Delete last one */
} else {
m->intervals[x].first = last + 1;
m->intervals[x].last = UNICODE_LAST;
}
} else {
/* Does not start at 0 */
int x;
int first = 0;
for (x = 0; x != m->len; ++x) {
int new_first = m->intervals[x].last + 1;
m->intervals[x].last = m->intervals[x].first - 1;
m->intervals[x].first = first;
first = new_first;
}
if (first != UNICODE_LAST) {
cclass_grow(m);
m->intervals[x].first = first;
m->intervals[x].last = UNICODE_LAST;
++m->len;
}
}
// printf("\r\nAfter:\n");
// cclass_show(m);
// sleep(1);
}
int cclass_lookup_unopt(struct Cclass *m, int ch)
{
return interval_test(m->intervals, m->len, ch) != -1;
}
void cclass_opt(struct Cclass *m)
{
rset_init(m->rset);
rset_set(m->rset, m->intervals, m->len);
rset_opt(m->rset);
}
int cclass_lookup(struct Cclass *m, int ch)
{
return rset_lookup(m->rset, ch);
}
void cclass_show(struct Cclass *m)
{
int x;
int first = 0;
for (x = 0; x != m->len; ++x) {
if (!first)
first = 1;
else
printf(" ");
printf("[%x..%x]", (unsigned)m->intervals[x].first, (unsigned)m->intervals[x].last);
}
printf("\n");
}