/* * Characters maps * Copyright * (C) 1992 Joseph H. Allen * * This file is part of JOE (Joe's Own Editor) */ #include "types.h" struct interval_list *mkinterval(struct interval_list *next, int first, int last, void *map) { struct interval_list *interval_list = (struct interval_list *)joe_malloc(SIZEOF(struct interval_list)); interval_list->next = next; interval_list->interval.first = first; interval_list->interval.last = last; interval_list->map = map; return interval_list; } void rminterval(struct interval_list *interval_list) { while (interval_list) { struct interval_list *n = interval_list->next; free(interval_list); interval_list = n; } } /* Add a single character range to an interval list */ struct interval_list *interval_add(struct interval_list *interval_list, int first, int last, void *map) { struct interval_list *e, **p; for (p = &interval_list; *p;) { e = *p; if (first > e->interval.last + 1 || (first > e->interval.last && (map != e->map))) { /* e is below new range, skip it */ p = &e->next; } else if (e->interval.first > last + 1 || (e->interval.first > last && (map != e->map))) { /* e is fully above new range, so insert new one here */ break; } else if (e->map == map) { /* merge e into new */ if (e->interval.first <= first) { if (e->interval.last >= last) { /* && e->interval.first <= first */ /* Existing covers new: we're done */ return interval_list; } else { /* e->interval.last < last && e->interval.first <= first */ /* Enlarge new, delete existing */ first = e->interval.first; *p = e->next; e->next = 0; rminterval(e); } } else { /* e->interval.first > first */ if (e->interval.last <= last) { /* && e->interval.first > first */ /* New fully covers existing, delete existing */ *p = e->next; e->next = 0; rminterval(e); } else { /* e->interval.last > last && e->interval.first > first */ /* Extend existing */ e->interval.first = first; return interval_list; } } } else { /* replace e with new */ if (e->interval.first < first) { if (e->interval.last <= last) { /* && e->interval.first < first */ /* Top part of existing get cut-off by new */ e->interval.last = first - 1; } else { /* e->interval.last > last && e->interval.first < first */ int org = e->interval.last; void *orgmap = e->map; e->interval.last = first - 1; p = &e->next; *p = mkinterval(*p, first, last, map); p = &(*p)->next; *p = mkinterval(*p, last + 1, org, orgmap); return interval_list; } } else { /* e->interval.first >= first */ if (e->interval.last <= last) { /* && e->interval.first >= first */ /* Delete existing */ *p = e->next; e->next = 0; rminterval(e); } else { /* e->interval.last > last && e->interval.first >= first */ e->interval.first = last + 1; break; } } } } *p = mkinterval(*p, first, last, map); return interval_list; } /* Add a list of intervals (typically representing a character class) to an interval list */ struct interval_list *interval_set(struct interval_list *interval_list, struct interval *list, int size, void *map) { int x; for (x = 0; x != size; ++x) interval_list = interval_add(interval_list, list[x].first, list[x].last, map); return interval_list; } void *interval_lookup(struct interval_list *list, void *dflt, int item) { while (list) { if (item >= list->interval.first && item <= list->interval.last) return list->map; list = list->next; } return dflt; } /* Build a cmap from an interval list */ void cmap_build(struct cmap *cmap, struct interval_list *list, void *dflt_map) { struct interval_list *l; int x; /* Record default */ cmap->dflt_map = dflt_map; for (x = 0; x != 128; ++x) cmap->direct_map[x] = dflt_map; /* Fill in direct map */ for (l = list; l; l = l->next) if (l->interval.first >= 128) break; else { int ch; for (ch = l->interval.first; ch < 128 && ch <= l->interval.last; ++ch) cmap->direct_map[ch] = l->map; } /* Skip over list items which are wholly in the direct map */ while (list && list->interval.last < 128) list = list -> next; /* Calculate size of array */ cmap->size = 0; for (l = list; l; l = l->next) ++cmap->size; if (cmap->size) { /* Allocate and populate array */ cmap->range_map = (struct interval_map *)joe_malloc(SIZEOF(struct interval_map) * cmap->size); x = 0; for (l = list; l; l = l->next) { cmap->range_map[x].interval.first = l->interval.first; cmap->range_map[x].interval.last = l->interval.last; cmap->range_map[x].map = l->map; if (cmap->range_map[x].interval.first < 128) cmap->range_map[x].interval.first = 128; ++x; } } else cmap->range_map = 0; } /* Clear cmap, free space */ void clr_cmap(struct cmap *cmap) { if (cmap->range_map) joe_free(cmap->range_map); cmap->range_map = 0; cmap->size = 0; } /* Lookup a character in a cmap, return its assigned mapping */ void *cmap_lookup(struct cmap *cmap, int ch) { if (ch < 128) return cmap->direct_map[ch]; if (cmap->size) { ptrdiff_t min = 0; ptrdiff_t mid; ptrdiff_t max = cmap->size - 1; if (ch < cmap->range_map[min].interval.first || ch > cmap->range_map[max].interval.last) goto no_match; while (max >= min) { mid = (min + max) / 2; if (ch > cmap->range_map[mid].interval.last) min = mid + 1; else if (ch < cmap->range_map[mid].interval.first) max = mid - 1; else return cmap->range_map[mid].map; } } no_match: return cmap->dflt_map; } #ifdef junk struct slist { struct slist *next; char *s; } *slist; char *find(char *s) { struct slist *l; for (l = slist; l; l = l->next) if (!zcmp(l->s, s)) return l->s; l = (struct slist *)joe_malloc(SIZEOF(struct slist)); l->next = slist; slist = l; l->s = zdup(s); return l->s; } int main(int argc, char *argv) { char buf[100]; struct interval_list *list = 0; while (gets(buf)) { if (buf[0] == 'a') { int first, last; int map; sscanf(buf + 1," %d %d %d",&first,&last,&map); printf("a %d %d %d\n", first, last, map); list = interval_add(list, first, last, map); } else if (buf[0] == 's') { struct interval_list *l; for (l = list; l; l = l->next) printf("%d %d %d\n",l->interval.first,l->interval.last,l->map); } } } #endif /* Radix tree maps */ void *rtree_lookup(struct Rtree *r, int ch) { int a = (TOPMASK & (ch >> TOPSHIFT)); int b = (SECONDMASK & (ch >> SECONDSHIFT)); int c = (THIRDMASK & (ch >> THIRDSHIFT)); int d = (LEAFMASK & (ch >> LEAFSHIFT)); if (a >= TOPSIZE) return NULL; if (a || b) { /* Full lookup for character >= 512 */ int idx = r->top.entry[a]; if (idx != -1) { idx = r->second.table.b[idx].entry[b]; if (idx != -1) { idx = r->third.table.c[idx].entry[c]; if (idx != -1) return r->leaf.table.d[idx].entry[d]; } } } else { /* Quick lookup for character < 512 */ int idx = r->mid.entry[c]; if (idx != -1) return r->leaf.table.d[idx].entry[d]; } return NULL; } void *rtree_lookup_unopt(struct Rtree *r, int ch) { int a = (TOPMASK & (ch >> TOPSHIFT)); int b = (SECONDMASK & (ch >> SECONDSHIFT)); int c = (THIRDMASK & (ch >> THIRDSHIFT)); int d = (LEAFMASK & (ch >> LEAFSHIFT)); int idx; if (a >= TOPSIZE) return NULL; idx = r->top.entry[a]; if (idx != -1) { idx = r->second.table.b[idx].entry[b]; if (idx != -1) { idx = r->third.table.c[idx].entry[c]; if (idx != -1) return r->leaf.table.d[idx].entry[d]; } } return NULL; } void rtree_init(struct Rtree *r) { int x; for (x = 0; x != TOPSIZE; ++x) r->top.entry[x] = -1; r->second.alloc = 0; r->second.size = 1; r->second.table.b = (struct Mid *)joe_malloc(r->second.size * SIZEOF(struct Mid)); r->third.alloc = 0; r->third.size = 1; r->third.table.c = (struct Mid *)joe_malloc(r->third.size * SIZEOF(struct Mid)); r->leaf.alloc = 0; r->leaf.size = 1; r->leaf.table.d = (struct Leaf *)joe_malloc(r->leaf.size * SIZEOF(struct Leaf)); } void rtree_clr(struct Rtree *r) { joe_free(r->second.table.b); joe_free(r->third.table.c); joe_free(r->leaf.table.d); } static int rtree_alloc(struct Level *l, int levelno) { int x; if (l->alloc == l->size) { l->size *= 2; switch (levelno) { case 1: { l->table.b = (struct Mid *)joe_realloc(l->table.b, l->size * SIZEOF(struct Mid)); break; } case 2: { l->table.c = (struct Mid *)joe_realloc(l->table.c, l->size * SIZEOF(struct Mid)); break; } case 3: { l->table.d = (struct Leaf *)joe_realloc(l->table.d, l->size * SIZEOF(struct Leaf)); break; } } } switch (levelno) { case 1: { for (x = 0; x != SECONDSIZE; ++x) l->table.b[l->alloc].entry[x] = -1; break; } case 2: { for (x = 0; x != THIRDSIZE; ++x) l->table.c[l->alloc].entry[x] = -1; break; } case 3: { for (x = 0; x != LEAFSIZE; ++x) l->table.d[l->alloc].entry[x] = NULL; break; } } return l->alloc++; } void rtree_add(struct Rtree *r, int ch, int che, void *map) { int a = (TOPMASK & (ch >> TOPSHIFT)); int b = (THIRDMASK & (ch >> SECONDSHIFT)); int c = (SECONDMASK & (ch >> THIRDSHIFT)); int d = (LEAFMASK & (ch >> LEAFSHIFT)); int ib; int ic; int id; while (ch <= che) { if (a >= TOPSIZE) return; ib = r->top.entry[a]; if (ib == -1) { r->top.entry[a] = ib = rtree_alloc(&r->second, 1); } while (ch <= che) { ic = r->second.table.b[ib].entry[b]; if (ic == -1) { r->second.table.b[ib].entry[b] = ic = rtree_alloc(&r->third, 2); } while (ch <= che) { id = r->third.table.c[ic].entry[c]; if (id == -1) { r->third.table.c[ic].entry[c] = id = rtree_alloc(&r->leaf, 3); } while (ch <= che) { r->leaf.table.d[id].entry[d] = map; ++ch; if (++d == LEAFSIZE) { d = 0; break; } } if (++c == THIRDSIZE) { c = 0; break; } } if (++b == SECONDSIZE) { b = 0; break; } } ++a; } } /* Optimize radix tree: de-duplicate leaf nodes and setup mid */ struct rhentry { struct rhentry *next; struct Leaf *leaf; int idx; }; static ptrdiff_t rhhash(struct Leaf *l) { ptrdiff_t hval = 0; int x; for (x = 0; x != LEAFSIZE; ++x) hval = (hval << 4) + (hval >> 28) + (ptrdiff_t)l->entry[x]; return hval; } void rtree_opt(struct Rtree *r) { int idx; int x; int rhsize; struct rhentry **rhtable; int *equiv; int *repl; int dupcount; int newalloc; struct Leaf *l; /* De-duplicate leaf nodes (it's not worth bothering with interior nodes) */ equiv = joe_malloc(SIZEOF(int) * r->leaf.alloc); repl = joe_malloc(SIZEOF(int) * r->leaf.alloc); /* Create hash table index of all the leaf nodes */ dupcount = 0; rhsize = 1024; rhtable = joe_calloc(SIZEOF(struct rhentry *), rhsize); for (x = 0; x != r->leaf.alloc; ++x) { struct rhentry *rh; idx = ((rhsize - 1) & rhhash(r->leaf.table.d + x)); /* Already exists? */ for (rh = rhtable[idx]; rh; rh = rh->next) if (!memcmp(rh->leaf, r->leaf.table.d + x, SIZEOF(struct Leaf))) break; if (rh) { equiv[x] = rh->idx; ++dupcount; } else { equiv[x] = -1; rh = (struct rhentry *)joe_malloc(SIZEOF(struct rhentry)); rh->next = rhtable[idx]; rh->idx = x; rh->leaf = r->leaf.table.d + x; rhtable[idx] = rh; } } /* Free hash table */ for (x = 0; x != 1024; ++x) { struct rhentry *rh, *nh; for (rh = rhtable[x]; rh; rh = nh) { nh = rh->next; joe_free(rh); } } joe_free(rhtable); /* Create new leaf table */ newalloc = r->leaf.alloc - dupcount; l = joe_malloc(SIZEOF(struct Leaf) * newalloc); /* Copy entries */ idx = 0; for (x = 0; x != r->leaf.alloc; ++x) if (equiv[x] == -1) { mcpy(l + idx, r->leaf.table.d + x, sizeof(struct Leaf)); repl[x] = idx; ++idx; } for (x = 0; x != r->leaf.alloc; ++x) if (equiv[x] != -1) { repl[x] = repl[equiv[x]]; } /* Install new table */ joe_free(r->leaf.table.d); r->leaf.table.d = l; r->leaf.alloc = newalloc; r->leaf.size = newalloc; /* Remap third level */ for (x = 0; x != r->third.alloc; ++x) for (idx = 0; idx != THIRDSIZE; ++idx) if (r->third.table.c[x].entry[idx] != -1) r->third.table.c[x].entry[idx] = repl[r->third.table.c[x].entry[idx]]; joe_free(equiv); joe_free(repl); /* Set up mid table */ for (x = 0; x != THIRDSIZE; ++x) r->mid.entry[x] = -1; idx = r->top.entry[0]; if (idx != -1) { idx = r->second.table.b[idx].entry[0]; if (idx != -1) { mcpy(r->mid.entry, r->third.table.c[idx].entry, SIZEOF(r->mid.entry)); } } } void rtree_set(struct Rtree *r, struct interval *array, ptrdiff_t len, void *map) { ptrdiff_t y; for (y = 0; y != len; ++y) { rtree_add(r, array[y].first, array[y].last, map); /* int x; for (x = array[y].first; x <= array[y].last; ++x) rtree_add(r, x, map); */ } }