/* * Character classes and character maps * Copyright * (C) 2015 Joseph H. Allen * * This file is part of JOE (Joe's Own Editor) */ /* An interval. A character matches if it's in the range. An array of these can be used to define a character class. */ struct interval { int first; int last; }; /* Sort an struct interval array */ void interval_sort(struct interval *array, ptrdiff_t size); /* Test if character is in a sorted interval array using binary search. Returns -1 if not found, or index to matching struct interval */ ptrdiff_t interval_test(struct interval *array, ptrdiff_t size, int ch); /* An interval list item. This is one implementation of character -> pointer maps. */ struct interval_list { struct interval_list *next; /* Next item in list */ struct interval interval; /* Range of characters */ void *map; }; struct interval_list *mkinterval(struct interval_list *next, int first, int last, void *map); void rminterval(struct interval_list *item); /* Add a single interval to an interval list */ struct interval_list *interval_add(struct interval_list *interval_list, int first, int last, void *map); /* Add set of intervals (a character class) to an interval list */ struct interval_list *interval_set(struct interval_list *list, struct interval *array, int size, void *map); /* Look up single character in an interval list, return what it's mapped to */ void *interval_lookup(struct interval_list *list, void *dflt, int ch); /* Show an interval list */ void interval_show(struct interval_list *list); /* A character classes and maps implemented as radix trees */ #define LEAFSIZE 16 #define LEAFMASK 0xF #define LEAFSHIFT 0 #define THIRDSIZE 32 #define THIRDMASK 0x1f #define THIRDSHIFT 4 #define SECONDSIZE 32 #define SECONDMASK 0x1f #define SECONDSHIFT 9 #define TOPSIZE 68 #define TOPMASK 0x7f #define TOPSHIFT 14 struct First { short entry[TOPSIZE]; }; struct Mid { short entry[SECONDSIZE]; /* THIRDSIZE too */ }; struct Leaf { void *entry[LEAFSIZE]; int refcount; /* No. references */ }; struct Ileaf { int entry[LEAFSIZE]; int refcount; }; struct Level { int alloc; /* Allocation index */ int size; /* Malloc size */ union { struct Mid *b; struct Mid *c; struct Leaf *d; struct Ileaf *e; } table; }; /* A radix tree bit-map for character classes */ struct Rset { struct First top; struct Level second; struct Mid mid; struct Level third; }; void rset_init(struct Rset *r); void rset_clr(struct Rset *r); int rset_lookup(struct Rset *r, int ch); int rset_lookup_unopt(struct Rset *r, int ch); void rset_add(struct Rset *r, int ch, int che); void rset_opt(struct Rset *r); void rset_set(struct Rset *r, struct interval *array, ptrdiff_t size); void rset_show(struct Rset *r); /* A radix tree map: (Unicode character) -> (void *) or (Unicode character) -> (int) */ struct Rtree { struct First top; struct Level second; struct Mid mid; struct Level third; struct Level leaf; }; /* Functions for character -> pointer maps */ void rtree_init(struct Rtree *r); void rtree_clr(struct Rtree *r); void *rtree_lookup(struct Rtree *r, int ch); void *rtree_lookup_unopt(struct Rtree *r, int ch); void rtree_add(struct Rtree *r, int ch, int che, void *map); void rtree_opt(struct Rtree *r); void rtree_set(struct Rtree *r, struct interval *array, ptrdiff_t len, void *map); void rtree_build(struct Rtree *r, struct interval_list *l); void rtree_show(struct Rtree *r); /* Functions for character -> int maps */ void rmap_init(struct Rtree *r); void rmap_clr(struct Rtree *r); int rmap_lookup(struct Rtree *r, int ch, int dflt); int rmap_lookup_unopt(struct Rtree *r, int ch, int dflt); void rmap_add(struct Rtree *r, int ch, int che, int map, int dflt); void rmap_opt(struct Rtree *r); void rmap_set(struct Rtree *r, struct interval *array, ptrdiff_t len, int map, int dflt); void rmap_show(struct Rtree *r); /* A character class */ struct Cclass { ptrdiff_t size; /* Malloc size of intervals */ ptrdiff_t len; /* Number of entries in intervals */ struct interval *intervals; /* sorted, non-overlapping, non-adjacent */ struct Rset rset[1]; /* Radix tree version of character class */ }; /* Initialize a character class */ void cclass_init(struct Cclass *cclass); /* Free memory used by a Cclass (does not free cclass itself) */ void cclass_clr(struct Cclass *cclass); /* Add a range to a character class */ void cclass_add(struct Cclass *cclass, int first, int last); /* Remove a range to a character class */ void cclass_sub(struct Cclass *cclass, int first, int last); /* Merge one character class into another */ void cclass_union(struct Cclass *cclass, struct Cclass *n); void cclass_merge(struct Cclass *cclass, struct interval *intervals, int len); /* Subtract one character class from another */ void cclass_diff(struct Cclass *cclass, struct Cclass *n); /* Compute inverse of a character class */ void cclass_inv(struct Cclass *cclass); /* Lookup a character in a character class using binary search. Return true if character is in the class. */ int cclass_lookup_unopt(struct Cclass *m, int ch); /* Optimize a character class for fast lookup with cclass_lookup. Generates a radix tree version of the character class. */ void cclass_opt(struct Cclass *cclass); /* Return true if character is in the class */ int cclass_lookup(struct Cclass *cclass, int ch); void cclass_show(struct Cclass *cclass); /* Remap a character class */ struct Cclass *cclass_remap(struct Cclass *m, struct charmap *map);