regex.c


DEFINITIONS

This source file includes following functions.
  1. PARAMS
  2. PARAMS
  3. _
  4. _
  5. init_syntax_once
  6. re_set_casetable
  7. ISASCII
  8. ISASCII
  9. ISBLANK
  10. ISBLANK
  11. ISGRAPH
  12. ISGRAPH
  13. SIGN_EXTEND_CHAR
  14. SIGN_EXTEND_CHAR
  15. re_set_syntax
  16. utf8_firstbyte
  17. print_mbc
  18. set_list_bits
  19. is_in_list
  20. print_partial_compiled_pattern
  21. print_compiled_pattern
  22. calculate_must_string
  23. read_backslash
  24. read_special
  25. re_compile_pattern
  26. re_free_pattern
  27. store_jump
  28. insert_jump
  29. store_jump_n
  30. insert_jump_n
  31. insert_op
  32. insert_op_2
  33. slow_match
  34. slow_search
  35. bm_init_skip
  36. bm_search
  37. re_compile_fastmap
  38. re_adjust_startpos
  39. re_search
  40. init_regs
  41. re_match
  42. memcmp_translate
  43. re_copy_registers
  44. re_free_registers
  45. re_mbcinit
  46. asc_startpos
  47. euc_startpos
  48. sjis_startpos
  49. utf8_startpos


   1  /* Extended regular expression matching and search library.
   2     Copyright (C) 1993, 94, 95, 96, 97, 98 Free Software Foundation, Inc.
   3  
   4     The GNU C Library is free software; you can redistribute it and/or
   5     modify it under the terms of the GNU Library General Public License as
   6     published by the Free Software Foundation; either version 2 of the
   7     License, or (at your option) any later version.
   8  
   9     The GNU C Library is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12     Library General Public License for more details.
  13  
  14     You should have received a copy of the GNU Library General Public
  15     License along with the GNU C Library; see the file LGPL.  If not,
  16     write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  17     Boston, MA 02111-1307, USA.  */
  18  /* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto)
  19     Last change: May 21, 1993 by t^2  */
  20  /* removed gapped buffer support, multiple syntax support by matz <matz@nts.co.jp> */
  21  /* Perl5 extension added by matz <matz@caelum.co.jp> */
  22  /* UTF-8 extension added Jan 16 1999 by Yoshida Masato  <yoshidam@tau.bekkoame.ne.jp> */
  23  
  24  #include "config.h"
  25  
  26  #ifdef HAVE_STRING_H
  27  # include <string.h>
  28  #else
  29  # include <strings.h>
  30  #endif
  31  
  32  /* We write fatal error messages on standard error.  */
  33  #include <stdio.h>
  34  
  35  /* isalpha(3) etc. are used for the character classes.  */
  36  #include <ctype.h>
  37  #include <sys/types.h>
  38  
  39  #ifndef PARAMS
  40  # if defined __GNUC__ || (defined __STDC__ && __STDC__)
  41  #  define PARAMS(args) args
  42  # else
  43  #  define PARAMS(args) ()
  44  # endif  /* GCC.  */
  45  #endif  /* Not PARAMS.  */
  46  
  47  #if defined(STDC_HEADERS)
  48  # include <stddef.h>
  49  #else
  50  /* We need this for `regex.h', and perhaps for the Emacs include files.  */
  51  # include <sys/types.h>
  52  #endif
  53  
  54  #if !defined(__STDC__) && !defined(_MSC_VER)
  55  # define volatile
  56  #endif
  57  
  58  #ifdef HAVE_PROTOTYPES
  59  # define _(args) args
  60  #else
  61  # define _(args) ()
  62  #endif
  63  
  64  #ifdef RUBY_PLATFORM
  65  #include "defines.h"
  66  
  67  # define RUBY
  68  extern int rb_prohibit_interrupt;
  69  extern int rb_trap_pending;
  70  void rb_trap_exec _((void));
  71  
  72  # define CHECK_INTS do {\
  73      if (!rb_prohibit_interrupt) {\
  74          if (rb_trap_pending) rb_trap_exec();\
  75      }\
  76  } while (0)
  77  #endif
  78  
  79  /* Make alloca work the best possible way.  */
  80  #ifdef __GNUC__
  81  # ifndef atarist
  82  #  ifndef alloca
  83  #   define alloca __builtin_alloca
  84  #  endif
  85  # endif /* atarist */
  86  #else
  87  # if defined(HAVE_ALLOCA_H)
  88  #  include <alloca.h>
  89  # elif !defined(alloca)
  90  char *alloca();
  91  # endif
  92  #endif /* __GNUC__ */
  93  
  94  #ifdef _AIX
  95  #pragma alloca
  96  #endif
  97  
  98  #ifdef HAVE_STRING_H
  99  # include <string.h>
 100  #else
 101  # include <strings.h>
 102  #endif
 103  
 104  #ifdef C_ALLOCA
 105  #define FREE_VARIABLES() alloca(0)
 106  #else
 107  #define FREE_VARIABLES()
 108  #endif
 109  
 110  #define FREE_AND_RETURN_VOID(stackb)   do {                             \
 111    FREE_VARIABLES();                                                     \
 112    if (stackb != stacka) xfree(stackb);                                  \
 113    return;                                                               \
 114  } while(0)
 115  
 116  #define FREE_AND_RETURN(stackb,val)    do {                             \
 117    FREE_VARIABLES();                                                     \
 118    if (stackb != stacka) xfree(stackb);                                  \
 119    return(val);                                                          \
 120  } while(0)
 121  
 122  #define DOUBLE_STACK(type) do {                                         \
 123    type *stackx;                                                         \
 124    unsigned int xlen = stacke - stackb;                                  \
 125    if (stackb == stacka) {                                               \
 126      stackx = (type*)xmalloc(2 * xlen * sizeof(type));                   \
 127      memcpy(stackx, stackb, xlen * sizeof (type));                       \
 128    }                                                                     \
 129    else {                                                                \
 130      stackx = (type*)xrealloc(stackb, 2 * xlen * sizeof(type));          \
 131    }                                                                     \
 132    /* Rearrange the pointers. */                                         \
 133    stackp = stackx + (stackp - stackb);                                  \
 134    stackb = stackx;                                                      \
 135    stacke = stackb + 2 * xlen;                                           \
 136  } while (0)
 137  
 138  #define RE_TALLOC(n,t)  ((t*)alloca((n)*sizeof(t)))
 139  #define TMALLOC(n,t)    ((t*)xmalloc((n)*sizeof(t)))
 140  #define TREALLOC(s,n,t) (s=((t*)xrealloc(s,(n)*sizeof(t))))
 141  
 142  #define EXPAND_FAIL_STACK() DOUBLE_STACK(unsigned char*)
 143  #define ENSURE_FAIL_STACK(n)                                            \
 144    do {                                                                  \
 145      if (stacke - stackp <= (n)) {                                       \
 146          /* if (len > re_max_failures * MAX_NUM_FAILURE_ITEMS)           \
 147             {                                                            \
 148             FREE_AND_RETURN(stackb,(-2));                                \
 149             }*/                                                          \
 150                                                                          \
 151          /* Roughly double the size of the stack.  */                    \
 152          EXPAND_FAIL_STACK();                                            \
 153        }                                                                 \
 154    } while (0)
 155  
 156  /* Get the interface, including the syntax bits.  */
 157  #include "regex.h"
 158  
 159  /* Subroutines for re_compile_pattern.  */
 160  static void store_jump _((char*, int, char*));
 161  static void insert_jump _((int, char*, char*, char*));
 162  static void store_jump_n _((char*, int, char*, unsigned));
 163  static void insert_jump_n _((int, char*, char*, char*, unsigned));
 164  static void insert_op _((int, char*, char*));
 165  static void insert_op_2 _((int, char*, char*, int, int));
 166  static int memcmp_translate _((unsigned char*, unsigned char*, int));
 167  
 168  /* Define the syntax stuff, so we can do the \<, \>, etc.  */
 169  
 170  /* This must be nonzero for the wordchar and notwordchar pattern
 171     commands in re_match.  */
 172  #define Sword  1
 173  #define Sword2 2
 174  
 175  #define SYNTAX(c) re_syntax_table[c]
 176  
 177  static char re_syntax_table[256];
 178  static void init_syntax_once _((void));
 179  static const unsigned char *translate = 0;
 180  static void init_regs _((struct re_registers*, unsigned int));
 181  static void bm_init_skip _((int *, unsigned char*, int, const unsigned char*));
 182  static int current_mbctype = MBCTYPE_ASCII;
 183  
 184  #undef P
 185  
 186  #ifdef RUBY
 187  #include "util.h"
 188  #endif
 189  
 190  static void
 191  init_syntax_once()
 192  {
 193     register int c;
 194     static int done = 0;
 195  
 196     if (done)
 197       return;
 198  
 199     memset(re_syntax_table, 0, sizeof re_syntax_table);
 200  
 201     for (c=0; c<=0x7f; c++)
 202       if (isalnum(c)) 
 203         re_syntax_table[c] = Sword;
 204     re_syntax_table['_'] = Sword;
 205  
 206     for (c=0x80; c<=0xff; c++)
 207       if (isalnum(c)) 
 208         re_syntax_table[c] = Sword2;
 209     done = 1;
 210  }
 211  
 212  void
 213  re_set_casetable(table)
 214       const char *table;
 215  {
 216    translate = (const unsigned char*)table;
 217  }
 218  
 219  /* Jim Meyering writes:
 220  
 221     "... Some ctype macros are valid only for character codes that
 222     isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
 223     using /bin/cc or gcc but without giving an ansi option).  So, all
 224     ctype uses should be through macros like ISPRINT...  If
 225     STDC_HEADERS is defined, then autoconf has verified that the ctype
 226     macros don't need to be guarded with references to isascii. ...
 227     Defining isascii to 1 should let any compiler worth its salt
 228     eliminate the && through constant folding."
 229     Solaris defines some of these symbols so we must undefine them first.  */
 230  
 231  #undef ISASCII
 232  #if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
 233  # define ISASCII(c) 1
 234  #else
 235  # define ISASCII(c) isascii(c)
 236  #endif
 237  
 238  #ifdef isblank
 239  # define ISBLANK(c) (ISASCII(c) && isblank(c))
 240  #else
 241  # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
 242  #endif
 243  #ifdef isgraph
 244  # define ISGRAPH(c) (ISASCII(c) && isgraph(c))
 245  #else
 246  # define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
 247  #endif
 248  
 249  #undef ISPRINT
 250  #define ISPRINT(c) (ISASCII(c) && isprint(c))
 251  #define ISDIGIT(c) (ISASCII(c) && isdigit(c))
 252  #define ISALNUM(c) (ISASCII(c) && isalnum(c))
 253  #define ISALPHA(c) (ISASCII(c) && isalpha(c))
 254  #define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
 255  #define ISLOWER(c) (ISASCII(c) && islower(c))
 256  #define ISPUNCT(c) (ISASCII(c) && ispunct(c))
 257  #define ISSPACE(c) (ISASCII(c) && isspace(c))
 258  #define ISUPPER(c) (ISASCII(c) && isupper(c))
 259  #define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
 260  
 261  #ifndef NULL
 262  # define NULL (void *)0
 263  #endif
 264  
 265  /* We remove any previous definition of `SIGN_EXTEND_CHAR',
 266     since ours (we hope) works properly with all combinations of
 267     machines, compilers, `char' and `unsigned char' argument types.
 268     (Per Bothner suggested the basic approach.)  */
 269  #undef SIGN_EXTEND_CHAR
 270  #if __STDC__
 271  # define SIGN_EXTEND_CHAR(c) ((signed char)(c))
 272  #else  /* not __STDC__ */
 273  /* As in Harbison and Steele.  */
 274  # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
 275  #endif
 276  
 277  /* These are the command codes that appear in compiled regular
 278     expressions, one per byte.  Some command codes are followed by
 279     argument bytes.  A command code can specify any interpretation
 280     whatsoever for its arguments.  Zero-bytes may appear in the compiled
 281     regular expression.
 282  
 283     The value of `exactn' is needed in search.c (search_buffer) in emacs.
 284     So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
 285     `exactn' we use here must also be 1.  */
 286  
 287  enum regexpcode
 288    {
 289      unused=0,
 290      exactn=1, /* Followed by one byte giving n, then by n literal bytes.  */
 291      begline,  /* Fail unless at beginning of line.  */
 292      endline,  /* Fail unless at end of line.  */
 293      begbuf,   /* Succeeds if at beginning of buffer (if emacs) or at beginning
 294                   of string to be matched (if not).  */
 295      endbuf,   /* Analogously, for end of buffer/string.  */
 296      endbuf2,  /* End of buffer/string, or newline just before it.  */
 297      begpos,   /* Matches where last scan//gsub left off.  */
 298      jump,     /* Followed by two bytes giving relative address to jump to.  */
 299      jump_past_alt,/* Same as jump, but marks the end of an alternative.  */
 300      on_failure_jump,     /* Followed by two bytes giving relative address of 
 301                              place to resume at in case of failure.  */
 302      finalize_jump,       /* Throw away latest failure point and then jump to 
 303                              address.  */
 304      maybe_finalize_jump, /* Like jump but finalize if safe to do so.
 305                              This is used to jump back to the beginning
 306                              of a repeat.  If the command that follows
 307                              this jump is clearly incompatible with the
 308                              one at the beginning of the repeat, such that
 309                              we can be sure that there is no use backtracking
 310                              out of repetitions already completed,
 311                              then we finalize.  */
 312      dummy_failure_jump,  /* Jump, and push a dummy failure point. This 
 313                              failure point will be thrown away if an attempt 
 314                              is made to use it for a failure. A + construct 
 315                              makes this before the first repeat.  Also
 316                              use it as an intermediary kind of jump when
 317                              compiling an or construct.  */
 318      push_dummy_failure, /* Push a dummy failure point and continue.  Used at the end of
 319                             alternatives.  */
 320      succeed_n,   /* Used like on_failure_jump except has to succeed n times;
 321                      then gets turned into an on_failure_jump. The relative
 322                      address following it is useless until then.  The
 323                      address is followed by two bytes containing n.  */
 324      jump_n,      /* Similar to jump, but jump n times only; also the relative
 325                      address following is in turn followed by yet two more bytes
 326                      containing n.  */
 327      try_next,    /* Jump to next pattern for the first time,
 328                      leaving this pattern on the failure stack. */
 329      finalize_push,      /* Finalize stack and push the beginning of the pattern
 330                             on the stack to retry (used for non-greedy match) */
 331      finalize_push_n,    /* Similar to finalize_push, buf finalize n time only */
 332      set_number_at,      /* Set the following relative location to the
 333                             subsequent number.  */
 334      anychar,     /* Matches any (more or less) one character excluding newlines.  */
 335      anychar_repeat,      /* Matches sequence of characters excluding newlines.  */
 336      charset,     /* Matches any one char belonging to specified set.
 337                      First following byte is number of bitmap bytes.
 338                      Then come bytes for a bitmap saying which chars are in.
 339                      Bits in each byte are ordered low-bit-first.
 340                      A character is in the set if its bit is 1.
 341                      A character too large to have a bit in the map
 342                      is automatically not in the set.  */
 343      charset_not, /* Same parameters as charset, but match any character
 344                      that is not one of those specified.  */
 345      start_memory, /* Start remembering the text that is matched, for
 346                      storing in a memory register.  Followed by one
 347                      byte containing the register number.  Register numbers
 348                      must be in the range 0 through RE_NREGS.  */
 349      stop_memory, /* Stop remembering the text that is matched
 350                      and store it in a memory register.  Followed by
 351                      one byte containing the register number. Register
 352                      numbers must be in the range 0 through RE_NREGS.  */
 353      start_paren,    /* Place holder at the start of (?:..). */
 354      stop_paren,    /* Place holder at the end of (?:..). */
 355      casefold_on,   /* Turn on casefold flag. */
 356      casefold_off,  /* Turn off casefold flag. */
 357      option_set,    /* Turn on multi line match (match with newlines). */
 358      start_nowidth, /* Save string point to the stack. */
 359      stop_nowidth,  /* Restore string place at the point start_nowidth. */
 360      pop_and_fail,  /* Fail after popping nowidth entry from stack. */
 361      stop_backtrack,  /* Restore backtrack stack at the point start_nowidth. */
 362      duplicate,   /* Match a duplicate of something remembered.
 363                      Followed by one byte containing the index of the memory 
 364                      register.  */
 365      wordchar,    /* Matches any word-constituent character.  */
 366      notwordchar, /* Matches any char that is not a word-constituent.  */
 367      wordbeg,     /* Succeeds if at word beginning.  */
 368      wordend,     /* Succeeds if at word end.  */
 369      wordbound,   /* Succeeds if at a word boundary.  */
 370      notwordbound /* Succeeds if not at a word boundary.  */
 371    };
 372  
 373  
 374  /* Number of failure points to allocate space for initially,
 375     when matching.  If this number is exceeded, more space is allocated,
 376     so it is not a hard limit.  */
 377  
 378  #ifndef NFAILURES
 379  #define NFAILURES 160
 380  #endif
 381  
 382  /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
 383  #define STORE_NUMBER(destination, number)                               \
 384    do { (destination)[0] = (number) & 0377;                              \
 385      (destination)[1] = (number) >> 8; } while (0)
 386  
 387  /* Same as STORE_NUMBER, except increment the destination pointer to
 388     the byte after where the number is stored.  Watch out that values for
 389     DESTINATION such as p + 1 won't work, whereas p will.  */
 390  #define STORE_NUMBER_AND_INCR(destination, number)                      \
 391    do { STORE_NUMBER(destination, number);                               \
 392      (destination) += 2; } while (0)
 393  
 394  
 395  /* Put into DESTINATION a number stored in two contingous bytes starting
 396     at SOURCE.  */
 397  #define EXTRACT_NUMBER(destination, source)                             \
 398    do { (destination) = *(source) & 0377;                                \
 399      (destination) += SIGN_EXTEND_CHAR(*(char*)((source) + 1)) << 8; } while (0)
 400  
 401  /* Same as EXTRACT_NUMBER, except increment the pointer for source to
 402     point to second byte of SOURCE.  Note that SOURCE has to be a value
 403     such as p, not, e.g., p + 1. */
 404  #define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
 405    do { EXTRACT_NUMBER(destination, source);                             \
 406         (source) += 2; } while (0)
 407  
 408  
 409  /* Specify the precise syntax of regexps for compilation.  This provides
 410     for compatibility for various utilities which historically have
 411     different, incompatible syntaxes.
 412  
 413     The argument SYNTAX is a bit-mask comprised of the various bits
 414     defined in regex.h.  */
 415  
 416  long
 417  re_set_syntax(syntax)
 418    long syntax;
 419  {
 420      /* obsolete */
 421      return 0;
 422  }
 423  
 424  /* Macros for re_compile_pattern, which is found below these definitions.  */
 425  
 426  #define TRANSLATE_P() ((options&RE_OPTION_IGNORECASE) && translate)
 427  #define MAY_TRANSLATE() ((bufp->options&(RE_OPTION_IGNORECASE|RE_MAY_IGNORECASE)) && translate)
 428  /* Fetch the next character in the uncompiled pattern---translating it 
 429     if necessary.  Also cast from a signed character in the constant
 430     string passed to us by the user to an unsigned char that we can use
 431     as an array index (in, e.g., `translate').  */
 432  #define PATFETCH(c)                                                     \
 433    do {if (p == pend) goto end_of_pattern;                               \
 434      c = (unsigned char) *p++;                                           \
 435      if (TRANSLATE_P()) c = (unsigned char)translate[c]; \
 436    } while (0)
 437  
 438  /* Fetch the next character in the uncompiled pattern, with no
 439     translation.  */
 440  #define PATFETCH_RAW(c)                                                 \
 441    do {if (p == pend) goto end_of_pattern;                               \
 442      c = (unsigned char)*p++;                                            \
 443    } while (0)
 444  
 445  /* Go backwards one character in the pattern.  */
 446  #define PATUNFETCH p--
 447  
 448  #define MBC2WC(c, p)                                                    \
 449    do {                                                                  \
 450      if (current_mbctype == MBCTYPE_UTF8) {                              \
 451        int n = mbclen(c) - 1;                                            \
 452        c &= (1<<(BYTEWIDTH-2-n)) - 1;                                    \
 453        while (n--) {                                                     \
 454          c = c << 6 | (*p++ & ((1<<6)-1));                               \
 455        }                                                                 \
 456      }                                                                   \
 457      else {                                                              \
 458        c <<= 8;                                                          \
 459        c |= (unsigned char)*(p)++;                                       \
 460      }                                                                   \
 461    } while (0)
 462  
 463  #define PATFETCH_MBC(c)                                                 \
 464    do {                                                                  \
 465      if (p + mbclen(c) - 1 >= pend) goto end_of_pattern;                 \
 466      MBC2WC(c, p);                                                       \
 467    } while(0)
 468  
 469  #define WC2MBC1ST(c)                                                    \
 470   ((current_mbctype != MBCTYPE_UTF8) ? ((c<0x100) ? (c) : (((c)>>8)&0xff)) : utf8_firstbyte(c))
 471  
 472  typedef unsigned int (*mbc_startpos_func_t) _((const char *string, unsigned int pos));
 473  
 474  static unsigned int asc_startpos _((const char *string, unsigned int pos));
 475  static unsigned int euc_startpos _((const char *string, unsigned int pos));
 476  static unsigned int sjis_startpos _((const char *string, unsigned int pos));
 477  static unsigned int utf8_startpos _((const char *string, unsigned int pos));
 478  
 479  static const mbc_startpos_func_t mbc_startpos_func[4] = {
 480    asc_startpos, euc_startpos, sjis_startpos, utf8_startpos
 481  };
 482  
 483  #define mbc_startpos(start, pos) (*mbc_startpos_func[current_mbctype])((start), (pos))
 484  
 485  static unsigned int
 486  utf8_firstbyte(c)
 487       unsigned long c;
 488  {
 489    if (c < 0x80) return c;
 490    if (c <= 0x7ff) return ((c>>6)&0xff)|0xc0;
 491    if (c <= 0xffff) return ((c>>12)&0xff)|0xe0;
 492    if (c <= 0x1fffff) return ((c>>18)&0xff)|0xf0;
 493    if (c <= 0x3ffffff) return ((c>>24)&0xff)|0xf8;
 494    if (c <= 0x7fffffff) return ((c>>30)&0xff)|0xfc;
 495  #if SIZEOF_INT > 4
 496    if (c <= 0xfffffffff) return 0xfe;
 497  #else
 498    return 0xfe;
 499  #endif
 500  }
 501  
 502  static void
 503  print_mbc(c)
 504       unsigned int c;
 505  {
 506    if (current_mbctype == MBCTYPE_UTF8) {
 507      if (c < 0x80)
 508        printf("%c", (int)c);
 509      else if (c <= 0x7ff)
 510        printf("%c%c", (int)utf8_firstbyte(c), (int)(c & 0x3f));
 511      else if (c <= 0xffff)
 512        printf("%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 6) & 0x3f),
 513               (int)(c & 0x3f));
 514      else if (c <= 0x1fffff) 
 515        printf("%c%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 12) & 0x3f),
 516               (int)((c >> 6) & 0x3f), (int)(c & 0x3f));
 517      else if (c <= 0x3ffffff)
 518        printf("%c%c%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 18) & 0x3f),
 519               (int)((c >> 12) & 0x3f), (int)((c >> 6) & 0x3f), (int)(c & 0x3f));
 520      else if (c <= 0x7fffffff)
 521        printf("%c%c%c%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 24) & 0x3f),
 522               (int)((c >> 18) & 0x3f), (int)((c >> 12) & 0x3f),
 523               (int)((c >> 6) & 0x3f), (int)(c & 0x3f));
 524    }
 525    else if (c < 0xff) {
 526      printf("\\%o", (int)c);
 527    }
 528    else {
 529      printf("%c%c", (int)(c >> BYTEWIDTH), (int)(c &0xff));
 530    }
 531  }
 532  
 533  /* If the buffer isn't allocated when it comes in, use this.  */
 534  #define INIT_BUF_SIZE  28
 535  
 536  /* Make sure we have at least N more bytes of space in buffer.  */
 537  #define GET_BUFFER_SPACE(n)                                             \
 538    do {                                                                  \
 539      while (b - bufp->buffer + (n) >= bufp->allocated)                   \
 540        EXTEND_BUFFER;                                                    \
 541    } while (0)
 542  
 543  /* Make sure we have one more byte of buffer space and then add CH to it.  */
 544  #define BUFPUSH(ch)                                                     \
 545    do {                                                                  \
 546      GET_BUFFER_SPACE(1);                                                \
 547      *b++ = (char)(ch);                                                  \
 548    } while (0)
 549  
 550  /* Extend the buffer by twice its current size via reallociation and
 551     reset the pointers that pointed into the old allocation to point to
 552     the correct places in the new allocation.  If extending the buffer
 553     results in it being larger than 1 << 16, then flag memory exhausted.  */
 554  #define EXTEND_BUFFER                                           \
 555    do { char *old_buffer = bufp->buffer;                                 \
 556      if (bufp->allocated == (1L<<16)) goto too_big;                      \
 557      bufp->allocated *= 2;                                               \
 558      if (bufp->allocated > (1L<<16)) bufp->allocated = (1L<<16);         \
 559      bufp->buffer = (char*)xrealloc(bufp->buffer, bufp->allocated);      \
 560      if (bufp->buffer == 0)                                              \
 561        goto memory_exhausted;                                            \
 562      b = (b - old_buffer) + bufp->buffer;                                \
 563      if (fixup_alt_jump)                                                 \
 564        fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;    \
 565      if (laststart)                                                      \
 566        laststart = (laststart - old_buffer) + bufp->buffer;              \
 567      begalt = (begalt - old_buffer) + bufp->buffer;                      \
 568      if (pending_exact)                                                  \
 569        pending_exact = (pending_exact - old_buffer) + bufp->buffer;      \
 570    } while (0)
 571  
 572  
 573  /* Set the bit for character C in a character set list.  */
 574  #define SET_LIST_BIT(c)                                                 \
 575    (b[(unsigned char)(c) / BYTEWIDTH]                                    \
 576     |= 1 << ((unsigned char)(c) % BYTEWIDTH))
 577  
 578  /* Get the next unsigned number in the uncompiled pattern.  */
 579  #define GET_UNSIGNED_NUMBER(num)                                        \
 580    do { if (p != pend) {                                                 \
 581          PATFETCH(c);                                                    \
 582          while (ISDIGIT(c)) {                                            \
 583            if (num < 0)                                                  \
 584               num = 0;                                                   \
 585            num = num * 10 + c - '0';                                     \
 586            if (p == pend)                                                \
 587               break;                                                     \
 588            PATFETCH(c);                                                  \
 589          }                                                               \
 590       }                                                                  \
 591    } while (0)
 592  
 593  #define STREQ(s1, s2) ((strcmp(s1, s2) == 0))
 594  
 595  #define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
 596  
 597  #define IS_CHAR_CLASS(string)                                           \
 598     (STREQ(string, "alpha") || STREQ(string, "upper")                    \
 599      || STREQ(string, "lower") || STREQ(string, "digit")                 \
 600      || STREQ(string, "alnum") || STREQ(string, "xdigit")                \
 601      || STREQ(string, "space") || STREQ(string, "print")                 \
 602      || STREQ(string, "punct") || STREQ(string, "graph")                 \
 603      || STREQ(string, "cntrl") || STREQ(string, "blank"))
 604  
 605  #define STORE_MBC(p, c)                                                 \
 606    do {                                                                  \
 607      (p)[0] = (unsigned char)(((c) >>24) & 0xff);                        \
 608      (p)[1] = (unsigned char)(((c) >>16) & 0xff);                        \
 609      (p)[2] = (unsigned char)(((c) >> 8) & 0xff);                        \
 610      (p)[3] = (unsigned char)(((c) >> 0) & 0xff);                        \
 611    } while (0)
 612  
 613  #define STORE_MBC_AND_INCR(p, c)                                        \
 614    do {                                                                  \
 615      *(p)++ = (unsigned char)(((c) >>24) & 0xff);                        \
 616      *(p)++ = (unsigned char)(((c) >>16) & 0xff);                        \
 617      *(p)++ = (unsigned char)(((c) >> 8) & 0xff);                        \
 618      *(p)++ = (unsigned char)(((c) >> 0) & 0xff);                        \
 619    } while (0)
 620  
 621  #define EXTRACT_MBC(p)                                                  \
 622    ((unsigned int)((unsigned char)(p)[0] << 24 |                         \
 623                      (unsigned char)(p)[1] << 16 |                       \
 624                      (unsigned char)(p)[2] <<  8 |                       \
 625                      (unsigned char)(p)[3]))
 626  
 627  #define EXTRACT_MBC_AND_INCR(p)                                         \
 628    ((unsigned int)((p) += 4,                                             \
 629                      (unsigned char)(p)[-4] << 24 |                      \
 630                      (unsigned char)(p)[-3] << 16 |                      \
 631                      (unsigned char)(p)[-2] <<  8 |                      \
 632                      (unsigned char)(p)[-1]))
 633  
 634  #define EXTRACT_UNSIGNED(p) \
 635    ((unsigned char)(p)[0] | (unsigned char)(p)[1] << 8)
 636  #define EXTRACT_UNSIGNED_AND_INCR(p) \
 637    ((p) += 2, (unsigned char)(p)[-2] | (unsigned char)(p)[-1] << 8)
 638  
 639  /* Handle (mb)?charset(_not)?.
 640  
 641     Structure of mbcharset(_not)? in compiled pattern.
 642  
 643       struct {
 644         unsinged char id;                mbcharset(_not)?
 645         unsigned char sbc_size;
 646         unsigned char sbc_map[sbc_size]; same as charset(_not)? up to here.
 647         unsigned short mbc_size;         number of intervals.
 648         struct {
 649           unsigned long beg;             beginning of interval.
 650           unsigned long end;             end of interval.
 651         } intervals[mbc_size];
 652       }; */
 653  
 654  static void
 655  set_list_bits(c1, c2, b)
 656      unsigned long c1, c2;
 657      unsigned char *b;
 658  {
 659    unsigned char sbc_size = b[-1];
 660    unsigned short mbc_size = EXTRACT_UNSIGNED(&b[sbc_size]);
 661    unsigned short beg, end, upb;
 662  
 663    if (c1 > c2)
 664      return;
 665    b = &b[sbc_size + 2];
 666  
 667    for (beg = 0, upb = mbc_size; beg < upb; ) {
 668      unsigned short mid = (unsigned short)(beg + upb) >> 1;
 669  
 670      if ((int)c1 - 1 > (int)EXTRACT_MBC(&b[mid*8+4]))
 671        beg = mid + 1;
 672      else
 673        upb = mid;
 674    }
 675  
 676    for (end = beg, upb = mbc_size; end < upb; ) {
 677      unsigned short mid = (unsigned short)(end + upb) >> 1;
 678  
 679      if ((int)c2 >= (int)EXTRACT_MBC(&b[mid*8]) - 1)
 680        end = mid + 1;
 681      else
 682        upb = mid;
 683    }
 684  
 685    if (beg != end) {
 686      if (c1 > EXTRACT_MBC(&b[beg*8]))
 687        c1 = EXTRACT_MBC(&b[beg*8]);
 688      if (c2 < EXTRACT_MBC(&b[(end - 1)*8+4]))
 689        c2 = EXTRACT_MBC(&b[(end - 1)*8+4]);
 690    }
 691    if (end < mbc_size && end != beg + 1)
 692      /* NOTE: memcpy() would not work here.  */
 693      memmove(&b[(beg + 1)*8], &b[end*8], (mbc_size - end)*8);
 694    STORE_MBC(&b[beg*8 + 0], c1);
 695    STORE_MBC(&b[beg*8 + 4], c2);
 696    mbc_size += beg - end + 1;
 697    STORE_NUMBER(&b[-2], mbc_size);
 698  }
 699  
 700  static int
 701  is_in_list(c, b)
 702      unsigned long c;
 703      const unsigned char *b;
 704  {
 705    unsigned short size;
 706    unsigned short i, j;
 707  
 708    size = *b++;
 709    if ((int)c / BYTEWIDTH < (int)size && b[c / BYTEWIDTH] & 1 << c % BYTEWIDTH) {
 710      return 1;
 711    }
 712    b += size + 2;
 713    size = EXTRACT_UNSIGNED(&b[-2]);
 714    if (size == 0) return 0;
 715  
 716    for (i = 0, j = size; i < j; ) {
 717      unsigned short k = (unsigned short)(i + j) >> 1;
 718  
 719      if (c > EXTRACT_MBC(&b[k*8+4]))
 720        i = k + 1;
 721      else
 722        j = k;
 723    }
 724    if (i < size && EXTRACT_MBC(&b[i*8]) <= c)
 725      return 1;
 726  
 727    return 0;
 728  }
 729  
 730  static void
 731  print_partial_compiled_pattern(start, end)
 732      unsigned char *start;
 733      unsigned char *end;
 734  {
 735    int mcnt, mcnt2;
 736    unsigned char *p = start;
 737    unsigned char *pend = end;
 738  
 739    if (start == NULL) {
 740      printf("(null)\n");
 741      return;
 742    }
 743  
 744    /* Loop over pattern commands.  */
 745    while (p < pend) {
 746      switch ((enum regexpcode)*p++) {
 747      case unused:
 748        printf("/unused");
 749        break;
 750  
 751      case exactn:
 752        mcnt = *p++;
 753        printf("/exactn/%d", mcnt);
 754        do {
 755          putchar('/');
 756          printf("%c", *p++);
 757        }
 758        while (--mcnt);
 759        break;
 760  
 761      case start_memory:
 762        mcnt = *p++;
 763        printf("/start_memory/%d/%d", mcnt, *p++);
 764        break;
 765  
 766      case stop_memory:
 767        mcnt = *p++;
 768        printf("/stop_memory/%d/%d", mcnt, *p++);
 769        break;
 770  
 771      case start_paren:
 772        printf("/start_paren");
 773        break;
 774  
 775      case stop_paren:
 776        printf("/stop_paren");
 777        break;
 778  
 779      case casefold_on:
 780        printf("/casefold_on");
 781        break;
 782  
 783      case casefold_off:
 784        printf("/casefold_off");
 785        break;
 786  
 787      case option_set:
 788        printf("/option_set/%d", *p++);
 789        break;
 790  
 791      case start_nowidth:
 792        EXTRACT_NUMBER_AND_INCR(mcnt, p);
 793        printf("/start_nowidth//%d", mcnt);
 794        break;
 795  
 796      case stop_nowidth:
 797        printf("/stop_nowidth//");
 798        p += 2;
 799        break;
 800  
 801      case pop_and_fail:
 802        printf("/pop_and_fail");
 803        break;
 804  
 805      case stop_backtrack:
 806        printf("/stop_backtrack//");
 807        p += 2;
 808        break;
 809  
 810      case duplicate:
 811        printf("/duplicate/%d", *p++);
 812        break;
 813  
 814      case anychar:
 815        printf("/anychar");
 816        break;
 817  
 818      case anychar_repeat:
 819        printf("/anychar_repeat");
 820        break;
 821  
 822      case charset:
 823      case charset_not:
 824        {
 825          register int c;
 826  
 827          printf("/charset%s",
 828                 (enum regexpcode)*(p - 1) == charset_not ? "_not" : "");
 829  
 830          mcnt = *p++;
 831          printf("/%d", mcnt);
 832          for (c = 0; c < mcnt; c++) {
 833            unsigned bit;
 834            unsigned char map_byte = p[c];
 835  
 836            putchar('/');
 837  
 838            for (bit = 0; bit < BYTEWIDTH; bit++)
 839              if (map_byte & (1 << bit))
 840                printf("%c", c * BYTEWIDTH + bit);
 841          }
 842          p += mcnt;
 843          mcnt = EXTRACT_UNSIGNED_AND_INCR(p);
 844          putchar('/');
 845          while (mcnt--) {
 846            print_mbc(EXTRACT_MBC_AND_INCR(p));
 847            putchar('-');
 848            print_mbc(EXTRACT_MBC_AND_INCR(p));
 849          }
 850          break;
 851        }
 852  
 853      case begline:
 854        printf("/begline");
 855        break;
 856  
 857      case endline:
 858        printf("/endline");
 859        break;
 860  
 861      case on_failure_jump:
 862        EXTRACT_NUMBER_AND_INCR(mcnt, p);
 863        printf("/on_failure_jump//%d", mcnt);
 864        break;
 865  
 866      case dummy_failure_jump:
 867        EXTRACT_NUMBER_AND_INCR(mcnt, p);
 868        printf("/dummy_failure_jump//%d", mcnt);
 869        break;
 870  
 871      case push_dummy_failure:
 872        printf("/push_dummy_failure");
 873        break;
 874  
 875      case finalize_jump:
 876        EXTRACT_NUMBER_AND_INCR(mcnt, p);
 877        printf("/finalize_jump//%d", mcnt);
 878        break;
 879  
 880      case maybe_finalize_jump:
 881        EXTRACT_NUMBER_AND_INCR(mcnt, p);
 882        printf("/maybe_finalize_jump//%d", mcnt);
 883        break;
 884  
 885      case jump_past_alt:
 886        EXTRACT_NUMBER_AND_INCR(mcnt, p);
 887        printf("/jump_past_alt//%d", mcnt);
 888        break;
 889  
 890      case jump:
 891        EXTRACT_NUMBER_AND_INCR(mcnt, p);
 892        printf("/jump//%d", mcnt);
 893        break;
 894  
 895      case succeed_n: 
 896        EXTRACT_NUMBER_AND_INCR(mcnt, p);
 897        EXTRACT_NUMBER_AND_INCR(mcnt2, p);
 898        printf("/succeed_n//%d//%d", mcnt, mcnt2);
 899        break;
 900  
 901      case jump_n: 
 902        EXTRACT_NUMBER_AND_INCR(mcnt, p);
 903        EXTRACT_NUMBER_AND_INCR(mcnt2, p);
 904        printf("/jump_n//%d//%d", mcnt, mcnt2);
 905        break;
 906  
 907      case set_number_at: 
 908        EXTRACT_NUMBER_AND_INCR(mcnt, p);
 909        EXTRACT_NUMBER_AND_INCR(mcnt2, p);
 910        printf("/set_number_at//%d//%d", mcnt, mcnt2);
 911        break;
 912  
 913      case try_next:
 914        EXTRACT_NUMBER_AND_INCR(mcnt, p);
 915        printf("/try_next//%d", mcnt);
 916        break;
 917  
 918      case finalize_push:
 919        EXTRACT_NUMBER_AND_INCR(mcnt, p);
 920        printf("/finalize_push//%d", mcnt);
 921        break;
 922  
 923      case finalize_push_n:
 924        EXTRACT_NUMBER_AND_INCR(mcnt, p);
 925        EXTRACT_NUMBER_AND_INCR(mcnt2, p);
 926        printf("/finalize_push_n//%d//%d", mcnt, mcnt2);
 927        break;
 928  
 929      case wordbound:
 930        printf("/wordbound");
 931        break;
 932  
 933      case notwordbound:
 934        printf("/notwordbound");
 935        break;
 936  
 937      case wordbeg:
 938        printf("/wordbeg");
 939        break;
 940  
 941      case wordend:
 942        printf("/wordend");
 943  
 944      case wordchar:
 945        printf("/wordchar");
 946        break;
 947            
 948      case notwordchar:
 949        printf("/notwordchar");
 950        break;
 951  
 952      case begbuf:
 953        printf("/begbuf");
 954        break;
 955  
 956      case endbuf:
 957        printf("/endbuf");
 958        break;
 959  
 960      case endbuf2:
 961        printf("/endbuf2");
 962        break;
 963  
 964      case begpos:
 965        printf("/begpos");
 966        break;
 967  
 968      default:
 969        printf("?%d", *(p-1));
 970      }
 971    }
 972    printf("/\n");
 973  }
 974  
 975  
 976  static void
 977  print_compiled_pattern(bufp)
 978       struct re_pattern_buffer *bufp;
 979  {
 980    unsigned char *buffer = (unsigned char*)bufp->buffer;
 981  
 982    print_partial_compiled_pattern(buffer, buffer + bufp->used);
 983  }
 984  
 985  static char*
 986  calculate_must_string(start, end)
 987       char *start;
 988       char *end;
 989  {
 990    int mcnt;
 991    int max = 0;
 992    char *p = start;
 993    char *pend = end;
 994    char *must = 0;
 995  
 996    if (start == NULL) return 0;
 997  
 998    /* Loop over pattern commands.  */
 999    while (p < pend) {
1000      switch ((enum regexpcode)*p++) {
1001      case unused:
1002        break;
1003  
1004      case exactn:
1005        mcnt = *p;
1006        if (mcnt > max) {
1007          must = p;
1008          max = mcnt;
1009        }
1010        p += mcnt+1;
1011        break;
1012  
1013      case start_memory:
1014      case stop_memory:
1015        p += 2;
1016        break;
1017  
1018      case duplicate:
1019        p++;
1020        break;
1021  
1022      case casefold_on:
1023      case casefold_off:
1024        return 0;         /* should not check must_string */
1025  
1026      case pop_and_fail:
1027      case anychar:
1028      case anychar_repeat:
1029      case begline:
1030      case endline:
1031      case wordbound:
1032      case notwordbound:
1033      case wordbeg:
1034      case wordend:
1035      case wordchar:
1036      case notwordchar:
1037      case begbuf:
1038      case endbuf:
1039      case endbuf2:
1040      case begpos:
1041      case push_dummy_failure:
1042      case start_paren:
1043      case stop_paren:
1044      case option_set:
1045        break;
1046  
1047      case charset:
1048      case charset_not:
1049        mcnt = *p++;
1050        p += mcnt;
1051        mcnt = EXTRACT_UNSIGNED_AND_INCR(p);
1052        while (mcnt--) {
1053          p += 8;
1054        }
1055        break;
1056  
1057      case on_failure_jump:
1058        EXTRACT_NUMBER_AND_INCR(mcnt, p);
1059        if (mcnt > 0) p += mcnt;
1060        if ((enum regexpcode)p[-3] == jump) {
1061          p -= 2;
1062          EXTRACT_NUMBER_AND_INCR(mcnt, p);
1063          if (mcnt > 0) p += mcnt;
1064        }
1065        break;
1066  
1067      case dummy_failure_jump:
1068      case succeed_n: 
1069      case try_next:
1070      case jump:
1071        EXTRACT_NUMBER_AND_INCR(mcnt, p);
1072        if (mcnt > 0) p += mcnt;
1073        break;
1074  
1075      case start_nowidth:
1076      case stop_nowidth:
1077      case stop_backtrack:
1078      case finalize_jump:
1079      case maybe_finalize_jump:
1080      case finalize_push:
1081        p += 2;
1082        break;
1083  
1084      case jump_n: 
1085      case set_number_at: 
1086      case finalize_push_n:
1087        p += 4;
1088        break;
1089  
1090      default:
1091        break;
1092      }
1093    }
1094    return must;
1095  }
1096  
1097  static unsigned int
1098  read_backslash(c)
1099       int c;
1100  {
1101    switch (c) {
1102    case 'n':
1103      return '\n';
1104  
1105    case 't':
1106      return '\t';
1107  
1108    case 'r':
1109      return '\r';
1110  
1111    case 'f':
1112      return '\f';
1113  
1114    case 'v':
1115      return '\v';
1116  
1117    case 'a':
1118      return '\007';
1119  
1120    case 'b':
1121      return '\010';
1122  
1123    case 'e':
1124      return '\033';
1125    }
1126    return c;
1127  }
1128  
1129  static unsigned int
1130  read_special(p, pend, pp)
1131       const char *p, *pend, **pp;
1132  {
1133    int c;
1134  
1135    PATFETCH_RAW(c);
1136    switch (c) {
1137    case 'M':
1138      PATFETCH_RAW(c);
1139      if (c != '-') return -1;
1140      PATFETCH_RAW(c);
1141      *pp = p;
1142      if (c == '\\') {
1143        return read_special(p, pend, pp) | 0x80;
1144      }
1145      else if (c == -1) return ~0;
1146      else {
1147        return ((c & 0xff) | 0x80);
1148      }
1149  
1150    case 'C':
1151      PATFETCH_RAW(c);
1152      if (c != '-') return ~0;
1153    case 'c':
1154      PATFETCH_RAW(c);
1155      *pp = p;
1156      if (c == '\\') {
1157        c = read_special(p, pend, pp);
1158      }
1159      else if (c == '?') return 0177;
1160      else if (c == -1) return ~0;
1161      return c & 0x9f;
1162    default:
1163      return read_backslash(c);
1164    }
1165  
1166   end_of_pattern:
1167    return ~0;
1168  }
1169  
1170  /* re_compile_pattern takes a regular-expression string
1171     and converts it into a buffer full of byte commands for matching.
1172  
1173     PATTERN   is the address of the pattern string
1174     SIZE      is the length of it.
1175     BUFP     is a  struct re_pattern_buffer *  which points to the info
1176               on where to store the byte commands.
1177               This structure contains a  char *  which points to the
1178               actual space, which should have been obtained with malloc.
1179               re_compile_pattern may use realloc to grow the buffer space.
1180  
1181     The number of bytes of commands can be found out by looking in
1182     the `struct re_pattern_buffer' that bufp pointed to, after
1183     re_compile_pattern returns. */
1184  
1185  char *
1186  re_compile_pattern(pattern, size, bufp)
1187       const char *pattern;
1188       int size;
1189       struct re_pattern_buffer *bufp;
1190  {
1191    register char *b = bufp->buffer;
1192    register const char *p = pattern;
1193    const char *nextp;
1194    const char *pend = pattern + size;
1195    register unsigned int c, c1 = 0;
1196    const char *p0;
1197    int numlen;
1198  #define ERROR_MSG_MAX_SIZE 200
1199    static char error_msg[ERROR_MSG_MAX_SIZE+1];
1200  
1201    /* Address of the count-byte of the most recently inserted `exactn'
1202       command.  This makes it possible to tell whether a new exact-match
1203       character can be added to that command or requires a new `exactn'
1204       command.  */
1205  
1206    char *pending_exact = 0;
1207  
1208    /* Address of the place where a forward-jump should go to the end of
1209       the containing expression.  Each alternative of an `or', except the
1210       last, ends with a forward-jump of this sort.  */
1211  
1212    char *fixup_alt_jump = 0;
1213  
1214    /* Address of start of the most recently finished expression.
1215       This tells postfix * where to find the start of its operand.  */
1216  
1217    char *laststart = 0;
1218  
1219    /* In processing a repeat, 1 means zero matches is allowed.  */
1220  
1221    char zero_times_ok;
1222  
1223    /* In processing a repeat, 1 means many matches is allowed.  */
1224  
1225    char many_times_ok;
1226  
1227    /* In processing a repeat, 1 means non-greedy matches.  */
1228  
1229    char greedy;
1230  
1231    /* Address of beginning of regexp, or inside of last (.  */
1232  
1233    char *begalt = b;
1234  
1235    /* Place in the uncompiled pattern (i.e., the {) to
1236       which to go back if the interval is invalid.  */
1237    const char *beg_interval;
1238  
1239    /* In processing an interval, at least this many matches must be made.  */
1240    int lower_bound;
1241  
1242    /* In processing an interval, at most this many matches can be made.  */
1243    int upper_bound;
1244  
1245    /* Stack of information saved by ( and restored by ).
1246       Five stack elements are pushed by each (:
1247       First, the value of b.
1248       Second, the value of fixup_alt_jump.
1249       Third, the value of begalt.
1250       Fourth, the value of regnum.
1251       Fifth, the type of the paren. */
1252  
1253    int stacka[40];
1254    int *stackb = stacka;
1255    int *stackp = stackb;
1256    int *stacke = stackb + 40;
1257  
1258    /* Counts ('s as they are encountered.  Remembered for the matching ),
1259       where it becomes the register number to put in the stop_memory
1260       command.  */
1261  
1262    int regnum = 1;
1263  
1264    int range = 0;
1265    int had_mbchar = 0;
1266    int had_num_literal = 0;
1267    int had_char_class = 0;
1268  
1269    int options = bufp->options;
1270  
1271    bufp->fastmap_accurate = 0;
1272    bufp->must = 0;
1273    bufp->must_skip = 0;
1274  
1275    /* Initialize the syntax table.  */
1276    init_syntax_once();
1277  
1278    if (bufp->allocated == 0) {
1279      bufp->allocated = INIT_BUF_SIZE;
1280      /* EXTEND_BUFFER loses when bufp->allocated is 0.  */
1281      bufp->buffer = (char*)xrealloc(bufp->buffer, INIT_BUF_SIZE);
1282      if (!bufp->buffer) goto memory_exhausted; /* this not happen */
1283      begalt = b = bufp->buffer;
1284    }
1285  
1286    while (p != pend) {
1287      PATFETCH(c);
1288  
1289      switch (c) {
1290      case '$':
1291        if (bufp->options & RE_OPTION_SINGLELINE) {
1292          BUFPUSH(endbuf);
1293        }
1294        else {
1295          p0 = p;
1296          /* When testing what follows the $,
1297             look past the \-constructs that don't consume anything.  */
1298  
1299          while (p0 != pend) {
1300            if (*p0 == '\\' && p0 + 1 != pend
1301                && (p0[1] == 'b' || p0[1] == 'B'))
1302              p0 += 2;
1303            else
1304              break;
1305          }
1306          BUFPUSH(endline);
1307        }
1308        break;
1309  
1310      case '^':
1311        if (bufp->options & RE_OPTION_SINGLELINE)
1312          BUFPUSH(begbuf);
1313        else
1314          BUFPUSH(begline);
1315        break;
1316  
1317      case '+':
1318      case '?':
1319      case '*':
1320        /* If there is no previous pattern, char not special. */
1321        if (!laststart) {
1322          snprintf(error_msg, ERROR_MSG_MAX_SIZE, 
1323                   "invalid regular expression; there's no previous pattern, to which '%c' would define cardinality at %d", 
1324                   c, p-pattern);
1325          FREE_AND_RETURN(stackb, error_msg);
1326        }
1327        /* If there is a sequence of repetition chars,
1328           collapse it down to just one.  */
1329        zero_times_ok = c != '+';
1330        many_times_ok = c != '?';
1331        greedy = 1;
1332        if (p != pend) {
1333          PATFETCH(c);
1334          switch (c) {
1335          case '?':
1336            greedy = 0;
1337            break;
1338          case '*':
1339          case '+':
1340            goto nested_meta;
1341          default:
1342            PATUNFETCH;
1343            break;
1344          }
1345        }
1346  
1347      repeat:
1348        /* Star, etc. applied to an empty pattern is equivalent
1349           to an empty pattern.  */
1350        if (!laststart)  
1351          break;
1352  
1353        if (greedy && many_times_ok && *laststart == anychar && b - laststart <= 2) {
1354          if (b[-1] == stop_paren)
1355            b--;
1356          if (zero_times_ok)
1357            *laststart = anychar_repeat;
1358          else {
1359            BUFPUSH(anychar_repeat);
1360          }
1361          break;
1362        }
1363        /* Now we know whether or not zero matches is allowed
1364           and also whether or not two or more matches is allowed.  */
1365        if (many_times_ok) {
1366          /* If more than one repetition is allowed, put in at the
1367             end a backward relative jump from b to before the next
1368             jump we're going to put in below (which jumps from
1369             laststart to after this jump).  */
1370          GET_BUFFER_SPACE(3);
1371          store_jump(b,greedy?maybe_finalize_jump:finalize_push,laststart-3);
1372          b += 3;         /* Because store_jump put stuff here.  */
1373        }
1374  
1375        /* On failure, jump from laststart to next pattern, which will be the
1376           end of the buffer after this jump is inserted.  */
1377        GET_BUFFER_SPACE(3);
1378        insert_jump(on_failure_jump, laststart, b + 3, b);
1379        b += 3;
1380  
1381        if (zero_times_ok) {
1382          if (greedy == 0) {
1383            GET_BUFFER_SPACE(3);
1384            insert_jump(try_next, laststart, b + 3, b);
1385            b += 3;
1386          }
1387        }
1388        else {
1389          /* At least one repetition is required, so insert a
1390             `dummy_failure_jump' before the initial
1391             `on_failure_jump' instruction of the loop. This
1392             effects a skip over that instruction the first time
1393             we hit that loop.  */
1394          GET_BUFFER_SPACE(3);
1395          insert_jump(dummy_failure_jump, laststart, laststart + 6, b);
1396          b += 3;
1397        }
1398        break;
1399  
1400      case '.':
1401        laststart = b;
1402        BUFPUSH(anychar);
1403        break;
1404  
1405      case '[':
1406        if (p == pend)
1407          FREE_AND_RETURN(stackb, "invalid regular expression; '[' can't be the last character ie. can't start range at the end of pattern");
1408        while ((b - bufp->buffer + 9 + (1 << BYTEWIDTH) / BYTEWIDTH)
1409               > bufp->allocated)
1410          EXTEND_BUFFER;
1411  
1412        laststart = b;
1413        if (*p == '^') {
1414          BUFPUSH(charset_not); 
1415          p++;
1416        }
1417        else
1418          BUFPUSH(charset);
1419        p0 = p;
1420  
1421        BUFPUSH((1 << BYTEWIDTH) / BYTEWIDTH);
1422        /* Clear the whole map */
1423        memset(b, 0, (1 << BYTEWIDTH) / BYTEWIDTH + 2);
1424  
1425        had_mbchar = 0;
1426        had_num_literal = 0;
1427        had_char_class = 0;
1428  
1429        /* Read in characters and ranges, setting map bits.  */
1430        for (;;) {
1431          int size;
1432          unsigned last = (unsigned)-1;
1433  
1434          if ((size = EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH]))
1435              || current_mbctype) {
1436            /* Ensure the space is enough to hold another interval
1437               of multi-byte chars in charset(_not)?.  */
1438            size = (1 << BYTEWIDTH) / BYTEWIDTH + 2 + size*8 + 8;
1439            while (b + size + 1 > bufp->buffer + bufp->allocated)
1440              EXTEND_BUFFER;
1441          }
1442        range_retry:
1443          if (range && had_char_class) {
1444            FREE_AND_RETURN(stackb, "invalid regular expression; can't use character class as an end value of range");
1445          }
1446          PATFETCH(c);
1447  
1448          if (c == ']') {
1449            if (p == p0 + 1) {
1450              if (p == pend)
1451                FREE_AND_RETURN(stackb, "invalid regular expression; empty character class");
1452            }
1453            else 
1454              /* Stop if this isn't merely a ] inside a bracket
1455                 expression, but rather the end of a bracket
1456                 expression.  */
1457              break;
1458          }
1459          /* Look ahead to see if it's a range when the last thing
1460             was a character class.  */
1461          if (had_char_class && c == '-' && *p != ']')
1462            FREE_AND_RETURN(stackb, "invalid regular expression; can't use character class as a start value of range");
1463          if (ismbchar(c)) {
1464            PATFETCH_MBC(c);
1465            had_mbchar++;
1466          }
1467          had_char_class = 0;
1468  
1469          /* \ escapes characters when inside [...].  */
1470          if (c == '\\') {
1471            PATFETCH_RAW(c);
1472            switch (c) {
1473            case 'w':
1474              for (c = 0; c < (1 << BYTEWIDTH); c++) {
1475                if (SYNTAX(c) == Sword ||
1476                    (!current_mbctype && SYNTAX(c) == Sword2))
1477                  SET_LIST_BIT(c);
1478              }
1479              if (current_mbctype) {
1480                set_list_bits(0x80, 0xffffffff, b);
1481              }
1482              had_char_class = 1;
1483              last = -1;
1484              continue;
1485  
1486            case 'W':
1487              for (c = 0; c < (1 << BYTEWIDTH); c++) {
1488                if (SYNTAX(c) != Sword &&
1489                    ((current_mbctype && !re_mbctab[c]) ||
1490                    (!current_mbctype && SYNTAX(c) != Sword2)))
1491                  SET_LIST_BIT(c);
1492              }
1493              had_char_class = 1;
1494              last = -1;
1495              continue;
1496  
1497            case 's':
1498              for (c = 0; c < 256; c++)
1499                if (ISSPACE(c))
1500                  SET_LIST_BIT(c);
1501              had_char_class = 1;
1502              last = -1;
1503              continue;
1504  
1505            case 'S':
1506              for (c = 0; c < 256; c++)
1507                if (!ISSPACE(c))
1508                  SET_LIST_BIT(c);
1509              if (current_mbctype)
1510                set_list_bits(0x80, 0xffffffff, b);
1511              had_char_class = 1;
1512              last = -1;
1513              continue;
1514  
1515            case 'd':
1516              for (c = '0'; c <= '9'; c++)
1517                SET_LIST_BIT(c);
1518              had_char_class = 1;
1519              last = -1;
1520              continue;
1521  
1522            case 'D':
1523              for (c = 0; c < 256; c++)
1524                if (!ISDIGIT(c))
1525                  SET_LIST_BIT(c);
1526              if (current_mbctype)
1527                set_list_bits(0x80, 0xffffffff, b);
1528              had_char_class = 1;
1529              last = -1;
1530              continue;
1531  
1532            case 'x':
1533              c = scan_hex(p, 2, &numlen);
1534              if (numlen == 0) goto invalid_escape;
1535              p += numlen;
1536              had_num_literal = 1;
1537              break;
1538  
1539            case '0': case '1': case '2': case '3': case '4':
1540            case '5': case '6': case '7': case '8': case '9':
1541              PATUNFETCH;
1542              c = scan_oct(p, 3, &numlen);
1543              p += numlen;
1544              had_num_literal = 1;
1545              break;
1546  
1547            case 'M':
1548            case 'C':
1549            case 'c':
1550              {
1551                char *pp;
1552  
1553                --p;
1554                c = read_special(p, pend, &pp);
1555                if (c > 255) goto invalid_escape;
1556                p = pp;
1557                had_num_literal = 1;
1558              }
1559              break;
1560  
1561            default:
1562              c = read_backslash(c);
1563              if (ismbchar(c)) {
1564                PATFETCH_MBC(c);
1565                had_mbchar++;
1566              }
1567              break;
1568            }
1569          }
1570  
1571          /* Get a range.  */
1572          if (range) {
1573            if (last > c)
1574              goto invalid_pattern;
1575  
1576            range = 0;
1577            if (had_mbchar == 0) {
1578              for (;last<=c;last++)
1579                SET_LIST_BIT(last);
1580            }
1581            else if (had_mbchar == 2) {
1582              set_list_bits(last, c, b);
1583            }
1584            else {
1585              /* restriction: range between sbc and mbc */
1586              goto invalid_pattern;
1587            }
1588          }
1589          else if (p[0] == '-' && p[1] != ']') {
1590            last = c;
1591            PATFETCH(c1);
1592            range = 1;
1593            goto range_retry;
1594          }
1595          else if (c == '[' && *p == ':') {
1596            /* Leave room for the null.  */
1597            char str[CHAR_CLASS_MAX_LENGTH + 1];
1598  
1599            PATFETCH_RAW(c);
1600            c1 = 0;
1601  
1602            /* If pattern is `[[:'.  */
1603            if (p == pend) 
1604              FREE_AND_RETURN(stackb, "invalid regular expression; re can't end '[[:'");
1605  
1606            for (;;) {
1607              PATFETCH (c);
1608              if (c == ':' || c == ']' || p == pend
1609                  || c1 == CHAR_CLASS_MAX_LENGTH)
1610                break;
1611              str[c1++] = c;
1612            }
1613            str[c1] = '\0';
1614  
1615            /* If isn't a word bracketed by `[:' and:`]':
1616               undo the ending character, the letters, and leave 
1617               the leading `:' and `[' (but set bits for them).  */
1618            if (c == ':' && *p == ']') {
1619              int ch;
1620              char is_alnum = STREQ(str, "alnum");
1621              char is_alpha = STREQ(str, "alpha");
1622              char is_blank = STREQ(str, "blank");
1623              char is_cntrl = STREQ(str, "cntrl");
1624              char is_digit = STREQ(str, "digit");
1625              char is_graph = STREQ(str, "graph");
1626              char is_lower = STREQ(str, "lower");
1627              char is_print = STREQ(str, "print");
1628              char is_punct = STREQ(str, "punct");
1629              char is_space = STREQ(str, "space");
1630              char is_upper = STREQ(str, "upper");
1631              char is_xdigit = STREQ(str, "xdigit");
1632  
1633              if (!IS_CHAR_CLASS(str)){
1634                snprintf(error_msg, ERROR_MSG_MAX_SIZE, 
1635                         "invalid regular expression; [:%s:] is not a character class", str);
1636                FREE_AND_RETURN(stackb, error_msg);
1637              }
1638  
1639              /* Throw away the ] at the end of the character class.  */
1640              PATFETCH(c);
1641  
1642              if (p == pend) 
1643                FREE_AND_RETURN(stackb, "invalid regular expression; range doesn't have ending ']' after a character class");
1644  
1645              for (ch = 0; ch < 1 << BYTEWIDTH; ch++) {
1646                if (   (is_alnum  && ISALNUM(ch))
1647                    || (is_alpha  && ISALPHA(ch))
1648                    || (is_blank  && ISBLANK(ch))
1649                    || (is_cntrl  && ISCNTRL(ch))
1650                    || (is_digit  && ISDIGIT(ch))
1651                    || (is_graph  && ISGRAPH(ch))
1652                    || (is_lower  && ISLOWER(ch))
1653                    || (is_print  && ISPRINT(ch))
1654                    || (is_punct  && ISPUNCT(ch))
1655                    || (is_space  && ISSPACE(ch))
1656                    || (is_upper  && ISUPPER(ch))
1657                    || (is_xdigit && ISXDIGIT(ch)))
1658                  SET_LIST_BIT(ch);
1659              }
1660              had_char_class = 1;
1661            }
1662            else {
1663              c1++;
1664              while (c1--)    
1665                PATUNFETCH;
1666              SET_LIST_BIT(TRANSLATE_P()?translate['[']:'[');
1667              SET_LIST_BIT(TRANSLATE_P()?translate[':']:':');
1668              had_char_class = 0;
1669              last = ':';
1670            }
1671          }
1672          else if (had_mbchar == 0 && (!current_mbctype || !had_num_literal)) {
1673            SET_LIST_BIT(c);
1674            had_num_literal = 0;
1675          }
1676          else
1677            set_list_bits(c, c, b);
1678          had_mbchar = 0;
1679        }
1680  
1681        /* Discard any character set/class bitmap bytes that are all
1682           0 at the end of the map. Decrement the map-length byte too.  */
1683        while ((int)b[-1] > 0 && b[b[-1] - 1] == 0) 
1684          b[-1]--; 
1685        if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH)
1686          memmove(&b[b[-1]], &b[(1 << BYTEWIDTH) / BYTEWIDTH],
1687                  2 + EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH])*8);
1688        b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[b[-1]])*8;
1689        break;
1690  
1691      case '(':
1692        {
1693          int old_options = options;
1694          int push_option = 0;
1695          int casefold = 0;
1696  
1697          PATFETCH(c);
1698          if (c == '?') {
1699            int negative = 0;
1700  
1701            PATFETCH_RAW(c);
1702            switch (c) {
1703            case 'x': case 'm': case 'i': case '-':
1704              for (;;) {
1705                switch (c) {
1706                case '-':
1707                  negative = 1;
1708                  break;
1709  
1710                case ':':
1711                case ')':
1712                  break;
1713  
1714                case 'x':
1715                  if (negative)
1716                    options &= ~RE_OPTION_EXTENDED;
1717                  else
1718                    options |= RE_OPTION_EXTENDED;
1719                  break;
1720  
1721                case 'm':
1722                  if (negative) {
1723                    if (options&RE_OPTION_MULTILINE) {
1724                      options &= ~RE_OPTION_MULTILINE;
1725                    }
1726                  }
1727                  else if (!(options&RE_OPTION_MULTILINE)) {
1728                    options |= RE_OPTION_MULTILINE;
1729                  }
1730                  push_option = 1;
1731                  break;
1732  
1733                case 'i':
1734                  if (negative) {
1735                    if (options&RE_OPTION_IGNORECASE) {
1736                      options &= ~RE_OPTION_IGNORECASE;
1737                    }
1738                  }
1739                  else if (!(options&RE_OPTION_IGNORECASE)) {
1740                    options |= RE_OPTION_IGNORECASE;
1741                  }
1742                  casefold = 1;
1743                  break;
1744  
1745                default:
1746                  FREE_AND_RETURN(stackb, "undefined (?...) inline option");
1747                }
1748                if (c == ')') {
1749                  c = '#';        /* read whole in-line options */
1750                  break;
1751                }
1752                if (c == ':') break;
1753                PATFETCH_RAW(c);
1754              }
1755              break;
1756  
1757            case '#':
1758              for (;;) {
1759                PATFETCH(c);
1760                if (c == ')') break;
1761              }
1762              c = '#';
1763              break;
1764  
1765            case ':':
1766            case '=':
1767            case '!':
1768            case '>':
1769              break;
1770  
1771            default:
1772              FREE_AND_RETURN(stackb, "undefined (?...) sequence");
1773            }
1774          }
1775          else {
1776            PATUNFETCH;
1777            c = '(';
1778          }
1779          if (c == '#') {
1780            if (push_option) {
1781              BUFPUSH(option_set);
1782              BUFPUSH(options);
1783            }
1784            if (casefold) {
1785              if (options & RE_OPTION_IGNORECASE)
1786                BUFPUSH(casefold_on);
1787              else
1788                BUFPUSH(casefold_off);
1789            }
1790            break;
1791          }
1792          if (stackp+8 >= stacke) {
1793            DOUBLE_STACK(int);
1794          }
1795  
1796          /* Laststart should point to the start_memory that we are about
1797             to push (unless the pattern has RE_NREGS or more ('s).  */
1798          /* obsolete: now RE_NREGS is just a default register size. */
1799          *stackp++ = b - bufp->buffer;    
1800          *stackp++ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1801          *stackp++ = begalt - bufp->buffer;
1802          switch (c) {
1803          case '(':
1804            BUFPUSH(start_memory);
1805            BUFPUSH(regnum);
1806            *stackp++ = regnum++;
1807            *stackp++ = b - bufp->buffer;
1808            BUFPUSH(0);
1809            /* too many ()'s to fit in a byte. (max 254) */
1810            if (regnum >= RE_REG_MAX) goto too_big;
1811            break;
1812  
1813          case '=':
1814          case '!':
1815          case '>':
1816            BUFPUSH(start_nowidth);
1817            *stackp++ = b - bufp->buffer;
1818            BUFPUSH(0);   /* temporary value */
1819            BUFPUSH(0);
1820            if (c != '!') break;
1821  
1822            BUFPUSH(on_failure_jump);
1823            *stackp++ = b - bufp->buffer;
1824            BUFPUSH(0);   /* temporary value */
1825            BUFPUSH(0);
1826            break;
1827  
1828          case ':':
1829            BUFPUSH(start_paren);
1830            pending_exact = 0;
1831          default:
1832            break;
1833          }
1834          if (push_option) {
1835            BUFPUSH(option_set);
1836            BUFPUSH(options);
1837          }
1838          if (casefold) {
1839            if (options & RE_OPTION_IGNORECASE)
1840              BUFPUSH(casefold_on);
1841            else
1842              BUFPUSH(casefold_off);
1843          }
1844          *stackp++ = c;
1845          *stackp++ = old_options;
1846          fixup_alt_jump = 0;
1847          laststart = 0;
1848          begalt = b;
1849        }
1850        break;
1851  
1852      case ')':
1853        if (stackp == stackb) 
1854          FREE_AND_RETURN(stackb, "unmatched )");
1855  
1856        pending_exact = 0;
1857        if (fixup_alt_jump) {
1858          /* Push a dummy failure point at the end of the
1859             alternative for a possible future
1860             `finalize_jump' to pop.  See comments at
1861             `push_dummy_failure' in `re_match'.  */
1862          BUFPUSH(push_dummy_failure);
1863  
1864          /* We allocated space for this jump when we assigned
1865             to `fixup_alt_jump', in the `handle_alt' case below.  */
1866          store_jump(fixup_alt_jump, jump, b);
1867        }
1868        if (options != stackp[-1]) {
1869          if ((options ^ stackp[-1]) & RE_OPTION_IGNORECASE) {
1870            BUFPUSH((options&RE_OPTION_IGNORECASE)?casefold_off:casefold_on);
1871          }
1872          if ((options ^ stackp[-1]) != RE_OPTION_IGNORECASE) {
1873            BUFPUSH(option_set);
1874            BUFPUSH(stackp[-1]);
1875          }
1876        }
1877        p0 = b;
1878        options = *--stackp;
1879        switch (c = *--stackp) {
1880        case '(':
1881          {
1882            char *loc = bufp->buffer + *--stackp;
1883            *loc = regnum - stackp[-1];
1884            BUFPUSH(stop_memory);
1885            BUFPUSH(stackp[-1]);
1886            BUFPUSH(regnum - stackp[-1]);
1887            stackp--;
1888          }
1889          break;
1890  
1891        case '!':
1892          BUFPUSH(pop_and_fail);
1893          /* back patch */
1894          STORE_NUMBER(bufp->buffer+stackp[-1], b - bufp->buffer - stackp[-1] - 2);
1895          stackp--;
1896          /* fall through */
1897        case '=':
1898          BUFPUSH(stop_nowidth);
1899          /* tell stack-pos place to start_nowidth */
1900          STORE_NUMBER(bufp->buffer+stackp[-1], b - bufp->buffer - stackp[-1] - 2);
1901          BUFPUSH(0);     /* space to hold stack pos */
1902          BUFPUSH(0);
1903          stackp--;
1904          break;
1905  
1906        case '>':
1907          BUFPUSH(stop_backtrack);
1908          /* tell stack-pos place to start_nowidth */
1909          STORE_NUMBER(bufp->buffer+stackp[-1], b - bufp->buffer - stackp[-1] - 2);
1910          BUFPUSH(0);     /* space to hold stack pos */
1911          BUFPUSH(0);
1912          stackp--;
1913          break;
1914  
1915        case ':':
1916          BUFPUSH(stop_paren);
1917          break;
1918  
1919        default:
1920          break;
1921        }
1922        begalt = *--stackp + bufp->buffer;
1923        stackp--;
1924        fixup_alt_jump = *stackp ? *stackp + bufp->buffer - 1 : 0;
1925        laststart = *--stackp + bufp->buffer;
1926        if (c == '!' || c == '=') laststart = b;
1927        break;
1928  
1929      case '|':
1930        /* Insert before the previous alternative a jump which
1931           jumps to this alternative if the former fails.  */
1932        GET_BUFFER_SPACE(3);
1933        insert_jump(on_failure_jump, begalt, b + 6, b);
1934        pending_exact = 0;
1935        b += 3;
1936        /* The alternative before this one has a jump after it
1937           which gets executed if it gets matched.  Adjust that
1938           jump so it will jump to this alternative's analogous
1939           jump (put in below, which in turn will jump to the next
1940           (if any) alternative's such jump, etc.).  The last such
1941           jump jumps to the correct final destination.  A picture:
1942           _____ _____ 
1943           |   | |   |   
1944           |   v |   v 
1945           a | b   | c   
1946  
1947           If we are at `b', then fixup_alt_jump right now points to a
1948           three-byte space after `a'.  We'll put in the jump, set
1949           fixup_alt_jump to right after `b', and leave behind three
1950           bytes which we'll fill in when we get to after `c'.  */
1951  
1952        if (fixup_alt_jump)
1953          store_jump(fixup_alt_jump, jump_past_alt, b);
1954  
1955        /* Mark and leave space for a jump after this alternative,
1956           to be filled in later either by next alternative or
1957           when know we're at the end of a series of alternatives.  */
1958        fixup_alt_jump = b;
1959        GET_BUFFER_SPACE(3);
1960        b += 3;
1961  
1962        laststart = 0;
1963        begalt = b;
1964        break;
1965  
1966      case '{':
1967        /* If there is no previous pattern, this is an invalid pattern.  */
1968        if (!laststart) {
1969          snprintf(error_msg, ERROR_MSG_MAX_SIZE, 
1970                   "invalid regular expression; there's no previous pattern, to which '{' would define cardinality at %d", 
1971                   p-pattern);
1972          FREE_AND_RETURN(stackb, error_msg);
1973        }
1974        if( p == pend)
1975          FREE_AND_RETURN(stackb, "invalid regular expression; '{' can't be last character" );
1976  
1977        beg_interval = p - 1;
1978  
1979        lower_bound = -1;                 /* So can see if are set.  */
1980        upper_bound = -1;
1981        GET_UNSIGNED_NUMBER(lower_bound);
1982        if (c == ',') {
1983          GET_UNSIGNED_NUMBER(upper_bound);
1984        }
1985        else
1986          /* Interval such as `{1}' => match exactly once. */
1987          upper_bound = lower_bound;
1988  
1989        if (lower_bound < 0 || c != '}')
1990          goto unfetch_interval;
1991  
1992        if (lower_bound >= RE_DUP_MAX || upper_bound >= RE_DUP_MAX)
1993          FREE_AND_RETURN(stackb, "too big quantifier in {,}");
1994        if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1995        if (lower_bound > upper_bound)
1996          FREE_AND_RETURN(stackb, "can't do {n,m} with n > m");
1997  
1998        beg_interval = 0;
1999        pending_exact = 0;
2000  
2001        greedy = 1;
2002        if (p != pend) {
2003          PATFETCH(c);
2004          if (c == '?') greedy = 0;
2005          else PATUNFETCH;
2006        }
2007  
2008        if (lower_bound == 0) {
2009          zero_times_ok = 1;
2010          if (upper_bound == RE_DUP_MAX) {
2011            many_times_ok = 1;
2012            goto repeat;
2013          }
2014          if (upper_bound == 1) {
2015            many_times_ok = 0;
2016            goto repeat;
2017          }
2018        }
2019        if (lower_bound == 1) {
2020          if (upper_bound == 1) {
2021            /* No need to repeat */
2022            break;
2023          }
2024          if (upper_bound == RE_DUP_MAX) {
2025            many_times_ok = 1;
2026            zero_times_ok = 0;
2027            goto repeat;
2028          }
2029        }
2030  
2031        /* If upper_bound is zero, don't want to succeed at all; 
2032           jump from laststart to b + 3, which will be the end of
2033           the buffer after this jump is inserted.  */
2034  
2035        if (upper_bound == 0) {
2036          GET_BUFFER_SPACE(3);
2037          insert_jump(jump, laststart, b + 3, b);
2038          b += 3;
2039          break;
2040        }
2041  
2042        /* If lower_bound == upper_bound, repeat count can be removed */
2043        if (lower_bound == upper_bound) {
2044          int mcnt;
2045          int skip_stop_paren = 0;
2046  
2047          if (b[-1] == stop_paren) {
2048            skip_stop_paren = 1;
2049            b--;
2050          }
2051  
2052          if (*laststart == exactn && laststart[1]+2 == b - laststart
2053              && laststart[1]*lower_bound < 256) {
2054            mcnt = laststart[1];
2055            GET_BUFFER_SPACE((lower_bound-1)*mcnt);
2056            laststart[1] = lower_bound*mcnt;
2057            while (--lower_bound) {
2058              memcpy(b, laststart+2, mcnt);
2059              b += mcnt;
2060            }
2061            if (skip_stop_paren) BUFPUSH(stop_paren);
2062            break;
2063          }
2064  
2065          if (lower_bound < 5 && b - laststart < 10) {
2066            /* 5 and 10 are the magic numbers */
2067  
2068            mcnt = b - laststart;
2069            GET_BUFFER_SPACE((lower_bound-1)*mcnt);
2070            while (--lower_bound) {
2071              memcpy(b, laststart, mcnt);
2072              b += mcnt;
2073            }
2074            if (skip_stop_paren) BUFPUSH(stop_paren);
2075            break;
2076          }
2077          if (skip_stop_paren) b++; /* push back stop_paren */
2078        }
2079  
2080        /* Otherwise, we have a nontrivial interval.  When
2081           we're all done, the pattern will look like:
2082           set_number_at <jump count> <upper bound>
2083           set_number_at <succeed_n count> <lower bound>
2084           succeed_n <after jump addr> <succed_n count>
2085           <body of loop>
2086           jump_n <succeed_n addr> <jump count>
2087           (The upper bound and `jump_n' are omitted if
2088           `upper_bound' is 1, though.)  */
2089        { /* If the upper bound is > 1, we need to insert
2090             more at the end of the loop.  */
2091          unsigned nbytes = upper_bound == 1 ? 10 : 20;
2092  
2093          GET_BUFFER_SPACE(nbytes);
2094          /* Initialize lower bound of the `succeed_n', even
2095             though it will be set during matching by its
2096             attendant `set_number_at' (inserted next),
2097             because `re_compile_fastmap' needs to know.
2098             Jump to the `jump_n' we might insert below.  */
2099          insert_jump_n(succeed_n, laststart, b + (nbytes/2), 
2100                        b, lower_bound);
2101          b += 5;         /* Just increment for the succeed_n here.  */
2102  
2103          /* Code to initialize the lower bound.  Insert 
2104             before the `succeed_n'.  The `5' is the last two
2105             bytes of this `set_number_at', plus 3 bytes of
2106             the following `succeed_n'.  */
2107          insert_op_2(set_number_at, laststart, b, 5, lower_bound);
2108          b += 5;
2109  
2110          if (upper_bound > 1) {
2111            /* More than one repetition is allowed, so
2112               append a backward jump to the `succeed_n'
2113               that starts this interval.
2114  
2115               When we've reached this during matching,
2116               we'll have matched the interval once, so
2117               jump back only `upper_bound - 1' times.  */
2118            GET_BUFFER_SPACE(5);
2119            store_jump_n(b, greedy?jump_n:finalize_push_n, laststart + 5,
2120                         upper_bound - 1);
2121            b += 5;
2122  
2123            /* The location we want to set is the second
2124               parameter of the `jump_n'; that is `b-2' as
2125               an absolute address.  `laststart' will be
2126               the `set_number_at' we're about to insert;
2127               `laststart+3' the number to set, the source
2128               for the relative address.  But we are
2129               inserting into the middle of the pattern --
2130               so everything is getting moved up by 5.
2131               Conclusion: (b - 2) - (laststart + 3) + 5,
2132               i.e., b - laststart.
2133  
2134               We insert this at the beginning of the loop
2135               so that if we fail during matching, we'll
2136               reinitialize the bounds.  */
2137            insert_op_2(set_number_at, laststart, b, b - laststart,
2138                        upper_bound - 1);
2139            b += 5;
2140          }
2141        }
2142        break;
2143  
2144      unfetch_interval:
2145        /* If an invalid interval, match the characters as literals.  */
2146        p = beg_interval;
2147        beg_interval = 0;
2148  
2149        /* normal_char and normal_backslash need `c'.  */
2150        PATFETCH(c);      
2151        goto normal_char;
2152  
2153      case '\\':
2154        if (p == pend)
2155          FREE_AND_RETURN(stackb, "invalid regular expression; '\\' can't be last character");
2156        /* Do not translate the character after the \, so that we can
2157           distinguish, e.g., \B from \b, even if we normally would
2158           translate, e.g., B to b.  */
2159        PATFETCH_RAW(c);
2160        switch (c) {
2161        case 's':
2162        case 'S':
2163        case 'd':
2164        case 'D':
2165          while (b - bufp->buffer + 9 + (1 << BYTEWIDTH) / BYTEWIDTH
2166                 > bufp->allocated)
2167            EXTEND_BUFFER;
2168  
2169          laststart = b;
2170          if (c == 's' || c == 'd') {
2171            BUFPUSH(charset);
2172          }
2173          else {
2174            BUFPUSH(charset_not);
2175          }
2176  
2177          BUFPUSH((1 << BYTEWIDTH) / BYTEWIDTH);
2178          memset(b, 0, (1 << BYTEWIDTH) / BYTEWIDTH + 2);
2179          if (c == 's' || c == 'S') {
2180            SET_LIST_BIT(' ');
2181            SET_LIST_BIT('\t');
2182            SET_LIST_BIT('\n');
2183            SET_LIST_BIT('\r');
2184            SET_LIST_BIT('\f');
2185          }
2186          else {
2187            char cc;
2188  
2189            for (cc = '0'; cc <= '9'; cc++) {
2190              SET_LIST_BIT(cc);
2191            }
2192          }
2193  
2194          while ((int)b[-1] > 0 && b[b[-1] - 1] == 0) 
2195            b[-1]--; 
2196          if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH)
2197            memmove(&b[b[-1]], &b[(1 << BYTEWIDTH) / BYTEWIDTH],
2198                    2 + EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH])*8);
2199          b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[b[-1]])*8;
2200          break;
2201  
2202        case 'w':
2203          laststart = b;
2204          BUFPUSH(wordchar);
2205          break;
2206  
2207        case 'W':
2208          laststart = b;
2209          BUFPUSH(notwordchar);
2210          break;
2211  
2212  #ifndef RUBY
2213        case '<':
2214          BUFPUSH(wordbeg);
2215          break;
2216  
2217        case '>':
2218          BUFPUSH(wordend);
2219          break;
2220  #endif
2221  
2222        case 'b':
2223          BUFPUSH(wordbound);
2224          break;
2225  
2226        case 'B':
2227          BUFPUSH(notwordbound);
2228          break;
2229  
2230        case 'A':
2231          BUFPUSH(begbuf);
2232          break;
2233  
2234        case 'Z':
2235          if ((bufp->options & RE_OPTION_SINGLELINE) == 0) {
2236            BUFPUSH(endbuf2);
2237            break;
2238          }
2239          /* fall through */
2240        case 'z':
2241          BUFPUSH(endbuf);
2242          break;
2243  
2244        case 'G':
2245          BUFPUSH(begpos);
2246          break;
2247  
2248          /* hex */
2249        case 'x':
2250          had_mbchar = 0;
2251          c = scan_hex(p, 2, &numlen);
2252          if (numlen == 0) goto invalid_escape;
2253          p += numlen;
2254          had_num_literal = 1;
2255          goto numeric_char;
2256  
2257          /* octal */
2258        case '0':
2259          had_mbchar = 0;
2260          c = scan_oct(p, 2, &numlen);
2261          p += numlen;
2262          had_num_literal = 1;
2263          goto numeric_char;
2264  
2265          /* back-ref or octal */
2266        case '1': case '2': case '3':
2267        case '4': case '5': case '6':
2268        case '7': case '8': case '9':
2269          PATUNFETCH;
2270          p0 = p;
2271  
2272          had_mbchar = 0;
2273          c1 = 0;
2274          GET_UNSIGNED_NUMBER(c1);
2275          if (!ISDIGIT(c)) PATUNFETCH;
2276  
2277          if (9 < c1 && c1 >= regnum) {
2278            /* need to get octal */
2279            c = scan_oct(p0, 3, &numlen) & 0xff;
2280            p = p0 + numlen;
2281            c1 = 0;
2282            had_num_literal = 1;
2283            goto numeric_char;
2284          }
2285  
2286          laststart = b;
2287          BUFPUSH(duplicate);
2288          BUFPUSH(c1);
2289          break;
2290  
2291        case 'M':
2292        case 'C':
2293        case 'c':
2294          p0 = --p;
2295          c = read_special(p, pend, &p0);
2296          if (c > 255) goto invalid_escape;
2297          p = p0;
2298          had_num_literal = 1;
2299          goto numeric_char;
2300  
2301        default:
2302          c = read_backslash(c);
2303          goto normal_char;
2304        }
2305        break;
2306  
2307      case '#':
2308        if (options & RE_OPTION_EXTENDED) {
2309          while (p != pend) {
2310            PATFETCH(c);
2311            if (c == '\n') break;
2312          }
2313          break;
2314        }
2315        goto normal_char;
2316  
2317      case ' ':
2318      case '\t':
2319      case '\f':
2320      case '\r':
2321      case '\n':
2322        if (options & RE_OPTION_EXTENDED)
2323          break;
2324  
2325      default:
2326      normal_char:                /* Expects the character in `c'.  */
2327        had_mbchar = 0;
2328        if (ismbchar(c)) {
2329          had_mbchar = 1;
2330          c1 = p - pattern;
2331        }
2332      numeric_char:
2333        nextp = p + mbclen(c) - 1;
2334        if (!pending_exact || pending_exact + *pending_exact + 1 != b
2335            || *pending_exact >= (c1 ? 0176 : 0177)
2336            || *nextp == '+' || *nextp == '?'
2337            || *nextp == '*' || *nextp == '^'
2338            || *nextp == '{') {
2339          laststart = b;
2340          BUFPUSH(exactn);
2341          pending_exact = b;
2342          BUFPUSH(0);
2343        }
2344        if (had_num_literal || c == 0xff) {
2345          BUFPUSH(0xff);
2346          (*pending_exact)++;
2347          had_num_literal = 0;
2348        }
2349        BUFPUSH(c);
2350        (*pending_exact)++;
2351        if (had_mbchar) {
2352          int len = mbclen(c) - 1;
2353          while (len--) {
2354            PATFETCH_RAW(c);
2355            BUFPUSH(c);
2356            (*pending_exact)++;
2357          }
2358        }
2359      }
2360    }
2361  
2362    if (fixup_alt_jump)
2363      store_jump(fixup_alt_jump, jump, b);
2364  
2365    if (stackp != stackb)
2366      FREE_AND_RETURN(stackb, "unmatched (");
2367  
2368    /* set optimize flags */
2369    laststart = bufp->buffer;
2370    if (laststart != b) {
2371      if (*laststart == start_memory) laststart += 3;
2372      if (*laststart == dummy_failure_jump) laststart += 3;
2373      else if (*laststart == try_next) laststart += 3;
2374      if (*laststart == anychar_repeat) {
2375        bufp->options |= RE_OPTIMIZE_ANCHOR;
2376      }
2377    }
2378  
2379    bufp->used = b - bufp->buffer;
2380    bufp->re_nsub = regnum;
2381    laststart = bufp->buffer;
2382    if (laststart != b) {
2383      if (*laststart == start_memory) laststart += 3;
2384      if (*laststart == exactn) {
2385        bufp->options |= RE_OPTIMIZE_EXACTN;
2386        bufp->must = laststart+1;
2387      }
2388    }
2389    if (!bufp->must) {
2390      bufp->must = calculate_must_string(bufp->buffer, b);
2391    }
2392    if (current_mbctype == MBCTYPE_SJIS) bufp->options |= RE_OPTIMIZE_NO_BM;
2393    else if (bufp->must) {
2394      int i;
2395      int len = (unsigned char)bufp->must[0];
2396  
2397      for (i=1; i<len; i++) {
2398        if ((unsigned char)bufp->must[i] == 0xff ||
2399            (current_mbctype && ismbchar(bufp->must[i]))) {
2400          bufp->options |= RE_OPTIMIZE_NO_BM;
2401          break;
2402        }
2403      }
2404      if (!(bufp->options & RE_OPTIMIZE_NO_BM)) {
2405        bufp->must_skip = (int *) xmalloc((1 << BYTEWIDTH)*sizeof(int));
2406        bm_init_skip(bufp->must_skip, (unsigned char*)bufp->must+1,
2407                     (unsigned char)bufp->must[0],
2408                     (unsigned char*)(MAY_TRANSLATE()?translate:0));
2409      }
2410    }
2411  
2412    bufp->regstart = TMALLOC(regnum, unsigned char*);
2413    bufp->regend = TMALLOC(regnum, unsigned char*);
2414    bufp->old_regstart = TMALLOC(regnum, unsigned char*);
2415    bufp->old_regend = TMALLOC(regnum, unsigned char*);
2416    bufp->reg_info = TMALLOC(regnum, register_info_type);
2417    bufp->best_regstart = TMALLOC(regnum, unsigned char*);
2418    bufp->best_regend = TMALLOC(regnum, unsigned char*);
2419    FREE_AND_RETURN(stackb, 0);
2420  
2421   invalid_pattern:
2422    FREE_AND_RETURN(stackb, "invalid regular expression");
2423  
2424   end_of_pattern:
2425    FREE_AND_RETURN(stackb, "premature end of regular expression");
2426  
2427   too_big:
2428    FREE_AND_RETURN(stackb, "regular expression too big");
2429  
2430   memory_exhausted:
2431    FREE_AND_RETURN(stackb, "memory exhausted");
2432  
2433   nested_meta:
2434    FREE_AND_RETURN(stackb, "nested *?+ in regexp");
2435  
2436   invalid_escape:
2437    FREE_AND_RETURN(stackb, "Invalid escape character syntax");
2438  }
2439  
2440  void
2441  re_free_pattern(bufp)
2442       struct re_pattern_buffer *bufp;
2443  {
2444    xfree(bufp->buffer);
2445    xfree(bufp->fastmap);
2446    if (bufp->must_skip) xfree(bufp->must_skip);
2447  
2448    xfree(bufp->regstart);
2449    xfree(bufp->regend);
2450    xfree(bufp->old_regstart);
2451    xfree(bufp->old_regend);
2452    xfree(bufp->best_regstart);
2453    xfree(bufp->best_regend);
2454    xfree(bufp->reg_info);
2455    xfree(bufp);
2456  }
2457  
2458  /* Store a jump of the form <OPCODE> <relative address>.
2459     Store in the location FROM a jump operation to jump to relative
2460     address FROM - TO.  OPCODE is the opcode to store.  */
2461  
2462  static void
2463  store_jump(from, opcode, to)
2464       char *from, *to;
2465       int opcode;
2466  {
2467    from[0] = (char)opcode;
2468    STORE_NUMBER(from + 1, to - (from + 3));
2469  }
2470  
2471  
2472  /* Open up space before char FROM, and insert there a jump to TO.
2473     CURRENT_END gives the end of the storage not in use, so we know 
2474     how much data to copy up. OP is the opcode of the jump to insert.
2475  
2476     If you call this function, you must zero out pending_exact.  */
2477  
2478  static void
2479  insert_jump(op, from, to, current_end)
2480       int op;
2481       char *from, *to, *current_end;
2482  {
2483    register char *pfrom = current_end;           /* Copy from here...  */
2484    register char *pto = current_end + 3;         /* ...to here.  */
2485  
2486    while (pfrom != from)                        
2487      *--pto = *--pfrom;
2488    store_jump(from, op, to);
2489  }
2490  
2491  
2492  /* Store a jump of the form <opcode> <relative address> <n> .
2493  
2494     Store in the location FROM a jump operation to jump to relative
2495     address FROM - TO.  OPCODE is the opcode to store, N is a number the
2496     jump uses, say, to decide how many times to jump.
2497  
2498     If you call this function, you must zero out pending_exact.  */
2499  
2500  static void
2501  store_jump_n(from, opcode, to, n)
2502       char *from, *to;
2503       int opcode;
2504       unsigned n;
2505  {
2506    from[0] = (char)opcode;
2507    STORE_NUMBER(from + 1, to - (from + 3));
2508    STORE_NUMBER(from + 3, n);
2509  }
2510  
2511  
2512  /* Similar to insert_jump, but handles a jump which needs an extra
2513     number to handle minimum and maximum cases.  Open up space at
2514     location FROM, and insert there a jump to TO.  CURRENT_END gives the
2515     end of the storage in use, so we know how much data to copy up. OP is
2516     the opcode of the jump to insert.
2517  
2518     If you call this function, you must zero out pending_exact.  */
2519  
2520  static void
2521  insert_jump_n(op, from, to, current_end, n)
2522       int op;
2523       char *from, *to, *current_end;
2524       unsigned n;
2525  {
2526    register char *pfrom = current_end;           /* Copy from here...  */
2527    register char *pto = current_end + 5;         /* ...to here.  */
2528  
2529    while (pfrom != from)                        
2530      *--pto = *--pfrom;
2531    store_jump_n(from, op, to, n);
2532  }
2533  
2534  
2535  /* Open up space at location THERE, and insert operation OP.
2536     CURRENT_END gives the end of the storage in use, so
2537     we know how much data to copy up.
2538  
2539     If you call this function, you must zero out pending_exact.  */
2540  
2541  static void
2542  insert_op(op, there, current_end)
2543       int op;
2544       char *there, *current_end;
2545  {
2546    register char *pfrom = current_end;           /* Copy from here...  */
2547    register char *pto = current_end + 1;         /* ...to here.  */
2548  
2549    while (pfrom != there)                               
2550      *--pto = *--pfrom;
2551  
2552    there[0] = (char)op;
2553  }
2554  
2555  
2556  /* Open up space at location THERE, and insert operation OP followed by
2557     NUM_1 and NUM_2.  CURRENT_END gives the end of the storage in use, so
2558     we know how much data to copy up.
2559  
2560     If you call this function, you must zero out pending_exact.  */
2561  
2562  static void
2563  insert_op_2(op, there, current_end, num_1, num_2)
2564       int op;
2565       char *there, *current_end;
2566       int num_1, num_2;
2567  {
2568    register char *pfrom = current_end;           /* Copy from here...  */
2569    register char *pto = current_end + 5;         /* ...to here.  */
2570  
2571    while (pfrom != there)                               
2572      *--pto = *--pfrom;
2573  
2574    there[0] = (char)op;
2575    STORE_NUMBER(there + 1, num_1);
2576    STORE_NUMBER(there + 3, num_2);
2577  }
2578  
2579  
2580  #define trans_eq(c1, c2, translate) (translate?(translate[c1]==translate[c2]):((c1)==(c2)))
2581  static int
2582  slow_match(little, lend, big, bend, translate)
2583       unsigned char *little, *lend;
2584       unsigned char *big, *bend;
2585       unsigned char *translate;
2586  {
2587    int c;
2588  
2589    while (little < lend && big < bend) {
2590      c = *little++;
2591      if (c == 0xff)
2592        c = *little++;
2593      if (!trans_eq(*big++, c, translate)) break;
2594    }
2595    if (little == lend) return 1;
2596    return 0;
2597  }
2598  
2599  static int
2600  slow_search(little, llen, big, blen, translate)
2601       unsigned char *little;
2602       int llen;
2603       unsigned char *big;
2604       int blen;
2605       char *translate;
2606  {
2607    unsigned char *bsave = big;
2608    unsigned char *bend = big + blen;
2609    register int c;
2610    int fescape = 0;
2611  
2612    c = *little;
2613    if (c == 0xff) {
2614      c = little[1];
2615      fescape = 1;
2616    }
2617    else if (translate && !ismbchar(c)) {
2618      c = translate[c];
2619    }
2620  
2621    while (big < bend) {
2622      /* look for first character */
2623      if (fescape) {
2624        while (big < bend) {
2625          if (*big == c) break;
2626          big++;
2627        }
2628      }
2629      else if (translate && !ismbchar(c)) {
2630        while (big < bend) {
2631          if (ismbchar(*big)) big+=mbclen(*big)-1;
2632          else if (translate[*big] == c) break;
2633          big++;
2634        }
2635      }
2636      else {
2637        while (big < bend) {
2638          if (*big == c) break;
2639          if (ismbchar(*big)) big+=mbclen(*big)-1;
2640          big++;
2641        }
2642      }
2643  
2644      if (slow_match(little, little+llen, big, bend, translate))
2645        return big - bsave;
2646  
2647      big+=mbclen(*big);
2648    }
2649    return -1;
2650  }
2651  
2652  static void
2653  bm_init_skip(skip, pat, m, translate)
2654       int *skip;
2655       unsigned char *pat;
2656       int m;
2657       const unsigned char *translate;
2658  {
2659    int j, c;
2660  
2661    for (c=0; c<256; c++) {
2662      skip[c] = m;
2663    }
2664    if (translate) {
2665      for (j=0; j<m-1; j++) {
2666        skip[translate[pat[j]]] = m-1-j;
2667      }
2668    }
2669    else {
2670      for (j=0; j<m-1; j++) {
2671        skip[pat[j]] = m-1-j;
2672      }
2673    }
2674  }
2675  
2676  static int
2677  bm_search(little, llen, big, blen, skip, translate)
2678       unsigned char *little;
2679       int llen;
2680       unsigned char *big;
2681       int blen;
2682       int *skip;
2683       unsigned char *translate;
2684  {
2685    int i, j, k;
2686  
2687    i = llen-1;
2688    if (translate) {
2689      while (i < blen) {
2690        k = i;
2691        j = llen-1;
2692        while (j >= 0 && translate[big[k]] == translate[little[j]]) {
2693          k--;
2694          j--;
2695        }
2696        if (j < 0) return k+1;
2697  
2698        i += skip[translate[big[i]]];
2699      }
2700      return -1;
2701    }
2702    while (i < blen) {
2703      k = i;
2704      j = llen-1;
2705      while (j >= 0 && big[k] == little[j]) {
2706        k--;
2707        j--;
2708      }
2709      if (j < 0) return k+1;
2710  
2711      i += skip[big[i]];
2712    }
2713    return -1;
2714  }
2715  
2716  /* Given a pattern, compute a fastmap from it.  The fastmap records
2717     which of the (1 << BYTEWIDTH) possible characters can start a string
2718     that matches the pattern.  This fastmap is used by re_search to skip
2719     quickly over totally implausible text.
2720  
2721     The caller must supply the address of a (1 << BYTEWIDTH)-byte data 
2722     area as bufp->fastmap.
2723     The other components of bufp describe the pattern to be used.  */
2724  void
2725  re_compile_fastmap(bufp)
2726       struct re_pattern_buffer *bufp;
2727  {
2728    unsigned char *pattern = (unsigned char*)bufp->buffer;
2729    int size = bufp->used;
2730    register char *fastmap = bufp->fastmap;
2731    register unsigned char *p = pattern;
2732    register unsigned char *pend = pattern + size;
2733    register int j, k;
2734    unsigned is_a_succeed_n;
2735  
2736    
2737    unsigned char *stacka[NFAILURES];
2738    unsigned char **stackb = stacka;
2739    unsigned char **stackp = stackb;
2740    unsigned char **stacke = stackb + NFAILURES;
2741    int options = bufp->options;
2742  
2743    memset(fastmap, 0, (1 << BYTEWIDTH));
2744    bufp->fastmap_accurate = 1;
2745    bufp->can_be_null = 0;
2746  
2747    while (p) {
2748      is_a_succeed_n = 0;
2749      if (p == pend) {
2750        bufp->can_be_null = 1;
2751        break;
2752      }
2753  #ifdef SWITCH_ENUM_BUG
2754      switch ((int)((enum regexpcode)*p++))
2755  #else
2756      switch ((enum regexpcode)*p++)
2757  #endif
2758        {
2759        case exactn:
2760          if (p[1] == 0xff) {
2761            if (TRANSLATE_P())
2762              fastmap[translate[p[2]]] = 2;
2763            else
2764              fastmap[p[2]] = 2;
2765            bufp->options |= RE_OPTIMIZE_BMATCH;
2766          }
2767          else if (TRANSLATE_P())
2768            fastmap[translate[p[1]]] = 1;
2769          else
2770            fastmap[p[1]] = 1;
2771          break;
2772  
2773        case begline:
2774        case begbuf:
2775        case begpos:
2776        case endbuf:
2777        case endbuf2:
2778        case wordbound:
2779        case notwordbound:
2780        case wordbeg:
2781        case wordend:
2782        case pop_and_fail:
2783        case push_dummy_failure:
2784        case start_paren:
2785        case stop_paren:
2786          continue;
2787  
2788        case casefold_on:
2789          bufp->options |= RE_MAY_IGNORECASE;
2790        case casefold_off:
2791          options ^= RE_OPTION_IGNORECASE;
2792          continue;
2793  
2794        case option_set:
2795          options = *p++;
2796          continue;
2797  
2798        case endline:
2799          if (TRANSLATE_P())
2800            fastmap[translate['\n']] = 1;
2801          else
2802            fastmap['\n'] = 1;
2803          if ((options & RE_OPTION_SINGLELINE) == 0 && bufp->can_be_null == 0)
2804            bufp->can_be_null = 2;
2805          break;
2806  
2807        case jump_n:
2808        case finalize_jump:
2809        case maybe_finalize_jump:
2810        case jump:
2811        case jump_past_alt:
2812        case dummy_failure_jump:
2813        case finalize_push:
2814        case finalize_push_n:
2815          EXTRACT_NUMBER_AND_INCR(j, p);
2816          p += j; 
2817          if (j > 0)
2818            continue;
2819          /* Jump backward reached implies we just went through
2820             the body of a loop and matched nothing.
2821             Opcode jumped to should be an on_failure_jump.
2822             Just treat it like an ordinary jump.
2823             For a * loop, it has pushed its failure point already;
2824             If so, discard that as redundant.  */
2825  
2826          if ((enum regexpcode)*p != on_failure_jump
2827              && (enum regexpcode)*p != try_next
2828              && (enum regexpcode)*p != succeed_n)
2829            continue;
2830          p++;
2831          EXTRACT_NUMBER_AND_INCR(j, p);
2832          p += j; 
2833          if (stackp != stackb && *stackp == p)
2834            stackp--;             /* pop */
2835          continue;
2836  
2837        case try_next:
2838        case start_nowidth:
2839        case stop_nowidth:
2840        case stop_backtrack:
2841          p += 2;
2842          continue;
2843  
2844        case succeed_n:
2845          is_a_succeed_n = 1;
2846          /* Get to the number of times to succeed.  */
2847          EXTRACT_NUMBER(k, p + 2);
2848          /* Increment p past the n for when k != 0.  */
2849          if (k != 0) {
2850            p += 4;
2851            continue;
2852          }
2853          /* fall through */
2854  
2855        case on_failure_jump:
2856        EXTRACT_NUMBER_AND_INCR(j, p);
2857        if (p + j < pend) {
2858          if (stackp == stacke) {
2859            EXPAND_FAIL_STACK();
2860          }
2861          *++stackp = p + j;      /* push */
2862        }
2863        else {
2864          bufp->can_be_null = 1;
2865        }
2866        if (is_a_succeed_n)
2867          EXTRACT_NUMBER_AND_INCR(k, p);  /* Skip the n.  */
2868        continue;
2869  
2870        case set_number_at:
2871          p += 4;
2872          continue;
2873  
2874        case start_memory:
2875        case stop_memory:
2876          p += 2;
2877          continue;
2878  
2879        case duplicate:
2880          bufp->can_be_null = 1;
2881          if (*p >= bufp->re_nsub) break;
2882          fastmap['\n'] = 1;
2883        case anychar_repeat:
2884        case anychar:
2885          for (j = 0; j < (1 << BYTEWIDTH); j++) {
2886            if (j != '\n' || (options & RE_OPTION_MULTILINE))
2887              fastmap[j] = 1;
2888          }
2889          if (bufp->can_be_null) {
2890            FREE_AND_RETURN_VOID(stackb);
2891          }
2892          /* Don't return; check the alternative paths
2893             so we can set can_be_null if appropriate.  */
2894          if ((enum regexpcode)p[-1] == anychar_repeat) {
2895              continue;
2896          }
2897          break;
2898  
2899        case wordchar:
2900          for (j = 0; j < 0x80; j++) {
2901            if (SYNTAX(j) == Sword)
2902              fastmap[j] = 1;
2903          }
2904          switch (current_mbctype) {
2905          case MBCTYPE_ASCII:
2906            for (j = 0x80; j < (1 << BYTEWIDTH); j++) {
2907              if (SYNTAX(j) == Sword2)
2908                fastmap[j] = 1;
2909            }
2910            break;
2911          case MBCTYPE_EUC:
2912          case MBCTYPE_SJIS:
2913          case MBCTYPE_UTF8:
2914            for (j = 0x80; j < (1 << BYTEWIDTH); j++) {
2915              if (re_mbctab[j])
2916                fastmap[j] = 1;
2917            }
2918            break;
2919          }
2920          break;
2921  
2922        case notwordchar:
2923          for (j = 0; j < 0x80; j++)
2924            if (SYNTAX(j) != Sword)
2925              fastmap[j] = 1;
2926          switch (current_mbctype) {
2927          case MBCTYPE_ASCII:
2928            for (j = 0x80; j < (1 << BYTEWIDTH); j++) {
2929              if (SYNTAX(j) != Sword2)
2930                fastmap[j] = 1;
2931            }
2932            break;
2933          case MBCTYPE_EUC:
2934          case MBCTYPE_SJIS:
2935          case MBCTYPE_UTF8:
2936            for (j = 0x80; j < (1 << BYTEWIDTH); j++) {
2937              if (!re_mbctab[j])
2938                fastmap[j] = 1;
2939            }
2940            break;
2941          }
2942          break;
2943  
2944        case charset:
2945          /* NOTE: Charset for single-byte chars never contain
2946             multi-byte char.  See set_list_bits().  */
2947          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2948            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) {
2949              int tmp = TRANSLATE_P()?translate[j]:j;
2950              fastmap[tmp] = 1;
2951            }
2952          {
2953            unsigned short size;
2954            unsigned long c, beg, end;
2955  
2956            p += p[-1] + 2;
2957            size = EXTRACT_UNSIGNED(&p[-2]);
2958            for (j = 0; j < (int)size; j++) {
2959              c = EXTRACT_MBC(&p[j*8]);
2960              beg = WC2MBC1ST(c);
2961              c = EXTRACT_MBC(&p[j*8+4]);
2962              end = WC2MBC1ST(c);
2963              /* set bits for 1st bytes of multi-byte chars.  */
2964              while (beg <= end) {
2965                /* NOTE: Charset for multi-byte chars might contain
2966                   single-byte chars.  We must reject them. */
2967                if (c < 0x100) {
2968                  fastmap[beg] = 2;
2969                  bufp->options |= RE_OPTIMIZE_BMATCH;
2970                }
2971                else if (ismbchar(beg))
2972                  fastmap[beg] = 1;
2973                beg++;
2974              }
2975            }
2976          }
2977          break;
2978  
2979        case charset_not:
2980          /* S: set of all single-byte chars.
2981             M: set of all first bytes that can start multi-byte chars.
2982             s: any set of single-byte chars.
2983             m: any set of first bytes that can start multi-byte chars.
2984  
2985             We assume S+M = U.
2986             ___      _   _
2987             s+m = (S*s+M*m).  */
2988          /* Chars beyond end of map must be allowed */
2989          /* NOTE: Charset_not for single-byte chars might contain
2990             multi-byte chars.  See set_list_bits(). */
2991          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2992            if (!ismbchar(j))
2993              fastmap[j] = 1;
2994  
2995          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2996            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) {
2997              if (!ismbchar(j))
2998                fastmap[j] = 1;
2999            }
3000          {
3001            unsigned short size;
3002            unsigned long c, beg;
3003            int num_literal = 0;
3004  
3005            p += p[-1] + 2;
3006            size = EXTRACT_UNSIGNED(&p[-2]);
3007            if (size == 0) {
3008              for (j = 0x80; j < (1 << BYTEWIDTH); j++)
3009                if (ismbchar(j))
3010                  fastmap[j] = 1;
3011              break;
3012            }
3013            for (j = 0,c = 0;j < (int)size; j++) {
3014              unsigned int cc = EXTRACT_MBC(&p[j*8]);
3015              beg = WC2MBC1ST(cc);
3016              while (c <= beg) {
3017                if (ismbchar(c))
3018                  fastmap[c] = 1;
3019                c++;
3020              }
3021  
3022              cc = EXTRACT_MBC(&p[j*8+4]);
3023              if (cc < 0xff) {
3024                num_literal = 1;
3025                while (c <= cc) {
3026                  if (ismbchar(c))
3027                    fastmap[c] = 1;
3028                  c++;
3029                }
3030              }
3031              c = WC2MBC1ST(cc);
3032            }
3033  
3034            for (j = c; j < (1 << BYTEWIDTH); j++) {
3035              if (num_literal)
3036                fastmap[j] = 1;
3037              if (ismbchar(j))
3038                fastmap[j] = 1;
3039            }
3040          }
3041          break;
3042  
3043        case unused:      /* pacify gcc -Wall */
3044          break;
3045        }
3046  
3047      /* Get here means we have successfully found the possible starting
3048         characters of one path of the pattern.  We need not follow this
3049         path any farther.  Instead, look at the next alternative
3050         remembered in the stack.  */
3051      if (stackp != stackb)
3052        p = *stackp--;            /* pop */
3053      else
3054        break;
3055    }
3056    FREE_AND_RETURN_VOID(stackb);
3057  }
3058  
3059  /* adjust startpos value to the position between characters. */
3060  int
3061  re_adjust_startpos(bufp, string, size, startpos, range)
3062       struct re_pattern_buffer *bufp;
3063       const char *string;
3064       int size, startpos, range;
3065  {
3066    /* Update the fastmap now if not correct already.  */
3067    if (!bufp->fastmap_accurate) {
3068      re_compile_fastmap(bufp);
3069    }
3070  
3071    /* Adjust startpos for mbc string */
3072    if (current_mbctype && startpos>0 && !(bufp->options&RE_OPTIMIZE_BMATCH)) {
3073      int i = mbc_startpos(string, startpos);
3074  
3075      if (i < startpos) {
3076        if (range > 0) {
3077          startpos = i + mbclen(string[i]);
3078        }
3079        else {
3080          int len = mbclen(string[i]);
3081          if (i + len <= startpos)
3082            startpos = i + len;
3083          else
3084            startpos = i;
3085        }
3086      }
3087    }
3088    return startpos;
3089  }
3090  
3091  
3092  /* Using the compiled pattern in BUFP->buffer, first tries to match
3093     STRING, starting first at index STARTPOS, then at STARTPOS + 1, and
3094     so on.  RANGE is the number of places to try before giving up.  If
3095     RANGE is negative, it searches backwards, i.e., the starting
3096     positions tried are STARTPOS, STARTPOS - 1, etc.  STRING is of SIZE.
3097     In REGS, return the indices of STRING that matched the entire
3098     BUFP->buffer and its contained subexpressions.
3099  
3100     The value returned is the position in the strings at which the match
3101     was found, or -1 if no match was found, or -2 if error (such as
3102     failure stack overflow).  */
3103  
3104  int
3105  re_search(bufp, string, size, startpos, range, regs)
3106       struct re_pattern_buffer *bufp;
3107       const char *string;
3108       int size, startpos, range;
3109       struct re_registers *regs;
3110  {
3111    register char *fastmap = bufp->fastmap;
3112    int val, anchor = 0;
3113  
3114    /* Check for out-of-range starting position.  */
3115    if (startpos < 0  ||  startpos > size)
3116      return -1;
3117  
3118    /* Update the fastmap now if not correct already.  */
3119    if (fastmap && !bufp->fastmap_accurate) {
3120      re_compile_fastmap(bufp);
3121    }
3122  
3123  
3124    /* If the search isn't to be a backwards one, don't waste time in a
3125       search for a pattern that must be anchored.  */
3126    if (bufp->used > 0) {
3127      switch ((enum regexpcode)bufp->buffer[0]) {
3128      case begbuf:
3129      begbuf_match:
3130        if (range > 0) {
3131          if (startpos > 0) return -1;
3132          else {
3133            val = re_match(bufp, string, size, 0, regs);
3134            if (val >= 0) return 0;
3135            return val;
3136          }
3137        }
3138        break;
3139  
3140      case begline:
3141        anchor = 1;
3142        break;
3143  
3144      case begpos:
3145        val = re_match(bufp, string, size, startpos, regs);
3146        if (val >= 0) return startpos;
3147        return val;
3148  
3149      default:
3150        break;
3151      }
3152    }
3153    if (bufp->options & RE_OPTIMIZE_ANCHOR) {
3154      if (bufp->options&RE_OPTION_SINGLELINE) {
3155        goto begbuf_match;
3156      }
3157      anchor = 1;
3158    }
3159  
3160    if (bufp->must) {
3161      int len = ((unsigned char*)bufp->must)[0];
3162      int pos, pbeg, pend;
3163  
3164      pbeg = startpos;
3165      pend = startpos + range;
3166      if (pbeg > pend) {          /* swap pbeg,pend */
3167        pos = pend; pend = pbeg; pbeg = pos;
3168      }
3169      pend = size;
3170      if (bufp->options & RE_OPTIMIZE_NO_BM) {
3171        pos = slow_search(bufp->must+1, len,
3172                          string+pbeg, pend-pbeg,
3173                          MAY_TRANSLATE()?translate:0);
3174      }
3175      else {
3176        pos = bm_search(bufp->must+1, len,
3177                        string+pbeg, pend-pbeg,
3178                        bufp->must_skip,
3179                        MAY_TRANSLATE()?translate:0);
3180      }
3181      if (pos == -1) return -1;
3182      if (range > 0 && (bufp->options & RE_OPTIMIZE_EXACTN)) {
3183        startpos += pos;
3184        range -= pos;
3185        if (range < 0) return -1;
3186      }
3187    }
3188  
3189    for (;;) {
3190      /* If a fastmap is supplied, skip quickly over characters that
3191         cannot possibly be the start of a match.  Note, however, that
3192         if the pattern can possibly match the null string, we must
3193         test it at each starting point so that we take the first null
3194         string we get.  */
3195  
3196      if (fastmap && startpos < size
3197          && bufp->can_be_null != 1 && !(anchor && startpos == 0)) {
3198        if (range > 0) {  /* Searching forwards.  */
3199          register unsigned char *p, c;
3200          int irange = range;
3201  
3202          p = (unsigned char*)string+startpos;
3203  
3204          while (range > 0) {
3205            c = *p++;
3206            if (ismbchar(c)) {
3207              int len;
3208  
3209              if (fastmap[c])
3210                break;
3211              len = mbclen(c) - 1;
3212              while (len--) {
3213                c = *p++;
3214                range--;
3215                if (fastmap[c] == 2)
3216                  goto startpos_adjust;
3217              }
3218            }
3219            else {
3220              if (fastmap[MAY_TRANSLATE() ? translate[c] : c])
3221                break;
3222            }
3223            range--;
3224          }
3225        startpos_adjust:
3226          startpos += irange - range;
3227        }
3228        else {                    /* Searching backwards.  */
3229          register unsigned char c;
3230  
3231          c = string[startpos];
3232          c &= 0xff;
3233          if (MAY_TRANSLATE() ? !fastmap[translate[c]] : !fastmap[c])
3234            goto advance;
3235        }
3236      }
3237  
3238      if (startpos > size) return -1;
3239      if ((anchor || !bufp->can_be_null) && range > 0 && size > 0 && startpos == size)
3240        return -1;
3241      val = re_match(bufp, string, size, startpos, regs);
3242      if (val >= 0) return startpos;
3243      if (val == -2) return -2;
3244  
3245  #ifndef NO_ALLOCA
3246  #ifdef C_ALLOCA
3247      alloca(0);
3248  #endif /* C_ALLOCA */
3249  #endif /* NO_ALLOCA */
3250  
3251      if (range > 0) {
3252        if (anchor && startpos < size &&
3253            (startpos < 1 || string[startpos-1] != '\n')) {
3254          while (range > 0 && string[startpos] != '\n') {
3255            range--;
3256            startpos++;
3257          }
3258        }
3259      }
3260  
3261    advance:
3262      if (!range) 
3263        break;
3264      else if (range > 0) {
3265        const char *d = string + startpos;
3266  
3267        if (ismbchar(*d)) {
3268          int len = mbclen(*d) - 1;
3269          range-=len, startpos+=len;
3270          if (!range)
3271            break;
3272        }
3273        range--, startpos++;
3274      }
3275      else {
3276        range++, startpos--;
3277        {
3278          const char *s, *d, *p;
3279  
3280          s = string; d = string + startpos;
3281          for (p = d; p-- > s && ismbchar(*p); )
3282            /* --p >= s would not work on 80[12]?86. 
3283               (when the offset of s equals 0 other than huge model.)  */
3284            ;
3285          if (!((d - p) & 1)) {
3286            if (!range)
3287              break;
3288            range++, startpos--;
3289          }
3290        }
3291      }
3292    }
3293    return -1;
3294  }
3295  
3296  
3297  
3298  
3299  /* The following are used for re_match, defined below:  */
3300  
3301  /* Accessing macros used in re_match: */
3302  
3303  #define IS_ACTIVE(R)  ((R).bits.is_active)
3304  #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
3305  
3306  
3307  /* Macros used by re_match:  */
3308  
3309  /* I.e., regstart, regend, and reg_info.  */
3310  #define NUM_REG_ITEMS  3
3311  
3312  /* I.e., ptr and count.  */
3313  #define NUM_COUNT_ITEMS 2
3314  
3315  /* Individual items aside from the registers.  */
3316  #define NUM_NONREG_ITEMS 4
3317  
3318  /* We push at most this many things on the stack whenever we
3319     fail.  The `+ 2' refers to PATTERN_PLACE and STRING_PLACE, which are
3320     arguments to the PUSH_FAILURE_POINT macro.  */
3321  #define MAX_NUM_FAILURE_ITEMS   (num_regs * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
3322  
3323  /* We push this many things on the stack whenever we fail.  */
3324  #define NUM_FAILURE_ITEMS  (last_used_reg * NUM_REG_ITEMS + NUM_NONREG_ITEMS + 1)
3325  
3326  /* This pushes counter information for succeed_n and jump_n */
3327  #define PUSH_FAILURE_COUNT(ptr)                                         \
3328    do {                                                                  \
3329      int c;                                                              \
3330      EXTRACT_NUMBER(c, ptr);                                             \
3331      ENSURE_FAIL_STACK(NUM_COUNT_ITEMS);                                 \
3332      *stackp++ = (unsigned char*)(long)c;                                \
3333      *stackp++ = (ptr);                                                  \
3334      num_failure_counts++;                                               \
3335    } while (0)
3336  
3337  /* This pushes most of the information about the current state we will want
3338     if we ever fail back to it.  */
3339  
3340  #define PUSH_FAILURE_POINT(pattern_place, string_place)                 \
3341    do {                                                                  \
3342      long last_used_reg, this_reg;                                       \
3343                                                                          \
3344      /* Find out how many registers are active or have been matched.     \
3345         (Aside from register zero, which is only set at the end.) */     \
3346      for (last_used_reg = num_regs-1; last_used_reg > 0; last_used_reg--)\
3347        if (!REG_UNSET(regstart[last_used_reg]))                          \
3348          break;                                                          \
3349                                                                          \
3350      ENSURE_FAIL_STACK(NUM_FAILURE_ITEMS);                               \
3351      *stackp++ = (unsigned char*)(long)num_failure_counts;               \
3352      num_failure_counts = 0;                                             \
3353                                                                          \
3354      /* Now push the info for each of those registers.  */               \
3355      for (this_reg = 1; this_reg <= last_used_reg; this_reg++) {         \
3356        *stackp++ = regstart[this_reg];                                   \
3357        *stackp++ = regend[this_reg];                                     \
3358        *stackp++ = reg_info[this_reg].word;                              \
3359      }                                                                   \
3360                                                                          \
3361      /* Push how many registers we saved.  */                            \
3362      *stackp++ = (unsigned char*)last_used_reg;                          \
3363                                                                          \
3364      *stackp++ = pattern_place;                                          \
3365      *stackp++ = string_place;                                           \
3366      *stackp++ = (unsigned char*)(long)options; /* current option status */      \
3367      *stackp++ = (unsigned char*)0; /* non-greedy flag */                \
3368    } while(0)
3369  
3370  #define NON_GREEDY ((unsigned char*)1)
3371  
3372  #define POP_FAILURE_COUNT()                                             \
3373    do {                                                                  \
3374      unsigned char *ptr = *--stackp;                                     \
3375      int count = (long)*--stackp;                                        \
3376      STORE_NUMBER(ptr, count);                                           \
3377    } while (0)
3378  
3379  /* This pops what PUSH_FAILURE_POINT pushes.  */
3380  
3381  #define POP_FAILURE_POINT()                                             \
3382    do {                                                                  \
3383      long temp;                                                          \
3384      stackp -= NUM_NONREG_ITEMS; /* Remove failure points (and flag). */ \
3385      temp = (long)*--stackp;     /* How many regs pushed.  */            \
3386      temp *= NUM_REG_ITEMS;      /* How much to take off the stack.  */  \
3387      stackp -= temp;             /* Remove the register info.  */        \
3388      temp = (long)*--stackp;     /* How many counters pushed.  */        \
3389      while (temp--) {                                                    \
3390        POP_FAILURE_COUNT();      /* Remove the counter info.  */         \
3391      }                                                                   \
3392      num_failure_counts = 0;     /* Reset num_failure_counts.  */        \
3393    } while(0)
3394  
3395       /* Registers are set to a sentinel when they haven't yet matched.  */
3396  #define REG_UNSET_VALUE ((unsigned char*)-1)
3397  #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
3398  
3399  #define PREFETCH if (d == dend) goto fail
3400  
3401       /* Call this when have matched something; it sets `matched' flags for the
3402     registers corresponding to the subexpressions of which we currently
3403     are inside.  */
3404  #define SET_REGS_MATCHED                                                \
3405    do { unsigned this_reg;                                               \
3406      for (this_reg = 0; this_reg < num_regs; this_reg++) {               \
3407          if (IS_ACTIVE(reg_info[this_reg]))                              \
3408            MATCHED_SOMETHING(reg_info[this_reg]) = 1;                    \
3409          else                                                            \
3410            MATCHED_SOMETHING(reg_info[this_reg]) = 0;                    \
3411        }                                                                 \
3412    } while(0)
3413  
3414  #define AT_STRINGS_BEG(d)  ((d) == string)
3415  #define AT_STRINGS_END(d)  ((d) == dend)
3416  
3417  #define IS_A_LETTER(d) (SYNTAX(*(d)) == Sword ||                        \
3418                          (current_mbctype ?                              \
3419                           (re_mbctab[*(d)] && ((d)+mbclen(*(d)))<=dend): \
3420                           SYNTAX(*(d)) == Sword2))
3421  
3422  #define PREV_IS_A_LETTER(d) ((current_mbctype == MBCTYPE_SJIS)?         \
3423                               IS_A_LETTER((d)-(!AT_STRINGS_BEG((d)-1)&&  \
3424                                                ismbchar((d)[-2])?2:1)):  \
3425                               ((current_mbctype && ((d)[-1] >= 0x80)) || \
3426                                IS_A_LETTER((d)-1)))
3427  
3428  static void
3429  init_regs(regs, num_regs)
3430       struct re_registers *regs;
3431       unsigned int num_regs;
3432  {
3433    int i;
3434  
3435    regs->num_regs = num_regs;
3436    if (num_regs < RE_NREGS)
3437      num_regs = RE_NREGS;
3438  
3439    if (regs->allocated == 0) {
3440      regs->beg = TMALLOC(num_regs, int);
3441      regs->end = TMALLOC(num_regs, int);
3442      regs->allocated = num_regs;
3443    }
3444    else if (regs->allocated < num_regs) {
3445      TREALLOC(regs->beg, num_regs, int);
3446      TREALLOC(regs->end, num_regs, int);
3447      regs->allocated = num_regs;
3448    }
3449    for (i=0; i<num_regs; i++) {
3450      regs->beg[i] = regs->end[i] = -1;
3451    }
3452  }
3453  
3454  /* Match the pattern described by BUFP against STRING, which is of
3455     SIZE.  Start the match at index POS in STRING.  In REGS, return the
3456     indices of STRING that matched the entire BUFP->buffer and its
3457     contained subexpressions.
3458  
3459     If bufp->fastmap is nonzero, then it had better be up to date.
3460  
3461     The reason that the data to match are specified as two components
3462     which are to be regarded as concatenated is so this function can be
3463     used directly on the contents of an Emacs buffer.
3464  
3465     -1 is returned if there is no match.  -2 is returned if there is an
3466     error (such as match stack overflow).  Otherwise the value is the
3467     length of the substring which was matched.  */
3468  
3469  int
3470  re_match(bufp, string_arg, size, pos, regs)
3471       struct re_pattern_buffer *bufp;
3472       const char *string_arg;
3473       int size, pos;
3474       struct re_registers *regs;
3475  {
3476    register unsigned char *p = (unsigned char*)bufp->buffer;
3477    unsigned char *p1;
3478  
3479    /* Pointer to beyond end of buffer.  */
3480    register unsigned char *pend = p + bufp->used;
3481  
3482    unsigned num_regs = bufp->re_nsub;
3483  
3484    unsigned char *string = (unsigned char*)string_arg;
3485  
3486    register unsigned char *d, *dend;
3487    register int mcnt;                    /* Multipurpose.  */
3488    int options = bufp->options;
3489  
3490    /* Failure point stack.  Each place that can handle a failure further
3491       down the line pushes a failure point on this stack.  It consists of
3492       restart, regend, and reg_info for all registers corresponding to the
3493       subexpressions we're currently inside, plus the number of such
3494       registers, and, finally, two char *'s.  The first char * is where to
3495       resume scanning the pattern; the second one is where to resume
3496       scanning the strings.  If the latter is zero, the failure point is a
3497       ``dummy''; if a failure happens and the failure point is a dummy, it
3498       gets discarded and the next next one is tried.  */
3499  
3500    unsigned char **stacka;
3501    unsigned char **stackb;
3502    unsigned char **stackp;
3503    unsigned char **stacke;
3504  
3505    /* Information on the contents of registers. These are pointers into
3506       the input strings; they record just what was matched (on this
3507       attempt) by a subexpression part of the pattern, that is, the
3508       regnum-th regstart pointer points to where in the pattern we began
3509       matching and the regnum-th regend points to right after where we
3510       stopped matching the regnum-th subexpression.  (The zeroth register
3511       keeps track of what the whole pattern matches.)  */
3512  
3513    unsigned char **regstart = bufp->regstart;
3514    unsigned char **regend = bufp->regend;
3515  
3516    /* If a group that's operated upon by a repetition operator fails to
3517       match anything, then the register for its start will need to be
3518       restored because it will have been set to wherever in the string we
3519       are when we last see its open-group operator.  Similarly for a
3520       register's end.  */
3521    unsigned char **old_regstart = bufp->old_regstart;
3522    unsigned char **old_regend = bufp->old_regend;
3523  
3524    /* The is_active field of reg_info helps us keep track of which (possibly
3525       nested) subexpressions we are currently in. The matched_something
3526       field of reg_info[reg_num] helps us tell whether or not we have
3527       matched any of the pattern so far this time through the reg_num-th
3528       subexpression.  These two fields get reset each time through any
3529       loop their register is in.  */
3530  
3531    register_info_type *reg_info = bufp->reg_info;
3532  
3533    /* The following record the register info as found in the above
3534       variables when we find a match better than any we've seen before. 
3535       This happens as we backtrack through the failure points, which in
3536       turn happens only if we have not yet matched the entire string.  */
3537  
3538    unsigned best_regs_set = 0;
3539    unsigned char **best_regstart = bufp->best_regstart;
3540    unsigned char **best_regend = bufp->best_regend;
3541  
3542    int num_failure_counts = 0;
3543  
3544    if (regs) {
3545      init_regs(regs, num_regs);
3546    }
3547  
3548    /* Initialize the stack. */
3549    stacka = RE_TALLOC(MAX_NUM_FAILURE_ITEMS * NFAILURES, unsigned char*);
3550    stackb = stacka;
3551    stackp = stackb;
3552    stacke = &stackb[MAX_NUM_FAILURE_ITEMS * NFAILURES];
3553  
3554  #ifdef DEBUG_REGEX
3555    fprintf(stderr, "Entering re_match(%s)\n", string_arg);
3556  #endif
3557  
3558    /* Initialize subexpression text positions to -1 to mark ones that no
3559       ( or ( and ) or ) has been seen for. Also set all registers to
3560       inactive and mark them as not having matched anything or ever
3561       failed. */
3562    for (mcnt = 0; mcnt < num_regs; mcnt++) {
3563      regstart[mcnt] = regend[mcnt]
3564        = old_regstart[mcnt] = old_regend[mcnt]
3565        = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE;
3566  #ifdef __CHECKER__
3567      reg_info[mcnt].word = 0;
3568  #endif
3569      IS_ACTIVE (reg_info[mcnt]) = 0;
3570      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3571    }
3572  
3573    /* Set up pointers to ends of strings.
3574       Don't allow the second string to be empty unless both are empty.  */
3575  
3576  
3577    /* `p' scans through the pattern as `d' scans through the data. `dend'
3578       is the end of the input string that `d' points within. `d' is
3579       advanced into the following input string whenever necessary, but
3580       this happens before fetching; therefore, at the beginning of the
3581       loop, `d' can be pointing at the end of a string, but it cannot
3582       equal string2.  */
3583  
3584    d = string + pos, dend = string + size;
3585  
3586    /* This loops over pattern commands.  It exits by returning from the
3587       function if match is complete, or it drops through if match fails
3588       at this starting point in the input data.  */
3589  
3590    for (;;) {
3591  #ifdef DEBUG_REGEX
3592      fprintf(stderr,
3593              "regex loop(%d):  matching 0x%02d\n",
3594              p - (unsigned char*)bufp->buffer,
3595              *p);
3596  #endif
3597      /* End of pattern means we might have succeeded.  */
3598      if (p == pend) {
3599        /* If not end of string, try backtracking.  Otherwise done.  */
3600        if ((bufp->options & RE_OPTION_LONGEST) && d != dend) {
3601          if (best_regs_set) /* non-greedy, no need to backtrack */
3602            goto restore_best_regs;
3603          while (stackp != stackb && stackp[-1] == NON_GREEDY) {
3604            if (best_regs_set) /* non-greedy, no need to backtrack */
3605              goto restore_best_regs;
3606            POP_FAILURE_POINT();
3607          }
3608          if (stackp != stackb) {
3609            /* More failure points to try.  */
3610  
3611            /* If exceeds best match so far, save it.  */
3612            if (! best_regs_set || (d > best_regend[0])) {
3613              best_regs_set = 1;
3614              best_regend[0] = d; /* Never use regstart[0].  */
3615  
3616              for (mcnt = 1; mcnt < num_regs; mcnt++) {
3617                best_regstart[mcnt] = regstart[mcnt];
3618                best_regend[mcnt] = regend[mcnt];
3619              }
3620            }
3621            goto fail;           
3622          }
3623          /* If no failure points, don't restore garbage.  */
3624          else if (best_regs_set) {
3625          restore_best_regs:
3626            /* Restore best match.  */
3627            d = best_regend[0];
3628  
3629            for (mcnt = 0; mcnt < num_regs; mcnt++) {
3630              regstart[mcnt] = best_regstart[mcnt];
3631              regend[mcnt] = best_regend[mcnt];
3632            }
3633          }
3634        }
3635  
3636        /* If caller wants register contents data back, convert it 
3637           to indices.  */
3638        if (regs) {
3639          regs->beg[0] = pos;
3640          regs->end[0] = d - string;
3641          for (mcnt = 1; mcnt < num_regs; mcnt++) {
3642            if (REG_UNSET(regend[mcnt])) {
3643              regs->beg[mcnt] = -1;
3644              regs->end[mcnt] = -1;
3645              continue;
3646            }
3647            regs->beg[mcnt] = regstart[mcnt] - string;
3648            regs->end[mcnt] = regend[mcnt] - string;
3649          }
3650        }
3651        FREE_AND_RETURN(stackb, (d - pos - string));
3652      }
3653  
3654      /* Otherwise match next pattern command.  */
3655  #ifdef SWITCH_ENUM_BUG
3656      switch ((int)((enum regexpcode)*p++))
3657  #else
3658      switch ((enum regexpcode)*p++)
3659  #endif
3660        {
3661          /* ( [or `(', as appropriate] is represented by start_memory,
3662             ) by stop_memory.  Both of those commands are followed by
3663             a register number in the next byte.  The text matched
3664             within the ( and ) is recorded under that number.  */
3665        case start_memory:
3666          old_regstart[*p] = regstart[*p];
3667          regstart[*p] = d;
3668          IS_ACTIVE(reg_info[*p]) = 1;
3669          MATCHED_SOMETHING(reg_info[*p]) = 0;
3670          p += 2;
3671          continue;
3672  
3673        case stop_memory:
3674          old_regend[*p] = regend[*p];
3675          regend[*p] = d;
3676          IS_ACTIVE(reg_info[*p]) = 0;
3677          p += 2;
3678          continue;
3679  
3680        case start_paren:
3681        case stop_paren:
3682          break;
3683  
3684          /* \<digit> has been turned into a `duplicate' command which is
3685             followed by the numeric value of <digit> as the register number.  */
3686        case duplicate:
3687          {
3688            int regno = *p++;   /* Get which register to match against */
3689            register unsigned char *d2, *dend2;
3690  
3691            /* Check if there's corresponding group */
3692            if (regno >= num_regs) goto fail;
3693            /* Check if corresponding group is still open */
3694            if (IS_ACTIVE(reg_info[regno])) goto fail;
3695  
3696            /* Where in input to try to start matching.  */
3697            d2 = regstart[regno];
3698            if (REG_UNSET(d2)) goto fail;
3699  
3700            /* Where to stop matching; if both the place to start and
3701               the place to stop matching are in the same string, then
3702               set to the place to stop, otherwise, for now have to use
3703               the end of the first string.  */
3704  
3705            dend2 = regend[regno];
3706            if (REG_UNSET(dend2)) goto fail;
3707            for (;;) {
3708              /* At end of register contents => success */
3709              if (d2 == dend2) break;
3710  
3711              /* If necessary, advance to next segment in data.  */
3712              PREFETCH;
3713  
3714              /* How many characters left in this segment to match.  */
3715              mcnt = dend - d;
3716  
3717              /* Want how many consecutive characters we can match in
3718                 one shot, so, if necessary, adjust the count.  */
3719              if (mcnt > dend2 - d2)
3720                mcnt = dend2 - d2;
3721  
3722              /* Compare that many; failure if mismatch, else move
3723                 past them.  */
3724              if ((options & RE_OPTION_IGNORECASE) 
3725                  ? memcmp_translate(d, d2, mcnt) 
3726                  : memcmp((char*)d, (char*)d2, mcnt))
3727                goto fail;
3728              d += mcnt, d2 += mcnt;
3729            }
3730          }
3731          break;
3732  
3733        case start_nowidth:
3734          PUSH_FAILURE_POINT(0, d);
3735          if (stackp - stackb > RE_DUP_MAX) {
3736             FREE_AND_RETURN(stackb,(-2));
3737          }
3738          EXTRACT_NUMBER_AND_INCR(mcnt, p);
3739          STORE_NUMBER(p+mcnt, stackp - stackb);
3740          continue;
3741  
3742        case stop_nowidth:
3743          EXTRACT_NUMBER_AND_INCR(mcnt, p);
3744          stackp = stackb + mcnt;
3745          d = stackp[-3];
3746          POP_FAILURE_POINT();
3747          continue;
3748  
3749        case stop_backtrack:
3750          EXTRACT_NUMBER_AND_INCR(mcnt, p);
3751          stackp = stackb + mcnt;
3752          POP_FAILURE_POINT();
3753          continue;
3754  
3755        case pop_and_fail:
3756          EXTRACT_NUMBER(mcnt, p+1);
3757          stackp = stackb + mcnt;
3758          POP_FAILURE_POINT();
3759          goto fail;
3760  
3761        case anychar:
3762          PREFETCH;
3763          if (ismbchar(*d)) {
3764            if (d + mbclen(*d) > dend)
3765              goto fail;
3766            SET_REGS_MATCHED;
3767            d += mbclen(*d);
3768            break;
3769          }
3770          if (!(options&RE_OPTION_MULTILINE)
3771              && (TRANSLATE_P() ? translate[*d] : *d) == '\n')
3772            goto fail;
3773          SET_REGS_MATCHED;
3774          d++;
3775          break;
3776  
3777        case anychar_repeat:
3778          for (;;) {
3779            PUSH_FAILURE_POINT(p, d);
3780            PREFETCH;
3781            if (ismbchar(*d)) {
3782              if (d + mbclen(*d) > dend)
3783                goto fail;
3784              SET_REGS_MATCHED;
3785              d += mbclen(*d);
3786              continue;
3787            }
3788            if (!(options&RE_OPTION_MULTILINE) &&
3789                (TRANSLATE_P() ? translate[*d] : *d) == '\n')
3790              goto fail;
3791            SET_REGS_MATCHED;
3792            d++;
3793          }
3794          break;
3795  
3796        case charset:
3797        case charset_not:
3798          {
3799            int not;          /* Nonzero for charset_not.  */
3800            int part = 0;     /* true if matched part of mbc */
3801            unsigned char *dsave = d + 1;
3802            int cc, c;
3803  
3804            PREFETCH;
3805            cc = c = (unsigned char)*d++;
3806            if (ismbchar(c)) {
3807              if (d + mbclen(c) - 1 <= dend) {
3808                MBC2WC(c, d);
3809              }
3810            }
3811            else if (TRANSLATE_P())
3812              cc = c = (unsigned char)translate[c];
3813  
3814            not = is_in_list(c, p);
3815            if (!not && cc != c) {
3816                part = not = is_in_list(cc, p);
3817            }
3818            if (*(p - 1) == (unsigned char)charset_not) {
3819              not = !not;
3820            }
3821            if (!not) goto fail;
3822  
3823            p += 1 + *p + 2 + EXTRACT_UNSIGNED(&p[1 + *p])*8;
3824            SET_REGS_MATCHED;
3825  
3826            if (part) d = dsave;
3827            break;
3828          }
3829  
3830        case begline:
3831          if (size == 0 || AT_STRINGS_BEG(d))
3832            break;
3833          if (d[-1] == '\n' && !AT_STRINGS_END(d))
3834            break;
3835          goto fail;
3836  
3837        case endline:
3838          if (AT_STRINGS_END(d)) {
3839            if (size == 0 || d[-1] != '\n')
3840              break;
3841          }
3842          else if (*d == '\n')
3843            break;
3844          goto fail;
3845  
3846          /* Match at the very beginning of the string. */
3847        case begbuf:
3848          if (AT_STRINGS_BEG(d))
3849            break;
3850          goto fail;
3851  
3852          /* Match at the very end of the data. */
3853        case endbuf:
3854          if (AT_STRINGS_END(d))
3855            break;
3856          goto fail;
3857  
3858          /* Match at the very end of the data. */
3859        case endbuf2:
3860          if (AT_STRINGS_END(d)) {
3861            if (size == 0 || d[-1] != '\n')
3862              break;
3863          }
3864          /* .. or newline just before the end of the data. */
3865          if (*d == '\n' && AT_STRINGS_END(d+1))
3866            break;
3867          goto fail;
3868  
3869          /* `or' constructs are handled by starting each alternative with
3870             an on_failure_jump that points to the start of the next
3871             alternative.  Each alternative except the last ends with a
3872             jump to the joining point.  (Actually, each jump except for
3873             the last one really jumps to the following jump, because
3874             tensioning the jumps is a hassle.)  */
3875  
3876          /* The start of a stupid repeat has an on_failure_jump that points
3877             past the end of the repeat text. This makes a failure point so 
3878             that on failure to match a repetition, matching restarts past
3879             as many repetitions have been found with no way to fail and
3880             look for another one.  */
3881  
3882          /* A smart repeat is similar but loops back to the on_failure_jump
3883             so that each repetition makes another failure point.  */
3884  
3885          /* Match at the starting position. */
3886        case begpos:
3887          if (d - string == pos)
3888            break;
3889          goto fail;
3890  
3891        case on_failure_jump:
3892        on_failure:
3893        EXTRACT_NUMBER_AND_INCR(mcnt, p);
3894        PUSH_FAILURE_POINT(p + mcnt, d);
3895        continue;
3896  
3897        /* The end of a smart repeat has a maybe_finalize_jump back.
3898           Change it either to a finalize_jump or an ordinary jump.  */
3899        case maybe_finalize_jump:
3900          EXTRACT_NUMBER_AND_INCR(mcnt, p);
3901          p1 = p;
3902  
3903          /* Compare the beginning of the repeat with what in the
3904             pattern follows its end. If we can establish that there
3905             is nothing that they would both match, i.e., that we
3906             would have to backtrack because of (as in, e.g., `a*a')
3907             then we can change to finalize_jump, because we'll
3908             never have to backtrack.
3909  
3910             This is not true in the case of alternatives: in
3911             `(a|ab)*' we do need to backtrack to the `ab' alternative
3912             (e.g., if the string was `ab').  But instead of trying to
3913             detect that here, the alternative has put on a dummy
3914             failure point which is what we will end up popping.  */
3915  
3916          /* Skip over open/close-group commands.  */
3917          while (p1 + 2 < pend) {
3918            if ((enum regexpcode)*p1 == stop_memory ||
3919                (enum regexpcode)*p1 == start_memory)
3920              p1 += 3;    /* Skip over args, too.  */
3921            else if (/*(enum regexpcode)*p1 == start_paren ||*/
3922                     (enum regexpcode)*p1 == stop_paren)
3923                p1 += 1;
3924            else
3925              break;
3926          }
3927  
3928          if (p1 == pend)
3929            p[-3] = (unsigned char)finalize_jump;
3930          else if (*p1 == (unsigned char)exactn ||
3931                   *p1 == (unsigned char)endline) {
3932            register int c = *p1 == (unsigned char)endline ? '\n' : p1[2];
3933            register unsigned char *p2 = p + mcnt;
3934              /* p2[0] ... p2[2] are an on_failure_jump.
3935                 Examine what follows that.  */
3936            if (p2[3] == (unsigned char)exactn && p2[5] != c)
3937              p[-3] = (unsigned char)finalize_jump;
3938            else if (p2[3] == (unsigned char)charset ||
3939                     p2[3] == (unsigned char)charset_not) {
3940              int not;
3941              if (ismbchar(c)) {
3942                unsigned char *pp = p1+3;
3943                MBC2WC(c, pp);
3944              }
3945              /* `is_in_list()' is TRUE if c would match */
3946              /* That means it is not safe to finalize.  */
3947              not = is_in_list(c, p2 + 4);
3948              if (p2[3] == (unsigned char)charset_not)
3949                not = !not;
3950              if (!not)
3951                p[-3] = (unsigned char)finalize_jump;
3952            }
3953          }
3954          p -= 2;         /* Point at relative address again.  */
3955          if (p[-1] != (unsigned char)finalize_jump) {
3956            p[-1] = (unsigned char)jump;  
3957            goto nofinalize;
3958          }
3959          /* Note fall through.  */
3960  
3961          /* The end of a stupid repeat has a finalize_jump back to the
3962             start, where another failure point will be made which will
3963             point to after all the repetitions found so far.  */
3964  
3965          /* Take off failure points put on by matching on_failure_jump 
3966             because didn't fail.  Also remove the register information
3967             put on by the on_failure_jump.  */
3968        case finalize_jump:
3969          if (stackp > stackb && stackp[-3] == d) {
3970            p = stackp[-4];
3971            POP_FAILURE_POINT();
3972            continue;
3973          }
3974          POP_FAILURE_POINT(); 
3975          /* Note fall through.  */
3976  
3977        /* We need this opcode so we can detect where alternatives end
3978           in `group_match_null_string_p' et al.  */
3979        case jump_past_alt:
3980          /* fall through */
3981  
3982          /* Jump without taking off any failure points.  */
3983        case jump:
3984        nofinalize:
3985          EXTRACT_NUMBER_AND_INCR(mcnt, p);
3986          if (mcnt < 0 && stackp > stackb && stackp[-3] == d) /* avoid infinite loop */
3987             goto fail;
3988          p += mcnt;
3989          continue;
3990  
3991        case dummy_failure_jump:
3992          /* Normally, the on_failure_jump pushes a failure point, which
3993             then gets popped at finalize_jump.  We will end up at
3994             finalize_jump, also, and with a pattern of, say, `a+', we
3995             are skipping over the on_failure_jump, so we have to push
3996             something meaningless for finalize_jump to pop.  */
3997          PUSH_FAILURE_POINT(0, 0);
3998          goto nofinalize;
3999  
4000          /* At the end of an alternative, we need to push a dummy failure
4001             point in case we are followed by a `finalize_jump', because
4002             we don't want the failure point for the alternative to be
4003             popped.  For example, matching `(a|ab)*' against `aab'
4004             requires that we match the `ab' alternative.  */
4005        case push_dummy_failure:
4006          /* See comments just above at `dummy_failure_jump' about the
4007             two zeroes.  */
4008          p1 = p;
4009          /* Skip over open/close-group commands.  */
4010          while (p1 + 2 < pend) {
4011            if ((enum regexpcode)*p1 == stop_memory ||
4012                (enum regexpcode)*p1 == start_memory)
4013              p1 += 3;    /* Skip over args, too.  */
4014            else if (/*(enum regexpcode)*p1 == start_paren ||*/
4015                     (enum regexpcode)*p1 == stop_paren)
4016                p1 += 1;
4017            else
4018              break;
4019          }
4020          if ((enum regexpcode)*p1 == jump)
4021            p[-1] = unused;
4022          else
4023            PUSH_FAILURE_POINT(0, 0);
4024          break;
4025  
4026          /* Have to succeed matching what follows at least n times.  Then
4027             just handle like an on_failure_jump.  */
4028        case succeed_n: 
4029          EXTRACT_NUMBER(mcnt, p + 2);
4030          /* Originally, this is how many times we HAVE to succeed.  */
4031          if (mcnt != 0) {
4032            mcnt--;
4033            p += 2;
4034            PUSH_FAILURE_COUNT(p);
4035            STORE_NUMBER_AND_INCR(p, mcnt);
4036            PUSH_FAILURE_POINT(0, 0);
4037          }
4038          else  {
4039            goto on_failure;
4040          }
4041          continue;
4042  
4043        case jump_n:
4044          EXTRACT_NUMBER(mcnt, p + 2);
4045          /* Originally, this is how many times we CAN jump.  */
4046          if (mcnt) {
4047            mcnt--;
4048            PUSH_FAILURE_COUNT(p + 2);
4049            STORE_NUMBER(p + 2, mcnt);
4050            goto nofinalize;           /* Do the jump without taking off
4051                                          any failure points.  */
4052          }
4053          /* If don't have to jump any more, skip over the rest of command.  */
4054          else      
4055            p += 4;                    
4056          continue;
4057  
4058        case set_number_at:
4059          EXTRACT_NUMBER_AND_INCR(mcnt, p);
4060          p1 = p + mcnt;
4061          EXTRACT_NUMBER_AND_INCR(mcnt, p);
4062          STORE_NUMBER(p1, mcnt);
4063          continue;
4064  
4065        case try_next:
4066          EXTRACT_NUMBER_AND_INCR(mcnt, p);
4067          if (p + mcnt < pend) {
4068            PUSH_FAILURE_POINT(p, d);
4069            stackp[-1] = NON_GREEDY;
4070          }
4071          p += mcnt;
4072          continue;
4073  
4074        case finalize_push:
4075          POP_FAILURE_POINT();
4076          EXTRACT_NUMBER_AND_INCR(mcnt, p);
4077          if (mcnt < 0 && stackp > stackb  && stackp[-3] == d) /* avoid infinite loop */
4078             goto fail;
4079          PUSH_FAILURE_POINT(p + mcnt, d);
4080          stackp[-1] = NON_GREEDY;
4081          continue;
4082  
4083        case finalize_push_n:
4084          EXTRACT_NUMBER(mcnt, p + 2); 
4085          /* Originally, this is how many times we CAN jump.  */
4086          if (mcnt) {
4087            int pos, i;
4088  
4089            mcnt--;
4090            STORE_NUMBER(p + 2, mcnt);
4091            EXTRACT_NUMBER(pos, p);
4092            EXTRACT_NUMBER(i, p+pos+5);
4093            if (i > 0) goto nofinalize;
4094            POP_FAILURE_POINT();
4095            EXTRACT_NUMBER_AND_INCR(mcnt, p);
4096            PUSH_FAILURE_POINT(p + mcnt, d);
4097            stackp[-1] = NON_GREEDY;
4098            p += 2;               /* skip n */
4099          }
4100          /* If don't have to push any more, skip over the rest of command.  */
4101          else 
4102            p += 4;   
4103          continue;
4104  
4105          /* Ignore these.  Used to ignore the n of succeed_n's which
4106             currently have n == 0.  */
4107        case unused:
4108          continue;
4109  
4110        case casefold_on:
4111          options |= RE_OPTION_IGNORECASE;
4112          continue;
4113  
4114        case casefold_off:
4115          options &= ~RE_OPTION_IGNORECASE;
4116          continue;
4117  
4118        case option_set:
4119          options = *p++;
4120          continue;
4121  
4122        case wordbound:
4123          if (AT_STRINGS_BEG(d)) {
4124            if (IS_A_LETTER(d)) break;
4125            else goto fail;
4126          }
4127          if (AT_STRINGS_END(d)) {
4128            if (PREV_IS_A_LETTER(d)) break;
4129            else goto fail;
4130          }
4131          if (PREV_IS_A_LETTER(d) != IS_A_LETTER(d))
4132            break;
4133          goto fail;
4134  
4135        case notwordbound:
4136          if (AT_STRINGS_BEG(d)) {
4137            if (IS_A_LETTER(d)) goto fail;
4138            else break;
4139          }
4140          if (AT_STRINGS_END(d)) {
4141            if (PREV_IS_A_LETTER(d)) goto fail;
4142            else break;
4143          }
4144          if (PREV_IS_A_LETTER(d) != IS_A_LETTER(d))
4145            goto fail;
4146          break;
4147  
4148        case wordbeg:
4149          if (IS_A_LETTER(d) && (AT_STRINGS_BEG(d) || !PREV_IS_A_LETTER(d)))
4150            break;
4151          goto fail;
4152  
4153        case wordend:
4154          if (!AT_STRINGS_BEG(d) && PREV_IS_A_LETTER(d)
4155              && (!IS_A_LETTER(d) || AT_STRINGS_END(d)))
4156            break;
4157          goto fail;
4158  
4159        case wordchar:
4160          PREFETCH;
4161          if (!IS_A_LETTER(d))
4162            goto fail;
4163          if (ismbchar(*d) && d + mbclen(*d) - 1 < dend)
4164            d += mbclen(*d) - 1;
4165          d++;
4166          SET_REGS_MATCHED;
4167          break;
4168  
4169        case notwordchar:
4170          PREFETCH;
4171          if (IS_A_LETTER(d))
4172            goto fail;
4173          if (ismbchar(*d) && d + mbclen(*d) - 1 < dend)
4174            d += mbclen(*d) - 1;
4175          d++;
4176          SET_REGS_MATCHED;
4177          break;
4178  
4179        case exactn:
4180          /* Match the next few pattern characters exactly.
4181             mcnt is how many characters to match.  */
4182          mcnt = *p++;
4183          /* This is written out as an if-else so we don't waste time
4184             testing `translate' inside the loop.  */
4185          if (TRANSLATE_P()) {
4186            do {
4187              unsigned char c;
4188  
4189              PREFETCH;
4190              if (*p == 0xff) {
4191                p++;  
4192                if (!--mcnt
4193                    || AT_STRINGS_END(d)
4194                    || (unsigned char)*d++ != (unsigned char)*p++)
4195                  goto fail;
4196                continue;
4197              }
4198              c = *d++;
4199              if (ismbchar(c)) {
4200                int n;
4201  
4202                if (c != (unsigned char)*p++)
4203                  goto fail;
4204                for (n = mbclen(c) - 1; n > 0; n--)
4205                  if (!--mcnt     /* redundant check if pattern was
4206                                     compiled properly. */
4207                      || AT_STRINGS_END(d)
4208                      || (unsigned char)*d++ != (unsigned char)*p++)
4209                    goto fail;
4210                continue;
4211              }
4212              /* compiled code translation needed for ruby */
4213              if ((unsigned char)translate[c] != (unsigned char)translate[*p++])
4214                goto fail;
4215            }
4216            while (--mcnt);
4217          }
4218          else {
4219            do {
4220              PREFETCH;
4221              if (*p == 0xff) {p++; mcnt--;}
4222              if (*d++ != *p++) goto fail;
4223            }
4224            while (--mcnt);
4225          }
4226          SET_REGS_MATCHED;
4227          break;
4228        }
4229  #ifdef RUBY
4230      CHECK_INTS;
4231  #endif
4232      continue;  /* Successfully executed one pattern command; keep going.  */
4233  
4234      /* Jump here if any matching operation fails. */
4235    fail:
4236      if (stackp != stackb) {
4237        /* A restart point is known.  Restart there and pop it. */
4238        short last_used_reg, this_reg;
4239  
4240        /* If this failure point is from a dummy_failure_point, just
4241           skip it.  */
4242        if (stackp[-4] == 0 || (best_regs_set && stackp[-1] == NON_GREEDY)) {
4243          POP_FAILURE_POINT();
4244          goto fail;
4245        }
4246        stackp--;         /* discard greedy flag */
4247        options = (long)*--stackp;
4248        d = *--stackp;
4249        p = *--stackp;
4250        /* Restore register info.  */
4251        last_used_reg = (long)*--stackp;
4252  
4253        /* Make the ones that weren't saved -1 or 0 again. */
4254        for (this_reg = num_regs - 1; this_reg > last_used_reg; this_reg--) {
4255          regend[this_reg] = REG_UNSET_VALUE;
4256          regstart[this_reg] = REG_UNSET_VALUE;
4257          IS_ACTIVE(reg_info[this_reg]) = 0;
4258          MATCHED_SOMETHING(reg_info[this_reg]) = 0;
4259        }
4260  
4261        /* And restore the rest from the stack.  */
4262        for ( ; this_reg > 0; this_reg--) {
4263          reg_info[this_reg].word = *--stackp;
4264          regend[this_reg] = *--stackp;
4265          regstart[this_reg] = *--stackp;
4266        }
4267        mcnt = (long)*--stackp;
4268        while (mcnt--) {
4269          POP_FAILURE_COUNT();
4270        }
4271        if (p < pend) {
4272          int is_a_jump_n = 0;
4273          int failed_paren = 0;
4274  
4275          p1 = p;
4276          /* If failed to a backwards jump that's part of a repetition
4277             loop, need to pop this failure point and use the next one.  */
4278          switch ((enum regexpcode)*p1) {
4279          case jump_n:
4280          case finalize_push_n:
4281            is_a_jump_n = 1;
4282          case maybe_finalize_jump:
4283          case finalize_jump:
4284          case finalize_push:
4285          case jump:
4286            p1++;
4287            EXTRACT_NUMBER_AND_INCR(mcnt, p1);
4288  
4289            if (mcnt >= 0) break; /* should be backward jump */
4290            p1 += mcnt;
4291  
4292            if (( is_a_jump_n && (enum regexpcode)*p1 == succeed_n) ||
4293                (!is_a_jump_n && (enum regexpcode)*p1 == on_failure_jump)) {
4294              if (failed_paren) {
4295                p1++;
4296                EXTRACT_NUMBER_AND_INCR(mcnt, p1);
4297                PUSH_FAILURE_POINT(p1 + mcnt, d);
4298              }
4299              goto fail;
4300            }
4301            break;
4302          default:
4303            /* do nothing */;
4304          }
4305        }
4306      }
4307      else
4308        break;   /* Matching at this starting point really fails.  */
4309    }
4310  
4311    if (best_regs_set)
4312      goto restore_best_regs;
4313  
4314    FREE_AND_RETURN(stackb,(-1));         /* Failure to match.  */
4315  }
4316  
4317  
4318  static int
4319  memcmp_translate(s1, s2, len)
4320       unsigned char *s1, *s2;
4321       register int len;
4322  {
4323    register unsigned char *p1 = s1, *p2 = s2, c;
4324    while (len) {
4325      c = *p1++;
4326      if (ismbchar(c)) {
4327        int n;
4328  
4329        if (c != *p2++) return 1;
4330        for (n = mbclen(c) - 1; n > 0; n--)
4331          if (!--len || *p1++ != *p2++)
4332            return 1;
4333      }
4334      else
4335        if (translate[c] != translate[*p2++])
4336          return 1;
4337      len--;
4338    }
4339    return 0;
4340  }
4341  
4342  void
4343  re_copy_registers(regs1, regs2)
4344       struct re_registers *regs1, *regs2;
4345  {
4346    int i;
4347  
4348    if (regs1 == regs2) return;
4349    if (regs1->allocated == 0) {
4350      regs1->beg = TMALLOC(regs2->num_regs, int);
4351      regs1->end = TMALLOC(regs2->num_regs, int);
4352      regs1->allocated = regs2->num_regs;
4353    }
4354    else if (regs1->allocated < regs2->num_regs) {
4355      TREALLOC(regs1->beg, regs2->num_regs, int);
4356      TREALLOC(regs1->end, regs2->num_regs, int);
4357      regs1->allocated = regs2->num_regs;
4358    }
4359    for (i=0; i<regs2->num_regs; i++) {
4360      regs1->beg[i] = regs2->beg[i];
4361      regs1->end[i] = regs2->end[i];
4362    }
4363    regs1->num_regs = regs2->num_regs;
4364  }
4365  
4366  void
4367  re_free_registers(regs)
4368       struct re_registers *regs;
4369  {
4370    if (regs->allocated == 0) return;
4371    if (regs->beg) xfree(regs->beg);
4372    if (regs->end) xfree(regs->end);
4373  }
4374  
4375  /* Functions for multi-byte support.
4376     Created for grep multi-byte extension Jul., 1993 by t^2 (Takahiro Tanimoto)
4377     Last change: Jul. 9, 1993 by t^2  */
4378  static const unsigned char mbctab_ascii[] = {
4379    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4380    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4381    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4382    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4383    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4384    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4385    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4386    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4387    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4388    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4389    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4390    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4391    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4392    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4393    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4394    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4395  };
4396  
4397  static const unsigned char mbctab_euc[] = { /* 0xA1-0xFE */
4398    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4399    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4400    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4401    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4402    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4403    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4404    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4405    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4406    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
4407    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4408    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4409    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4410    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4411    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4412    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4413    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
4414  };
4415  
4416  static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFC */
4417    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4418    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4419    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4420    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4421    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4422    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4423    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4424    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4425    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4426    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4427    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4428    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4429    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4430    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4431    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4432    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
4433  };
4434  
4435  static const unsigned char mbctab_sjis_trail[] = { /* 0x40-0x7E,0x80-0xFC */
4436    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4437    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4438    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4439    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4440    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4441    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4442    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4443    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
4444    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4445    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4446    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4447    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4448    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4449    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4450    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4451    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
4452  };
4453  
4454  static const unsigned char mbctab_utf8[] = {
4455    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4456    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4457    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4458    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4459    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4460    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4461    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4462    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4463    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4464    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4465    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4466    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4467    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4468    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4469    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
4470    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0,
4471  };
4472  
4473  const unsigned char *re_mbctab = mbctab_ascii;
4474  
4475  void
4476  re_mbcinit(mbctype)
4477       int mbctype;
4478  {
4479    switch (mbctype) {
4480    case MBCTYPE_ASCII:
4481      re_mbctab = mbctab_ascii;
4482      current_mbctype = MBCTYPE_ASCII;
4483      break;
4484    case MBCTYPE_EUC:
4485      re_mbctab = mbctab_euc;
4486      current_mbctype = MBCTYPE_EUC;
4487      break;
4488    case MBCTYPE_SJIS:
4489      re_mbctab = mbctab_sjis;
4490      current_mbctype = MBCTYPE_SJIS;
4491      break;
4492    case MBCTYPE_UTF8:
4493      re_mbctab = mbctab_utf8;
4494      current_mbctype = MBCTYPE_UTF8;
4495      break;
4496    }
4497  }
4498  
4499  #define mbc_isfirst(t, c) (t)[(unsigned char)(c)]
4500  #define mbc_len(t, c)     ((t)[(unsigned char)(c)]+1)
4501  
4502  static unsigned int
4503  asc_startpos(string, pos)
4504       const char *string;
4505       unsigned int pos;
4506  {
4507    return pos;
4508  }
4509  
4510  #define euc_islead(c)  ((unsigned char)((c) - 0xa1) > 0xfe - 0xa1)
4511  #define euc_mbclen(c)  mbc_len(mbctab_euc, (c))
4512  static unsigned int
4513  euc_startpos(string, pos)
4514       const char *string;
4515       unsigned int pos;
4516  {
4517    unsigned int i = pos, w;
4518  
4519    while (i > 0 && !euc_islead(string[i])) {
4520      --i;
4521    }
4522    if (i == pos || i + (w = euc_mbclen(string[i])) > pos) {
4523      return i;
4524    }
4525    i += w;
4526    return i + ((pos - i) & ~1);
4527  }
4528  
4529  #define sjis_isfirst(c) mbc_isfirst(mbctab_sjis, (c))
4530  #define sjis_istrail(c) mbctab_sjis_trail[(unsigned char)(c)]
4531  #define sjis_mbclen(c)  mbc_len(mbctab_sjis, (c))
4532  static unsigned int
4533  sjis_startpos(string, pos)
4534       const char *string;
4535       unsigned int pos;
4536  {
4537    unsigned int i = pos, w;
4538  
4539    if (i > 0 && sjis_istrail(string[i])) {
4540      do {
4541        if (!sjis_isfirst(string[--i])) {
4542          ++i;
4543          break;
4544        }
4545      } while (i > 0);
4546    }
4547    if (i == pos || i + (w = sjis_mbclen(string[i])) > pos) {
4548      return i;
4549    }
4550    i += w;
4551    return i + ((pos - i) & ~1);
4552  }
4553  
4554  #define utf8_islead(c)  ((unsigned char)((c) & 0xc0) != 0x80)
4555  #define utf8_mbclen(c)  mbc_len(mbctab_utf8, (c))
4556  static unsigned int
4557  utf8_startpos(string, pos)
4558       const char *string;
4559       unsigned int pos;
4560  {
4561    unsigned int i = pos, w;
4562  
4563    while (i > 0 && !utf8_islead(string[i])) {
4564      --i;
4565    }
4566    if (i == pos || i + (w = utf8_mbclen(string[i])) > pos) {
4567      return i;
4568    }
4569    return i + w;
4570  }
4571  
4572  /*
4573    vi: sw=2 ts=8
4574    Local variables:
4575    mode           : C
4576    c-file-style   : "gnu"
4577    tab-width      : 8
4578    End            :
4579  */