#ifdef SCCID
static char *sccsid = "@(#)regexp.c 	2.1 7/29/92";
static char *copyright = "Copyright (c) 1986 by University of Toronto.";
#endif

/***

* program name:
	wvi
* function:
	PD version of UNIX "vi" editor for WIN32, with extensions.
* module name:
	regexp.c
* module function:
	Regular expression routines.
* history:
	Regular expression routines by Henry Spencer.
	Modified for use with STEVIE (ST Editor for VI Enthusiasts,
	Version 3.10) by Tony Andrews.
	Adapted for use with Xvi by Chris & John Downey.
	Original copyright notice appears below.
	Please note that this is a modified version.
	modified for WIN32 / UNICODE / C++ by K.Yoshizawa
		(PAF02413.niftyserve.or.jp)
***/

// regcomp and regexec -- regsub and regerror are elsewhere
//
//		Copyright (c) 1986 by University of Toronto.
//		Written by Henry Spencer.  Not derived from licensed software.
//
//		Permission is granted to anyone to use this software for any
//		purpose on any computer system, and to redistribute it freely,
//		subject to the following restrictions:
//
//		1. The author is not responsible for the consequences of use of
//		this software, no matter how awful, even if they arise
//		from defects in it.
//
//		2. The origin of this software must not be misrepresented, either
//		by explicit claim or by omission.
//
//		3. Altered versions must be plainly marked as such, and must not
//		be misrepresented as being the original software.
//
// Beware that some of this code is subtly aware of the way operator
// precedence is structured in regular expressions.  Serious changes in
// regular-expression syntax might require a total rethink.
//
// $Log:		regexp.c,v $
// Revision 1.2  88/04/28  08:09:45  tony
// First modification of the regexp library. Added an external variable
// 'reg_ic' which can be set to indicate that case should be ignored.
// Added a new parameter to regexec() to indicate that the given string
// comes from the beginning of a line and is thus eligible to match
// 'beginning-of-line'.
//
// xvi, version 1.7:
//
// Pb(P_ignorecase) replaces reg_ic.
//
// BWORD (beginning of word) & EWORD (end of word) implemented.
//
// Some strings passed to regerror() are altered slightly, for
// consistency with other error messages in xvi.

#include "xvi.h"
#include "regexp.h"
#include "regmagic.h"

// Exported functions.
int 	cwcsncmp(WCHAR *s1, WCHAR *s2, int n);
WCHAR	*cwcschr(WCHAR *s, int c);

// The "internal use only" fields in regexp.h are present to pass info from
// compile to execute that permits the execute phase to run lots faster on
// simple cases.  They are:
//
// regstart 	char that must begin a match; L'\0' if none obvious
// reganch		is the match anchored (at beginning-of-line only)?
// regmust		string (pointer into program) that match must include, or NULL
// regmlen		length of regmust string
//
// Regstart and reganch permit very fast decisions on suitable starting points
// for a match, cutting down the work a lot.  Regmust permits fast rejection
// of lines that cannot possibly match.  The regmust tests are costly enough
// that regcomp() supplies a regmust only if the r.e. contains something
// potentially expensive (at present, the only such thing detected is * or +
// at the start of the r.e., which can involve a lot of backup).  Regmlen is
// supplied because the test in regexec() needs it and regcomp() is computing
// it anyway.

// Structure for regexp "program".	This is essentially a linear encoding
// of a nondeterministic finite-state machine (aka syntax charts or
// "railroad normal form" in parsing technology).  Each node is an opcode
// plus a "next" pointer, possibly plus an operand.  "Next" pointers of
// all nodes except BRANCH implement concatenation; a "next" pointer with
// a BRANCH on both ends of it is connecting two alternatives.	(Here we
// have one of the subtle syntax dependencies:	an individual BRANCH (as
// opposed to a collection of them) is never concatenated with anything
// because of operator precedence.)  The operand of some types of node is
// a literal string; for others, it is a node leading into a sub-FSM.  In
// particular, the operand of a BRANCH node is the first node of the branch.
// (NB this is *not* a tree structure:	the tail of the branch connects
// to the thing following the set of BRANCHes.)  The opcodes are:

// Opcode table.  "opnd?" says what, if anything, follows the node header:
// "no" = nothing, "str" = a NUL-terminated string, "node" = another node.
//
// definition	number	opnd?	meaning
#define END 	0		// no	End of program.
#define BOL 	1		// no	Match the empty string at beginning of line.
#define EOL 	2		// no	Match the empty string at end of line.
#define ANY 	3		// no	Match any one character.
#define ANYOF	4		// str	Match any character in this string.
#define ANYBUT	5		// str	Match any character not in this string.
#define BRANCH	6		// node Match this alternative, or the next...
#define BACK	7		// no	Match the empty string, "next" ptr points backward.
#define EXACTLY 8		// str	Match this string.
#define NOTHING 9		// no	Match empty string.
#define STAR	10		// node Match this (simple) thing 0 or more times.
#define PLUS	11		// node Match this (simple) thing 1 or more times.
#define OPEN	20		// no	Mark this point in input as start of #n.
						//	OPEN+1 is number 1, etc.
#define CLOSE	30		// no	Analogous to OPEN.
#define BWORD	64		// no	Beginning of word.
#define EWORD	65		// no	End of word.

// Opcode notes:
//
// BRANCH		The set of branches constituting a single choice are hooked
//				together with their "next" pointers, since precedence prevents
//				anything being concatenated to any individual branch.  The
//				"next" pointer of the last BRANCH in a choice points to the
//				thing following the whole choice.  This is also where the
//				final "next" pointer of each individual branch points; each
//				branch starts with the operand node of a BRANCH node.
//
// BACK 		Normal "next" pointers all implicitly point forward; BACK
//				exists to make loop structures possible.
//
// STAR,PLUS	'?', and complex '*' and '+', are implemented as circular
//				BRANCH structures using BACK.  Simple cases (one character
//				per match) are implemented with STAR and PLUS for speed
//				and to minimize recursive plunges.
//
// OPEN,CLOSE	...are numbered at compile time.

// A node is one char of opcode followed by two chars of "next" pointer.
// "Next" pointers are stored as two 8-bit pieces, high order first.  The
// value is a positive offset from the opcode of the node containing it.
// An operand, if any, simply follows the node.  (Note that much of the
// code generation knows about this implicit relationship.)
//
// Using two bytes for the "next" pointer is vast overkill for most things,
// but allows patterns to get big without disasters.
// Node field accessors; p points at the first cell (the opcode) of a node.
//   OP(p)       the opcode stored in the first cell.
//   NEXT(p)     the offset rebuilt from the low 8 bits of each of the two
//               following cells, high-order piece first.  The direction in
//               which it is applied is decided by regnext().
//   OPERAND(p)  address of the cell just past the three-cell node header.
#define OP(p)			(*(p))
#define NEXT(p) 		(((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
#define OPERAND(p)		((p) + 3)

// See regmagic.h for one further detail of program structure.


// Utility definitions.
//
// UCHARAT(p) reads the character at p as an int; when CHARBITS is defined
// the value is additionally masked with it.
#ifndef CHARBITS
#define UCHARAT(p)		((int)*(WCHAR *)(p))
#else
#define UCHARAT(p)		((int)*(p)&CHARBITS)
#endif

// FAIL(m) reports m through regerror() and returns NULL from the calling
// function.  It expands to a bare brace block, so "FAIL(x);" directly
// before an "else" needs its own enclosing braces.
// ISMULT(c) is true for the three repetition operators.
// META lists the characters that end a run of ordinary characters.
#define FAIL(m) 		{ regerror(m); return(NULL); }
#define ISMULT(c)		((c) == L'*' || (c) == L'+' || (c) == L'?')
#define META			L"^$.[()|?+*\\"

// Flags to be passed up and down.
#define HASWIDTH		01		// Known never to match null string.
#define SIMPLE			02		// Simple enough to be STAR/PLUS operand.
#define SPSTART 		04		// Starts with * or +.
#define WORST			0		// Worst case.

// mkup - convert to upper case IF we're doing caseless compares.
// The argument may be evaluated more than once, so it must not have
// side effects.
#define mkup(c) 		((Pb(P_ignorecase) && iswlower(c)) ? \
												towupper(c) : (c))

// Global work variables for regcomp().  They are file-level state shared
// by all of the compile helpers.
static WCHAR *regparse;			// Input-scan pointer.
static int regnpar; 			// () count.
// Placeholder cell: while regcode points here the emit helpers write
// nothing and only add to regsize.
static WCHAR regdummy;
static WCHAR *regcode;			// Code-emit pointer; &regdummy = don't.
static long regsize;			// Code size, counted in WCHAR cells.

// Forward declarations for regcomp()'s friends.
#ifndef STATIC
#		define	STATIC	static
#endif

STATIC	WCHAR	*reg(int paren, int *flagp);
STATIC	WCHAR	*regbranch(int *flagp);
STATIC	WCHAR	*regpiece(int *flagp);
STATIC	WCHAR	*regatom(int *flagp);
STATIC	WCHAR	*regnode(int op);
STATIC	WCHAR	*regnext(WCHAR *p);
STATIC	void	regc(int b);
STATIC	void	reginsert(int op, WCHAR *opnd);
STATIC	void	regtail(WCHAR *p, WCHAR *val);
STATIC	void	regoptail(WCHAR *p, WCHAR *val);

#ifdef STRCSPN
	static		int 	wcscspn(WCHAR *s1, WCHAR *s2);
#endif

// - regcomp - compile a regular expression into internal code
//
// The compiled size is unknown until the expression has been compiled, but
// compiling needs a buffer to emit into.  The expression is therefore
// compiled twice: a first pass with regcode aimed at regdummy, which only
// counts cells and validates the syntax, and a second pass that emits into
// a buffer of exactly the counted size.  Nothing is allocated unless the
// first pass succeeded, and the program never has to be moved.
//
// After the second pass the optimisation fields (regstart, reganch,
// regmust, regmlen) are filled in; that code relies on the layout of the
// compiled program.
//
// Returns the compiled regexp, or NULL on a NULL argument, a compile
// error, an over-large program or an allocation failure.
regexp *regcomp(WCHAR *exp)
{
	regexp *prog;
	WCHAR *node;
	int flags;

	if (exp == NULL)
		FAIL(L"NULL argument");

	// Pass 1: count cells and check legality; nothing is written.
	regparse = exp;
	regnpar = 1;
	regsize = 0L;
	regcode = &regdummy;
	regc(MAGIC);
	if (reg(0, &flags) == NULL)
		return(NULL);

	// The "next" offsets are stored in two 8-bit pieces, so cap the size.
	if (regsize >= 32767L)				// Probably could be 65535L.
		FAIL(L"Regular expression too big");

	prog = (regexp *) alloc_byte(sizeof(regexp) + (unsigned)regsize * sizeof(WCHAR));
	if (prog == NULL)
		return NULL;

	// Pass 2: same parse again, this time emitting into the new buffer.
	regparse = exp;
	regnpar = 1;
	regcode = prog->program;
	regc(MAGIC);
	// NOTE(review): if this pass were ever to fail, prog would not be
	// released here -- confirm whether that can happen and how to free it.
	if (reg(0, &flags) == NULL)
		return(NULL);

	// Worst-case defaults for the optimisation fields.
	prog->regstart = L'\0';
	prog->reganch = 0;
	prog->regmust = NULL;
	prog->regmlen = 0;

	// The hints are only derived when there is a single top-level choice.
	node = prog->program + 1;					// First BRANCH.
	if (OP(regnext(node)) != END)
		return(prog);

	node = OPERAND(node);

	// Starting-point info.
	if (OP(node) == EXACTLY)
		prog->regstart = *OPERAND(node);
	else if (OP(node) == BOL)
		prog->reganch++;

	// When the expression starts with something expensive, remember the
	// longest literal string on the main chain as the "must appear"
	// string.  Ties go to the later string (hence ">=").
	if (flags & SPSTART) {
		WCHAR *best = NULL;
		int bestlen = 0;

		while (node != NULL) {
			if (OP(node) == EXACTLY) {
				WCHAR *text = OPERAND(node);
				int textlen = wcslen(text);

				if (textlen >= bestlen) {
					best = text;
					bestlen = textlen;
				}
			}
			node = regnext(node);
		}
		prog->regmust = best;
		prog->regmlen = bestlen;
	}

	return(prog);
}

// - reg - regular expression, i.e. main body or parenthesized thing
//
// Parses a list of branches separated by '|' starting at regparse and links
// them together.  When paren is nonzero the list is wrapped in an
// OPEN+n / CLOSE+n pair, n being taken from regnpar; otherwise it is
// terminated by an END node.  The caller must already have consumed the
// opening parenthesis.
//
// On return *flagp has HASWIDTH set only if every branch had it, and
// SPSTART set if any branch had it.
//
// Returns the first node emitted (the OPEN node, or the first BRANCH),
// or NULL after an error.
//
// Combining parenthesis handling with the base level of regular expression
// is a trifle forced, but the need to tie the tails of the branches to what
// follows makes it hard to avoid.
static WCHAR *reg(int paren, int *flagp)
// paren: nonzero when parsing a parenthesized sub-expression.
{
	WCHAR *ret;
	WCHAR *br;
	WCHAR *ender;
	int parno;			// Only assigned, and only read, when paren is nonzero.
	int flags;

	*flagp = HASWIDTH;	// Tentatively.

	// Make an OPEN node, if parenthesized.
	if (paren) {
		if (regnpar >= NSUBEXP)
			FAIL(L"Too many ()");
		parno = regnpar;
		regnpar++;
		ret = regnode(OPEN+parno);
	} else
		ret = NULL;

	// Pick up the branches, linking them together.
	br = regbranch(&flags);
	if (br == NULL)
		return(NULL);
	if (ret != NULL)
		regtail(ret, br);		// OPEN -> first.
	else
		ret = br;
	// A single branch without width removes HASWIDTH from the whole list.
	if (!(flags&HASWIDTH))
		*flagp &= ~HASWIDTH;
	*flagp |= flags&SPSTART;
	while (*regparse == L'|') {
		regparse++;
		br = regbranch(&flags);
		if (br == NULL)
			return(NULL);
		regtail(ret, br);		// BRANCH -> BRANCH.
		if (!(flags&HASWIDTH))
			*flagp &= ~HASWIDTH;
		*flagp |= flags&SPSTART;
	}

	// Make a closing node, and hook it on the end.
	ender = regnode((paren) ? CLOSE+parno : END);
	regtail(ret, ender);

	// Hook the tails of the branches to the closing node.
	for (br = ret; br != NULL; br = regnext(br))
		regoptail(br, ender);

	// Check for proper termination.  In the parenthesized case the
	// character is consumed as part of the test.
	if (paren && *regparse++ != L')') {
		FAIL(L"Unmatched ()");
	} else if (!paren && *regparse != L'\0') {
		if (*regparse == L')') {
			FAIL(L"Unmatched ()");
		} else
			FAIL(L"Junk on end");		// "Can't happen".
		// NOTREACHED
	}

	return(ret);
}

// - regbranch - one alternative of an | operator
//
// Emits a BRANCH node and then chains together the pieces that follow, up
// to the next '|', ')' or end of pattern; this is the concatenation
// operator.  An alternative with no pieces gets a NOTHING node as its body.
//
// *flagp receives HASWIDTH if any piece has width, and SPSTART if the
// first piece reports it.  Returns the BRANCH node, or NULL when a piece
// fails to compile.
static WCHAR *regbranch(int *flagp)
{
	WCHAR *branch;
	WCHAR *prev = NULL;		// Most recent piece; NULL until one is parsed.
	WCHAR *piece;
	int pflags;

	*flagp = WORST; 			// Tentatively.

	branch = regnode(BRANCH);
	for (;;) {
		WCHAR c = *regparse;

		if (c == L'\0' || c == L'|' || c == L')')
			break;

		piece = regpiece(&pflags);
		if (piece == NULL)
			return(NULL);

		*flagp |= pflags & HASWIDTH;
		if (prev != NULL)
			regtail(prev, piece);		// Concatenate onto the chain.
		else
			*flagp |= pflags & SPSTART;	// Only the first piece counts.
		prev = piece;
	}

	// Nothing was parsed: give the branch an empty body.
	if (prev == NULL)
		(void) regnode(NOTHING);

	return(branch);
}

// - regpiece - something followed by possible [*+?]
//
// Parses one atom and, if a repetition operator follows it, rewrites the
// emitted code so that the atom repeats.  Atoms flagged SIMPLE get a STAR
// or PLUS node inserted in front of them; everything else is expanded into
// BRANCH/BACK/NOTHING structures.
//
// *flagp receives the atom's flags when no operator follows; otherwise
// SPSTART for '*' and '?', or HASWIDTH for '+'.
// Returns the first node of the piece, or NULL on error.
//
// Note that the branching code sequences used for ? and the general cases
// of * and + are somewhat optimized:  they use the same NOTHING node as
// both the endmarker for their branch list and the body of the last branch.
// It might seem that this node could be dispensed with entirely, but the
// endmarker role is not redundant.
static WCHAR *regpiece(int *flagp)
{
	WCHAR *ret;
	WCHAR op;
	WCHAR *next;
	int flags;

	ret = regatom(&flags);
	if (ret == NULL)
		return(NULL);

	// No repetition operator: the piece is just the atom.
	op = *regparse;
	if (!ISMULT(op)) {
		*flagp = flags;
		return(ret);
	}

	// Repeating something that can match the empty string is rejected
	// for '*' and '+'; '?' is allowed.
	if (!(flags&HASWIDTH) && op != L'?')
		FAIL(L"*+ operand could be empty");
	*flagp = (op != L'+') ? (WORST|SPSTART) : (WORST|HASWIDTH);

	if (op == L'*' && (flags&SIMPLE))
		reginsert(STAR, ret);
	else if (op == L'*') {
		// Emit x* as (x&|), where & means "self".
		reginsert(BRANCH, ret); 				// Either x
		regoptail(ret, regnode(BACK));			// and loop
		regoptail(ret, ret);					// back
		regtail(ret, regnode(BRANCH));			// or
		regtail(ret, regnode(NOTHING)); 		// null.
	} else if (op == L'+' && (flags&SIMPLE))
		reginsert(PLUS, ret);
	else if (op == L'+') {
		// Emit x+ as x(&|), where & means "self".
		next = regnode(BRANCH); 				// Either
		regtail(ret, next);
		regtail(regnode(BACK), ret);			// loop back
		regtail(next, regnode(BRANCH)); 		// or
		regtail(ret, regnode(NOTHING)); 		// null.
	} else if (op == L'?') {
		// Emit x? as (x|)
		reginsert(BRANCH, ret); 				// Either x
		regtail(ret, regnode(BRANCH));			// or
		next = regnode(NOTHING);				// null.
		regtail(ret, next);
		regoptail(ret, next);
	}
	// Step over the operator; a second operator straight after is an error.
	regparse++;
	if (ISMULT(*regparse))
		FAIL(L"Nested *?+");

	return(ret);
}

// regdefault - compile a run of characters with no special meaning
//
// Called by regatom().  Takes the longest run at regparse that contains no
// META character and emits it as one EXACTLY node.  If the run is longer
// than one character and a repetition operator follows it, the last
// character is left behind so that it becomes the operator's operand.
//
// HASWIDTH is added to *flagp, plus SIMPLE for a one-character run.
// Returns the EXACTLY node, or NULL if the run is empty.
static WCHAR *regdefault(int *flagp)
{
	WCHAR *node;
	WCHAR *stop;
	int run;

	run = wcscspn(regparse, META);
	if (run <= 0)
		FAIL(L"Internal disaster");

	// Back off clear of a ?+* operand.
	if (run > 1 && ISMULT(regparse[run]))
		run--;

	*flagp |= (run == 1) ? (HASWIDTH|SIMPLE) : HASWIDTH;

	node = regnode(EXACTLY);
	for (stop = regparse + run; regparse < stop; regparse++)
		regc(*regparse);
	regc(L'\0');				// Terminate the operand string.

	return node;
}

// - regatom - the lowest level
//
// Parses a single atom at regparse and emits the node(s) for it: an
// anchor, '.', a bracket expression, a parenthesized sub-expression, a
// backslash sequence, or a run of ordinary characters.
//
// *flagp starts as WORST and gains HASWIDTH/SIMPLE/SPSTART as appropriate
// for the atom.  Returns the atom's first node, or NULL on error.
//
// Optimization:  gobbles an entire sequence of ordinary characters so that
// it can turn them into a single node, which is smaller to store and
// faster to run.  Backslashed characters are exceptions, each becoming a
// separate node; the code is simpler that way and it's not worth fixing.
static WCHAR *regatom(int *flagp)
{
	WCHAR *ret;
	int flags;

	*flagp = WORST; 			// Tentatively.

	// The section below is compiled out.
#if 0
	if (Pn(P_regextype) == rt_TAGS)
	{
		switch (*regparse)
		{
			case L'^':
			case L'$':
				break;
			case L'\\':
				switch (*++regparse)
				{
					case L'^':
					case L'$':
						ret = regnode(EXACTLY);
						regc(*regparse);
						regc(L'\0');
						*flagp |= HASWIDTH|SIMPLE;
						regparse++;
						return ret;
				}
				break;
			default:
				return regdefault(flagp);
		}
	}
#endif
	// The switch consumes the character it dispatches on.
	switch (*regparse++) {
	case L'^':
		ret = regnode(BOL);
		break;
	case L'$':
		ret = regnode(EOL);
		break;
	case L'.':
		ret = regnode(ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;
	case L'[':
	{
		int class1;
		int classend;

		if (*regparse == L'^') { // Complement of range.
			ret = regnode(ANYBUT);
			regparse++;
		} else
			ret = regnode(ANYOF);
		// A leading ']' or '-' is taken literally.
		if (*regparse == L']' || *regparse == L'-')
			regc(*regparse++);
		while (*regparse != L'\0' && *regparse != L']') {
			if (*regparse == L'-') {
				regparse++;
				// A '-' just before ']' or end of pattern is literal.
				if (*regparse == L']' || *regparse == L'\0')
					regc(L'-');
				else {
					// Range: the low end was already emitted as an
					// ordinary character, so start one past it.
					class1 = UCHARAT(regparse-2)+1;
					classend = UCHARAT(regparse);
					if (class1 > classend+1)
						FAIL(L"Invalid [] range");
					for (; class1 <= classend; class1++)
						regc(class1);
					regparse++;
				}
			} else
				regc(*regparse++);
		}
		regc(L'\0');			// Terminate the operand string.
		if (*regparse != L']')
			FAIL(L"Unmatched []");
		regparse++;
		*flagp |= HASWIDTH|SIMPLE;
	}
		break;
	case L'(':
		ret = reg(1, &flags);
		if (ret == NULL)
			return(NULL);
		*flagp |= flags&(HASWIDTH|SPSTART);
		break;
	case L'\0':
	case L'|':
	case L')':
		FAIL(L"Internal urp");	// Supposed to be caught earlier.
		break;
	case L'?':
	case L'+':
	case L'*':
		FAIL(L"?+* follows nothing");
		break;
	case L'\\':
		// regparse now points at the character after the backslash.
		switch (*regparse)
		{
			case L'\0':
				FAIL(L"Trailing \\");
			case L'<':
				ret = regnode(BWORD);
				break;
			case L'>':
				ret = regnode(EWORD);
				break;
			default:
				// Any other escaped character is a one-character literal.
				ret = regnode(EXACTLY);
				regc(*regparse);
				regc(L'\0');
				*flagp |= HASWIDTH|SIMPLE;
		}
		regparse++;
		break;
	default:
		// Ordinary character: un-consume it and let regdefault()
		// take the whole run.
		regparse--;
		ret = regdefault(flagp);
	}

	return(ret);
}

// - regnode - emit a node
//
// Writes a three-cell node header (opcode plus a zeroed "next" pointer) at
// the emit position and advances regcode past it.  During the sizing pass
// (regcode == &regdummy) nothing is written and regsize grows by 3.
//
// Returns the location of the node, which is &regdummy while sizing.
static WCHAR *regnode(int op)
{
	WCHAR *node = regcode;

	if (node == &regdummy) {
		regsize += 3;
		return(node);
	}

	node[0] = op;
	node[1] = L'\0';			// Null "next" pointer.
	node[2] = L'\0';
	regcode = node + 3;

	return(node);
}

// - regc - emit (if appropriate) one cell of code
//
// While sizing (regcode == &regdummy) only regsize is incremented;
// otherwise b is stored at regcode and the emit pointer moves on.
static void regc(int b)
{
	if (regcode == &regdummy) {
		regsize++;
		return;
	}
	*regcode = b;
	regcode++;
}

// - reginsert - insert an operator in front of already-emitted operand
//
// Everything from opnd up to the emit pointer is shifted three cells
// towards higher addresses, and a node header with opcode op and a zeroed
// "next" pointer is written where the operand used to start.  While sizing
// only regsize is adjusted.
static void reginsert(int op, WCHAR *opnd)
{
	WCHAR *from;
	WCHAR *to;

	if (regcode == &regdummy) {
		regsize += 3;
		return;
	}

	// Copy from the top down, because the two regions overlap.
	from = regcode;
	to = regcode + 3;
	regcode = to;
	while (from > opnd) {
		from--;
		to--;
		*to = *from;
	}

	// Op node, where the operand used to be.
	opnd[0] = op;
	opnd[1] = L'\0';
	opnd[2] = L'\0';
}

// - regtail - set the next-pointer at the end of a node chain
//
// Follows the "next" pointers from p to the last node of the chain and
// makes that node point at val.  The offset is stored as two 8-bit pieces,
// high-order first; for a BACK node it is measured backwards.  Does
// nothing during the sizing pass (p == &regdummy).
static void regtail(WCHAR *p, WCHAR *val)
{
	WCHAR *last;
	WCHAR *following;
	int dist;

	if (p == &regdummy)
		return;

	// Find last node.
	last = p;
	while ((following = regnext(last)) != NULL)
		last = following;

	dist = (OP(last) == BACK) ? (last - val) : (val - last);
	last[1] = (dist >> 8) & 0377;
	last[2] = dist & 0377;
}

// - regoptail - regtail on operand of first argument; nop if operandless
//
// Only BRANCH nodes are acted upon ("operandless" and "op != BRANCH" are
// synonymous in practice).  NULL and the sizing-pass placeholder are
// ignored.
static void regoptail(WCHAR *p, WCHAR *val)
{
	if (p != NULL && p != &regdummy && OP(p) == BRANCH)
		regtail(OPERAND(p), val);
}

// regexec and friends

// Global work variables for regexec().  regtry() sets reginput, regstartp
// and regendp before each match attempt; regexec() sets regbol.
static WCHAR *reginput;			// String-input pointer; advanced as characters match.
static WCHAR *regbol;			// Beginning of input, for ^ check; NULL when not at start of line.
static WCHAR **regstartp;		// Pointer to startp array.
static WCHAR **regendp;			// Ditto for endp.

// Forwards.
STATIC	int 	regtry(regexp *prog, WCHAR *string);
STATIC	int 	regmatch(WCHAR *prog);
STATIC	int 	regrepeat(WCHAR *p);

#ifdef DEBUG
	int 		regnarrate = 0;
	void		regdump(regexp *r);
	STATIC WCHAR *regprop(WCHAR *op);
#endif

// - regexec - match a regexp against a string
//
// prog    compiled expression from regcomp().
// string  text to search.
// at_bol  nonzero if string starts at the beginning of a line, which
//         makes it eligible to match '^'.
//
// Returns 1 when a match is found (regtry() records the sub-match
// positions in prog), 0 otherwise.  A NULL argument or a program without
// the MAGIC marker is reported through regerror() and yields 0.
int regexec( regexp *prog,  WCHAR *string, int at_bol)
{
	WCHAR *pos;

	// Be paranoid...
	if (prog == NULL || string == NULL) {
		regerror(L"NULL parameter");
		return(0);
	}

	// Check validity of program.
	if (UCHARAT(prog->program) != MAGIC) {
		regerror(L"Corrupted program");
		return(0);
	}

	// Quick rejection: a "must appear" string has to be somewhere in the
	// input before any real matching is worth trying.
	if (prog->regmust != NULL) {
		pos = cwcschr(string, prog->regmust[0]);
		while (pos != NULL
				&& cwcsncmp(pos, prog->regmust, prog->regmlen) != 0)
			pos = cwcschr(pos + 1, prog->regmust[0]);
		if (pos == NULL)		// Not present.
			return(0);
	}

	// '^' can match only when the caller says we are at start of line.
	regbol = at_bol ? string : NULL;

	// Simplest case:  anchored match need be tried only once.
	if (prog->reganch)
		return(regtry(prog, string));

	// Unanchored, with a known first character: only try at positions
	// holding that character.
	if (prog->regstart != L'\0') {
		pos = cwcschr(string, prog->regstart);
		while (pos != NULL) {
			if (regtry(prog, pos))
				return(1);
			pos = cwcschr(pos + 1, prog->regstart);
		}
		return(0);
	}

	// General case: try every position, including the terminating NUL.
	pos = string;
	for (;;) {
		if (regtry(prog, pos))
			return(1);
		if (*pos == L'\0')
			break;
		pos++;
	}

	// Failure.
	return(0);
}

// - regtry - try match at specific point
//
// Resets all NSUBEXP start/end slots of prog, then runs the program
// against string.  On success slot 0 is set to the span of the whole
// match.  Returns 1 on success, 0 on failure.
static int regtry(regexp *prog, WCHAR *string)
{
	int slot;

	reginput = string;
	regstartp = prog->startp;
	regendp = prog->endp;

	for (slot = 0; slot < NSUBEXP; slot++) {
		prog->startp[slot] = NULL;
		prog->endp[slot] = NULL;
	}

	if (!regmatch(prog->program + 1))
		return(0);

	prog->startp[0] = string;
	prog->endp[0] = reginput;
	return(1);
}

// A word is defined, for BWORD & EWORD, as any sequence of
// alphanumeric characters and/or underscores.
// The argument is evaluated twice, so it must not have side effects.
#define inword(c)		(iswalnum(c) || (c) == L'_')

// - regmatch - main matching routine
//
// Runs the compiled program from node prog against the input at the
// file-level cursor reginput, advancing that cursor as characters match.
//
// Conceptually the strategy is simple:  check to see whether the current
// node matches, call self recursively to see whether the rest matches,
// and then act accordingly.  In practice we make some effort to avoid
// recursion, in particular by going through "ordinary" nodes (that don't
// need to know whether the rest of the match failed) by a loop instead of
// by recursion.  OPEN, CLOSE, BRANCH, STAR and PLUS do recurse.
//
// The STAR/PLUS lookahead folds case through mkup() on both sides, so it
// agrees with the EXACTLY test it is trying to predict.  An unfolded
// comparison would prune positions that EXACTLY accepts in caseless mode.
//
// Returns 1 on success, 0 on failure.  An unknown opcode, or a chain of
// "next" pointers that ends without reaching END, is reported through
// regerror() and treated as failure.
static int regmatch(WCHAR *prog)
{
	WCHAR *scan;		// Current node.
	WCHAR *next; 		// Next node.

	scan = prog;
#ifdef DEBUG
	if (scan != NULL && regnarrate)
		ConsoleHprintf(hStdErr, L"%s(\n", regprop(scan));
#endif
	while (scan != NULL) {
#ifdef DEBUG
		if (regnarrate)
			ConsoleHprintf(hStdErr, L"%s...\n", regprop(scan));
#endif
		next = regnext(scan);

		switch (OP(scan)) {
		case BOL:
			if (reginput != regbol)
				return(0);
			break;
		case EOL:
			if (*reginput != L'\0')
				return(0);
			break;
		case BWORD:
		{
			int		c;

			// Beginning of word: the current character is a word
			// character and the previous one (if we are not at the
			// start of the line) is not.
			// NOTE(review): when regbol is NULL this reads
			// reginput[-1] even at the first position -- confirm the
			// caller guarantees a readable character there.
			if (
				(c = *reginput) == L'\0'
				||
				!inword(c)
				||
				(
					reginput != regbol
					&&
					inword(reginput[-1])
				)
			)
				return 0;
			break;
		}
		case EWORD:
		{
			int		c;

			// End of word: the previous character is a word
			// character and the current one is not (or is the end
			// of the input).  Never matches at regbol.
			if (
				(
					(c = *reginput) != L'\0'
					&&
					inword(c)
				)
				||
				reginput == regbol
				||
				!inword(reginput[-1])
			)
				return 0;
			break;
		}
		case ANY:
			if (*reginput == L'\0')
				return(0);
			reginput++;
			break;
		case EXACTLY:
		{
			int len;
			WCHAR *opnd;

			opnd = OPERAND(scan);
			// Inline the first character, for speed.
			if (mkup(*opnd) != mkup(*reginput))
				return(0);
			len = wcslen(opnd);
			if (len > 1
				&& cwcsncmp(opnd, reginput, len) != 0)
				return(0);
			reginput += len;
			break;
		}
		case ANYOF:
			if (*reginput == L'\0'
				|| wcschr(OPERAND(scan), *reginput) == NULL)
				return(0);
			reginput++;
			break;
		case ANYBUT:
			if (*reginput == L'\0'
				|| wcschr(OPERAND(scan), *reginput) != NULL)
				return(0);
			reginput++;
			break;
		case NOTHING:
			break;
		case BACK:
			break;
		case OPEN+1:
		case OPEN+2:
		case OPEN+3:
		case OPEN+4:
		case OPEN+5:
		case OPEN+6:
		case OPEN+7:
		case OPEN+8:
		case OPEN+9:
		{
			int no;
			WCHAR *save;

			no = OP(scan) - OPEN;
			save = reginput;

			if (regmatch(next)) {
				// Don't set startp if some later
				// invocation of the same parentheses
				// already has.
				if (regstartp[no] == NULL)
					regstartp[no] = save;
				return(1);
			} else
				return(0);
			break;
		}
		case CLOSE+1:
		case CLOSE+2:
		case CLOSE+3:
		case CLOSE+4:
		case CLOSE+5:
		case CLOSE+6:
		case CLOSE+7:
		case CLOSE+8:
		case CLOSE+9:
		{
			int no;
			WCHAR *save;

			no = OP(scan) - CLOSE;
			save = reginput;

			if (regmatch(next)) {
				// Don't set endp if some later
				// invocation of the same parentheses
				// already has.
				if (regendp[no] == NULL)
					regendp[no] = save;
				return(1);
			} else
				return(0);
			break;
		}
		case BRANCH:
		{
			WCHAR *save;

			if (OP(next) != BRANCH) 			// No choice.
				next = OPERAND(scan);
						// Avoid recursion.
			else {
				// Try each alternative in turn, restoring the
				// input position after every failed attempt.
				do {
					save = reginput;
					if (regmatch(OPERAND(scan)))
						return(1);
					reginput = save;
					scan = regnext(scan);
				} while (scan != NULL
					&& OP(scan) == BRANCH);
				return(0);
				// NOTREACHED
			}
			break;
		}
		case STAR:
		case PLUS:
		{
			WCHAR nextch;
			int no;
			WCHAR *save;
			int min;

			// Lookahead to avoid useless match attempts
			// when we know what character comes next.  The
			// character is folded with mkup() because the
			// EXACTLY node it comes from is matched that way.
			nextch = L'\0';
			if (OP(next) == EXACTLY)
				nextch = mkup(*OPERAND(next));
			min = (OP(scan) == STAR) ? 0 : 1;
			save = reginput;
			// Take as many repetitions as possible, then give
			// them back one at a time until the rest matches.
			no = regrepeat(OPERAND(scan));
			while (no >= min) {
				// If it could work, try it.
				if (nextch == L'\0' || mkup(*reginput) == nextch)
					if (regmatch(next))
						return(1);
				// Couldn't or didn't -- back up.
				no--;
				reginput = save + no;
			}
			return(0);
			break;
		}
		case END:
			return(1);	// Success!
			break;
		default:
			regerror(L"Memory corruption");
			return(0);
			break;
		}

		scan = next;
	}

	// We get here only if there's trouble -- normally "case END" is
	// the terminating point.
	regerror(L"Corrupted pointers");
	return(0);
}

// - regrepeat - repeatedly match something simple, report how many
//
// p is a single-character node (ANY, EXACTLY, ANYOF or ANYBUT).  Starting
// at reginput, as many characters as the node accepts are consumed;
// reginput is left just past them and their number is returned.  Any other
// opcode is reported through regerror() and counts as zero matches.
static int regrepeat(WCHAR *p)
{
	WCHAR *start = reginput;
	WCHAR *cur = start;
	WCHAR *set = OPERAND(p);

	switch (OP(p)) {
	case ANY:
		// Everything up to the end of the input.
		cur += wcslen(cur);
		break;
	case EXACTLY:
		// Only the first operand character is compared.
		while (mkup(*set) == mkup(*cur))
			cur++;
		break;
	case ANYOF:
		for (; *cur != L'\0'; cur++)
			if (wcschr(set, *cur) == NULL)
				break;
		break;
	case ANYBUT:
		for (; *cur != L'\0'; cur++)
			if (wcschr(set, *cur) != NULL)
				break;
		break;
	default:			// Oh dear.  Called inappropriately.
		regerror(L"Internal foulup");
		break;			// cur == start, so the count is 0.
	}
	reginput = cur;

	return((int)(cur - start));
}

// - regnext - dig the "next" pointer out of a node
//
// Returns the node that p links to, or NULL when p is the sizing-pass
// placeholder or its stored offset is zero.  The offset is applied
// backwards for a BACK node and forwards for every other opcode.
static WCHAR *regnext( WCHAR *p)
{
	int dist;

	if (p == &regdummy)
		return(NULL);

	dist = NEXT(p);
	if (dist == 0)
		return(NULL);

	return((OP(p) == BACK) ? p - dist : p + dist);
}

#ifdef DEBUG

STATIC WCHAR *regprop();

// - regdump - dump a regexp onto stdout in vaguely comprehensible form
//
// Prints one line per node -- position, opcode name, position of the
// "next" node, and the literal operand where there is one -- up to and
// including the END node, followed by the optimisation fields of r.
// NOTE(review): operand characters go through putchar() while the rest
// uses ConsolePrintf(); confirm putchar() is adequate for WCHAR values.
void regdump(regexp *r)
{
	WCHAR *node = r->program + 1;
	WCHAR *succ;
	WCHAR opcode;

	do {
		opcode = OP(node);
		ConsolePrintf(L"%2d%s", node-r->program, regprop(node));		// Where, what.

		succ = regnext(node);				// Next ptr.
		if (succ != NULL)
			ConsolePrintf(L"(%d)", (node-r->program)+(succ-node));
		else
			ConsolePrintf(L"(0)");

		node += 3;						// Skip the node header.
		switch (opcode) {
		case ANYOF:
		case ANYBUT:
		case EXACTLY:
			// Literal string, where present.
			for (; *node != L'\0'; node++)
				putchar(*node);
			node++;						// Step over the terminator.
			break;
		default:
			break;
		}
		putchar(L'\n');
	} while (opcode != END);			// Stop once END has been printed.

	// Header fields of interest.
	if (r->regstart != L'\0')
		ConsolePrintf(L"start `%c' ", r->regstart);
	if (r->reganch)
		ConsolePrintf(L"anchored ");
	if (r->regmust != NULL)
		ConsolePrintf(L"must have \"%s\"", r->regmust);
	ConsolePrintf(L"\n");
}

// - regprop - printable representation of opcode
//
// Builds ":" followed by the name of the opcode at op in a static buffer
// and returns that buffer; each call overwrites the previous result.
// OPEN/CLOSE nodes have their parenthesis number appended.  An unknown
// opcode is reported through regerror() and yields just ":".
static WCHAR *regprop(WCHAR *op)
{
	WCHAR *p;
	static WCHAR buf[50];

	(void) wcscpy(buf, L":");

	switch (OP(op)) {
	case BOL:
		p = L"BOL";
		break;
	case EOL:
		p = L"EOL";
		break;
	case ANY:
		p = L"ANY";
		break;
	case ANYOF:
		p = L"ANYOF";
		break;
	case ANYBUT:
		p = L"ANYBUT";
		break;
	case BRANCH:
		p = L"BRANCH";
		break;
	case EXACTLY:
		p = L"EXACTLY";
		break;
	case NOTHING:
		p = L"NOTHING";
		break;
	case BACK:
		p = L"BACK";
		break;
	case END:
		p = L"END";
		break;
	// The word-boundary opcodes are valid program content and get a
	// name of their own rather than falling into the error branch.
	case BWORD:
		p = L"BWORD";
		break;
	case EWORD:
		p = L"EWORD";
		break;
	case OPEN+1:
	case OPEN+2:
	case OPEN+3:
	case OPEN+4:
	case OPEN+5:
	case OPEN+6:
	case OPEN+7:
	case OPEN+8:
	case OPEN+9:
		// The name is formatted straight into buf, so nothing is
		// left to append afterwards.
		wsprintf(buf+wcslen(buf), L"OPEN%d", OP(op)-OPEN);
		p = NULL;
		break;
	case CLOSE+1:
	case CLOSE+2:
	case CLOSE+3:
	case CLOSE+4:
	case CLOSE+5:
	case CLOSE+6:
	case CLOSE+7:
	case CLOSE+8:
	case CLOSE+9:
		wsprintf(buf+wcslen(buf), L"CLOSE%d", OP(op)-CLOSE);
		p = NULL;
		break;
	case STAR:
		p = L"STAR";
		break;
	case PLUS:
		p = L"PLUS";
		break;
	default:
		regerror(L"Corrupted opcode");
		p = NULL;			// No name to append; never leave p unset.
		break;
	}
	if (p != NULL)
		(void) wcscat(buf, p);
	return(buf);
}
#endif

// The following is provided for those people who do not have wcscspn() in
// their C libraries.  They should get off their butts and do something
// about it; at least one public-domain implementation of those (highly
// useful) string routines has been published on Usenet.
#ifdef STRCSPN
// wcscspn - length of the leading part of s1 that contains no character
// from s2.  Returns the index of the first character of s1 that also
// occurs in s2, or the length of s1 when there is none.

static int wcscspn(WCHAR *s1, WCHAR *s2)
{
	int span = 0;

	while (s1[span] != L'\0') {
		WCHAR *reject;

		for (reject = s2; *reject != L'\0'; reject++) {
			if (*reject == s1[span])
				return(span);
		}
		span++;
	}
	return(span);
}
#endif

// cwcsncmp - compare at most n characters of s1 and s2, honouring the
// ignorecase setting.
//
// With ignorecase off this is plain wcsncmp().  Otherwise characters are
// compared after folding with mkup(); the result is 0 when the first n
// characters agree, else the difference of the folded characters at the
// point where the comparison stopped.
int cwcsncmp( WCHAR *s1,  WCHAR* s2,  int n)
{
	if (!Pb(P_ignorecase))
		return(wcsncmp(s1, s2, n));

	for (; n > 0; s1++, s2++, n--) {
		// Stop at the end of either string...
		if (*s1 == L'\0' || *s2 == L'\0')
			break;
		// ...or at the first pair that differs after folding.
		if (mkup(*s1) != mkup(*s2))
			break;
	}

	if (n == 0)
		return(0);
	return(mkup(*s1) - mkup(*s2));
}

// cwcschr - find the first character of s that equals c after both have
// been folded with mkup().  Returns a pointer to it, or NULL when the
// string ends first; the terminating NUL itself is never matched.
WCHAR *cwcschr(WCHAR *s, int c)
{
	int want = mkup(c);

	while (*s != L'\0') {
		if (mkup(*s) == want)
			return(s);
		s++;
	}
	return(NULL);
}
