/* Copyright (c) 1988,1989 by Sozobon, Limited.  Author: Johann Ruegg
 *           (c) 1990 - 2009 by H. Robbers.   ANSI upgrade.
 *
 * This file is part of AHCC.
 *
 * AHCC is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * AHCC is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with AHCC; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

short alert_msg(char *t, ... );

/*	tok.c
 *
 *	Basic level token routines
 *
 *	Read lines and splice lines
 *
 *	Then:
 *	at this level, we return the following things:
 *		id's - strings of alpha-alnum
 *		integer constants
 *		real constants
 *		string constants
 *		multi-char tokens
 *
 *	We DONT know about:
 *		keywords
 *		#defined id's
 *		any other meaning of a name
 *
 *	Interface:
 *		call tok_next() to get next token
 *		look at 'curtok' for current token
 *		if EOF is seen, we call end_L0_file() before
 *		  giving up
 *
 *	Special flags:  (tk_flags)
 *		These special flags are needed for the pre-processor.
 *		All but TK_SEENL are 1-shot.
 *
 *		TK_SEENL - want to see \n
 *		TK_SEEWS - want to see white space (for #define).
 *		TK_LTSTR - '<' is a string starter
 *		TK_ONLY1 - skip to token after \n  (for #if--)
 *
 *	Discarding of white space and other irregular tokens
 *		is delayed and performed by advnode() (in EXPR.C)
 *		DELETE token introduced to get things smoother.
 *		Escape sequences in strings are now dealt with AFTER ALL preprocessing
 *			by advnode()
 */


#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <tos.h>
#include <ctype.h>
#include "param.h"

#include "expr.h"
#include "e2.h"				/* for #if expressions */
#include "c_lex.h"
#include "pre.h"
#include "tok.h"

/* The token node currently being filled in; tok_next() stores its result here. */
global XP curtok;

/* When non-zero, tok_next() returns EOF as soon as G.nmerrors is non-zero. */
#define ONE_ERROR 0

#if FLOAT
/* Value node of the most recently scanned real constant (set by tok_real()). */
static VP curdbl;
#endif

/* NOTE(review): Ct is not referenced in the code visible in this file section. */
global ustring Ct;

global
LEX_RECORD * cur_LEX;			/* tokenizing: the next lexical record to be handled */

/* Debug switches read from the G.xflags / G.yflags tables, indexed by letter. */
#define debugP (G.xflags['p'-'a'])
#define debugB (G.xflags['b'-'a'])
#define debug_b (G.yflags['b'-'a'])

/* Signature of a per-category token handler.
 * tok_next() calls it with the current record (r), the record following it (n)
 * and the token node to fill (tokcur), and stores the returned record
 * pointer in cur_LEX.
 */
typedef LEX_RECORD *DO_TOKEN(LEX_RECORD *r, LEX_RECORD *n, XP tokcur);

/* One entry of the category dispatch table cattbl[]. */
typedef struct catagories
{
	char name[4];			/* short category label; not necessarily NUL terminated */
	DO_TOKEN *do_tok;		/* handler for this category, or nil */
} CATTBL;

/* Forward declarations of the token handlers referenced by cattbl[].
 * do_dig and do_ws appear twice; repeating a declaration is harmless.
 * The definitions are presumably the ones pulled in with tok_do.h
 * further down in this file.
 */
DO_TOKEN
	do_eof,
	do_ide,
	do_dig,
	do_dig,
	do_nl,
	do_op,
	do_op2,
	do_op3,
	do_bad,
	do_ws,
	do_ws,
	do_quo,
	do_apo
	;

/* Dispatch table indexed by the category (cat) of a LEX_RECORD;
 * tok_next() calls cattbl[cat].do_tok for the current record.
 * Entries with a nil handler have no handler in this table.
 * NB: "last" fills name[4] completely, so that name has no
 * terminating NUL; do not print it with a plain %s.
 */
CATTBL cattbl[] =
{
	"eof", do_eof,
	"ide", do_ide,
	"oct", nil,
	"dig", do_dig,
	"hex", do_dig,
	"spl", nil,
	" nl", do_nl,
	"com", do_ws,
	" op", do_op,
	"mop", nil,
	"op2", do_op2,
	"op3", do_op3,
	"  X", do_bad,
	"tab", do_ws,
	" ws", do_ws,
	"quo", do_quo,
	"apo", do_apo,
	"cok", nil,
	"last",nil
};

/* A translation pair for ctrans(): character 'in' is replaced by 'out'.
 * Tables of these pairs are terminated by an entry whose 'in' is 0.
 */
static struct in_out
{
	short in,out;
} trigraphs[] =
{
	/* third character of a trigraph -> the character it stands for */
	{'=','#'},
	{'/',ESCAPE},
	{'\'','^'},
	{'(','['},
	{')',']'},
	{'!','|'},
	{'<','{'},
	{'>','}'},
	{'-','~'},
	{0,0}
};

/* Translation table used by is_escape(): the letter following the
 * escape character -> the control character it denotes.
 * Terminated by the {0,0} entry.
 */
static struct in_out
escapes[] =
{
	{LETTERALRM,ALARM    },
	{LETTERBS,  BACKSPACE},
	{LETTERNL,  NEWLINE  },
	{LETTERHT,  HORTAB   },
	{LETTERCRT, CARRETURN},
	{LETTERFF,  FORMFEED },
	{LETTERVT,  VERTAB   },
	{0,0}
};

static char ctrans(short c, struct in_out io[])
{
	short i;
	for (i = 0; io[i].in; i++)
		if (c eq io[i].in)
			return io[i].out;
	return c;
}

/* Map the letter of an escape sequence to its control character
 * via the escapes[] table; any other character is returned unchanged.
 */
global char is_escape(short c)
{
	char translated = ctrans(c, escapes);

	return translated;
}

/* Map the third character of a trigraph to the character it stands for
 * via the trigraphs[] table; any other character is returned unchanged.
 */
static char is_tri(short c)
{
	char translated = ctrans(c, trigraphs);

	return translated;
}

global char ishexa(short c)
{
	if ( c >= DIGITNAUGHT and c <= DIGITNINE )
		return (c-DIGITNAUGHT);
	c = tolower(c);
	if ( c >= DIGITTEN and c <= DIGITFIFTEEN )
		return (c-DIGITTEN+10);
	return -1;
}

/* Convert the decimal digit string in r->text to a long.
 * No check is made here for non-digits or for overflow.
 */
static
long tok_dig(LEX_RECORD *r)
{
	char *p;
	long result = 0;

	for (p = r->text; *p; p++)
		result = (result*10)+(*p - DIGITNAUGHT);

	return result;
}

#if LONGLONG
/* Convert the decimal digit string in r->text to a long long value,
 * using _ullsmul() for the multiply-by-10-and-add step.
 */
static
__ll tok_digll(LEX_RECORD *r)
{
	__ll acc = {0,0};
	char *p;

	for (p = r->text; *p; p++)
	{
		ushort digit = *p - DIGITNAUGHT;
		acc = _ullsmul(acc, 10, digit);		/* digit is added to the result */
	}

	return acc;
}
#endif

/* Convert the octal digit string in r->text to a long.
 * A character above DIGITSEVEN is reported with error() and makes
 * the function return 0.
 */
static
long tok_oct(LEX_RECORD *r)
{
	long result = 0;
	char *p;

	for (p = r->text; *p; p++)
	{
		if (*p > DIGITSEVEN)
		{
			error("illegal octal digit in '%s'", r->text);
			return 0;
		}
		result = (result<<3)+(*p - DIGITNAUGHT);
	}

	return result;
}

#if LONGLONG
/* Convert the octal digit string in r->text to a long long value.
 * A character above DIGITSEVEN is reported with error() and makes
 * the function return zero, matching tok_oct().
 */
static
__ll tok_octll(LEX_RECORD *r)
{
	__ll val = {0,0};
	char *s = r->text;

	while (*s)
	{
		if (*s > DIGITSEVEN)
		{
			error("illegal octal digit in '%s'", r->text);
			val.hi = 0;
			val.lo = 0;
			/* stop scanning: s is not advanced on this path, so
			   continuing the loop would report the same digit forever */
			break;
		}
		else
		{
			ushort c = (uchar)*s++ - DIGITNAUGHT;
			val = _ullsmul(val, 8, c);		/* c is added to the result */
		}
	}

	return val;
}
#endif

/* Convert the hexadecimal constant in r->text to a long.
 * The first two characters (the 0x prefix) are skipped; every further
 * character is converted with ishexa() without checking its result.
 */
static
long tok_hex(LEX_RECORD *r)
{
	long result = 0;
	char *p = r->text + 2;		/* we know its 0x */

	for (; *p; p++)
	{
		char nibble = ishexa(*p);
		result = (result<<4)+ nibble;
	}

	return result;
}

#if LONGLONG
/* Convert the hexadecimal constant in r->text to a long long value.
 * The first two characters (the 0x prefix) are skipped; every further
 * character is converted with ishexa() and folded in with _ullsmul().
 */
static
__ll tok_hexll(LEX_RECORD *r)
{
	__ll acc = {0,0};
	char *p = r->text + 2;		/* we know its 0x */

	for (; *p; p++)
	{
		ushort nibble = (uchar)ishexa(*p);
		acc = _ullsmul(acc, 16, nibble);		/* nibble is added to the result */
	}

	return acc;
}
#endif

/* True when record r is an operator whose text starts with the decimal point. */
#define is_point(r) ((r)->cat eq op and *(r)->text eq DECIMALP)

/* True when every character of s lies in DIGITNAUGHT..DIGITNINE.
 * The empty string yields true.
 */
static
bool is_digits(char *s)
{
	for (; *s; s++)
		if (*s < DIGITNAUGHT or *s > DIGITNINE)
			return false;

	return true;
}

#if FLOAT
/* True when record r can be the exponent part of a real constant:
 * an identifier consisting of the exponent letter (either case)
 * followed by nothing but digits (possibly none).
 */
static
bool is_exp(LEX_RECORD *r)
{
	char *t = r->text;
	bool starts_with_e = r->cat eq ide and tolower(*t) eq ExpoNent;

	return starts_with_e and is_digits(t + 1);
}

static
bool is_L_or_F(char c)
{
	c = tolower(c);
	return c eq REALTOK or c eq LONGTOK;
}

/* True when record r is an identifier that is exactly one or two
 * suffix letters accepted by is_L_or_F():  L, F, LF, FL
 */
static
bool is_LF(LEX_RECORD *r)
{
	char *s = r->text;

	if (r->cat ne ide)
		return false;

	/* 03'11 HR: NB! rl includes \0 character */
	switch (r->rl-size_LEX)
	{
	case 2:				/* one letter */
		if (is_L_or_F(s[0]))
			return true;
		break;
	case 3:				/* two letters */
		if (is_L_or_F(s[0]) and is_L_or_F(s[1]))
			return true;
		break;
	}

	return false;
}

/* tok_real always starts with a dig.
 *
 * Try to assemble a real constant from the digit record r and the
 * records that follow it.
 *	r	the leading digit record
 *	n	the record following r
 *	f	caller supplied buffer that receives the text of the constant;
 *		its size is not checked here
 * Returns false (a null pointer) when n is neither a decimal point nor
 * an exponent.  Otherwise curtok becomes a RCON whose value node holds
 * atof(f), and the first record that is not part of the constant is returned.
 * LEX_next(r) presumably advances r to the following record -- see its definition.
 */
static
LEX_RECORD * tok_real(LEX_RECORD *r, LEX_RECORD *n, char *f)
{
	bool okp = is_point(n),
	     oke = is_exp(n);

	if (!okp and !oke)
		return false;			/* not a real constant */

	strcpy(f, r->text);			/* integer part */

	if (okp)
	{
		strcat(f, n->text);		/* the decimal point */
		r = next_LEX(n);
		if (r->cat eq dig)		/* optional fraction digits */
		{
			strcat(f, r->text);
			LEX_next(r);
		}

		oke = is_exp(r);		/* an exponent may follow the fraction */
	}
	else
		r = n;					/* n itself is the exponent */

	if (oke)
	{
		strcat(f, r->text);
		if (r->rl-size_LEX > 1)	/* exponent letter with digits attached */
			LEX_next(r);
		else					/* single e allows for - */
		{
			LEX_next(r);
			if (     r->cat eq op
		        and (   *r->text eq MINUSSIGN
		             or *r->text eq PLUSSIGN
		            )
		       )
			{
				strcat(f, r->text);		/* sign of the exponent */
				LEX_next(r);
			}
			if (r->cat eq dig)			/* exponent digits */
			{
				strcat(f, r->text);
				LEX_next(r);
			}
		}
	}

	if (is_LF(r))
		LEX_next(r);		/* following L or F or both is OK */

	curtok->token = RCON;
	curdbl = allocVn(RLNODE);
	curdbl->rval = atof(f);		/* the suffix is not part of f */
	curtok->val.dbl = curdbl;

	return r;
}
#endif

#define ASP 0		/* precedence of assign operators; until now 0 */

/* Per token value: the spelling used in messages (graphic) and the
 * tok_tab entry describing the token (C_tok).  Both are filled by mo_init().
 */
global string graphic[ASSIGN 0];
global TOKEN *C_tok  [ASSIGN 0];


/*  For table entries flagged ASOP, node_from_op() will
		add the entry's precedence to the token value,
		apply ASSIGN to the token value,
		and store ASP as the precedence of the node.
*/

/* Table of spellings with their token values; generated from defs.h
 * with DEF_LEX defined.  Terminated by the {0} entry (text is 0).
 */
#define DEF_LEX 1
global LEX C_lex[] =
{
#include "defs.h"
	{0}
};
#undef DEF_LEX

/* Table of token properties; generated from defs.h with DEF_TAB defined.
 * mo_init() addresses it with (token value - FIRST_OP).
 */
#define DEF_TAB 1
global TOKEN tok_tab[] =
{
#include "defs.h"
	{0}
};
#undef DEF_TAB

/* First-character indexes into C_lex[]:
 *	op_lex[c]	first entry whose spelling starts with the non-alphabetic character c
 *	kw_lex[c]	first entry whose spelling starts with the alphabetic character c
 *	ow_lex[c]	(FOR_A only) same as kw_lex, but for entries that lack the KWD flag
 * op_lex is filled by mo_init(), the other two by kw_init().
 */
static
LEX *op_lex[256],
	*kw_lex[256];
#if FOR_A
LEX	*ow_lex[256];
#endif

/* Placeholder spelling for token values that have no entry in C_lex[]. */
char graphemp[] = "???";

static void mo_init(void)
{
	LEX *pt;
	uint c;

	for (c = 0; c<256;        c++) op_lex[c] = 0;
	for (c = 0; c<(ASSIGN 0); c++) C_tok [c] = 0, graphic[c] = graphemp;

	for (pt = C_lex; pt->text; pt++)
	{
		short tok = pt->value;
		if (!C_tok[tok])
			 C_tok[tok] = &tok_tab[tok-FIRST_OP];
		if (strcmp(graphic[tok], graphemp) eq 0)
			graphic[tok] = pt->text,
			C_tok[tok]->text = pt->text;

		if (*pt->text and !is_alpha(*pt->text))
		{
			c = (uchar)pt->text[0];
			if (!op_lex[c])
				 op_lex[c] = pt;
		}
	}
}

static void kw_init(void)
{
	LEX *pt;
	uint c;

	for (c = 0; c<256; c++) kw_lex[c] = 0
#if FOR_A
						  , ow_lex[c] = 0
#endif
						;
	for (pt = C_lex; pt->text; pt++)
	{
		if (*pt->text and is_alpha(*pt->text))
		{
			c = pt->text[0];
#if ! FOR_A
			if (kw_lex[c] eq 0)
				kw_lex[c] = pt;
#else
			if (tok_tab[pt->value-FIRST_OP].flags&KWD)
			{
				if (kw_lex[c] eq 0)
					kw_lex[c] = pt;
			othw
				if (ow_lex[c] eq 0)
					ow_lex[c] = pt;
			}
#endif
		}
	}
}

/* Fill token node tp from the C_lex entry kp.
 * Sets precedence, category, the rlop flag, clears zflgs, replaces the
 * node's name by the table spelling and finally stores the token value.
 * For entries flagged ASOP the token value is raised by the entry's
 * precedence and passed through ASSIGN, and the node gets precedence ASP.
 */
static
void node_from_op(XP tp, LEX *kp)
{
	short code = kp->value;
	TOKEN *info = C_tok[code];

	if ((info->flags&ASOP) eq 0)
		tp->cflgs.f.prec = info->prec;
	else
	{
		tp->cflgs.f.prec = ASP;
		code += info->prec;
		code = ASSIGN code;
	}

	tp->category = info->category;
	tp->cflgs.f.rlop = info->flags&RLOP;
	tp->zflgs.i = 0;

	free_name(tp);
	tp->name = kp->text;

#if FOR_A
	if (code eq IS_DEF_AS and !G.for_A)
		code = ASSIGNMT;
#else
	if (code eq BECOMES)
	{
		warn("'%s' is not C; Assume '%s'", graphic[BECOMES], graphic[ASSIGNMT]);
		code = ASSIGNMT;
	}
#endif

	tp->token = code;
}

static
void tok_BAD(short which)
{
	D_(B, "tok_BAD");
	curtok->token = BADTOK;
}

/* Turn the single character operator in record r into the current token.
 * When no operator starts with that character the token becomes BADTOK.
 */
static
void tok_op(LEX_RECORD *r)
{
	LEX *pt = op_lex[(uchar)*r->text];

	if (!pt)
	{
		tok_BAD(0);
		return;
	}

	/* the single char operator is always the last (or the only :-) */
	while (pt->text[1] ne 0)
		pt++;

	node_from_op(curtok, pt);

#if NODESTATS
	G.operators++;
#endif
}

/* Turn the three character operator in record r into the current token.
 * Only the first C_lex entry for the leading character is considered;
 * when it is not a matching three character operator the token
 * becomes BADTOK.
 */
static
void tok_op3(LEX_RECORD *r)
{
	char *s = r->text, *t;
	uchar c = *s;
	LEX *pt = op_lex[c];

	if (pt)
	{
		t = pt->text;
		/* the triple char operator is always the first and the only :-)
		   t[1] is tested first so that t[2] is never read beyond
		   the terminator of a one character operator */
		if (    *(t+1) ne 0
		    and *(t+2) ne 0
		    and *(s+1) eq *(t+1)
		    and *(s+2) eq *(t+2)
		   )
		{
			node_from_op(curtok, pt);
#if NODESTATS
			G.operators++;
#endif
			return;
		}
	}

	tok_BAD(1);
}

/* Turn the two character operator in record r into the current token.
 * The C_lex entries starting with the same character are searched for
 * one whose second character matches; when none does the token
 * becomes BADTOK.
 */
static
void tok_op2(LEX_RECORD *r)
{
	uchar c = *r->text, d;
	LEX *pt = op_lex[c];

	if (pt)
	{
		/* the triple char operator is always the first (or the only :-)
		   text[1] is tested first so that text[2] is never read beyond
		   the terminator of a one character operator */
		if (    *(pt->text + 1) ne 0
		    and *(pt->text + 2) ne 0
		   )
			pt++;

		/* walk the entries for c that have a second character */
		while(   pt->text
		      and *pt->text eq c
		      and (d = *(pt->text + 1)) ne 0
		     )
		{
			if (*(r->text+1) eq d)
			{
				node_from_op(curtok, pt);
#if NODESTATS
				G.operators++;
#endif
				return;
			}
			pt++;
		}
	}

	tok_BAD(2);
}

/* One-time initialisation of the operator and keyword tables.
 * Calls after the first one do nothing.
 */
global void tok_init(void)
{
	static bool done = false;

	if (done)
		return;

	mo_init();
	kw_init();
	done = true;
}

short cmpl(const char *s1, const char *s2, short l);
									/* l at least 1 */

bool is_pre_kw(char *s, short l);

global short is_bold_word(void *bc, char *s)
/* Determine if a word in source is a reserved word.
   Used by the editor for displaying these in bold. */
{
	LEX *kp;
	char *to = s; long l;
#if BIP_ASM
	while (isalnum(*to)) to++;
#else
	while (islower(*to)) to++;
	if (!is_alpha(*to))
#endif
	{
		uint u = *(uchar *)s;
		l = to-s;

		if (l)
		{
			kp = kw_lex[u];

			if (kp)
				while (kp->text)
				{
					short i = cmpl(s, kp->text, l);
					if ( i eq 0)
						return l;
					elif (i < 0)
						break;
					++kp;
				}

	#if FOR_A
			kp = ow_lex[u];

			if (kp)
				while (kp->text)
				{
					short i = cmpl(s, kp->text, l);
					if ( i eq 0)
						return l;
					elif (i < 0)
						break;
					++kp;
				}
	#endif

			if (is_pre_kw(s, l))
				return l;
		}
	}
	return -1;
}

/* It seemed like a nice idea to put all keywords in the dictionary,
   but it turned out to save only a fraction of a second, even
   on 50Kb+ files with 200Kb+ of header files.
   The strcmp version is also a little more failsafe,
   and it knows the end of the search range (i<0).
*/
/* If the name of node tp is a keyword, turn tp into that keyword's token.
 * The kw_lex[] run for the first character is searched with strcmp();
 * the search ends at the first entry that compares greater than the name.
 * A keyword with the rlop flag is demoted to ID when G.use_FPU is off
 * (and, with BIP_ASM, an ASM category keyword when G.for_S is off).
 * On a match the node's name is replaced by the table spelling.
 */
global
void kw_tok(XP tp)			/* NB after macro expansion */
{
	LEX *kp = kw_lex[*(uchar *)tp->name];

	if (!kp)
		return;

	for (; kp->text; ++kp)
	{
		TOKEN *tok;
		short i = strcmp(tp->name, kp->text);

		if (i < 0)
			return;				/* beyond the place where it would be */
		if (i ne 0)
			continue;

		tok = C_tok[kp->value];

		tp->token = kp->value;
		tp->category = tok->category;
		tp->cflgs.f.rlop = tok->flags&RLOP;
		if (   (tp->cflgs.f.rlop and !G.use_FPU)
#if BIP_ASM
			or (!G.for_S and (tok->category & ASM) ne 0)
#endif
		   )
		{
			tp->token = ID;		/* then library software or ID */
			tp->cflgs.i = 0;
		}
		free_name((NP)tp);
		tp->name = kp->text;
		return;
	}
}

#if FOR_A
/* If the name of node tp is a word operator (an alphabetic C_lex entry
 * without the KWD flag), fill tp from that entry via node_from_op().
 * Entries that do carry KWD are skipped; the search ends at the first
 * considered entry that compares greater than the name.
 */
global void ow_tok(XP tp)			/* NB before macro expansion */
{
	LEX *kp = ow_lex[*(uchar *)tp->name];

	if (!kp)
		return;

	for (; kp->text; ++kp)
	{
		short i;

		if ((tok_tab[kp->value-FIRST_OP].flags&KWD) ne 0)
			continue;

		i = strcmp(tp->name, kp->text);
		if (i eq 0)
		{
			node_from_op(tp, kp);
			return;
		}
		if (i < 0)
			return;
	}
}
#endif

static short tok_EOF(void)
{
	D_(B, "tok_EOF");
	curtok->token = EOFTOK;
	name_to_str(curtok, "end of file");
	return EOFTOK;
}

static short tok_NL(void)
{
	D_(B, "tok_NL");
	curtok->token = NL;
	name_to_str(curtok, "\n");
	G.tk_flags &= TK_KEEP;			/* 03'09 */
	G.tk_flags |= TK_SAWNL;
	return NL;
}

extern char in_name[];

/* Called when the end of the current input is reached.
 *	which	identifies the calling site; only used in the debug message
 * At include level 0:
 *	returns false when there is no G.iftab,
 *	otherwise reports missing endifs (if any) and returns true: real EOF.
 * At a deeper level the current G.inctab entry is removed and the state
 * of the including file is restored (includer directory, line number,
 * cur_LEX); TK_SAWNL is set and false is returned.
 */
static
bool end_L0_file(short which)
{
	VP ip = G.inctab;
	VpV cur_name;			/* local declaration of the function called below */
	char *sp;

	/* NOTE(review): this debug message dereferences G.iftab before the nil test below */
	D_D(P, (send_msg("[%d]end_lvl-%d_file\tl%d-t%d\n",which,ip->p.lvl,G.iftab->p.nest,G.iftab->p.truth));)

	if (ip->p.lvl eq 0)
	{
		if (!G.iftab)			/* in optdef() no endof's here */
			return false;
		if (G.iftab->p.nest)
			error("%d %sendif%s missing", G.iftab->p.nest, graphic[PREP], pluralis(G.iftab->p.nest));
		return true;		/* EOF */
	}

	delete_from_cache(ip->name, G.ac_cache_headers);		/* 12'09 HR: for ST */
	
	/* unlink and free the entry of the file that just ended */
	G.inctab = ip->next;
	ip->next = nil;
	freeVn(ip);

	/* G.includer becomes the directory part of the including file's name */
	DIRcpy(&G.includer, G.inctab->name);
	sp = strrslash(G.includer.s);
	if (sp)
		*(sp+1) = 0;

	/* continue in the including file where it was left */
	lineno = G.inctab->vval;
	cur_name();
	cur_LEX = G.inctab->p.cur_LEX;

	G.tk_flags |= TK_SAWNL;    /* 03'09 */
	return false;
}

/* these are called from within the preprocessor */
/* Skip the remainder of the current line.
 *	after	when true, also step over the newline record and set TK_SAWNL
 * When SAW_NL holds, only SEEN_NL is performed (these macros presumably
 * test and clear TK_SAWNL -- see their definitions).
 * Otherwise records are skipped up to a nl or eof record.  On eof,
 * end_L0_file(1) decides: real EOF makes the current token EOFTOK and
 * returns; otherwise scanning continues at the restored cur_LEX.
 * lineno is then loaded from the record's text, and one white space
 * record following is skipped.
 */
global void tok_to_nl(bool after)
{
	LEX_RECORD *r = cur_LEX;

	D_(B, "tok_to_nl");

	if (SAW_NL)
		SEEN_NL;
	else
	{
		while (r->cat ne nl)
		{
			if (r->cat eq eof)
				break;
			else
				LEX_next(r);
		}

		/* NB: the else below belongs to the inner if (end_L0_file) */
		if (r->cat eq eof)
			if (end_L0_file(1))
			{
				tok_EOF();
				cur_LEX = r;
				return;
			}
			else
				r = cur_LEX;		/* continue in the including file */
		{
			/* the line number is read as a long from the record's text,
			   at the first even address.
			   NOTE(review): after the 'else' above r need not be a nl record. */
			char *o = r->text;
			if ((long)o & 1)
				o++;
			lineno = *(long *)o;
		}

		if (after)
		{
			LEX_next(r);
			G.tk_flags |= TK_SAWNL;    /* 03'09 */
		}
	}

	if (r->cat eq ws)	/* skip spaces after nl */
		LEX_next(r);

	cur_LEX = r;
}

/* Advance cur_LEX to the record following the next newline record.
 *	which	identifies the calling site; not used here
 * Returns false when the end of the outermost input is reached
 * (end_L0_file(2) returned true), true otherwise.
 * lineno is loaded from the text of the record that was found.
 */
static bool find_hardnl(short which)
{
	LEX_RECORD *r = cur_LEX;

	while (r->cat ne eof)
	{
		if (r->cat eq nl)
			break;

		LEX_next(r);
	}

	/* NB: the else below belongs to the inner if (end_L0_file) */
	if (r->cat eq eof)
		if ( end_L0_file(2) )
			return false;
		else
			r = cur_LEX;			/* continue in the including file */

	{
		/* the line number is read as a long from the record's text,
		   at the first even address.
		   NOTE(review): after the 'else' above r need not be a nl record. */
		char *o = r->text;
		if ((long)o & 1)
			o++;
		lineno = *(long *)o;
	}

	cur_LEX = next_LEX(r);

	return true;
}

/* Debug aid: render the SEENL, SEEWS and SAWNL bits of G.tk_flags as
 * a '|' separated list.  The result lives in a static buffer that is
 * overwritten by the next call; it is empty when none of the bits is set.
 */
char * pr_tk(void)
{
	static char b[40];
	short f = G.tk_flags;
	size_t len;

	b[0]=0;

	if (f & TK_SEENL)
		strcat(b, "SEENL|");
	if (f & TK_SEEWS)
		strcat(b, "SEEWS|");
	if (f & TK_SAWNL)
		strcat(b, "SAWNL|");

	len = strlen(b);
	if (len)
		b[len-1] = 0;		/* drop the trailing '|' */

	return b;
}

bool is_swit(LEX_RECORD *r);

/* Skip forward to the next line that starts a preprocessor directive.
 * Starting at the beginning of a line (the current one when SAW_NL holds,
 * else the one found by find_hardnl), each line is examined after its
 * leading white space:
 *	- when G.in_S is set and the line starts with an identifier that
 *	  is_swit() accepts, PREP is returned (cur_LEX is not changed for this line);
 *	- when it starts with an operator whose first character is PREPWARN,
 *	  that operator becomes the current token, cur_LEX is moved past it
 *	  and the token value is returned;
 *	- otherwise the line is skipped.
 * Returns the result of tok_EOF() when the input ends first.
 */
global short tok_prep(void)
{
	LEX_RECORD *r;

	D_(B, "tok_prep");

	if (SAW_NL)
		SEEN_NL;
	elif (!find_hardnl(1))
		return tok_EOF();

	do
	{
		r = cur_LEX;
		while(r->cat eq ws)		/* skip spaces after nl */
			LEX_next(r);

		if (    G.in_S
		    and r->cat eq ide	/* 10'10 HR else endif endc fi */
			and is_swit(r)
		   )
			return PREP;

		if (r->cat eq op and *r->text eq PREPWARN)
		{
			tok_op(r);		/* found a single # */
			cur_LEX = next_LEX(r);
			return curtok->token;
		}

		if (!find_hardnl(2))	/* not a directive: go to the next line */
			return tok_EOF();
	}
	od		/* until prep  or EOF */
}

#include "tok_do.h"		/* DO_TOKEN functions via cattbl[] */

/* Deliver the next token in curtok and return its token value.
 * A fresh node is allocated unless the current one is still unused
 * (nt is STNODE).  Records are handed to the handler for their category
 * (cattbl[]) until one of them produces something other than NOTOK.
 * Afterwards the user break condition (break_in) is polled; when it
 * holds, the result of tok_EOF() is returned instead.
 */
global short tok_next(void)
{
	D_(B, "tok_next");

#if ONE_ERROR
	if (G.nmerrors)
		return tok_EOF();
#endif

	if (curtok->nt ne STNODE)		/* token unused */
		curtok = allocXn(1);		/* 1: with clear */

	curtok->token = NOTOK;
	curtok->nflags.f.nheap = 1;		 /* special case: no name yet */

	do
	{
		LEX_RECORD *this, *nxt;

		lineno = n_lineno;			/* 03'09 (was out of loop) */

		this = cur_LEX;
		if (G.for_S)
			G.tk_flags |= TK_SEENL; /* tok_NL */
		nxt = next_LEX(this);
		/* the handler returns the record at which to continue */
		cur_LEX = cattbl[this->cat].do_tok(this, nxt, curtok);
	}
	while tok_is(NOTOK);			/* mostly not requested white space and newline ~(TK_SEEWS|TK_SEENL) */

#if C_DEBUG
	if break_in						/* <RSHIFT+LSHIFT> */
		return tok_EOF();
#else
	{
		/* breaks counts the tokens; the mask is non-zero as soon as
		   the count reaches 512, after which it starts again at 0 */
		static long breaks = 0;
		breaks++;
		if (breaks&0xfffffe00)		/* once every 512 tokens */
		{
			breaks = 0;
			if break_in
				return tok_EOF();
		}
	}
#endif

	D_D(_b,send_msg("<%s>\n", curtok->name));
/*	send_msg("<%s> %s\n", curtok->name ? curtok->name : "<nil>", pr_tk()); */
	return curtok->token;
}
