Commit e7227322 authored by Richard Levitte's avatar Richard Levitte
Browse files

Allow 8-bit characters. This is not really complete: it only marks
characters with the highest bit set as HIGHBIT.  We need to expand
this to support the UTF-8 character set properly.  However, this
solves the problem that the byte 0x80 (which is common in UTF-8
multi-byte sequences) gets masked to 0x00.
Patch submitted by "Huang Yuzhen" <huangyuzhen@bj.tom.com>
parent a5595fde
Loading
Loading
Loading
Loading
+89 −54
Original line number Diff line number Diff line
@@ -71,6 +71,7 @@
#define CONF_COMMENT		128
#define CONF_FCOMMENT		2048
#define CONF_EOF		8
#define CONF_HIGHBIT		4096
#define CONF_ALPHA		(CONF_UPPER|CONF_LOWER)
#define CONF_ALPHA_NUMERIC	(CONF_ALPHA|CONF_NUMBER|CONF_UNDER)
#define CONF_ALPHA_NUMERIC_PUNCT (CONF_ALPHA|CONF_NUMBER|CONF_UNDER| \
@@ -78,68 +79,102 @@

#define KEYTYPES(c)		((unsigned short *)((c)->meth_data))
#ifndef CHARSET_EBCDIC
#define IS_COMMENT(c,a)		(KEYTYPES(c)[(a)&0x7f]&CONF_COMMENT)
#define IS_FCOMMENT(c,a)	(KEYTYPES(c)[(a)&0x7f]&CONF_FCOMMENT)
#define IS_EOF(c,a)		(KEYTYPES(c)[(a)&0x7f]&CONF_EOF)
#define IS_ESC(c,a)		(KEYTYPES(c)[(a)&0x7f]&CONF_ESC)
#define IS_NUMBER(c,a)		(KEYTYPES(c)[(a)&0x7f]&CONF_NUMBER)
#define IS_WS(c,a)		(KEYTYPES(c)[(a)&0x7f]&CONF_WS)
#define IS_ALPHA_NUMERIC(c,a)	(KEYTYPES(c)[(a)&0x7f]&CONF_ALPHA_NUMERIC)
#define IS_COMMENT(c,a)		(KEYTYPES(c)[(a)&0xff]&CONF_COMMENT)
#define IS_FCOMMENT(c,a)	(KEYTYPES(c)[(a)&0xff]&CONF_FCOMMENT)
#define IS_EOF(c,a)		(KEYTYPES(c)[(a)&0xff]&CONF_EOF)
#define IS_ESC(c,a)		(KEYTYPES(c)[(a)&0xff]&CONF_ESC)
#define IS_NUMBER(c,a)		(KEYTYPES(c)[(a)&0xff]&CONF_NUMBER)
#define IS_WS(c,a)		(KEYTYPES(c)[(a)&0xff]&CONF_WS)
#define IS_ALPHA_NUMERIC(c,a)	(KEYTYPES(c)[(a)&0xff]&CONF_ALPHA_NUMERIC)
#define IS_ALPHA_NUMERIC_PUNCT(c,a) \
				(KEYTYPES(c)[(a)&0x7f]&CONF_ALPHA_NUMERIC_PUNCT)
#define IS_QUOTE(c,a)		(KEYTYPES(c)[(a)&0x7f]&CONF_QUOTE)
#define IS_DQUOTE(c,a)		(KEYTYPES(c)[(a)&0x7f]&CONF_DQUOTE)
				(KEYTYPES(c)[(a)&0xff]&CONF_ALPHA_NUMERIC_PUNCT)
#define IS_QUOTE(c,a)		(KEYTYPES(c)[(a)&0xff]&CONF_QUOTE)
#define IS_DQUOTE(c,a)		(KEYTYPES(c)[(a)&0xff]&CONF_DQUOTE)
#define IS_HIGHBIT(c,a)		(KEYTYPES(c)[(a)&0xff]&CONF_HIGHBIT)

#else /*CHARSET_EBCDIC*/

#define IS_COMMENT(c,a)		(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_COMMENT)
#define IS_FCOMMENT(c,a)	(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_FCOMMENT)
#define IS_EOF(c,a)		(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_EOF)
#define IS_ESC(c,a)		(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_ESC)
#define IS_NUMBER(c,a)		(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_NUMBER)
#define IS_WS(c,a)		(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_WS)
#define IS_ALPHA_NUMERIC(c,a)	(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_ALPHA_NUMERIC)
#define IS_COMMENT(c,a)		(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_COMMENT)
#define IS_FCOMMENT(c,a)	(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_FCOMMENT)
#define IS_EOF(c,a)		(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_EOF)
#define IS_ESC(c,a)		(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ESC)
#define IS_NUMBER(c,a)		(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_NUMBER)
#define IS_WS(c,a)		(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_WS)
#define IS_ALPHA_NUMERIC(c,a)	(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ALPHA_NUMERIC)
#define IS_ALPHA_NUMERIC_PUNCT(c,a) \
				(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_ALPHA_NUMERIC_PUNCT)
#define IS_QUOTE(c,a)		(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_QUOTE)
#define IS_DQUOTE(c,a)		(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_DQUOTE)
				(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ALPHA_NUMERIC_PUNCT)
#define IS_QUOTE(c,a)		(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_QUOTE)
#define IS_DQUOTE(c,a)		(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_DQUOTE)
#define IS_HIGHBIT(c,a)		(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_HIGHBIT)
#endif /*CHARSET_EBCDIC*/

static unsigned short CONF_type_default[128]={
	0x008,0x000,0x000,0x000,0x000,0x000,0x000,0x000,
	0x000,0x010,0x010,0x000,0x000,0x010,0x000,0x000,
	0x000,0x000,0x000,0x000,0x000,0x000,0x000,0x000,
	0x000,0x000,0x000,0x000,0x000,0x000,0x000,0x000,
	0x010,0x200,0x040,0x080,0x000,0x200,0x200,0x040,
	0x000,0x000,0x200,0x200,0x200,0x200,0x200,0x200,
	0x001,0x001,0x001,0x001,0x001,0x001,0x001,0x001,
	0x001,0x001,0x000,0x200,0x000,0x000,0x000,0x200,
	0x200,0x002,0x002,0x002,0x002,0x002,0x002,0x002,
	0x002,0x002,0x002,0x002,0x002,0x002,0x002,0x002,
	0x002,0x002,0x002,0x002,0x002,0x002,0x002,0x002,
	0x002,0x002,0x002,0x000,0x020,0x000,0x200,0x100,
	0x040,0x004,0x004,0x004,0x004,0x004,0x004,0x004,
	0x004,0x004,0x004,0x004,0x004,0x004,0x004,0x004,
	0x004,0x004,0x004,0x004,0x004,0x004,0x004,0x004,
	0x004,0x004,0x004,0x000,0x200,0x000,0x200,0x000,
/*
 * CONF_type_default: table of CONF_* flag words with 256 entries, one per
 * 8-bit character value.
 *
 * - Entry 0x00 is 0x0008 (CONF_EOF).
 * - Entry 0x23 ('#') is 0x0080 (CONF_COMMENT).
 * - Entries 0x80..0xff are all 0x1000 (CONF_HIGHBIT), so a byte with the
 *   top bit set has its own class.
 *
 * The rows are emitted by a generator script (8 values per row, "0x%04X"
 * format); hand edits here would be lost on regeneration.
 * NOTE(review): presumably this table is what KEYTYPES(c) points at when
 * the IS_*() macros index it with (a)&0xff -- confirm where meth_data is
 * assigned.
 */
static unsigned short CONF_type_default[256]={
	0x0008,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
	0x0000,0x0010,0x0010,0x0000,0x0000,0x0010,0x0000,0x0000,
	0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
	0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
	0x0010,0x0200,0x0040,0x0080,0x0000,0x0200,0x0200,0x0040,
	0x0000,0x0000,0x0200,0x0200,0x0200,0x0200,0x0200,0x0200,
	0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,
	0x0001,0x0001,0x0000,0x0200,0x0000,0x0000,0x0000,0x0200,
	0x0200,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,
	0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,
	0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,
	0x0002,0x0002,0x0002,0x0000,0x0020,0x0000,0x0200,0x0100,
	0x0040,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,
	0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,
	0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,
	0x0004,0x0004,0x0004,0x0000,0x0200,0x0000,0x0200,0x0000,
	/* 0x80..0xff: every entry is CONF_HIGHBIT (0x1000) */
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	};

static unsigned short CONF_type_win32[128]={
	0x008,0x000,0x000,0x000,0x000,0x000,0x000,0x000,
	0x000,0x010,0x010,0x000,0x000,0x010,0x000,0x000,
	0x000,0x000,0x000,0x000,0x000,0x000,0x000,0x000,
	0x000,0x000,0x000,0x000,0x000,0x000,0x000,0x000,
	0x010,0x200,0x400,0x000,0x000,0x200,0x200,0x000,
	0x000,0x000,0x200,0x200,0x200,0x200,0x200,0x200,
	0x001,0x001,0x001,0x001,0x001,0x001,0x001,0x001,
	0x001,0x001,0x000,0xA00,0x000,0x000,0x000,0x200,
	0x200,0x002,0x002,0x002,0x002,0x002,0x002,0x002,
	0x002,0x002,0x002,0x002,0x002,0x002,0x002,0x002,
	0x002,0x002,0x002,0x002,0x002,0x002,0x002,0x002,
	0x002,0x002,0x002,0x000,0x000,0x000,0x200,0x100,
	0x000,0x004,0x004,0x004,0x004,0x004,0x004,0x004,
	0x004,0x004,0x004,0x004,0x004,0x004,0x004,0x004,
	0x004,0x004,0x004,0x004,0x004,0x004,0x004,0x004,
	0x004,0x004,0x004,0x000,0x200,0x000,0x200,0x000,
/*
 * CONF_type_win32: table of CONF_* flag words with 256 entries, one per
 * 8-bit character value.
 *
 * - Entry 0x00 is 0x0008 (CONF_EOF).
 * - Entry 0x22 ('"') is 0x0400 (the generator's $DQUOTE value).
 * - Entry 0x3b (';') is 0x0A00, which includes the 0x0800 bit
 *   (CONF_FCOMMENT).
 * - Entry 0x23 ('#') is 0x0000: unlike CONF_type_default, '#' carries no
 *   CONF_COMMENT flag here.
 * - Entries 0x80..0xff are all 0x1000 (CONF_HIGHBIT).
 *
 * The rows are emitted by a generator script (8 values per row, "0x%04X"
 * format); hand edits here would be lost on regeneration.
 * NOTE(review): presumably selected through KEYTYPES(c) for the win32
 * style of configuration file -- confirm where meth_data is assigned.
 */
static unsigned short CONF_type_win32[256]={
	0x0008,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
	0x0000,0x0010,0x0010,0x0000,0x0000,0x0010,0x0000,0x0000,
	0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
	0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
	0x0010,0x0200,0x0400,0x0000,0x0000,0x0200,0x0200,0x0000,
	0x0000,0x0000,0x0200,0x0200,0x0200,0x0200,0x0200,0x0200,
	0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,
	0x0001,0x0001,0x0000,0x0A00,0x0000,0x0000,0x0000,0x0200,
	0x0200,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,
	0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,
	0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,
	0x0002,0x0002,0x0002,0x0000,0x0000,0x0000,0x0200,0x0100,
	0x0000,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,
	0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,
	0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,
	0x0004,0x0004,0x0004,0x0000,0x0200,0x0000,0x0200,0x0000,
	/* 0x80..0xff: every entry is CONF_HIGHBIT (0x1000) */
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,
	};
+34 −28
Original line number Diff line number Diff line
@@ -12,8 +12,9 @@ $DQUOTE=0x400;
$COMMENT=0x80;
$FCOMMENT=0x800;
$EOF=0x08;
$HIGHBIT=0x1000;

foreach (0 .. 127)
foreach (0 .. 255)
	{
	$v=0;
	$c=sprintf("%c",$_);
@@ -27,11 +28,12 @@ foreach (0 .. 127)
	$v|=$QUOTE	if ($c =~ /['`"]/); # for emacs: "`'}/)
	$v|=$COMMENT	if ($c =~ /\#/);
	$v|=$EOF	if ($c =~ /\0/);
	$v|=$HIGHBIT	if ($c =~/[\x80-\xff]/);

	push(@V_def,$v);
	}

foreach (0 .. 127)
foreach (0 .. 255)
	{
	$v=0;
	$c=sprintf("%c",$_);
@@ -44,6 +46,7 @@ foreach (0 .. 127)
	$v|=$DQUOTE	if ($c =~ /["]/); # for emacs: "}/)
	$v|=$FCOMMENT	if ($c =~ /;/);
	$v|=$EOF	if ($c =~ /\0/);
	$v|=$HIGHBIT	if ($c =~/[\x80-\xff]/);

	push(@V_w32,$v);
	}
@@ -122,6 +125,7 @@ print <<"EOF";
#define CONF_COMMENT		$COMMENT
#define CONF_FCOMMENT		$FCOMMENT
#define CONF_EOF		$EOF
#define CONF_HIGHBIT		$HIGHBIT
#define CONF_ALPHA		(CONF_UPPER|CONF_LOWER)
#define CONF_ALPHA_NUMERIC	(CONF_ALPHA|CONF_NUMBER|CONF_UNDER)
#define CONF_ALPHA_NUMERIC_PUNCT (CONF_ALPHA|CONF_NUMBER|CONF_UNDER| \\
@@ -129,51 +133,53 @@ print <<"EOF";

#define KEYTYPES(c)		((unsigned short *)((c)->meth_data))
#ifndef CHARSET_EBCDIC
#define IS_COMMENT(c,a)		(KEYTYPES(c)[(a)&0x7f]&CONF_COMMENT)
#define IS_FCOMMENT(c,a)	(KEYTYPES(c)[(a)&0x7f]&CONF_FCOMMENT)
#define IS_EOF(c,a)		(KEYTYPES(c)[(a)&0x7f]&CONF_EOF)
#define IS_ESC(c,a)		(KEYTYPES(c)[(a)&0x7f]&CONF_ESC)
#define IS_NUMBER(c,a)		(KEYTYPES(c)[(a)&0x7f]&CONF_NUMBER)
#define IS_WS(c,a)		(KEYTYPES(c)[(a)&0x7f]&CONF_WS)
#define IS_ALPHA_NUMERIC(c,a)	(KEYTYPES(c)[(a)&0x7f]&CONF_ALPHA_NUMERIC)
#define IS_COMMENT(c,a)		(KEYTYPES(c)[(a)&0xff]&CONF_COMMENT)
#define IS_FCOMMENT(c,a)	(KEYTYPES(c)[(a)&0xff]&CONF_FCOMMENT)
#define IS_EOF(c,a)		(KEYTYPES(c)[(a)&0xff]&CONF_EOF)
#define IS_ESC(c,a)		(KEYTYPES(c)[(a)&0xff]&CONF_ESC)
#define IS_NUMBER(c,a)		(KEYTYPES(c)[(a)&0xff]&CONF_NUMBER)
#define IS_WS(c,a)		(KEYTYPES(c)[(a)&0xff]&CONF_WS)
#define IS_ALPHA_NUMERIC(c,a)	(KEYTYPES(c)[(a)&0xff]&CONF_ALPHA_NUMERIC)
#define IS_ALPHA_NUMERIC_PUNCT(c,a) \\
				(KEYTYPES(c)[(a)&0x7f]&CONF_ALPHA_NUMERIC_PUNCT)
#define IS_QUOTE(c,a)		(KEYTYPES(c)[(a)&0x7f]&CONF_QUOTE)
#define IS_DQUOTE(c,a)		(KEYTYPES(c)[(a)&0x7f]&CONF_DQUOTE)
				(KEYTYPES(c)[(a)&0xff]&CONF_ALPHA_NUMERIC_PUNCT)
#define IS_QUOTE(c,a)		(KEYTYPES(c)[(a)&0xff]&CONF_QUOTE)
#define IS_DQUOTE(c,a)		(KEYTYPES(c)[(a)&0xff]&CONF_DQUOTE)
#define IS_HIGHBIT(c,a)		(KEYTYPES(c)[(a)&0xff]&CONF_HIGHBIT)

#else /*CHARSET_EBCDIC*/

#define IS_COMMENT(c,a)		(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_COMMENT)
#define IS_FCOMMENT(c,a)	(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_FCOMMENT)
#define IS_EOF(c,a)		(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_EOF)
#define IS_ESC(c,a)		(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_ESC)
#define IS_NUMBER(c,a)		(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_NUMBER)
#define IS_WS(c,a)		(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_WS)
#define IS_ALPHA_NUMERIC(c,a)	(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_ALPHA_NUMERIC)
#define IS_COMMENT(c,a)		(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_COMMENT)
#define IS_FCOMMENT(c,a)	(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_FCOMMENT)
#define IS_EOF(c,a)		(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_EOF)
#define IS_ESC(c,a)		(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ESC)
#define IS_NUMBER(c,a)		(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_NUMBER)
#define IS_WS(c,a)		(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_WS)
#define IS_ALPHA_NUMERIC(c,a)	(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ALPHA_NUMERIC)
#define IS_ALPHA_NUMERIC_PUNCT(c,a) \\
				(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_ALPHA_NUMERIC_PUNCT)
#define IS_QUOTE(c,a)		(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_QUOTE)
#define IS_DQUOTE(c,a)		(KEYTYPES(c)[os_toascii[a]&0x7f]&CONF_DQUOTE)
				(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_ALPHA_NUMERIC_PUNCT)
#define IS_QUOTE(c,a)		(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_QUOTE)
#define IS_DQUOTE(c,a)		(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_DQUOTE)
#define IS_HIGHBIT(c,a)		(KEYTYPES(c)[os_toascii[a]&0xff]&CONF_HIGHBIT)
#endif /*CHARSET_EBCDIC*/

EOF

print "static unsigned short CONF_type_default[128]={";
print "static unsigned short CONF_type_default[256]={";

for ($i=0; $i<128; $i++)
for ($i=0; $i<256; $i++)
	{
	print "\n\t" if ($i % 8) == 0;
	printf "0x%03X,",$V_def[$i];
	printf "0x%04X,",$V_def[$i];
	}

print "\n\t};\n\n";

print "static unsigned short CONF_type_win32[128]={";
print "static unsigned short CONF_type_win32[256]={";

for ($i=0; $i<128; $i++)
for ($i=0; $i<256; $i++)
	{
	print "\n\t" if ($i % 8) == 0;
	printf "0x%03X,",$V_w32[$i];
	printf "0x%04X,",$V_w32[$i];
	}

print "\n\t};\n\n";