/*
 * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
 * All Rights Reserved
 *
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Code moved from regexp.h */

#include "apr.h"
#include "apr_lib.h"
#if APR_HAVE_LIMITS_H
#include <limits.h>
#endif
#if APR_HAVE_STDLIB_H
#include <stdlib.h>
#endif
#include "libsed.h"
#include "regexp.h"
#include "sed.h"

/*
 * Input cursor helpers for sed_compile(); they operate on its local
 * variable `sp'.  Characters are read as unsigned so that bytes with the
 * high bit set never compare equal to negative values.
 */
#define GETC() ((unsigned char)*sp++)
#define PEEKC() ((unsigned char)*sp)
#define UNGETC(c) (--sp)

/*
 * Record a compile error code and leave sed_compile() through its `out'
 * label.  Wrapped in do/while(0) so it behaves as a single statement.
 */
#define SEDCOMPILE_ERROR(code) do { \
            regerrno = (code); \
            goto out; \
        } while (0)

#define ecmp(s1, s2, n) (strncmp(s1, s2, n) == 0)
#define uletter(c) (isalpha(c) || c == '_')

/* One bit per position within a byte of a character-class bitmap. */
static unsigned char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };

static int regerr(sed_commands_t *commands, int err);
static void comperr(sed_commands_t *commands, char *msg);
static void getrnge(char *str, step_vars_storage *vars);
static int _advance(char *, char *, step_vars_storage *);
extern int sed_step(char *p1, char *p2, int circf, step_vars_storage *vars);


/*
 * Report a compile error.  `msg' is used as the format string and the
 * current command line buffer is passed as its single argument.
 */
static void comperr(sed_commands_t *commands, char *msg)
{
    command_errf(commands, msg, commands->linebuf);
}

/*
 * Map a numeric regexp compile error code to its message and report it
 * through comperr().  Code 0 means "no error" and reports nothing.
 * Always returns 0.
 */
static int regerr(sed_commands_t *commands, int err)
{
    switch (err) {
    case 0:
        /* No error */
        break;
    case 11:
        comperr(commands, "Range endpoint too large: %s");
        break;

    case 16:
        comperr(commands, "Bad number: %s");
        break;

    case 25:
        comperr(commands, "``\\digit'' out of range: %s");
        break;

    case 36:
        comperr(commands, "Illegal or missing delimiter: %s");
        break;

    case 41:
        comperr(commands, "No remembered search string: %s");
        break;

    case 42:
        comperr(commands, "\\( \\) imbalance: %s");
        break;

    case 43:
        comperr(commands, "Too many \\(: %s");
        break;

    case 44:
        comperr(commands, "More than 2 numbers given in \\{ \\}: %s");
        break;

    case 45:
        comperr(commands, "} expected after \\: %s");
        break;

    case 46:
        comperr(commands, "First number exceeds second in \\{ \\}: %s");
        break;

    case 49:
        comperr(commands, "[ ] imbalance: %s");
        break;

    case 50:
        comperr(commands, SEDERR_TMMES);
        break;

    default:
        comperr(commands, "Unknown regexp error code %s\n");
        break;
    }
    return (0);
}


/*
 * Compile the regular expression found at commands->cp into the opcode
 * buffer starting at `ep'.
 *
 *   commands  supplies the input cursor (cp) and the line buffer used in
 *             error reports; cp is advanced past the expression on success
 *   compargs  receives circf (set when the expression starts with '^') and
 *             nbra (number of \( groups opened)
 *   ep        where compiled output is written
 *   endbuf    limit of the output buffer; exceeding it is error 50
 *   seof      the delimiter character that terminates the expression
 *
 * Returns a pointer just past the compiled expression.  If the expression
 * is empty (the first character is the delimiter or a newline) nothing is
 * written and `ep' is returned unchanged.  On error the problem is reported
 * through regerr() and NULL is returned.
 *
 * NOTE(review): PLACE() is not defined in this file; presumably it comes
 * from regexp.h and sets a bit in the class bitmap at `ep' via bittab.
 */
char *sed_compile(sed_commands_t *commands, sed_comp_args *compargs,
                  char *ep, char *endbuf, int seof)
{
    int c;
    int eof = seof;
    char *lastep;               /* start of the most recent atom, or NULL */
    int cclcnt;
    char bracket[NBRA], *bracketp;  /* stack of currently open \( groups */
    int closed;                 /* number of \) seen so far */
    int neg;
    int lc;                     /* previous character inside [...] */
    int i, cflg;
    int iflag; /* used for non-ascii characters in brackets */
    char *sp = commands->cp;
    int regerrno = 0;

    lastep = NULL;
    if ((c = GETC()) == eof || c == '\n') {
        /* Empty expression: leave a newline for the caller to see. */
        if (c == '\n') {
            UNGETC(c);
        }
        commands->cp = sp;
        goto out;
    }
    bracketp = bracket;
    compargs->circf = closed = compargs->nbra = 0;
    if (c == '^') {
        compargs->circf++;
    }
    else {
        UNGETC(c);
    }
    while (1) {
        if (ep >= endbuf) {
            SEDCOMPILE_ERROR(50);
        }
        c = GETC();
        /* '*' and "\{" modify the previous atom, so keep lastep for them. */
        if (c != '*' && ((c != '\\') || (PEEKC() != '{'))) {
            lastep = ep;
        }
        if (c == eof) {
            *ep++ = CCEOF;
            if (bracketp != bracket) {
                SEDCOMPILE_ERROR(42);
            }
            commands->cp = sp;
            goto out;
        }
        switch (c) {

        case '.':
            *ep++ = CDOT;
            continue;

        case '\n':
            SEDCOMPILE_ERROR(36);

        case '*':
            if (lastep == NULL || *lastep == CBRA || *lastep == CKET) {
                goto defchar;
            }
            *lastep |= STAR;
            continue;

        case '$':
            /* '$' is an anchor only at the very end of the expression. */
            if (PEEKC() != eof && PEEKC() != '\n') {
                goto defchar;
            }
            *ep++ = CDOL;
            continue;

        case '[':
            /* Need room for the opcode plus a 16-byte bitmap. */
            if (endbuf - ep <= 17) {
                SEDCOMPILE_ERROR(50);
            }

            *ep++ = CCL;
            lc = 0;
            for (i = 0; i < 16; i++) {
                ep[i] = 0;
            }

            neg = 0;
            if ((c = GETC()) == '^') {
                neg = 1;
                c = GETC();
            }
            iflag = 1;
            do {
                c &= 0377;
                if (c == '\0' || c == '\n') {
                    SEDCOMPILE_ERROR(49);
                }
                if ((c & 0200) && iflag) {
                    /* First 8-bit character: widen to a 32-byte bitmap. */
                    iflag = 0;
                    if (endbuf - ep <= 32) {
                        SEDCOMPILE_ERROR(50);
                    }
                    ep[-1] = CXCL;
                    for (i = 16; i < 32; i++) {
                        ep[i] = 0;
                    }
                }
                if (c == '-' && lc != 0) {
                    if ((c = GETC()) == ']') {
                        /* Trailing '-' is a literal. */
                        PLACE('-');
                        break;
                    }
                    /*
                     * The range endpoint must be a real character; without
                     * this check a NUL here would let the loop read past
                     * the end of the input string.
                     */
                    if (c == '\0' || c == '\n') {
                        SEDCOMPILE_ERROR(49);
                    }
                    if ((c & 0200) && iflag) {
                        iflag = 0;
                        if (endbuf - ep <= 32) {
                            SEDCOMPILE_ERROR(50);
                        }
                        ep[-1] = CXCL;
                        for (i = 16; i < 32; i++) {
                            ep[i] = 0;
                        }
                    }
                    while (lc < c) {
                        PLACE(lc);
                        lc++;
                    }
                }
                lc = c;
                PLACE(c);
            } while ((c = GETC()) != ']');

            /* From here on iflag is the bitmap size in bytes. */
            if (iflag) {
                iflag = 16;
            }
            else {
                iflag = 32;
            }

            if (neg) {
                if (iflag == 32) {
                    for (cclcnt = 0; cclcnt < iflag; cclcnt++) {
                        ep[cclcnt] ^= 0377;
                    }
                    ep[0] &= 0376;
                }
                else {
                    ep[-1] = NCCL;
                    /* make nulls match so test fails */
                    ep[0] |= 01;
                }
            }

            ep += iflag;

            continue;

        case '\\':
            switch (c = GETC()) {

            case '(':
                if (compargs->nbra >= NBRA) {
                    SEDCOMPILE_ERROR(43);
                }
                *bracketp++ = compargs->nbra;
                *ep++ = CBRA;
                *ep++ = compargs->nbra++;
                continue;

            case ')':
                if (bracketp <= bracket) {
                    SEDCOMPILE_ERROR(42);
                }
                *ep++ = CKET;
                *ep++ = *--bracketp;
                closed++;
                continue;

            case '{':
                if (lastep == NULL) {
                    goto defchar;
                }
                *lastep |= RNGE;
                cflg = 0;
            nlim:
                c = GETC();
                i = 0;
                do {
                    if ('0' <= c && c <= '9') {
                        i = 10 * i + c - '0';
                        /*
                         * Saturate so an arbitrarily long digit string
                         * cannot overflow; any value >= 255 is rejected
                         * below anyway.
                         */
                        if (i > 255) {
                            i = 255;
                        }
                    }
                    else {
                        SEDCOMPILE_ERROR(16);
                    }
                } while (((c = GETC()) != '\\') && (c != ','));
                if (i >= 255) {
                    SEDCOMPILE_ERROR(11);
                }
                *ep++ = i;
                if (c == ',') {
                    if (cflg++) {
                        SEDCOMPILE_ERROR(44);
                    }
                    if ((c = GETC()) == '\\') {
                        /* "\{m,\}": 255 marks an open upper bound. */
                        *ep++ = (char) 255;
                    }
                    else {
                        UNGETC(c);
                        goto nlim;
                        /* get 2'nd number */
                    }
                }
                if (GETC() != '}') {
                    SEDCOMPILE_ERROR(45);
                }
                if (!cflg) {    /* one number */
                    *ep++ = i;
                }
                else if ((ep[-1] & 0377) < (ep[-2] & 0377)) {
                    SEDCOMPILE_ERROR(46);
                }
                continue;

            case '\n':
                SEDCOMPILE_ERROR(36);

            case 'n':
                c = '\n';
                goto defchar;

            default:
                if (c >= '1' && c <= '9') {
                    /* Back-reference must name an already closed group. */
                    if ((c -= '1') >= closed) {
                        SEDCOMPILE_ERROR(25);
                    }
                    *ep++ = CBACK;
                    *ep++ = c;
                    continue;
                }
            }
            /* Drop through to default to use \ to turn off special chars */

        defchar:
        default:
            lastep = ep;
            *ep++ = CCHR;
            *ep++ = c;
        }
    }
out:
    if (regerrno) {
        regerr(commands, regerrno);
        return (char *) NULL;
    }
    /* XXX : Basant : what extra */
    /* int reglength = (int)(ep - expbuf); */
    return ep;
}

/*
 * Search the NUL-terminated string `p1' for a match of the compiled
 * expression `p2'.
 *
 * If `circf' is non-zero the expression is anchored and only a match
 * starting at p1 is attempted.  Otherwise every start position up to and
 * including the terminating NUL is tried in turn.
 *
 * Returns 1 on a match, with vars->loc1 set to the start of the match
 * (vars->loc2 is set by _advance()), or 0 if nothing matches.
 */
int sed_step(char *p1, char *p2, int circf, step_vars_storage *vars)
{
    int c;


    if (circf) {
        vars->loc1 = p1;
        return (_advance(p1, p2, vars));
    }
    /* fast check for first character */
    if (*p2 == CCHR) {
        c = p2[1];
        do {
            if (*p1 != c) {
                continue;
            }
            if (_advance(p1, p2, vars)) {
                vars->loc1 = p1;
                return (1);
            }
        } while (*p1++);
        return (0);
    }
    /* regular algorithm */
    do {
        if (_advance(p1, p2, vars)) {
            vars->loc1 = p1;
            return (1);
        }
    } while (*p1++);
    return (0);
}

/*
 * Try to match the compiled expression `ep' against the text at `lp',
 * starting exactly at lp.
 *
 * Returns 1 when the end-of-expression opcode is reached, storing the end
 * of the matched text in vars->loc2; returns 0 if the match fails.  Group
 * boundaries are recorded in vars->braslist / vars->braelist as they are
 * passed.  Starred and ranged atoms are matched greedily and then backed
 * off one step at a time through the shared `star' tail, which recurses
 * for the remainder of the expression.
 *
 * NOTE(review): ISTHERE() is not defined in this file; presumably it comes
 * from regexp.h and tests a bit in the class bitmap at `ep' via bittab.
 */
static int _advance(char *lp, char *ep, step_vars_storage *vars)
{
    char *curlp;
    int c;
    char *bbeg;
    char neg;
    int ct;
    int epint; /* int value of *ep */

    while (1) {
        neg = 0;
        switch (*ep++) {

        case CCHR:
            if (*ep++ == *lp++) {
                continue;
            }
            return (0);

        case CDOT:
            if (*lp++) {
                continue;
            }
            return (0);

        case CDOL:
            if (*lp == 0) {
                continue;
            }
            return (0);

        case CCEOF:
            vars->loc2 = lp;
            return (1);

        case CXCL:
            c = (unsigned char)*lp++;
            if (ISTHERE(c)) {
                ep += 32;
                continue;
            }
            return (0);

        case NCCL:
            neg = 1;
            /* fallthrough */

        case CCL:
            c = *lp++;
            if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
                ep += 16;
                continue;
            }
            return (0);

        case CBRA:
            epint = (int) *ep;
            vars->braslist[epint] = lp;
            ep++;
            continue;

        case CKET:
            epint = (int) *ep;
            vars->braelist[epint] = lp;
            ep++;
            continue;

        case CCHR | RNGE:
            c = *ep++;
            getrnge(ep, vars);
            /* Mandatory repetitions first ... */
            while (vars->low--) {
                if (*lp++ != c) {
                    return (0);
                }
            }
            /* ... then as many optional ones as are available. */
            curlp = lp;
            while (vars->size--) {
                if (*lp++ != c) {
                    break;
                }
            }
            /* Loop ran out without a mismatch: step one further so the
             * `--lp' in the star tail lands just past the last match. */
            if (vars->size < 0) {
                lp++;
            }
            ep += 2;
            goto star;

        case CDOT | RNGE:
            getrnge(ep, vars);
            while (vars->low--) {
                if (*lp++ == '\0') {
                    return (0);
                }
            }
            curlp = lp;
            while (vars->size--) {
                if (*lp++ == '\0') {
                    break;
                }
            }
            if (vars->size < 0) {
                lp++;
            }
            ep += 2;
            goto star;

        case CXCL | RNGE:
            getrnge(ep + 32, vars);
            while (vars->low--) {
                c = (unsigned char)*lp++;
                if (!ISTHERE(c)) {
                    return (0);
                }
            }
            curlp = lp;
            while (vars->size--) {
                c = (unsigned char)*lp++;
                if (!ISTHERE(c)) {
                    break;
                }
            }
            if (vars->size < 0) {
                lp++;
            }
            ep += 34;        /* 32 + 2 */
            goto star;

        case NCCL | RNGE:
            neg = 1;
            /* fallthrough */

        case CCL | RNGE:
            getrnge(ep + 16, vars);
            while (vars->low--) {
                c = *lp++;
                if (((c & 0200) || !ISTHERE(c)) ^ neg) {
                    return (0);
                }
            }
            curlp = lp;
            while (vars->size--) {
                c = *lp++;
                if (((c & 0200) || !ISTHERE(c)) ^ neg) {
                    break;
                }
            }
            if (vars->size < 0) {
                lp++;
            }
            ep += 18;         /* 16 + 2 */
            goto star;

        case CBACK:
            epint = (int) *ep;
            bbeg = vars->braslist[epint];
            ct = vars->braelist[epint] - bbeg;
            ep++;

            if (ecmp(bbeg, lp, ct)) {
                lp += ct;
                continue;
            }
            return (0);

        case CBACK | STAR:
            epint = (int) *ep;
            bbeg = vars->braslist[epint];
            ct = vars->braelist[epint] - bbeg;
            ep++;
            /*
             * If the group captured the empty string, any number of
             * repetitions is still empty.  Looping here would never
             * terminate (comparing zero bytes always succeeds and lp
             * never moves), so simply carry on with the rest of the
             * expression at the current position.
             */
            if (ct == 0) {
                continue;
            }
            curlp = lp;
            while (ecmp(bbeg, lp, ct)) {
                lp += ct;
            }

            while (lp >= curlp) {
                if (_advance(lp, ep, vars)) {
                    return (1);
                }
                lp -= ct;
            }
            return (0);


        case CDOT | STAR:
            curlp = lp;
            while (*lp++)
                ;
            goto star;

        case CCHR | STAR:
            curlp = lp;
            while (*lp++ == *ep)
                ;
            ep++;
            goto star;

        case CXCL | STAR:
            curlp = lp;
            do {
                c = (unsigned char)*lp++;
            } while (ISTHERE(c));
            ep += 32;
            goto star;

        case NCCL | STAR:
            neg = 1;
            /* fallthrough */

        case CCL | STAR:
            curlp = lp;
            do {
                c = *lp++;
            } while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
            ep += 16;
            goto star;

        star:
            /*
             * lp is one past the first character that failed to match.
             * Back off one character at a time, trying the rest of the
             * expression at each position, down to curlp.
             */
            do {
                if (--lp == vars->locs) {
                    break;
                }
                if (_advance(lp, ep, vars)) {
                    return (1);
                }
            } while (lp > curlp);
            return (0);

        }
    }
}

/*
 * Decode the two-byte repetition range stored at `str' by "\{m,n\}".
 * vars->low receives the minimum count; vars->size receives the number of
 * additional optional repetitions, or 20000 when the second byte is 255
 * (open upper bound).
 */
static void getrnge(char *str, step_vars_storage *vars)
{
    vars->low = *str++ & 0377;
    vars->size = ((*str & 0377) == 255) ? 20000 : (*str & 0377) - vars->low;
}