GODOT IS OPEN SOURCE

This commit is contained in:
Juan Linietsky 2014-02-09 22:10:30 -03:00
parent 0e49da1687
commit 0b806ee0fc
3138 changed files with 1294441 additions and 0 deletions

10
drivers/trex/SCsub Normal file
View file

@ -0,0 +1,10 @@
Import('env')

# Sources of the T-Rex driver: the C regex engine plus the RegEx wrapper
# class that is built on top of it.
trex_sources = ['trex.c', 'regex.cpp']

env.add_source_files(env.drivers_sources, trex_sources)

75
drivers/trex/TRexpp.h Normal file
View file

@ -0,0 +1,75 @@
#ifndef _TREXPP_H_
#define _TREXPP_H_
/***************************************************************
T-Rex a tiny regular expression library
Copyright (C) 2003-2004 Alberto Demichelis
This software is provided 'as-is', without any express
or implied warranty. In no event will the authors be held
liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for
any purpose, including commercial applications, and to alter
it and redistribute it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented;
you must not claim that you wrote the original software.
If you use this software in a product, an acknowledgment
in the product documentation would be appreciated but
is not required.
2. Altered source versions must be plainly marked as such,
and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any
source distribution.
****************************************************************/
extern "C" {
#include "trex.h"
}
// Exception thrown by TRexpp::Compile() when a pattern fails to compile.
// `desc` holds the error string reported by trex_compile().
struct TRexParseException {
	TRexParseException(const TRexChar *c) : desc(c) {}
	const TRexChar *desc;
};
class TRexpp {
public:
TRexpp() { _exp = (TRex *)0; }
~TRexpp() { CleanUp(); }
// compiles a regular expression
void Compile(const TRexChar *pattern) {
const TRexChar *error;
CleanUp();
if(!(_exp = trex_compile(pattern,&error)))
throw TRexParseException(error);
}
// return true if the given text match the expression
bool Match(const TRexChar* text) {
return _exp?(trex_match(_exp,text) != 0):false;
}
// Searches for the first match of the expression in a zero terminated string
bool Search(const TRexChar* text, const TRexChar** out_begin, const TRexChar** out_end) {
return _exp?(trex_search(_exp,text,out_begin,out_end) != 0):false;
}
// Searches for the first match of the expression in a string sarting at text_begin and ending at text_end
bool SearchRange(const TRexChar* text_begin,const TRexChar* text_end,const TRexChar** out_begin, const TRexChar** out_end) {
return _exp?(trex_searchrange(_exp,text_begin,text_end,out_begin,out_end) != 0):false;
}
bool GetSubExp(int n, const TRexChar** out_begin, int *out_len)
{
TRexMatch match;
TRexBool res = _exp?(trex_getsubexp(_exp,n,&match)):TRex_False;
if(res) {
*out_begin = match.begin;
*out_len = match.len;
return true;
}
return false;
}
int GetSubExpCount() { return _exp?trex_getsubexpcount(_exp):0; }
private:
void CleanUp() { if(_exp) trex_free(_exp); _exp = (TRex *)0; }
TRex *_exp;
};
#endif //_TREXPP_H_

15
drivers/trex/history.txt Normal file
View file

@ -0,0 +1,15 @@
===version 1.3
-fixed a bug for GCC users(thx Brendan)
===version 1.2
-added word boundary match \b and \B
-added vertical tab escape \v
-\w now also matches '_' (underscore)
-fixed greediness for * and +
===version 1.1 , April 1, 2004
-fixed some minor bugs
-added predefined character classes(\w,\W,\s,\S etc...)
===version 1.0 , February 23, 2004
-first public release

171
drivers/trex/readme.txt Normal file
View file

@ -0,0 +1,171 @@
T-REX 1.3 http://tiny-rex.sourceforge.net
----------------------------------------------------------------------
T-Rex a tiny regular expression library
Copyright (C) 2003-2006 Alberto Demichelis
This software is provided 'as-is', without any express
or implied warranty. In no event will the authors be held
liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for
any purpose, including commercial applications, and to alter
it and redistribute it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented;
you must not claim that you wrote the original software.
If you use this software in a product, an acknowledgment
in the product documentation would be appreciated but
is not required.
2. Altered source versions must be plainly marked as such,
and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any
source distribution.
----------------------------------------------------------------------
TRex implements the following expressions
\ Quote the next metacharacter
^ Match the beginning of the string
. Match any character
$ Match the end of the string
| Alternation
() Grouping (creates a capture)
[] Character class
==GREEDY CLOSURES==
* Match 0 or more times
+ Match 1 or more times
? Match 1 or 0 times
{n} Match exactly n times
{n,} Match at least n times
{n,m} Match at least n but not more than m times
==ESCAPE CHARACTERS==
\t tab (HT, TAB)
\n newline (LF, NL)
\r return (CR)
\f form feed (FF)
\v vertical tab (VT)
==PREDEFINED CLASSES==
\l lowercase next char
\u uppercase next char
\a letters
\A non letters
\w alphanumeric and underscore [0-9a-zA-Z_]
\W non alphanumeric
\s space
\S non space
\d digits
\D non digits
\x hexadecimal digits
\X non hexadecimal digits
\c control characters
\C non control characters
\p punctuation
\P non punctuation
\b word boundary
\B non word boundary
----------------------------------------------------------------------
API DOC
----------------------------------------------------------------------
TRex *trex_compile(const TRexChar *pattern,const TRexChar **error);
compiles an expression and returns a pointer to the compiled version.
in case of failure returns NULL. The returned object has to be deleted
through the function trex_free().
pattern
a pointer to a zero terminated string containing the pattern that
has to be compiled.
error
a pointer to a string pointer that will be set with an error string
in case of failure.
----------------------------------------------------------------------
void trex_free(TRex *exp)
deletes an expression structure created with trex_compile()
exp
the expression structure that has to be deleted
----------------------------------------------------------------------
TRexBool trex_match(TRex* exp,const TRexChar* text)
returns TRex_True if the string specified in the parameter text is an
exact match of the expression, otherwise returns TRex_False.
exp
the compiled expression
text
the string that has to be tested
----------------------------------------------------------------------
TRexBool trex_search(TRex* exp,const TRexChar* text, const TRexChar** out_begin, const TRexChar** out_end)
searches for the first match of the expression in the string specified in the parameter text.
if a match is found, returns TRex_True and sets out_begin to the beginning of the
match and out_end to the end of the match; otherwise returns TRex_False.
exp
the compiled expression
text
the string that has to be tested
out_begin
a pointer to a string pointer that will be set with the beginning of the match
out_end
a pointer to a string pointer that will be set with the end of the match
----------------------------------------------------------------------
TREX_API TRexBool trex_searchrange(TRex* exp,const TRexChar* text_begin,const TRexChar* text_end,const TRexChar** out_begin, const TRexChar** out_end)
searches for the first match of the expression in the string delimited
by the parameters text_begin and text_end.
if a match is found, returns TRex_True and sets out_begin to the beginning of the
match and out_end to the end of the match; otherwise returns TRex_False.
exp
the compiled expression
text_begin
a pointer to the beginning of the string that has to be tested
text_end
a pointer to the end of the string that has to be tested
out_begin
a pointer to a string pointer that will be set with the beginning of the match
out_end
a pointer to a string pointer that will be set with the end of the match
----------------------------------------------------------------------
int trex_getsubexpcount(TRex* exp)
returns the number of sub expressions matched by the expression
exp
the compiled expression
---------------------------------------------------------------------
TRexBool trex_getsubexp(TRex* exp, int n, TRexMatch *submatch)
retrieves the begin pointer and the length of the sub expression indexed
by n. The result is passed through the struct TRexMatch:
typedef struct {
const TRexChar *begin;
int len;
} TRexMatch;
the function returns TRex_True if n is valid index otherwise TRex_False.
exp
the compiled expression
n
the index of the submatch
submatch
a pointer to structure that will store the result
this function also works after a match operation has been performed.

163
drivers/trex/regex.cpp Normal file
View file

@ -0,0 +1,163 @@
/*************************************************/
/* regex.cpp */
/*************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/*************************************************/
/* Source code within this file is: */
/* (c) 2007-2010 Juan Linietsky, Ariel Manzur */
/* All Rights Reserved. */
/*************************************************/
#include "regex.h"
extern "C" {
#define _UNICODE
#include "trex.h"
};
// Registers the methods of RegEx that are bound through ObjectTypeDB.
void RegEx::_bind_methods() {

	ObjectTypeDB::bind_method(_MD("compile","pattern"),&RegEx::compile);

	// "start" defaults to 0 and "end" to -1.
	ObjectTypeDB::bind_method(_MD("find","text", "start","end"),&RegEx::_bind_find, DEFVAL(0), DEFVAL(-1));

	ObjectTypeDB::bind_method(_MD("get_captures"),&RegEx::_bind_get_captures);
}
// Compiles p_pattern, discarding any expression compiled earlier.
// Returns OK on success and FAILED when trex_compile() rejects the pattern.
Error RegEx::compile(const String& p_pattern) {

	// Release the previous expression first so it is never leaked.
	clear();

	// Receives the error string from trex_compile(); it is not reported further.
	const TRexChar *compile_error;
	exp = trex_compile(p_pattern.c_str(), &compile_error);

	ERR_FAIL_COND_V(!exp, FAILED);

	return OK;
}
// Bound variant of find(): returns the start offset of the first match,
// or -1 when nothing matched.
int RegEx::_bind_find(const String& p_text, int p_start, int p_end) const {

	int match_begin, match_end;
	if (!find(p_text, match_begin, match_end, NULL, p_start, p_end))
		return -1;

	return match_begin;
}
bool RegEx::find(const String& p_text, int& p_rstart, int &p_rend, List<String>* p_captures, int p_start, int p_end) const {
ERR_FAIL_COND_V( !exp, false );
text=p_text;
const CharType* str = p_text.c_str();
const CharType* start = str + p_start;
const CharType* end = str + (p_end == -1?p_text.size():p_end);
const CharType* out_begin;
const CharType* out_end;
bool ret = trex_searchrange(exp, start, end, &out_begin, &out_end);
if (ret) {
p_rstart = out_begin - str;
p_rend = out_end - str;
if (p_captures) {
int count = get_capture_count();
for (int i=0; i<count; i++) {
int start, len;
get_capture_limits(i, start, len);
p_captures->push_back(p_text.substr(start, len));
};
};
} else {
p_rstart = -1;
};
return ret;
};
bool RegEx::match(const String& p_text, List<String>* p_captures, int p_start, int p_end) const {
ERR_FAIL_COND_V( !exp, false );
int start, end;
return find(p_text, start, end, p_captures, p_start, p_end);
};
// Number of sub expressions of the compiled pattern, as reported by
// trex_getsubexpcount(); -1 (with an error) when nothing is compiled.
int RegEx::get_capture_count() const {

	ERR_FAIL_COND_V( exp == NULL, -1 );

	const int subexp_total = trex_getsubexpcount(exp);
	return subexp_total;
}
// Retrieves the position of capture p_capture inside the last searched text.
// p_start receives the offset relative to text.c_str(), p_len the length.
// Returns ERR_UNCONFIGURED when nothing is compiled and FAILED when
// trex_getsubexp() rejects the index.
Error RegEx::get_capture_limits(int p_capture, int& p_start, int& p_len) const {

	ERR_FAIL_COND_V( exp == NULL, ERR_UNCONFIGURED );

	TRexMatch capture;
	TRexBool res = trex_getsubexp(exp, p_capture, &capture);
	ERR_FAIL_COND_V( !res, FAILED );

	p_len = capture.len;
	p_start = (int)(capture.begin - text.c_str());

	return OK;
}
// Returns the text of capture p_idx from the last search, or an empty
// string when the capture is empty or cannot be retrieved.
String RegEx::get_capture(int p_idx) const {

	ERR_FAIL_COND_V( exp == NULL, "" );

	int offset, length;
	Error ret = get_capture_limits(p_idx, offset, length);
	ERR_FAIL_COND_V(ret != OK, "");

	if (length != 0)
		return text.substr(offset, length);

	return "";
}
// Bound helper: collects every capture of the last search into a StringArray.
StringArray RegEx::_bind_get_captures() const {

	StringArray captures;
	const int total = get_capture_count();

	for (int idx = 0; idx < total; ++idx) {

		String piece = get_capture(idx);
		captures.push_back(piece);
	}

	return captures;
}
// True when an expression is currently compiled.
bool RegEx::is_valid() const {

	return exp ? true : false;
}
// Frees the compiled expression, if there is one, and resets the pointer.
void RegEx::clear() {

	if (!exp)
		return;

	trex_free(exp);
	exp = NULL;
}
// Constructs the object and immediately compiles p_pattern; the result of
// compile() is not checked here, use is_valid() afterwards.
RegEx::RegEx(const String& p_pattern) : exp(NULL) {

	compile(p_pattern);
}
// Constructs an object with no compiled expression.
RegEx::RegEx() : exp(NULL) {
}
// Releases the compiled expression (same teardown as clear()).
RegEx::~RegEx() {

	if (exp) {
		trex_free(exp);
		exp = NULL;
	}
}

50
drivers/trex/regex.h Normal file
View file

@ -0,0 +1,50 @@
/*************************************************/
/* regex.h */
/*************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/*************************************************/
/* Source code within this file is: */
/* (c) 2007-2010 Juan Linietsky, Ariel Manzur */
/* All Rights Reserved. */
/*************************************************/
#ifndef REGEX_H
#define REGEX_H
#include "ustring.h"
#include "list.h"
#include "core/reference.h"
struct TRex;
// Regular expression object built on top of the T-Rex engine.
class RegEx : public Reference {

	OBJ_TYPE(RegEx, Reference);

	// Text passed to the most recent find(); capture offsets are computed
	// relative to it, hence mutable so const searches can update it.
	mutable String text;
	// Compiled T-Rex expression, NULL when nothing is compiled.
	TRex *exp;

protected:

	static void _bind_methods();

	// Helpers registered by _bind_methods().
	int _bind_find(const String& p_text, int p_start = 0, int p_end = -1) const;
	StringArray _bind_get_captures() const;

public:

	// Compilation / lifetime of the expression.
	void clear();
	Error compile(const String& p_pattern);
	bool is_valid() const;

	// Searching. p_end == -1 means "until the end of p_text".
	bool match(const String& p_text, List<String>* p_captures = NULL, int p_start = 0, int p_end = -1) const;
	bool find(const String& p_text, int& p_rstart, int &p_rend, List<String>* p_captures = NULL, int p_start = 0, int p_end = -1) const;

	// Access to the captures of the last search.
	int get_capture_count() const;
	Error get_capture_limits(int p_capture, int& p_start, int& p_len) const;
	String get_capture(int p_idx) const;

	RegEx();
	RegEx(const String& p_pattern);
	~RegEx();
};
#endif // REGEX_H

41
drivers/trex/test.c Normal file
View file

@ -0,0 +1,41 @@
#include "trex.h"
#include <stdio.h>
#include <string.h>
#ifdef _UNICODE
#define trex_sprintf swprintf
#else
#define trex_sprintf sprintf
#endif
int main(int argc, char* argv[])
{
const TRexChar *begin,*end;
TRexChar sTemp[200];
const TRexChar *error = NULL;
TRex *x = trex_compile(_TREXC("(x{1,5})xx"),&error);
if(x) {
trex_sprintf(sTemp,_TREXC("xxxxxxx"));
if(trex_search(x,sTemp,&begin,&end))
{
int i,n = trex_getsubexpcount(x);
TRexMatch match;
for(i = 0; i < n; i++)
{
TRexChar t[200];
trex_getsubexp(x,i,&match);
trex_sprintf(t,_TREXC("[%%d]%%.%ds\n"),match.len);
trex_printf(t,i,match.begin);
}
trex_printf(_TREXC("match! %d sub matches\n"),trex_getsubexpcount(x));
}
else {
trex_printf(_TREXC("no match!\n"));
}
trex_free(x);
}
else {
trex_printf(_TREXC("compilation error [%s]!\n"),error?error:_TREXC("undefined"));
}
return 0;
}

643
drivers/trex/trex.c Normal file
View file

@ -0,0 +1,643 @@
/* see copyright notice in trex.h */
#include <ctype.h>
#include <setjmp.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
#include "trex.h"
/*
 * Character-width dependent helpers used by the compiler and matcher.
 * The guard previously tested the misspelled macro _UINCODE, so the narrow
 * branch was always selected even when TRexChar is wchar_t, and the wide
 * _SC() pasted the wrong token (L##c instead of L##x).
 */
#ifdef _UNICODE
#define scisprint iswprint
#define scstrlen wcslen
#define scprintf wprintf
#define _SC(x) L##x
#else
#define scisprint isprint
#define scstrlen strlen
#define scprintf printf
#define _SC(x) (x)
#endif
#ifdef _DEBUG
#include <stdio.h>
/* Printable node names for the debug dump, indexed by (node type - MAX_CHAR). */
static const TRexChar *g_nnames[] = {
	_SC("NONE"),
	_SC("OP_GREEDY"),
	_SC("OP_OR"),
	_SC("OP_EXPR"),
	_SC("OP_NOCAPEXPR"),
	_SC("OP_DOT"),
	_SC("OP_CLASS"),
	_SC("OP_CCLASS"),
	_SC("OP_NCLASS"),
	_SC("OP_RANGE"),
	_SC("OP_CHAR"),
	_SC("OP_EOL"),
	_SC("OP_BOL"),
	_SC("OP_WB")
};
#endif
#define OP_GREEDY (MAX_CHAR+1) // * + ? {n}
#define OP_OR (MAX_CHAR+2)
#define OP_EXPR (MAX_CHAR+3) //parentesis ()
#define OP_NOCAPEXPR (MAX_CHAR+4) //parentesis (?:)
#define OP_DOT (MAX_CHAR+5)
#define OP_CLASS (MAX_CHAR+6)
#define OP_CCLASS (MAX_CHAR+7)
#define OP_NCLASS (MAX_CHAR+8) //negates class the [^
#define OP_RANGE (MAX_CHAR+9)
#define OP_CHAR (MAX_CHAR+10)
#define OP_EOL (MAX_CHAR+11)
#define OP_BOL (MAX_CHAR+12)
#define OP_WB (MAX_CHAR+13)
#define TREX_SYMBOL_ANY_CHAR ('.')
#define TREX_SYMBOL_GREEDY_ONE_OR_MORE ('+')
#define TREX_SYMBOL_GREEDY_ZERO_OR_MORE ('*')
#define TREX_SYMBOL_GREEDY_ZERO_OR_ONE ('?')
#define TREX_SYMBOL_BRANCH ('|')
#define TREX_SYMBOL_END_OF_STRING ('$')
#define TREX_SYMBOL_BEGINNING_OF_STRING ('^')
#define TREX_SYMBOL_ESCAPE_CHAR ('\\')
/* Node type: an OP_* opcode, or a literal character value. */
typedef int TRexNodeType;

/*
 * One node of the compiled expression. Nodes live in TRex::_nodes and refer
 * to each other by index; -1 means "none".
 */
typedef struct tagTRexNode {
	TRexNodeType type;
	int left;   /* operand: child node index, class id, range start or \b / \B selector */
	int right;  /* operand: capture index (OP_EXPR), packed min/max (OP_GREEDY), range end, right branch (OP_OR) */
	int next;   /* index of the node that follows this one */
} TRexNode;
/* State of a compiled expression, shared by the compiler and the matcher. */
struct TRex {
	const TRexChar *_eol;     /* end of the text being matched */
	const TRexChar *_bol;     /* beginning of the text being matched */
	const TRexChar *_p;       /* read cursor in the pattern while compiling */
	int _first;               /* index of the root node */
	int _op;
	TRexNode *_nodes;         /* node array */
	int _nallocated;          /* capacity of _nodes, in nodes */
	int _nsize;               /* number of nodes in use */
	int _nsubexpr;            /* number of OP_EXPR nodes created */
	TRexMatch *_matches;      /* one entry per sub expression */
	int _currsubexp;          /* next capture slot to fill during a match */
	void *_jmpbuf;            /* heap allocated jmp_buf used by trex_error() */
	const TRexChar **_error;  /* where trex_error() stores its message, may be NULL */
};
/* Forward declaration: trex_element() recurses into trex_list() for groups. */
static int trex_list(TRex *exp);
/*
 * Appends a node of the given type to exp->_nodes and returns its index.
 * All links of the new node are -1, except that an OP_EXPR node receives the
 * next capture index in `right`.
 *
 * The array grows geometrically. A capacity of 0 is grown to 1: doubling 0
 * would leave the capacity at 0 and make the store below overflow the buffer.
 * Running out of memory aborts, because this function is also called before
 * the error jump buffer of the expression has been set up.
 *
 * Growing may move the array, so callers must not keep TRexNode pointers
 * across a call.
 */
static int trex_newnode(TRex *exp, TRexNodeType type)
{
	TRexNode n;
	n.type = type;
	n.next = n.right = n.left = -1;
	if(type == OP_EXPR)
		n.right = exp->_nsubexpr++;
	if(exp->_nallocated < (exp->_nsize + 1)) {
		int newcap = (exp->_nallocated > 0) ? exp->_nallocated * 2 : 1;
		TRexNode *grown = (TRexNode *)realloc(exp->_nodes, newcap * sizeof(TRexNode));
		if(!grown)
			abort();
		exp->_nodes = grown;
		exp->_nallocated = newcap;
	}
	exp->_nodes[exp->_nsize] = n;
	return exp->_nsize++;
}
/*
 * Reports a compile error: stores the message through exp->_error (when the
 * caller supplied one) and jumps back to the setjmp() point. Never returns.
 */
static void trex_error(TRex *exp,const TRexChar *error)
{
	jmp_buf *env = (jmp_buf *)exp->_jmpbuf;
	if(exp->_error != NULL) {
		*exp->_error = error;
	}
	longjmp(*env,-1);
}
/*
 * Consumes the pattern character n, or raises a compile error when the next
 * character is something else.
 */
static void trex_expect(TRex *exp, int n)
{
	if((*exp->_p) == n) {
		exp->_p++;
		return;
	}
	trex_error(exp, _SC("expected paren"));
}
/*
 * Reads one (possibly escaped) character from the pattern and returns its
 * value. \v \n \t \r \f map to the corresponding control characters, any
 * other escaped character stands for itself.
 *
 * Raises a compile error for a non printable character and for a backslash
 * that is the last character of the pattern; the latter used to advance the
 * cursor past the terminating NUL.
 */
static TRexChar trex_escapechar(TRex *exp)
{
	if(*exp->_p == TREX_SYMBOL_ESCAPE_CHAR){
		exp->_p++;
		switch(*exp->_p) {
		case '\0':
			trex_error(exp,_SC("letter expected"));
			break; /* not reached, trex_error() does not return */
		case 'v': exp->_p++; return '\v';
		case 'n': exp->_p++; return '\n';
		case 't': exp->_p++; return '\t';
		case 'r': exp->_p++; return '\r';
		case 'f': exp->_p++; return '\f';
		default: return (*exp->_p++);
		}
	} else if(!scisprint(*exp->_p)) trex_error(exp,_SC("letter expected"));
	return (*exp->_p++);
}
/* Creates an OP_CCLASS node for the predefined class letter classid. */
static int trex_charclass(TRex *exp,int classid)
{
	const int idx = trex_newnode(exp,OP_CCLASS);
	exp->_nodes[idx].left = classid;
	return idx;
}
/*
 * Parses a single character (or escape sequence) and returns the index of
 * the node created for it.
 *
 * Escapes produce a control character node, a predefined class node
 * (OP_CCLASS) or, for \b and \B outside a character class, a word boundary
 * node (OP_WB). Inside a class (isclass true) \b and \B are literal letters.
 *
 * Raises a compile error for a non printable character and for a backslash
 * that ends the pattern; the latter used to move the cursor past the
 * terminating NUL.
 */
static int trex_charnode(TRex *exp,TRexBool isclass)
{
	TRexChar t;
	if(*exp->_p == TREX_SYMBOL_ESCAPE_CHAR) {
		exp->_p++;
		switch(*exp->_p) {
			case '\0':
				trex_error(exp,_SC("letter expected"));
				break; /* not reached, trex_error() does not return */
			case 'n': exp->_p++; return trex_newnode(exp,'\n');
			case 't': exp->_p++; return trex_newnode(exp,'\t');
			case 'r': exp->_p++; return trex_newnode(exp,'\r');
			case 'f': exp->_p++; return trex_newnode(exp,'\f');
			case 'v': exp->_p++; return trex_newnode(exp,'\v');
			case 'a': case 'A': case 'w': case 'W': case 's': case 'S':
			case 'd': case 'D': case 'x': case 'X': case 'c': case 'C':
			case 'p': case 'P': case 'l': case 'u':
				{
					t = *exp->_p; exp->_p++;
					return trex_charclass(exp,t);
				}
			case 'b':
			case 'B':
				if(!isclass) {
					int node = trex_newnode(exp,OP_WB);
					exp->_nodes[node].left = *exp->_p;
					exp->_p++;
					return node;
				}
				/* fallthrough: inside a class the letter is taken literally */
			default:
				t = *exp->_p; exp->_p++;
				return trex_newnode(exp,t);
		}
	}
	else if(!scisprint(*exp->_p)) {
		trex_error(exp,_SC("letter expected"));
	}
	t = *exp->_p; exp->_p++;
	return trex_newnode(exp,t);
}
/*
 * Parses the body of a character class; the cursor is just after '['.
 * Returns an OP_CLASS node, or OP_NCLASS when the class starts with '^'.
 * The members (characters, predefined classes and OP_RANGE nodes) are
 * chained through `next` and hung off `left` of the returned node.
 *
 * `first` holds the most recently parsed member that has not been linked
 * yet, because it may still turn out to be the start of a range.
 */
static int trex_class(TRex *exp)
{
	int ret = -1;
	int first = -1,chain;
	if(*exp->_p == TREX_SYMBOL_BEGINNING_OF_STRING){
		ret = trex_newnode(exp,OP_NCLASS);
		exp->_p++;
	}else ret = trex_newnode(exp,OP_CLASS);

	if(*exp->_p == ']') trex_error(exp,_SC("empty class"));
	chain = ret;
	while(*exp->_p != ']' && exp->_p != exp->_eol) {
		if(*exp->_p == '-' && first != -1){
			int r,t;
			/* Skip the '-' first, then look at what follows it. The old
			   test compared the '-' itself with ']' and never fired. */
			exp->_p++;
			if(*exp->_p == ']') trex_error(exp,_SC("unfinished range"));
			if(exp->_nodes[first].type == OP_CCLASS) trex_error(exp,_SC("cannot use character classes in ranges"));
			t = trex_escapechar(exp);
			/* Compare the character values; the old test compared the node
			   index `first` with the pattern character. */
			if(exp->_nodes[first].type > t) trex_error(exp,_SC("invalid range"));
			r = trex_newnode(exp,OP_RANGE);
			exp->_nodes[r].left = exp->_nodes[first].type;
			exp->_nodes[r].right = t;
			exp->_nodes[chain].next = r;
			chain = r;
			first = -1;
		}
		else{
			if(first!=-1){
				/* The pending member is not a range start: link it. */
				int c = first;
				exp->_nodes[chain].next = c;
				chain = c;
				first = trex_charnode(exp,TRex_True);
			}
			else{
				first = trex_charnode(exp,TRex_True);
			}
		}
	}
	if(first!=-1){
		/* Link the last pending member. */
		int c = first;
		exp->_nodes[chain].next = c;
		chain = c;
		first = -1;
	}
	/* The member chain was built on `next` of the class node; move it to
	   `left` so `next` is free for the node that follows the class. */
	exp->_nodes[ret].left = exp->_nodes[ret].next;
	exp->_nodes[ret].next = -1;
	return ret;
}
/*
 * Parses a decimal number at the cursor; the caller guarantees that the
 * first character is a digit. At most 9 digits are accepted, a longer number
 * raises "overflow in numeric constant".
 *
 * The limit is checked before the next digit is accumulated so the signed
 * multiplication can no longer overflow, and digits are recognised by range
 * comparison because isdigit() is not defined for arbitrary wide characters.
 */
static int trex_parsenumber(TRex *exp)
{
	int ret = *exp->_p-'0';
	int positions = 10;
	exp->_p++;
	while(*exp->_p >= '0' && *exp->_p <= '9') {
		if(positions==1000000000) trex_error(exp,_SC("overflow in numeric constant"));
		ret = ret*10+(*exp->_p++-'0');
		positions *= 10;
	}
	return ret;
}
/*
 * Parses a repetition count for {n}, {n,} and {n,m}. Counts are stored in
 * 16 bit fields of the OP_GREEDY node, so larger values are rejected instead
 * of being silently truncated.
 */
static unsigned short trex_parsecount(TRex *exp)
{
	int count = trex_parsenumber(exp);
	if(count > 0xFFFF) trex_error(exp,_SC("overflow in numeric constant"));
	return (unsigned short)count;
}

/*
 * Parses one element of the pattern - a group, a character class, '$', '.'
 * or a single character - followed by an optional quantifier, then recurses
 * to parse the elements that follow and links them through `next`.
 * Returns the index of the node representing the element.
 */
static int trex_element(TRex *exp)
{
	int ret = -1;
	switch(*exp->_p)
	{
	case '(': {
		int expr,newn;
		exp->_p++;

		if(*exp->_p =='?') {
			exp->_p++;
			trex_expect(exp,':');
			expr = trex_newnode(exp,OP_NOCAPEXPR);
		}
		else
			expr = trex_newnode(exp,OP_EXPR);
		/* trex_list() may reallocate the node array, so its result is
		   stored in a local before indexing _nodes. */
		newn = trex_list(exp);
		exp->_nodes[expr].left = newn;
		ret = expr;
		trex_expect(exp,')');
			  }
			  break;
	case '[':
		exp->_p++;
		ret = trex_class(exp);
		trex_expect(exp,']');
		break;
	case TREX_SYMBOL_END_OF_STRING: exp->_p++; ret = trex_newnode(exp,OP_EOL);break;
	case TREX_SYMBOL_ANY_CHAR: exp->_p++; ret = trex_newnode(exp,OP_DOT);break;
	default:
		ret = trex_charnode(exp,TRex_False);
		break;
	}

	{
		TRexBool isgreedy = TRex_False;
		/* p0 = minimum count, p1 = maximum count (0xFFFF = unbounded) */
		unsigned short p0 = 0, p1 = 0;
		switch(*exp->_p){
			case TREX_SYMBOL_GREEDY_ZERO_OR_MORE: p0 = 0; p1 = 0xFFFF; exp->_p++; isgreedy = TRex_True; break;
			case TREX_SYMBOL_GREEDY_ONE_OR_MORE: p0 = 1; p1 = 0xFFFF; exp->_p++; isgreedy = TRex_True; break;
			case TREX_SYMBOL_GREEDY_ZERO_OR_ONE: p0 = 0; p1 = 1; exp->_p++; isgreedy = TRex_True; break;
			case '{':
				exp->_p++;
				if(!(*exp->_p >= '0' && *exp->_p <= '9')) trex_error(exp,_SC("number expected"));
				p0 = trex_parsecount(exp);
				/*******************************/
				switch(*exp->_p) {
			case '}':
				p1 = p0; exp->_p++;
				break;
			case ',':
				exp->_p++;
				p1 = 0xFFFF;
				if(*exp->_p >= '0' && *exp->_p <= '9'){
					p1 = trex_parsecount(exp);
				}
				trex_expect(exp,'}');
				break;
			default:
				trex_error(exp,_SC(", or } expected"));
				}
				/*******************************/
				isgreedy = TRex_True;
				break;

		}
		if(isgreedy) {
			int nnode = trex_newnode(exp,OP_GREEDY);
			exp->_nodes[nnode].left = ret;
			/* Pack min into the high and max into the low 16 bits. The shift
			   is done on an unsigned value to avoid overflowing int. */
			exp->_nodes[nnode].right = (int)(((unsigned int)p0<<16)|p1);
			ret = nnode;
		}
	}
	if((*exp->_p != TREX_SYMBOL_BRANCH) && (*exp->_p != ')') && (*exp->_p != TREX_SYMBOL_GREEDY_ZERO_OR_MORE) && (*exp->_p != TREX_SYMBOL_GREEDY_ONE_OR_MORE) && (*exp->_p != '\0')) {
		int nnode = trex_element(exp);
		exp->_nodes[ret].next = nnode;
	}

	return ret;
}
/*
 * Parses a sequence of elements with an optional leading '^' and an optional
 * '|' alternative, and returns the index of the node that heads it.
 * Results of nested parse calls are kept in locals before _nodes is indexed,
 * because those calls may reallocate the node array.
 */
static int trex_list(TRex *exp)
{
	int head = -1;
	int elem;
	int ornode, rhs;

	if(*exp->_p == TREX_SYMBOL_BEGINNING_OF_STRING) {
		exp->_p++;
		head = trex_newnode(exp,OP_BOL);
	}

	elem = trex_element(exp);
	if(head == -1) {
		head = elem;
	}
	else {
		exp->_nodes[head].next = elem;
	}

	if(*exp->_p != TREX_SYMBOL_BRANCH)
		return head;

	/* '|' : wrap what was parsed so far and the rest into an OP_OR node. */
	exp->_p++;
	ornode = trex_newnode(exp,OP_OR);
	exp->_nodes[ornode].left = head;
	rhs = trex_list(exp);
	exp->_nodes[ornode].right = rhs;
	return ornode;
}
/*
 * Character classification that is valid for TRexChar. The <ctype.h>
 * functions are only defined for values representable as unsigned char (or
 * EOF), so wide builds use the <wctype.h> family and narrow builds convert
 * to unsigned char first.
 */
#ifdef _UNICODE
#define TREX_CTYPE(fn,ch) (isw##fn((wint_t)(ch)) != 0)
#else
#define TREX_CTYPE(fn,ch) (is##fn((unsigned char)(ch)) != 0)
#endif

/*
 * Tests character c against the predefined class selected by the letter
 * cclass (a A w W s S d D x X c C p P l u). Upper case letters negate the
 * corresponding lower case class, except 'l'/'u' which test lower/upper case.
 * \w and \W treat '_' as a word character.
 */
static TRexBool trex_matchcclass(int cclass,TRexChar c)
{
	switch(cclass) {
	case 'a': return TREX_CTYPE(alpha,c)?TRex_True:TRex_False;
	case 'A': return !TREX_CTYPE(alpha,c)?TRex_True:TRex_False;
	case 'w': return (TREX_CTYPE(alnum,c) || c == '_')?TRex_True:TRex_False;
	case 'W': return (!TREX_CTYPE(alnum,c) && c != '_')?TRex_True:TRex_False;
	case 's': return TREX_CTYPE(space,c)?TRex_True:TRex_False;
	case 'S': return !TREX_CTYPE(space,c)?TRex_True:TRex_False;
	case 'd': return TREX_CTYPE(digit,c)?TRex_True:TRex_False;
	case 'D': return !TREX_CTYPE(digit,c)?TRex_True:TRex_False;
	case 'x': return TREX_CTYPE(xdigit,c)?TRex_True:TRex_False;
	case 'X': return !TREX_CTYPE(xdigit,c)?TRex_True:TRex_False;
	case 'c': return TREX_CTYPE(cntrl,c)?TRex_True:TRex_False;
	case 'C': return !TREX_CTYPE(cntrl,c)?TRex_True:TRex_False;
	case 'p': return TREX_CTYPE(punct,c)?TRex_True:TRex_False;
	case 'P': return !TREX_CTYPE(punct,c)?TRex_True:TRex_False;
	case 'l': return TREX_CTYPE(lower,c)?TRex_True:TRex_False;
	case 'u': return TREX_CTYPE(upper,c)?TRex_True:TRex_False;
	}
	return TRex_False; /*cannot happen*/
}
#undef TREX_CTYPE
/*
 * Walks the member chain of a character class, starting at node, and
 * returns TRex_True as soon as one member accepts c. Members are ranges,
 * predefined classes, or literal characters stored in the node type.
 */
static TRexBool trex_matchclass(TRex* exp,TRexNode *node,TRexChar c)
{
	for(;;) {
		if(node->type == OP_RANGE) {
			if(c >= node->left && c <= node->right) return TRex_True;
		}
		else if(node->type == OP_CCLASS) {
			if(trex_matchcclass(node->left,c)) return TRex_True;
		}
		else if(c == node->type) {
			return TRex_True;
		}

		if(node->next == -1)
			return TRex_False;
		node = &exp->_nodes[node->next];
	}
}
/*
 * Tries to match a single node at str.
 *
 * exp   compiled expression; _bol/_eol delimit the text being matched.
 * node  node to match.
 * str   current position in the text.
 * next  node that follows the enclosing construct, used by quantifiers to
 *       decide when to stop consuming; may be NULL.
 *
 * Returns the position after the matched text, or NULL when the node does
 * not match. Nodes that consume a character (dot, classes, literals) fail at
 * exp->_eol instead of reading and stepping past the end of the text.
 */
static const TRexChar *trex_matchnode(TRex* exp,TRexNode *node,const TRexChar *str,TRexNode *next)
{
	TRexNodeType type = node->type;
	switch(type) {
	case OP_GREEDY: {
		TRexNode *greedystop = NULL;
		/* min count in the high 16 bits, max count in the low 16 bits */
		int p0 = (node->right >> 16)&0x0000FFFF, p1 = node->right&0x0000FFFF, nmaches = 0;
		const TRexChar *s=str, *good = str;

		if(node->next != -1) {
			greedystop = &exp->_nodes[node->next];
		}
		else {
			greedystop = next;
		}

		while((nmaches == 0xFFFF || nmaches < p1)) {

			const TRexChar *stop;
			if(!(s = trex_matchnode(exp,&exp->_nodes[node->left],s,greedystop)))
				break;
			nmaches++;
			good=s;
			if(greedystop) {
				//checks that 0 matches satisfy the expression(if so skips)
				//if not would always stop(for instance if is a '?')
				if(greedystop->type != OP_GREEDY ||
				(greedystop->type == OP_GREEDY && ((greedystop->right >> 16)&0x0000FFFF) != 0))
				{
					TRexNode *gnext = NULL;
					if(greedystop->next != -1) {
						gnext = &exp->_nodes[greedystop->next];
					}else if(next && next->next != -1){
						gnext = &exp->_nodes[next->next];
					}
					stop = trex_matchnode(exp,greedystop,s,gnext);
					if(stop) {
						//if satisfied stop it
						if(p0 == p1 && p0 == nmaches) break;
						else if(nmaches >= p0 && p1 == 0xFFFF) break;
						else if(nmaches >= p0 && nmaches <= p1) break;
					}
				}
			}

			if(s >= exp->_eol)
				break;
		}
		if(p0 == p1 && p0 == nmaches) return good;
		else if(nmaches >= p0 && p1 == 0xFFFF) return good;
		else if(nmaches >= p0 && nmaches <= p1) return good;
		return NULL;
	}
	case OP_OR: {
			/* Try the whole left chain first, then the whole right chain. */
			const TRexChar *asd = str;
			TRexNode *temp=&exp->_nodes[node->left];
			while( (asd = trex_matchnode(exp,temp,asd,NULL)) ) {
				if(temp->next != -1)
					temp = &exp->_nodes[temp->next];
				else
					return asd;
			}
			asd = str;
			temp = &exp->_nodes[node->right];
			while( (asd = trex_matchnode(exp,temp,asd,NULL)) ) {
				if(temp->next != -1)
					temp = &exp->_nodes[temp->next];
				else
					return asd;
			}
			return NULL;
			break;
	}
	case OP_EXPR:
	case OP_NOCAPEXPR:{
			TRexNode *n = &exp->_nodes[node->left];
			const TRexChar *cur = str;
			int capture = -1;
			/* Record the capture only when this group is the next one due. */
			if(node->type != OP_NOCAPEXPR && node->right == exp->_currsubexp) {
				capture = exp->_currsubexp;
				exp->_matches[capture].begin = cur;
				exp->_currsubexp++;
			}

			do {
				TRexNode *subnext = NULL;
				if(n->next != -1) {
					subnext = &exp->_nodes[n->next];
				}else {
					subnext = next;
				}
				if(!(cur = trex_matchnode(exp,n,cur,subnext))) {
					if(capture != -1){
						exp->_matches[capture].begin = 0;
						exp->_matches[capture].len = 0;
					}
					return NULL;
				}
			} while((n->next != -1) && (n = &exp->_nodes[n->next]));

			if(capture != -1)
				exp->_matches[capture].len = cur - exp->_matches[capture].begin;
			return cur;
	}
	case OP_WB:
		/* NOTE(review): this test is unchanged; it still reads *(str+1) and
		   passes TRexChar values straight to isspace() - worth revisiting. */
		if(str == exp->_bol && !isspace(*str)
		 || (str == exp->_eol && !isspace(*(str-1)))
		 || (!isspace(*str) && isspace(*(str+1)))
		 || (isspace(*str) && !isspace(*(str+1))) ) {
			return (node->left == 'b')?str:NULL;
		}
		return (node->left == 'b')?NULL:str;
	case OP_BOL:
		if(str == exp->_bol) return str;
		return NULL;
	case OP_EOL:
		if(str == exp->_eol) return str;
		return NULL;
	case OP_DOT:
		/* '.' needs a character to consume. */
		if(str >= exp->_eol) return NULL;
		str++;
		return str;
	case OP_NCLASS:
	case OP_CLASS:
		if(str >= exp->_eol) return NULL;
		if(trex_matchclass(exp,&exp->_nodes[node->left],*str)?(type == OP_CLASS?TRex_True:TRex_False):(type == OP_NCLASS?TRex_True:TRex_False)) {
			str++;
			return str;
		}
		return NULL;
	case OP_CCLASS:
		if(str >= exp->_eol) return NULL;
		if(trex_matchcclass(node->left,*str)) {
			str++;
			return str;
		}
		return NULL;
	default: /* char */
		if(str >= exp->_eol || *str != node->type) return NULL;
		str++;
		return str;
	}
	return NULL;
}
/* public api */
TRex *trex_compile(const TRexChar *pattern,const TRexChar **error)
{
TRex *exp = (TRex *)malloc(sizeof(TRex));
exp->_eol = exp->_bol = NULL;
exp->_p = pattern;
exp->_nallocated = (int)scstrlen(pattern) * sizeof(TRexChar);
exp->_nodes = (TRexNode *)malloc(exp->_nallocated * sizeof(TRexNode));
exp->_nsize = 0;
exp->_matches = 0;
exp->_nsubexpr = 0;
exp->_first = trex_newnode(exp,OP_EXPR);
exp->_error = error;
exp->_jmpbuf = malloc(sizeof(jmp_buf));
if(setjmp(*((jmp_buf*)exp->_jmpbuf)) == 0) {
int res = trex_list(exp);
exp->_nodes[exp->_first].left = res;
if(*exp->_p!='\0')
trex_error(exp,_SC("unexpected character"));
#ifdef _DEBUG
{
int nsize,i;
TRexNode *t;
nsize = exp->_nsize;
t = &exp->_nodes[0];
scprintf(_SC("\n"));
for(i = 0;i < nsize; i++) {
if(exp->_nodes[i].type>MAX_CHAR)
scprintf(_SC("[%02d] %10s "),i,g_nnames[exp->_nodes[i].type-MAX_CHAR]);
else
scprintf(_SC("[%02d] %10c "),i,exp->_nodes[i].type);
scprintf(_SC("left %02d right %02d next %02d\n"),exp->_nodes[i].left,exp->_nodes[i].right,exp->_nodes[i].next);
}
scprintf(_SC("\n"));
}
#endif
exp->_matches = (TRexMatch *) malloc(exp->_nsubexpr * sizeof(TRexMatch));
memset(exp->_matches,0,exp->_nsubexpr * sizeof(TRexMatch));
}
else{
trex_free(exp);
return NULL;
}
return exp;
}
/* Releases a compiled expression and everything it owns; NULL is accepted. */
void trex_free(TRex *exp)
{
	if(exp == NULL)
		return;

	/* free() ignores NULL, so the members need no individual checks. */
	free(exp->_nodes);
	free(exp->_jmpbuf);
	free(exp->_matches);
	free(exp);
}
/*
 * Returns TRex_True when the whole zero terminated text matches the
 * expression, i.e. the match starting at the beginning ends exactly at the
 * end of the text.
 */
TRexBool trex_match(TRex* exp,const TRexChar* text)
{
	const TRexChar *stop;

	exp->_bol = text;
	exp->_eol = text + scstrlen(text);
	exp->_currsubexp = 0;

	stop = trex_matchnode(exp,exp->_nodes,text,NULL);
	return (stop != NULL && stop == exp->_eol) ? TRex_True : TRex_False;
}
/*
 * Searches [text_begin, text_end) for the first match, trying each start
 * position in turn. On success stores the match boundaries through
 * out_begin / out_end (either may be NULL) and returns TRex_True.
 * An empty range never matches.
 */
TRexBool trex_searchrange(TRex* exp,const TRexChar* text_begin,const TRexChar* text_end,const TRexChar** out_begin, const TRexChar** out_end)
{
	const TRexChar *scan = text_begin;
	const TRexChar *cur;
	int node = exp->_first;

	if(text_begin >= text_end) return TRex_False;

	exp->_bol = text_begin;
	exp->_eol = text_end;

	for(;;) {
		/* Attempt a match anchored at the current scan position. */
		cur = scan;
		while(node != -1) {
			exp->_currsubexp = 0;
			cur = trex_matchnode(exp,&exp->_nodes[node],cur,NULL);
			if(!cur)
				break;
			node = exp->_nodes[node].next;
		}
		if(cur != NULL)
			break;

		scan++;
		if(scan == text_end)
			return TRex_False;
	}

	if(out_begin) *out_begin = scan;
	if(out_end) *out_end = cur;
	return TRex_True;
}
/* Searches a zero terminated string; see trex_searchrange(). */
TRexBool trex_search(TRex* exp,const TRexChar* text, const TRexChar** out_begin, const TRexChar** out_end)
{
	const TRexChar *text_end = text + scstrlen(text);
	return trex_searchrange(exp,text,text_end,out_begin,out_end);
}
/* Number of sub expressions (OP_EXPR nodes) created for the pattern. */
int trex_getsubexpcount(TRex* exp)
{
	const int count = exp->_nsubexpr;
	return count;
}
/*
 * Copies the match record of sub expression n into *subexp.
 * Returns TRex_False, leaving *subexp untouched, when n is out of range.
 */
TRexBool trex_getsubexp(TRex* exp, int n, TRexMatch *subexp)
{
	if(n >= 0 && n < exp->_nsubexpr) {
		*subexp = exp->_matches[n];
		return TRex_True;
	}
	return TRex_False;
}

70
drivers/trex/trex.h Normal file
View file

@ -0,0 +1,70 @@
#ifndef _TREX_H_
#define _TREX_H_
/***************************************************************
T-Rex a tiny regular expression library
Copyright (C) 2003-2006 Alberto Demichelis
This software is provided 'as-is', without any express
or implied warranty. In no event will the authors be held
liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for
any purpose, including commercial applications, and to alter
it and redistribute it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented;
you must not claim that you wrote the original software.
If you use this software in a product, an acknowledgment
in the product documentation would be appreciated but
is not required.
2. Altered source versions must be plainly marked as such,
and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any
source distribution.
****************************************************************/
/*
 * Character configuration. _UNICODE is forced on, so TRexChar is wchar_t in
 * every build; the guard avoids redefining it when the including file has
 * already defined it.
 * NOTE(review): MAX_CHAR is 0xFFFF while wchar_t may be wider than 16 bits
 * on some platforms - confirm that characters above 0xFFFF cannot reach the
 * engine.
 */
#ifndef _UNICODE
#define _UNICODE
#endif
#ifdef _UNICODE
#include <stddef.h> /* wchar_t for C translation units */
#define TRexChar wchar_t
#define MAX_CHAR 0xFFFF
#define _TREXC(c) L##c
#define trex_strlen wcslen
#define trex_printf wprintf
#else
#define TRexChar char
#define MAX_CHAR 0xFF
#define _TREXC(c) (c)
#define trex_strlen strlen
#define trex_printf printf
#endif
#ifndef TREX_API
#define TREX_API extern
#endif
#define TRex_True 1
#define TRex_False 0
typedef unsigned int TRexBool;
typedef struct TRex TRex;
typedef struct {
const TRexChar *begin;
int len;
} TRexMatch;
TREX_API TRex *trex_compile(const TRexChar *pattern,const TRexChar **error);
TREX_API void trex_free(TRex *exp);
TREX_API TRexBool trex_match(TRex* exp,const TRexChar* text);
TREX_API TRexBool trex_search(TRex* exp,const TRexChar* text, const TRexChar** out_begin, const TRexChar** out_end);
TREX_API TRexBool trex_searchrange(TRex* exp,const TRexChar* text_begin,const TRexChar* text_end,const TRexChar** out_begin, const TRexChar** out_end);
TREX_API int trex_getsubexpcount(TRex* exp);
TREX_API TRexBool trex_getsubexp(TRex* exp, int n, TRexMatch *subexp);
#endif