// Compile-time debug switch; defined as 0 in this file.
23 #define ENABLE_DEBUG 0
// Tracing variant of DBG: forwards the format string and its arguments to
// fprintf(stderr, ...). The do { } while(0) wrapper makes the macro behave as a
// single statement.
25 #define DBG(fmt,...) do { fprintf(stderr,fmt,__VA_ARGS__); } while(0)
// Silent variant of DBG: expands to an empty statement, so its arguments are
// never evaluated.
// NOTE(review): the preprocessor conditional that selects between the two DBG
// definitions is not visible in this excerpt - presumably it tests ENABLE_DEBUG;
// confirm.
27 #define DBG(fmt,...) do {} while(0)
// True when c is a blank, a horizontal tab, a line feed or a carriage return.
// NOTE(review): the enclosing function header is outside this excerpt -
// presumably the file-local isspace() helper; confirm.
35 return c==
' ' || c==
'\t' || c==
'\n' || c==
'\r';
// True for any byte whose value is >= 128 once viewed as unsigned char, and for
// the ASCII letters a-z and A-Z.
// NOTE(review): the enclosing function header is outside this excerpt -
// presumably the file-local isalpha() helper; confirm.
40 return static_cast<unsigned char>(c)>=128 || (c>=
'a' && c<=
'z') || (c>=
'A' && c<=
'Z');
// True when c is one of the ASCII digits '0'..'9'.
// NOTE(review): the enclosing function header is outside this excerpt -
// presumably the file-local isdigit() helper; confirm.
45 return c>=
'0' && c<=
'9';
131 static_cast<uint32_t>(c)) {}
135 static_cast<uint32_t>(v)) {}
// Returns the low 16 bits of m_rep. The character-class matcher uses this value
// as the inclusive upper bound in its from() <= v <= to() range test.
150 uint16_t
to()
const {
return m_rep & 0xFFFF; }
// Declaration of the const member matchAt().
//   tokenPos - index into the compiled token vector (data) at which to start
//   str      - the subject string being matched
//   match    - receives capture boundaries (startCapture/endCapture) and the
//              overall result via setMatch(pos, index-pos)
//   pos      - offset in str that the reported match is measured from
//   level    - nesting depth; recursive calls pass level+1 and it is printed
//              in the debug trace
// Returns a bool that callers store as their `found` flag.
181 bool matchAt(
size_t tokenPos,
const std::string &str,
Match &
match,
size_t pos,
int level)
const;
201 const char *start =
pattern.c_str();
202 const char *ps = start;
208 auto addToken = [&](
PToken tok)
211 data.emplace_back(tok);
214 auto getNextCharacter = [&]() ->
PToken
224 case 'n': result =
PToken(
'\n');
break;
225 case 'r': result =
PToken(
'\r');
break;
226 case 't': result =
PToken(
'\t');
break;
237 for (
int i=0;i<2 && (cs=(*(ps+1)));i++)
239 int d = (cs>=
'a' && cs<=
'f') ? cs-
'a'+10 :
240 (cs>=
'A' && cs<=
'F') ? cs-
'A'+10 :
241 (cs>=
'0' && cs<=
'9') ? cs-
'0' :
243 if (d>=0) { v<<=4; v|=d; ps++; }
else break;
248 case '\0': ps--;
break;
262 prevTokenPos = tokenPos;
267 prevTokenPos = tokenPos;
272 prevTokenPos = tokenPos;
276 prevTokenPos = tokenPos;
280 prevTokenPos = tokenPos;
285 prevTokenPos = tokenPos;
287 if (*ps==0) {
error=
true;
return; }
288 bool esc = *ps==
'\\';
289 PToken tok = getNextCharacter();
295 if (*ps==0) {
error=
true;
return; }
296 tok = getNextCharacter();
303 uint16_t numTokens=0;
306 if (c==
'-' && *(ps+1)!=
']' && *(ps+1)!=0)
310 PToken endTok = getNextCharacter();
334 if (*ps==0) {
error=
true;
return; }
336 tok = getNextCharacter();
338 tok.
value()==
static_cast<uint16_t
>(
']'))
342 if (*ps==0) {
error=
true;
return; }
346 data[prevTokenPos].setValue(numTokens);
353 if (prevTokenPos==-1)
358 switch (
data[prevTokenPos].kind())
370 int ddiff =
static_cast<int>(tokenPos-prevTokenPos);
376 std::copy_n(
data.begin()+prevTokenPos,ddiff,
data.begin()+tokenPos);
380 data.insert(
data.begin()+prevTokenPos,
390 prevTokenPos = tokenPos;
391 addToken(getNextCharacter());
401 void Ex::Private::dump()
403 size_t l = data.size();
405 DBG(
"==== compiled token stream for pattern '%s' ===\n",pattern.c_str());
408 DBG(
"[%s:%04x]\n",data[i].kindStr(),data[i].value());
411 uint16_t num = data[i].value();
415 if (data[i].isRange())
417 DBG(
"[%04x(%c)-%04x(%c)]\n",data[i].from(),data[i].from(),data[i].to(),data[i].to());
421 DBG(
"[%s:%04x]\n",data[i].kindStr(),data[i].value());
440 DBG(
"%d:matchAt(tokenPos=%zu, str='%s', pos=%zu)\n",level,tokenPos,str.c_str(),pos);
// Capture-less helper: accepts c when isalpha(c) holds or when c is an
// underscore. Used below for the tokens that test the first character of an
// identifier.
441 auto isStartIdChar = [](
char c) {
return isalpha(c) || c==
'_'; };
443 auto matchCharClass = [
this,isStartIdChar,
isIdChar](
size_t tp,
char c) ->
bool
447 uint16_t numFields = tok.
value();
449 for (uint16_t i=0;i<numFields;i++)
464 uint16_t v =
static_cast<uint16_t
>(c);
465 if (tok.
from()<=v && v<=tok.
to())
472 DBG(
"matchCharClass(tp=%zu,c=%c (x%02x))=%d\n",tp,c,c,negate?!found:found);
473 return negate ? !found : found;
// Repetition mode passed to processSequence(). With Optional the consuming
// loops stop after a single advance (`if (type==Optional) break;`); with Star
// they keep advancing for as long as the token continues to match.
476 enum SequenceType { Star, Optional };
477 auto processSequence = [
this,&tokenPos,&index,&str,&matchCharClass,
478 &isStartIdChar,&
isIdChar,&
match,&level,&pos](SequenceType type) ->
bool
480 size_t startIndex = index;
481 PToken tok = data[++tokenPos];
485 while (index<=str.length() && str[index]==c_tok) { index++;
if (type==Optional)
break; }
490 while (index<=str.length() && matchCharClass(tokenPos,str[index])) { index++;
if (type==Optional)
break; }
491 tokenPos+=tok.
value()+1;
495 while (index<=str.length() && isStartIdChar(str[index])) { index++;
if (type==Optional)
break; }
500 while (index<=str.length() &&
isIdChar(str[index])) { index++;
if (type==Optional)
break; }
505 while (index<=str.length() &&
isspace(str[index])) { index++;
if (type==Optional)
break; }
510 while (index<=str.length() &&
isdigit(str[index])) { index++;
if (type==Optional)
break; }
515 if (type==Optional) index++;
else index = str.length();
519 while ((
int)index>=(int)startIndex)
522 bool found = matchAt(tokenPos,str,
match,index,level+1);
533 while (tokenPos<data.size())
535 PToken tok = data[tokenPos];
540 if (index>=str.length() || str[index]!=c_tok)
return false;
545 if (index>=str.length() || !matchCharClass(tokenPos,str[index]))
return false;
546 index++,tokenPos+=tok.
value()+1;
553 if (index>=str.length() || !isStartIdChar(str[index]))
return false;
557 if (index>=str.length() || !
isIdChar(str[index]))
return false;
561 if (index>=str.length() || !
isspace(str[index]))
return false;
565 if (index>=str.length() || !
isdigit(str[index]))
return false;
569 if (index!=pos)
return false;
572 if (index<str.length())
return false;
// Debug trace for the begin-of-word check. The third value fills the
// "prev.isIdChar(%c)" slot, so it has to be the character *before* index
// (str[index-1]), guarded by index>0. The previous form, str[index]-1, printed
// the current character's code minus one instead.
575 DBG(
"BeginOfWord: index=%zu isIdChar(%c)=%d prev.isIdChar(%c)=%d\n",
576 index,str[index],
isIdChar(str[index]),
577 index>0?str[index-1]:0,
579 if (index>=str.length() ||
581 (index>0 &&
isIdChar(str[index-1])))
return false;
584 DBG(
"EndOfWord: index=%zu pos=%zu idIdChar(%c)=%d prev.isIsChar(%c)=%d\n",
585 index,pos,str[index],
isIdChar(str[index]),
586 index==0 ? 0 : str[index-1],
587 index==0 ? -1 :
isIdChar(str[index-1]));
588 if (index<str.length() &&
589 (
isIdChar(str[index]) || index==0 || !
isIdChar(str[index-1])))
return false;
592 DBG(
"BeginCapture(%zu)\n",index);
593 match.startCapture(index);
596 DBG(
"EndCapture(%zu)\n",index);
597 match.endCapture(index);
600 if (index>=str.length())
return false;
604 return processSequence(Star);
606 return processSequence(Optional);
613 match.setMatch(pos,index-pos);
619 std::string result=
"^";
621 const char *
p = pattern.c_str();
639 result+=
'\\'; result+=c;
679 if (
p->data.size()==0 ||
p->error)
return found;
685 found =
p->matchAt(0,str,
match,pos,0);
691 size_t index = str.find(tok.
asciiValue(),pos);
692 if (index==std::string::npos)
694 DBG(
"Ex::match(str='%s',pos=%zu)=false (no start char '%c')\n",str.c_str(),pos,tok.
asciiValue());
697 DBG(
"pos=%zu str='%s' char='%c' index=%zu\n",index,str.c_str(),tok.
asciiValue(),index);
700 while (pos<str.length())
702 found =
p->matchAt(0,str,
match,pos,0);
707 DBG(
"Ex::match(str='%s',pos=%zu)=%d\n",str.c_str(),pos,found);
713 return !
p->pattern.empty() && !
p->error;
723 bool search(
const std::string &str,
const Ex &re,
size_t pos)
734 bool match(
const std::string &str,
const Ex &re)
740 std::string
replace(
const std::string &str,
const Ex &re,
const std::string &replacement)
747 size_t i=
match.position();
748 size_t l=
match.length();
749 if (i>p) result+=str.substr(p,i-p);
753 if (p<str.length()) result+=str.substr(p);