1 /******************************************************************************
3 * Copyright (C) 1997-2020 by Dimitri van Heesch.
5 * Permission to use, copy, modify, and distribute this software and its
6 * documentation under the terms of the GNU General Public License is hereby
7 * granted. No representations are made about the suitability of this software
8 * for any purpose. It is provided "as is" without express or implied warranty.
9 * See the GNU General Public License for more details.
11 * Documents produced by Doxygen are derivative works derived from the
12 * input used in their production; they are not affected by this license.
15 /******************************************************************************
16 * Minimal flex based parser for XML
17 ******************************************************************************/
/* Scanner options: never treat the input as interactive, name the generated
 * symbols xmlYY... instead of yy... (see e.g. xmlYYlex_destroy further down),
 * and make yyextra a pointer to the per-scanner xmlYY_state. */
19 %option never-interactive
20 %option prefix="xmlYY"
22 %option extra-type="struct xmlYY_state *"
34 //#include "message.h"
36 #define YY_NEVER_INTERACTIVE 1
38 #define YY_NO_UNISTD_H 1
44 const char * inputString = 0; //!< the code fragment as text
45 yy_size_t inputPosition = 0; //!< read offset during parsing
48 bool selfClose = false; //!< true when the tag being scanned is self-closing (<tag/>)
50 std::string attrValue; //!< value text collected for the attribute being scanned
52 XMLHandlers::Attributes attrs; //!< attributes collected for the element being scanned
57 std::vector<std::string> xpath; //!< names of the currently open elements, innermost last
// Forward declarations of the file-local helpers used by the scanner rules and by each other.
61 static const char *stateToString(int state);
64 static yy_size_t yyread(yyscan_t yyscanner,char *buf,yy_size_t max_size);
65 static void initElement(yyscan_t yyscanner);
66 static void addCharacters(yyscan_t yyscanner);
67 static void addElement(yyscan_t yyscanner);
68 static void addAttribute(yyscan_t yyscanner);
69 static void countLines(yyscan_t yyscanner, const char *txt,yy_size_t len);
70 static void reportError(yyscan_t yyscanner, const std::string &msg);
71 static std::string processData(yyscan_t yyscanner,const char *txt,yy_size_t len);
// Route the scanner's input requests to yyread(), which serves them from yyextra->inputString.
74 #define YY_INPUT(buf,result,max_size) result=yyread(yyscanner,buf,max_size);
/* Named patterns used by the scanner rules. Definitions referenced here but
 * not listed (NL, SP, CLOSE, ...) are on lines missing from this excerpt. */
/* End of a "<?...?>" construct, optionally followed by a newline. */
83 CLOSESPECIAL "?>"{NL}?
/* Element/attribute names: bytes \200-\377 are accepted as name characters as-is. */
84 NAMESTART [:A-Za-z\200-\377_]
85 NAMECHAR [:A-Za-z\200-\377_0-9.-]
86 NAME {NAMESTART}{NAMECHAR}*
/* Numeric character reference, decimal or hexadecimal. */
87 ESC "&#"[0-9]+";"|"&#x"[0-9a-fA-F]+";"
91 COMMENTEND "--"{CLOSE}
/* Single- or double-quoted string; an ampersand is only accepted as part of {ESC}. */
92 STRING \"([^"&]|{ESC})*\"|\'([^'&]|{ESC})*\'
93 DOCTYPE {SP}?"<!DOCTYPE"{SP}
94 CDATA {SP}?"<![CDATA["
112 {SP} { countLines(yyscanner,yytext,yyleng); }
  /* NOTE(review): the start-condition headers and several action lines are not part of
   * this excerpt; the comments below describe only what the visible actions do.
   * Rules around here: matched text is only used for line counting; "<?" switches to
   * Prolog, "<" prepares a new element, comments and CDATA remember the state to return to. */
113 {DOCTYPE} { countLines(yyscanner,yytext,yyleng); }
114 {OPENSPECIAL} { countLines(yyscanner,yytext,yyleng); BEGIN(Prolog); }
115 {OPEN} { countLines(yyscanner,yytext,yyleng);
116 initElement(yyscanner);
118 {COMMENT} { yyextra->commentContext = YY_START;
123 {CDATA} { countLines(yyscanner,yytext,yyleng);
124 yyextra->cdataContext = YY_START;
  /* Character data is entity-decoded and accumulated in yyextra->data; a following
   * "<" first hands the accumulated text to addCharacters(). */
127 {PCDATA} { yyextra->data += processData(yyscanner,yytext,yyleng); }
128 {OPEN} { countLines(yyscanner,yytext,yyleng);
129 addCharacters(yyscanner);
130 initElement(yyscanner);
133 {COMMENT} { yyextra->commentContext = YY_START;
134 countLines(yyscanner,yytext,yyleng);
  /* Tag name: a "/" marks an end tag, the name is stored, ">" completes the element. */
139 "/" { yyextra->isEnd = true; }
140 {NAME} { yyextra->name = yytext;
142 {CLOSE} { addElement(yyscanner);
143 countLines(yyscanner,yytext,yyleng);
147 {SP} { countLines(yyscanner,yytext,yyleng); }
  /* Attribute list: a "/" marks a self-closing tag, "=" switches to AttributeValue. */
150 "/" { yyextra->selfClose = true; }
151 {NAME} { yyextra->attrName = yytext; }
152 "=" { BEGIN(AttributeValue); }
153 {CLOSE} { addElement(yyscanner);
154 countLines(yyscanner,yytext,yyleng);
158 {SP} { countLines(yyscanner,yytext,yyleng); }
  /* Attribute value: the opening quote character is remembered in stringChar;
   * any other character is reported as a missing value. */
161 {SP} { countLines(yyscanner,yytext,yyleng); }
162 ['"] { yyextra->stringChar = *yytext;
163 yyextra->attrValue = "";
166 . { std::string msg = std::string("Missing attribute value. Unexpected character `")+yytext+"` found";
167 reportError(yyscanner,msg);
  /* Inside a quoted value: text is entity-decoded and appended; a quote equal to
   * stringChar ends the value, a newline is counted and stored as a space. */
173 [^'"\n]+ { yyextra->attrValue += processData(yyscanner,yytext,yyleng); }
174 ['"] { if (*yytext==yyextra->stringChar)
176 addAttribute(yyscanner);
181 yyextra->attrValue += processData(yyscanner,yytext,yyleng);
184 \n { yyextra->lineNr++; yyextra->attrValue+=' '; }
  /* CDATA section: text is appended to yyextra->data without entity decoding;
   * the end marker returns to the state saved in cdataContext. */
187 {ENDCDATA} { BEGIN(yyextra->cdataContext); }
188 [^]\n]+ { yyextra->data += yytext; }
189 \n { yyextra->data += yytext;
192 . { yyextra->data += yytext; }
195 {CLOSESPECIAL} { countLines(yyscanner,yytext,yyleng);
199 \n { yyextra->lineNr++; }
  /* Comment: the end marker returns to the state saved in commentContext. */
203 {COMMENTEND} { countLines(yyscanner,yytext,yyleng);
204 BEGIN(yyextra->commentContext);
207 \n { yyextra->lineNr++; }
  /* Fallback rules: count newlines and report any other character as unexpected. */
210 \n { yyextra->lineNr++; }
211 . { std::string msg = "Unexpected character `";
214 reportError(yyscanner,msg);
219 //----------------------------------------------------------------------------------------
221 static yy_size_t yyread(yyscan_t yyscanner,char *buf,size_t max_size)
223 struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
224 yy_size_t inputPosition = yyextra->inputPosition;
225 const char *s = yyextra->inputString + inputPosition;
227 while( c < max_size && *s)
232 yyextra->inputPosition += c;
// Advances yyextra->lineNr by one for every newline character found in the
// first len bytes of txt.
236 static void countLines(yyscan_t yyscanner, const char *txt,yy_size_t len)
238 struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
239 for (yy_size_t i=0;i<len;i++)
241 if (txt[i]=='\n') yyextra->lineNr++;
// Resets the per-element scanner state before a new tag is scanned.
245 static void initElement(yyscan_t yyscanner)
247 struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
248 yyextra->isEnd = false; // true => </tag>
249 yyextra->selfClose = false; // true => <tag/>
251 yyextra->attrs.clear(); // forget the attributes collected so far
// Checks the closing tag yyextra->name against the stack of open elements in
// yyextra->xpath. An error is reported when no element is open or when the
// innermost open element has a different name; on a match that element is
// popped from the stack.
// NOTE(review): the lines separating the "empty" case from the comparison are
// not part of this excerpt; presumably the comparison sits in an else branch,
// since back() on an empty vector would be invalid - confirm.
254 static void checkAndUpdatePath(yyscan_t yyscanner)
256 struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
257 if (yyextra->xpath.empty())
259 std::string msg = "found closing tag '"+yyextra->name+"' without matching opening tag";
260 reportError(yyscanner,msg);
264 std::string expectedTagName = yyextra->xpath.back();
265 if (expectedTagName!=yyextra->name)
267 std::string msg = "Found closing tag '"+yyextra->name+"' that does not match the opening tag '"+expectedTagName+"' at the same level";
268 reportError(yyscanner,msg);
270 else // matching end tag
272 yyextra->xpath.pop_back();
// Called when a tag has been scanned completely. The visible code pushes the
// element name on yyextra->xpath and calls handlers.startElement (if set) with
// the name and collected attributes; for end tags and self-closing tags it
// validates the element stack via checkAndUpdatePath() and calls
// handlers.endElement (if set). The fprintf calls trace both events to stderr.
// NOTE(review): the conditions guarding the start-element part and the trace
// output are on lines not part of this excerpt.
277 static void addElement(yyscan_t yyscanner)
279 struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
282 yyextra->xpath.push_back(yyextra->name);
283 if (yyextra->handlers.startElement)
285 yyextra->handlers.startElement(yyextra->name,yyextra->attrs);
289 fprintf(stderr,"%d: startElement(%s,attr=[",yyextra->lineNr,yyextra->name.data());
// NOTE(review): iterating by value copies every attribute pair; `const auto &attr` would avoid that.
290 for (auto attr : yyextra->attrs)
292 fprintf(stderr,"%s='%s' ",attr.first.c_str(),attr.second.c_str());
294 fprintf(stderr,"])\n");
297 if (yyextra->isEnd || yyextra->selfClose)
301 fprintf(stderr,"%d: endElement(%s)\n",yyextra->lineNr,yyextra->name.data());
303 checkAndUpdatePath(yyscanner);
304 if (yyextra->handlers.endElement)
306 yyextra->handlers.endElement(yyextra->name);
/** Returns a copy of \a str without leading and trailing whitespace.
 *
 *  Whitespace is whatever isspace() reports. Every character is converted to
 *  unsigned char before the call, because passing a negative value (any byte
 *  >= 0x80 where char is signed, e.g. UTF-8 text) to isspace() is undefined
 *  behaviour. Positions are kept as size_t so long strings are not narrowed.
 *
 *  @param str the text to trim.
 *  @return the trimmed text; empty when \a str is empty or all whitespace.
 */
static std::string trimSpaces(const std::string &str)
{
  const auto isBlank = [](char ch) { return isspace(static_cast<unsigned char>(ch))!=0; };
  size_t first = 0;
  size_t last  = str.length(); // one past the last character that is kept
  while (first<last && isBlank(str[first]))  first++;
  while (last>first && isBlank(str[last-1])) last--;
  return str.substr(first,last-first);
}
// Passes the character data accumulated in yyextra->data, with surrounding
// whitespace removed by trimSpaces(), to handlers.characters (if set); the
// fprintf traces the text to stderr.
// NOTE(review): the condition guarding the trace output and whatever happens to
// yyextra->data afterwards are on lines not part of this excerpt.
320 static void addCharacters(yyscan_t yyscanner)
322 struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
323 std::string data = trimSpaces(yyextra->data);
324 if (yyextra->handlers.characters)
326 yyextra->handlers.characters(data);
332 fprintf(stderr,"characters(%s)\n",data.c_str());
// Stores the attribute scanned last (yyextra->attrName / yyextra->attrValue)
// in the attribute collection of the current element.
// NOTE(review): if XMLHandlers::Attributes is a std::map-like container,
// insert() keeps the first value when a name occurs twice - confirm intent.
337 static void addAttribute(yyscan_t yyscanner)
339 struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
340 yyextra->attrs.insert(std::make_pair(yyextra->attrName,yyextra->attrValue));
// Reports a parse error at the current position: the visible code prints
// "<file>:<line>: Error '<msg>'" to stderr and forwards file name, line number
// and message to handlers.error (if set).
// NOTE(review): the condition guarding the stderr output is on a line not part
// of this excerpt.
343 static void reportError(yyscan_t yyscanner,const std::string &msg)
345 struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
348 fprintf(stderr,"%s:%d: Error '%s'\n",yyextra->fileName.c_str(),yyextra->lineNr,msg.c_str());
350 if (yyextra->handlers.error)
352 yyextra->handlers.error(yyextra->fileName,yyextra->lineNr,msg);
// Parallel tables for the named character entities understood by processData():
// entities_enc[i] is the entity name without the leading '&' and trailing ';',
// entities_dec[i] the character it stands for; num_entities is the table size.
356 static const char *entities_enc[] = { "amp", "quot", "gt", "lt", "apos" };
357 static const char entities_dec[] = { '&', '"', '>', '<', '\'' };
358 static const int num_entities = 5;
360 // replace character entities such as & in txt and return the string where entities
362 static std::string processData(yyscan_t yyscanner,const char *txt,yy_size_t len)
366 for (yy_size_t i=0; i<len; i++)
371 const int maxEntityLen = 10;
372 char entity[maxEntityLen+1];
373 entity[maxEntityLen]='\0';
374 for (yy_size_t j=0; j<maxEntityLen && i+j+1<len; j++)
378 entity[j]=txt[i+j+1];
387 for (int e=0; !found && e<num_entities; e++)
389 if (strcmp(entity,entities_enc[e])==0)
391 result+=entities_dec[e];
392 i+=strlen(entities_enc[e])+1;
398 std::string msg = std::string("Invalid character entity '&") + entity + ";' found\n";
399 reportError(yyscanner,msg);
410 //--------------------------------------------------------------
// Private implementation data of XMLParser.
412 struct XMLParser::Private
415 struct xmlYY_state xmlYY_extra; // per-scanner state, registered as the scanner's extra data by the constructor
// Creates the scanner instance with p->xmlYY_extra as its extra data and
// stores a copy of the given handlers in that state.
418 XMLParser::XMLParser(const XMLHandlers &handlers) : p(new Private)
420 xmlYYlex_init_extra(&p->xmlYY_extra,&p->yyscanner);
421 p->xmlYY_extra.handlers = handlers;
// Releases the scanner instance created by the constructor.
424 XMLParser::~XMLParser()
426 xmlYYlex_destroy(p->yyscanner);
// Parses the XML text in inputStr; fileName is stored in the scanner state and
// used in error reports and trace messages. Returns immediately when inputStr
// is null or empty. Otherwise the scanner is pointed at the start of inputStr
// and restarted, handlers.startDocument / handlers.endDocument are called (if
// set), and an error is reported when elements are still open at the end.
// With flex debugging or debugEnabled active, "entering"/"finished" messages
// are written to stderr or stdout respectively.
// NOTE(review): the call that runs the scanner, the declaration of `output`
// and the conditions around xmlYYset_debug and the fprintf calls are on lines
// not part of this excerpt.
// NOTE(review): fileName is assigned to a std::string and printed with "%s";
// a null pointer would be invalid for both - confirm callers never pass null.
429 void XMLParser::parse(const char *fileName,const char *inputStr, bool debugEnabled)
431 yyscan_t yyscanner = p->yyscanner;
432 struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
435 xmlYYset_debug(1,p->yyscanner);
438 if (inputStr==nullptr || inputStr[0]=='\0') return; // empty input
441 const char *enter_txt = 0;
442 const char *finished_txt = 0;
443 const char *pre_txt = 0;
444 if (yy_flex_debug) { output=stderr; pre_txt="--"; enter_txt="entering"; finished_txt="finished"; }
445 else if (debugEnabled) { output=stdout; pre_txt=""; enter_txt="Entering"; finished_txt="Finished"; }
449 fprintf(output,"%s%s lexical analyzer: %s (for: %s)\n",pre_txt,enter_txt, __FILE__, fileName);
453 yyextra->fileName = fileName;
455 yyextra->inputString = inputStr;
456 yyextra->inputPosition = 0; // start reading at the beginning of inputStr
458 xmlYYrestart( 0, yyscanner );
460 if (yyextra->handlers.startDocument)
462 yyextra->handlers.startDocument();
465 if (yyextra->handlers.endDocument)
467 yyextra->handlers.endDocument();
// Anything left on the element stack was opened but never closed.
470 if (!yyextra->xpath.empty())
472 std::string tagName = yyextra->xpath.back();
473 std::string msg = "End of file reached while expecting closing tag '"+tagName+"'";
474 reportError(yyscanner,msg);
479 fprintf(output,"%s%s lexical analyzer: %s (for: %s)\n",pre_txt,finished_txt, __FILE__, fileName);
// Returns the line counter kept in the scanner state.
483 int XMLParser::lineNr() const
485 struct yyguts_t *yyg = (struct yyguts_t*)p->yyscanner;
486 return yyextra->lineNr;
// Returns the file name stored in the scanner state by parse().
489 std::string XMLParser::fileName() const
491 struct yyguts_t *yyg = (struct yyguts_t*)p->yyscanner;
492 return yyextra->fileName;