Doxygen
xml.l
浏览该文件的文档.
1 /******************************************************************************
2  *
3  * Copyright (C) 1997-2020 by Dimitri van Heesch.
4  *
5  * Permission to use, copy, modify, and distribute this software and its
6  * documentation under the terms of the GNU General Public License is hereby
7  * granted. No representations are made about the suitability of this software
8  * for any purpose. It is provided "as is" without express or implied warranty.
9  * See the GNU General Public License for more details.
10  *
11  * Documents produced by Doxygen are derivative works derived from the
12  * input used in their production; they are not affected by this license.
13  *
14  */
15 /******************************************************************************
16  * Minimal flex based parser for XML
17  ******************************************************************************/
18 
19 %option never-interactive
20 %option prefix="xmlYY"
21 %option reentrant
22 %option extra-type="struct xmlYY_state *"
23 %option 8bit noyywrap
24 %top{
25 #include <stdint.h>
26 }
27 
28 %{
29 
30 #include <ctype.h>
31 #include <vector>
32 #include <stdio.h>
33 #include "xml.h"
34 //#include "message.h"
35 
36 #define YY_NEVER_INTERACTIVE 1
37 #define YY_NO_INPUT 1
38 #define YY_NO_UNISTD_H 1
39 
40 struct xmlYY_state
41 {
42  std::string fileName;
43  int lineNr = 1;
44  const char * inputString = 0; //!< the code fragment as text
45  yy_size_t inputPosition = 0; //!< read offset during parsing
46  std::string name;
47  bool isEnd = false;
48  bool selfClose = false;
49  std::string data;
50  std::string attrValue;
51  std::string attrName;
52  XMLHandlers::Attributes attrs;
53  XMLHandlers handlers;
54  int cdataContext;
55  int commentContext;
56  char stringChar;
57  std::vector<std::string> xpath;
58 };
59 
60 #if USE_STATE2STRING
61 static const char *stateToString(int state);
62 #endif
63 
64 static yy_size_t yyread(yyscan_t yyscanner,char *buf,yy_size_t max_size);
65 static void initElement(yyscan_t yyscanner);
66 static void addCharacters(yyscan_t yyscanner);
67 static void addElement(yyscan_t yyscanner);
68 static void addAttribute(yyscan_t yyscanner);
69 static void countLines(yyscan_t yyscanner, const char *txt,yy_size_t len);
70 static void reportError(yyscan_t yyscanner, const std::string &msg);
71 static std::string processData(yyscan_t yyscanner,const char *txt,yy_size_t len);
72 
73 #undef YY_INPUT
74 #define YY_INPUT(buf,result,max_size) result=yyread(yyscanner,buf,max_size);
75 
76 %}
77 
78 NL (\r\n|\r|\n)
79 SP [ \t\r\n]+
80 OPEN {SP}?"<"
81 OPENSPECIAL {SP}?"<?"
82 CLOSE ">"{NL}?
83 CLOSESPECIAL "?>"{NL}?
84 NAMESTART [:A-Za-z\200-\377_]
85 NAMECHAR [:A-Za-z\200-\377_0-9.-]
86 NAME {NAMESTART}{NAMECHAR}*
87 ESC "&#"[0-9]+";"|"&#x"[0-9a-fA-F]+";"
88 COLON ":"
89 PCDATA [^<]+
90 COMMENT {OPEN}"!--"
91 COMMENTEND "--"{CLOSE}
92 STRING \"([^"&]|{ESC})*\"|\'([^'&]|{ESC})*\'
93 DOCTYPE {SP}?"<!DOCTYPE"{SP}
94 CDATA {SP}?"<![CDATA["
95 ENDCDATA "]]>"
96 
97 %option noyywrap
98 
99 %s Initial
100 %s Content
101 %s CDataSection
102 %s Element
103 %s Attributes
104 %s AttributeValue
105 %s AttrValueStr
106 %s Prolog
107 %s Comment
108 
109 %%
110 
<Initial>{
  {SP}              { countLines(yyscanner,yytext,yyleng); }
  {DOCTYPE}         { countLines(yyscanner,yytext,yyleng); }
  {OPENSPECIAL}     { countLines(yyscanner,yytext,yyleng); BEGIN(Prolog); }
  {OPEN}            { countLines(yyscanner,yytext,yyleng);
                      initElement(yyscanner);
                      BEGIN(Element);
                    }
  {COMMENT}         { /* the match may start with white space that contains newlines */
                      countLines(yyscanner,yytext,yyleng);
                      yyextra->commentContext = YY_START;
                      BEGIN(Comment);
                    }
}

  /* character data, CDATA sections and comments between two tags */
<Content>{
  {CDATA}           { countLines(yyscanner,yytext,yyleng);
                      yyextra->cdataContext = YY_START;
                      BEGIN(CDataSection);
                    }
  {PCDATA}          { /* the text can span multiple lines; the lines are counted after the
                         entities are processed so an entity error is reported for the line
                         on which the text starts */
                      yyextra->data += processData(yyscanner,yytext,yyleng);
                      countLines(yyscanner,yytext,yyleng);
                    }
  {OPEN}            { countLines(yyscanner,yytext,yyleng);
                      addCharacters(yyscanner);
                      initElement(yyscanner);
                      BEGIN(Element);
                    }
  {COMMENT}         { yyextra->commentContext = YY_START;
                      countLines(yyscanner,yytext,yyleng);
                      BEGIN(Comment);
                    }
}

  /* directly after the '<' of a tag: optional '/' followed by the tag name */
<Element>{
  "/"               { yyextra->isEnd = true; }
  {NAME}            { yyextra->name = yytext;
                      BEGIN(Attributes);
                    }
  {CLOSE}           { addElement(yyscanner);
                      countLines(yyscanner,yytext,yyleng);
                      yyextra->data = "";
                      BEGIN(Content);
                    }
  {SP}              { countLines(yyscanner,yytext,yyleng); }
}

  /* after the tag name: attributes, an optional '/' and the closing '>' */
<Attributes>{
  "/"               { yyextra->selfClose = true; }
  {NAME}            { yyextra->attrName = yytext; }
  "="               { BEGIN(AttributeValue); }
  {CLOSE}           { addElement(yyscanner);
                      countLines(yyscanner,yytext,yyleng);
                      yyextra->data = "";
                      BEGIN(Content);
                    }
  {SP}              { countLines(yyscanner,yytext,yyleng); }
}

  /* after the '=' of an attribute: expect the opening quote of the value */
<AttributeValue>{
  {SP}              { countLines(yyscanner,yytext,yyleng); }
  ['"]              { yyextra->stringChar = *yytext;
                      yyextra->attrValue = "";
                      BEGIN(AttrValueStr);
                    }
  .                 { std::string msg = std::string("Missing attribute value. Unexpected character `")+yytext+"` found";
                      reportError(yyscanner,msg);
                      unput(*yytext);
                      BEGIN(Attributes);
                    }
}

  /* inside a quoted attribute value */
<AttrValueStr>{
  [^'"\n]+          { yyextra->attrValue += processData(yyscanner,yytext,yyleng); }
  ['"]              { if (*yytext==yyextra->stringChar) /* matching quote ends the value */
                      {
                        addAttribute(yyscanner);
                        BEGIN(Attributes);
                      }
                      else /* the other kind of quote is part of the value */
                      {
                        yyextra->attrValue += processData(yyscanner,yytext,yyleng);
                      }
                    }
  \n                { yyextra->lineNr++; yyextra->attrValue+=' '; }
}

  /* inside <![CDATA[ ... ]]>: text is copied without entity processing */
<CDataSection>{
  {ENDCDATA}        { BEGIN(yyextra->cdataContext); }
  [^]\n]+           { yyextra->data += yytext; }
  \n                { yyextra->data += yytext;
                      yyextra->lineNr++;
                    }
  .                 { yyextra->data += yytext; }
}

  /* inside <? ... ?>: the content is skipped */
<Prolog>{
  {CLOSESPECIAL}    { countLines(yyscanner,yytext,yyleng);
                      BEGIN(Initial);
                    }
  [^?\n]+           { }
  \n                { yyextra->lineNr++; }
  .                 { }
}

  /* inside a comment: the content is skipped */
<Comment>{
  {COMMENTEND}      { countLines(yyscanner,yytext,yyleng);
                      BEGIN(yyextra->commentContext);
                    }
  [^\n-]+           { }
  \n                { yyextra->lineNr++; }
  .                 { }
}

  /* fallback rules for input that is not matched by any of the rules above */
\n                  { yyextra->lineNr++; }
.                   { std::string msg = "Unexpected character `";
                      msg+=yytext;
                      msg+="` found";
                      reportError(yyscanner,msg);
                    }
216 
217 %%
218 
219 //----------------------------------------------------------------------------------------
220 
221 static yy_size_t yyread(yyscan_t yyscanner,char *buf,size_t max_size)
222 {
223  struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
224  yy_size_t inputPosition = yyextra->inputPosition;
225  const char *s = yyextra->inputString + inputPosition;
226  yy_size_t c=0;
227  while( c < max_size && *s)
228  {
229  *buf++ = *s++;
230  c++;
231  }
232  yyextra->inputPosition += c;
233  return c;
234 }
235 
236 static void countLines(yyscan_t yyscanner, const char *txt,yy_size_t len)
237 {
238  struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
239  for (yy_size_t i=0;i<len;i++)
240  {
241  if (txt[i]=='\n') yyextra->lineNr++;
242  }
243 }
244 
245 static void initElement(yyscan_t yyscanner)
246 {
247  struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
248  yyextra->isEnd = false; // true => </tag>
249  yyextra->selfClose = false; // true => <tag/>
250  yyextra->name = "";
251  yyextra->attrs.clear();
252 }
253 
254 static void checkAndUpdatePath(yyscan_t yyscanner)
255 {
256  struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
257  if (yyextra->xpath.empty())
258  {
259  std::string msg = "found closing tag '"+yyextra->name+"' without matching opening tag";
260  reportError(yyscanner,msg);
261  }
262  else
263  {
264  std::string expectedTagName = yyextra->xpath.back();
265  if (expectedTagName!=yyextra->name)
266  {
267  std::string msg = "Found closing tag '"+yyextra->name+"' that does not match the opening tag '"+expectedTagName+"' at the same level";
268  reportError(yyscanner,msg);
269  }
270  else // matching end tag
271  {
272  yyextra->xpath.pop_back();
273  }
274  }
275 }
276 
277 static void addElement(yyscan_t yyscanner)
278 {
279  struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
280  if (!yyextra->isEnd)
281  {
282  yyextra->xpath.push_back(yyextra->name);
283  if (yyextra->handlers.startElement)
284  {
285  yyextra->handlers.startElement(yyextra->name,yyextra->attrs);
286  }
287  if (yy_flex_debug)
288  {
289  fprintf(stderr,"%d: startElement(%s,attr=[",yyextra->lineNr,yyextra->name.data());
290  for (auto attr : yyextra->attrs)
291  {
292  fprintf(stderr,"%s='%s' ",attr.first.c_str(),attr.second.c_str());
293  }
294  fprintf(stderr,"])\n");
295  }
296  }
297  if (yyextra->isEnd || yyextra->selfClose)
298  {
299  if (yy_flex_debug)
300  {
301  fprintf(stderr,"%d: endElement(%s)\n",yyextra->lineNr,yyextra->name.data());
302  }
303  checkAndUpdatePath(yyscanner);
304  if (yyextra->handlers.endElement)
305  {
306  yyextra->handlers.endElement(yyextra->name);
307  }
308  }
309 }
310 
/** Returns a copy of \a str without leading and trailing white space.
 *
 *  White space is whatever isspace() reports for the current C locale. The
 *  result is empty when \a str is empty or consists of white space only.
 */
static std::string trimSpaces(const std::string &str)
{
  // isspace() is only defined for values representable as unsigned char (and EOF),
  // so bytes >= 0x80 (e.g. from UTF-8 text) must not be passed as a negative char
  auto isSpace = [](char c) { return isspace(static_cast<unsigned char>(c))!=0; };
  size_t s=0;            // index of the first character to keep
  size_t e=str.length(); // index one past the last character to keep
  while (s<e && isSpace(str[s]))   s++;
  while (e>s && isSpace(str[e-1])) e--;
  return str.substr(s,e-s);
}
319 
320 static void addCharacters(yyscan_t yyscanner)
321 {
322  struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
323  std::string data = trimSpaces(yyextra->data);
324  if (yyextra->handlers.characters)
325  {
326  yyextra->handlers.characters(data);
327  }
328  if (!data.empty())
329  {
330  if (yy_flex_debug)
331  {
332  fprintf(stderr,"characters(%s)\n",data.c_str());
333  }
334  }
335 }
336 
337 static void addAttribute(yyscan_t yyscanner)
338 {
339  struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
340  yyextra->attrs.insert(std::make_pair(yyextra->attrName,yyextra->attrValue));
341 }
342 
343 static void reportError(yyscan_t yyscanner,const std::string &msg)
344 {
345  struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
346  if (yy_flex_debug)
347  {
348  fprintf(stderr,"%s:%d: Error '%s'\n",yyextra->fileName.c_str(),yyextra->lineNr,msg.c_str());
349  }
350  if (yyextra->handlers.error)
351  {
352  yyextra->handlers.error(yyextra->fileName,yyextra->lineNr,msg);
353  }
354 }
355 
356 static const char *entities_enc[] = { "amp", "quot", "gt", "lt", "apos" };
357 static const char entities_dec[] = { '&', '"', '>', '<', '\'' };
358 static const int num_entities = 5;
359 
360 // replace character entities such as &amp; in txt and return the string where entities
361 // are replaced
362 static std::string processData(yyscan_t yyscanner,const char *txt,yy_size_t len)
363 {
364  std::string result;
365  result.reserve(len);
366  for (yy_size_t i=0; i<len; i++)
367  {
368  char c = txt[i];
369  if (c=='&')
370  {
371  const int maxEntityLen = 10;
372  char entity[maxEntityLen+1];
373  entity[maxEntityLen]='\0';
374  for (yy_size_t j=0; j<maxEntityLen && i+j+1<len; j++)
375  {
376  if (txt[i+j+1]!=';')
377  {
378  entity[j]=txt[i+j+1];
379  }
380  else
381  {
382  entity[j]=0;
383  break;
384  }
385  }
386  bool found=false;
387  for (int e=0; !found && e<num_entities; e++)
388  {
389  if (strcmp(entity,entities_enc[e])==0)
390  {
391  result+=entities_dec[e];
392  i+=strlen(entities_enc[e])+1;
393  found=true;
394  }
395  }
396  if (!found)
397  {
398  std::string msg = std::string("Invalid character entity '&") + entity + ";' found\n";
399  reportError(yyscanner,msg);
400  }
401  }
402  else
403  {
404  result+=c;
405  }
406  }
407  return result;
408 }
409 
410 //--------------------------------------------------------------
411 
412 struct XMLParser::Private
413 {
414  yyscan_t yyscanner;
415  struct xmlYY_state xmlYY_extra;
416 };
417 
418 XMLParser::XMLParser(const XMLHandlers &handlers) : p(new Private)
419 {
420  xmlYYlex_init_extra(&p->xmlYY_extra,&p->yyscanner);
421  p->xmlYY_extra.handlers = handlers;
422 }
423 
424 XMLParser::~XMLParser()
425 {
426  xmlYYlex_destroy(p->yyscanner);
427 }
428 
429 void XMLParser::parse(const char *fileName,const char *inputStr, bool debugEnabled)
430 {
431  yyscan_t yyscanner = p->yyscanner;
432  struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
433 
434 #ifdef FLEX_DEBUG
435  xmlYYset_debug(1,p->yyscanner);
436 #endif
437 
438  if (inputStr==nullptr || inputStr[0]=='\0') return; // empty input
439 
440  FILE *output = 0;
441  const char *enter_txt = 0;
442  const char *finished_txt = 0;
443  const char *pre_txt = 0;
444  if (yy_flex_debug) { output=stderr; pre_txt="--"; enter_txt="entering"; finished_txt="finished"; }
445  else if (debugEnabled) { output=stdout; pre_txt=""; enter_txt="Entering"; finished_txt="Finished"; }
446 
447  if (output)
448  {
449  fprintf(output,"%s%s lexical analyzer: %s (for: %s)\n",pre_txt,enter_txt, __FILE__, fileName);
450  }
451 
452  BEGIN(Initial);
453  yyextra->fileName = fileName;
454  yyextra->lineNr = 1;
455  yyextra->inputString = inputStr;
456  yyextra->inputPosition = 0;
457 
458  xmlYYrestart( 0, yyscanner );
459 
460  if (yyextra->handlers.startDocument)
461  {
462  yyextra->handlers.startDocument();
463  }
464  xmlYYlex(yyscanner);
465  if (yyextra->handlers.endDocument)
466  {
467  yyextra->handlers.endDocument();
468  }
469 
470  if (!yyextra->xpath.empty())
471  {
472  std::string tagName = yyextra->xpath.back();
473  std::string msg = "End of file reached while expecting closing tag '"+tagName+"'";
474  reportError(yyscanner,msg);
475  }
476 
477  if (output)
478  {
479  fprintf(output,"%s%s lexical analyzer: %s (for: %s)\n",pre_txt,finished_txt, __FILE__, fileName);
480  }
481 }
482 
483 int XMLParser::lineNr() const
484 {
485  struct yyguts_t *yyg = (struct yyguts_t*)p->yyscanner;
486  return yyextra->lineNr;
487 }
488 
489 std::string XMLParser::fileName() const
490 {
491  struct yyguts_t *yyg = (struct yyguts_t*)p->yyscanner;
492  return yyextra->fileName;
493 }
494 
495 #if USE_STATE2STRING
496 #include "xml.l.h"
497 #endif