"""Convert HTML to docutils nodes."""from__future__importannotationsimportrefromtypingimportTYPE_CHECKINGfromdocutilsimportnodesfrommyst_parser.parsers.parse_htmlimportData,tokenize_htmlfrommyst_parser.warnings_importMystWarningsifTYPE_CHECKING:from.baseimportDocutilsRenderer
# HTML attributes of an <img> tag that are forwarded as options of the
# docutils ``image`` directive.
# note: docutils also has scale and target
OPTION_KEYS_IMAGE = {"class", "alt", "height", "width", "align", "name"}

# HTML attributes of an admonition <div> that are forwarded as options of the
# docutils ``admonition`` directive.
OPTION_KEYS_ADMONITION = {"class", "name"}

# Opening/closing tags that the GFM "tagfilter" extension disallows.
# See https://github.com/micromark/micromark-extension-gfm-tagfilter
# The lookahead requires whitespace, "/" or ">" after the tag name, so that
# longer names sharing a prefix (e.g. "<scripts") are not matched.
RE_FLOW = re.compile(
    r"<(\/?)(iframe|noembed|noframes|plaintext|script|style|title|textarea|xmp)(?=[\t\n\f\r />])",
    re.IGNORECASE,
)
def html_to_nodes(
    text: str, line_number: int, renderer: DocutilsRenderer
) -> list[nodes.Element]:
    """Convert HTML to docutils nodes.

    Only two constructs are converted, each gated by an entry in
    ``renderer.md_config.enable_extensions``:

    - ``html_image``: top-level ``<img>`` elements are run through the
      ``image`` directive.
    - ``html_admonition``: top-level ``<div>`` elements with an
      ``admonition`` class are run through the ``admonition`` directive.

    In every other case (no extension enabled, unparsable or empty HTML, or
    any top-level element that is not one of the above) the text is handed to
    ``default_html`` unchanged.

    :param text: the HTML source text
    :param line_number: source line number attached to produced nodes/messages
    :param renderer: the renderer providing configuration, the document,
        warning/error reporting and ``run_directive``
    :return: the list of nodes that replaces the HTML
    """
    if renderer.md_config.gfm_only:
        # GFM tagfilter: neutralise disallowed tags by escaping their leading
        # "<" (the tag text itself is otherwise left untouched).
        text, _ = RE_FLOW.subn(lambda s: s.group(0).replace("<", "&lt;"), text)

    def fallback():
        # Evaluated lazily so that ``renderer.document["source"]`` is only
        # looked up on the paths that actually fall back.
        return default_html(text, renderer.document["source"], line_number)

    enable_html_img = "html_image" in renderer.md_config.enable_extensions
    enable_html_admonition = "html_admonition" in renderer.md_config.enable_extensions

    if not (enable_html_img or enable_html_admonition):
        return fallback()

    # parse the HTML to AST
    try:
        root = tokenize_html(text).strip(inplace=True, recurse=False)
    except Exception:
        # Deliberately broad: any parser failure is downgraded to a warning
        # and the HTML is handled by the fallback instead of aborting.
        msg_node = renderer.create_warning(
            "HTML could not be parsed", MystWarnings.HTML_PARSE, line=line_number
        )
        return ([msg_node] if msg_node else []) + fallback()

    if len(root) < 1:
        # if empty
        return fallback()

    # Convert only when *every* top-level element is a supported one;
    # mixed content is left entirely to the fallback.
    if not all(
        (enable_html_img and element.name == "img")
        or (
            enable_html_admonition
            and element.name == "div"
            and "admonition" in element.attrs.classes
        )
        for element in root
    ):
        return fallback()

    nodes_list = []
    for element in root:
        if element.name == "img":
            if "src" not in element.attrs:
                # Note: this discards any nodes produced for earlier elements.
                return [
                    renderer.reporter.error(
                        "<img> missing 'src' attribute", line=line_number
                    )
                ]
            content = "\n".join(
                f":{k}: {v}"
                for k, v in sorted(element.attrs.items())
                if k in OPTION_KEYS_IMAGE
            )
            nodes_list.extend(
                renderer.run_directive(
                    "image", element.attrs["src"], content, line_number
                )
            )
            continue

        # Otherwise the element is an admonition <div> (guaranteed by the
        # ``all`` check above).
        children = element.strip().children

        # A leading <div>/<p> with a title class supplies the admonition
        # title and is removed from the body; otherwise default to "Note".
        if (
            children
            and children[0].name in ("div", "p")
            and (
                "title" in children[0].attrs.classes
                or "admonition-title" in children[0].attrs.classes
            )
        ):
            title_element = children.pop(0)
            title = "".join(part.render() for part in title_element)
        else:
            title = "Note"

        options = "\n".join(
            f":{k}: {v}"
            for k, v in sorted(element.attrs.items())
            if k in OPTION_KEYS_ADMONITION
        ).rstrip()

        # Unwrap <p> elements: keep their children and separate paragraphs
        # with a blank line; all other elements are kept as they are.
        body_parts = []
        for item in children:
            if item.name == "p":
                body_parts.extend(item.children)
                body_parts.append(Data("\n\n"))
            else:
                body_parts.append(item)

        content = (
            options
            + ("\n\n" if options else "")
            + "".join(part.render() for part in body_parts).lstrip()
        )

        nodes_list.extend(
            renderer.run_directive("admonition", title, content, line_number)
        )

    return nodes_list