"""Convert straight quotation marks to typographic ones"""importrefromtypingimportAny,Dict,Listfrom.state_coreimportStateCorefrom..common.utilsimportcharCodeAtfrom..common.utilsimportisWhiteSpace,isPunctChar,isMdAsciiPunctfrom..tokenimportTokenQUOTE_TEST_RE=re.compile(r"['\"]")QUOTE_RE=re.compile(r"['\"]")APOSTROPHE="\u2019"# ’
def replaceAt(string: str, index: int, ch: str) -> str:
    """Return a copy of ``string`` with the character at ``index`` replaced by ``ch``.

    Exactly one character of ``string`` is dropped (the one at ``index``),
    while ``ch`` is inserted as-is, so the result may be shorter or longer
    than the input when ``len(ch) != 1``. If ``index`` is past the end of
    ``string``, nothing is dropped and ``ch`` is simply appended.

    :param string: the text to edit
    :param index: zero-based position of the character to replace; must be >= 0
    :param ch: replacement text
    :return: the edited string
    """
    # A negative index would slice relative to the end of the string, which
    # is not what the JavaScript original does. Callers are not expected to
    # pass one, so this is only guarded as an internal invariant.
    assert index >= 0
    head = string[:index]
    tail = string[index + 1 :]
    return "".join((head, ch, tail))
def process_inlines(tokens: List[Token], state: StateCore) -> None:
    """Replace straight quotes in the ``text`` tokens of ``tokens``, in place.

    Every ``'`` and ``"`` found in a token of type ``"text"`` is classified,
    from the characters immediately before and after it, as able to open
    and/or close a quotation. Opening candidates are remembered on a stack;
    when a closing candidate of the same kind and nesting level is found,
    both quotes are rewritten using ``state.md.options.quotes`` (indices 0/1
    are the opening/closing replacement for double quotes, 2/3 for single
    quotes). A single quote that can neither open nor be paired as a closer
    becomes ``APOSTROPHE``.

    :param tokens: inline tokens to process; only ``content`` of ``"text"``
        tokens is modified
    :param state: core state; only ``state.md.options.quotes`` is read
    """
    # Opening-quote candidates still waiting for a closer. Each entry stores
    # the token index, the offset inside that token's content, whether it is
    # a single quote, and the nesting level it was found at.
    stack: List[Dict[str, Any]] = []

    for i, token in enumerate(tokens):
        thisLevel = token.level

        # Forget candidates that were opened at a deeper nesting level than
        # the current token: they can no longer be closed.
        while stack and stack[-1]["level"] > thisLevel:
            stack.pop()

        if token.type != "text":
            continue

        text = token.content
        pos = 0
        maximum = len(text)

        while pos < maximum:
            # Search from ``pos`` directly rather than slicing ``text``:
            # slicing would copy the tail of the string for every quote.
            match = QUOTE_RE.search(text, pos)
            if not match:
                break

            quotePos = match.start()
            pos = quotePos + 1
            isSingle = match.group(0) == "'"
            canOpen = canClose = True

            # Find previous character,
            # default to space if it's the beginning of the line
            lastChar = 0x20
            if quotePos - 1 >= 0:
                lastChar = charCodeAt(text, quotePos - 1)
            else:
                for k in reversed(range(i)):
                    neighbour = tokens[k]
                    # a line break keeps the default of 0x20
                    if neighbour.type in ("softbreak", "hardbreak"):
                        break
                    # should skip all tokens except 'text', 'html_inline'
                    # or 'code_inline' (i.e. tokens with empty content)
                    if not neighbour.content:
                        continue
                    lastChar = charCodeAt(
                        neighbour.content, len(neighbour.content) - 1
                    )
                    break

            # Find next character,
            # default to space if it's the end of the line
            nextChar = 0x20
            if pos < maximum:
                nextChar = charCodeAt(text, pos)
            else:
                for k in range(i + 1, len(tokens)):
                    neighbour = tokens[k]
                    # a line break keeps the default of 0x20
                    if neighbour.type in ("softbreak", "hardbreak"):
                        break
                    # should skip all tokens except 'text', 'html_inline'
                    # or 'code_inline' (i.e. tokens with empty content)
                    if not neighbour.content:
                        continue
                    nextChar = charCodeAt(neighbour.content, 0)
                    break

            isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar))
            isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar))

            isLastWhiteSpace = isWhiteSpace(lastChar)
            isNextWhiteSpace = isWhiteSpace(nextChar)

            # A quote cannot open if whitespace follows it, or if punctuation
            # follows it without whitespace/punctuation in front.
            if isNextWhiteSpace:
                canOpen = False
            elif isNextPunctChar and not (isLastWhiteSpace or isLastPunctChar):
                canOpen = False

            # Mirror image of the rule above for closing.
            if isLastWhiteSpace:
                canClose = False
            elif isLastPunctChar and not (isNextWhiteSpace or isNextPunctChar):
                canClose = False

            # 0x22 is '"'; 0x30..0x39 are the digits '0'..'9'
            if nextChar == 0x22 and not isSingle and 0x30 <= lastChar <= 0x39:
                # special case: 1"" - count first quote as an inch
                canClose = canOpen = False

            if canOpen and canClose:
                # Replace quotes in the middle of punctuation sequence, but not
                # in the middle of the words, i.e.:
                #
                # 1. foo " bar " baz - not replaced
                # 2. foo-"-bar-"-baz - replaced
                # 3. foo"bar"baz     - not replaced
                canOpen = isLastPunctChar
                canClose = isNextPunctChar

            if not canOpen and not canClose:
                # middle of word
                if isSingle:
                    token.content = replaceAt(token.content, quotePos, APOSTROPHE)
                continue

            if canClose:
                # this could be a closing quote, rewind the stack to get a match
                matched = False
                for j in reversed(range(len(stack))):
                    item = stack[j]
                    if item["level"] < thisLevel:
                        break
                    if item["single"] != isSingle or item["level"] != thisLevel:
                        continue

                    quotes = state.md.options.quotes
                    if isSingle:
                        openQuote = quotes[2]
                        closeQuote = quotes[3]
                    else:
                        openQuote = quotes[0]
                        closeQuote = quotes[1]

                    # replace token.content *before* tokens[item.token].content,
                    # because, if they are pointing at the same token, replaceAt
                    # could mess up indices when quote length != 1
                    token.content = replaceAt(token.content, quotePos, closeQuote)
                    opener = tokens[item["token"]]
                    opener.content = replaceAt(
                        opener.content, item["pos"], openQuote
                    )

                    # Keep the scan position in step with the length changes
                    # the replacements made to the current token.
                    pos += len(closeQuote) - 1
                    if item["token"] == i:
                        pos += len(openQuote) - 1

                    text = token.content
                    maximum = len(text)

                    # The matched opener and everything above it are consumed.
                    stack = stack[:j]
                    matched = True
                    break

                if matched:
                    continue

            if canOpen:
                stack.append(
                    {
                        "token": i,
                        "pos": quotePos,
                        "single": isSingle,
                        "level": thisLevel,
                    }
                )
            elif canClose and isSingle:
                # A closing single quote with no opener is an apostrophe.
                token.content = replaceAt(token.content, quotePos, APOSTROPHE)