#include <cstdint>
#include <sstream>
#include "utf8.h"
#include "caseconvert.h"
#include "textstream.h"

utf8.cpp 的引用(Include)关系图:

函数
uint8_t	getUTF8CharNumBytes (char c)
	Returns the number of bytes making up a single UTF8 character given the first byte in the sequence. 更多...

static uint32_t	decode_utf8 (const char *data, int numBytes) noexcept
	Decodes a given input of utf8 data to a unicode code point given the number of bytes it's made of 更多...

static uint32_t	convertUTF8CharToUnicode (const char *s, size_t bytesLeft, int &len)

std::string	getUTF8CharAt (const std::string &input, size_t pos)
	Returns the UTF8 character found at byte position pos in the input string. 更多...

uint32_t	getUnicodeForUTF8CharAt (const std::string &input, size_t pos)
	Returns the 32bit Unicode value matching character at byte position pos in the UTF8 encoded input. 更多...

static char	asciiToLower (uint32_t code)

static char	asciiToUpper (uint32_t code)

static std::string	caseConvert (const std::string &input, char(*asciiConversionFunc)(uint32_t code), const char *(*conversionFunc)(uint32_t code))

std::string	convertUTF8ToLower (const std::string &input)
	Converts the input string into a lower case version, also taking into account non-ASCII characters that have a lower case variant. 更多...

std::string	convertUTF8ToUpper (const std::string &input)
	Converts the input string into an upper case version, also taking into account non-ASCII characters that have an upper case variant. 更多...

const char *	writeUTF8Char (TextStream &t, const char *s)
	Writes the UTF8 character pointed to by s to stream t and returns a pointer to the next character. 更多...

bool	lastUTF8CharIsMultibyte (const std::string &input)
	Returns true iff the last character in input is a multibyte character. 更多...

bool	isUTF8CharUpperCase (const std::string &input, size_t pos)
	Returns true iff the input string at byte position pos holds an upper case character. 更多...

int	isUTF8NonBreakableSpace (const char *input)
	Check if the first character pointed at by input is a non-breakable whitespace character. 更多...

函数说明

◆ asciiToLower()

static char asciiToLower ( uint32_t code )

inline static

在文件 utf8.cpp 第 142 行定义.

 {
   return code>='A' && code<='Z' ? (char)(code+'a'-'A') : (char)code;
 }

被这些函数引用 convertUTF8ToLower().

◆ asciiToUpper()

static char asciiToUpper ( uint32_t code )

inline static

在文件 utf8.cpp 第 147 行定义.

 {
   return code>='a' && code<='z' ? (char)(code+'A'-'a') : (char)code;
 }

被这些函数引用 convertUTF8ToUpper().

◆ caseConvert()

static std::string caseConvert	(	const std::string &	input,
		char(*)(uint32_t code)	asciiConversionFunc,
		const char *(*)(uint32_t code)	conversionFunc
	)

inline static

在文件 utf8.cpp 第 152 行定义.

 {
   uint32_t code;
   std::string result;
   result.reserve(input.length()); // assume all ASCII characters
   int len;
   size_t bytesLeft = input.length();
   const char *p = input.c_str();
   while ((code=convertUTF8CharToUnicode(p,bytesLeft,len)))
   {
     if (code<128) // ASCII case
     {
       char c = asciiConversionFunc(code);
       result+=c;
     }
     else // generic case
     {
       const char *conv = conversionFunc(code);
       if (conv==nullptr) // no difference between lower and upper case
       {
         result.append(p,len);
       }
       else // replace the input character with the conversion result
       {
         result.append(conv);
       }
     }
     p+=len;
     bytesLeft-=len;
   }
   return result;
 }

引用了 convertUTF8CharToUnicode().

被这些函数引用 convertUTF8ToLower() , 以及 convertUTF8ToUpper().

◆ convertUTF8CharToUnicode()

static uint32_t convertUTF8CharToUnicode	(	const char *	s,
		size_t	bytesLeft,
		int &	len
	)

inline static

在文件 utf8.cpp 第 69 行定义.

 {
   if (s==0 || bytesLeft==0)
   {
     len=0;
     return 0;
   }
   unsigned char uc = static_cast<unsigned char>(*s);
   if (uc<128) // ASCII case
   {
     len=1;
     return uc;
   }
   switch (bytesLeft)
   {
     default:
       if ((uc&0xFEu)==0xFCu)// 1111110X six bytes
       {
         len=6;
         return decode_utf8(s,len);
       }
       // fall through
     case 5:
       if ((uc&0xFCu)==0xF8u) // 111110XX five bytes
       {
         len=5;
         return decode_utf8(s,len);
       }
       // fall through
     case 4:
       if ((uc&0xF8u)==0xF0u) // 11110XXX four bytes
       {
         len=4;
         return decode_utf8(s,len);
       }
       // fall through
     case 3:
       if ((uc&0xF0u)==0xE0u) // 1110XXXX three bytes
       {
         len=3;
         return decode_utf8(s,len);
       }
       // fall through
     case 2:
       if ((uc&0xE0u)==0xC0u) // 110XXXXX two bytes
       {
         len=2;
         return decode_utf8(s,len);
       }
       // fall through
     case 1:
       {
         len=1;
         return uc;
       }
   }
 }

引用了 decode_utf8().

被这些函数引用 caseConvert(), getUnicodeForUTF8CharAt() , 以及 isUTF8CharUpperCase().

◆ convertUTF8ToLower()

std::string convertUTF8ToLower ( const std::string & input )

Converts the input string into a lower case version, also taking into account non-ASCII characters that have a lower case variant.

在文件 utf8.cpp 第 187 行定义.

 {
   return caseConvert(input,asciiToLower,convertUnicodeToLower);
 }

引用了 asciiToLower(), caseConvert() , 以及 convertUnicodeToLower().

被这些函数引用 addClassMemberNameToIndex(), addFileMemberNameToIndex(), addMemberToSearchIndex(), addNamespaceMemberNameToIndex(), createJavaScriptSearchIndex(), QCString::lower() , 以及 searchId().

◆ convertUTF8ToUpper()

std::string convertUTF8ToUpper ( const std::string & input )

Converts the input string into an upper case version, also taking into account non-ASCII characters that have an upper case variant.

在文件 utf8.cpp 第 192 行定义.

 {
   return caseConvert(input,asciiToUpper,convertUnicodeToUpper);
 }

引用了 asciiToUpper(), caseConvert() , 以及 convertUnicodeToUpper().

被这些函数引用 FilterAlphaIndex::determineSortKey(), QCString::upper() , 以及 writeAlphabeticalClassList().

◆ decode_utf8()

static uint32_t decode_utf8	(	const char *	data,
		int	numBytes
	)

inline static noexcept

Decodes a given input of utf8 data to a unicode code point given the number of bytes it's made of

在文件 utf8.cpp 第 55 行定义.

 {
   uint32_t cp = (unsigned char)*data;
   if (numBytes>1)
   {
     cp &= 0x7F >> numBytes; // Mask out the header bits
     for (int i=1 ; i<numBytes ; i++)
     {
       cp = (cp<<6) | ((unsigned char)data[i]&0x3F);
     }
   }
   return cp;
 }

被这些函数引用 convertUTF8CharToUnicode().

◆ getUnicodeForUTF8CharAt()

uint32_t getUnicodeForUTF8CharAt	(	const std::string &	input,
		size_t	pos
	)

Returns the 32bit Unicode value matching character at byte position pos in the UTF8 encoded input.

在文件 utf8.cpp 第 135 行定义.

 {
   std::string charS = getUTF8CharAt(input,pos);
   int len;
   return convertUTF8CharToUnicode(charS.c_str(),charS.length(),len);
 }

引用了 convertUTF8CharToUnicode() , 以及 getUTF8CharAt().

◆ getUTF8CharAt()

std::string getUTF8CharAt	(	const std::string &	input,
		size_t	pos
	)

Returns the UTF8 character found at byte position pos in the input string.

The resulting string can be a multi byte sequence.

在文件 utf8.cpp 第 127 行定义.

 {
   if (input.length()<=pos) return std::string();
   int numBytes=getUTF8CharNumBytes(input[pos]);
   if (input.length()<pos+numBytes) return std::string();
   return input.substr(pos,pos+numBytes);
 }

引用了 getUTF8CharNumBytes().

被这些函数引用 addClassMemberNameToIndex(), addFileMemberNameToIndex(), addMemberToSearchIndex(), addNamespaceMemberNameToIndex(), createJavaScriptSearchIndex(), FilterAlphaIndex::determineSortKey(), getUnicodeForUTF8CharAt() , 以及 writeAlphabeticalClassList().

◆ getUTF8CharNumBytes()

uint8_t getUTF8CharNumBytes ( char c )

Returns the number of bytes making up a single UTF8 character given the first byte in the sequence.

在文件 utf8.cpp 第 23 行定义.

 {
   uint8_t num=1;
   unsigned char uc = static_cast<unsigned char>(c);
   if (uc>=0x80u) // multibyte character
   {
     if ((uc&0xE0u)==0xC0u)
     {
       num=2; // 110x.xxxx: 2 byte character
     }
     if ((uc&0xF0u)==0xE0u)
     {
       num=3; // 1110.xxxx: 3 byte character
     }
     if ((uc&0xF8u)==0xF0u)
     {
       num=4; // 1111.0xxx: 4 byte character
     }
     if ((uc&0xFCu)==0xF8u)
     {
       num=5; // 1111.10xx: 5 byte character
     }
     if ((uc&0xFEu)==0xFCu)
     {
       num=6; // 1111.110x: 6 byte character
     }
   }
   return num;
 }

被这些函数引用 Markdown::detab(), escapeCharsInString(), getUTF8CharAt(), nextUTF8CharPosition() , 以及 writeUTF8Char().

◆ isUTF8CharUpperCase()

bool isUTF8CharUpperCase	(	const std::string &	input,
		size_t	pos
	)

Returns true iff the input string at byte position pos holds an upper case character.

在文件 utf8.cpp 第 218 行定义.

 {
   if (input.length()<=pos) return false;
   int len;
   // turn the UTF8 character at position pos into a unicode value
   uint32_t code = convertUTF8CharToUnicode(input.c_str()+pos,input.length()-pos,len);
   // check if the character can be converted to lower case, if so it was an upper case character
   return convertUnicodeToLower(code)!=nullptr;
 }

引用了 convertUnicodeToLower() , 以及 convertUTF8CharToUnicode().

被这些函数引用 DefinitionImpl::_setBriefDescription().

◆ isUTF8NonBreakableSpace()

int isUTF8NonBreakableSpace ( const char * input )

Check if the first character pointed at by input is a non-breakable whitespace character.

Returns the byte size of the character if there is a match or 0 if not.

在文件 utf8.cpp 第 228 行定义.

 {
   return (static_cast<unsigned char>(input[0])==0xC2 &&
           static_cast<unsigned char>(input[1])==0xA0) ? 2 : 0;
 }

被这些函数引用 Markdown::detab().

◆ lastUTF8CharIsMultibyte()

bool lastUTF8CharIsMultibyte ( const std::string & input )

Returns true iff the last character in input is a multibyte character.

在文件 utf8.cpp 第 212 行定义.

 {
   // last byte is part of a multibyte UTF8 char if bit 8 is set and bit 7 is not
   return !input.empty() && (((unsigned char)input[input.length()-1])&0xC0)==0x80;
 }

被这些函数引用 DefinitionImpl::_setBriefDescription().

◆ writeUTF8Char()

const char* writeUTF8Char	(	TextStream &	t,
		const char *	s
	)

Writes the UTF8 character pointed to by s to stream t and returns a pointer to the next character.

在文件 utf8.cpp 第 197 行定义.

 {
   if (s==0) return 0;
   uint8_t len = getUTF8CharNumBytes(*s);
   for (uint8_t i=0;i<len;i++)
   {
     if (s[i]==0) // detect premature end of string (due to invalid UTF8 char)
     {
       len=i;
     }
   }
   t.write(s,len);
   return s+len;
 }

引用了 getUTF8CharNumBytes() , 以及 TextStream::write().

被这些函数引用 HtmlCodeGenerator::codify(), ManGenerator::codify(), RTFGenerator::codify() , 以及 writeXMLCodeString().