db/d7c/utf8_8h_source.html

/******************************************************************************

 *

 * Copyright (C) 1997-2021 by Dimitri van Heesch.

 *

 * Permission to use, copy, modify, and distribute this software and its

 * documentation under the terms of the GNU General Public License is hereby

 * granted. No representations are made about the suitability of this software

 * for any purpose. It is provided "as is" without express or implied warranty.

 * See the GNU General Public License for more details.

 *

 * Documents produced by Doxygen are derivative works derived from the

 * input used in their production; they are not affected by this license.

 *

 */


#ifndef UTF8_H

#define UTF8_H


#include <cstdint>

#include <string>


// Forward declaration: this header only takes TextStream by reference (see writeUTF8Char()).

class TextStream;


/** @file

 *  @brief Various UTF8 related helper functions.

 *

 *  See https://en.wikipedia.org/wiki/UTF-8 for details on UTF8 encoding.

 */


/** Converts the input string into a lower case version, also taking into account

 *  non-ASCII characters that have a lower case variant.

 *

 *  @param input  the UTF8 encoded string to convert; it is passed by const

 *                reference and the result is returned as a new string.

 *  @returns the lower case version of \a input.

 */

std::string convertUTF8ToLower(const std::string &input);


/** Converts the input string into an upper case version, also taking into account

 *  non-ASCII characters that have an upper case variant.

 *

 *  @param input  the UTF8 encoded string to convert; it is passed by const

 *                reference and the result is returned as a new string.

 *  @returns the upper case version of \a input.

 */

std::string convertUTF8ToUpper(const std::string &input);


/** Returns the UTF8 character found at byte position \a pos in the input string.

 *  The resulting string can be a multi byte sequence.

 *

 *  @param input  the UTF8 encoded string to read from.

 *  @param pos    position of the character, counted in bytes (not in characters).

 *  @returns the character at \a pos as a string of one or more bytes.

 *

 *  NOTE(review): the result for a \a pos beyond the end of \a input is not

 *  documented here; check the implementation before relying on it.

 */

std::string getUTF8CharAt(const std::string &input,size_t pos);


/** Returns the 32bit Unicode value matching the character at byte position

 *  \a pos in the UTF8 encoded input.

 *

 *  @param input  the UTF8 encoded string to read from.

 *  @param pos    position of the character, counted in bytes (not in characters).

 *  @returns the Unicode value of the character at \a pos.

 *

 *  NOTE(review): the value returned for an out-of-range \a pos or an invalid

 *  sequence is not documented here; check the implementation.

 */

uint32_t getUnicodeForUTF8CharAt(const std::string &input,size_t pos);


/** Returns the number of bytes making up a single UTF8 character given the first byte

 *  in the sequence.

 *

 *  @param firstByte  the first byte of the UTF8 sequence.

 *  @returns the length of the sequence in bytes.

 *

 *  NOTE(review): the value returned for a byte that cannot start a UTF8

 *  sequence is not visible from this header; confirm in the implementation.

 */

uint8_t getUTF8CharNumBytes(char firstByte);


/** Writes the UTF8 character pointed to by \a s to stream \a t and returns a pointer

 *  to the next character.

 *

 *  @param t  the stream that receives the character.

 *  @param s  pointer to the first byte of the character to write.

 *  @returns a pointer to the character following the one that was written.

 *

 *  NOTE(review): presumably \a s must be non-null and point into a zero

 *  terminated string; confirm against the implementation.

 */

const char *writeUTF8Char(TextStream &t,const char *s);


/** Returns true iff the last character in input is a multibyte character.

 *

 *  @param input  the UTF8 encoded string whose last character is inspected.

 *  @returns \c true if the last character is a multibyte character, \c false otherwise.

 *

 *  NOTE(review): the result for an empty \a input is not documented here;

 *  presumably \c false, but confirm in the implementation.

 */

bool lastUTF8CharIsMultibyte(const std::string &input);


/** Returns true iff the input string at byte position \a pos holds an upper case character.

 *

 *  @param input  the UTF8 encoded string to inspect.

 *  @param pos    position of the character, counted in bytes (not in characters).

 *  @returns \c true if the character at \a pos is upper case, \c false otherwise.

 */

bool isUTF8CharUpperCase(const std::string &input,size_t pos);


/** Checks if the first character pointed at by \a input is a non-breakable whitespace character.

 *

 *  @param input  pointer to the first byte of the character to test.

 *  @returns the size of the character in bytes if there is a match, or 0 if not.

 *

 *  NOTE(review): presumably \a input must be non-null and zero terminated;

 *  confirm against the implementation.

 */

int isUTF8NonBreakableSpace(const char *input);


#endif