#ifndef SIMDUTF_ENCODING_TYPES_H
#define SIMDUTF_ENCODING_TYPES_H

#include <cstddef>
#include <cstdint>
#include <string>

namespace simdutf {

/**
 * Identifies a text encoding.
 *
 * Every named encoding has a distinct single-bit value; `unspecified` is 0.
 * The trailing comments give the byte-order mark (BOM) byte sequence
 * associated with each encoding, where one is listed.
 */
enum encoding_type {
  UTF8 = 1,      // BOM 0xef 0xbb 0xbf
  UTF16_LE = 2,  // BOM 0xff 0xfe
  UTF16_BE = 4,  // BOM 0xfe 0xff
  UTF32_LE = 8,  // BOM 0xff 0xfe 0x00 0x00
  UTF32_BE = 16, // BOM 0x00 0x00 0xfe 0xff
  Latin1 = 32,

  unspecified = 0
};

/** Byte order of multi-byte code units. */
enum endianness { LITTLE = 0, BIG = 1 };

/**
 * NOTE(review): defined elsewhere; presumably reports whether `e` is the
 * byte order of the host system -- confirm against the definition.
 * @param e the byte order to test
 */
bool match_system(endianness e);

/**
 * NOTE(review): defined elsewhere; presumably returns a human-readable name
 * for the encoding -- confirm against the definition.
 * @param bom the encoding type
 */
std::string to_string(encoding_type bom);

// Note that BOM for UTF8 is discouraged.
namespace BOM {

/**
 * Checks for a BOM. If none is found, returns unspecified.
 * @param byte    the string to process
 * @param length  the length of the string in code units
 * @return the corresponding encoding
 */
encoding_type check_bom(const uint8_t *byte, size_t length);
encoding_type check_bom(const char *byte, size_t length);

/**
 * Returns the size, in bytes, of the BOM for a given encoding type.
 * Note that UTF8 BOM are discouraged.
 * @param bom the encoding type
 * @return the size in bytes of the corresponding BOM
 */
size_t bom_byte_size(encoding_type bom);

} // namespace BOM
} // namespace simdutf

#endif // SIMDUTF_ENCODING_TYPES_H