// Copyright (c) 2017 Cloudflare, Inc. and contributors // Licensed under the MIT License: // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. #ifndef KJ_ENCODING_H_ #define KJ_ENCODING_H_ // Functions for encoding/decoding bytes and text in common formats, including: // - UTF-{8,16,32} // - Hex // - URI encoding // - Base64 #if defined(__GNUC__) && !KJ_HEADER_WARNINGS #pragma GCC system_header #endif #include "string.h" namespace kj { template struct EncodingResult: public ResultType { // Equivalent to ResultType (a String or wide-char array) for all intents and purposes, except // that the bool `hadErrors` can be inspected to see if any errors were encountered in the input. // Each encoding/decoding function that returns this type will "work around" errors in some way, // so an application doesn't strictly have to check for errors. E.g. the Unicode functions // replace errors with U+FFFD in the output. // // Through magic, KJ_IF_MAYBE() and KJ_{REQUIRE,ASSERT}_NONNULL() work on EncodingResult // exactly if it were a Maybe that is null in case of errors. inline EncodingResult(ResultType&& result, bool hadErrors) : ResultType(kj::mv(result)), hadErrors(hadErrors) {} const bool hadErrors; }; EncodingResult> encodeUtf16(ArrayPtr text, bool nulTerminate = false); EncodingResult> encodeUtf32(ArrayPtr text, bool nulTerminate = false); // Convert UTF-8 text (which KJ strings use) to UTF-16 or UTF-32. // // If `nulTerminate` is true, an extra NUL character will be added to the end of the output. // // The `try` versions return null if the input is invalid; the non-`try` versions return data // containing the Unicode replacement character (U+FFFD). // // The returned arrays are in platform-native endianness (otherwise they wouldn't really be // char16_t / char32_t). EncodingResult decodeUtf16(ArrayPtr utf16); EncodingResult decodeUtf32(ArrayPtr utf32); // Convert UTF-16 or UTF-32 to UTF-8 (which KJ strings use). // // The input should NOT include a NUL terminator; any NUL characters in the input array will be // preserved in the output. // // The `try` versions return null if the input is invalid; the non-`try` versions return data // containing the Unicode replacement character (U+FFFD). // // The input must be in platform-native endianness. BOMs are NOT recognized by these functions. String encodeHex(ArrayPtr bytes); EncodingResult> decodeHex(ArrayPtr text); // Encode/decode bytes as hex strings. String encodeUriComponent(ArrayPtr bytes); String encodeUriComponent(ArrayPtr bytes); EncodingResult> decodeBinaryUriComponent( ArrayPtr text, bool nulTerminate = false); EncodingResult decodeUriComponent(ArrayPtr text); // Encode/decode URI components using % escapes. See Javascript's encodeURIComponent(). String encodeCEscape(ArrayPtr bytes); String encodeCEscape(ArrayPtr bytes); EncodingResult> decodeBinaryCEscape( ArrayPtr text, bool nulTerminate = false); EncodingResult decodeCEscape(ArrayPtr text); String encodeBase64(ArrayPtr bytes, bool breakLines = false); // Encode the given bytes as base64 text. If `breakLines` is true, line breaks will be inserted // into the output every 72 characters (e.g. for encoding e-mail bodies). Array decodeBase64(ArrayPtr text); // Decode base64 text. Non-base64 characters are ignored and padding characters are not requried; // as such, this function never fails. // ======================================================================================= // inline implementation details namespace _ { // private template NullableValue readMaybe(EncodingResult&& value) { if (value.hadErrors) { return nullptr; } else { return kj::mv(value); } } template T* readMaybe(EncodingResult& value) { if (value.hadErrors) { return nullptr; } else { return &value; } } template const T* readMaybe(const EncodingResult& value) { if (value.hadErrors) { return nullptr; } else { return &value; } } } // namespace _ (private) inline String encodeUriComponent(ArrayPtr text) { return encodeUriComponent(text.asBytes()); } inline EncodingResult decodeUriComponent(ArrayPtr text) { auto result = decodeBinaryUriComponent(text, true); return { String(result.releaseAsChars()), result.hadErrors }; } inline String encodeCEscape(ArrayPtr text) { return encodeCEscape(text.asBytes()); } inline EncodingResult decodeCEscape(ArrayPtr text) { auto result = decodeBinaryCEscape(text, true); return { String(result.releaseAsChars()), result.hadErrors }; } // If you pass a string literal to a function taking ArrayPtr, it'll include the NUL // termintator, which is surprising. Let's add overloads that avoid that. In practice this probably // only even matters for encoding-test.c++. template inline EncodingResult> encodeUtf16(const char (&text)[s], bool nulTerminate=false) { return encodeUtf16(arrayPtr(text, s - 1), nulTerminate); } template inline EncodingResult> encodeUtf32(const char (&text)[s], bool nulTerminate=false) { return encodeUtf32(arrayPtr(text, s - 1), nulTerminate); } template inline EncodingResult decodeUtf16(const char16_t (&utf16)[s]) { return decodeUtf16(arrayPtr(utf16, s - 1)); } template inline EncodingResult decodeUtf32(const char32_t (&utf32)[s]) { return decodeUtf32(arrayPtr(utf32, s - 1)); } template inline EncodingResult> decodeHex(const char (&text)[s]) { return decodeHex(arrayPtr(text, s - 1)); } template inline String encodeUriComponent(const char (&text)[s]) { return encodeUriComponent(arrayPtr(text, s - 1)); } template inline Array decodeBinaryUriComponent(const char (&text)[s]) { return decodeBinaryUriComponent(arrayPtr(text, s - 1)); } template inline EncodingResult decodeUriComponent(const char (&text)[s]) { return decodeUriComponent(arrayPtr(text, s-1)); } template inline String encodeCEscape(const char (&text)[s]) { return encodeCEscape(arrayPtr(text, s - 1)); } template inline EncodingResult> decodeBinaryCEscape(const char (&text)[s]) { return decodeBinaryCEscape(arrayPtr(text, s - 1)); } template inline EncodingResult decodeCEscape(const char (&text)[s]) { return decodeCEscape(arrayPtr(text, s-1)); } template Array decodeBase64(const char (&text)[s]) { return decodeBase64(arrayPtr(text, s - 1)); } } // namespace kj #endif // KJ_ENCODING_H_