123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136 |
- #include <sstream>
- #include "exp.h"
- #include "stream.h"
- #include "yaml-cpp/exceptions.h" // IWYU pragma: keep
- namespace YAML {
- struct Mark;
- } // namespace YAML
- namespace YAML {
- namespace Exp {
- unsigned ParseHex(const std::string& str, const Mark& mark) {
- unsigned value = 0;
- for (std::size_t i = 0; i < str.size(); i++) {
- char ch = str[i];
- int digit = 0;
- if ('a' <= ch && ch <= 'f')
- digit = ch - 'a' + 10;
- else if ('A' <= ch && ch <= 'F')
- digit = ch - 'A' + 10;
- else if ('0' <= ch && ch <= '9')
- digit = ch - '0';
- else
- throw ParserException(mark, ErrorMsg::INVALID_HEX);
- value = (value << 4) + digit;
- }
- return value;
- }
// Str
// . Narrows 'ch' to a char and returns it as a one-character string.
std::string Str(unsigned ch) {
  std::string single;
  single.push_back(static_cast<char>(ch));
  return single;
}
- // Escape
- // . Translates the next 'codeLength' characters into a hex number and returns
- // the result.
- // . Throws if it's not actually hex.
- std::string Escape(Stream& in, int codeLength) {
- // grab string
- std::string str;
- for (int i = 0; i < codeLength; i++)
- str += in.get();
- // get the value
- unsigned value = ParseHex(str, in.mark());
- // legal unicode?
- if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
- std::stringstream msg;
- msg << ErrorMsg::INVALID_UNICODE << value;
- throw ParserException(in.mark(), msg.str());
- }
- // now break it up into chars
- if (value <= 0x7F)
- return Str(value);
- else if (value <= 0x7FF)
- return Str(0xC0 + (value >> 6)) + Str(0x80 + (value & 0x3F));
- else if (value <= 0xFFFF)
- return Str(0xE0 + (value >> 12)) + Str(0x80 + ((value >> 6) & 0x3F)) +
- Str(0x80 + (value & 0x3F));
- else
- return Str(0xF0 + (value >> 18)) + Str(0x80 + ((value >> 12) & 0x3F)) +
- Str(0x80 + ((value >> 6) & 0x3F)) + Str(0x80 + (value & 0x3F));
- }
- // Escape
- // . Escapes the sequence starting 'in' (it must begin with a '\' or single
- // quote)
- // and returns the result.
- // . Throws if it's an unknown escape character.
- std::string Escape(Stream& in) {
- // eat slash
- char escape = in.get();
- // switch on escape character
- char ch = in.get();
- // first do single quote, since it's easier
- if (escape == '\'' && ch == '\'')
- return "\'";
- // now do the slash (we're not gonna check if it's a slash - you better pass
- // one!)
- switch (ch) {
- case '0':
- return std::string(1, '\x00');
- case 'a':
- return "\x07";
- case 'b':
- return "\x08";
- case 't':
- case '\t':
- return "\x09";
- case 'n':
- return "\x0A";
- case 'v':
- return "\x0B";
- case 'f':
- return "\x0C";
- case 'r':
- return "\x0D";
- case 'e':
- return "\x1B";
- case ' ':
- return "\x20";
- case '\"':
- return "\"";
- case '\'':
- return "\'";
- case '\\':
- return "\\";
- case '/':
- return "/";
- case 'N':
- return "\x85";
- case '_':
- return "\xA0";
- case 'L':
- return "\xE2\x80\xA8"; // LS (#x2028)
- case 'P':
- return "\xE2\x80\xA9"; // PS (#x2029)
- case 'x':
- return Escape(in, 2);
- case 'u':
- return Escape(in, 4);
- case 'U':
- return Escape(in, 8);
- }
- std::stringstream msg;
- throw ParserException(in.mark(), std::string(ErrorMsg::INVALID_ESCAPE) + ch);
- }
- }
- }
|