exp.cpp 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. #include <sstream>
  2. #include "exp.h"
  3. #include "stream.h"
  4. #include "yaml-cpp/exceptions.h" // IWYU pragma: keep
  5. namespace YAML {
  6. struct Mark;
  7. } // namespace YAML
  8. namespace YAML {
  9. namespace Exp {
  10. unsigned ParseHex(const std::string& str, const Mark& mark) {
  11. unsigned value = 0;
  12. for (std::size_t i = 0; i < str.size(); i++) {
  13. char ch = str[i];
  14. int digit = 0;
  15. if ('a' <= ch && ch <= 'f')
  16. digit = ch - 'a' + 10;
  17. else if ('A' <= ch && ch <= 'F')
  18. digit = ch - 'A' + 10;
  19. else if ('0' <= ch && ch <= '9')
  20. digit = ch - '0';
  21. else
  22. throw ParserException(mark, ErrorMsg::INVALID_HEX);
  23. value = (value << 4) + digit;
  24. }
  25. return value;
  26. }
  // Str
  // . Returns a one-character string holding 'ch' narrowed to a char.
  std::string Str(unsigned ch) {
    std::string single;
    single.push_back(static_cast<char>(ch));
    return single;
  }
  28. // Escape
  29. // . Translates the next 'codeLength' characters into a hex number and returns
  30. // the result.
  31. // . Throws if it's not actually hex.
  32. std::string Escape(Stream& in, int codeLength) {
  33. // grab string
  34. std::string str;
  35. for (int i = 0; i < codeLength; i++)
  36. str += in.get();
  37. // get the value
  38. unsigned value = ParseHex(str, in.mark());
  39. // legal unicode?
  40. if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
  41. std::stringstream msg;
  42. msg << ErrorMsg::INVALID_UNICODE << value;
  43. throw ParserException(in.mark(), msg.str());
  44. }
  45. // now break it up into chars
  46. if (value <= 0x7F)
  47. return Str(value);
  48. else if (value <= 0x7FF)
  49. return Str(0xC0 + (value >> 6)) + Str(0x80 + (value & 0x3F));
  50. else if (value <= 0xFFFF)
  51. return Str(0xE0 + (value >> 12)) + Str(0x80 + ((value >> 6) & 0x3F)) +
  52. Str(0x80 + (value & 0x3F));
  53. else
  54. return Str(0xF0 + (value >> 18)) + Str(0x80 + ((value >> 12) & 0x3F)) +
  55. Str(0x80 + ((value >> 6) & 0x3F)) + Str(0x80 + (value & 0x3F));
  56. }
  57. // Escape
  58. // . Escapes the sequence starting 'in' (it must begin with a '\' or single
  59. // quote)
  60. // and returns the result.
  61. // . Throws if it's an unknown escape character.
  62. std::string Escape(Stream& in) {
  63. // eat slash
  64. char escape = in.get();
  65. // switch on escape character
  66. char ch = in.get();
  67. // first do single quote, since it's easier
  68. if (escape == '\'' && ch == '\'')
  69. return "\'";
  70. // now do the slash (we're not gonna check if it's a slash - you better pass
  71. // one!)
  72. switch (ch) {
  73. case '0':
  74. return std::string(1, '\x00');
  75. case 'a':
  76. return "\x07";
  77. case 'b':
  78. return "\x08";
  79. case 't':
  80. case '\t':
  81. return "\x09";
  82. case 'n':
  83. return "\x0A";
  84. case 'v':
  85. return "\x0B";
  86. case 'f':
  87. return "\x0C";
  88. case 'r':
  89. return "\x0D";
  90. case 'e':
  91. return "\x1B";
  92. case ' ':
  93. return "\x20";
  94. case '\"':
  95. return "\"";
  96. case '\'':
  97. return "\'";
  98. case '\\':
  99. return "\\";
  100. case '/':
  101. return "/";
  102. case 'N':
  103. return "\x85";
  104. case '_':
  105. return "\xA0";
  106. case 'L':
  107. return "\xE2\x80\xA8"; // LS (#x2028)
  108. case 'P':
  109. return "\xE2\x80\xA9"; // PS (#x2029)
  110. case 'x':
  111. return Escape(in, 2);
  112. case 'u':
  113. return Escape(in, 4);
  114. case 'U':
  115. return Escape(in, 8);
  116. }
  117. std::stringstream msg;
  118. throw ParserException(in.mark(), std::string(ErrorMsg::INVALID_ESCAPE) + ch);
  119. }
  120. }
  121. }