exp.h 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. #ifndef EXP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
  2. #define EXP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
  3. #if defined(_MSC_VER) || \
  4. (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
  5. (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
  6. #pragma once
  7. #endif
  8. #include <ios>
  9. #include <string>
  10. #include "regex_yaml.h"
  11. #include "stream.h"
  12. namespace YAML {
  13. ////////////////////////////////////////////////////////////////////////////////
  14. // Here we store a bunch of expressions for matching different parts of the
  15. // file.
  16. namespace Exp {
  17. // misc
  18. inline const RegEx& Empty() {
  19. static const RegEx e;
  20. return e;
  21. }
  22. inline const RegEx& Space() {
  23. static const RegEx e = RegEx(' ');
  24. return e;
  25. }
  26. inline const RegEx& Tab() {
  27. static const RegEx e = RegEx('\t');
  28. return e;
  29. }
  30. inline const RegEx& Blank() {
  31. static const RegEx e = Space() | Tab();
  32. return e;
  33. }
  34. inline const RegEx& Break() {
  35. static const RegEx e = RegEx('\n') | RegEx("\r\n") | RegEx('\r');
  36. return e;
  37. }
  38. inline const RegEx& BlankOrBreak() {
  39. static const RegEx e = Blank() | Break();
  40. return e;
  41. }
  42. inline const RegEx& Digit() {
  43. static const RegEx e = RegEx('0', '9');
  44. return e;
  45. }
  46. inline const RegEx& Alpha() {
  47. static const RegEx e = RegEx('a', 'z') | RegEx('A', 'Z');
  48. return e;
  49. }
  50. inline const RegEx& AlphaNumeric() {
  51. static const RegEx e = Alpha() | Digit();
  52. return e;
  53. }
  54. inline const RegEx& Word() {
  55. static const RegEx e = AlphaNumeric() | RegEx('-');
  56. return e;
  57. }
  58. inline const RegEx& Hex() {
  59. static const RegEx e = Digit() | RegEx('A', 'F') | RegEx('a', 'f');
  60. return e;
  61. }
  62. // Valid Unicode code points that are not part of c-printable (YAML 1.2, sec.
  63. // 5.1)
  64. inline const RegEx& NotPrintable() {
  65. static const RegEx e =
  66. RegEx(0) |
  67. RegEx("\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x7F", REGEX_OR) |
  68. RegEx(0x0E, 0x1F) |
  69. (RegEx('\xC2') + (RegEx('\x80', '\x84') | RegEx('\x86', '\x9F')));
  70. return e;
  71. }
  72. inline const RegEx& Utf8_ByteOrderMark() {
  73. static const RegEx e = RegEx("\xEF\xBB\xBF");
  74. return e;
  75. }
  76. // actual tags
  77. inline const RegEx& DocStart() {
  78. static const RegEx e = RegEx("---") + (BlankOrBreak() | RegEx());
  79. return e;
  80. }
  81. inline const RegEx& DocEnd() {
  82. static const RegEx e = RegEx("...") + (BlankOrBreak() | RegEx());
  83. return e;
  84. }
  85. inline const RegEx& DocIndicator() {
  86. static const RegEx e = DocStart() | DocEnd();
  87. return e;
  88. }
  89. inline const RegEx& BlockEntry() {
  90. static const RegEx e = RegEx('-') + (BlankOrBreak() | RegEx());
  91. return e;
  92. }
  93. inline const RegEx& Key() {
  94. static const RegEx e = RegEx('?') + BlankOrBreak();
  95. return e;
  96. }
  97. inline const RegEx& KeyInFlow() {
  98. static const RegEx e = RegEx('?') + BlankOrBreak();
  99. return e;
  100. }
  101. inline const RegEx& Value() {
  102. static const RegEx e = RegEx(':') + (BlankOrBreak() | RegEx());
  103. return e;
  104. }
  105. inline const RegEx& ValueInFlow() {
  106. static const RegEx e = RegEx(':') + (BlankOrBreak() | RegEx(",]}", REGEX_OR));
  107. return e;
  108. }
  109. inline const RegEx& ValueInJSONFlow() {
  110. static const RegEx e = RegEx(':');
  111. return e;
  112. }
  113. inline const RegEx& Ampersand() {
  114. static const RegEx e = RegEx('&');
  115. return e;
  116. }
  117. inline const RegEx Comment() {
  118. static const RegEx e = RegEx('#');
  119. return e;
  120. }
  121. inline const RegEx& Anchor() {
  122. static const RegEx e = !(RegEx("[]{},", REGEX_OR) | BlankOrBreak());
  123. return e;
  124. }
  125. inline const RegEx& AnchorEnd() {
  126. static const RegEx e = RegEx("?:,]}%@`", REGEX_OR) | BlankOrBreak();
  127. return e;
  128. }
  129. inline const RegEx& URI() {
  130. static const RegEx e = Word() | RegEx("#;/?:@&=+$,_.!~*'()[]", REGEX_OR) |
  131. (RegEx('%') + Hex() + Hex());
  132. return e;
  133. }
  134. inline const RegEx& Tag() {
  135. static const RegEx e = Word() | RegEx("#;/?:@&=+$_.~*'()", REGEX_OR) |
  136. (RegEx('%') + Hex() + Hex());
  137. return e;
  138. }
  139. // Plain scalar rules:
  140. // . Cannot start with a blank.
  141. // . Can never start with any of , [ ] { } # & * ! | > \' \" % @ `
  142. // . In the block context - ? : must not be followed with a space.
  143. // . In the flow context ? is illegal and : and - must not be followed with a
  144. // space.
  145. inline const RegEx& PlainScalar() {
  146. static const RegEx e =
  147. !(BlankOrBreak() | RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) |
  148. (RegEx("-?:", REGEX_OR) + (BlankOrBreak() | RegEx())));
  149. return e;
  150. }
  151. inline const RegEx& PlainScalarInFlow() {
  152. static const RegEx e =
  153. !(BlankOrBreak() | RegEx("?,[]{}#&*!|>\'\"%@`", REGEX_OR) |
  154. (RegEx("-:", REGEX_OR) + (Blank() | RegEx())));
  155. return e;
  156. }
  157. inline const RegEx& EndScalar() {
  158. static const RegEx e = RegEx(':') + (BlankOrBreak() | RegEx());
  159. return e;
  160. }
  161. inline const RegEx& EndScalarInFlow() {
  162. static const RegEx e =
  163. (RegEx(':') + (BlankOrBreak() | RegEx() | RegEx(",]}", REGEX_OR))) |
  164. RegEx(",?[]{}", REGEX_OR);
  165. return e;
  166. }
  167. inline const RegEx& ScanScalarEndInFlow() {
  168. static const RegEx e = (EndScalarInFlow() | (BlankOrBreak() + Comment()));
  169. return e;
  170. }
  171. inline const RegEx& ScanScalarEnd() {
  172. static const RegEx e = EndScalar() | (BlankOrBreak() + Comment());
  173. return e;
  174. }
  175. inline const RegEx& EscSingleQuote() {
  176. static const RegEx e = RegEx("\'\'");
  177. return e;
  178. }
  179. inline const RegEx& EscBreak() {
  180. static const RegEx e = RegEx('\\') + Break();
  181. return e;
  182. }
  183. inline const RegEx& ChompIndicator() {
  184. static const RegEx e = RegEx("+-", REGEX_OR);
  185. return e;
  186. }
  187. inline const RegEx& Chomp() {
  188. static const RegEx e = (ChompIndicator() + Digit()) |
  189. (Digit() + ChompIndicator()) | ChompIndicator() |
  190. Digit();
  191. return e;
  192. }
  193. // and some functions
  194. std::string Escape(Stream& in);
  195. } // namespace Exp
  // Single-character indicators, grouped by the role their names suggest.
  namespace Keys {
  // Directive indicator.
  const char Directive = '%';

  // Node properties and references.
  const char Anchor = '&';
  const char Alias = '*';
  const char Tag = '!';

  // Verbatim tag delimiters. VerbatimTagEnd has the same value as
  // FoldedScalar.
  const char VerbatimTagStart = '<';
  const char VerbatimTagEnd = '>';

  // Flow collections: sequence and map delimiters plus the entry separator.
  const char FlowSeqStart = '[';
  const char FlowSeqEnd = ']';
  const char FlowMapStart = '{';
  const char FlowMapEnd = '}';
  const char FlowEntry = ',';

  // Block scalar headers.
  const char LiteralScalar = '|';
  const char FoldedScalar = '>';
  }  // namespace Keys
  211. } // namespace YAML
  212. #endif // EXP_H_62B23520_7C8E_11DE_8A39_0800200C9A66