scantoken.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437
  1. #include <sstream>
  2. #include "exp.h"
  3. #include "regex_yaml.h"
  4. #include "regeximpl.h"
  5. #include "scanner.h"
  6. #include "scanscalar.h"
  7. #include "scantag.h" // IWYU pragma: keep
  8. #include "tag.h" // IWYU pragma: keep
  9. #include "token.h"
  10. #include "yaml-cpp/exceptions.h" // IWYU pragma: keep
  11. #include "yaml-cpp/mark.h"
  12. namespace YAML {
  13. ///////////////////////////////////////////////////////////////////////
  14. // Specialization for scanning specific tokens
  15. // Directive
  16. // . Note: no semantic checking is done here (that's for the parser to do)
  17. void Scanner::ScanDirective() {
  18. std::string name;
  19. std::vector<std::string> params;
  20. // pop indents and simple keys
  21. PopAllIndents();
  22. PopAllSimpleKeys();
  23. m_simpleKeyAllowed = false;
  24. m_canBeJSONFlow = false;
  25. // store pos and eat indicator
  26. Token token(Token::DIRECTIVE, INPUT.mark());
  27. INPUT.eat(1);
  28. // read name
  29. while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
  30. token.value += INPUT.get();
  31. // read parameters
  32. while (1) {
  33. // first get rid of whitespace
  34. while (Exp::Blank().Matches(INPUT))
  35. INPUT.eat(1);
  36. // break on newline or comment
  37. if (!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT))
  38. break;
  39. // now read parameter
  40. std::string param;
  41. while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
  42. param += INPUT.get();
  43. token.params.push_back(param);
  44. }
  45. m_tokens.push(token);
  46. }
  47. // DocStart
  48. void Scanner::ScanDocStart() {
  49. PopAllIndents();
  50. PopAllSimpleKeys();
  51. m_simpleKeyAllowed = false;
  52. m_canBeJSONFlow = false;
  53. // eat
  54. Mark mark = INPUT.mark();
  55. INPUT.eat(3);
  56. m_tokens.push(Token(Token::DOC_START, mark));
  57. }
  58. // DocEnd
  59. void Scanner::ScanDocEnd() {
  60. PopAllIndents();
  61. PopAllSimpleKeys();
  62. m_simpleKeyAllowed = false;
  63. m_canBeJSONFlow = false;
  64. // eat
  65. Mark mark = INPUT.mark();
  66. INPUT.eat(3);
  67. m_tokens.push(Token(Token::DOC_END, mark));
  68. }
  69. // FlowStart
  70. void Scanner::ScanFlowStart() {
  71. // flows can be simple keys
  72. InsertPotentialSimpleKey();
  73. m_simpleKeyAllowed = true;
  74. m_canBeJSONFlow = false;
  75. // eat
  76. Mark mark = INPUT.mark();
  77. char ch = INPUT.get();
  78. FLOW_MARKER flowType = (ch == Keys::FlowSeqStart ? FLOW_SEQ : FLOW_MAP);
  79. m_flows.push(flowType);
  80. Token::TYPE type =
  81. (flowType == FLOW_SEQ ? Token::FLOW_SEQ_START : Token::FLOW_MAP_START);
  82. m_tokens.push(Token(type, mark));
  83. }
  84. // FlowEnd
  85. void Scanner::ScanFlowEnd() {
  86. if (InBlockContext())
  87. throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END);
  88. // we might have a solo entry in the flow context
  89. if (InFlowContext()) {
  90. if (m_flows.top() == FLOW_MAP && VerifySimpleKey())
  91. m_tokens.push(Token(Token::VALUE, INPUT.mark()));
  92. else if (m_flows.top() == FLOW_SEQ)
  93. InvalidateSimpleKey();
  94. }
  95. m_simpleKeyAllowed = false;
  96. m_canBeJSONFlow = true;
  97. // eat
  98. Mark mark = INPUT.mark();
  99. char ch = INPUT.get();
  100. // check that it matches the start
  101. FLOW_MARKER flowType = (ch == Keys::FlowSeqEnd ? FLOW_SEQ : FLOW_MAP);
  102. if (m_flows.top() != flowType)
  103. throw ParserException(mark, ErrorMsg::FLOW_END);
  104. m_flows.pop();
  105. Token::TYPE type = (flowType ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END);
  106. m_tokens.push(Token(type, mark));
  107. }
  108. // FlowEntry
  109. void Scanner::ScanFlowEntry() {
  110. // we might have a solo entry in the flow context
  111. if (InFlowContext()) {
  112. if (m_flows.top() == FLOW_MAP && VerifySimpleKey())
  113. m_tokens.push(Token(Token::VALUE, INPUT.mark()));
  114. else if (m_flows.top() == FLOW_SEQ)
  115. InvalidateSimpleKey();
  116. }
  117. m_simpleKeyAllowed = true;
  118. m_canBeJSONFlow = false;
  119. // eat
  120. Mark mark = INPUT.mark();
  121. INPUT.eat(1);
  122. m_tokens.push(Token(Token::FLOW_ENTRY, mark));
  123. }
  124. // BlockEntry
  125. void Scanner::ScanBlockEntry() {
  126. // we better be in the block context!
  127. if (InFlowContext())
  128. throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
  129. // can we put it here?
  130. if (!m_simpleKeyAllowed)
  131. throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
  132. PushIndentTo(INPUT.column(), IndentMarker::SEQ);
  133. m_simpleKeyAllowed = true;
  134. m_canBeJSONFlow = false;
  135. // eat
  136. Mark mark = INPUT.mark();
  137. INPUT.eat(1);
  138. m_tokens.push(Token(Token::BLOCK_ENTRY, mark));
  139. }
  140. // Key
  141. void Scanner::ScanKey() {
  142. // handle keys diffently in the block context (and manage indents)
  143. if (InBlockContext()) {
  144. if (!m_simpleKeyAllowed)
  145. throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY);
  146. PushIndentTo(INPUT.column(), IndentMarker::MAP);
  147. }
  148. // can only put a simple key here if we're in block context
  149. m_simpleKeyAllowed = InBlockContext();
  150. // eat
  151. Mark mark = INPUT.mark();
  152. INPUT.eat(1);
  153. m_tokens.push(Token(Token::KEY, mark));
  154. }
  155. // Value
  156. void Scanner::ScanValue() {
  157. // and check that simple key
  158. bool isSimpleKey = VerifySimpleKey();
  159. m_canBeJSONFlow = false;
  160. if (isSimpleKey) {
  161. // can't follow a simple key with another simple key (dunno why, though - it
  162. // seems fine)
  163. m_simpleKeyAllowed = false;
  164. } else {
  165. // handle values diffently in the block context (and manage indents)
  166. if (InBlockContext()) {
  167. if (!m_simpleKeyAllowed)
  168. throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE);
  169. PushIndentTo(INPUT.column(), IndentMarker::MAP);
  170. }
  171. // can only put a simple key here if we're in block context
  172. m_simpleKeyAllowed = InBlockContext();
  173. }
  174. // eat
  175. Mark mark = INPUT.mark();
  176. INPUT.eat(1);
  177. m_tokens.push(Token(Token::VALUE, mark));
  178. }
  179. // AnchorOrAlias
  180. void Scanner::ScanAnchorOrAlias() {
  181. bool alias;
  182. std::string name;
  183. // insert a potential simple key
  184. InsertPotentialSimpleKey();
  185. m_simpleKeyAllowed = false;
  186. m_canBeJSONFlow = false;
  187. // eat the indicator
  188. Mark mark = INPUT.mark();
  189. char indicator = INPUT.get();
  190. alias = (indicator == Keys::Alias);
  191. // now eat the content
  192. while (INPUT && Exp::Anchor().Matches(INPUT))
  193. name += INPUT.get();
  194. // we need to have read SOMETHING!
  195. if (name.empty())
  196. throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND
  197. : ErrorMsg::ANCHOR_NOT_FOUND);
  198. // and needs to end correctly
  199. if (INPUT && !Exp::AnchorEnd().Matches(INPUT))
  200. throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS
  201. : ErrorMsg::CHAR_IN_ANCHOR);
  202. // and we're done
  203. Token token(alias ? Token::ALIAS : Token::ANCHOR, mark);
  204. token.value = name;
  205. m_tokens.push(token);
  206. }
  207. // Tag
  208. void Scanner::ScanTag() {
  209. // insert a potential simple key
  210. InsertPotentialSimpleKey();
  211. m_simpleKeyAllowed = false;
  212. m_canBeJSONFlow = false;
  213. Token token(Token::TAG, INPUT.mark());
  214. // eat the indicator
  215. INPUT.get();
  216. if (INPUT && INPUT.peek() == Keys::VerbatimTagStart) {
  217. std::string tag = ScanVerbatimTag(INPUT);
  218. token.value = tag;
  219. token.data = Tag::VERBATIM;
  220. } else {
  221. bool canBeHandle;
  222. token.value = ScanTagHandle(INPUT, canBeHandle);
  223. if (!canBeHandle && token.value.empty())
  224. token.data = Tag::NON_SPECIFIC;
  225. else if (token.value.empty())
  226. token.data = Tag::SECONDARY_HANDLE;
  227. else
  228. token.data = Tag::PRIMARY_HANDLE;
  229. // is there a suffix?
  230. if (canBeHandle && INPUT.peek() == Keys::Tag) {
  231. // eat the indicator
  232. INPUT.get();
  233. token.params.push_back(ScanTagSuffix(INPUT));
  234. token.data = Tag::NAMED_HANDLE;
  235. }
  236. }
  237. m_tokens.push(token);
  238. }
  239. // PlainScalar
  240. void Scanner::ScanPlainScalar() {
  241. std::string scalar;
  242. // set up the scanning parameters
  243. ScanScalarParams params;
  244. params.end =
  245. (InFlowContext() ? &Exp::ScanScalarEndInFlow() : &Exp::ScanScalarEnd());
  246. params.eatEnd = false;
  247. params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1);
  248. params.fold = FOLD_FLOW;
  249. params.eatLeadingWhitespace = true;
  250. params.trimTrailingSpaces = true;
  251. params.chomp = STRIP;
  252. params.onDocIndicator = BREAK;
  253. params.onTabInIndentation = THROW;
  254. // insert a potential simple key
  255. InsertPotentialSimpleKey();
  256. Mark mark = INPUT.mark();
  257. scalar = ScanScalar(INPUT, params);
  258. // can have a simple key only if we ended the scalar by starting a new line
  259. m_simpleKeyAllowed = params.leadingSpaces;
  260. m_canBeJSONFlow = false;
  261. // finally, check and see if we ended on an illegal character
  262. // if(Exp::IllegalCharInScalar.Matches(INPUT))
  263. // throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR);
  264. Token token(Token::PLAIN_SCALAR, mark);
  265. token.value = scalar;
  266. m_tokens.push(token);
  267. }
  268. // QuotedScalar
  269. void Scanner::ScanQuotedScalar() {
  270. std::string scalar;
  271. // peek at single or double quote (don't eat because we need to preserve (for
  272. // the time being) the input position)
  273. char quote = INPUT.peek();
  274. bool single = (quote == '\'');
  275. // setup the scanning parameters
  276. ScanScalarParams params;
  277. RegEx end = (single ? RegEx(quote) && !Exp::EscSingleQuote() : RegEx(quote));
  278. params.end = &end;
  279. params.eatEnd = true;
  280. params.escape = (single ? '\'' : '\\');
  281. params.indent = 0;
  282. params.fold = FOLD_FLOW;
  283. params.eatLeadingWhitespace = true;
  284. params.trimTrailingSpaces = false;
  285. params.chomp = CLIP;
  286. params.onDocIndicator = THROW;
  287. // insert a potential simple key
  288. InsertPotentialSimpleKey();
  289. Mark mark = INPUT.mark();
  290. // now eat that opening quote
  291. INPUT.get();
  292. // and scan
  293. scalar = ScanScalar(INPUT, params);
  294. m_simpleKeyAllowed = false;
  295. m_canBeJSONFlow = true;
  296. Token token(Token::NON_PLAIN_SCALAR, mark);
  297. token.value = scalar;
  298. m_tokens.push(token);
  299. }
  300. // BlockScalarToken
  301. // . These need a little extra processing beforehand.
  302. // . We need to scan the line where the indicator is (this doesn't count as part
  303. // of the scalar),
  304. // and then we need to figure out what level of indentation we'll be using.
  305. void Scanner::ScanBlockScalar() {
  306. std::string scalar;
  307. ScanScalarParams params;
  308. params.indent = 1;
  309. params.detectIndent = true;
  310. // eat block indicator ('|' or '>')
  311. Mark mark = INPUT.mark();
  312. char indicator = INPUT.get();
  313. params.fold = (indicator == Keys::FoldedScalar ? FOLD_BLOCK : DONT_FOLD);
  314. // eat chomping/indentation indicators
  315. params.chomp = CLIP;
  316. int n = Exp::Chomp().Match(INPUT);
  317. for (int i = 0; i < n; i++) {
  318. char ch = INPUT.get();
  319. if (ch == '+')
  320. params.chomp = KEEP;
  321. else if (ch == '-')
  322. params.chomp = STRIP;
  323. else if (Exp::Digit().Matches(ch)) {
  324. if (ch == '0')
  325. throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK);
  326. params.indent = ch - '0';
  327. params.detectIndent = false;
  328. }
  329. }
  330. // now eat whitespace
  331. while (Exp::Blank().Matches(INPUT))
  332. INPUT.eat(1);
  333. // and comments to the end of the line
  334. if (Exp::Comment().Matches(INPUT))
  335. while (INPUT && !Exp::Break().Matches(INPUT))
  336. INPUT.eat(1);
  337. // if it's not a line break, then we ran into a bad character inline
  338. if (INPUT && !Exp::Break().Matches(INPUT))
  339. throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK);
  340. // set the initial indentation
  341. if (GetTopIndent() >= 0)
  342. params.indent += GetTopIndent();
  343. params.eatLeadingWhitespace = false;
  344. params.trimTrailingSpaces = false;
  345. params.onTabInIndentation = THROW;
  346. scalar = ScanScalar(INPUT, params);
  347. // simple keys always ok after block scalars (since we're gonna start a new
  348. // line anyways)
  349. m_simpleKeyAllowed = true;
  350. m_canBeJSONFlow = false;
  351. Token token(Token::NON_PLAIN_SCALAR, mark);
  352. token.value = scalar;
  353. m_tokens.push(token);
  354. }
  355. }