// scanner.h (4.7 KB)
  1. #ifndef SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
  2. #define SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
  3. #if defined(_MSC_VER) || \
  4. (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
  5. (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
  6. #pragma once
  7. #endif
  8. #include <cstddef>
  9. #include <ios>
  10. #include <queue>
  11. #include <stack>
  12. #include <string>
  13. #include "ptr_vector.h"
  14. #include "stream.h"
  15. #include "token.h"
  16. #include "yaml-cpp/mark.h"
  17. namespace YAML {
  18. class Node;
  19. class RegEx;
  20. /**
  21. * A scanner transforms a stream of characters into a stream of tokens.
  22. */
  23. class Scanner {
  24. public:
  25. explicit Scanner(std::istream &in);
  26. ~Scanner();
  27. /** Returns true if there are no more tokens to be read. */
  28. bool empty();
  29. /** Removes the next token in the queue. */
  30. void pop();
  31. /** Returns, but does not remove, the next token in the queue. */
  32. Token &peek();
  33. /** Returns the current mark in the input stream. */
  34. Mark mark() const;
  35. private:
  36. struct IndentMarker {
  37. enum INDENT_TYPE { MAP, SEQ, NONE };
  38. enum STATUS { VALID, INVALID, UNKNOWN };
  39. IndentMarker(int column_, INDENT_TYPE type_)
  40. : column(column_), type(type_), status(VALID), pStartToken(nullptr) {}
  41. int column;
  42. INDENT_TYPE type;
  43. STATUS status;
  44. Token *pStartToken;
  45. };
  46. enum FLOW_MARKER { FLOW_MAP, FLOW_SEQ };
  47. private:
  48. // scanning
  49. /**
  50. * Scans until there's a valid token at the front of the queue, or the queue
  51. * is empty. The state can be checked by {@link #empty}, and the next token
  52. * retrieved by {@link #peek}.
  53. */
  54. void EnsureTokensInQueue();
  55. /**
  56. * The main scanning function; this method branches out to scan whatever the
  57. * next token should be.
  58. */
  59. void ScanNextToken();
  60. /** Eats the input stream until it reaches the next token-like thing. */
  61. void ScanToNextToken();
  62. /** Sets the initial conditions for starting a stream. */
  63. void StartStream();
  64. /** Closes out the stream, finish up, etc. */
  65. void EndStream();
  66. Token *PushToken(Token::TYPE type);
  67. bool InFlowContext() const { return !m_flows.empty(); }
  68. bool InBlockContext() const { return m_flows.empty(); }
  69. std::size_t GetFlowLevel() const { return m_flows.size(); }
  70. Token::TYPE GetStartTokenFor(IndentMarker::INDENT_TYPE type) const;
  71. /**
  72. * Pushes an indentation onto the stack, and enqueues the proper token
  73. * (sequence start or mapping start).
  74. *
  75. * @return the indent marker it generates (if any).
  76. */
  77. IndentMarker *PushIndentTo(int column, IndentMarker::INDENT_TYPE type);
  78. /**
  79. * Pops indentations off the stack until it reaches the current indentation
  80. * level, and enqueues the proper token each time. Then pops all invalid
  81. * indentations off.
  82. */
  83. void PopIndentToHere();
  84. /**
  85. * Pops all indentations (except for the base empty one) off the stack, and
  86. * enqueues the proper token each time.
  87. */
  88. void PopAllIndents();
  89. /** Pops a single indent, pushing the proper token. */
  90. void PopIndent();
  91. int GetTopIndent() const;
  92. // checking input
  93. bool CanInsertPotentialSimpleKey() const;
  94. bool ExistsActiveSimpleKey() const;
  95. void InsertPotentialSimpleKey();
  96. void InvalidateSimpleKey();
  97. bool VerifySimpleKey();
  98. void PopAllSimpleKeys();
  99. /**
  100. * Throws a ParserException with the current token location (if available),
  101. * and does not parse any more tokens.
  102. */
  103. void ThrowParserException(const std::string &msg) const;
  104. bool IsWhitespaceToBeEaten(char ch);
  105. /**
  106. * Returns the appropriate regex to check if the next token is a value token.
  107. */
  108. const RegEx &GetValueRegex() const;
  109. struct SimpleKey {
  110. SimpleKey(const Mark &mark_, std::size_t flowLevel_);
  111. void Validate();
  112. void Invalidate();
  113. Mark mark;
  114. std::size_t flowLevel;
  115. IndentMarker *pIndent;
  116. Token *pMapStart, *pKey;
  117. };
  118. // and the tokens
  119. void ScanDirective();
  120. void ScanDocStart();
  121. void ScanDocEnd();
  122. void ScanBlockSeqStart();
  123. void ScanBlockMapSTart();
  124. void ScanBlockEnd();
  125. void ScanBlockEntry();
  126. void ScanFlowStart();
  127. void ScanFlowEnd();
  128. void ScanFlowEntry();
  129. void ScanKey();
  130. void ScanValue();
  131. void ScanAnchorOrAlias();
  132. void ScanTag();
  133. void ScanPlainScalar();
  134. void ScanQuotedScalar();
  135. void ScanBlockScalar();
  136. private:
  137. // the stream
  138. Stream INPUT;
  139. // the output (tokens)
  140. std::queue<Token> m_tokens;
  141. // state info
  142. bool m_startedStream, m_endedStream;
  143. bool m_simpleKeyAllowed;
  144. bool m_canBeJSONFlow;
  145. std::stack<SimpleKey> m_simpleKeys;
  146. std::stack<IndentMarker *> m_indents;
  147. ptr_vector<IndentMarker> m_indentRefs; // for "garbage collection"
  148. std::stack<FLOW_MARKER> m_flows;
  149. };
  150. }
  151. #endif // SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66