// scanner.h
  1. #ifndef SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
  2. #define SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
  3. #if defined(_MSC_VER) || \
  4. (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
  5. (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
  6. #pragma once
  7. #endif
  8. #include <cstddef>
  9. #include <ios>
  10. #include <map>
  11. #include <queue>
  12. #include <set>
  13. #include <stack>
  14. #include <string>
  15. #include "ptr_vector.h"
  16. #include "stream.h"
  17. #include "token.h"
  18. #include "yaml-cpp/mark.h"
  19. namespace YAML {
  20. class Node;
  21. class RegEx;
  22. /**
  23. * A scanner transforms a stream of characters into a stream of tokens.
  24. */
  25. class Scanner {
  26. public:
  27. explicit Scanner(std::istream &in);
  28. ~Scanner();
  29. /** Returns true if there are no more tokens to be read. */
  30. bool empty();
  31. /** Removes the next token in the queue. */
  32. void pop();
  33. /** Returns, but does not remove, the next token in the queue. */
  34. Token &peek();
  35. /** Returns the current mark in the input stream. */
  36. Mark mark() const;
  37. private:
  38. struct IndentMarker {
  39. enum INDENT_TYPE { MAP, SEQ, NONE };
  40. enum STATUS { VALID, INVALID, UNKNOWN };
  41. IndentMarker(int column_, INDENT_TYPE type_)
  42. : column(column_), type(type_), status(VALID), pStartToken(0) {}
  43. int column;
  44. INDENT_TYPE type;
  45. STATUS status;
  46. Token *pStartToken;
  47. };
  48. enum FLOW_MARKER { FLOW_MAP, FLOW_SEQ };
  49. private:
  50. // scanning
  51. /**
  52. * Scans until there's a valid token at the front of the queue, or the queue
  53. * is empty. The state can be checked by {@link #empty}, and the next token
  54. * retrieved by {@link #peek}.
  55. */
  56. void EnsureTokensInQueue();
  57. /**
  58. * The main scanning function; this method branches out to scan whatever the
  59. * next token should be.
  60. */
  61. void ScanNextToken();
  62. /** Eats the input stream until it reaches the next token-like thing. */
  63. void ScanToNextToken();
  64. /** Sets the initial conditions for starting a stream. */
  65. void StartStream();
  66. /** Closes out the stream, finish up, etc. */
  67. void EndStream();
  68. Token *PushToken(Token::TYPE type);
  69. bool InFlowContext() const { return !m_flows.empty(); }
  70. bool InBlockContext() const { return m_flows.empty(); }
  71. std::size_t GetFlowLevel() const { return m_flows.size(); }
  72. Token::TYPE GetStartTokenFor(IndentMarker::INDENT_TYPE type) const;
  73. /**
  74. * Pushes an indentation onto the stack, and enqueues the proper token
  75. * (sequence start or mapping start).
  76. *
  77. * @return the indent marker it generates (if any).
  78. */
  79. IndentMarker *PushIndentTo(int column, IndentMarker::INDENT_TYPE type);
  80. /**
  81. * Pops indentations off the stack until it reaches the current indentation
  82. * level, and enqueues the proper token each time. Then pops all invalid
  83. * indentations off.
  84. */
  85. void PopIndentToHere();
  86. /**
  87. * Pops all indentations (except for the base empty one) off the stack, and
  88. * enqueues the proper token each time.
  89. */
  90. void PopAllIndents();
  91. /** Pops a single indent, pushing the proper token. */
  92. void PopIndent();
  93. int GetTopIndent() const;
  94. // checking input
  95. bool CanInsertPotentialSimpleKey() const;
  96. bool ExistsActiveSimpleKey() const;
  97. void InsertPotentialSimpleKey();
  98. void InvalidateSimpleKey();
  99. bool VerifySimpleKey();
  100. void PopAllSimpleKeys();
  101. /**
  102. * Throws a ParserException with the current token location (if available),
  103. * and does not parse any more tokens.
  104. */
  105. void ThrowParserException(const std::string &msg) const;
  106. bool IsWhitespaceToBeEaten(char ch);
  107. /**
  108. * Returns the appropriate regex to check if the next token is a value token.
  109. */
  110. const RegEx &GetValueRegex() const;
  111. struct SimpleKey {
  112. SimpleKey(const Mark &mark_, std::size_t flowLevel_);
  113. void Validate();
  114. void Invalidate();
  115. Mark mark;
  116. std::size_t flowLevel;
  117. IndentMarker *pIndent;
  118. Token *pMapStart, *pKey;
  119. };
  120. // and the tokens
  121. void ScanDirective();
  122. void ScanDocStart();
  123. void ScanDocEnd();
  124. void ScanBlockSeqStart();
  125. void ScanBlockMapSTart();
  126. void ScanBlockEnd();
  127. void ScanBlockEntry();
  128. void ScanFlowStart();
  129. void ScanFlowEnd();
  130. void ScanFlowEntry();
  131. void ScanKey();
  132. void ScanValue();
  133. void ScanAnchorOrAlias();
  134. void ScanTag();
  135. void ScanPlainScalar();
  136. void ScanQuotedScalar();
  137. void ScanBlockScalar();
  138. private:
  139. // the stream
  140. Stream INPUT;
  141. // the output (tokens)
  142. std::queue<Token> m_tokens;
  143. // state info
  144. bool m_startedStream, m_endedStream;
  145. bool m_simpleKeyAllowed;
  146. bool m_canBeJSONFlow;
  147. std::stack<SimpleKey> m_simpleKeys;
  148. std::stack<IndentMarker *> m_indents;
  149. ptr_vector<IndentMarker> m_indentRefs; // for "garbage collection"
  150. std::stack<FLOW_MARKER> m_flows;
  151. };
  152. }
  153. #endif // SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66