scanner.cpp 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. #include <cassert>
  2. #include <memory>
  3. #include "exp.h"
  4. #include "scanner.h"
  5. #include "token.h"
  6. #include "yaml-cpp/exceptions.h" // IWYU pragma: keep
  7. namespace YAML {
  8. Scanner::Scanner(std::istream& in)
  9. : INPUT(in),
  10. m_startedStream(false),
  11. m_endedStream(false),
  12. m_simpleKeyAllowed(false),
  13. m_canBeJSONFlow(false) {}
  14. Scanner::~Scanner() {}
  15. bool Scanner::empty() {
  16. EnsureTokensInQueue();
  17. return m_tokens.empty();
  18. }
  19. void Scanner::pop() {
  20. EnsureTokensInQueue();
  21. if (!m_tokens.empty())
  22. m_tokens.pop();
  23. }
  24. Token& Scanner::peek() {
  25. EnsureTokensInQueue();
  26. assert(!m_tokens.empty()); // should we be asserting here? I mean, we really
  27. // just be checking
  28. // if it's empty before peeking.
  29. #if 0
  30. static Token *pLast = 0;
  31. if(pLast != &m_tokens.front())
  32. std::cerr << "peek: " << m_tokens.front() << "\n";
  33. pLast = &m_tokens.front();
  34. #endif
  35. return m_tokens.front();
  36. }
  37. Mark Scanner::mark() const { return INPUT.mark(); }
  38. void Scanner::EnsureTokensInQueue() {
  39. while (1) {
  40. if (!m_tokens.empty()) {
  41. Token& token = m_tokens.front();
  42. // if this guy's valid, then we're done
  43. if (token.status == Token::VALID) {
  44. return;
  45. }
  46. // here's where we clean up the impossible tokens
  47. if (token.status == Token::INVALID) {
  48. m_tokens.pop();
  49. continue;
  50. }
  51. // note: what's left are the unverified tokens
  52. }
  53. // no token? maybe we've actually finished
  54. if (m_endedStream) {
  55. return;
  56. }
  57. // no? then scan...
  58. ScanNextToken();
  59. }
  60. }
  61. void Scanner::ScanNextToken() {
  62. if (m_endedStream) {
  63. return;
  64. }
  65. if (!m_startedStream) {
  66. return StartStream();
  67. }
  68. // get rid of whitespace, etc. (in between tokens it should be irrelevent)
  69. ScanToNextToken();
  70. // maybe need to end some blocks
  71. PopIndentToHere();
  72. // *****
  73. // And now branch based on the next few characters!
  74. // *****
  75. // end of stream
  76. if (!INPUT) {
  77. return EndStream();
  78. }
  79. if (INPUT.column() == 0 && INPUT.peek() == Keys::Directive) {
  80. return ScanDirective();
  81. }
  82. // document token
  83. if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) {
  84. return ScanDocStart();
  85. }
  86. if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) {
  87. return ScanDocEnd();
  88. }
  89. // flow start/end/entry
  90. if (INPUT.peek() == Keys::FlowSeqStart ||
  91. INPUT.peek() == Keys::FlowMapStart) {
  92. return ScanFlowStart();
  93. }
  94. if (INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd) {
  95. return ScanFlowEnd();
  96. }
  97. if (INPUT.peek() == Keys::FlowEntry) {
  98. return ScanFlowEntry();
  99. }
  100. // block/map stuff
  101. if (Exp::BlockEntry().Matches(INPUT)) {
  102. return ScanBlockEntry();
  103. }
  104. if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) {
  105. return ScanKey();
  106. }
  107. if (GetValueRegex().Matches(INPUT)) {
  108. return ScanValue();
  109. }
  110. // alias/anchor
  111. if (INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor) {
  112. return ScanAnchorOrAlias();
  113. }
  114. // tag
  115. if (INPUT.peek() == Keys::Tag) {
  116. return ScanTag();
  117. }
  118. // special scalars
  119. if (InBlockContext() && (INPUT.peek() == Keys::LiteralScalar ||
  120. INPUT.peek() == Keys::FoldedScalar)) {
  121. return ScanBlockScalar();
  122. }
  123. if (INPUT.peek() == '\'' || INPUT.peek() == '\"') {
  124. return ScanQuotedScalar();
  125. }
  126. // plain scalars
  127. if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow())
  128. .Matches(INPUT)) {
  129. return ScanPlainScalar();
  130. }
  131. // don't know what it is!
  132. throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN);
  133. }
  134. void Scanner::ScanToNextToken() {
  135. while (1) {
  136. // first eat whitespace
  137. while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
  138. if (InBlockContext() && Exp::Tab().Matches(INPUT)) {
  139. m_simpleKeyAllowed = false;
  140. }
  141. INPUT.eat(1);
  142. }
  143. // then eat a comment
  144. if (Exp::Comment().Matches(INPUT)) {
  145. // eat until line break
  146. while (INPUT && !Exp::Break().Matches(INPUT)) {
  147. INPUT.eat(1);
  148. }
  149. }
  150. // if it's NOT a line break, then we're done!
  151. if (!Exp::Break().Matches(INPUT)) {
  152. break;
  153. }
  154. // otherwise, let's eat the line break and keep going
  155. int n = Exp::Break().Match(INPUT);
  156. INPUT.eat(n);
  157. // oh yeah, and let's get rid of that simple key
  158. InvalidateSimpleKey();
  159. // new line - we may be able to accept a simple key now
  160. if (InBlockContext()) {
  161. m_simpleKeyAllowed = true;
  162. }
  163. }
  164. }
  165. ///////////////////////////////////////////////////////////////////////
  166. // Misc. helpers
  167. // IsWhitespaceToBeEaten
  168. // . We can eat whitespace if it's a space or tab
  169. // . Note: originally tabs in block context couldn't be eaten
  170. // "where a simple key could be allowed
  171. // (i.e., not at the beginning of a line, or following '-', '?', or
  172. // ':')"
  173. // I think this is wrong, since tabs can be non-content whitespace; it's just
  174. // that they can't contribute to indentation, so once you've seen a tab in a
  175. // line, you can't start a simple key
  176. bool Scanner::IsWhitespaceToBeEaten(char ch) {
  177. if (ch == ' ') {
  178. return true;
  179. }
  180. if (ch == '\t') {
  181. return true;
  182. }
  183. return false;
  184. }
  185. const RegEx& Scanner::GetValueRegex() const {
  186. if (InBlockContext()) {
  187. return Exp::Value();
  188. }
  189. return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow();
  190. }
  191. void Scanner::StartStream() {
  192. m_startedStream = true;
  193. m_simpleKeyAllowed = true;
  194. std::unique_ptr<IndentMarker> pIndent(
  195. new IndentMarker(-1, IndentMarker::NONE));
  196. m_indentRefs.push_back(std::move(pIndent));
  197. m_indents.push(&m_indentRefs.back());
  198. }
  199. void Scanner::EndStream() {
  200. // force newline
  201. if (INPUT.column() > 0) {
  202. INPUT.ResetColumn();
  203. }
  204. PopAllIndents();
  205. PopAllSimpleKeys();
  206. m_simpleKeyAllowed = false;
  207. m_endedStream = true;
  208. }
  209. Token* Scanner::PushToken(Token::TYPE type) {
  210. m_tokens.push(Token(type, INPUT.mark()));
  211. return &m_tokens.back();
  212. }
  213. Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const {
  214. switch (type) {
  215. case IndentMarker::SEQ:
  216. return Token::BLOCK_SEQ_START;
  217. case IndentMarker::MAP:
  218. return Token::BLOCK_MAP_START;
  219. case IndentMarker::NONE:
  220. assert(false);
  221. break;
  222. }
  223. assert(false);
  224. throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
  225. }
  226. Scanner::IndentMarker* Scanner::PushIndentTo(int column,
  227. IndentMarker::INDENT_TYPE type) {
  228. // are we in flow?
  229. if (InFlowContext()) {
  230. return 0;
  231. }
  232. std::unique_ptr<IndentMarker> pIndent(new IndentMarker(column, type));
  233. IndentMarker& indent = *pIndent;
  234. const IndentMarker& lastIndent = *m_indents.top();
  235. // is this actually an indentation?
  236. if (indent.column < lastIndent.column) {
  237. return 0;
  238. }
  239. if (indent.column == lastIndent.column &&
  240. !(indent.type == IndentMarker::SEQ &&
  241. lastIndent.type == IndentMarker::MAP)) {
  242. return 0;
  243. }
  244. // push a start token
  245. indent.pStartToken = PushToken(GetStartTokenFor(type));
  246. // and then the indent
  247. m_indents.push(&indent);
  248. m_indentRefs.push_back(std::move(pIndent));
  249. return &m_indentRefs.back();
  250. }
  251. void Scanner::PopIndentToHere() {
  252. // are we in flow?
  253. if (InFlowContext()) {
  254. return;
  255. }
  256. // now pop away
  257. while (!m_indents.empty()) {
  258. const IndentMarker& indent = *m_indents.top();
  259. if (indent.column < INPUT.column()) {
  260. break;
  261. }
  262. if (indent.column == INPUT.column() &&
  263. !(indent.type == IndentMarker::SEQ &&
  264. !Exp::BlockEntry().Matches(INPUT))) {
  265. break;
  266. }
  267. PopIndent();
  268. }
  269. while (!m_indents.empty() &&
  270. m_indents.top()->status == IndentMarker::INVALID) {
  271. PopIndent();
  272. }
  273. }
  274. void Scanner::PopAllIndents() {
  275. // are we in flow?
  276. if (InFlowContext()) {
  277. return;
  278. }
  279. // now pop away
  280. while (!m_indents.empty()) {
  281. const IndentMarker& indent = *m_indents.top();
  282. if (indent.type == IndentMarker::NONE) {
  283. break;
  284. }
  285. PopIndent();
  286. }
  287. }
  288. void Scanner::PopIndent() {
  289. const IndentMarker& indent = *m_indents.top();
  290. m_indents.pop();
  291. if (indent.status != IndentMarker::VALID) {
  292. InvalidateSimpleKey();
  293. return;
  294. }
  295. if (indent.type == IndentMarker::SEQ) {
  296. m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark()));
  297. } else if (indent.type == IndentMarker::MAP) {
  298. m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark()));
  299. }
  300. }
  301. int Scanner::GetTopIndent() const {
  302. if (m_indents.empty()) {
  303. return 0;
  304. }
  305. return m_indents.top()->column;
  306. }
  307. void Scanner::ThrowParserException(const std::string& msg) const {
  308. Mark mark = Mark::null_mark();
  309. if (!m_tokens.empty()) {
  310. const Token& token = m_tokens.front();
  311. mark = token.mark;
  312. }
  313. throw ParserException(mark, msg);
  314. }
  315. } // namespace YAML