scanner.cpp 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. #include <cassert>
  2. #include <memory>
  3. #include "exp.h"
  4. #include "scanner.h"
  5. #include "token.h"
  6. #include "yaml-cpp/exceptions.h" // IWYU pragma: keep
  7. namespace YAML {
  8. Scanner::Scanner(std::istream& in)
  9. : INPUT(in),
  10. m_tokens{},
  11. m_startedStream(false),
  12. m_endedStream(false),
  13. m_simpleKeyAllowed(false),
  14. m_canBeJSONFlow(false),
  15. m_simpleKeys{},
  16. m_indents{},
  17. m_indentRefs{},
  18. m_flows{} {}
  19. Scanner::~Scanner() = default;
  20. bool Scanner::empty() {
  21. EnsureTokensInQueue();
  22. return m_tokens.empty();
  23. }
  24. void Scanner::pop() {
  25. EnsureTokensInQueue();
  26. if (!m_tokens.empty())
  27. m_tokens.pop();
  28. }
  29. Token& Scanner::peek() {
  30. EnsureTokensInQueue();
  31. assert(!m_tokens.empty()); // should we be asserting here? I mean, we really
  32. // just be checking
  33. // if it's empty before peeking.
  34. #if 0
  35. static Token *pLast = 0;
  36. if(pLast != &m_tokens.front())
  37. std::cerr << "peek: " << m_tokens.front() << "\n";
  38. pLast = &m_tokens.front();
  39. #endif
  40. return m_tokens.front();
  41. }
  42. Mark Scanner::mark() const { return INPUT.mark(); }
  43. void Scanner::EnsureTokensInQueue() {
  44. while (true) {
  45. if (!m_tokens.empty()) {
  46. Token& token = m_tokens.front();
  47. // if this guy's valid, then we're done
  48. if (token.status == Token::VALID) {
  49. return;
  50. }
  51. // here's where we clean up the impossible tokens
  52. if (token.status == Token::INVALID) {
  53. m_tokens.pop();
  54. continue;
  55. }
  56. // note: what's left are the unverified tokens
  57. }
  58. // no token? maybe we've actually finished
  59. if (m_endedStream) {
  60. return;
  61. }
  62. // no? then scan...
  63. ScanNextToken();
  64. }
  65. }
  66. void Scanner::ScanNextToken() {
  67. if (m_endedStream) {
  68. return;
  69. }
  70. if (!m_startedStream) {
  71. return StartStream();
  72. }
  73. // get rid of whitespace, etc. (in between tokens it should be irrelevant)
  74. ScanToNextToken();
  75. // maybe need to end some blocks
  76. PopIndentToHere();
  77. // *****
  78. // And now branch based on the next few characters!
  79. // *****
  80. // end of stream
  81. if (!INPUT) {
  82. return EndStream();
  83. }
  84. if (INPUT.column() == 0 && INPUT.peek() == Keys::Directive) {
  85. return ScanDirective();
  86. }
  87. // document token
  88. if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) {
  89. return ScanDocStart();
  90. }
  91. if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) {
  92. return ScanDocEnd();
  93. }
  94. // flow start/end/entry
  95. if (INPUT.peek() == Keys::FlowSeqStart ||
  96. INPUT.peek() == Keys::FlowMapStart) {
  97. return ScanFlowStart();
  98. }
  99. if (INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd) {
  100. return ScanFlowEnd();
  101. }
  102. if (INPUT.peek() == Keys::FlowEntry) {
  103. return ScanFlowEntry();
  104. }
  105. // block/map stuff
  106. if (Exp::BlockEntry().Matches(INPUT)) {
  107. return ScanBlockEntry();
  108. }
  109. if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) {
  110. return ScanKey();
  111. }
  112. if (GetValueRegex().Matches(INPUT)) {
  113. return ScanValue();
  114. }
  115. // alias/anchor
  116. if (INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor) {
  117. return ScanAnchorOrAlias();
  118. }
  119. // tag
  120. if (INPUT.peek() == Keys::Tag) {
  121. return ScanTag();
  122. }
  123. // special scalars
  124. if (InBlockContext() && (INPUT.peek() == Keys::LiteralScalar ||
  125. INPUT.peek() == Keys::FoldedScalar)) {
  126. return ScanBlockScalar();
  127. }
  128. if (INPUT.peek() == '\'' || INPUT.peek() == '\"') {
  129. return ScanQuotedScalar();
  130. }
  131. // plain scalars
  132. if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow())
  133. .Matches(INPUT)) {
  134. return ScanPlainScalar();
  135. }
  136. // don't know what it is!
  137. throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN);
  138. }
  139. void Scanner::ScanToNextToken() {
  140. while (true) {
  141. // first eat whitespace
  142. while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
  143. if (InBlockContext() && Exp::Tab().Matches(INPUT)) {
  144. m_simpleKeyAllowed = false;
  145. }
  146. INPUT.eat(1);
  147. }
  148. // then eat a comment
  149. if (Exp::Comment().Matches(INPUT)) {
  150. // eat until line break
  151. while (INPUT && !Exp::Break().Matches(INPUT)) {
  152. INPUT.eat(1);
  153. }
  154. }
  155. // if it's NOT a line break, then we're done!
  156. if (!Exp::Break().Matches(INPUT)) {
  157. break;
  158. }
  159. // otherwise, let's eat the line break and keep going
  160. int n = Exp::Break().Match(INPUT);
  161. INPUT.eat(n);
  162. // oh yeah, and let's get rid of that simple key
  163. InvalidateSimpleKey();
  164. // new line - we may be able to accept a simple key now
  165. if (InBlockContext()) {
  166. m_simpleKeyAllowed = true;
  167. }
  168. }
  169. }
  170. ///////////////////////////////////////////////////////////////////////
  171. // Misc. helpers
  172. // IsWhitespaceToBeEaten
  173. // . We can eat whitespace if it's a space or tab
  174. // . Note: originally tabs in block context couldn't be eaten
  175. // "where a simple key could be allowed
  176. // (i.e., not at the beginning of a line, or following '-', '?', or
  177. // ':')"
  178. // I think this is wrong, since tabs can be non-content whitespace; it's just
  179. // that they can't contribute to indentation, so once you've seen a tab in a
  180. // line, you can't start a simple key
  181. bool Scanner::IsWhitespaceToBeEaten(char ch) {
  182. if (ch == ' ') {
  183. return true;
  184. }
  185. if (ch == '\t') {
  186. return true;
  187. }
  188. return false;
  189. }
  190. const RegEx& Scanner::GetValueRegex() const {
  191. if (InBlockContext()) {
  192. return Exp::Value();
  193. }
  194. return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow();
  195. }
  196. void Scanner::StartStream() {
  197. m_startedStream = true;
  198. m_simpleKeyAllowed = true;
  199. std::unique_ptr<IndentMarker> pIndent(
  200. new IndentMarker(-1, IndentMarker::NONE));
  201. m_indentRefs.push_back(std::move(pIndent));
  202. m_indents.push(&m_indentRefs.back());
  203. }
  204. void Scanner::EndStream() {
  205. // force newline
  206. if (INPUT.column() > 0) {
  207. INPUT.ResetColumn();
  208. }
  209. PopAllIndents();
  210. PopAllSimpleKeys();
  211. m_simpleKeyAllowed = false;
  212. m_endedStream = true;
  213. }
  214. Token* Scanner::PushToken(Token::TYPE type) {
  215. m_tokens.push(Token(type, INPUT.mark()));
  216. return &m_tokens.back();
  217. }
  218. Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const {
  219. switch (type) {
  220. case IndentMarker::SEQ:
  221. return Token::BLOCK_SEQ_START;
  222. case IndentMarker::MAP:
  223. return Token::BLOCK_MAP_START;
  224. case IndentMarker::NONE:
  225. assert(false);
  226. break;
  227. }
  228. assert(false);
  229. throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
  230. }
  231. Scanner::IndentMarker* Scanner::PushIndentTo(int column,
  232. IndentMarker::INDENT_TYPE type) {
  233. // are we in flow?
  234. if (InFlowContext()) {
  235. return nullptr;
  236. }
  237. std::unique_ptr<IndentMarker> pIndent(new IndentMarker(column, type));
  238. IndentMarker& indent = *pIndent;
  239. const IndentMarker& lastIndent = *m_indents.top();
  240. // is this actually an indentation?
  241. if (indent.column < lastIndent.column) {
  242. return nullptr;
  243. }
  244. if (indent.column == lastIndent.column &&
  245. !(indent.type == IndentMarker::SEQ &&
  246. lastIndent.type == IndentMarker::MAP)) {
  247. return nullptr;
  248. }
  249. // push a start token
  250. indent.pStartToken = PushToken(GetStartTokenFor(type));
  251. // and then the indent
  252. m_indents.push(&indent);
  253. m_indentRefs.push_back(std::move(pIndent));
  254. return &m_indentRefs.back();
  255. }
  256. void Scanner::PopIndentToHere() {
  257. // are we in flow?
  258. if (InFlowContext()) {
  259. return;
  260. }
  261. // now pop away
  262. while (!m_indents.empty()) {
  263. const IndentMarker& indent = *m_indents.top();
  264. if (indent.column < INPUT.column()) {
  265. break;
  266. }
  267. if (indent.column == INPUT.column() &&
  268. !(indent.type == IndentMarker::SEQ &&
  269. !Exp::BlockEntry().Matches(INPUT))) {
  270. break;
  271. }
  272. PopIndent();
  273. }
  274. while (!m_indents.empty() &&
  275. m_indents.top()->status == IndentMarker::INVALID) {
  276. PopIndent();
  277. }
  278. }
  279. void Scanner::PopAllIndents() {
  280. // are we in flow?
  281. if (InFlowContext()) {
  282. return;
  283. }
  284. // now pop away
  285. while (!m_indents.empty()) {
  286. const IndentMarker& indent = *m_indents.top();
  287. if (indent.type == IndentMarker::NONE) {
  288. break;
  289. }
  290. PopIndent();
  291. }
  292. }
  293. void Scanner::PopIndent() {
  294. const IndentMarker& indent = *m_indents.top();
  295. m_indents.pop();
  296. if (indent.status != IndentMarker::VALID) {
  297. InvalidateSimpleKey();
  298. return;
  299. }
  300. if (indent.type == IndentMarker::SEQ) {
  301. m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark()));
  302. } else if (indent.type == IndentMarker::MAP) {
  303. m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark()));
  304. }
  305. }
  306. int Scanner::GetTopIndent() const {
  307. if (m_indents.empty()) {
  308. return 0;
  309. }
  310. return m_indents.top()->column;
  311. }
  312. void Scanner::ThrowParserException(const std::string& msg) const {
  313. Mark mark = Mark::null_mark();
  314. if (!m_tokens.empty()) {
  315. const Token& token = m_tokens.front();
  316. mark = token.mark;
  317. }
  318. throw ParserException(mark, msg);
  319. }
  320. } // namespace YAML