123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391 |
- #include <cassert>
- #include <memory>
- #include "exp.h"
- #include "scanner.h"
- #include "token.h"
- #include "yaml-cpp/exceptions.h" // IWYU pragma: keep
- namespace YAML {
- Scanner::Scanner(std::istream& in)
- : INPUT(in),
- m_tokens{},
- m_startedStream(false),
- m_endedStream(false),
- m_simpleKeyAllowed(false),
- m_canBeJSONFlow(false),
- m_simpleKeys{},
- m_indents{},
- m_indentRefs{},
- m_flows{} {}
- Scanner::~Scanner() = default;
- bool Scanner::empty() {
- EnsureTokensInQueue();
- return m_tokens.empty();
- }
- void Scanner::pop() {
- EnsureTokensInQueue();
- if (!m_tokens.empty())
- m_tokens.pop();
- }
- Token& Scanner::peek() {
- EnsureTokensInQueue();
- assert(!m_tokens.empty()); // should we be asserting here? I mean, we really
- // just be checking
- // if it's empty before peeking.
- #if 0
- static Token *pLast = 0;
- if(pLast != &m_tokens.front())
- std::cerr << "peek: " << m_tokens.front() << "\n";
- pLast = &m_tokens.front();
- #endif
- return m_tokens.front();
- }
- Mark Scanner::mark() const { return INPUT.mark(); }
- void Scanner::EnsureTokensInQueue() {
- while (true) {
- if (!m_tokens.empty()) {
- Token& token = m_tokens.front();
- // if this guy's valid, then we're done
- if (token.status == Token::VALID) {
- return;
- }
- // here's where we clean up the impossible tokens
- if (token.status == Token::INVALID) {
- m_tokens.pop();
- continue;
- }
- // note: what's left are the unverified tokens
- }
- // no token? maybe we've actually finished
- if (m_endedStream) {
- return;
- }
- // no? then scan...
- ScanNextToken();
- }
- }
- void Scanner::ScanNextToken() {
- if (m_endedStream) {
- return;
- }
- if (!m_startedStream) {
- return StartStream();
- }
- // get rid of whitespace, etc. (in between tokens it should be irrelevant)
- ScanToNextToken();
- // maybe need to end some blocks
- PopIndentToHere();
- // *****
- // And now branch based on the next few characters!
- // *****
- // end of stream
- if (!INPUT) {
- return EndStream();
- }
- if (INPUT.column() == 0 && INPUT.peek() == Keys::Directive) {
- return ScanDirective();
- }
- // document token
- if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) {
- return ScanDocStart();
- }
- if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) {
- return ScanDocEnd();
- }
- // flow start/end/entry
- if (INPUT.peek() == Keys::FlowSeqStart ||
- INPUT.peek() == Keys::FlowMapStart) {
- return ScanFlowStart();
- }
- if (INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd) {
- return ScanFlowEnd();
- }
- if (INPUT.peek() == Keys::FlowEntry) {
- return ScanFlowEntry();
- }
- // block/map stuff
- if (Exp::BlockEntry().Matches(INPUT)) {
- return ScanBlockEntry();
- }
- if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) {
- return ScanKey();
- }
- if (GetValueRegex().Matches(INPUT)) {
- return ScanValue();
- }
- // alias/anchor
- if (INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor) {
- return ScanAnchorOrAlias();
- }
- // tag
- if (INPUT.peek() == Keys::Tag) {
- return ScanTag();
- }
- // special scalars
- if (InBlockContext() && (INPUT.peek() == Keys::LiteralScalar ||
- INPUT.peek() == Keys::FoldedScalar)) {
- return ScanBlockScalar();
- }
- if (INPUT.peek() == '\'' || INPUT.peek() == '\"') {
- return ScanQuotedScalar();
- }
- // plain scalars
- if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow())
- .Matches(INPUT)) {
- return ScanPlainScalar();
- }
- // don't know what it is!
- throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN);
- }
- void Scanner::ScanToNextToken() {
- while (true) {
- // first eat whitespace
- while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
- if (InBlockContext() && Exp::Tab().Matches(INPUT)) {
- m_simpleKeyAllowed = false;
- }
- INPUT.eat(1);
- }
- // then eat a comment
- if (Exp::Comment().Matches(INPUT)) {
- // eat until line break
- while (INPUT && !Exp::Break().Matches(INPUT)) {
- INPUT.eat(1);
- }
- }
- // if it's NOT a line break, then we're done!
- if (!Exp::Break().Matches(INPUT)) {
- break;
- }
- // otherwise, let's eat the line break and keep going
- int n = Exp::Break().Match(INPUT);
- INPUT.eat(n);
- // oh yeah, and let's get rid of that simple key
- InvalidateSimpleKey();
- // new line - we may be able to accept a simple key now
- if (InBlockContext()) {
- m_simpleKeyAllowed = true;
- }
- }
- }
- ///////////////////////////////////////////////////////////////////////
- // Misc. helpers
- // IsWhitespaceToBeEaten
- // . We can eat whitespace if it's a space or tab
- // . Note: originally tabs in block context couldn't be eaten
- // "where a simple key could be allowed
- // (i.e., not at the beginning of a line, or following '-', '?', or
- // ':')"
- // I think this is wrong, since tabs can be non-content whitespace; it's just
- // that they can't contribute to indentation, so once you've seen a tab in a
- // line, you can't start a simple key
- bool Scanner::IsWhitespaceToBeEaten(char ch) {
- if (ch == ' ') {
- return true;
- }
- if (ch == '\t') {
- return true;
- }
- return false;
- }
- const RegEx& Scanner::GetValueRegex() const {
- if (InBlockContext()) {
- return Exp::Value();
- }
- return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow();
- }
- void Scanner::StartStream() {
- m_startedStream = true;
- m_simpleKeyAllowed = true;
- std::unique_ptr<IndentMarker> pIndent(
- new IndentMarker(-1, IndentMarker::NONE));
- m_indentRefs.push_back(std::move(pIndent));
- m_indents.push(&m_indentRefs.back());
- }
- void Scanner::EndStream() {
- // force newline
- if (INPUT.column() > 0) {
- INPUT.ResetColumn();
- }
- PopAllIndents();
- PopAllSimpleKeys();
- m_simpleKeyAllowed = false;
- m_endedStream = true;
- }
- Token* Scanner::PushToken(Token::TYPE type) {
- m_tokens.push(Token(type, INPUT.mark()));
- return &m_tokens.back();
- }
- Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const {
- switch (type) {
- case IndentMarker::SEQ:
- return Token::BLOCK_SEQ_START;
- case IndentMarker::MAP:
- return Token::BLOCK_MAP_START;
- case IndentMarker::NONE:
- assert(false);
- break;
- }
- assert(false);
- throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
- }
- Scanner::IndentMarker* Scanner::PushIndentTo(int column,
- IndentMarker::INDENT_TYPE type) {
- // are we in flow?
- if (InFlowContext()) {
- return nullptr;
- }
- std::unique_ptr<IndentMarker> pIndent(new IndentMarker(column, type));
- IndentMarker& indent = *pIndent;
- const IndentMarker& lastIndent = *m_indents.top();
- // is this actually an indentation?
- if (indent.column < lastIndent.column) {
- return nullptr;
- }
- if (indent.column == lastIndent.column &&
- !(indent.type == IndentMarker::SEQ &&
- lastIndent.type == IndentMarker::MAP)) {
- return nullptr;
- }
- // push a start token
- indent.pStartToken = PushToken(GetStartTokenFor(type));
- // and then the indent
- m_indents.push(&indent);
- m_indentRefs.push_back(std::move(pIndent));
- return &m_indentRefs.back();
- }
- void Scanner::PopIndentToHere() {
- // are we in flow?
- if (InFlowContext()) {
- return;
- }
- // now pop away
- while (!m_indents.empty()) {
- const IndentMarker& indent = *m_indents.top();
- if (indent.column < INPUT.column()) {
- break;
- }
- if (indent.column == INPUT.column() &&
- !(indent.type == IndentMarker::SEQ &&
- !Exp::BlockEntry().Matches(INPUT))) {
- break;
- }
- PopIndent();
- }
- while (!m_indents.empty() &&
- m_indents.top()->status == IndentMarker::INVALID) {
- PopIndent();
- }
- }
- void Scanner::PopAllIndents() {
- // are we in flow?
- if (InFlowContext()) {
- return;
- }
- // now pop away
- while (!m_indents.empty()) {
- const IndentMarker& indent = *m_indents.top();
- if (indent.type == IndentMarker::NONE) {
- break;
- }
- PopIndent();
- }
- }
- void Scanner::PopIndent() {
- const IndentMarker& indent = *m_indents.top();
- m_indents.pop();
- if (indent.status != IndentMarker::VALID) {
- InvalidateSimpleKey();
- return;
- }
- if (indent.type == IndentMarker::SEQ) {
- m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark()));
- } else if (indent.type == IndentMarker::MAP) {
- m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark()));
- }
- }
- int Scanner::GetTopIndent() const {
- if (m_indents.empty()) {
- return 0;
- }
- return m_indents.top()->column;
- }
- void Scanner::ThrowParserException(const std::string& msg) const {
- Mark mark = Mark::null_mark();
- if (!m_tokens.empty()) {
- const Token& token = m_tokens.front();
- mark = token.mark;
- }
- throw ParserException(mark, msg);
- }
- } // namespace YAML
|