123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437 |
- #include <sstream>
- #include "exp.h"
- #include "regex_yaml.h"
- #include "regeximpl.h"
- #include "scanner.h"
- #include "scanscalar.h"
- #include "scantag.h" // IWYU pragma: keep
- #include "tag.h" // IWYU pragma: keep
- #include "token.h"
- #include "yaml-cpp/exceptions.h" // IWYU pragma: keep
- #include "yaml-cpp/mark.h"
- namespace YAML {
- ///////////////////////////////////////////////////////////////////////
- // Specialization for scanning specific tokens
- // Directive
- // . Note: no semantic checking is done here (that's for the parser to do)
- void Scanner::ScanDirective() {
- std::string name;
- std::vector<std::string> params;
- // pop indents and simple keys
- PopAllIndents();
- PopAllSimpleKeys();
- m_simpleKeyAllowed = false;
- m_canBeJSONFlow = false;
- // store pos and eat indicator
- Token token(Token::DIRECTIVE, INPUT.mark());
- INPUT.eat(1);
- // read name
- while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
- token.value += INPUT.get();
- // read parameters
- while (1) {
- // first get rid of whitespace
- while (Exp::Blank().Matches(INPUT))
- INPUT.eat(1);
- // break on newline or comment
- if (!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT))
- break;
- // now read parameter
- std::string param;
- while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
- param += INPUT.get();
- token.params.push_back(param);
- }
- m_tokens.push(token);
- }
- // DocStart
- void Scanner::ScanDocStart() {
- PopAllIndents();
- PopAllSimpleKeys();
- m_simpleKeyAllowed = false;
- m_canBeJSONFlow = false;
- // eat
- Mark mark = INPUT.mark();
- INPUT.eat(3);
- m_tokens.push(Token(Token::DOC_START, mark));
- }
- // DocEnd
- void Scanner::ScanDocEnd() {
- PopAllIndents();
- PopAllSimpleKeys();
- m_simpleKeyAllowed = false;
- m_canBeJSONFlow = false;
- // eat
- Mark mark = INPUT.mark();
- INPUT.eat(3);
- m_tokens.push(Token(Token::DOC_END, mark));
- }
- // FlowStart
- void Scanner::ScanFlowStart() {
- // flows can be simple keys
- InsertPotentialSimpleKey();
- m_simpleKeyAllowed = true;
- m_canBeJSONFlow = false;
- // eat
- Mark mark = INPUT.mark();
- char ch = INPUT.get();
- FLOW_MARKER flowType = (ch == Keys::FlowSeqStart ? FLOW_SEQ : FLOW_MAP);
- m_flows.push(flowType);
- Token::TYPE type =
- (flowType == FLOW_SEQ ? Token::FLOW_SEQ_START : Token::FLOW_MAP_START);
- m_tokens.push(Token(type, mark));
- }
- // FlowEnd
- void Scanner::ScanFlowEnd() {
- if (InBlockContext())
- throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END);
- // we might have a solo entry in the flow context
- if (InFlowContext()) {
- if (m_flows.top() == FLOW_MAP && VerifySimpleKey())
- m_tokens.push(Token(Token::VALUE, INPUT.mark()));
- else if (m_flows.top() == FLOW_SEQ)
- InvalidateSimpleKey();
- }
- m_simpleKeyAllowed = false;
- m_canBeJSONFlow = true;
- // eat
- Mark mark = INPUT.mark();
- char ch = INPUT.get();
- // check that it matches the start
- FLOW_MARKER flowType = (ch == Keys::FlowSeqEnd ? FLOW_SEQ : FLOW_MAP);
- if (m_flows.top() != flowType)
- throw ParserException(mark, ErrorMsg::FLOW_END);
- m_flows.pop();
- Token::TYPE type = (flowType ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END);
- m_tokens.push(Token(type, mark));
- }
- // FlowEntry
- void Scanner::ScanFlowEntry() {
- // we might have a solo entry in the flow context
- if (InFlowContext()) {
- if (m_flows.top() == FLOW_MAP && VerifySimpleKey())
- m_tokens.push(Token(Token::VALUE, INPUT.mark()));
- else if (m_flows.top() == FLOW_SEQ)
- InvalidateSimpleKey();
- }
- m_simpleKeyAllowed = true;
- m_canBeJSONFlow = false;
- // eat
- Mark mark = INPUT.mark();
- INPUT.eat(1);
- m_tokens.push(Token(Token::FLOW_ENTRY, mark));
- }
- // BlockEntry
- void Scanner::ScanBlockEntry() {
- // we better be in the block context!
- if (InFlowContext())
- throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
- // can we put it here?
- if (!m_simpleKeyAllowed)
- throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
- PushIndentTo(INPUT.column(), IndentMarker::SEQ);
- m_simpleKeyAllowed = true;
- m_canBeJSONFlow = false;
- // eat
- Mark mark = INPUT.mark();
- INPUT.eat(1);
- m_tokens.push(Token(Token::BLOCK_ENTRY, mark));
- }
- // Key
- void Scanner::ScanKey() {
- // handle keys diffently in the block context (and manage indents)
- if (InBlockContext()) {
- if (!m_simpleKeyAllowed)
- throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY);
- PushIndentTo(INPUT.column(), IndentMarker::MAP);
- }
- // can only put a simple key here if we're in block context
- m_simpleKeyAllowed = InBlockContext();
- // eat
- Mark mark = INPUT.mark();
- INPUT.eat(1);
- m_tokens.push(Token(Token::KEY, mark));
- }
- // Value
- void Scanner::ScanValue() {
- // and check that simple key
- bool isSimpleKey = VerifySimpleKey();
- m_canBeJSONFlow = false;
- if (isSimpleKey) {
- // can't follow a simple key with another simple key (dunno why, though - it
- // seems fine)
- m_simpleKeyAllowed = false;
- } else {
- // handle values diffently in the block context (and manage indents)
- if (InBlockContext()) {
- if (!m_simpleKeyAllowed)
- throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE);
- PushIndentTo(INPUT.column(), IndentMarker::MAP);
- }
- // can only put a simple key here if we're in block context
- m_simpleKeyAllowed = InBlockContext();
- }
- // eat
- Mark mark = INPUT.mark();
- INPUT.eat(1);
- m_tokens.push(Token(Token::VALUE, mark));
- }
- // AnchorOrAlias
- void Scanner::ScanAnchorOrAlias() {
- bool alias;
- std::string name;
- // insert a potential simple key
- InsertPotentialSimpleKey();
- m_simpleKeyAllowed = false;
- m_canBeJSONFlow = false;
- // eat the indicator
- Mark mark = INPUT.mark();
- char indicator = INPUT.get();
- alias = (indicator == Keys::Alias);
- // now eat the content
- while (INPUT && Exp::Anchor().Matches(INPUT))
- name += INPUT.get();
- // we need to have read SOMETHING!
- if (name.empty())
- throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND
- : ErrorMsg::ANCHOR_NOT_FOUND);
- // and needs to end correctly
- if (INPUT && !Exp::AnchorEnd().Matches(INPUT))
- throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS
- : ErrorMsg::CHAR_IN_ANCHOR);
- // and we're done
- Token token(alias ? Token::ALIAS : Token::ANCHOR, mark);
- token.value = name;
- m_tokens.push(token);
- }
- // Tag
- void Scanner::ScanTag() {
- // insert a potential simple key
- InsertPotentialSimpleKey();
- m_simpleKeyAllowed = false;
- m_canBeJSONFlow = false;
- Token token(Token::TAG, INPUT.mark());
- // eat the indicator
- INPUT.get();
- if (INPUT && INPUT.peek() == Keys::VerbatimTagStart) {
- std::string tag = ScanVerbatimTag(INPUT);
- token.value = tag;
- token.data = Tag::VERBATIM;
- } else {
- bool canBeHandle;
- token.value = ScanTagHandle(INPUT, canBeHandle);
- if (!canBeHandle && token.value.empty())
- token.data = Tag::NON_SPECIFIC;
- else if (token.value.empty())
- token.data = Tag::SECONDARY_HANDLE;
- else
- token.data = Tag::PRIMARY_HANDLE;
- // is there a suffix?
- if (canBeHandle && INPUT.peek() == Keys::Tag) {
- // eat the indicator
- INPUT.get();
- token.params.push_back(ScanTagSuffix(INPUT));
- token.data = Tag::NAMED_HANDLE;
- }
- }
- m_tokens.push(token);
- }
- // PlainScalar
- void Scanner::ScanPlainScalar() {
- std::string scalar;
- // set up the scanning parameters
- ScanScalarParams params;
- params.end =
- (InFlowContext() ? &Exp::ScanScalarEndInFlow() : &Exp::ScanScalarEnd());
- params.eatEnd = false;
- params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1);
- params.fold = FOLD_FLOW;
- params.eatLeadingWhitespace = true;
- params.trimTrailingSpaces = true;
- params.chomp = STRIP;
- params.onDocIndicator = BREAK;
- params.onTabInIndentation = THROW;
- // insert a potential simple key
- InsertPotentialSimpleKey();
- Mark mark = INPUT.mark();
- scalar = ScanScalar(INPUT, params);
- // can have a simple key only if we ended the scalar by starting a new line
- m_simpleKeyAllowed = params.leadingSpaces;
- m_canBeJSONFlow = false;
- // finally, check and see if we ended on an illegal character
- // if(Exp::IllegalCharInScalar.Matches(INPUT))
- // throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR);
- Token token(Token::PLAIN_SCALAR, mark);
- token.value = scalar;
- m_tokens.push(token);
- }
- // QuotedScalar
- void Scanner::ScanQuotedScalar() {
- std::string scalar;
- // peek at single or double quote (don't eat because we need to preserve (for
- // the time being) the input position)
- char quote = INPUT.peek();
- bool single = (quote == '\'');
- // setup the scanning parameters
- ScanScalarParams params;
- RegEx end = (single ? RegEx(quote) && !Exp::EscSingleQuote() : RegEx(quote));
- params.end = &end;
- params.eatEnd = true;
- params.escape = (single ? '\'' : '\\');
- params.indent = 0;
- params.fold = FOLD_FLOW;
- params.eatLeadingWhitespace = true;
- params.trimTrailingSpaces = false;
- params.chomp = CLIP;
- params.onDocIndicator = THROW;
- // insert a potential simple key
- InsertPotentialSimpleKey();
- Mark mark = INPUT.mark();
- // now eat that opening quote
- INPUT.get();
- // and scan
- scalar = ScanScalar(INPUT, params);
- m_simpleKeyAllowed = false;
- m_canBeJSONFlow = true;
- Token token(Token::NON_PLAIN_SCALAR, mark);
- token.value = scalar;
- m_tokens.push(token);
- }
- // BlockScalarToken
- // . These need a little extra processing beforehand.
- // . We need to scan the line where the indicator is (this doesn't count as part
- // of the scalar),
- // and then we need to figure out what level of indentation we'll be using.
- void Scanner::ScanBlockScalar() {
- std::string scalar;
- ScanScalarParams params;
- params.indent = 1;
- params.detectIndent = true;
- // eat block indicator ('|' or '>')
- Mark mark = INPUT.mark();
- char indicator = INPUT.get();
- params.fold = (indicator == Keys::FoldedScalar ? FOLD_BLOCK : DONT_FOLD);
- // eat chomping/indentation indicators
- params.chomp = CLIP;
- int n = Exp::Chomp().Match(INPUT);
- for (int i = 0; i < n; i++) {
- char ch = INPUT.get();
- if (ch == '+')
- params.chomp = KEEP;
- else if (ch == '-')
- params.chomp = STRIP;
- else if (Exp::Digit().Matches(ch)) {
- if (ch == '0')
- throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK);
- params.indent = ch - '0';
- params.detectIndent = false;
- }
- }
- // now eat whitespace
- while (Exp::Blank().Matches(INPUT))
- INPUT.eat(1);
- // and comments to the end of the line
- if (Exp::Comment().Matches(INPUT))
- while (INPUT && !Exp::Break().Matches(INPUT))
- INPUT.eat(1);
- // if it's not a line break, then we ran into a bad character inline
- if (INPUT && !Exp::Break().Matches(INPUT))
- throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK);
- // set the initial indentation
- if (GetTopIndent() >= 0)
- params.indent += GetTopIndent();
- params.eatLeadingWhitespace = false;
- params.trimTrailingSpaces = false;
- params.onTabInIndentation = THROW;
- scalar = ScanScalar(INPUT, params);
- // simple keys always ok after block scalars (since we're gonna start a new
- // line anyways)
- m_simpleKeyAllowed = true;
- m_canBeJSONFlow = false;
- Token token(Token::NON_PLAIN_SCALAR, mark);
- token.value = scalar;
- m_tokens.push(token);
- }
- }
|