123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250 |
- #include "scanscalar.h"
- #include <algorithm>
- #include "exp.h"
- #include "regeximpl.h"
- #include "stream.h"
- #include "yaml-cpp/exceptions.h" // IWYU pragma: keep
- namespace YAML {
- // ScanScalar
- // . This is where the scalar magic happens.
- //
- // . We do the scanning in three phases:
- // 1. Scan until newline
- // 2. Eat newline
- // 3. Scan leading blanks.
- //
- // . Depending on the parameters given, we store or stop
- // at different places in the above flow.
- std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
- bool foundNonEmptyLine = false;
- bool pastOpeningBreak = (params.fold == FOLD_FLOW);
- bool emptyLine = false, moreIndented = false;
- int foldedNewlineCount = 0;
- bool foldedNewlineStartedMoreIndented = false;
- std::size_t lastEscapedChar = std::string::npos;
- std::string scalar;
- params.leadingSpaces = false;
- if (!params.end) {
- params.end = &Exp::Empty();
- }
- while (INPUT) {
- // ********************************
- // Phase #1: scan until line ending
- std::size_t lastNonWhitespaceChar = scalar.size();
- bool escapedNewline = false;
- while (!params.end->Matches(INPUT) && !Exp::Break().Matches(INPUT)) {
- if (!INPUT) {
- break;
- }
- // document indicator?
- if (INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) {
- if (params.onDocIndicator == BREAK) {
- break;
- } else if (params.onDocIndicator == THROW) {
- throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR);
- }
- }
- foundNonEmptyLine = true;
- pastOpeningBreak = true;
- // escaped newline? (only if we're escaping on slash)
- if (params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) {
- // eat escape character and get out (but preserve trailing whitespace!)
- INPUT.get();
- lastNonWhitespaceChar = scalar.size();
- lastEscapedChar = scalar.size();
- escapedNewline = true;
- break;
- }
- // escape this?
- if (INPUT.peek() == params.escape) {
- scalar += Exp::Escape(INPUT);
- lastNonWhitespaceChar = scalar.size();
- lastEscapedChar = scalar.size();
- continue;
- }
- // otherwise, just add the damn character
- char ch = INPUT.get();
- scalar += ch;
- if (ch != ' ' && ch != '\t') {
- lastNonWhitespaceChar = scalar.size();
- }
- }
- // eof? if we're looking to eat something, then we throw
- if (!INPUT) {
- if (params.eatEnd) {
- throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR);
- }
- break;
- }
- // doc indicator?
- if (params.onDocIndicator == BREAK && INPUT.column() == 0 &&
- Exp::DocIndicator().Matches(INPUT)) {
- break;
- }
- // are we done via character match?
- int n = params.end->Match(INPUT);
- if (n >= 0) {
- if (params.eatEnd) {
- INPUT.eat(n);
- }
- break;
- }
- // do we remove trailing whitespace?
- if (params.fold == FOLD_FLOW)
- scalar.erase(lastNonWhitespaceChar);
- // ********************************
- // Phase #2: eat line ending
- n = Exp::Break().Match(INPUT);
- INPUT.eat(n);
- // ********************************
- // Phase #3: scan initial spaces
- // first the required indentation
- while (INPUT.peek() == ' ' &&
- (INPUT.column() < params.indent ||
- (params.detectIndent && !foundNonEmptyLine)) &&
- !params.end->Matches(INPUT)) {
- INPUT.eat(1);
- }
- // update indent if we're auto-detecting
- if (params.detectIndent && !foundNonEmptyLine) {
- params.indent = std::max(params.indent, INPUT.column());
- }
- // and then the rest of the whitespace
- while (Exp::Blank().Matches(INPUT)) {
- // we check for tabs that masquerade as indentation
- if (INPUT.peek() == '\t' && INPUT.column() < params.indent &&
- params.onTabInIndentation == THROW) {
- throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION);
- }
- if (!params.eatLeadingWhitespace) {
- break;
- }
- if (params.end->Matches(INPUT)) {
- break;
- }
- INPUT.eat(1);
- }
- // was this an empty line?
- bool nextEmptyLine = Exp::Break().Matches(INPUT);
- bool nextMoreIndented = Exp::Blank().Matches(INPUT);
- if (params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine)
- foldedNewlineStartedMoreIndented = moreIndented;
- // for block scalars, we always start with a newline, so we should ignore it
- // (not fold or keep)
- if (pastOpeningBreak) {
- switch (params.fold) {
- case DONT_FOLD:
- scalar += "\n";
- break;
- case FOLD_BLOCK:
- if (!emptyLine && !nextEmptyLine && !moreIndented &&
- !nextMoreIndented && INPUT.column() >= params.indent) {
- scalar += " ";
- } else if (nextEmptyLine) {
- foldedNewlineCount++;
- } else {
- scalar += "\n";
- }
- if (!nextEmptyLine && foldedNewlineCount > 0) {
- scalar += std::string(foldedNewlineCount - 1, '\n');
- if (foldedNewlineStartedMoreIndented ||
- nextMoreIndented | !foundNonEmptyLine) {
- scalar += "\n";
- }
- foldedNewlineCount = 0;
- }
- break;
- case FOLD_FLOW:
- if (nextEmptyLine) {
- scalar += "\n";
- } else if (!emptyLine && !nextEmptyLine && !escapedNewline) {
- scalar += " ";
- }
- break;
- }
- }
- emptyLine = nextEmptyLine;
- moreIndented = nextMoreIndented;
- pastOpeningBreak = true;
- // are we done via indentation?
- if (!emptyLine && INPUT.column() < params.indent) {
- params.leadingSpaces = true;
- break;
- }
- }
- // post-processing
- if (params.trimTrailingSpaces) {
- std::size_t pos = scalar.find_last_not_of(' ');
- if (lastEscapedChar != std::string::npos) {
- if (pos < lastEscapedChar || pos == std::string::npos) {
- pos = lastEscapedChar;
- }
- }
- if (pos < scalar.size()) {
- scalar.erase(pos + 1);
- }
- }
- switch (params.chomp) {
- case CLIP: {
- std::size_t pos = scalar.find_last_not_of('\n');
- if (lastEscapedChar != std::string::npos) {
- if (pos < lastEscapedChar || pos == std::string::npos) {
- pos = lastEscapedChar;
- }
- }
- if (pos == std::string::npos) {
- scalar.erase();
- } else if (pos + 1 < scalar.size()) {
- scalar.erase(pos + 2);
- }
- } break;
- case STRIP: {
- std::size_t pos = scalar.find_last_not_of('\n');
- if (lastEscapedChar != std::string::npos) {
- if (pos < lastEscapedChar || pos == std::string::npos) {
- pos = lastEscapedChar;
- }
- }
- if (pos == std::string::npos) {
- scalar.erase();
- } else if (pos < scalar.size()) {
- scalar.erase(pos + 1);
- }
- } break;
- default:
- break;
- }
- return scalar;
- }
- }
|