scanscalar.cpp 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. #include "scanscalar.h"
  2. #include <algorithm>
  3. #include "exp.h"
  4. #include "regeximpl.h"
  5. #include "stream.h"
  6. #include "yaml-cpp/exceptions.h" // IWYU pragma: keep
  7. namespace YAML {
  8. // ScanScalar
  9. // . This is where the scalar magic happens.
  10. //
  11. // . We do the scanning in three phases:
  12. // 1. Scan until newline
  13. // 2. Eat newline
  14. // 3. Scan leading blanks.
  15. //
  16. // . Depending on the parameters given, we store or stop
  17. // and different places in the above flow.
  18. std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
  19. bool foundNonEmptyLine = false;
  20. bool pastOpeningBreak = (params.fold == FOLD_FLOW);
  21. bool emptyLine = false, moreIndented = false;
  22. int foldedNewlineCount = 0;
  23. bool foldedNewlineStartedMoreIndented = false;
  24. std::size_t lastEscapedChar = std::string::npos;
  25. std::string scalar;
  26. params.leadingSpaces = false;
  27. if (!params.end) {
  28. params.end = &Exp::Empty();
  29. }
  30. while (INPUT) {
  31. // ********************************
  32. // Phase #1: scan until line ending
  33. std::size_t lastNonWhitespaceChar = scalar.size();
  34. bool escapedNewline = false;
  35. while (!params.end->Matches(INPUT) && !Exp::Break().Matches(INPUT)) {
  36. if (!INPUT) {
  37. break;
  38. }
  39. // document indicator?
  40. if (INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) {
  41. if (params.onDocIndicator == BREAK) {
  42. break;
  43. } else if (params.onDocIndicator == THROW) {
  44. throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR);
  45. }
  46. }
  47. foundNonEmptyLine = true;
  48. pastOpeningBreak = true;
  49. // escaped newline? (only if we're escaping on slash)
  50. if (params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) {
  51. // eat escape character and get out (but preserve trailing whitespace!)
  52. INPUT.get();
  53. lastNonWhitespaceChar = scalar.size();
  54. lastEscapedChar = scalar.size();
  55. escapedNewline = true;
  56. break;
  57. }
  58. // escape this?
  59. if (INPUT.peek() == params.escape) {
  60. scalar += Exp::Escape(INPUT);
  61. lastNonWhitespaceChar = scalar.size();
  62. lastEscapedChar = scalar.size();
  63. continue;
  64. }
  65. // otherwise, just add the damn character
  66. char ch = INPUT.get();
  67. scalar += ch;
  68. if (ch != ' ' && ch != '\t') {
  69. lastNonWhitespaceChar = scalar.size();
  70. }
  71. }
  72. // eof? if we're looking to eat something, then we throw
  73. if (!INPUT) {
  74. if (params.eatEnd) {
  75. throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR);
  76. }
  77. break;
  78. }
  79. // doc indicator?
  80. if (params.onDocIndicator == BREAK && INPUT.column() == 0 &&
  81. Exp::DocIndicator().Matches(INPUT)) {
  82. break;
  83. }
  84. // are we done via character match?
  85. int n = params.end->Match(INPUT);
  86. if (n >= 0) {
  87. if (params.eatEnd) {
  88. INPUT.eat(n);
  89. }
  90. break;
  91. }
  92. // do we remove trailing whitespace?
  93. if (params.fold == FOLD_FLOW)
  94. scalar.erase(lastNonWhitespaceChar);
  95. // ********************************
  96. // Phase #2: eat line ending
  97. n = Exp::Break().Match(INPUT);
  98. INPUT.eat(n);
  99. // ********************************
  100. // Phase #3: scan initial spaces
  101. // first the required indentation
  102. while (INPUT.peek() == ' ' &&
  103. (INPUT.column() < params.indent ||
  104. (params.detectIndent && !foundNonEmptyLine)) &&
  105. !params.end->Matches(INPUT)) {
  106. INPUT.eat(1);
  107. }
  108. // update indent if we're auto-detecting
  109. if (params.detectIndent && !foundNonEmptyLine) {
  110. params.indent = std::max(params.indent, INPUT.column());
  111. }
  112. // and then the rest of the whitespace
  113. while (Exp::Blank().Matches(INPUT)) {
  114. // we check for tabs that masquerade as indentation
  115. if (INPUT.peek() == '\t' && INPUT.column() < params.indent &&
  116. params.onTabInIndentation == THROW) {
  117. throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION);
  118. }
  119. if (!params.eatLeadingWhitespace) {
  120. break;
  121. }
  122. if (params.end->Matches(INPUT)) {
  123. break;
  124. }
  125. INPUT.eat(1);
  126. }
  127. // was this an empty line?
  128. bool nextEmptyLine = Exp::Break().Matches(INPUT);
  129. bool nextMoreIndented = Exp::Blank().Matches(INPUT);
  130. if (params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine)
  131. foldedNewlineStartedMoreIndented = moreIndented;
  132. // for block scalars, we always start with a newline, so we should ignore it
  133. // (not fold or keep)
  134. if (pastOpeningBreak) {
  135. switch (params.fold) {
  136. case DONT_FOLD:
  137. scalar += "\n";
  138. break;
  139. case FOLD_BLOCK:
  140. if (!emptyLine && !nextEmptyLine && !moreIndented &&
  141. !nextMoreIndented && INPUT.column() >= params.indent) {
  142. scalar += " ";
  143. } else if (nextEmptyLine) {
  144. foldedNewlineCount++;
  145. } else {
  146. scalar += "\n";
  147. }
  148. if (!nextEmptyLine && foldedNewlineCount > 0) {
  149. scalar += std::string(foldedNewlineCount - 1, '\n');
  150. if (foldedNewlineStartedMoreIndented ||
  151. nextMoreIndented | !foundNonEmptyLine) {
  152. scalar += "\n";
  153. }
  154. foldedNewlineCount = 0;
  155. }
  156. break;
  157. case FOLD_FLOW:
  158. if (nextEmptyLine) {
  159. scalar += "\n";
  160. } else if (!emptyLine && !nextEmptyLine && !escapedNewline) {
  161. scalar += " ";
  162. }
  163. break;
  164. }
  165. }
  166. emptyLine = nextEmptyLine;
  167. moreIndented = nextMoreIndented;
  168. pastOpeningBreak = true;
  169. // are we done via indentation?
  170. if (!emptyLine && INPUT.column() < params.indent) {
  171. params.leadingSpaces = true;
  172. break;
  173. }
  174. }
  175. // post-processing
  176. if (params.trimTrailingSpaces) {
  177. std::size_t pos = scalar.find_last_not_of(' ');
  178. if (lastEscapedChar != std::string::npos) {
  179. if (pos < lastEscapedChar || pos == std::string::npos) {
  180. pos = lastEscapedChar;
  181. }
  182. }
  183. if (pos < scalar.size()) {
  184. scalar.erase(pos + 1);
  185. }
  186. }
  187. switch (params.chomp) {
  188. case CLIP: {
  189. std::size_t pos = scalar.find_last_not_of('\n');
  190. if (lastEscapedChar != std::string::npos) {
  191. if (pos < lastEscapedChar || pos == std::string::npos) {
  192. pos = lastEscapedChar;
  193. }
  194. }
  195. if (pos == std::string::npos) {
  196. scalar.erase();
  197. } else if (pos + 1 < scalar.size()) {
  198. scalar.erase(pos + 2);
  199. }
  200. } break;
  201. case STRIP: {
  202. std::size_t pos = scalar.find_last_not_of('\n');
  203. if (lastEscapedChar != std::string::npos) {
  204. if (pos < lastEscapedChar || pos == std::string::npos) {
  205. pos = lastEscapedChar;
  206. }
  207. }
  208. if (pos == std::string::npos) {
  209. scalar.erase();
  210. } else if (pos < scalar.size()) {
  211. scalar.erase(pos + 1);
  212. }
  213. } break;
  214. default:
  215. break;
  216. }
  217. return scalar;
  218. }
  219. }