regeximpl.h 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. #ifndef REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
  2. #define REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
  3. #if defined(_MSC_VER) || \
  4. (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
  5. (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
  6. #pragma once
  7. #endif
  8. #include "stream.h"
  9. #include "stringsource.h"
  10. #include "streamcharsource.h"
  11. namespace YAML {
  12. // query matches
  13. inline bool RegEx::Matches(char ch) const {
  14. std::string str;
  15. str += ch;
  16. return Matches(str);
  17. }
  18. inline bool RegEx::Matches(const std::string& str) const {
  19. return Match(str) >= 0;
  20. }
  21. inline bool RegEx::Matches(const Stream& in) const { return Match(in) >= 0; }
  22. template <typename Source>
  23. inline bool RegEx::Matches(const Source& source) const {
  24. return Match(source) >= 0;
  25. }
  26. // Match
  27. // . Matches the given string against this regular expression.
  28. // . Returns the number of characters matched.
  29. // . Returns -1 if no characters were matched (the reason for
  30. // not returning zero is that we may have an empty regex
  31. // which is ALWAYS successful at matching zero characters).
  32. // . REMEMBER that we only match from the start of the buffer!
  33. inline int RegEx::Match(const std::string& str) const {
  34. StringCharSource source(str.c_str(), str.size());
  35. return Match(source);
  36. }
  37. inline int RegEx::Match(const Stream& in) const {
  38. StreamCharSource source(in);
  39. return Match(source);
  40. }
  41. template <typename Source>
  42. inline bool RegEx::IsValidSource(const Source& source) const {
  43. return source;
  44. }
  45. template <>
  46. inline bool RegEx::IsValidSource<StringCharSource>(
  47. const StringCharSource& source) const {
  48. switch (m_op) {
  49. case REGEX_MATCH:
  50. case REGEX_RANGE:
  51. return source;
  52. default:
  53. return true;
  54. }
  55. }
  56. template <typename Source>
  57. inline int RegEx::Match(const Source& source) const {
  58. return IsValidSource(source) ? MatchUnchecked(source) : -1;
  59. }
  60. template <typename Source>
  61. inline int RegEx::MatchUnchecked(const Source& source) const {
  62. switch (m_op) {
  63. case REGEX_EMPTY:
  64. return MatchOpEmpty(source);
  65. case REGEX_MATCH:
  66. return MatchOpMatch(source);
  67. case REGEX_RANGE:
  68. return MatchOpRange(source);
  69. case REGEX_OR:
  70. return MatchOpOr(source);
  71. case REGEX_AND:
  72. return MatchOpAnd(source);
  73. case REGEX_NOT:
  74. return MatchOpNot(source);
  75. case REGEX_SEQ:
  76. return MatchOpSeq(source);
  77. }
  78. return -1;
  79. }
  80. //////////////////////////////////////////////////////////////////////////////
  81. // Operators
  82. // Note: the convention MatchOp*<Source> is that we can assume
  83. // IsValidSource(source).
  84. // So we do all our checks *before* we call these functions
  85. // EmptyOperator
  86. template <typename Source>
  87. inline int RegEx::MatchOpEmpty(const Source& source) const {
  88. return source[0] == Stream::eof() ? 0 : -1;
  89. }
  90. template <>
  91. inline int RegEx::MatchOpEmpty<StringCharSource>(
  92. const StringCharSource& source) const {
  93. return !source
  94. ? 0
  95. : -1; // the empty regex only is successful on the empty string
  96. }
  97. // MatchOperator
  98. template <typename Source>
  99. inline int RegEx::MatchOpMatch(const Source& source) const {
  100. if (source[0] != m_a)
  101. return -1;
  102. return 1;
  103. }
  104. // RangeOperator
  105. template <typename Source>
  106. inline int RegEx::MatchOpRange(const Source& source) const {
  107. if (m_a > source[0] || m_z < source[0])
  108. return -1;
  109. return 1;
  110. }
  111. // OrOperator
  112. template <typename Source>
  113. inline int RegEx::MatchOpOr(const Source& source) const {
  114. for (std::size_t i = 0; i < m_params.size(); i++) {
  115. int n = m_params[i].MatchUnchecked(source);
  116. if (n >= 0)
  117. return n;
  118. }
  119. return -1;
  120. }
  121. // AndOperator
  122. // Note: 'AND' is a little funny, since we may be required to match things
  123. // of different lengths. If we find a match, we return the length of
  124. // the FIRST entry on the list.
  125. template <typename Source>
  126. inline int RegEx::MatchOpAnd(const Source& source) const {
  127. int first = -1;
  128. for (std::size_t i = 0; i < m_params.size(); i++) {
  129. int n = m_params[i].MatchUnchecked(source);
  130. if (n == -1)
  131. return -1;
  132. if (i == 0)
  133. first = n;
  134. }
  135. return first;
  136. }
  137. // NotOperator
  138. template <typename Source>
  139. inline int RegEx::MatchOpNot(const Source& source) const {
  140. if (m_params.empty())
  141. return -1;
  142. if (m_params[0].MatchUnchecked(source) >= 0)
  143. return -1;
  144. return 1;
  145. }
  146. // SeqOperator
  147. template <typename Source>
  148. inline int RegEx::MatchOpSeq(const Source& source) const {
  149. int offset = 0;
  150. for (std::size_t i = 0; i < m_params.size(); i++) {
  151. int n = m_params[i].Match(source + offset); // note Match, not
  152. // MatchUnchecked because we
  153. // need to check validity after
  154. // the offset
  155. if (n == -1)
  156. return -1;
  157. offset += n;
  158. }
  159. return offset;
  160. }
  161. }
  162. #endif // REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66