Jelajahi Sumber

* Added a generic 'delimiter-separated values' string parser to strlib.c/h.

git-svn-id: https://svn.code.sf.net/p/rathena/svn/trunk@11878 54d463be-8e91-2dee-dedb-b68131a5f0ec
FlavioJS 17 tahun lalu
induk
melakukan
6dd13ee535
3 mengubah file dengan 375 tambahan dan 0 penghapusan
  1. 2 0
      Changelog-Trunk.txt
  2. 337 0
      src/common/strlib.c
  3. 36 0
      src/common/strlib.h

+ 2 - 0
Changelog-Trunk.txt

@@ -3,6 +3,8 @@ Date	Added
 AS OF SVN REV. 5091, WE ARE NOW USING TRUNK.  ALL UNTESTED BUGFIXES/FEATURES GO INTO TRUNK.
 IF YOU HAVE A WORKING AND TESTED BUGFIX PUT IT INTO STABLE AS WELL AS TRUNK.
 
+2007/12/09
+	* Added a generic 'delimiter-separated values' string parser to strlib.c/h. [FlavioJS]
 2007/12/07
 	* Fixed ensembles skills.
 	* removed the timer heap correction code when the timers overflow since

+ 337 - 0
src/common/strlib.c

@@ -3,6 +3,7 @@
 
 #include "../common/cbasetypes.h"
 #include "../common/malloc.h"
+#include "../common/showmsg.h"
 #include "strlib.h"
 
 #include <stdio.h>
@@ -362,6 +363,342 @@ int strline(const char* str, size_t pos)
 	return line;
 }
 
+
+
+/////////////////////////////////////////////////////////////////////
+/// Parses a delim-separated string.
+/// Starts parsing at startoff and fills the out_pos array with the start and 
+/// end positions in the string of the line and fields (that fit the array).
+/// Returns the number of fields or -1 if an error occurs.
+/// 
+/// out_pos can be NULL.
+/// Positions out_pos[0] and out_pos[1] are for the line start and end 
+/// positions. If a line terminator is found, the end position is placed there.
+/// The next values of the array are the start and end positions of the fields.
+/// out_pos[2] and out_pos[3] for the first field, out_pos[4] and out_pos[5] 
+/// for the seconds field and so on.
+/// Unfilled positions are set to -1.
+/// 
+/// @param str String to parse
+/// @param len Length of the string
+/// @param startoff Where to start parsing
+/// @param delim Field delimiter
+/// @parem out_pos Array of resulting positions
+/// @param npos Size of the pos array
+/// @param opt Options that determine the parsing behaviour
+/// @return Number of fields in the string or -1 if an error occured
+int sv_parse(const char* str, int len, int startoff, char delim, int* out_pos, int npos, enum e_svopt opt)
+{
+	int i;
+	int count;
+	enum {
+		START_OF_FIELD,
+		PARSING_FIELD,
+		PARSING_C_ESCAPE,
+		END_OF_FIELD,
+		TERMINATE,
+		END
+	} state;
+
+	// check pos/npos
+	if( out_pos == NULL ) npos = 0;
+	for( i = 0; i < npos; ++i )
+		out_pos[i] = -1;
+
+	// check opt
+	if( delim == '\n' && (opt&(SV_TERMINATE_CRLF|SV_TERMINATE_LF)) )
+	{
+		ShowError("sv_parse: delimiter '\\n' is not compatible with options SV_TERMINATE_LF or SV_TERMINATE_CRLF.\n");
+		return -1;// error
+	}
+	if( delim == '\r' && (opt&(SV_TERMINATE_CRLF|SV_TERMINATE_CR)) )
+	{
+		ShowError("sv_parse: delimiter '\\r' is not compatible with options SV_TERMINATE_CR or SV_TERMINATE_CRLF.\n");
+		return -1;// error
+	}
+
+	// check str
+	if( str == NULL )
+		return 0;// nothing to parse
+
+#define IS_END() ( i >= len )
+#define IS_DELIM() ( str[i] == delim )
+#define IS_TERMINATOR() ( \
+	((opt&SV_TERMINATE_LF) && str[i] == '\n') || \
+	((opt&SV_TERMINATE_CR) && str[i] == '\r') || \
+	((opt&SV_TERMINATE_CRLF) && i+1 < len && str[i] == '\r' && str[i+1] == '\n') )
+#define IS_C_ESCAPE() ( (opt&SV_ESCAPE_C) && str[i] == '\\' )
+#define SET_FIELD_START() if( npos > count*2+2 ) out_pos[count*2+2] = i
+#define SET_FIELD_END() if( npos > count*2+3 ) out_pos[count*2+3] = i; ++count
+
+	i = startoff;
+	count = 0;
+	state = START_OF_FIELD;
+	if( npos > 0 ) out_pos[0] = startoff;// start
+	while( state != END )
+	{
+		if( npos > 1 ) out_pos[1] = i;// end
+		switch( state )
+		{
+		case START_OF_FIELD:// record start of field and start parsing it
+			SET_FIELD_START();
+			state = PARSING_FIELD;
+			break;
+
+		case PARSING_FIELD:// skip field character
+			if( IS_END() || IS_DELIM() || IS_TERMINATOR() )
+				state = END_OF_FIELD;
+			else if( IS_C_ESCAPE() )
+				state = PARSING_C_ESCAPE;
+			else
+				++i;// normal character
+			break;
+
+		case PARSING_C_ESCAPE:// skip escape sequence (validates it too)
+			{
+				++i;// '\\'
+				if( IS_END() )
+				{
+					ShowError("sv_parse: empty escape sequence\n");
+					return -1;
+				}
+				if( str[i] == 'x' )
+				{// hex escape
+					++i;// 'x'
+					if( IS_END() || !ISXDIGIT(str[i]) )
+					{
+						ShowError("sv_parse: \\x with no following hex digits\n");
+						return -1;
+					}
+					do{
+						++i;// hex digit
+					}while( !IS_END() && ISXDIGIT(str[i]));
+				}
+				else if( str[i] == '0' || str[i] == '1' || str[i] == '2' )
+				{// octal escape
+					++i;// octal digit
+					if( !IS_END() && str[i] >= '0' && str[i] <= '7' )
+						++i;// octal digit
+					if( !IS_END() && str[i] >= '0' && str[i] <= '7' )
+						++i;// octal digit
+				}
+				else if( strchr(SV_ESCAPE_C_SUPPORTED, str[i]) )
+				{// supported escape character
+					++i;
+				}
+				else
+				{
+					ShowError("sv_parse: unknown escape sequence \\%c\n", str[i]);
+					return -1;
+				}
+				state = PARSING_FIELD;
+				break;
+			}
+
+		case END_OF_FIELD:// record end of field and continue
+			SET_FIELD_END();
+			if( IS_END() )
+				state = END;
+			else if( IS_DELIM() )
+			{
+				++i;// delim
+				state = START_OF_FIELD;
+			}
+			else if( IS_TERMINATOR() )
+				state = TERMINATE;
+			else
+				state = START_OF_FIELD;
+			break;
+
+		case TERMINATE:
+#if 0
+			// skip line terminator
+			if( (opt&SV_TERMINATE_CRLF) && i+1 < len && str[i] == '\r' && str[i+1] == '\n' )
+				i += 2;// CRLF
+			else
+				++i;// CR or LF
+#endif
+			state = END;
+			break;
+		}
+	}
+
+#undef IS_END
+#undef IS_DELIM
+#undef IS_TERMINATOR
+#undef IS_C_ESCAPE
+#undef SET_FIELD_START
+#undef SET_FIELD_END
+
+	return count;
+}
+
+/// Escapes src to out_dest according to the format of the C compiler.
+/// Returns the length of the escaped string.
+/// out_dest should be len*4+1 in size.
+///
+/// @param out_dest Destination buffer
+/// @param src Source string
+/// @param len Length of the source string
+/// @param escapes Extra characters to be escaped
+/// @return Length of the escaped string
+size_t sv_escape_c(char* out_dest, const char* src, size_t len, const char* escapes)
+{
+	size_t i;
+	size_t j;
+
+	if( out_dest == NULL )
+		return 0;// nothing to do
+	if( src == NULL )
+	{// nothing to escape
+		*out_dest = 0;
+		return 0;
+	}
+	if( escapes == NULL )
+		escapes = "";
+
+	for( i = 0, j = 0; i < len; ++i )
+	{
+		switch( src[i] )
+		{
+		case '\0':// octal 0
+			out_dest[j++] = '\\';
+			out_dest[j++] = '0';
+			out_dest[j++] = '0';
+			out_dest[j++] = '0';
+			break;
+		case '\r':// carriage return
+			out_dest[j++] = '\\';
+			out_dest[j++] = 'r';
+			break;
+		case '\n':// line feed
+			out_dest[j++] = '\\';
+			out_dest[j++] = 'n';
+			break;
+		case '\\':// escape character
+			out_dest[j++] = '\\';
+			out_dest[j++] = '\\';
+			break;
+		default:
+			if( strchr(escapes,src[i]) )
+			{// escapes to octal
+				out_dest[j++] = '\\';
+				out_dest[j++] = '0'+((char)(((unsigned char)src[i]&0700)>>6));
+				out_dest[j++] = '0'+((char)(((unsigned char)src[i]&0070)>>3));
+				out_dest[j++] = '0'+((char)(((unsigned char)src[i]&0007)   ));
+			}
+			else
+				out_dest[j++] = src[i];
+			break;
+		}
+	}
+	out_dest[j] = 0;
+	return j;
+}
+
+/// Unescapes src to out_dest according to the format of the C compiler.
+/// Returns the length of the unescaped string.
+/// out_dest should be len+1 in size and can be the same buffer as src.
+///
+/// @param out_dest Destination buffer
+/// @param src Source string
+/// @param len Length of the source string
+/// @return Length of the escaped string
+size_t sv_unescape_c(char* out_dest, const char* src, size_t len)
+{
+	static unsigned char low2hex[256] = {
+		0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0x0?
+		0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0x1?
+		0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0x2?
+		0,  1,  2,  3,  4,  5,  6, 7, 8, 9, 0, 0, 0, 0, 0, 0,// 0x3?
+		0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0x4?
+		0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0x5?
+		0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0x6?
+		0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0x7?
+		0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0x8?
+		0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0x9?
+		0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0xA?
+		0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0xB?
+		0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0xC?
+		0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0xD?
+		0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0xE?
+		0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // 0xF?
+	};
+	size_t i;
+	size_t j;
+
+	for( i = 0, j = 0; i < len; )
+	{
+		if( src[i] == '\\' )
+		{
+			++i;// '\\'
+			if( i >= len )
+				ShowWarning("sv_unescape_c: empty escape sequence\n");
+			else if( src[i] == 'x' )
+			{// hex escape sequence
+				unsigned char c = 0;
+				unsigned char inrange = 1;
+
+				++i;// 'x'
+				if( i >= len || !ISXDIGIT(src[i]) )
+				{
+					ShowWarning("sv_unescape_c: \\x with no following hex digits\n");
+					continue;
+				}
+				do{
+					if( c > 0x0F && inrange )
+					{
+						ShowWarning("sv_unescape_c: hex escape sequence out of range\n");
+						inrange = 0;
+					}
+					c = (c<<8)|low2hex[(unsigned char)src[i++]];// hex digit
+				}while( i >= len || !ISXDIGIT(src[i]) );
+				out_dest[j++] = (char)c;
+			}
+			else if( src[i] == '0' || src[i] == '1' || src[i] == '2' || src[i] == '3' )
+			{// octal escape sequence (255=0377)
+				unsigned char c = src[i]-'0';
+				++i;// '0', '1', '2' or '3'
+				if( i < len && src[i] >= '0' && src[i] <= '9' )
+				{
+					c = (c<<3)|(src[i]-'0');
+					++i;// octal digit
+				}
+				if( i < len && src[i] >= '0' && src[i] <= '9' )
+				{
+					c = (c<<3)|(src[i]-'0');
+					++i;// octal digit
+				}
+				out_dest[j++] = (char)c;
+			}
+			else
+			{// other escape sequence
+				if( strchr(SV_ESCAPE_C_SUPPORTED, src[i]) == NULL )
+					ShowWarning("sv_parse: unknown escape sequence \\%c\n", src[i]);
+				switch( src[i] )
+				{
+				case 'a': out_dest[j++] = '\a'; break;
+				case 'b': out_dest[j++] = '\b'; break;
+				case 't': out_dest[j++] = '\t'; break;
+				case 'n': out_dest[j++] = '\n'; break;
+				case 'v': out_dest[j++] = '\v'; break;
+				case 'f': out_dest[j++] = '\f'; break;
+				case 'r': out_dest[j++] = '\r'; break;
+				case '?': out_dest[j++] = '\?'; break;
+				default: out_dest[j++] = src[i]; break;
+				}
+				++i;// escaped character
+			}
+		}
+		else
+			out_dest[j++] = src[i++];// normal character
+	}
+	out_dest[j] = 0;
+	return j;
+}
+
+
+
 /////////////////////////////////////////////////////////////////////
 // StringBuf - dynamic string
 //

+ 36 - 0
src/common/strlib.h

@@ -46,6 +46,42 @@ int safesnprintf(char* buf, size_t sz, const char* fmt, ...);
 /// Lines start at 1.
 int strline(const char* str, size_t pos);
 
+
+
+/// Bitfield determining the behaviour of sv_parse.
+enum e_svopt
+{
+	// default: no escapes and no line terminator
+	SV_NOESCAPE_NOTERMINATE = 0,
+	// Escapes according to the C compiler.
+	SV_ESCAPE_C = 1,
+	// Line terminators
+	SV_TERMINATE_LF = 2,
+	SV_TERMINATE_CRLF = 4,
+	SV_TERMINATE_CR = 8,
+};
+
+/// Other escape sequences supported by the C compiler.
+#define SV_ESCAPE_C_SUPPORTED "abtnvfr\?\"'\\"
+
+/// Parses a delim-separated string.
+/// Starts parsing at startoff and fills the pos array with the start and end 
+/// positions in the string of the line and fields (that fit the array).
+/// Returns the number of fields or -1 if an error occurs.
+int sv_parse(const char* str, int len, int startoff, char delim, int* out_pos, int npos, enum e_svopt opt);
+
+/// Escapes src to out_dest according to the format of the C compiler.
+/// Returns the length of the escaped string.
+/// out_dest should be len*4+1 in size.
+size_t sv_escape_c(char* out_dest, const char* src, size_t len, const char* escapes);
+
+/// Unescapes src to out_dest according to the format of the C compiler.
+/// Returns the length of the unescaped string.
+/// out_dest should be len+1 in size and can be the same buffer as src.
+size_t sv_unescape_c(char* out_dest, const char* src, size_t len);
+
+
+
 /// StringBuf - dynamic string
 struct StringBuf
 {