Cafu Engine
TextParser.hpp
1 /*
2 Cafu Engine, http://www.cafu.de/
3 Copyright (c) Carsten Fuchs and other contributors.
4 This project is licensed under the terms of the MIT license.
5 */
6 
7 /****************************/
8 /*** Text Parser (Header) ***/
9 /****************************/
10 
11 #ifndef CAFU_TEXT_PARSER_HPP_INCLUDED
12 #define CAFU_TEXT_PARSER_HPP_INCLUDED
13 
14 #include "Templates/Array.hpp"
15 
16 #include <string>
17 
18 /// This is a class for parsing text. It has the following features:
19 /// a) C++ style comments ("// ...") are recognized.
20 /// b) Quoted tokens are recognized (ie. "1 2 3" is returned as ONE token).
22 {
23  public:
24 
25  /// Error when parsing a text/file.
26  class ParseError { };
27 
28  /// The constructor.
29  /// If IsFileName=true, Input specifies the name of the input file.
30  /// If IsFileName=false, Input is interpreted as the input string itself.
31  /// If the constructor experiences any problems (e.g. the input file cannot be read), it creates a text parser for the text of length 0 (the empty text).
32  /// Delims specifies token delimiters that are returned as separate tokens even if they are not bordered by white-space.
33  /// CommentChar specifies the character that starts a comment. The default 0 makes "//" being recognized as comment start.
34  /// If CommentChar has another value, that character is considered as initiating a comment.
35  /// @param Input An input string or the filename of an input file depending on IsFileName.
36  /// @param Delims Specifies token delimiters that are returned as separate tokens even if they are not bordered by white-space.
37  /// @param IsFileName Whether the input string is a real string to parse or the name of a file to parse.
38  /// @param CommentChar Char that initiates a comment line (default is '//').
39  TextParserT(const char* Input, std::string Delims="", bool IsFileName=true, const char CommentChar='\0');
40 
41  /// Returns the next token. ParseError is thrown when an error is encountered (e.g. EOF), after which the parsing cannot be continued.
42  /// @throw ParseError
43  std::string GetNextToken();
44 
45  /// Returns the next token as an int. ParseError is thrown when an error is encountered (e.g. EOF), after which the parsing cannot be continued.
46  /// @throw ParseError
47  int GetNextTokenAsInt();
48 
49  /// Returns the next token as a float. ParseError is thrown when an error is encountered (e.g. EOF), after which the parsing cannot be continued.
50  /// @throw ParseError
51  float GetNextTokenAsFloat();
52 
53  /// Puts back the string Token, such that the next call to GetNextToken() returns Token.
54  /// This function can be called multiple times in sequence, GetNextToken() will return all put back tokens in a stack-like manner.
55  /// No checks are performed if Token was actually returned by a previous call by GetNextToken().
56  /// @param Token The string to put back in the string parsed by TextParserT.
57  void PutBack(const std::string& Token);
58 
59  /// Returns a peek at the next token without reading over it.
60  /// This is equivalent to: "std::string T=GetNextToken(); PutBack(T); return T;".
61  /// @throw ParseError
62  std::string PeekNextToken();
63 
64  /// Makes sure that the next token is equal to Token. Otherwise, a ParseError is thrown.
65  /// This is short for: "if (TP.GetNextToken()!=Token) throw TextParserT::ParseError();".
66  /// @param Token The string asserted as the next token.
67  /// @throw ParseError
68  void AssertAndSkipToken(const std::string& Token);
69 
70  /// Returns whether the last read "real" token was a "quoted" token.
71  ///
72  /// "Real" token means that this method doesn't take into account tokens that have been put back with the PutBack() method.
73  /// That is, when more than one token has been put back at a time (i.e. PutBack() has been called two times in a row without
74  /// an intermediate call to one of the Get...() methods), the result of this method is not reliable and should not be used.
75  ///
76  /// @returns whether the last read "real" token was a "quoted" token.
77  bool WasLastTokenQuoted() const { return LastTokenWasQuoted; }
78 
79  /// Skips tokens until the end of the current line of text. This method always succeeds (it never throws).
80  /// @returns the rest (skipped portion) of the line, preceeded by previously put back or peeked tokens.
81  /// (Note that previously put back or peeked tokens are returned as well. The whitespace that is used
82  /// to separate these tokens is however not guaranteed to be identical with the original input text.)
83  std::string SkipLine();
84 
85  /// Skips a whole "block" of tokens, e.g. a { ... } or ( ... ) block. The block can contain nested blocks.
86  /// It can (and must) be stated if the caller has already read the opening token.
87  /// @param OpeningToken Opening token of the block.
88  /// @param ClosingToken Closing token of the block.
89  /// @param CallerAlreadyReadOpeningToken The opening token has already been read by the caller.
90  /// @returns the (possibly multi-line) content of the skipped block.
91  std::string SkipBlock(const std::string& OpeningToken, const std::string& ClosingToken, bool CallerAlreadyReadOpeningToken);
92 
93  /// Returns the current read position in the input file in byte.
94  /// Tokens that have been put back are not taken into account.
95  unsigned long GetReadPosByte() const;
96 
97  /// Returns the current read position in the input file in percent.
98  /// Tokens that have been put back are not taken into account.
99  float GetReadPosPercent() const;
100 
101  /// Returns whether the parser has reached the EOF or not.
102  bool IsAtEOF() const;
103 
104 
105  private:
106 
107  ArrayT<char> TextBuffer;
108  std::string Delimiters;
109  char CommentInitChar;
110  unsigned long BeginOfToken;
111  unsigned long EndOfToken;
112  ArrayT<std::string> PutBackTokens;
113  bool LastTokenWasQuoted;
114 
115  bool IsCharInDelimiters(const char c) const;
116 };
117 
118 #endif
unsigned long GetReadPosByte() const
Returns the current read position in the input file in bytes.
Definition: TextParser.cpp:276
TextParserT::ParseError
Error when parsing a text/file.
Definition: TextParser.hpp:26
bool WasLastTokenQuoted() const
Returns whether the last read "real" token was a "quoted" token.
Definition: TextParser.hpp:77
TextParserT(const char *Input, std::string Delims="", bool IsFileName=true, const char CommentChar='\0')
The constructor.
Definition: TextParser.cpp:16
std::string SkipBlock(const std::string &OpeningToken, const std::string &ClosingToken, bool CallerAlreadyReadOpeningToken)
Skips a whole "block" of tokens, e.g. a { ... } or ( ... ) block.
Definition: TextParser.cpp:245
void AssertAndSkipToken(const std::string &Token)
Makes sure that the next token is equal to Token.
Definition: TextParser.cpp:209
float GetReadPosPercent() const
Returns the current read position in the input file in percent.
Definition: TextParser.cpp:282
bool IsAtEOF() const
Returns whether the parser has reached the EOF or not.
Definition: TextParser.cpp:288
std::string SkipLine()
Skips tokens until the end of the current line of text.
Definition: TextParser.cpp:215
std::string GetNextToken()
Returns the next token.
Definition: TextParser.cpp:73
void PutBack(const std::string &Token)
Puts back the string Token, such that the next call to GetNextToken() returns Token.
Definition: TextParser.cpp:195
std::string PeekNextToken()
Returns a peek at the next token without reading over it.
Definition: TextParser.cpp:201
float GetNextTokenAsFloat()
Returns the next token as a float.
Definition: TextParser.cpp:176
int GetNextTokenAsInt()
Returns the next token as an int.
Definition: TextParser.cpp:165
TextParserT
This is a class for parsing text.
Definition: TextParser.hpp:21