mjplusplus  v0.4.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Public Member Functions | Protected Types | Protected Member Functions | Protected Attributes | Static Protected Attributes | List of all members
lexer::Lexer Class Reference

#include <lexer.hpp>

Public Member Functions

 Lexer (const char *file_name, Stateomat const &stateomat, shptr< ErrorReporter > errorReporter)
 
Token get_next_token ()
 
void unget_token (Token const &t)
 
bool good () const
 
std::string describe (Token::Token_type const &t) const
 

Protected Types

enum  kw_states {
  CHECK_ABSTRACT = 30, CHECK_ASSERT, CHECK_BOOLEAN, CHECK_BREAK,
  CHECK_BYTE, CHECK_CASE, CHECK_CATCH, CHECK_CHAR,
  CHECK_CLASS, CHECK_CONST, CHECK_CONTINUE, CHECK_DEFAULT,
  CHECK_DOUBLE, CHECK_ELSE, CHECK_ENUM, CHECK_EXTENDS,
  CHECK_FALSE, CHECK_FINALLY, CHECK_FLOAT, CHECK_FOR,
  CHECK_GOTO, CHECK_IMPLEMENTS, CHECK_IMPORT, CHECK_INSTANCEOF,
  CHECK_INTERFACE, CHECK_LONG, CHECK_NATIVE, CHECK_NEW,
  CHECK_NULL, CHECK_PACKAGE, CHECK_PRIVATE, CHECK_PROTECTED,
  CHECK_PUBLIC, CHECK_RETURN, CHECK_SHORT, CHECK_STATIC,
  CHECK_STRICTFP, CHECK_SUPER, CHECK_SWITCH, CHECK_SYNCHRONIZED,
  CHECK_THIS, CHECK_TRANSIENT, CHECK_TRUE, CHECK_TRY,
  CHECK_VOID, CHECK_VOLATILE, CHECK_WHILE, KEYWORD_DO,
  KEYWORD_FINAL, KEYWORD_IF, KEYWORD_INT, KEYWORD_THROW,
  KEYWORD_THROWS, IDENT
}
 

Protected Member Functions

char getc ()
 
void advancePosition (int nextCharacter)
 
Token::Token_type lex_keyword_or_ident (const char *s)
 

Protected Attributes

source_position_t position
 
Stateomat stateomat
 
shptr< ErrorReporter > errorReporter
 
std::vector< Token > token_stack
 
int c
 
int fd
 
char buf [BUF_SIZE]
 
size_t buf_off
 
size_t buf_len
 
off_t line_start
 

Static Protected Attributes

static Token::Token_type kw_array []
 
static std::vector< std::pair< const char *, Token::Token_type > > kw_vector
 
static const int kw_lex_table [][26]
 

Member Enumeration Documentation

enum lexer::Lexer::kw_states
protected

(partial) state list for keyword checking automaton

Enumerator
CHECK_ABSTRACT 
CHECK_ASSERT 
CHECK_BOOLEAN 
CHECK_BREAK 
CHECK_BYTE 
CHECK_CASE 
CHECK_CATCH 
CHECK_CHAR 
CHECK_CLASS 
CHECK_CONST 
CHECK_CONTINUE 
CHECK_DEFAULT 
CHECK_DOUBLE 
CHECK_ELSE 
CHECK_ENUM 
CHECK_EXTENDS 
CHECK_FALSE 
CHECK_FINALLY 
CHECK_FLOAT 
CHECK_FOR 
CHECK_GOTO 
CHECK_IMPLEMENTS 
CHECK_IMPORT 
CHECK_INSTANCEOF 
CHECK_INTERFACE 
CHECK_LONG 
CHECK_NATIVE 
CHECK_NEW 
CHECK_NULL 
CHECK_PACKAGE 
CHECK_PRIVATE 
CHECK_PROTECTED 
CHECK_PUBLIC 
CHECK_RETURN 
CHECK_SHORT 
CHECK_STATIC 
CHECK_STRICTFP 
CHECK_SUPER 
CHECK_SWITCH 
CHECK_SYNCHRONIZED 
CHECK_THIS 
CHECK_TRANSIENT 
CHECK_TRUE 
CHECK_TRY 
CHECK_VOID 
CHECK_VOLATILE 
CHECK_WHILE 
KEYWORD_DO 
KEYWORD_FINAL 
KEYWORD_IF 
KEYWORD_INT 
KEYWORD_THROW 
KEYWORD_THROWS 
IDENT 

Constructor & Destructor Documentation

lexer::Lexer::Lexer ( const char *  file_name,
Stateomat const &  stateomat,
shptr< ErrorReporter > errorReporter 
)

Constructor.

Initializes the lexer so that lexing can be started.

Parameters
file_name - file that will be lexed
stateomat - stateomat that holds the transitions

Member Function Documentation

void lexer::Lexer::advancePosition ( int  nextCharacter)
protected

Advance the current position.

If nextCharacter is '\n', the line number in position will be incremented and column is reset to 1. Otherwise column will be incremented.

Parameters
nextCharacter - the next character
std::string lexer::Lexer::describe ( Token::Token_type const &  t) const

Returns a description string for the given token

Parameters
t - token type to print the description of
Returns
description for t
Token lexer::Lexer::get_next_token ( )

Lexes the next token and returns it.

Returns
the next token
char lexer::Lexer::getc ( )
protected

Reads the next character and returns it.

Returns
next character
bool lexer::Lexer::good ( ) const

Constant true.

Returns
true
Token::Token_type lexer::Lexer::lex_keyword_or_ident ( const char *  s)
protected
void lexer::Lexer::unget_token ( Token const &  t)

Puts back a token that was inspected but not yet consumed. This can happen because the grammar is not SLL(1). If there were calls to unget, get_next_token will return them first (LIFO order) before lexing the next token.

Parameters
t - token to be set back

Member Data Documentation

char lexer::Lexer::buf[BUF_SIZE]
protected
size_t lexer::Lexer::buf_len
protected
size_t lexer::Lexer::buf_off
protected
int lexer::Lexer::c
protected

currently read char

shptr<ErrorReporter> lexer::Lexer::errorReporter
protected
int lexer::Lexer::fd
protected
Token::Token_type lexer::Lexer::kw_array
static protected
Initial value:
=
{
Token::Token_type::KEYWORD_DO,
Token::Token_type::KEYWORD_FINAL,
Token::Token_type::KEYWORD_IF,
Token::Token_type::KEYWORD_INT,
Token::Token_type::KEYWORD_THROW,
Token::Token_type::KEYWORD_THROWS,
}

Keyword list used as a lookup in the keyword automaton, for words that can only be recognised at the end of the string.

const int lexer::Lexer::kw_lex_table
static protected

transition table for keyword automaton

std::vector< std::pair< const char *, Token::Token_type > > lexer::Lexer::kw_vector
static protected

lookup for keyword automaton check used when only one possible keyword remains in the middle of the string

off_t lexer::Lexer::line_start
protected
source_position_t lexer::Lexer::position
protected

current lexer position in file (line, column)

Stateomat lexer::Lexer::stateomat
protected

stateomat holds the transition information

std::vector<Token> lexer::Lexer::token_stack
protected

saves tokens that are put back by unget

See also
unget_token

The documentation for this class was generated from the following files: