//+------------------------------------------------------------------+
//|                                                      Scanner.mqh |
//|                                    Copyright (c) 2019, Marketeer |
//|                          https://www.mql5.com/en/users/marketeer |
//|                            https://www.mql5.com/ru/articles/5638 |
//| partially taken from            http://craftinginterpreters.com/ |
//|                                Copyright (c) 2015 Robert Nystrom |
//|                                                   rev.2020.02.13 |
//+------------------------------------------------------------------+

#include "HashMapTemplate.mqh"
#include "RubbArray.mqh"

#include "FileReader.mqh"

#include "TokenType.mqh"
#include "Token.mqh"


// Lexical scanner: walks the characters of a Source and produces a list of
// Token objects. Each token stores only its type, the line counter value at the
// moment it was added, and the [start, length) range of its lexeme in the source.
class Scanner
{
  private:
    static string reserved[];               // keyword spellings (defined after the class)
    static Map<string, TokenType> keywords; // spelling -> token type, filled by the constructor

    const Source *source; // wrapped string

    List<Token *> *tokens; // output list; allocated in the constructor, not deleted by the destructor
    
    int start;   // offset of the first character of the lexeme being scanned
    int current; // offset of the next character to be consumed
    int line;    // number of '\n' characters consumed so far
    
    bool pointLessMode; // when true, '.' inside an identifier is kept as a part of it
    bool failed;        // set by error(), reported through isSuccess()

    // Reports a problem at the current line and marks the scan as failed.
    void error(string message)
    {
      Print("Error [", line, "]: ", message);
      failed = true;
    }

    // Reports a problem tied to a specific token and marks the scan as failed.
    void error(Token *token, string message)
    {
      Print("Error [", token.getLine(), "]: ", message, " ", token.toString(NULL));
      failed = true;
    }

  public:
    // _source     - text to scan; only the pointer is stored
    // dereference - when true, dots following the first character of an identifier
    //               are glued into the identifier (see isDot/identifier)
    // All counters and the failure flag are initialized explicitly, so that
    // isSuccess() never reads an indeterminate value.
    Scanner(const Source *_source, const bool dereference = true): start(0), current(0), line(0), failed(false)
    {
      tokens = new List<Token *>();
      // The keyword table is shared by all scanners and is built only once:
      // the i-th reserved word is mapped to the token type BREAK + i.
      if(keywords.getSize() == 0)
      {
        const int n = ArraySize(reserved);
        for(int i = 0; i < n; i++)
        {
          keywords.put(reserved[i], TokenType(BREAK + i));
        }
      }
      source = _source;
      pointLessMode = dereference;
    }
    
    ~Scanner()
    {
      // don't delete tokens: the list is handed out by scanTokens()
      // and has to stay alive after the scanner is gone
    }
    
    
    // Scans the source up to its end, appends a zero-length EOF token
    // and returns the token list. Errors do not stop the scan; check isSuccess().
    List<Token *> *scanTokens()
    {
      while(!isAtEnd())
      {
        // We are at the beginning of the next lexeme
        start = current;
        scanToken();
      }
  
      start = current;
      addToken(EOF);
      return tokens;
    }
    
    // Returns the line counter (number of newlines consumed so far).
    int getLines() const
    {
      return line;
    }
    
    // Returns true if no error has been reported.
    bool isSuccess() const
    {
      return !failed;
    }
    
    // Forwards to Source::dump with the given file name.
    void dump(const string filename) const
    {
      source.dump(filename);
    }
    
  private:

    // Consumes one lexeme starting at `start` and adds a token for it, if any
    // (whitespace and comments produce no tokens).
    void scanToken()
    {
      ushort c = advance();
      switch(c)
      {
        case '(': addToken(LEFT_PAREN); break;
        case ')': addToken(RIGHT_PAREN); break;
        case '{': addToken(LEFT_BRACE); break;
        case '}': addToken(RIGHT_BRACE); break;
        case '[': addToken(LEFT_BRACKET); break;
        case ']': addToken(RIGHT_BRACKET); break;
        case ',': addToken(COMMA); break;
        case '.': addToken(DOT); break;
        case '-': addToken(match('-') ? DEC : (match('=') ? MINUS_EQUAL : MINUS)); break;
        case '+': addToken(match('+') ? INC : (match('=') ? PLUS_EQUAL : PLUS)); break;
        case ';': addToken(SEMICOLON); break;
        case '*': addToken(match('=') ? STAR_EQUAL : STAR); break;
        case '~': addToken(TILDE); break;
        case '^': addToken(match('=') ? BIT_XOR_EQUAL : BIT_XOR); break;
        case '%': addToken(match('=') ? DIV_EQUAL : DIV); break;
        
        /* TODO: shift and re-assign
        '<<='
        '>>='
        */
        
        case '?': addToken(QMARK); break;
        case ':': addToken(COLON); break;
        
        // two-character tokens
        case '!': addToken(match('=') ? BANG_EQUAL : BANG); break;
        case '=': addToken(match('=') ? EQUAL_EQUAL : EQUAL); break;
        case '<': addToken(match('=') ? LESS_EQUAL : (/*match('<') ? SHIFT_LEFT :*/ LESS)); break;
        case '>': addToken(match('=') ? GREATER_EQUAL : (/*match('>') ? SHIFT_RIGHT :*/ GREATER)); break;
        // NB: SHIFTs collide with template<template<?>>, so they should be detected by parser
        
        // slash: line comment, block comment, '/=' or plain '/'
        case '/':
          if(match('/'))
          {
            // A comment goes until the end of the line
            while(peek() != '\n' && !isAtEnd()) advance();
          }
          else if(match('*'))
          {
            blockcomment();
          }
          else if(match('='))
          {
            addToken(SLASH_EQUAL);
          }
          else
          {
            addToken(SLASH);
          }
          break;

        // Ignore whitespace (0xFEFF is the byte order mark)
        case ' ':
        case '\r':
        case '\t':
        case 0xFEFF:
          break;
          
        case '\n':
          line++;
          break;

        // Consume blocks
        case '"': _string(); break;
        case '\'': literal(); break;
        case '#': preprocessor(); break;

        // '&&' / '||', '&=' / '|=', or a single bitwise operator
        case '&':
        case '|':
          if(match(c))
          {
            addToken(c == '&' ? AND : OR);
          }
          else if(match('='))
          {
            addToken(c == '&' ? BIT_AND_EQUAL : BIT_OR_EQUAL);
          }
          else
          {
            addToken(c == '&' ? BIT_AND : BIT_OR);
          }
          break;

        case 'D':
          if(match('\''))
          {
            // D'...' datetime constant
            literal(true);
            break;
          }
          // otherwise it is an ordinary identifier starting with 'D':
          // intentional fall-through to the default branch

        default:
        
          if(isDigit(c))
          {
            number();
          }
          else if(isAlpha(c)) // '.' never gets here, it has its own case above
          {
            identifier();
          }
          else
          {
            error("Unexpected character `" + ShortToString(c) + "` 0x" + StringFormat("%X", c) + " @ " + (string)current + ":" + source.get(MathMax(current - 10, 0), 20));
          }
          break;
      }
    }
    
    // Consumes the rest of an identifier (letters, digits, underscores and,
    // in pointLessMode, dots) and adds either a keyword token or IDENTIFIER.
    void identifier()
    {
      while(isAlphaNumeric(peek()) || isDot(peek())) advance();

      // See if the identifier is a reserved word
      string text = source.get(start, current - start);
  
      TokenType type = keywords.get(text);
      if(type == null) type = IDENTIFIER;
      
      addToken(type);
    }

    // Consumes a numeric constant whose first digit has already been eaten.
    // Adds CONST_NUMBER if a fractional part or an exponent was found,
    // CONST_INTEGER otherwise (including hexadecimal constants).
    void number()
    {
      while(isDigit(peek())) advance();
      
      bool fraction = false;
  
      // Look for a fractional part
      if(peek() == '.' && isDigit(peekNext()))
      {
        advance(); // Consume the dot
        fraction = true;
  
        while(isDigit(peek())) advance();
      }
      
      if((peek() == 'e' || peek() == 'E') && (isDigit(peekNext()) || peekNext() == '+' || peekNext() == '-'))
      {
        advance(); // eat 'e'xponent
        advance(); // eat the sign or the first digit
        while(isDigit(peek())) advance(); // eat remaining digits
        fraction = true;
      }
      else if((peek() == 'x' || peek() == 'X') && isHex(peekNext())) // hexadecimal
      {
        advance();
        while(isHex(peek())) advance();
      }

      // Float suffix. It is checked separately from the branches above,
      // so that it is also accepted after an exponent (1e5f). After a hexadecimal
      // constant it can never match, because 'f' is consumed as a hex digit.
      if(peek() == 'f')
      {
        advance();
      }
      
      if(fraction)
      {
        addToken(CONST_NUMBER);
      }
      else
      {
        addToken(CONST_INTEGER);
      }
    }
    
    // Consumes a double-quoted string (the opening quote is already eaten),
    // counting embedded newlines. Adds CONST_STRING; the token range includes
    // both quotes. Reports "Unterminated string" if the source ends first.
    void _string()
    {
      // `escape` is true when the previous character was a backslash which
      // is not escaped itself. It must be toggled (not derived from the
      // previous character alone), otherwise "\\\"" is cut at the escaped quote.
      bool escape = false;
      while(!isAtEnd())
      {
        const ushort c = peek();
        if(c == '"' && !escape) break;
        if(c == '\n')
        {
          line++;
        }
        escape = (c == '\\' && !escape);
        advance();
      }
  
      // Unterminated string
      if(isAtEnd())
      {
        error("Unterminated string");
        return;
      }
  
      // The closing "
      advance();
  
      addToken(CONST_STRING);
    }

    // Consumes a single-quoted literal (the opening quote is already eaten).
    // D - true when called for the D'...' form: adds CONST_DATETIME instead of LITERAL.
    // A newline or the end of the source before the closing quote is reported
    // as "Unterminated literal" and no token is added.
    void literal(const bool D = false)
    {
      // same escape tracking as in _string()
      bool escape = false;
      while(!isAtEnd())
      {
        const ushort c = peek();
        if(c == '\'' && !escape) break;
        if(c == '\n')
        {
          // the newline is left unconsumed, so scanToken() counts it as usual
          error("Unterminated literal");
          return;
        }
        escape = (c == '\\' && !escape);
        advance();
      }
  
      if(isAtEnd())
      {
        error("Unterminated literal");
        return;
      }
  
      // The closing '
      advance();
  
      addToken(D ? CONST_DATETIME : LITERAL);
    }
    
    // Skips a block comment (the opening '/*' is already eaten), counting newlines.
    // Reports "Unterminated block comment" if no closing '*/' is found.
    void blockcomment()
    {
      while(!isAtEnd())
      {
        if(peek() == '\n')
        {
          line++;
        }
        else
        if(peek() == '*' && peekNext() == '/')
        {
          break;
        }
        
        advance();
      }
      
      if(isAtEnd())
      {
        error("Unterminated block comment");
        return;
      }
      
      advance();
      advance(); // skip trailing '*/'
    }
    
    // Consumes a preprocessor directive up to the end of the line
    // (a backslash right before '\n' continues the directive on the next line).
    // The directive name together with the leading '#' is looked up in the keyword
    // table; the added token covers the whole consumed text.
    void preprocessor()
    {
      while(isAlpha(peek())) advance(); // peek() yields '\0' at the end, which is not alpha

      string text = source.get(start, current - start);
      
      while(peek() != '\n' && !isAtEnd())
      {
        if(peek() == '\\' && peekNext() == '\n')
        {
          advance(); // the backslash; the newline is eaten by advance() below
          line++;
        }
        advance();
      }

      TokenType type = keywords.get(text);
      if(type == null)
      {
        error("Unexpected #");
      }
      else
      {
        addToken(type);
      }
    }

    // Consumes the next character only if it equals `expected`.
    bool match(ushort expected)
    {
      if(isAtEnd()) return false;
      if(source[current] != expected) return false;
  
      current++;
      return true;
    }
    
    // Returns the most recently consumed character, or 0 at the very beginning.
    ushort previous() const
    {
      if(current > 0) return source[current - 1];
      return 0;
    }
    
    // Returns the next character without consuming it, or '\0' at the end.
    ushort peek() const
    {
      if(isAtEnd()) return '\0';
      return source[current];
    }
    
    // Returns the character after the next one, or '\0' if there is none.
    ushort peekNext() const
    {
      if((uint)(current + 1) >= source.length()) return '\0';
      return source[current + 1];
    }

    // Latin letter or underscore.
    bool isAlpha(ushort c) const
    {
      return(c >= 'a' && c <= 'z') ||
            (c >= 'A' && c <= 'Z') ||
             c == '_';
    }

    bool isAlphaNumeric(ushort c) const
    {
      return isAlpha(c) || isDigit(c);
    }
    
    bool isDigit(ushort c) const
    {
      return c >= '0' && c <= '9';
    }

    bool isHex(ushort c) const
    {
      return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
    }

    // A dot counts as an identifier character only in pointLessMode.
    bool isDot(ushort c) const
    {
      return (pointLessMode && c == '.');
    }
    
    bool isAtEnd() const
    {
      return (uint)current >= source.length();
    }

    // Consumes and returns the next character.
    // There is no bounds check here: callers make sure the end is not reached.
    ushort advance()
    {
      current++;
      return source[current - 1];
    }

    // Appends a token of the given type for the range [start, current).
    void addToken(TokenType type)
    {
      tokens.add(new Token(type, line, start, current - start));
    }
};

// Out-of-class definitions of the Scanner static members.

// Keyword lookup table; the Scanner constructor fills it when it finds the map empty.
// NOTE(review): the class declares this member as Map<string, TokenType>, while it is
// defined here as HashMapTemplate<string, TokenType> - presumably one name is an alias
// of the other in HashMapTemplate.mqh; confirm.
static HashMapTemplate<string, TokenType> Scanner::keywords;

// Keyword spellings, pulled in from reserved.txt as the array initializer.
// The constructor maps the i-th entry to TokenType(BREAK + i), so the order of
// entries in that file has to follow the TokenType enumeration starting from BREAK.
static string Scanner::reserved[] =
{
#include "reserved.txt"
};
