/* lexer.ll - Lexical analyser for GNU Flex */

%{
#include <limits.h>
#include <string.h>
#include <string>

#include "DeclarationSequence.h"
#include "CompoundStmt.h"
#include "BinaryOpExpr.h"
#include "UnaryOpExpr.h"
#include "TranslationUnit.h"
#include "FunctionDef.h"
#include "ClassDef.h"
#include "FormalParameter.h"
#include "FormalParamList.h"
#include "DeclarationSpecifierList.h"
#include "FunctionPointerCast.h"

#define YY_NO_INPUT  /* do not generate yyinput() */

#include "parser.hh"  /* generated by bison -d */
    /*  Note for Automake users: version 1.12 or later is required
        to generate parser.hh. Previous versions generate parser.h.
    */

using namespace std;

string sourceFilename;
int lineno = 1;  // line number in the input text; first line is numbered 1


/*  Becomes true once the double-precision warning has been issued,
    so that the warning is not repeated for every literal.
*/
static bool doublePrecisionLiteralWarningIssued = false;

/*  Issues a warning, at most once, if the given real literal does not end
    with an 'f' or 'F' suffix.
    realLiteralText: text of the literal as matched by the lexer;
                     must not be an empty string.
*/
static void warnIfDoublePrecisionLiteral(const char *realLiteralText)
{
    if (doublePrecisionLiteralWarningIssued)
        return;  // already warned

    const char lastChar = realLiteralText[strlen(realLiteralText) - 1];
    if (tolower(lastChar) == 'f')
        return;  // explicitly single-precision: nothing to say

    warnmsg("double-precision numeric literals have same precision as `float' for this compiler");
    doublePrecisionLiteralWarningIssued = true;
}


/*  Called by the scanner when the end of the input is reached.
    Returning 1 means that there is no more input to process,
    so yylex() then returns 0.
*/
extern "C" int yywrap(void)
{
    return 1;
}

// Helpers used by the rules below. They are defined after the rules section.
bool isOctal(uint64_t &value, const char *str);

bool processPreprocessorLineDirective(char *&reader);
%}

letter          [A-Za-z_]
digit           [0-9]
ws              ([ \t\f\v\r\xA0]|\xC2\xA0)
                /* A0 = Latin-1 non-breaking space; C2 A0 = same character encoded in UTF-8 */

%%

        /*  WARNING:
            The opening brace for code that follows a pattern must be
            on the same line as the pattern.
            C-style comments must be indented so as not to be taken
            as lexical rules. 
        */


        /*  Line markers generated by cpp (e.g., around the contents of #include files).
            Format: # NUM "FILENAME" [NUM ...]
            The 1st integer is the line number in FILENAME.
            Zero or more integers can appear after the filename. They are all ignored.
        */
^\#\ [0-9]+\ \".*\"[ 0-9]*$     { // The line number starts after the leading hash and space.
                                  // The newline that ends the marker is not part of this match:
                                  // the newline rule will count it, hence the decrement here.
                                  sscanf(yytext + 2, "%d", &lineno);
                                  --lineno;

                                  // Isolate the file name by cutting yytext at the quote that closes it.
                                  char *filenameStart = strchr(yytext, '\"') + 1;
                                  *strchr(filenameStart, '\"') = '\0';

                                  sourceFilename = filenameStart;
                                  TranslationUnit::instance().addPrerequisiteFilename(filenameStart);
                                }

    /* Pragma directive. */
^{ws}*\#{ws}*pragma{ws}+.*$     {
                                    // Pragma directive: return the text that follows the pragma
                                    // keyword, as a copy allocated by strdup().
                                    //
                                    // The pattern guarantees that at least one blank follows the
                                    // keyword, so the first character after it is skipped
                                    // unconditionally. Any other white space is then skipped.
                                    // The cast to unsigned char is required because the blanks
                                    // accepted by the pattern include bytes above 0x7F: passing
                                    // a negative char to isspace() is undefined behavior.
                                    //
                                    const char *src = strstr(yytext, "pragma") + 6;
                                    for (++src; isspace((unsigned char) *src); ++src)
                                        ;
                                    yylval.str = strdup(src);
                                    return PRAGMA;
                                }

    /* Inline assembly: asm { ... }. Accept one level of balanced braces inside the outer ones. */
asm({ws}|\n)*\{([^\{\}]|(\{[^\{\}]*\}))*\}         {
                                    // Verbatim assembler text.
                                    // The text found between the outer braces is compacted in place
                                    // at the start of yytext, then NUL-terminated.
                                    // 'lineno' is incremented for each newline copied or skipped.
                                    // Lines of the form '# <num> "<filename>"' (cpp line markers) are
                                    // left out of the copy and are used to reset 'lineno' instead.
                                    // When there is nothing between the braces, the action ends
                                    // without returning a token.
                                    // NOTE(review): yylval is not assigned here; presumably the parser
                                    // action reads the assembler text from yytext -- confirm.
                                    //
                                    char *p;
                                    // Skip the asm keyword (3 characters) and count the newlines
                                    // that separate it from the opening brace.
                                    for (p = yytext + 3; *p != '{'; ++p)
                                        if (*p == '\n')
                                            ++lineno;

                                    // The match ends with the outer closing brace, so the last
                                    // closing brace of yytext is the one that ends the asm block.
                                    char *begin = p + 1;
                                    char *end = strrchr(begin, '}');
                                    if (begin != end)  // only return something if text not empty
                                    {
                                        // The writer starts before the reader and advances at most
                                        // as fast, so the in-place copy never overwrites unread text.
                                        char *writer = yytext;
                                        // A line marker is only looked for at the start of a line.
                                        // The position that follows the opening brace counts as one.
                                        bool lastCharIsNewline = true;
                                        for (char *reader = begin; reader != end; )
                                        {
                                            char c = *reader++;
                                            if (lastCharIsNewline && c == '#' && processPreprocessorLineDirective(reader))
                                            {
                                                // The helper has moved the reader past the newline that
                                                // ends the marker and has set 'lineno' from the marker.
                                                lastCharIsNewline = true;
                                            }
                                            else
                                            {
                                                *writer++ = c;
                                                if (c == '\n')
                                                    ++lineno;
                                                lastCharIsNewline = (c == '\n');
                                            }
                                        }
                                        *writer = '\0';

                                        return VERBATIM_ASM;
                                    }
                                }

    /* White space: ignored. */
{ws}+                           ;

    /* Newline increments the line counter. */
\n                              { lineno++; }

    /* Multi-character operators.
       The scanner always takes the longest match, so the relative order of
       these rules does not matter: a three-character shift-assignment operator
       is preferred over the two-character shift operator listed before it.
    */
--                              { return MINUS_MINUS; }
\+\+                            { return PLUS_PLUS; }
==                              { return EQUALS_EQUALS; }
\!=                             { return BANG_EQUALS; }
\<=                             { return LOWER_EQUALS; }
\>=                             { return GREATER_EQUALS; }
\+=                             { return PLUS_EQUALS; }
-=                              { return MINUS_EQUALS; }
\*=                             { return ASTERISK_EQUALS; }
\/=                             { return SLASH_EQUALS; }
%=                              { return PERCENT_EQUALS; }
\^=                             { return CARET_EQUALS; }
&=                              { return AMP_EQUALS; }
\|=                             { return PIPE_EQUALS; }
&&                              { return AMP_AMP; }
\|\|                            { return PIPE_PIPE; }
\<\<                            { return LT_LT; }
\>\>                            { return GT_GT; }
\<\<=                           { return LT_LT_EQUALS; }
\>\>=                           { return GT_GT_EQUALS; }
-\>                             { return RIGHT_ARROW; }
\.\.\.                          { return ELLIPSIS; }

    /* Hexadecimal constant. */
0[xX]([0-9A-Fa-f]+)([lLuU]+)?    { // Convert the digits that follow the two-character prefix.
                                   // The conversion stops at the optional suffix letters.
                                   const unsigned long value = strtoul(yytext + 2, NULL, 16);
                                   yylval.real = value;
                                   return REAL; }

    /* Binary constant. */
0[bB]([01]+)([lLuU]+)?    { // Accumulate the binary digits that follow the two-character prefix.
                            // The loop ends at the optional suffix letters or at the end of the token.
                            unsigned long value = 0;
                            const char *digit = yytext + 2;
                            while (*digit == '0' || *digit == '1')
                            {
                                value = value * 2 + (*digit == '1' ? 1UL : 0UL);
                                ++digit;
                            }
                            yylval.real = value;
                            return REAL; }

    /* Decimal float constants. Three forms are accepted and share one action:
         1. Digits after the decimal point, and optionally before it.
         2. No digits after the decimal point: there must then be at least one
            digit before the point, e.g., "1.", "1.e6", "1.f", "1.e6f".
         3. Same as form 2, but no decimal point, and required 'e'.
       No optional minus sign at the beginning of these rules, because it creates
       an ambiguity where 'n-1' is seen as tokens 'n' and '-1'. It should be seen
       as 'n', '-' and '1', and the add_expr rule in parser.yy will recognize
       these 3 tokens as a subtraction.
       A vertical bar in place of an action means: same action as the next rule.
    */
(([0-9]*\.[0-9]+))([eE][-+]?[0-9]+)?[fF]?       |
([0-9]+\.)([eE][-+]?[0-9]+)?[fF]?               |
([0-9]+)([eE][-+]?[0-9]+)[fF]?                  {
                yylval.real = atof(yytext);
                warnIfDoublePrecisionLiteral(yytext);
                return REAL; }

    /* Decimal or octal integer, with optional "unsigned" or "long" suffix character. */ 
([0-9]+)([uUlL]*)       {
                // Only the digits take part in the conversion. The optional suffix
                // letters must not be passed to isOctal(), which rejects any text
                // that contains something other than octal digits: a suffixed octal
                // constant like 0777U would otherwise be read as decimal 777.
                // A constant that starts with 0 but contains an 8 or a 9 is still
                // read as a decimal number.
                const string digits(yytext, strspn(yytext, "0123456789"));
                uint64_t octalConversion = 0;
                if (digits[0] == '0' && isOctal(octalConversion, digits.c_str() + 1))
                    yylval.real = double(octalConversion);
                else
                    yylval.real = atof(digits.c_str());
                return REAL; }

    /* Keyword, typedef name or identifier. */
{letter}({letter}|{digit})*     {
                // Reserved words, each with the token to return for it.
                // Each word must appear only once in this table.
                static const struct { const char *text; int token; } keywords[] =
                {
                    { "int",         INT },
                    { "char",        CHAR },
                    { "short",       SHORT },
                    { "long",        LONG },
                    { "float",       FLOAT },
                    { "double",      DOUBLE },
                    { "signed",      SIGNED },
                    { "unsigned",    UNSIGNED },
                    { "void",        VOID },
                    { "if",          IF },
                    { "else",        ELSE },
                    { "while",       WHILE },
                    { "do",          DO },
                    { "for",         FOR },
                    { "break",       BREAK },
                    { "continue",    CONTINUE },
                    { "return",      RETURN },
                    { "asm",         ASM },
                    { "__norts__",   NORTS },
                    { "struct",      STRUCT },
                    { "union",       UNION },
                    { "interrupt",   INTERRUPT },
                    { "__gcccall",   FUNC_USES_GCC6809_CALL_CONV },
                    { "_CMOC_fpir_", FUNC_RECEIVES_FIRST_PARAM_IN_REG },
                    { "sizeof",      SIZEOF },
                    { "typedef",     TYPEDEF },
                    { "switch",      SWITCH },
                    { "case",        CASE },
                    { "default",     DEFAULT },
                    { "register",    REGISTER },
                    { "goto",        GOTO },
                    { "extern",      EXTERN },
                    { "static",      STATIC },
                    { "enum",        ENUM },
                    { "const",       CONST },
                    { "volatile",    VOLATILE },
                    { "auto",        AUTO },
                };

                // Reserved words take precedence over typedef names and identifiers.
                for (size_t i = 0; i < sizeof(keywords) / sizeof(keywords[0]); ++i)
                    if (strcmp(yytext, keywords[i].text) == 0)
                        return keywords[i].token;

                // Then a name registered as a typedef is returned with its type descriptor.
                const TypeDesc *td = TranslationUnit::getTypeManager().getTypeDef(yytext);
                if (td)
                {
                    yylval.typeDesc = td;
                    return TYPE_NAME;
                }

                // Anything else is a plain identifier, returned as a copy allocated by strdup().
                yylval.str = strdup(yytext);
                return ID;
                }

    /* Double-quoted string constant. No newline allowed inside.
       A backslash always pairs with the character that follows it: an escaped
       double-quote does not end the string, while the double-quote that follows
       an escaped backslash does end it.
       The text between the quotes is returned as is, in a copy allocated by
       strdup(): escape sequences are not interpreted here.
    */
\"([^\"\n\\]|\\.)*\"     { yylval.str = strdup(yytext + 1);
                        yylval.str[strlen(yylval.str) - 1] = '\0';
                        return STRLIT; }

    /* Control code character constants. */
    /* An empty pair of single quotes is accepted and yields the NUL character. */
''              { yylval.character = '\0'; return CHARLIT; }
    /* Single-character escape sequences.
       When a text matches one of these rules and a later rule with the same
       length, the rule listed first is the one that applies.
    */
'\\0'           { yylval.character = '\0'; return CHARLIT; }
'\\a'           { yylval.character = '\a'; return CHARLIT; }
'\\b'           { yylval.character = '\b'; return CHARLIT; }
'\\t'           { yylval.character = '\t'; return CHARLIT; }
'\\n'           { yylval.character = '\n'; return CHARLIT; }
'\\v'           { yylval.character = '\v'; return CHARLIT; }
'\\f'           { yylval.character = '\f'; return CHARLIT; }
'\\r'           { yylval.character = '\r'; return CHARLIT; }
'\\''           { yylval.character = '\''; return CHARLIT; }
'\\\"'          { yylval.character = '\"'; return CHARLIT; }
'\\\\'          { yylval.character = '\\'; return CHARLIT; }

    /* Character constant expressed as a hexadecimal character code (e.g., '\xFF'). */
'\\x[0-9A-Fa-f][0-9A-Fa-f]?'    { // The hexadecimal digits start after the quote, the backslash and the x.
                                // The conversion stops at the closing quote.
                                const unsigned long code = strtoul(yytext + 3, NULL, 16);
                                yylval.character = (signed char) code;
                                return CHARLIT; }

    /* Character constant expressed as an octal character code (e.g., '\0377'). */
'\\0?[0-7][0-7]?[0-7]?'    { // The octal digits start after the quote and the backslash.
                            // The conversion stops at the closing quote.
                            const unsigned long code = strtoul(yytext + 2, NULL, 8);
                            yylval.character = (signed char) code;
                            return CHARLIT; }

    /* Other character constant. */
    /* The dot does not match a newline, so the quoted character must be on
       the same line as the quotes. The character is taken as is. */
'.'             { yylval.character = yytext[1]; return CHARLIT; }

    /* Any non-blank single-character token.
       The value of the character itself is returned as the token code. */
.                               { return yytext[0]; }

%%


/*  Exists only to reference yyunput(), which avoids a compiler warning
    about that function being defined but not used.
*/
void BlackHole(void)
{
    yyunput('\0', NULL);
}


/*  Converts a string of octal digits to the number that it represents.
    value: receives the converted number when the function succeeds.
    str:   NUL-terminated text to convert; an empty string converts to zero.
    Returns true if 'str' contains nothing but octal digits.
    Returns false as soon as any other character is met; 'value' must then
    not be relied upon.
*/
bool isOctal(uint64_t &value, const char *str)
{
    value = 0;
    while (*str != '\0')
    {
        const char c = *str++;
        if (c < '0' || c > '7')  // not an octal digit
            return false;
        value = value * 8 + uint64_t(c - '0');
    }
    return true;
}


// If reader points to ' <num> "<filename>"\n' (i.e., to what follows the '#'
// of a cpp line marker), then move reader past that newline, set global
// 'lineno' to <num> and return true.
// Otherwise, return false and leave both 'reader' and 'lineno' unchanged.
//
// <num> must be made of decimal digits only (no sign) and must fit in an int.
// The whole marker, including the quoted filename, must be on a single line.
// Nothing but blanks may follow the filename.
//
bool processPreprocessorLineDirective(char *&reader)
{
    char *r = reader;

    // Pass blanks, but not the end of the line.
    // The casts to unsigned char avoid undefined behavior in the <ctype.h>
    // functions when the text contains bytes above 0x7F.
    while (*r != '\n' && isspace((unsigned char) *r))
        ++r;

    // Require a digit here, because strtoul() by itself would accept leading
    // white space (newlines included), a sign, and even a missing number,
    // which it would report as zero.
    if (!isdigit((unsigned char) *r))
        return false;

    // Parse the non-negative decimal int.
    // The range check also rejects ULONG_MAX, which strtoul() returns on overflow.
    char *endptr;
    const unsigned long num = strtoul(r, &endptr, 10);
    if (num > INT_MAX)
        return false;
    r = endptr;

    // Pass blanks.
    while (*r != '\n' && isspace((unsigned char) *r))
        ++r;

    // Expect filename in double quotes, with both quotes on the same line.
    if (*r != '\"')
        return false;
    ++r;
    while (*r != '\0' && *r != '\"' && *r != '\n')
        ++r;
    if (*r != '\"')
        return false;
    ++r;

    // Only blanks are allowed between the filename and the end of the line.
    while (*r != '\n' && isspace((unsigned char) *r))
        ++r;
    if (*r != '\n')
        return false;

    // Found expected directive.
    reader = r + 1;
    ::lineno = (int) num;
    return true;
}
