Skip to content

Commit 32bcf03

Browse files
committed
Implemented section 1.5 - associating a token with its symbols table entry. Added error messages for integer and float overflow. Some other minor changes to main.h.
1 parent 009a5c3 commit 32bcf03

File tree

7 files changed

+93
-69
lines changed

7 files changed

+93
-69
lines changed

include/cc_dict.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ void free_comp_dict_item_t(comp_dict_item_t* item);
5151
* */
5252
comp_dict_item_t* symbols_table_add(
5353
const char* key,
54-
int line,
54+
int line_number,
5555
int token_type,
5656
const char* token_value,
5757
comp_dict_t* table);
@@ -74,6 +74,6 @@ void symbols_table_finalize(comp_dict_t* table);
7474
/* given a text and the token type, computes the value corresponding to that
7575
* text, puts it in an allocated void*, and returns it.
7676
* see comp_dict_item_t::value for more info. */
77-
void* interpret_token_value(const char* text, int token_type);
77+
void* interpret_token_value(const char* text, int token_type, int line_number);
7878

7979
#endif

include/cc_misc.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
#define __MISC_H
33
#include <stdio.h>
44

5+
struct comp_dict_item_t;
6+
57
int getLineNumber (void);
68

79
void yyerror (char const *mensagem);
@@ -12,7 +14,7 @@ void main_finalize (void);
1214

1315
/* receives a token, and, if it needs to be added to the symbols table, adds it.
1416
* return its token id at the end. */
15-
int recognize_token(const char* token_text, int token_id);
17+
int recognize_token(int token_id);
1618

1719
/* given a lexeme and its type, computes the key that will be used for that
1820
* lexeme in the symbols table, and returns a pointer to it. note: the string

include/main.h

+9-6
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,14 @@ extern int yylex(void);
2626
Constantes a serem utilizadas para diferenciar os lexemas que estão
2727
registrados na tabela de símbolos.
2828
*/
29-
#define SIMBOLO_LITERAL_INT 1
30-
#define SIMBOLO_LITERAL_FLOAT 2
31-
#define SIMBOLO_LITERAL_CHAR 3
32-
#define SIMBOLO_LITERAL_STRING 4
33-
#define SIMBOLO_LITERAL_BOOL 5
34-
#define SIMBOLO_IDENTIFICADOR 6
29+
enum {
30+
SIMBOLO_LITERAL_INT = 1,
31+
SIMBOLO_LITERAL_FLOAT,
32+
SIMBOLO_LITERAL_CHAR,
33+
SIMBOLO_LITERAL_STRING,
34+
SIMBOLO_LITERAL_BOOL,
35+
SIMBOLO_IDENTIFICADOR,
36+
SIMBOLO_ERRO
37+
};
3538

3639
#endif

parser.y

+6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11

22
%{
33
#include <stdio.h>
4+
#include "cc_dict.h" /* we must add this so we can specify comp_dict_item_t
5+
* in yylval's union */
46
%}
57

68
/* Declaração dos tokens da linguagem */
@@ -34,6 +36,10 @@
3436
%token TK_IDENTIFICADOR
3537
%token TOKEN_ERRO
3638

39+
%union {
40+
comp_dict_item_t* valor_simbolo_lexico;
41+
}
42+
3743
%left TK_OC_OR TK_OC_AND
3844
%nonassoc TK_OC_LE TK_OC_GE TK_OC_EQ TK_OC_NE '<' '>'
3945
%left '+' '-'

scanner.l

+31-29
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
Integrantes: Alex Gliesch, Leonardo Tagliaro, Marina Fortes Rey */
33

44
%{
5+
6+
#include "cc_dict.h"
57
#include "parser.h" /* arquivo automaticamente gerado pelo bison */
68
#include "cc_misc.h" /* for the recognize_token function. */
79
int lineCounter = 1;
@@ -32,34 +34,34 @@ specialchar [,|;|:|(|)|\[|\]|{|}|+|\-|*|/|<|>|=|!|&|$]
3234
<comment>"*"+"/" BEGIN(INITIAL);
3335
"//".* ;
3436
"\n" ;lineCounter++;
35-
"false" return recognize_token(yytext, TK_LIT_FALSE);
36-
"true" return recognize_token(yytext, TK_LIT_TRUE);
37-
"int" return recognize_token(yytext, TK_PR_INT);
38-
"float" return recognize_token(yytext, TK_PR_FLOAT);
39-
"bool" return recognize_token(yytext, TK_PR_BOOL);
40-
"char" return recognize_token(yytext, TK_PR_CHAR);
41-
"string" return recognize_token(yytext, TK_PR_STRING);
42-
"if" return recognize_token(yytext, TK_PR_IF);
43-
"then" return recognize_token(yytext, TK_PR_THEN);
44-
"else" return recognize_token(yytext, TK_PR_ELSE);
45-
"while" return recognize_token(yytext, TK_PR_WHILE);
46-
"do" return recognize_token(yytext, TK_PR_DO);
47-
"input" return recognize_token(yytext, TK_PR_INPUT);
48-
"output" return recognize_token(yytext, TK_PR_OUTPUT);
49-
"return" return recognize_token(yytext, TK_PR_RETURN);
50-
"const" return recognize_token(yytext, TK_PR_CONST);
51-
"static" return recognize_token(yytext, TK_PR_STATIC);
52-
"<=" return recognize_token(yytext, TK_OC_LE);
53-
">=" return recognize_token(yytext, TK_OC_GE);
54-
"==" return recognize_token(yytext, TK_OC_EQ);
55-
"!=" return recognize_token(yytext, TK_OC_NE);
56-
"&&" return recognize_token(yytext, TK_OC_AND);
57-
"||" return recognize_token(yytext, TK_OC_OR);
58-
{integer} return recognize_token(yytext, TK_LIT_INT);
59-
{floatamount} return recognize_token(yytext, TK_LIT_FLOAT);
60-
{char} return recognize_token(yytext, TK_LIT_CHAR);
61-
{string} return recognize_token(yytext, TK_LIT_STRING);
62-
{specialchar} return recognize_token(yytext, (int) yytext[0]);
63-
{identifier} return recognize_token(yytext, TK_IDENTIFICADOR);
37+
"false" return recognize_token(TK_LIT_FALSE);
38+
"true" return recognize_token(TK_LIT_TRUE);
39+
"int" return recognize_token(TK_PR_INT);
40+
"float" return recognize_token(TK_PR_FLOAT);
41+
"bool" return recognize_token(TK_PR_BOOL);
42+
"char" return recognize_token(TK_PR_CHAR);
43+
"string" return recognize_token(TK_PR_STRING);
44+
"if" return recognize_token(TK_PR_IF);
45+
"then" return recognize_token(TK_PR_THEN);
46+
"else" return recognize_token(TK_PR_ELSE);
47+
"while" return recognize_token(TK_PR_WHILE);
48+
"do" return recognize_token(TK_PR_DO);
49+
"input" return recognize_token(TK_PR_INPUT);
50+
"output" return recognize_token(TK_PR_OUTPUT);
51+
"return" return recognize_token(TK_PR_RETURN);
52+
"const" return recognize_token(TK_PR_CONST);
53+
"static" return recognize_token(TK_PR_STATIC);
54+
"<=" return recognize_token(TK_OC_LE);
55+
">=" return recognize_token(TK_OC_GE);
56+
"==" return recognize_token(TK_OC_EQ);
57+
"!=" return recognize_token(TK_OC_NE);
58+
"&&" return recognize_token(TK_OC_AND);
59+
"||" return recognize_token(TK_OC_OR);
60+
{integer} return recognize_token(TK_LIT_INT);
61+
{floatamount} return recognize_token(TK_LIT_FLOAT);
62+
{char} return recognize_token(TK_LIT_CHAR);
63+
{string} return recognize_token(TK_LIT_STRING);
64+
{specialchar} return recognize_token((int) yytext[0]);
65+
{identifier} return recognize_token(TK_IDENTIFICADOR);
6466
. return TOKEN_ERRO;
6567
%%

src/cc_dict.c

+20-11
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,21 @@ void free_comp_dict_item_t(comp_dict_item_t* item) {
1919
/* The global symbols table defined in the program. */
2020
comp_dict_t symbols_table = NULL;
2121

22-
comp_dict_item_t* symbols_table_add(const char* key, int line,
22+
comp_dict_item_t* symbols_table_add(const char* key, int line_number,
2323
int token_type, const char* token_value, comp_dict_t* table) {
2424

2525
comp_dict_item_t* item = symbols_table_find(key, table);
2626

2727
if (item != NULL) {
2828
/* an item with the same key is already in the symbols table.
2929
* we have to update the line where it appeared and the value. */
30-
item->line_where_it_last_appeared = line;
30+
item->line_where_it_last_appeared = line_number;
3131

3232
if (item->value != NULL)
3333
free(item->value);
3434

35-
item->value = interpret_token_value(token_value, token_type);
35+
item->value = interpret_token_value(token_value, token_type,
36+
line_number);
3637

3738
/* note that we don't have to change the token_type or worry about
3839
* the value's type, since, if the token_type were different,
@@ -45,8 +46,8 @@ comp_dict_item_t* symbols_table_add(const char* key, int line,
4546

4647
item->token = (const char*) malloc((strlen(key) + 1) * sizeof(char));
4748
strcpy((char*)item->token, key);
48-
item->line_where_it_last_appeared = line;
49-
item->value = interpret_token_value(token_value, token_type);
49+
item->line_where_it_last_appeared = line_number;
50+
item->value = interpret_token_value(token_value, token_type, line_number);
5051
item->token_type = token_type;
5152

5253
HASH_ADD_KEYPTR(hh, *table, item->token, strlen(item->token), item);
@@ -82,42 +83,50 @@ int symbols_table_count(comp_dict_t* table) {
8283
return HASH_COUNT(*table);
8384
}
8485

85-
void* interpret_token_value(const char* text, int token_type) {
86+
void* interpret_token_value(const char* text, int token_type, int line_number) {
87+
//printf("text %s interpreted as %d -> ", text, token_type);
8688
void* value = NULL;
8789
if (token_type == SIMBOLO_LITERAL_INT) {
8890
value = malloc(sizeof(int));
8991
*((int*)value) = strtol(text, NULL, 10);
9092
if (errno == ERANGE) {
91-
/* int value is off integer limits, defaults to 2^31 - 1.
92-
* should we print an error message here?*/
93+
/* int value is off integer limits, defaults to 2^31 - 1. */
94+
printf("warning: line %d: integer value %s is off limits. "
95+
"defaulting to %d\n", line_number, text, *((int*)value));
9396
}
97+
//printf("%d\n", *((int*)value));
9498
} else if (token_type == SIMBOLO_LITERAL_CHAR) {
9599
value = malloc(sizeof(char));
96100
*((char*)value) = text[0];
101+
//printf("%c\n", *((char*)value));
97102
} else if (token_type == SIMBOLO_LITERAL_BOOL) {
98103
value = malloc(sizeof(int));
99104
*((int*)value) = strcmp(text, "false"); /* strcmp returns 0 if strings
100105
are equal, and different than 0
101106
otherwise. so, if it's false, we'll get
102107
0 anyway :) */
108+
//printf("%d\n", *((int*)value));
103109
} else if (token_type == SIMBOLO_LITERAL_FLOAT) {
104110
value = malloc(sizeof(float));
105111
*((float*)value) = strtof(text, NULL);
106112
if (errno == ERANGE) {
107-
/* float value overflow. should we print an error message? */
113+
/* float value overflow. */
114+
printf("warning: line %d: float value %s is off limits. "
115+
"defaulting to %.2f\n", line_number, text, *((float*)value));
108116
}
117+
//printf("%f\n", *((float*)value));
109118
} else if (token_type == SIMBOLO_LITERAL_STRING) {
110119
value = malloc((strlen(text) + 1) * sizeof(char));
111120
strcpy(value, text);
121+
//printf("%s\n", (char*)value);
112122
} else if (token_type == SIMBOLO_IDENTIFICADOR) {
113123
/* if the token is an identifier, it has no value associated to it
114124
* for now. */
115125
value = NULL;
116126
} else {
117127
/* error. */
118-
printf("invalid token type (%d) passed to interpret_token_value().\n",
128+
printf("invalid token type (%d) passed to interpret_token_value().\n",
119129
token_type);
120-
//assert(0);
121130
value = NULL;
122131
}
123132
return value;

src/cc_misc.c

+22-20
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ extern int lineCounter; /* lineCounter is declared in scanner.c,
1010
extern comp_dict_t symbols_table; /* the compiler's symbols table, declared
1111
in cc_dict.c */
1212

13+
extern YYSTYPE yylval;
14+
extern char* yytext;
15+
1316
int getLineNumber(void) {
1417
return lineCounter;
1518
}
@@ -27,7 +30,9 @@ void main_finalize(void) {
2730
symbols_table_finalize(&symbols_table);
2831
}
2932

30-
int recognize_token(const char* token_text, int token_id) {
33+
int recognize_token(int token_id) {
34+
const char* token_text = yytext;
35+
yylval.valor_simbolo_lexico = NULL;
3136

3237
/* add the lexeme to symbols table only if it is a literal or an
3338
* identifier. */
@@ -62,20 +67,30 @@ int recognize_token(const char* token_text, int token_id) {
6267
default:
6368
printf("Invalid token id (%d) given to "
6469
"compute_symbols_table_key()!\n", token_id);
65-
//assert(0);
70+
}
71+
72+
/* remove double and single quotes if token is a string or
73+
* char literal */
74+
if (token_type_id == SIMBOLO_LITERAL_STRING ||
75+
token_type_id == SIMBOLO_LITERAL_CHAR) {
76+
token_text = remove_quotes(token_text);
6677
}
6778

6879
/* get the key that refers to that lexeme: */
6980
char* symbols_table_key = compute_symbols_table_key(token_text,
7081
token_type_id);
7182

7283
/* add the lexeme to the symbols table. */
73-
symbols_table_add(symbols_table_key, /* key */
74-
getLineNumber(), /* line number */
75-
token_type_id, /* type of token */
76-
token_text, /* token value*/
77-
&symbols_table);
84+
yylval.valor_simbolo_lexico = symbols_table_add(symbols_table_key,
85+
getLineNumber(), token_type_id, token_text, &symbols_table);
86+
7887
free((void*)symbols_table_key);
88+
89+
/* deallocate token_text here, because we created a new string when we
90+
* removed quotes.*/
91+
if (token_type_id == SIMBOLO_LITERAL_STRING ||
92+
token_type_id == SIMBOLO_LITERAL_CHAR)
93+
free((void*)token_text);
7994
}
8095

8196
/* then, return the token's identifier. */
@@ -91,13 +106,6 @@ char* remove_quotes(const char* token_text) {
91106
}
92107

93108
char* compute_symbols_table_key(const char* token_text, int token_type_id) {
94-
/* remove double and single quotes if token is a string or
95-
* char literal */
96-
if (token_type_id == SIMBOLO_LITERAL_STRING ||
97-
token_type_id == SIMBOLO_LITERAL_CHAR) {
98-
//assert(token_text[0] == '"' || token_text[0] == '\'');
99-
token_text = remove_quotes(token_text);
100-
}
101109

102110
/* here, we safely assume that the token_type_id's number will never
103111
* have more more than 20 characters, which is the maximum length of
@@ -109,11 +117,5 @@ char* compute_symbols_table_key(const char* token_text, int token_type_id) {
109117
* an identifier ('$$') so no conflicts arise */
110118
sprintf(symbols_table_key, "%d $$ %s", token_type_id, token_text);
111119

112-
/* deallocate token_text here, because we created a new string when we
113-
* removed quotes.*/
114-
if (token_type_id == SIMBOLO_LITERAL_STRING ||
115-
token_type_id == SIMBOLO_LITERAL_CHAR)
116-
free((void*)token_text);
117-
118120
return symbols_table_key;
119121
}

0 commit comments

Comments
 (0)