mini_c——利用编译原理实现编译器
mini_c
目标:制作一个简单的mini_c编译器
以下仅部分代码,仅供参考使用
1. 词法分析部分
词法分析这里使用的是有限状态自动机,不多说直接上代码了。
//token define
enum state{start,identifier_accepted,number_accepted,
plus_accepted,minus_accepted,multiply_accepted,
left_bracket_1_accepted,right_bracket_1_accepted,
left_bracket_2_accepted,right_bracket_2_accepted,
left_bracket_3_accepted,right_bracket_3_accepted,
divide,
comment_body,comment_right,comment_accepted,
bigger,big,
smaller,small,
assignment,equal,
un,unequal,
and_accepted,end_accepted,
whitespace_accepted,
keyword_accepted,
error,end_of_file,
dot_accepted,
to_accepted
};
/* accepted token state:
id:identifier_accepted
num:number_accepted
+:plus_accepted
-:minus_accepted
*:multiply_accepted
/:divide
(,):left_bracket_1_accepted,right_bracket_1_accepted
[,]:left_bracket_2_accepted,right_bracket_2_accepted
{,}:left_bracket_3_accepted,right_bracket_3_accepted
>:bigger
<:smaller
>=:big
<=:small
=:assignment
==:equal
!=:unequal
,:and_accepted
;:end_accepted
keyword:keyword_accepted
.:dot_accepted
::to_accepted
*/
//token define
typedef struct Token{
char token_string[1000];
enum state token_state;
} tokenInfo;
//keyword define
const char* keywords[13] = {"else","if","switch","case","default","int","void","struct","return","break","goto","while","for"};
//use to justify whether the character is a letter
int isLetter(char current_char){
if((current_char<='z' && current_char>='a' )||(current_char>='A' && current_char<='Z') ){
return true;
}else{
return false;
}
}
//use to justify whether the character is a number
int isNumber(char current_char){
if(current_char<='9' && current_char>='0'){
return true;
}else{
return false;
}
}
//use to justify whether the token is a keyword
int isKeyword(char id[],int length){
id[length] = '\0';
for(int i=0;i<13;i++){
if(strcmp(id,keywords[i]) == 0){
return true;
}
}
return false;
}
//use to print token string
void printToken(char token[],int length){
for(int i=0;i<length;i++){
//printf("%c",token[i]);
}
token[length] = '\0';
//printf("\n");
}
//use to make token string and token state into token information struct
tokenInfo getTokenInfo(char token[],int length,enum state token_state){
tokenInfo token_info;
token[length] = '\0';
// for(int i=0;i<length;i++){
// //printf("%c",token[i]);
// }
strcpy(token_info.token_string,token);
token_info.token_state = token_state;
return token_info;
}
//use to read a token from the file
tokenInfo lexical_analyzer(FILE* file){
int cnt1 = 0;
int cnt2 = 0;
enum state current_state = start;
char token[1000][1050];
char current_char = getc(file);
while(current_char!=EOF || current_state!=start){
switch(current_state){
case start:{
if(isLetter(current_char)){
current_state = identifier_accepted;
token[cnt1][cnt2++] = current_char;
}
else if(isNumber(current_char)){
current_state = number_accepted;
token[cnt1][cnt2++] = current_char;
}
else{
switch(current_char){
case '+':{
current_state = plus_accepted;
token[cnt1][cnt2++] = current_char;
break;
}
case '-':{
current_state = minus_accepted;
token[cnt1][cnt2++] = current_char;
break;
}
case '*':{
current_state = multiply_accepted;
token[cnt1][cnt2++] = current_char;
break;
}
case '(':{
current_state = left_bracket_1_accepted;
token[cnt1][cnt2++] = current_char;
break;
}
case ')':{
current_state = right_bracket_1_accepted;
token[cnt1][cnt2++] = current_char;
break;
}
case '[':{
current_state = left_bracket_2_accepted;
token[cnt1][cnt2++] = current_char;
break;
}
case ']':{
current_state = right_bracket_2_accepted;
token[cnt1][cnt2++] = current_char;
break;
}
case '{':{
current_state = left_bracket_3_accepted;
token[cnt1][cnt2++] = current_char;
break;
}
case '}':{
current_state = right_bracket_3_accepted;
token[cnt1][cnt2++] = current_char;
break;
}
case '/':{
current_state = divide;
token[cnt1][cnt2++] = current_char;
break;
}
case '>':{
current_state = bigger;
token[cnt1][cnt2++] = current_char;
break;
}
case '<':{
current_state = smaller;
token[cnt1][cnt2++] = current_char;
break;
}
case '=':{
current_state = assignment;
token[cnt1][cnt2++] = current_char;
break;
}
case '!':{
current_state = un;
token[cnt1][cnt2++] = current_char;
break;
}
case ',':{
current_state = and_accepted;
token[cnt1][cnt2++] = current_char;
break;
}
case ';':{
current_state = end_accepted;
token[cnt1][cnt2++] = current_char;
break;
}
case ' ':
case '\n':
case '\t':{
current_state = whitespace_accepted;
break;
}
case '.':{
current_state = dot_accepted;
token[cnt1][cnt2++] = current_char;
break;
}
case ':':{
current_state = to_accepted;
token[cnt1][cnt2++] = current_char;
break;
}
default:{
//printf("unknown character!!\n");
tokenInfo info = {"\0",error};
//if(!feof(file))
//fseek(file,-1,SEEK_CUR);
return info;
break;
}
}
}
current_char = getc(file);
break;
}
case identifier_accepted:{
if(!isLetter(current_char)){
if(isKeyword(token[cnt1],cnt2)){
current_state = keyword_accepted;
}
else{
//printf("an identifier is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
}
}else{
current_state = identifier_accepted;
token[cnt1][cnt2++] = current_char;
current_char = getc(file);
}
break;
}
case number_accepted:{
if(!isNumber(current_char)){
//printf("a constant is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
}else{
current_state = number_accepted;
token[cnt1][cnt2++] = current_char;
current_char = getc(file);
}
break;
}
case plus_accepted:{
//printf("a plus is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}
case minus_accepted:{
//printf("a minus is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}
case multiply_accepted:{
//printf("a multiply is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}
case left_bracket_1_accepted:{
//printf("a left bracket_1 is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}
case left_bracket_2_accepted:{
//printf("a left bracket_2 is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}
case left_bracket_3_accepted:{
//printf("a left bracket_3 is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}
case right_bracket_1_accepted:{
//printf("a right bracket_1 is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}
case right_bracket_2_accepted:{
//printf("a right bracket_2 is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}
case right_bracket_3_accepted:{
//printf("a right bracket_3 is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}
case divide:{
if(current_char!='*'){
//printf("a divide is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}else{
current_state = comment_body;
token[cnt1][cnt2++] = current_char;
current_char = getc(file);
break;
}
}
case comment_body:{
if(current_char!='*'){
current_state = comment_body;
token[cnt1][cnt2++] = current_char;
current_char = getc(file);
break;
}else{
current_state = comment_right;
token[cnt1][cnt2++] = current_char;
current_char = getc(file);
break;
}
}
case comment_right:{
if(current_char=='/'){
current_state = comment_accepted;
token[cnt1][cnt2++] = current_char;
current_char = getc(file);
break;
}else{
current_state = comment_body;
token[cnt1][cnt2++] = current_char;
current_char = getc(file);
break;
}
}
case comment_accepted:{//注释识别但是不返回这个token
//printf("a comment is accepted!\n");
printToken(token[cnt1],cnt2);
cnt2 = 0;
current_state = whitespace_accepted;
break;
}
case bigger:{
if(current_char!='='){
//printf("a bigger is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}else{
current_state = big;
token[cnt1][cnt2++] = current_char;
current_char = getc(file);
}
}
case big:{
//printf("a maybig is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}
case smaller:{
if(current_char!='='){
//printf("a smaller is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}else{
current_state = small;
token[cnt1][cnt2++] = current_char;
current_char = getc(file);
}
}
case small:{
//printf("a maysmall is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}
case assignment:{
if(current_char!='='){
//printf("an assignment is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}else{
current_state = equal;
token[cnt1][cnt2++] = current_char;
current_char = getc(file);
}
}
case equal:{
//printf("an equal is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}
case un:{
if(current_char!='='){
//printf("an un is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}else{
current_state = unequal;
token[cnt1][cnt2++] = current_char;
current_char = getc(file);
}
}
case unequal:{
//printf("an unequal is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}
case and_accepted:{
//printf("an and is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}
case end_accepted:{
//printf("an end is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}
case whitespace_accepted:{//空白符识别但是不返回token
if(current_char == ' ' || current_char == '\n' || current_char == '\t'){
current_state = whitespace_accepted;
current_char = getc(file);
}else{
////printf("a whitespace is accepted!\n");
////printf("\n");
current_state = start;
}
break;
}
case keyword_accepted:{
//printf("a keyword is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}
case dot_accepted:{
//printf("a dot is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}
case to_accepted:{
//printf("a to is accepted!\n");
printToken(token[cnt1],cnt2);
tokenInfo info = getTokenInfo(token[cnt1++],cnt2,current_state);
if(!feof(file))
fseek(file,-1,SEEK_CUR);
return info;
cnt2 = 0;
current_state = start;
break;
}
default:{
break;
}
}
}
if(current_char == EOF){
tokenInfo info = {"",end_of_file};
return info;
}
}
简单总结:
简单来说我的思路就是首先判断token的第一个字符应当属于哪个token,然后再进行一步步转化直到进入token的接收态。
该部分代码最大的缺点是很多代码段其实是重复的,但是我没有进行函数封装。
2. 语法分析部分
语法分析部分采用的是简单消除了左递归的递推文法。(吐槽!老师给的伪代码有很大问题,想要使用得自己更改)
大致的逻辑如下图:
(图太大了等我有时间再传= =)
递推关系式的实现代码如下:
(有些功能函数未在该代码中写出)
((递推关系式和简单解释看注释(注释中英文混杂的原因是一开始编辑器没调好中文全乱码只能写英文T T)))
enum nodeType{functionNode,paramsNode,compoundNode,ifStmtNode,switchStmtNode,whileStmtNode,forStmtNode,//6
returnStmtNode,breakStmtNode,caseStmtNode,defaultStmtNode,addNode,multNode,structNode,//13
IDNode,NUMNode,assignNode,relopNode,arrayNode,funcallNode};//19
enum variable_type{INT,VOID,STRUCT,NEITHER};
typedef struct treeNode{
enum state token_type;
enum nodeType node_type;
char id_string[1000];
struct treeNode *sibling;
/*function[name,params,body]*/
struct treeNode *params;
struct treeNode *body;
/*params[type,id]*/
enum variable_type type;
/*compound[decl,body]*/
struct treeNode *decl;
/*if_stmt[test,then,else]*/
struct treeNode *test;
struct treeNode *then;
struct treeNode *else_stmt;
/*switch_stmt[test,case]*/
struct treeNode *case_stmt;
/*while_stmt[test,body]*/
/*for_stmt[part1,part2,part3,body]*/
struct treeNode *part1;
struct treeNode *part2;
struct treeNode *part3;
/*return_stmt[returnexp]*/
struct treeNode *returnexp;
/*break_stmt[]*/
/*case_stmt[test,body]*/
/*default_stmt[body]*/
/*add/mult[left,right]*/
struct treeNode *left;
struct treeNode *right;
/*assign/relop[lvalue,rvalue]*/
struct treeNode *lvalue;
struct treeNode *rvalue;
/*struct[struct_name,memName]*/
struct treeNode *struct_name;
/*array[array_name,index_exp]*/
struct treeNode *array_name;
struct treeNode *index_exp;
/*funcall[funname,args_list]*/
struct treeNode *fun_name;
struct treeNode *args_list;
/*ID[id]*/
/*NUM[value]*/
int value;
}treeNode;
/*--begin--declaration-list()--*/
/*
program -> declaration-list
declaration-list -> declaration { declaration }
*/
treeNode* declaration_list(){
treeNode *root = declaration();
treeNode *p = root;
while(!feof(file)){
treeNode *q = declaration();
p->sibling = q;
p=q;
}
return root;
}
/*--end--declaration-list()--*/
/*--begin--declaration()--*/
/*
declaration -> variable-declaration | function-declaration
variable-declaration -> type-specifier ID ["[" NUM "]"]
type-specifier -> int | void |struct-specifier
struct-specifier -> struct [ID] ["{" struct-declaration-list "}"]
struct-declaration-list -> int ID ";" { int ID ";"}
function-declaration -> type-specifier ID "(" parameters ")" compound-statement
parameters -> parameter-list | void
parameter-list -> parameter {"," parameter}
parameter -> type-specifier ID ["[" "]"]
*/
treeNode* declaration(){
enum variable_type t = type_specifier();
if(t==NEITHER) return NULL;
treeNode *d = decl_body(t);
return d;
}
enum variable_type type_specifier(){
if(currentToken.token_state == keyword_accepted){
if(keyword_cmp(currentToken.token_string,"int")){
match(keyword_accepted);
return INT;
}else if(keyword_cmp(currentToken.token_string,"void")){
match(keyword_accepted);
return VOID;
}else if(keyword_cmp(currentToken.token_string,"struct")){
match(keyword_accepted);
tokenInfo currentT = currentToken;
addStructType(currentToken.token_string);
//addtype(ID,currentToken.token_string);//unimplemented
if(currentToken.token_state==left_bracket_3_accepted){//struct可以在声明的时候加入变量,所以要测试
//有没有左括号
match(left_bracket_3_accepted);
while(currentToken.token_state!=right_bracket_3_accepted){
match(keyword_accepted);//这个取得是int,因为只有这个类型所以只取这个
tokenInfo tt = match(identifier_accepted);
if(currentToken.token_state==left_bracket_2_accepted){//有可能声明的是一个数组
match(left_bracket_2_accepted);
addStructMemArrayType(tt.token_string,currentT.token_string,getCurrentTokenValue());
tokenInfo tt2 = match(number_accepted);
match(right_bracket_2_accepted);
//addtype(ID,ARRAY,STRUCT_MEMBER,STRUCT_NAME);
printf("struct member %s [%s] created\n",tt.token_string,tt2.token_string);
match(end_accepted);
}else{
//addtype(ID,STRUCT_MEMBER,STRUCT_NAME);
addStructMemType(tt.token_string,currentT.token_string);
printf("struct member %s created\n",tt.token_string);
match(end_accepted);
}
}
match(right_bracket_3_accepted);
return STRUCT;
}
}else{
return NEITHER;
}
}
return NEITHER;
}
treeNode* decl_body(enum variable_type t){
treeNode *p = NULL;
tokenInfo tt = match(identifier_accepted);//考虑到不管是变量还是函数都有ID,直接取ID
//addtype(ID);//由于不知道这里ID是什么类型,所以先创一个ID
//printf("currentToken:%s\n",currentToken.token_string);
switch(currentToken.token_state){
case left_bracket_1_accepted:{
p = fun_decl(t,tt.token_string);//这里考虑再传个ID名字进去
break;
}
case end_accepted:{
//addtype(ID,ID_NAME);
addIdType(tt.token_string);
printf("int %s created\n",tt.token_string);
match(end_accepted);
break;
}
case left_bracket_2_accepted:{
match(left_bracket_2_accepted);
addArrayType(tt.token_string,getCurrentTokenValue());
tokenInfo tt2 = match(number_accepted);
match(right_bracket_2_accepted);
match(end_accepted);
//addtype(ID,ARRAY_NAME,NUM);
printf("array %s[%s] created\n",tt.token_string,tt2.token_string);
break;
}
default:{
//printf("decl_body error");
break;
}
}
return p;
}
treeNode* fun_decl(enum variable_type t,char name[]){
treeNode *p=NULL;
//addtype(ID,FUNCTION_NAME);
//addFunType(name);
printf("function %s() created\n",name);
p=new_node();
p->node_type=functionNode;
printf("treeNode function has created\n");
strcpy(p->id_string,name);
match(left_bracket_1_accepted);
p->params = param_list();
match(right_bracket_1_accepted);
p->body = compound_stmt();
return p;
}
treeNode* param_list(){
treeNode *t=NULL,*p=NULL,*root=NULL;
while(currentToken.token_state!=right_bracket_1_accepted){
t=new_node();
t->node_type=paramsNode;
t->type = type_specifier();
tokenInfo tt = match(identifier_accepted);
strcpy(t->id_string,tt.token_string);
if(currentToken.token_state==left_bracket_2_accepted){
match(left_bracket_2_accepted);
match(right_bracket_2_accepted);
//addtype(ID,REFERENCE);
addReferencedType(tt.token_string);
printf("parameter %s[] created\n",tt.token_string);
}else{
addIdType(tt.token_string);
printf("parameter %s created\n",tt.token_string);
}
if(root==NULL){
root=t;
}
if(p!=NULL){
p->sibling = t;
}
p=t;
if(currentToken.token_state!=right_bracket_1_accepted){
match(and_accepted);
}
}
return root;
}
/*--end--declaration()--*/
/*--begin--compound_stmt()--*/
/*
compound-statement -> "{" [local-declarations] [statment-list] "}"
local-declarations -> {variable-declaration}
statement-list -> {statement}
*/
treeNode* compound_stmt(){
treeNode *f=NULL;
//if(currentToken.token_state==left_bracket_3_accepted){
match(left_bracket_3_accepted);
//建个新的域
typeList *newTypeList = new_list_node();
list->sibling = newTypeList;
list = newTypeList;
f = new_node();
f->node_type=compoundNode;
printf("treeNode compound is created\n");
while(currentToken.token_state==keyword_accepted && (keyword_cmp(currentToken.token_string,"int")||keyword_cmp(currentToken.token_string,"struct"))){
f->decl = declaration();
}
//printf("stmt_list entered\n");
f->body = stmt_list();
//printf("fun_body:current:%s\n",currentToken.token_string);
match(right_bracket_3_accepted);
/*}else{
f = new_node();
printf("treeNode compound is created\n");
f->decl = declaration();
//printf("stmt_list entered\n");
f->body = stmt_list();
}
*/
return f;
}
treeNode* stmt_list(){
//int cnt = 1;
treeNode *t = statement();
treeNode *r = t;
//printf("stmt_list:current:%s\n",currentToken.token_string);
while(currentToken.token_state!=right_bracket_3_accepted){
treeNode *q = statement();
//printf("%d\n",cnt++);
t->sibling = q;
t=q;
}
return r;
}
/*--end--compound_stmt()--*/
/*--begin--statement()--*/
/*
statement -> compound-statement | expression-statement |
selection-statement | labeled-statement |
iteration-statement | jump-statement
expression-statement -> [expression]
*/
treeNode* statement(){
treeNode *t=NULL;
//printf("statement entered\n\n\n\n");
//printf("current:%s\n", currentToken.token_string);
switch(currentToken.token_state){
case keyword_accepted:{
if(keyword_cmp(currentToken.token_string,"if")||keyword_cmp(currentToken.token_string,"switch")){
t=selection_stmt();
}else if(keyword_cmp(currentToken.token_string,"while")||keyword_cmp(currentToken.token_string,"for")){
t=iteration_stmt();
}else if(keyword_cmp(currentToken.token_string,"return")||keyword_cmp(currentToken.token_string,"break")){
t=jump_stmt();
}else if(keyword_cmp(currentToken.token_string,"case")||keyword_cmp(currentToken.token_string,"default")){
t=labeled_stmt();
}else{
printf("keyword error\n");
//t = compound_stmt();
}
break;
}
case left_bracket_3_accepted:{
t=compound_stmt();
break;
}
default:{
//printf("exp_stmt entered\n");
t=exp_stmt();
break;
}
}
return t;
}
treeNode* exp_stmt(){
treeNode *t=NULL;
if(currentToken.token_state!=end_accepted){
t=expression();
}
match(end_accepted);
return t;
}
/*--end--statement()--*/
/*--begin--selection_stmt()--*/
/*
selection-statement -> if "(" expression ")" statement [else statement] |
switch "(" expression ")" statement
*/
treeNode* selection_stmt(){
treeNode *t = new_node();
switch(currentToken.token_state){
case keyword_accepted:{
if(keyword_cmp(currentToken.token_string,"if")){
printf("treeNode if has created\n");
t->node_type=ifStmtNode;
match(keyword_accepted);
match(left_bracket_1_accepted);
t->test = expression();
match(right_bracket_1_accepted);
t->then = statement();
if(keyword_cmp(currentToken.token_string,"else")){
match(keyword_accepted);
t->else_stmt = statement();
}
}else if(keyword_cmp(currentToken.token_string,"switch")){
printf("treeNode switch has created\n");
t->node_type=switchStmtNode;
match(keyword_accepted);
match(left_bracket_1_accepted);
t->test = expression();
match(right_bracket_1_accepted);
t->case_stmt = statement();
}else{
printf("keyword error");
}
break;
}
default:{
printf("selection error");
break;
}
}
return t;
}
/*--end--selection_stmt()--*/
/*--begin--iteration_stmt()--*/
/*
iteration-statement -> while "(" expression ")" statement |
for "(" [expression] ";" [expression] ";" [expression] ")" statement
*/
treeNode* iteration_stmt(){
treeNode *t = new_node();
switch(currentToken.token_state){
case keyword_accepted:{
if(keyword_cmp(currentToken.token_string,"while")){
printf("treeNode while has created\n");
t->node_type=whileStmtNode;
//printf("------------");
match(keyword_accepted);
match(left_bracket_1_accepted);
t->test = expression();
match(right_bracket_1_accepted);
t->body = statement();
}else if(keyword_cmp(currentToken.token_string,"for")){
printf("treeNode for has created\n");
t->node_type=forStmtNode;
match(keyword_accepted);
match(left_bracket_1_accepted);
t->part1 = expression();
match(end_accepted);
t->part2 = expression();
match(end_accepted);
t->part3 = expression();
match(right_bracket_1_accepted);
t->body = statement();
}else{
printf("keyword error");
}
break;
}
default:{
printf("iteration error");
break;
}
}
return t;
}
/*--end--iteration_stmt()--*/
/*--begin--jump_stmt()--*/
/*
jump-statement -> return [expression] ";" |
break ";"
*/
treeNode* jump_stmt(){
treeNode *t = new_node();
switch(currentToken.token_state){
case keyword_accepted:{
if(keyword_cmp(currentToken.token_string,"return")){
printf("treeNode return has created\n");
t->node_type=returnStmtNode;
match(keyword_accepted);
t->returnexp = expression();
match(end_accepted);
}else if(keyword_cmp(currentToken.token_string,"break")){
t->node_type=breakStmtNode;
match(keyword_accepted);
printf("treeNode break has created\n");
match(end_accepted);
}else{
printf("keyword error");
}
break;
}
default:{
printf("jump error");
break;
}
}
//printf("------------------\n");
//printf("current:%s",currentToken.token_string);
return t;
}
/*--end--jump_stmt()--*/
/*--begin--labeled_stmt()--*/
/*
labeled-statement -> case conditional-expression ':' statement |
default ":" statement
*/
treeNode* labeled_stmt(){
treeNode *t=new_node();
switch(currentToken.token_state){
case keyword_accepted:{
if(keyword_cmp(currentToken.token_string,"case")){
printf("treeNode case has created\n");
t->node_type=caseStmtNode;
match(keyword_accepted);
t->test = add_exp();
match(to_accepted);
t->body = statement();
}else if(keyword_cmp(currentToken.token_string,"default")){
printf("treeNode default has created\n");
t->node_type=defaultStmtNode;
match(keyword_accepted);
match(to_accepted);
t->body = statement();
}else{
printf("keyword error");
}
break;
}
default:{
printf("labeled error");
break;
}
}
return t;
}
/*--end--labeled_stmt()--*/
/*--begin--expression()==*/
/*
expression -> assighment-expression | conditional-expression
assignment-expression -> variavle "=" expression
variable -> ID [ "[" NUM "]" | "."ID]
conditional-expression -> additive-expression [relation-operator additive-expression]
relation-operator -> "<=" | ">=" | "<" | ">" | "==" | "!="
additive-expression -> multiplicative-expression {add-operator multiplicative-expression}
add-operator -> "+" | "-"
multiplicative-expression -> primary-expression {mul-operator primary-expression}
mul-operator -> "*" | "/"
primary-expression -> variable | NUM | "(" expression ")" | call-function
call-function -> ID "(" [argument-list] ")"
argument-list -> expression { "," expression}
*/
treeNode* expression(){
treeNode *p = add_exp();
treeNode *t = NULL;
//printf("expression:current:%s\n",currentToken.token_string);
switch(currentToken.token_state){
case assignment :{
match(assignment);
t = new_node();
printf("treeNode assign has created\n");
t->node_type=assignNode;
t->lvalue = p;
t->rvalue = expression();
break;
}
case bigger:
case smaller:
case big:
case small:
case equal:
case unequal:{
t = new_node();
t->token_type = currentToken.token_state;
match(currentToken.token_state);
printf("treeNode relop has created\n");
t->node_type=relopNode;
t->lvalue = p;
t->rvalue = add_exp();
break;
}
default:{
t = p;
}
}
return t;
}
treeNode* add_exp(){
treeNode *t = mul_exp();
while(currentToken.token_state==plus_accepted || currentToken.token_state == minus_accepted){
treeNode *newtemp= new_node();
newtemp->token_type = currentToken.token_state;
match(currentToken.token_state);
printf("treeNode add has created\n");
newtemp->node_type=addNode;
newtemp->left = t;
newtemp->right = mul_exp();
t = newtemp;
}
return t;
}
treeNode* mul_exp(){
treeNode *t = postfix_exp();
while(currentToken.token_state==multiply_accepted || currentToken.token_state == divide){
treeNode *newtemp= new_node();
newtemp->token_type = currentToken.token_state;
match(currentToken.token_state);
printf("treeNode mult has created\n");
newtemp->node_type=multNode;
newtemp->left = t;
newtemp->right = postfix_exp();
t = newtemp;
}
return t;
}
treeNode* postfix_exp(){
treeNode *t = primary_exp();
switch(currentToken.token_state){
case dot_accepted:{
match(dot_accepted);
treeNode *p = new_node();
printf("treeNode struct has created\n");
p->node_type=structNode;
p->struct_name = t;
strcpy(p->id_string,currentToken.token_string);
match(identifier_accepted);
t = p;
break;
}
case left_bracket_2_accepted:{
match(left_bracket_2_accepted);
treeNode *p = new_node();
printf("treeNode array has created\n");
p->node_type=arrayNode;
p->array_name = t;
p->index_exp = expression();
match(right_bracket_2_accepted);
t=p;
break;
}
case left_bracket_1_accepted:{
match(left_bracket_1_accepted);
treeNode *p = new_node();
printf("treeNode funcall has created\n");
p->node_type=funcallNode;
p->fun_name = t;
p->args_list = optional_args();
match(right_bracket_1_accepted);
t = p;
break;
}
default:{
//printf("error");
break;
}
}
return t;
}
treeNode* primary_exp(){
treeNode *t = NULL;
switch(currentToken.token_state){
case left_bracket_1_accepted:{
match(left_bracket_1_accepted);
t = expression();
match(right_bracket_1_accepted);
break;
}
case identifier_accepted:{
t = new_node();
printf("treeNode ID %s has created\n",currentToken.token_string);
t->node_type=IDNode;
strcpy(t->id_string,currentToken.token_string);
match(identifier_accepted);
break;
}
case number_accepted:{
t = new_node();
printf("treeNode NUM has created\n");
t->node_type=NUMNode;
t->token_type = number_accepted;
t->value = getCurrentTokenValue();
match(number_accepted);
break;
}
default:{
printf("primary_exp error");
break;
}
}
return t;
}
treeNode* optional_args(){
treeNode *r = expression();
treeNode *t = r;
while(currentToken.token_state!= right_bracket_1_accepted){
treeNode *p = expression();
t->sibling = p;
t = p ;
match(and_accepted);
}
return r;
}
/*--end--expression()--*/
简单总结:
递推关系式本身其实没太大问题,但是按照这个递推关系式得出来的文法相较于C语言文法会有一些限制:必须将变量全部申明完成之后才可以进行语句,且变量无法初始化。
(懒得删测试注释了= =)
3. 语义分析
在此只建立了一个变量表。
变量表是由一个域链表和域中变量名表组成的,变量名表使用的是哈希表的方式存储的。
enum typeType{idType,arrayType,funType,structType,structMemType,referencedType,structMemArrayType};
typedef struct TypeNode{
enum typeType thisType;
char type_id[1000];
struct TypeNode *sibling;
/*arrayType*/
int type_size;
/*structType*/
char parent_struct_name[1000];
int value;
} typeNode;
typedef struct TypeList{
struct TypeList *sibling;
typeNode *localTypeList[101];//这里用质数101来算
} typeList;
//这个是用来加到变量表里面的
void addIdType(char id[]){//id
int thisHashValue = getHashValue(id);
//printf("%d\n\n",thisHashValue);
typeNode *node = list->localTypeList[thisHashValue];
typeNode *newNode = new_type_node();
newNode->thisType = idType;
strcpy(newNode->type_id,id);
//插到链表里面
if(node==NULL) node = newNode;
else{
newNode->sibling = node->sibling;
node->sibling = newNode;
}
}
void addStructType(char id[]){//struct
int thisHashValue = getHashValue(id);
typeNode *node = list->localTypeList[thisHashValue];
typeNode *newNode = new_type_node();
newNode->thisType = structType;
strcpy(newNode->type_id,id);
//插到链表里面
if(node==NULL) node = newNode;
else{
newNode->sibling = node->sibling;
node->sibling = newNode;
}
}
void addFunType(char id[]){
int thisHashValue = getHashValue(id);
typeNode *node = list->localTypeList[thisHashValue];
typeNode *newNode = new_type_node();
newNode->thisType = funType;
strcpy(newNode->type_id,id);
//插到链表里面
if(node==NULL) node = newNode;
else{
newNode->sibling = node->sibling;
node->sibling = newNode;
}
}
void addReferencedType(char id[]){
int thisHashValue = getHashValue(id);
typeNode *node = list->localTypeList[thisHashValue];
typeNode *newNode = new_type_node();
newNode->thisType = referencedType;
strcpy(newNode->type_id,id);
//插到链表里面
if(node==NULL) node = newNode;
else{
newNode->sibling = node->sibling;
node->sibling = newNode;
}
}
void addArrayType(char id[],int num){
int thisHashValue = getHashValue(id);
typeNode *node = list->localTypeList[thisHashValue];
typeNode *newNode = new_type_node();
newNode->thisType = arrayType;
strcpy(newNode->type_id,id);
newNode->type_size = num;
//插到链表里面
if(node==NULL) node = newNode;
else{
newNode->sibling = node->sibling;
node->sibling = newNode;
}
}
void addStructMemType(char id[],char parentId[]){
int thisHashValue = getHashValue(id);
typeNode *node = list->localTypeList[thisHashValue];
typeNode *newNode = new_type_node();
newNode->thisType = structMemType;
strcpy(newNode->type_id,id);
strcpy(newNode->parent_struct_name,parentId);
//插到链表里面
if(node==NULL) node = newNode;
else{
newNode->sibling = node->sibling;
node->sibling = newNode;
}
}
void addStructMemArrayType(char id[],char parentId[],int num){
int thisHashValue = getHashValue(id);
typeNode *node = list->localTypeList[thisHashValue];
typeNode *newNode = new_type_node();
newNode->thisType = structMemType;
strcpy(newNode->type_id,id);
strcpy(newNode->parent_struct_name,parentId);
newNode->type_size = num;
//插到链表里面
if(node==NULL) node = newNode;
else{
newNode->sibling = node->sibling;
node->sibling = newNode;
}
}
自我反思:
语义分析其实没仔细看。。。所以只是做了一个简单的变量表,而且自认为是有错误的,比如:域是链表形式的,只能顺序读= =
而且这个代码的重复度。。。非常高!完全可以一个函数解决的,只是偷懒不想整合了。。ヽ(≧□≦)ノ