// 词法分析器(C++ 实现)

#include<bits/stdc++.h>
#include<cstring>
using namespace std;
// Reserved-word table: 32 keywords, each stored in a 20-byte slot.
// searchRev() returns (index + 1) for a match, so entry i has category code i + 1.
static char reserve[32][20] = {
    "auto", "break", "case", "char", "const", "continue",
    "default", "do", "double", "else", "enum", "extern",
    "float", "for", "goto", "if", "int", "long",
    "register", "return", "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union", "unsigned", "void",
    "volatile", "while"
};

// Operator/delimiter table: 36 entries, each stored in a 10-byte slot.
// Scan() assigns entry i the category code 33 + i, and main() prints
// operatorLimit[lab - 33] for codes 33..68, so the order of entries is significant.
static char operatorLimit[36][10] = {
    "+", "-", "*", "/", "<", "<=", ">", ">=", "=", "==",
    "!=", ";", "(", ")", "^", ",", "\"", "\'", "#", "&",
    "&&", "|", "||", "%", "~", "<<", ">>", "[", "]", "{",
    "}", "\\", ".", "\?", ":", "!"
};

// Identifier table filled by main(): one identifier per 50-byte slot.
// All slots start out as empty strings; main() treats "" as an unused slot.
char IdenOperator[10000][50] = {""};

/******** Reserved-word lookup ********/
// Compares s against the first 32 entries of reserveWord.
// Returns the 1-based position of the matching entry (its category code),
// or -1 when s is not a reserved word, i.e. it is an identifier.
int searchRev(char reserveWord[][20], char s[])
{
    const int kReservedCount = 32;

    int index = 0;
    while (index < kReservedCount && strcmp(reserveWord[index], s) != 0)
    {
        ++index;
    }

    // index == kReservedCount means the scan ran off the end without a match.
    return (index < kReservedCount) ? index + 1 : -1;
}
/*编译预处理*/
void Filter(char r[],int source)
{
	char cmptString[100000];
	int count = 0;
	for(int i = 0 ; i <= source ;i++)
	{
		if(r[i] == '/'&&r[i+1] == '/')
		{
			while(r[i] != '\n')
			{
				i++;
			}
		}
		if(r[i] =='/'&&r[i+1] == '*')
		{
			i+=2;
			while(r[i] !='*'||r[i+1]!='/')
			{
				i++;
				if (r[i] == '$')
                {
                    printf("注释出错,没有找到 */,程序结束!!!\n");
                    exit(0);
                }
			}
			i+=2;
		}
		if(r[i] != '\n'&&r[i] !='\t'&&r[i]!='\v'&&r[i]!='\r')
		cmptString[count++] = r[i];
	}
	cmptString[count] = '\0';
	strcpy(r,cmptString);
}
// Consumes a one- or two-character operator that starts at r[source].
// If the character after r[source] equals `second`, both characters are
// consumed and pairedLab is returned; otherwise only r[source] is consumed
// and singleLab is returned.
static int scanPairedOperator(const char r[], int &source, char second, int pairedLab, int singleLab)
{
	if(r[source + 1] == second)
	{
		source += 2;
		return pairedLab;
	}
	source += 1;
	return singleLab;
}

// Scans one word or symbol starting at r[source].
//
//   lab       - out: category code of the token
//                 1..32   reserved word (1-based position in `reserve`)
//                 33..68  operator/delimiter (33 + position in `operatorLimit`)
//                 99      numeric constant
//                 100     identifier
//                 0       end marker '$'
//   r         - filtered, NUL-terminated source text
//   GetString - out: lexeme text for identifiers, reserved words, numbers and
//               single-character operators; the first 50 bytes are cleared on
//               every call, so the buffer must hold at least 50 chars
//   source    - in/out: read position in r, advanced past the token (it is
//               left on '$' when the end marker is reached)
//
// A character that starts no known token prints an error and exits.
void Scan(int &lab, char r[], char GetString[], int &source)
{
	const int kLexemeCapacity = 50;  // size of the buffer cleared below

	// Skip leading spaces.
	while(r[source] == ' ')
	{
		source++;
	}
	const char ch = r[source];

	for(int i = 0 ; i < kLexemeCapacity ; i++)
	{
		GetString[i] = '\0';
	}

	int count = 0;

	// The <cctype> classifiers require a value representable as unsigned char;
	// passing a negative plain char (any non-ASCII byte) is undefined behaviour.
	if(isalpha(static_cast<unsigned char>(ch)))
	{	// Word: a letter followed by letters/digits.
		while(isalnum(static_cast<unsigned char>(r[source])))
		{
			// Over-long lexemes are consumed in full but stored truncated,
			// so the buffer always keeps its terminating '\0'.
			if(count < kLexemeCapacity - 1)
			{
				GetString[count++] = r[source];
			}
			source++;
		}
		lab = searchRev(reserve,GetString);
		if(lab == -1)
		{	// Not a reserved word, so it is an identifier.
			lab = 100;
		}
		return;
	}

	if(isdigit(static_cast<unsigned char>(ch)))
	{	// Numeric constant: a run of digits.
		while(isdigit(static_cast<unsigned char>(r[source])))
		{
			if(count < kLexemeCapacity - 1)
			{
				GetString[count++] = r[source];
			}
			source++;
		}
		lab = 99;
		return;
	}

	// Operators that may be one or two characters long, and the end marker.
	// r[source + 1] is readable in every case: ch is not the terminator.
	switch(ch)
	{
	case '<':
		if(r[source + 1] == '<')
		{
			source += 2;  // consume BOTH characters of "<<"
			lab = 58;
		}
		else
		{
			lab = scanPairedOperator(r, source, '=', 38, 37);  // "<=" or "<"
		}
		return;
	case '>':
		if(r[source + 1] == '>')
		{
			source += 2;
			lab = 59;
		}
		else
		{
			lab = scanPairedOperator(r, source, '=', 40, 39);  // ">=" or ">"
		}
		return;
	case '=':
		lab = scanPairedOperator(r, source, '=', 42, 41);  // "==" or "="
		return;
	case '!':
		lab = scanPairedOperator(r, source, '=', 43, 68);  // "!=" or "!"
		return;
	case '&':
		lab = scanPairedOperator(r, source, '&', 53, 52);  // "&&" or "&"
		return;
	case '|':
		lab = scanPairedOperator(r, source, '|', 55, 54);  // "||" or "|"
		return;
	case '$':
		lab = 0;  // end marker; the read position is deliberately not advanced
		return;
	default:
		break;
	}

	// Remaining single-character operators/delimiters: look the character up
	// in operatorLimit; its position gives the category code.
	GetString[0] = ch;
	GetString[1] = '\0';
	for(int i = 0 ; i < 36 ; i++)
	{
		if(strcmp(GetString,operatorLimit[i]) == 0)
		{
			lab = 33 + i;
			source++;
			return;
		}
	}

	// Not recognised by any rule above.
	printf("error:there is no exist %c \n", ch);
	exit(0);
}

// Entry point.
//   1. Reads c:\in.txt up to the first '$' (or end of file) into memory.
//   2. Runs Filter() on the text and prints the filtered program.
//   3. Tokenises it with Scan(), writing one "(code ,lexeme)" line per token
//      to the console and to C://out.txt, and collecting each distinct
//      identifier in IdenOperator.
//   4. Writes the identifier list to the console and to c:\out2.txt.
int main()
{
	const int kMaxSource = 100000;
	// Number of slots in the global identifier table, taken from its declaration.
	const int kIdentifierCapacity =
		static_cast<int>(sizeof(IdenOperator) / sizeof(IdenOperator[0]));

	char ResourceString[kMaxSource];
	char GetString[50] = {0};
	int lab = -1;     // category code of the last token; 0 means end marker
	int source = 0;   // read position in the source text
	FILE *fp,*fp1,*fp2;

	if ((fp = fopen("c:\\in.txt", "r")) == NULL)
	{	// open the source program
		std::cout << "can't open this file";
		exit(0);
	}

	// fgetc() returns int so that EOF can be told apart from every character.
	// End of file is treated like the '$' end marker, so an input without
	// '$' no longer loops forever.
	int c;
	while((c = fgetc(fp)) != EOF && c != '$')
	{
		// Keep two bytes free for the '$' marker and the terminating '\0'.
		if(source >= kMaxSource - 2)
		{
			fclose(fp);
			printf("error: source file is too large\n");
			exit(0);
		}
		ResourceString[source++] = static_cast<char>(c);
	}
	fclose(fp);
	ResourceString[source++] = '$';
	ResourceString[source] = '\0';  // source is now the index of the terminator

	// Strip comments and layout whitespace.
	Filter(ResourceString,source);
	std::cout << std::endl << "过滤之后的程序:" << std::endl;
	std::cout << ResourceString << std::endl;

	if((fp1 = fopen("C://out.txt","w+")) == NULL)
	{
		printf("打开失败\n");
		exit(0);
	}

	int num = 0;  // number of identifiers stored in IdenOperator so far
	source = 0;
	while(lab != 0)
	{
		Scan(lab,ResourceString,GetString,source);
		if(lab == 100)
		{	// identifier: add it to the table unless it is already there
			int i = 0;
			while(i < num && strcmp(IdenOperator[i], GetString) != 0)
			{
				i++;
			}
			// When the table is full, further new identifiers are still
			// reported as tokens but are not recorded.
			if(i == num && num < kIdentifierCapacity)
			{
				strcpy(IdenOperator[num], GetString);
				num++;
			}
			printf("(%3d   ,%s)\n",lab, GetString);
			fprintf(fp1, "(%3d   ,%s)\n",lab, GetString);
		}
		else if(lab >= 1 && lab <= 32)
		{	// reserved word
			printf("(%3d   ,  %s)\n",lab, reserve[lab - 1]);
			fprintf(fp1, "(%3d   ,%s)\n",lab, reserve[lab - 1]);
		}
		else if(lab == 99)
		{	// numeric constant
			printf("(%3d   ,%s)\n",lab, GetString);
			fprintf(fp1, "(%3d   ,%s)\n",lab, GetString);
		}
		else if(lab >= 33 && lab <= 68)
		{	// operator or delimiter
			printf("(%3d    ,%s)\n",lab, operatorLimit[lab - 33]);
			fprintf(fp1, "(%3d   ,%s)\n", lab,operatorLimit[lab - 33]);
		}
	}
	fclose(fp1);

	if ((fp2 = fopen("c:\\out2.txt", "w+")) == NULL)
	{	// open the identifier-list output file
		std::cout << "can't open this file";
		exit(0);
	}
	for(int i = 0 ; i < num ; i++)
	{	// list every recorded identifier
		printf("第%d个标识符:  %s\n", i + 1, IdenOperator[i]);
		fprintf(fp2, "第%d个标识符:  %s\n", i + 1, IdenOperator[i]);
	}
	fclose(fp2);
	return 0;
}




// 猜你喜欢
//
// 转载自 blog.csdn.net/qq_40240576/article/details/83270950