词法分析器的实现 C++

在 VS2015 + Win10 环境下,实现一个满足如下规则的词法分析器。

//(1)该语言大小写不敏感;
//(2)字母为 a - z A - Z,数字为 0 - 9;
//(3)可以对上述文法进行扩充和改造;
//(4) ‘/*……*/’为程序的注释部分。
//[设计要求] 
//(1)给出各单词符号的类别编码;
//(2)词法分析程序应能发现输入串中的错误;
//(3)词法分析作为单独一遍编写,词法分析结果为二元式序列组成的中间文件;
//(4)设计两个测试用例(尽可能完备),并给出测试结果。

/*<标识符>→字母︱ <标识符>字母︱ <标识符>数字
<无符号整数>→数字︱ <无符号整数>数字
<单字符分界符> →+ ︱- ︱* ︱; ︱(︱)
<双字符分界符>→<大于>=︱<小于>=︱<小于>>︱<冒号>=︱<斜竖>* 
<小于>→<
<等于>→= 
<大于>→> 
<冒号> →: 
<斜竖> →/
该语言的保留字 : begin end if then else for do while and or not
*/

#include "stdafx.h"
#include <ctype.h>
#include <stdio.h>
#include <string.h>

//类别编码说明
/*
保留字:
begin 1
end   2
if    3
then  4
else  5
for   6
do    7
while 8
and   9
or    10
not   11
标识符12
数字  13
<=    14
<>    15
<     16
:=    17
:     18
>=    19
>     20
+     21
-     22
*     23
;     24
|     25
左注释26
右注释27
(     28
)     29
,     30
.     31
*/

// Reserved-word table. lookup() reports a match as the row index + 1, which is the
// word's category code. NOTE(review): the name "reverse" is presumably a typo for "reserved".
char reverse[11][10] = {"begin","end","if","then","else","for","do","while","and","or","not"};

int line = 1;// Current line number of the input (starts at 1); printed in error messages

/*
 * Print one token to standard output as "<code>,<lexeme>" followed by a newline,
 * with the lexeme wrapped in angle brackets (e.g. "12,<count>").
 *
 * a: category code of the token.
 * s: NUL-terminated lexeme text. It is only read, so it is const-qualified;
 *    callers pass string literals as well as writable buffers.
 */
void out(int a, const char *s)
{
	printf("%d,<%s>\n", a, s);
}
/*
 * Look a lexeme up in the reserved-word table.
 *
 * token: NUL-terminated lexeme; the caller is expected to have lower-cased it,
 *        because the comparison is exact.
 *
 * Returns the 1-based position of the word in `reverse` (its category code),
 * or 0 when the lexeme is not a reserved word.
 *
 * The whole string must match: a reserved word that is merely a prefix of the
 * lexeme ("ends") or that matches only from some offset ("bo" vs "do") is not
 * a hit.
 */
int lookup(const char *token)
{
	const int count = (int)(sizeof reverse / sizeof reverse[0]);

	for (int idx = 0; idx < count; idx++)
	{
		if (strcmp(reverse[idx], token) == 0)
			return idx + 1;
	}
	return 0;
}
void scanner(FILE *fp)
{
	char ch;
	int i, c;
	char TOKEN[20];
	ch = fgetc(fp);
	if (isalpha(ch))
	{
		ch = tolower(ch);
		TOKEN[0] = ch;
		ch = fgetc(fp);
		ch = tolower(ch);
		i = 1;
		while (isalnum(ch))
		{
			TOKEN[i] = ch;
			i++;
			ch = fgetc(fp);
			ch = tolower(ch);
		}
		TOKEN[i] = '\0';
		fseek(fp, -1, 1);
		c = lookup(TOKEN);  //查找保留字
		if (c == 0)
			out(12, TOKEN);  //标识符
		else
			out(c, TOKEN); //保留字
	}
	else if (isdigit(ch))  //判断整数
	{
		TOKEN[0] = ch;
		ch = fgetc(fp);
		i = 1;
		while (isdigit(ch))
		{
			TOKEN[i] = ch;
			i++;
			ch = fgetc(fp);
		}
		if (isalpha(ch))  //错误的标识符格式
			printf("error in line %d,wrong format for it!\n",line);
		TOKEN[i] = '\0';
		fseek(fp, -1, 1);
		out(13, TOKEN);  //整型
	}
	else switch (ch)
	{
	case '<':
		ch = fgetc(fp);
		if (ch == '=')
			out(14, "<=");
		else 
			if (ch == '>')
				out(15, "<>");
			else 
			{
				fseek(fp, -1, 1);
				out(16, "<");
			}
		break;
	case ':':
		ch = fgetc(fp);
		if (ch == '=')
			out(17, ":=");
		else 
		{
			fseek(fp, -1, 1);
			out(18, ":");
		}
		break;
	case '>':
		ch = fgetc(fp);
		if (ch == '=')
			out(19, ">=");
		else 
		{
			fseek(fp, -1, 1);
			out(20, ">");
		}
		break;
	case '+':
		out(21, "+"); 
		break;
	case '-':
		out(22, "-"); 
		break;
	case '*':
		if (fgetc(fp) == '/')
		{
			out(27, "*/");
		}
		else
		{
			out(23, "*");
			fseek(fp,-1,1);
		}
		break;
	case ';':
		out(24, ";"); 
		break;
	case '|':
		out(25, "|"); 
		break;
	case '/':
		ch = fgetc(fp);
		if (ch == '*')
		{
			out(26, "/*");
			printf("注释内容:");
			char x=fgetc(fp), 
				y=fgetc(fp);
			while (x != '*'|| y!='/') 
			{
				printf("%c", x);
				x = y;
				y = fgetc(fp);
				if (x == '*'&&y == '/') 
				{ 
					printf("\n"); 
					out(27, "*/"); 
				}
			}
		}
		else
			fseek(fp,-1,1); 
		break;
	case '(':
		out(28, "(");
		break;
	case ')':
		out(29, ")");
		break;
	case ',':
		out(30, ",");
		break;
	case '.':
		out(31, ".");
		break;
	default:
		if (ch == ' ' || ch == '\n' || ch == '\t')
			if (ch == '\n')
				line++;
			else;
		else
			printf("error in line %d,can't recognize this character! %c\n", line, ch);
		break;
	}
}


/*
 * Entry point: echo the contents of "code.txt", then run the lexer over the
 * same file, printing one line per token.
 *
 * Returns 0 on success, 1 when the input file cannot be opened.
 */
int main(void)
{
	FILE *fp = fopen("code.txt", "r");
	int c;  /* int so that EOF stays distinguishable from every byte value */

	if (fp == NULL)
	{
		perror("code.txt");
		return 1;
	}

	printf("\n源程序为:\n");
	while ((c = fgetc(fp)) != EOF)
	{
		putchar(c);
	}
	printf("\n\n");

	/* Reuse the open stream instead of opening the file a second time
	   (which leaked the first handle); rewind also clears the EOF flag. */
	rewind(fp);

	while ((c = fgetc(fp)) != EOF)  /* stop at the end of the source */
	{
		/* Hand the peeked character back; relative fseek is not reliable
		   on text-mode streams. */
		ungetc(c, fp);
		scanner(fp);
	}

	fclose(fp);
	return 0;
}

猜你喜欢

转载自blog.csdn.net/qq_35014850/article/details/80090791