C语言声明分析器

一、简单的C语言声明优先级规则：

A.声明从它的名字开始读取，然后按照优先级顺序依次读取。
B.优先级从高到低依次为：
b1.声明中被括号括起来的那部分
b2.后缀操作符：
括号（）表示这是一个函数，而方括号[]表示这是一个数组。
b3.前缀操作符：*表示“指向…的指针”。
C.如果const和volatile关键字的后面紧跟类型说明符（如int），那么它作用于类型说明符。其他情况下，const和volatile关键字作用于它左边紧邻的指针星号。

基于以上规则，我们可以设计一个简要的C语言声明解析器，通常被称作“cdecl”，其代码大致如下：

#define  _CRT_SECURE_NO_WARNINGS

#include<stdio.h>
#include<string.h>
#include<ctype.h>
#include<stdlib.h>
/* Capacity of the token stack (number of struct token entries in `stack`). */
#define MAXTOKENS      100
/* Size in bytes of a token's text buffer, including the terminating NUL. */
#define MAXTOKENLEN     64

/*
 * Categories assigned to alphanumeric tokens by classify_string().
 * Punctuation tokens are not listed here: they use their own character
 * code as the token type instead.
 */
enum type_tag
{
    IDENTIFIER = 0, /* any word that is not a recognised keyword */
    QUALIFIER  = 1, /* const / volatile */
    TYPE       = 2  /* a type keyword such as int, char, struct ... */
};

/* One lexical token of the declaration being analysed. */
struct token
{
    /* Either an enum type_tag value (for alphanumeric tokens) or the
       punctuation character itself, e.g. '*', '[' or '('. */
    char type;
    /* NUL-terminated text of the token. */
    char string[MAXTOKENLEN];
};

/* Index of the topmost used slot of `stack`; -1 means the stack is empty. */
int top = -1;
/* Tokens read before the first identifier, saved by read_to_first_identifer()
   and consumed later while the declarator is unwound. */
struct token  stack[MAXTOKENS];
/* The token most recently produced by gettoken(). */
struct token mythis;

/*
 * Stack helpers operating on the file-level `stack` / `top` pair.
 *
 * Every expansion is fully parenthesised so that the macros behave like
 * ordinary primary expressions: `pop.string` and `push(t).type` both bind
 * to the stack entry, not to a fragment of the expansion.
 *
 * Neither macro performs bounds checking; callers must make sure the stack
 * is non-empty before `pop` and not full before `push`.
 */

/* Yields the top stack entry and then removes it (post-decrements `top`). */
#define pop (stack[top--])

/* Stores a copy of token `s` in a new top slot; yields the stored value. */
#define push(s) (stack[++top] = (s))

/* Readable string comparison: STRCMP(a, ==, b) is true when a equals b. */
#define STRCMP(a, R, b)  (strcmp((a), (b)) R 0)

/*
 * Classify the word currently held in mythis.string.
 *
 * Returns QUALIFIER for "const" and "volatile", TYPE for the recognised
 * type keywords, and IDENTIFIER for anything else.
 *
 * Side effect: when the word is "const", the token text is overwritten in
 * place with the Chinese word that is printed for it later.
 */
int classify_string(void)
{
    static const char *const type_keywords[] = {
        "void", "char", "signed", "unsigned", "short", "int",
        "long", "float", "double", "struct", "union", "enum"
    };
    const size_t keyword_count = sizeof type_keywords / sizeof type_keywords[0];
    char *word = mythis.string;
    size_t i;

    if (strcmp(word, "const") == 0)
    {
        strcpy(word, "常量");
        return QUALIFIER;
    }

    if (strcmp(word, "volatile") == 0)
    {
        return QUALIFIER;
    }

    for (i = 0; i < keyword_count; i++)
    {
        if (strcmp(word, type_keywords[i]) == 0)
        {
            return TYPE;
        }
    }

    return IDENTIFIER;
}

void gettoken(void)
/*读取下一个标记到"mythis"*/
{
    char *p = mythis.string;
    /*略过空白字符*/
    while ( (*p = getchar()) == ' ' );

    if (isalnum(*p))
        /*读入的标识符以A-Z，0-9开头*/
    {
        while (isalnum(*++p = getchar()));
        ungetc(*p, stdin);
        *p = '\0';
        mythis.type = classify_string();
        return;
    }

    if (*p == '*')
    {
        strcpy(mythis.string, "指针，指向");
        mythis.type = '*';
        return;
    }
    mythis.string[1] = '\0';
    mythis.type = *p;
    return;
}

void read_to_first_identifer()
/*理解所有分析过程的代码段*/
{
    gettoken();
    while (mythis.type != IDENTIFIER)
    {
        push(mythis);
        gettoken();
    }
    printf("%s 是一个 ", mythis.string);
    gettoken();
}

/*
 * Handle one or more array suffixes ("[...]") that follow the identifier.
 *
 * On entry `mythis` is expected to hold '['. For every dimension the
 * function prints the array description; when the size starts with a digit
 * the index range [0..size-1] is printed as well. The size is parsed with
 * strtol() in base 0, so decimal, octal (leading 0) and hexadecimal
 * (leading 0x) constants are understood.
 *
 * Whatever remains inside the brackets (for example a symbolic size such as
 * [N]) is skipped up to the closing ']' so that the token following the
 * brackets is always the current token on return.
 */
void deal_with_arrays()
{
    while (mythis.type == '[')
    {
        printf("数组 ");
        gettoken();   /* a number, some other size expression, or ']' */

        /* Cast: passing a negative char to isdigit() is undefined. */
        if (isdigit((unsigned char)mythis.string[0]))
        {
            printf("[0..%ld], ", strtol(mythis.string, NULL, 0) - 1);
            gettoken();
        }

        /* Skip anything else up to ']'; stop at end of input as well. */
        while (mythis.type != ']' && !feof(stdin) && !ferror(stdin))
        {
            gettoken();
        }

        gettoken();   /* read past ']' */
        printf("数组内各元素类型为");
    }
}

/*
 * Skip a function parameter list and announce the function.
 *
 * On entry `mythis` is expected to hold the '(' that opens the parameter
 * list. Tokens are discarded up to the matching ')'; nested parentheses
 * (e.g. a function-pointer parameter) are balanced rather than ending the
 * list at the first ')'. Scanning also stops at end of input so that an
 * unterminated list cannot loop forever. Afterwards the token following
 * the list is read into `mythis`.
 */
void deal_with_function_args()
{
    int depth = 0; /* number of currently open '(' */

    for (;;)
    {
        if (mythis.type == '(')
        {
            depth++;
        }
        else if (mythis.type == ')' && --depth <= 0)
        {
            break;
        }

        if (feof(stdin) || ferror(stdin))
        {
            break;
        }
        gettoken();
    }

    gettoken();   /* read past the closing ')' */
    printf("一个函数，该函数返回一个");
}

/*
 * Pop every '*' token sitting on top of the stack and print its text.
 *
 * The emptiness check comes first so that stack[top] is never read when
 * top is -1 (for example when the input starts directly with an
 * identifier and nothing was pushed).
 */
void deal_with_pointers()
{
    while (top >= 0 && stack[top].type == '*')
    {
        printf("%s ", pop.string);
    }
}

/*
 * Describe everything that surrounds the identifier.
 *
 * First the suffix that follows the identifier (array brackets or a
 * function parameter list) is handled, then any pointer tokens on top of
 * the stack. After that the remaining stack entries are unwound: a saved
 * '(' means the current token closes a parenthesised group, so the next
 * token is read and the group's own suffix is processed recursively;
 * any other entry is printed.
 */
void deal_with_declarator()
{
    if (mythis.type == '[')
    {
        deal_with_arrays();
    }
    else if (mythis.type == '(')
    {
        deal_with_function_args();
    }

    deal_with_pointers();

    while (top >= 0)
    {
        if (stack[top].type != '(')
        {
            printf("%s型元素", pop.string);
            continue;
        }

        /* Drop the saved '(' and step over the token that closes it. */
        (void)pop;
        gettoken();
        deal_with_declarator();
    }
}

/*
 * Program entry point: stack tokens up to the first identifier, describe
 * the rest of the declaration, then terminate the output line.
 */
int main()
{
    read_to_first_identifer();
    deal_with_declarator();
    fputs("\n", stdout);
    return 0;
}

举例运行结果如下：
这里写图片描述

由于该代码是修改而来，汉语转换方面有时候会出现小问题，还需要进一步改正，但大体思路没有问题，可以供大家一起思考和学习。

猜你喜欢