PL/0编译原理实验(南航)五：实验代码、PL/0代码、中间代码的详细说明

其他 2021-03-03 20:00:33 阅读次数: 0

编译原理实验的最后一部分，献上实验代码、PL/0代码、中间代码的详细说明

断断续续每天晚上干几十分钟至几个小时，中间还因为加班咕了几天，至此终于算是完成了！

PL/0代码

program id;
const m:=7, n:=85;
var x,y,z,q,r;
procedure multiply;
    var a,b;
    begin
        a:=x; b:=y; z:=0;
        while b>0 do
            begin
                if odd b then z:=z+a;
                a:=2*a; b:=b/2
            end
    end
begin
    x:=m; y:=n;
    call multiply
end

编译原理实验代码

import sys


def error(line_num, message):
    """Print a diagnostic naming the source line and the problem found there.

    Args:
        line_num: Line number of the offending token.
        message: Description of the problem (must be a string).
    """
    parts = ['程序第', str(line_num), '行发生了错误：', message]
    print(''.join(parts))


# Lexer tables: reserved words, single-character operators/delimiters, and the
# token stream that the lexer builds.
# NOTE: 'odd' and 'else' are not listed here; the parser matches them by their
# text, so the lexer tags them as identifiers.
key_word = ('program const var procedure begin end if then while do call '
            'read write').split()  # reserved words
symbol = list('+-*/()=,;')  # single-character operators and delimiters
token_list = []  # tokens in source order, appended by deal_word
token_index = 0  # index of the next token get_token hands out


def deal_word(word, line_num):
    """Split one whitespace-free chunk of source text into tokens.

    The caller splits each source line on whitespace; a chunk may still hold
    several tokens glued together (for example ``a:=x;``). Every token found
    is appended to ``token_list`` as a dict with the keys ``value``,
    ``line_num`` and ``attribute``:

    * identifiers and reserved words: ``attribute`` is ``'identifier'`` or
      ``'keyword'`` and ``value`` is the text;
    * numbers: ``attribute`` is ``'number'`` and ``value`` is an ``int``;
    * operators and delimiters: ``attribute`` equals ``value``.

    Args:
        word: The chunk of source text (contains no whitespace).
        line_num: Source line the chunk came from, stored on every token.

    Raises:
        SystemExit: With status 1 when an illegal character is met. The
            status is non-zero so that callers of the process can tell the
            lexical error from a successful run.
    """
    length = len(word)
    index = 0
    while index < length:
        start = index
        ch = word[index]
        if ch.isalpha():  # letter first: identifier or reserved word
            while index < length and (word[index].isalpha() or word[index].isdigit()):
                index += 1
            value = word[start:index]
            attribute = 'keyword' if value in key_word else 'identifier'
        elif ch.isdigit():  # digit first: integer literal
            while index < length and word[index].isdigit():
                index += 1
            value = int(word[start:index])
            attribute = 'number'
        elif ch in ':<>':  # ':' ':='   '<' '<=' '<>'   '>' '>='
            index += 1
            followers = '=>' if ch == '<' else '='
            if index < length and word[index] in followers:
                index += 1
            value = attribute = word[start:index]
        elif ch in symbol:  # single-character operator or delimiter
            index += 1
            value = attribute = ch
        else:
            print('行数' + str(line_num) + ':非法字符' + word[index])
            sys.exit(1)
        token_list.append({'value': value, 'line_num': line_num, 'attribute': attribute})


def get_token():
    """Return the next token from ``token_list`` and advance ``token_index``.

    Returns:
        The token dict at the current position.

    Raises:
        SystemExit: With status 1 when no tokens remain, i.e. the parser
            asked for more input than the source contains. The condition is
            reported through ``error`` (using the line of the last token, or
            0 for an empty source) instead of exiting silently.
    """
    global token_index
    if token_index >= len(token_list):
        last_line = token_list[-1]['line_num'] if token_list else 0
        error(last_line, '程序意外结束')
        sys.exit(1)
    token = token_list[token_index]
    token_index += 1
    return token


# State shared by the parser and the code generator.
table_list, mid_code = [], []  # symbol table entries / emitted instructions (one dict each)
level = address = 0  # level: current nesting depth, raised by block() around each procedure's block
#                      address: not referenced by any code visible in this file
dx = 3  # next free slot in the current frame; slots 0-2 are SL, DL and RA


def record_table(name, kind, value=None, level=None, address=None, size=None):
    """Append one entry to the symbol table ``table_list``.

    Args:
        name: Identifier text.
        kind: Entry kind; the parser uses 'CONSTANT', 'VARIABLE' and 'PROCEDURE'.
        value: Constant value, when applicable.
        level: Nesting level the name was declared at.
        address: Frame slot (variables) or code index (procedures).
        size: Stored as given; not read by any code visible in this file.
    """
    table_list.append({
        'name': name,
        'kind': kind,
        'value': value,
        'level': level,
        'address': address,
        'size': size,
    })


def emit(f, l, a):
    """Append the instruction ``{'F': f, 'L': l, 'A': a}`` to ``mid_code``.

    Args:
        f: Operation name (for example 'JMP', 'LIT', 'OPR').
        l: Level difference operand.
        a: Address / value / sub-operation operand.
    """
    mid_code.append({'F': f, 'L': l, 'A': a})


def find_table(val):
    """Return the index of the first ``table_list`` entry named ``val``, or -1."""
    matches = (pos for pos, entry in enumerate(table_list) if entry['name'] == val)
    return next(matches, -1)


def prog():
    """Parse ``<prog> -> program <id> ; <block>``.

    Each of the three header tokens is checked in turn; the first one that is
    wrong is reported and parsing stops without entering the block.
    """
    token = get_token()
    if token['value'] != 'program':
        error(token['line_num'], '缺少program关键字')
        return

    token = get_token()  # program name
    if token['attribute'] != 'identifier':
        error(token['line_num'], '语法错误,缺少程序名称')
        return

    token = get_token()  # terminating ';'
    if token['value'] != ';':
        error(token['line_num'], '前面缺少;')
        return

    block()


def block():
    """Parse ``<block> -> [<condecl>][<vardecl>][<proc>]<body>`` and emit its code.

    The constant, variable and procedure declarations are parsed inline here
    (rather than in separate functions) so that the one-token lookahead can be
    carried from one section to the next.

    Code layout produced for a block::

        JMP  0, <start of body>     # placeholder first, back-patched below
        ...code of nested procedures...
        INT  0, dx                  # reserve SL, DL, RA plus declared variables
        ...body...
        OPR  0, 0                   # return

    Side effects: resets ``dx`` to 3 on entry, appends to ``table_list`` and
    ``mid_code``, and temporarily raises ``level`` while a nested procedure's
    block is parsed.
    """
    global dx
    global level
    global token_index
    dx = 3  # every frame starts with three bookkeeping slots: SL, DL, RA
    # Procedure bodies are emitted before this block's own body, so execution
    # must jump over them. The target is unknown until they are generated:
    # emit a placeholder now and remember where it is.
    cx1 = len(mid_code)
    emit('JMP', 0, 0)
    token = get_token()
    if token['value'] == 'const':  # <condecl> -> const <const>{,<const>};
        const()
        token = get_token()
        while token['value'] == ',':
            const()
            token = get_token()
        if token['value'] == ';':
            token = get_token()
        else:
            error(token['line_num'], '缺少;')
    if token['value'] == 'var':  # <vardecl> -> var <id>{,<id>};
        token = get_token()
        if token['attribute'] == 'identifier':
            # Each variable takes the next free slot of the current frame.
            record_table(token['value'], 'VARIABLE', level=level, address=dx)
            dx += 1
            token = get_token()
        else:
            error(token['line_num'], 'var后面需要跟标识符')
        while token['value'] == ',':
            token = get_token()
            if token['attribute'] == 'identifier':
                record_table(token['value'], 'VARIABLE', level=level, address=dx)
                dx += 1
                token = get_token()
            else:
                # A ',' must introduce another name; report it instead of
                # accepting the dangling comma silently. The offending token
                # is kept as lookahead for the checks that follow.
                error(token['line_num'], ',后面需要跟标识符')
        if token['value'] == ';':
            token = get_token()
        else:
            error(token['line_num'], '缺少;')
    # <proc> -> procedure <id> ; <block> {; <proc>}
    # The loop handles consecutive procedures separated by ';'; procedures
    # nested inside a procedure are handled by the recursive block() call.
    while token['value'] == 'procedure':
        token = get_token()
        if token['attribute'] == 'identifier':
            # The entry address is the index of the JMP that the nested
            # block() is about to emit as its first instruction.
            record_table(token['value'], 'PROCEDURE', level=level, address=len(mid_code))
            token = get_token()
        else:
            error(token['line_num'], '函数名必须是标识符')
        if token['value'] != ';':  # parameters are not supported
            error(token['line_num'], '缺少;')
            token_index -= 1
        level += 1
        cur_dx = dx  # the nested block resets dx; keep ours to restore it
        block()
        level -= 1
        dx = cur_dx
        token = get_token()
        if token['value'] == ';':  # another procedure may follow
            token = get_token()
        else:
            break
    token_index -= 1  # push back the lookahead so body() sees it
    # Back-patch the placeholder: the body starts at the next instruction.
    mid_code[cx1] = {'F': 'JMP', 'L': 0, 'A': len(mid_code)}
    emit('INT', 0, dx)
    body()
    emit('OPR', 0, 0)


def const():
    """Parse ``<const> -> <id> := <integer>`` and record the constant.

    On success a 'CONSTANT' entry carrying the integer value and the current
    ``level`` is added to the symbol table. The first malformed token is
    reported and parsing of this definition stops there.
    """
    name_token = get_token()
    variable = name_token['value']
    if name_token['attribute'] != 'identifier':
        error(name_token['line_num'], '缺少标识符')
        return

    assign_token = get_token()
    if assign_token['value'] != ':=':
        error(assign_token['line_num'], '缺少:=')
        return

    number_token = get_token()
    if number_token['attribute'] != 'number':
        error(number_token['line_num'], ':=后面需要跟整数')
        return

    record_table(variable, 'CONSTANT', value=number_token['value'], level=level)


def body():
    """Parse ``<body> -> begin <statement>{;<statement>} end``.

    A missing ``begin`` or ``end`` is reported and the offending token is
    pushed back so that it can be parsed as whatever it really is.
    """
    global token_index
    opener = get_token()
    if opener['value'] != 'begin':
        error(opener['line_num'], '缺少begin')
        token_index -= 1

    # One statement, then another one after every ';'.
    while True:
        statement()
        follower = get_token()
        if follower['value'] != ';':
            break

    if follower['value'] != 'end':
        error(follower['line_num'], '缺少end')
        token_index -= 1


def statement():
    """Parse one statement and emit its intermediate code.

    Grammar handled::

        <statement> -> <id> := <exp>
                     | if <lexp> then <statement> [else <statement>]
                     | while <lexp> do <statement>
                     | call <id>
                     | <body>

    ``read``/``write`` and procedure arguments are not implemented.

    Code shapes::

        if C then S            C; JPC end; S; end:
        if C then S1 else S2   C; JPC else; S1; JMP end; else: S2; end:
        while C do S           top: C; JPC end; S; JMP top; end:

    Errors are reported through ``error``; where possible the offending token
    is pushed back so that parsing can continue.
    """
    global token_index
    token = get_token()
    if token['value'] == 'end':
        # body() calls statement() after every ';', so a ';' directly before
        # 'end' lands here. Report it and let body() see the 'end'.
        error(token['line_num'], ';是多余的')
        token_index -= 1
        return
    if token['attribute'] == 'identifier':  # <id> := <exp>
        index = find_table(token['value'])
        target = None  # symbol-table entry to store into; None if invalid
        if index == -1:
            error(token['line_num'], token['value'] + '未定义')
        elif table_list[index]['kind'] != 'VARIABLE':
            error(token['line_num'], table_list[index]['name'] + '不是一个变量')
        else:
            target = table_list[index]
        token = get_token()
        if token['value'] != ':=':
            error(token['line_num'], '缺少:=')
            token_index -= 1
        expression()
        # Only a real variable has a frame address. Emitting STO for a
        # constant or procedure would produce an instruction with address
        # None, which the interpreter cannot execute.
        if target is not None:
            emit('STO', level - target['level'], target['address'])
    elif token['value'] == 'if':  # if <lexp> then <statement> [else <statement>]
        lexp()
        token = get_token()
        if token['value'] != 'then':
            error(token['line_num'], '缺少关键字then')
            token_index -= 1
        cx2 = len(mid_code)  # position of the JPC that skips the then-branch
        emit('JPC', 0, 0)
        statement()
        token = get_token()
        if token['value'] == 'else':
            cx1 = len(mid_code)  # position of the JMP that skips the else-branch
            emit('JMP', 0, 0)
            # A false condition must land on the first instruction of the
            # else-branch, i.e. just after the JMP, not on the JMP itself
            # (which would skip the else-branch as well).
            mid_code[cx2] = {'F': 'JPC', 'L': 0, 'A': len(mid_code)}
            statement()
            mid_code[cx1] = {'F': 'JMP', 'L': 0, 'A': len(mid_code)}
        else:
            token_index -= 1  # not an else: push the lookahead back
            mid_code[cx2] = {'F': 'JPC', 'L': 0, 'A': len(mid_code)}
    elif token['value'] == 'while':  # while <lexp> do <statement>
        jmp_addr = len(mid_code)  # the condition is re-evaluated from here
        lexp()
        token = get_token()
        if token['value'] != 'do':
            error(token['line_num'], '缺少do关键字')
            token_index -= 1
        cx2 = len(mid_code)  # position of the JPC that leaves the loop
        emit('JPC', 0, 0)
        statement()
        emit('JMP', 0, jmp_addr)
        mid_code[cx2] = {'F': 'JPC', 'L': 0, 'A': len(mid_code)}
    elif token['value'] == 'call':  # call <id>
        token = get_token()
        if token['attribute'] != 'identifier':
            error(token['line_num'], '函数名必须是标识符')
        else:
            index = find_table(token['value'])
            if index == -1:
                error(token['line_num'], token['value'] + '未定义')
            elif table_list[index]['kind'] == 'PROCEDURE':
                emit('CAL', level - table_list[index]['level'], table_list[index]['address'])
            else:
                error(token['line_num'], token['value'] + '不是函数名')
    elif token['value'] == 'begin':  # nested <body>
        token_index -= 1
        body()
    else:
        # Nothing a statement can start with. The token is consumed on
        # purpose: pushing it back and entering body() would make body() and
        # statement() call each other forever on the same token.
        error(token['line_num'], '非法的语句开头')


def expression():
    """Parse ``<exp> -> [+|-]<term>{<aop><term>}`` and emit stack code.

    A leading '-' emits ``OPR 0 1`` (negate) after the first term. Each binary
    '+' / '-' emits ``OPR 0 2`` / ``OPR 0 3`` after its right-hand term. The
    token that ends the expression is pushed back.
    """
    global token_index
    additive_opcode = {'+': 2, '-': 3}

    sign = get_token()
    if sign['value'] in ('+', '-'):
        term()
        if sign['value'] == '-':
            emit('OPR', 0, 1)
    else:
        token_index -= 1  # no sign: the token belongs to the first term
        term()

    operator = get_token()
    while operator['value'] in additive_opcode:
        term()
        emit('OPR', 0, additive_opcode[operator['value']])
        operator = get_token()
    token_index -= 1


def term():
    """Parse ``<term> -> <factor>{<mop><factor>}`` and emit stack code.

    Each '*' / '/' emits ``OPR 0 4`` / ``OPR 0 5`` after its right-hand
    factor. The token that ends the term is pushed back.
    """
    global token_index
    multiplicative_opcode = {'*': 4, '/': 5}

    factor()
    operator = get_token()
    while operator['value'] in multiplicative_opcode:
        factor()
        emit('OPR', 0, multiplicative_opcode[operator['value']])
        operator = get_token()
    token_index -= 1


def factor():
    """Parse ``<factor> -> <id> | <integer> | ( <exp> )`` and emit its code.

    * constant name: ``LIT 0, value``
    * variable name: ``LOD level-difference, address``
    * integer literal: ``LIT 0, value``
    * parenthesized expression: the code of the inner expression

    Undefined names, procedure names, a missing ')' and a token that cannot
    start a factor are reported through ``error``.
    """
    global token_index
    token = get_token()
    if token['attribute'] == 'identifier':  # look the name up
        index = find_table(token['value'])
        if index == -1:
            error(token['line_num'], token['value'] + '未定义')
        else:
            entry = table_list[index]
            if entry['kind'] == 'CONSTANT':
                emit('LIT', 0, entry['value'])  # push the constant's value
            elif entry['kind'] == 'VARIABLE':
                emit('LOD', level - entry['level'], entry['address'])  # push the variable's value
            elif entry['kind'] == 'PROCEDURE':
                error(token['line_num'], entry['name'] + '为过程名, 出错')
    elif token['attribute'] == 'number':
        emit('LIT', 0, token['value'])
    elif token['attribute'] == '(':
        expression()
        token = get_token()
        if token['attribute'] != ')':
            error(token['line_num'], '缺少右括号')
            token_index -= 1
    else:
        # Not an operand at all (for example the ';' in "x := ;"). Report it
        # and push the token back so the caller can still see it, instead of
        # silently swallowing it.
        error(token['line_num'], '此处需要标识符、数字或左括号')
        token_index -= 1


def lexp():
    """Parse ``<lexp> -> <exp> <lop> <exp> | odd <exp>`` and emit its code.

    ``odd`` emits ``OPR 0 6`` after its operand. A comparison emits the
    opcode of its operator after both operands: '=' 8, '<>' 9, '<' 10,
    '>=' 11, '>' 12, '<=' 13. If the operator is missing, the error is
    reported, the token is pushed back, the right-hand expression is still
    parsed and no comparison instruction is emitted.
    """
    global token_index
    first = get_token()
    if first['value'] == 'odd':
        expression()
        emit('OPR', 0, 6)
        return

    token_index -= 1  # the token belongs to the left-hand expression
    expression()

    relation = get_token()
    comparison_opcode = {'=': 8, '<>': 9, '<': 10, '>=': 11, '>': 12, '<=': 13}
    opcode = comparison_opcode.get(relation['value'])
    if opcode is None:
        error(relation['line_num'], '缺少比较运算符')
        token_index -= 1

    expression()
    if opcode is not None:
        emit('OPR', 0, opcode)


# Runtime data stack used when the intermediate code is interpreted.
# Slots 0-2 are the main program's SL, DL and RA and start out as 0.
stack = [0] * 8000


def get_sl(B, level):
    """Return the frame base reached by following ``level`` static links from ``B``.

    Slot 0 of every frame holds its static link, so each hop replaces the
    current base with ``stack[base]``. With ``level`` <= 0 the result is ``B``.
    """
    frame_base = B
    for _ in range(level):
        frame_base = stack[frame_base]
    return frame_base


def interpreter(code=None, data=None):
    """Execute intermediate code on a stack machine.

    Registers: ``base`` (start of the current frame), ``top`` (index of the
    top of the stack) and ``pc`` (index of the next instruction). Every frame
    starts with three bookkeeping slots: static link at ``base``, dynamic
    link (caller's base) at ``base + 1`` and return address at ``base + 2``.

    Instructions are dicts with keys ``F`` (operation), ``L`` (level
    difference) and ``A`` (address / value / sub-operation):

    * ``JMP``: jump to ``A``.
    * ``JPC``: pop the top; jump to ``A`` if it was 0 (false).
    * ``INT``: reserve frame space by raising ``top`` by ``A - 1``.
    * ``LOD`` / ``STO``: push from / pop into slot ``A`` of the frame ``L``
      static links away.
    * ``LIT``: push ``A``.
    * ``CAL``: build a new frame header and jump to ``A``.
    * ``OPR``: ``A`` = 0 return, 1 negate, 2 add, 3 subtract, 4 multiply,
      5 divide (truncating toward zero), 6 odd, 8 ``=``, 9 ``<>``, 10 ``<``,
      11 ``>=``, 12 ``>``, 13 ``<=``.

    Execution stops when a return sets ``pc`` back to 0, which is what the
    main program's return does because its return-address slot holds 0.

    Args:
        code: Instruction list; defaults to the module-level ``mid_code``.
        data: Data stack (a list of ints, modified in place); defaults to the
            module-level ``stack``.

    Raises:
        ZeroDivisionError: When the program divides by zero.
    """
    if code is None:
        code = mid_code
    if data is None:
        data = stack
    if not code:
        # Nothing was generated (e.g. the program header was rejected).
        return

    def frame_base(start, hops):
        """Follow ``hops`` static links starting from frame base ``start``."""
        while hops > 0:
            start = data[start]
            hops -= 1
        return start

    def truncating_div(a, b):
        """Integer division rounding toward zero, exact for any int size."""
        quotient = abs(a) // abs(b)
        return quotient if (a < 0) == (b < 0) else -quotient

    binary_ops = {
        2: lambda a, b: a + b,
        3: lambda a, b: a - b,
        4: lambda a, b: a * b,
        5: truncating_div,
        8: lambda a, b: a == b,
        9: lambda a, b: a != b,
        10: lambda a, b: a < b,
        11: lambda a, b: a >= b,
        12: lambda a, b: a > b,
        13: lambda a, b: a <= b,
    }

    base = 0
    top = 0
    pc = 0
    while True:
        instruction = code[pc]
        pc += 1  # default: fall through to the next instruction
        op = instruction['F']
        arg = instruction['A']
        if op == 'JMP':
            pc = arg
        elif op == 'JPC':
            if data[top] == 0:  # jump only when the condition is false
                pc = arg
            top -= 1  # the condition is consumed either way
        elif op == 'INT':
            top += arg - 1
        elif op == 'LOD':
            top += 1
            data[top] = data[frame_base(base, instruction['L']) + arg]
        elif op == 'STO':
            data[frame_base(base, instruction['L']) + arg] = data[top]
            top -= 1
        elif op == 'LIT':
            top += 1
            data[top] = arg
        elif op == 'CAL':
            top += 1
            data[top] = frame_base(base, instruction['L'])  # static link
            data[top + 1] = base  # dynamic link
            data[top + 2] = pc  # return address
            base = top
            pc = arg
        elif op == 'OPR':
            if arg == 0:  # return: drop the frame, restore caller state
                top = base - 1
                pc = data[top + 3]
                base = data[top + 2]
            elif arg == 1:
                data[top] = -data[top]
            elif arg == 6:
                data[top] = data[top] % 2
            elif arg in binary_ops:
                # Pop the right operand; the result replaces the left one.
                top -= 1
                data[top] = binary_ops[arg](data[top], data[top + 1])
        if pc == 0:  # the main program has returned
            break


if __name__ == '__main__':
    # Step 1 - lexing: read the source file and turn it into tokens; each
    # token records its text, its attribute and its line number.
    with open('code.txt', 'r') as file:
        for line_num, line in enumerate(file, start=1):
            for word in line.split():
                deal_word(word, line_num)
    # Step 2 - parsing, generating intermediate code along the way.
    prog()
    # Step 3 - interpret the generated intermediate code.
    interpreter()

中间代码说明

上文的PL/0代码生成的中间代码

{'F': 'JMP', 'L': 0, 'A': 30}   # 进入主函数
{'F': 'JMP', 'L': 0, 'A': 2}    # 进入multiply函数
{'F': 'INT', 'L': 0, 'A': 5}    # 开辟5个空间
{'F': 'LOD', 'L': 1, 'A': 3}    # 将x值放入栈顶
{'F': 'STO', 'L': 0, 'A': 3}    # 将栈顶值放入变量a
{'F': 'LOD', 'L': 1, 'A': 4}    # 将y值放入栈顶
{'F': 'STO', 'L': 0, 'A': 4}    # 将栈顶值放入变量b
{'F': 'LIT', 'L': 0, 'A': 0}    # 将常数0放入栈顶
{'F': 'STO', 'L': 1, 'A': 5}    # 将栈顶值放入变量z
{'F': 'LOD', 'L': 0, 'A': 4}    # while循环开始 将变量b的值放入栈顶
{'F': 'LIT', 'L': 0, 'A': 0}    # 将常数0放入栈顶
{'F': 'OPR', 'L': 0, 'A': 12}   # 进行b > 0比较
{'F': 'JPC', 'L': 0, 'A': 29}   # 不满足条件结束循环
{'F': 'LOD', 'L': 0, 'A': 4}    # 将b值放入栈顶
{'F': 'OPR', 'L': 0, 'A': 6}    # 判断栈顶值是否是奇数
{'F': 'JPC', 'L': 0, 'A': 20}   # if语句不满足条件跳转
{'F': 'LOD', 'L': 1, 'A': 5}    # 取z的值放入栈顶
{'F': 'LOD', 'L': 0, 'A': 3}    # 取a的值放入栈顶
{'F': 'OPR', 'L': 0, 'A': 2}    # 次栈顶与栈顶相加
{'F': 'STO', 'L': 1, 'A': 5}    # 将栈顶值放入变量z
{'F': 'LIT', 'L': 0, 'A': 2}    # 取常数2放入栈顶
{'F': 'LOD', 'L': 0, 'A': 3}    # 取变量a放入栈顶
{'F': 'OPR', 'L': 0, 'A': 4}    # 次栈顶与栈顶相乘
{'F': 'STO', 'L': 0, 'A': 3}    # 栈顶数赋值给变量a
{'F': 'LOD', 'L': 0, 'A': 4}    # 取变量b放入栈顶
{'F': 'LIT', 'L': 0, 'A': 2}    # 取常数2放入栈顶
{'F': 'OPR', 'L': 0, 'A': 5}    # 次栈顶与栈顶相除
{'F': 'STO', 'L': 0, 'A': 4}    # 栈顶数赋值给变量b
{'F': 'JMP', 'L': 0, 'A': 9}    # 回到while循环
{'F': 'OPR', 'L': 0, 'A': 0}    # 结束multiply函数    
{'F': 'INT', 'L': 0, 'A': 8}    # 主函数开始的地方
{'F': 'LIT', 'L': 0, 'A': 7}    # 常数7放入栈顶
{'F': 'STO', 'L': 0, 'A': 3}    # 栈顶数赋给变量x
{'F': 'LIT', 'L': 0, 'A': 85}   # 常数85放入栈顶
{'F': 'STO', 'L': 0, 'A': 4}    # 栈顶数赋值给变量y
{'F': 'CAL', 'L': 0, 'A': 1}    # 跳转执行函数
{'F': 'OPR', 'L': 0, 'A': 0}    # 结束主函数