Python Preliminary Experiment 2 LL(1) Grammar Construction

The content of this article:

Based on the principle of reading in and outputting grammar in the previous article Python Preliminary Experiment 1

Removal of duplicate productions, extraction of left factors and elimination of left recursion have also been added.

1. Converting a convertible non-LL(1) grammar into an LL(1) grammar goes through two stages: 1) eliminate the left
recursion of the grammar; 2) extract the left factors to eliminate backtracking.
2. Algorithm for eliminating the left recursion of a grammar:
        1) Sort all non-terminal symbols of grammar G into some order P1, P2, ..., Pn
        2) Process each non-terminal symbol Pi sequentially in the above order:
                for (j = 1; j <= i-1; j++)
                        substitute Pj into the productions of Pi (wherever a production of Pi starts with Pj);
                then eliminate the direct left recursion of Pi:
                        Pi -> Pi α | β, where β does not start with Pi, is rewritten as:
                                                        Pi  -> β Pi'
                                                        Pi' -> α Pi' | ε
        3) Simplify the grammar obtained above.
3. Algorithm for extracting left factors: a production of the form

        A  -> δβ1 | δβ2 | … | δβn | γ1 | γ2 | … | γm      (no γi starts with δ)

   is rewritten as

        A  -> δA' | γ1 | γ2 | … | γm

        A' -> β1 | β2 | … | βn
4. Use the above algorithm to construct an LL(1) grammar:
1) Read the grammar from the text file g.txt, using the result of Experiment 1, and store it in the data structure designed for Experiment 1;

2) Design the functions Remove_Left_Recursion() and Remove_Left_Gene() to implement the algorithms for eliminating
left recursion and extracting left factors, respectively; they operate on the grammar in place, eliminating its left recursion and extracting its left factors;

The code is shown below:

def Delete_Duplicate_Production(grammar):
    """Drop repeated alternatives from every non-terminal's production list.

    For each non-terminal listed in ``grammar[non_terminator]`` the first
    occurrence of every alternative is kept and later equal alternatives are
    removed; the relative order of the survivors is unchanged.  The production
    lists are updated in place and nothing is returned.

    The dictionary keys are read from the module-level names
    ``non_terminator`` and ``production``.
    """
    for symbol in grammar[non_terminator]:
        alternatives = grammar[production][symbol]
        unique = []
        for alternative in alternatives:
            # Alternatives are lists (unhashable), so "seen before" is
            # decided by equality against the ones already kept.
            if alternative not in unique:
                unique.append(alternative)
        # Slice assignment keeps the existing list object, so every other
        # reference to this production list sees the de-duplicated content.
        alternatives[:] = unique


def Get_New_Non_Terminator(grammar, new_non_terminator):
    """Return an unused non-terminal name derived from *new_non_terminator*.

    The primed name (``X'``) is preferred.  When that name is already listed
    in ``grammar[non_terminator]``, the first free name of the form ``X0``,
    ``X1``, ... is returned instead.  The grammar itself is not modified; the
    caller is responsible for registering the returned name.
    """
    taken = grammar[non_terminator]
    primed = new_non_terminator + '\''
    if primed not in taken:
        return primed
    suffix = 0
    while new_non_terminator + str(suffix) in taken:
        suffix += 1
    return new_non_terminator + str(suffix)


def _common_prefix_length(sequences):
    """Return the length of the longest prefix shared by all *sequences*."""
    length = 0
    # zip() stops at the shortest sequence, so a short alternative simply
    # ends the common prefix instead of causing an out-of-range index.
    for column in zip(*sequences):
        if any(symbol != column[0] for symbol in column):
            break
        length += 1
    return length


def Remove_Left_Gene(grammar):
    """Extract explicit left factors from *grammar* in place.

    For every non-terminal ``A`` the alternatives are scanned from left to
    right.  When an alternative shares its first symbol with one or more later
    alternatives, their longest common prefix ``δ`` is factored out::

        A  -> δ β1 | δ β2 | ...      becomes      A  -> δ A'
                                                  A' -> β1 | β2 | ...

    The new non-terminal is obtained from ``Get_New_Non_Terminator`` and is
    appended to ``grammar[non_terminator]``.  In ``A'`` the remainder of the
    first alternative comes first, followed by the remainders of the other
    factored alternatives from last to first.

    Only explicit common prefixes are handled; left factors that would become
    visible only after expanding a leading non-terminal are not looked for.

    The dictionary keys are read from the module-level names
    ``non_terminator`` and ``production``.
    """
    rules = grammar[production]
    # The list of non-terminals grows while it is being iterated; the loop
    # therefore also visits (and, if needed, factors) the freshly created ones.
    for name in grammar[non_terminator]:
        alternatives = rules[name]
        j = 0
        while j < len(alternatives) - 1:
            head = alternatives[j]
            if head:  # an empty (epsilon) alternative has nothing to factor
                # Positions of later alternatives starting with the same symbol.
                group = [k for k in range(j + 1, len(alternatives))
                         if alternatives[k] and alternatives[k][0] == head[0]]
                if group:
                    members = [head] + [alternatives[k] for k in group]
                    prefix_len = _common_prefix_length(members)
                    new_name = Get_New_Non_Terminator(grammar, name)
                    grammar[non_terminator].append(new_name)
                    tails = [head[prefix_len:]]
                    # Pop from the back so the remaining indices stay valid.
                    for k in reversed(group):
                        tails.append(alternatives.pop(k)[prefix_len:])
                    rules[new_name] = tails
                    alternatives[j] = head[:prefix_len] + [new_name]
            j += 1


def Remove_Left_Recursion(grammar):
    """Eliminate left recursion from *grammar* in place.

    The work is done in two phases, taking the non-terminals in the order in
    which they are listed in ``grammar[non_terminator]``:

    1. Indirect left recursion is made direct: for every non-terminal ``Pi``
       and every non-terminal ``Pj`` listed before it, each alternative of
       ``Pi`` that starts with ``Pj`` is replaced by the alternatives of
       ``Pj`` followed by the rest of that alternative.
    2. Direct left recursion is removed: ``Pi -> Pi α | β`` becomes
       ``Pi -> β Pi'`` and ``Pi' -> α Pi' | ε``, where the new name comes
       from ``Get_New_Non_Terminator`` and ε is stored as an empty list.

    If every alternative of some non-terminal is left-recursive the rule
    cannot be rewritten; a diagnostic is printed and the function returns
    immediately, leaving the grammar as transformed so far.  Otherwise
    duplicate alternatives are removed before returning.

    The dictionary keys are read from the module-level names
    ``non_terminator`` and ``production``.
    """
    names = grammar[non_terminator]
    rules = grammar[production]

    # Phase 1: turn indirect left recursion into direct left recursion.
    for name in names:
        alternatives = rules[name]
        for earlier in names:
            if earlier == name:  # only non-terminals listed before `name`
                break
            # Alternatives appended during this pass sit beyond `pending`
            # and are not expanded again for the same `earlier`, which keeps
            # the pass finite even if `earlier` is itself left-recursive.
            pending = len(alternatives)
            # Restart the scan for every `earlier`; otherwise alternatives
            # already scanned in a previous pass would never be checked.
            j = 0
            while j < pending:
                alternative = alternatives[j]
                # Compare by value: equal symbol names are not guaranteed to
                # be the same string object.
                if alternative and alternative[0] == earlier:
                    del alternatives[j]
                    pending -= 1
                    rest = alternative[1:]
                    alternatives.extend(sub + rest for sub in rules[earlier])
                else:
                    j += 1

    # Phase 2: remove direct left recursion.  Iterate over a snapshot because
    # the helper non-terminals appended below never need processing.
    for name in list(names):
        alternatives = rules[name]
        recursive = [alt for alt in alternatives if alt and alt[0] == name]
        if not recursive:
            continue
        if len(recursive) == len(alternatives):
            # No non-recursive alternative is left to start the rewritten rule.
            print('消除直接左递归错误')
            print(name + '->', end='')
            print('|'.join(''.join(str(symbol) for symbol in alt)
                           for alt in alternatives), end='')
            print(';')
            print(name, '的所有产生式都有左递归')
            return
        new_name = Get_New_Non_Terminator(grammar, name)
        names.append(new_name)
        others = [alt for alt in alternatives if not (alt and alt[0] == name)]
        for alt in others:
            alt.append(new_name)
        alternatives[:] = others
        # An alternative that is just `name` itself (P -> P) contributes
        # nothing and is dropped; the trailing empty list is epsilon.
        rules[new_name] = [alt[1:] + [new_name] for alt in recursive if alt[1:]]
        rules[new_name].append([])
    Delete_Duplicate_Production(grammar)


def GetGrammar(grammar):
    """Print the grammar, one rule per line, as ``Head->alt1|alt2|...;``.

    Rules are printed in the order of ``grammar[non_terminator]``.  The
    symbols of an alternative are written back to back without separators,
    alternatives are separated by ``|``, and an empty alternative prints as
    nothing between its separators.

    The dictionary keys are read from the module-level names
    ``non_terminator`` and ``production``.
    """
    for head in grammar[non_terminator]:
        print(head + '->', end='')
        rendered = (''.join(str(symbol) for symbol in alternative)
                    for alternative in grammar[production][head])
        print('|'.join(rendered) + ';')


def LongStr(sting, list_data, start=0):
    """Find the longest entry of *list_data* that occurs in *sting* at *start*.

    Args:
        sting: the string to examine.
        list_data: sequence of candidate strings.
        start: position in *sting* at which a candidate must begin
            (defaults to 0, i.e. a plain prefix test).

    Returns:
        A tuple ``(maxlen, maxstr)``: the length of the longest matching
        candidate and its index in *list_data*.  ``(0, -1)`` is returned when
        no non-empty candidate matches.  Among equally long matches the first
        one wins.
    """
    maxlen = 0
    maxstr = -1
    for index, candidate in enumerate(list_data):
        if len(candidate) > maxlen and sting.startswith(candidate, start):
            maxlen = len(candidate)
            maxstr = index
    return maxlen, maxstr


def PrimaryTreatment(grammar):
    """Split every raw production string into a list of grammar symbols.

    On entry each alternative is a one-element list holding the right-hand
    side as a single string.  It is replaced in place by the list of its
    symbols: at every position the longest non-terminal name that matches
    there is taken as one symbol; if none matches, the single character at
    that position is taken as a terminal.  An empty string becomes an empty
    list.

    The dictionary keys are read from the module-level names
    ``non_terminator`` and ``production``.
    """
    names = grammar[non_terminator]
    for name in names:
        alternatives = grammar[production][name]
        for index, alternative in enumerate(alternatives):
            text = alternative[0]
            symbols = []
            position = 0
            while position < len(text):
                # Match at the current position, not at the start of the
                # string, so non-terminals after the first symbol are found.
                width, _ = LongStr(text, names, position)
                if width == 0:
                    width = 1  # no non-terminal here: one terminal character
                symbols.append(text[position:position + width])
                position += width
            alternatives[index] = symbols


def OpenGrammar(file, encoding=None):
    """Read a grammar from a text file and return it in tokenized form.

    Every line containing ``->`` is treated as a rule: the text before the
    first ``->`` is the non-terminal, and the text after it, up to the first
    ``;`` or the end of the line, is split at ``|`` into alternatives.  Lines
    without ``->`` are ignored.  Several lines may define the same
    non-terminal; their alternatives are accumulated.

    The alternatives are then split into symbols by ``PrimaryTreatment`` and
    duplicates are removed by ``Delete_Duplicate_Production``.

    Side effect: sets the module-level names ``non_terminator`` and
    ``production`` to the dictionary keys used by the returned grammar.

    Args:
        file: path of the grammar file.
        encoding: text encoding passed to ``open``; ``None`` keeps the
            platform default.

    Returns:
        ``{non_terminator: [names...], production: {name: [[symbol, ...], ...]}}``
    """
    global non_terminator, production
    non_terminator = '非终结符'
    production = '产生式'
    grammar = {non_terminator: [], production: {}}
    # The context manager closes the file even if parsing raises.
    with open(file, encoding=encoding) as source:
        for line in source:
            arrow = line.find('->')
            if arrow == -1:
                continue  # not a rule line
            head = line[:arrow]
            if head not in grammar[non_terminator]:
                grammar[non_terminator].append(head)
                grammar[production][head] = []
            body = line[arrow + 2:]
            # The right-hand side ends at the first ';' or newline; the last
            # line of the file may have neither.
            for terminator in (';', '\n'):
                body = body.partition(terminator)[0]
            # Each alternative is stored as a one-element list holding the
            # raw string, the form PrimaryTreatment expects.
            grammar[production][head].extend([alt] for alt in body.split('|'))
    PrimaryTreatment(grammar)
    Delete_Duplicate_Production(grammar)
    return grammar


# Driver: load the grammar from g.txt, then print it after loading and after
# each transformation (left-recursion elimination, then left factoring).
file = 'g.txt'  # location of the grammar file
grammar = OpenGrammar(file)
print("读取文法后------------------------")
GetGrammar(grammar)
print(grammar)  # raw data structure, for inspection
for _transform, _banner in (
    (Remove_Left_Recursion, "消除左递归后------------------------"),
    (Remove_Left_Gene, "提取左因子后------------------------"),
):
    _transform(grammar)
    print(_banner)
    GetGrammar(grammar)

Create a text file g.txt and enter the following content in it:

S->Qc|c;
Q->Rb|b;
R->Sa|a;

result:

读取文法后------------------------
S->Qc|c;
Q->Rb|b;
R->Sa|a;
{'非终结符': ['S', 'Q', 'R'], '产生式': {'S': [['Q', 'c'], ['c']], 'Q': [['R', 'b'], ['b']], 'R': [['S', 'a'], ['a']]}}
消除左递归后------------------------
S->Qc|c;
Q->Rb|b;
R->aR'|caR'|bcaR';
R'->bcaR'|;
提取左因子后------------------------
S->Qc|c;
Q->Rb|b;
R->aR'|caR'|bcaR';
R'->bcaR'|;

 Inputs like this are also supported:

S->Qc|c|cc|da|Q1
Q->Rb|b|V
R->Sa|a|cV2
Q1->aa|bd
V2->d|Q2
Q2->a
V->V2|e|||

 result:

读取文法后------------------------
S->Qc|c|cc|da|Q1;
Q->Rb|b|V;
R->Sa|a|cV2;
Q1->aa|bd;
V2->d|Q2;
Q2->a;
V->V2|e|;
{'非终结符': ['S', 'Q', 'R', 'Q1', 'V2', 'Q2', 'V'], '产生式': {'S': [['Q', 'c'], ['c'], ['c', 'c'], ['d', 'a'], ['Q1']], 'Q': [['R', 'b'], ['b'], ['V']], 'R': [['S', 'a'], ['a'], ['c', 'V', '2']], 'Q1': [['a', 'a'], ['b', 'd']], 'V2': [['d'], ['Q2']], 'Q2': [['a']], 'V': [['V2'], ['e'], []]}}
消除左递归后------------------------
S->Qc|c|cc|da|Q1;
Q->Rb|b|V;
R->aR'|cV2R'|caR'|ccaR'|daaR'|Q1aR'|bcaR'|VcaR';
Q1->aa|bd;
V2->d|Q2;
Q2->a;
V->V2|e|;
R'->bcaR'|;
提取左因子后------------------------
S->Qc|cS'|da|Q1;
Q->Rb|b|V;
R->aR'|cR0|daaR'|Q1aR'|bcaR'|VcaR';
Q1->aa|bd;
V2->d|Q2;
Q2->a;
V->V2|e|;
R'->bcaR'|;
S'->|c;
R0->V2R'|caR'|aR';

Guess you like

Origin blog.csdn.net/weixin_58196051/article/details/130689274