词法分析器设计

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/l450741881/article/details/50734945

词法分析是一个读单词的过程:从输入的源程序中识别出各个具有独立意义的单词,即基本保留字、标识符、常数、运算符、分隔符五大类。程序语言的单词符号一般分为以下五种:关键字(保留字/基本字):if、while、begin……;标识符:常量名、变量名……;常数:34、56.78、true、'a'……;运算符:+、-、*、/、<、and、or……;界限符:, ; ( ) { } /* ……。

方法:

词法分析器的设计方法有如下四个步骤:

1.写出该语言的词法规则。

2.把词法规则转换为相应的状态转换图。

3.把各转换图的初态连在一起,构成识别该语言的自动机。

4.设计扫描器;把扫描器作为语法分析的一个过程,当语法分析需要一个单词时,就调用扫描器。扫描器从初态出发,当识别一个单词后便进入终态,送出二元式。

针对该程序设计的DFA 图大致如下:

核心代码(以下是词法分析的主体代码;常量定义、字母识别、数字识别等辅助代码不再一一列举,详见源程序):

/**
     * Opens the source file and tokenizes it until end of input.
     *
     * <p>Stores a new reader for {@code originalFile} in {@code sourceFile}, calls
     * {@code initial()} (defined elsewhere; presumably resets scanner state - confirm),
     * then calls {@code getToken()} repeatedly until {@code isEOF} becomes true.
     * The reader is always closed before this method returns or throws.
     *
     * @param originalFile path of the source file to scan
     * @throws Exception if the file cannot be opened or read, or if scanning fails
     */
    private void scanning(String originalFile) throws Exception {
        this.sourceFile = new BufferedReader(new FileReader(originalFile));
        try {
            this.initial();
            while (!isEOF) {
                getToken();
            }
        } finally {
            // Release the file handle on both the normal and the exceptional path;
            // previously the reader was never closed.
            this.sourceFile.close();
        }
        System.out.println("========================> end scanning ...");
    }
     
    /**
     * Returns the next input character, refilling the line buffer when it is exhausted.
     *
     * <p>When a new line is read, {@code lineNum} is incremented and the line is echoed
     * to standard output prefixed with its number. Because {@code readLine()} strips the
     * line terminator, a single {@code '\n'} is appended to every buffered line. This
     * keeps the buffer non-empty for blank lines (indexing an empty buffer used to throw)
     * and lets callers actually observe the end of a line.
     *
     * @return the next character, or {@code '\0'} once the end of input is reached
     *         (in which case {@code isEOF} is set to {@code true})
     * @throws Exception if reading from the source file fails
     */
    private char getNextChar() throws Exception {
        // Fast path: characters are still pending in the current line.
        if (charPos < bufSize) {
            return lineBuf[charPos++];
        }

        eachLine = sourceFile.readLine();
        if (eachLine == null) {
            isEOF = true;
            return '\0';
        }

        lineNum++;
        System.out.println(lineNum + ": " + eachLine);
        // Restore the terminator removed by readLine(); guarantees at least one character.
        lineBuf = (eachLine + "\n").toCharArray();
        bufSize = lineBuf.length;
        charPos = 0;
        return lineBuf[charPos++];
    }
     
    /**
     * Pushes back the most recently read characters by moving the read position
     * {@code step} places to the left. Does nothing once end of input has been flagged.
     *
     * @param step number of characters to un-read
     */
    private void unGetNextChar(int step) {
        if (isEOF) {
            return;
        }
        charPos = charPos - step;
    }
     
    /**
     * Scans the next token from the input with a hand-written state machine.
     *
     * <p>Starting in {@code Start}, characters are consumed one at a time until the
     * automaton reaches {@code Done} or the input ends. Recognized lexemes:
     * <ul>
     *   <li>runs of digits ({@code Num});</li>
     *   <li>identifiers: a letter or {@code '.'} followed by letters/digits ({@code ID});</li>
     *   <li>the operators {@code ! != = == < <= << > >= >>} and {@code /};</li>
     *   <li>double-quoted strings, quotes included ({@code Str});</li>
     *   <li>any other single non-blank character.</li>
     * </ul>
     * Line comments and block comments are skipped and yield no token.
     *
     * <p>Every non-empty token is passed to {@code printToken} before it is returned.
     * If the input ends in the middle of a lexeme, the text collected so far is emitted
     * as the token; the end-of-input sentinel itself is never part of a token.
     *
     * @return the token text; {@code "Error"} if the automaton reached an unknown state;
     *         the empty string if only blanks/comments were consumed before the input
     *         ended or a comment was closed
     * @throws Exception if reading the next character fails
     */
    private String getToken() throws Exception {
        StringBuilder lexeme = new StringBuilder();
        int currentState = Start;
        boolean isError = false;

        while (currentState != Done && !isEOF) {
            char c = getNextChar();
            if (isEOF) {
                // c is only the end-of-input sentinel; do not run it through the automaton.
                break;
            }
            boolean isSave = true;

            switch (currentState) {
                case Start:
                    if (isDigit(c)) {
                        currentState = Num;
                    } else if (isLetter(c) || c == '.') { // '.' is accepted to cope with header names such as iostream.h
                        currentState = ID;
                    } else if (c == ' ' || c == '\t' || c == '\n') {
                        isSave = false;
                    } else if (c == '!') {
                        currentState = NE;
                    } else if (c == '=') {
                        currentState = EQ;
                    } else if (c == '<') {
                        currentState = NM;
                    } else if (c == '>') {
                        currentState = NL;
                    } else if (c == '/') {
                        // Keep the '/': it is the token itself unless a comment follows.
                        currentState = Coms;
                    } else if (c == '"') {
                        currentState = Str;
                    } else {
                        // Any other character is a one-character token.
                        currentState = Done;
                    }
                    break;
                case Num:
                    if (!isDigit(c)) {
                        currentState = Done;
                        unGetNextChar(1);
                        isSave = false;
                    }
                    break;
                case ID:
                    if (!isLetter(c) && !isDigit(c)) {
                        currentState = Done;
                        unGetNextChar(1);
                        isSave = false;
                    }
                    break;
                case NE:
                case EQ:
                    // "!=" / "==" when followed by '='; otherwise the single operator
                    // already collected is the token and c belongs to the next one.
                    if (c != '=') {
                        unGetNextChar(1);
                        isSave = false;
                    }
                    currentState = Done;
                    break;
                case NM:
                    // "<=" or "<<", otherwise plain "<".
                    if (c != '=' && c != '<') {
                        unGetNextChar(1);
                        isSave = false;
                    }
                    currentState = Done;
                    break;
                case NL:
                    // ">=" or ">>", otherwise plain ">".
                    if (c != '=' && c != '>') {
                        unGetNextChar(1);
                        isSave = false;
                    }
                    currentState = Done;
                    break;
                case Str:
                    if (c == '"') {
                        currentState = Done;
                    }
                    break;
                case Coms:
                    isSave = false;
                    if (c == '/') {
                        lexeme.setLength(0); // the leading '/' opened a comment, not a token
                        currentState = LineCom;
                    } else if (c == '*') {
                        lexeme.setLength(0);
                        currentState = MulCom1;
                    } else {
                        // Plain division operator: emit "/" and re-read c as the next token.
                        unGetNextChar(1);
                        currentState = Done;
                    }
                    break;
                case LineCom:
                    isSave = false;
                    if (c == '\n') {
                        currentState = Done;
                    }
                    break;
                case MulCom1:
                    // Inside a block comment, waiting for a '*' that may start the closing "*/".
                    isSave = false;
                    if (c == '*') {
                        currentState = MulCom2;
                    }
                    break;
                case MulCom2:
                    // A '*' was just seen inside a block comment.
                    isSave = false;
                    if (c == '/') {
                        currentState = Done;
                    } else if (c != '*') {
                        currentState = MulCom1;
                    }
                    break;
                default:
                    System.out.println(lineNum + " >> Scanner Bug : state = " + currentState);
                    currentState = Done;
                    isError = true;
                    isSave = false;
                    break;
            }
            if (isSave) {
                lexeme.append(c);
            }
        }

        // Decide the token only after the loop so that an error is not overwritten by the
        // collected text, and so that a lexeme cut short by end of input is still emitted.
        String currentToken = isError ? "Error" : lexeme.toString();
        if (currentToken.length() > 0) {
            printToken(currentToken);
        }
        return currentToken;
    }

运行结果如下:

待翻译代码:

#include"iostream.h"

main()

{

int i;

cin>>i;

i=i+1;

if(i>=3) cout<<"chenggong";

else cout<<"shibai";

}

显示结果:


需要源码的朋友

猜你喜欢

转载自blog.csdn.net/l450741881/article/details/50734945