【编译原理课程设计】词法分析程序设计

getianao

2020-01-04

关注关注

【实验目的】

（1）理解词法分析在编译程序中的作用

（2）加深对有穷自动机模型的理解

（3）掌握词法分析程序的实现方法和技术

【实验内容】

对一个简单语言的子集编制一个一遍扫描的词法分析程序。

【实验要求】

（1）待分析的简单语言的词法

1) 关键字

begin if then while do end

2) 运算符和界符

:= + - * / < <= > >= <> = ; ( ) #

3) 其他单词是标识符(ID)和整型常数(NUM)，通过以下正规式定义：

ID=letter(letter|digit)*

NUM=digit digit*

4) 空格由空白、制表符和换行符组成。空格一般用来分隔ID、NUM、运算符、界符和关键字，词法分析阶段通常被忽略。

（2）各种单词符号对应的种别编码

单词符号	种别码	单词符号	种别码
begin	1	:	17
if	2	:=	18
then	3	<	20
while	4	<>	21
do	5	<=	22
end	6	>	23
letter(letter|digit)*	10	>=	24
digit digit*	11	=	25
+	13	;	26
-	14	(	27
*	15	)	28
/	16	#	0

（3）词法分析程序的功能

输入：所给文法的源程序字符串

输出：二元组（syn,token或sum）构成的序列。

syn为单词种别码；

token为存放的单词自身字符串；

sum 为整型常数。

例如：对源程序 begin x:=9;if x>0 then x:=2*x+1/3;end# 经词法分析后输出如下序列：(1,begin) (10,x) (18,:=) (11,9) (26,;) (2,if) ……

直接上代码

#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<iostream>
using namespace std;
// Character classifiers used by the scanner (defined after scan()).
bool is_letter(char ch);
bool is_digit(char ch);

// Input buffer holding the source text to be scanned.
char example[10000];
// Index of the current character inside `example`.
int example_p;
// The character currently under examination, i.e. example[example_p].
char ch;

// Text of the most recently scanned word or operator.
char token[10];
// Next free position inside `token`.
int token_p;

// Kind code of the most recently scanned token.
int syn;
// Numeric value of the most recently scanned integer constant.
int sum;

// Reserved words; the kind code of keyword[i] is i + 1.
const char* keyword[10] = {
    "begin",
    "if",
    "then",
    "while",
    "do",
    "end"
};

// Moves the scan position one character forward and reloads `ch`.
static void advance_char() {
    example_p++;
    ch = example[example_p];
}

// Appends `ch` to `token` and moves on to the next character. When `token`
// is full the character is dropped (one slot is always kept for the
// terminating '\0'), so over-long words are truncated instead of overflowing.
static void take_char() {
    if (token_p < static_cast<int>(sizeof(token)) - 1) {
        token[token_p] = ch;
        token_p++;
    }
    advance_char();
}

// Scans the next token from `example`, starting at `example_p` with `ch`
// holding the current character, and reports the result through globals:
//   syn   - kind code: 0 for '#' or end of buffer, 1-6 for keywords,
//           10 for identifiers, 11 for integer constants, 13-28 for
//           operators and delimiters, -1 for an unrecognized character
//   token - text of the token (left empty for integer constants)
//   sum   - value of the integer constant (meaningful only when syn == 11)
// On return `ch`/`example_p` sit on the first character after the token,
// except for '#' and '\0', which are never consumed.
void scan() {
    memset(token, 0, sizeof(token));  // start with an empty, terminated token
    token_p = 0;

    // Skip whitespace: blanks, tabs and line breaks separate tokens.
    while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
        advance_char();
    }

    if (is_digit(ch)) {
        // Integer constant: accumulate the decimal value digit by digit.
        syn = 11;
        sum = 0;
        while (is_digit(ch)) {
            sum = sum * 10 + (ch - '0');
            advance_char();
        }
    }
    else if (is_letter(ch)) {
        // Identifier or keyword: letter followed by letters/digits.
        while (is_digit(ch) || is_letter(ch)) {
            take_char();
        }
        token[token_p] = '\0';

        // Assume identifier, then promote to a keyword code on exact match.
        syn = 10;
        for (int i = 0; i < 6; i++) {
            if (strcmp(token, keyword[i]) == 0) {
                syn = i + 1;
                break;
            }
        }
    }
    else {
        switch (ch) {
        case '#':
            // End marker: deliberately not consumed, so every further call
            // keeps reporting syn == 0.
            syn = 0;
            token[0] = ch;
            break;
        case '\0':
            // The buffer ended without a '#': report end of input rather
            // than leaving syn unchanged and never making progress.
            syn = 0;
            break;
        case '+':
            syn = 13;
            take_char();
            break;
        case '-':
            syn = 14;
            take_char();
            break;
        case '*':
            syn = 15;
            take_char();
            break;
        case '/':
            syn = 16;
            take_char();
            break;
        case ':':
            // ':' alone, or ':=' when followed by '='.
            syn = 17;
            take_char();
            if (ch == '=') {
                syn = 18;
                take_char();
            }
            break;
        case '<':
            // '<', '<>' or '<='. The two suffixes are mutually exclusive,
            // so "<>=" yields '<>' followed by a separate '='.
            syn = 20;
            take_char();
            if (ch == '>') {
                syn = 21;
                take_char();
            }
            else if (ch == '=') {
                syn = 22;
                take_char();
            }
            break;
        case '>':
            // '>' alone, or '>=' when followed by '='.
            syn = 23;
            take_char();
            if (ch == '=') {
                syn = 24;
                take_char();
            }
            break;
        case '=':
            syn = 25;
            take_char();
            break;
        case ';':
            syn = 26;
            take_char();
            break;
        case '(':
            syn = 27;
            take_char();
            break;
        case ')':
            syn = 28;
            take_char();
            break;
        default:
            // Character outside the language: report it with code -1 and
            // consume it so the caller's loop always makes progress.
            syn = -1;
            take_char();
            break;
        }
    }
}
// Returns true when `ch` is an ASCII decimal digit ('0' through '9').
bool is_digit(char ch) {
    return ch >= '0' && ch <= '9';
}
// Returns true when `ch` is an ASCII letter ('a'-'z' or 'A'-'Z').
bool is_letter(char ch) {
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}
// Reads source text from standard input up to and including the first '#',
// then prints one "(syn,value)" pair per line for every token scan() finds,
// ending with the pair for the '#' end marker.
int main() {
    const int capacity = static_cast<int>(sizeof(example));
    int length = 0;
    bool terminated = false;

    // Keep the result of getchar() in an int so EOF can be told apart from
    // real characters, and keep one slot free for an end marker.
    while (length < capacity - 1) {
        const int c = getchar();
        if (c == EOF) {
            break;
        }
        example[length] = static_cast<char>(c);
        length++;
        if (c == '#') {
            terminated = true;
            break;
        }
    }

    // Input ran out (or the buffer filled up) before a '#' was seen: add the
    // end marker ourselves so the scanning loop below always terminates.
    if (!terminated) {
        example[length] = '#';
    }

    // Prime the scanner on the first character of the buffer.
    example_p = 0;
    ch = example[example_p];

    do {
        scan();
        switch (syn) {
        case 11:
            // Integer constants are reported by value, not by text.
            std::cout << "(" << syn << "," << sum << ")" << '\n';
            break;
        default:
            std::cout << "(" << syn << "," << token << ")" << '\n';
        }
    } while (syn != 0);

    // Make sure everything is visible before handing control to the shell.
    std::cout.flush();

    // `pause` is a Windows shell command that waits for a key press; on
    // systems without it this call simply fails.
    system("pause");

    return 0;
}

td 编译原理