【编译原理课程设计】词法分析程序设计

【实验目的】

(1)理解词法分析在编译程序中的作用

(2)加深对有穷自动机模型的理解

(3)掌握词法分析程序的实现方法和技术

【实验内容】

对一个简单语言的子集编制一个一遍扫描的词法分析程序。

【实验要求】

1)待分析的简单语言的词法

1) 关键字

begin  if  then  while  do  end

2) 运算符和界符

:=  +  -  *  /  <  <=  >  >=  <>  =  ;  (  )  #

3) 其他单词是标识符(ID)和整型常数(NUM),通过以下正规式定义:

ID=letter(letter|digit)*

NUM=digitdigit*

4) 空格由空白、制表符和换行符组成。空格一般用来分隔ID、NUM、运算符、界符和关键字,在词法分析阶段通常被忽略。

2)各种单词符号对应的种别编码

单词符号

种别码

单词符号

种别码

begin

1

:

17

if

2

:=

18

then

3

<

20

while

4

<>

21

do

5

<=

22

end

6

>

23

letter(letter|digit)*

10

>=

24

digitdigit*

11

=

25

+

13

;

26

-

14

(

27

*

15

)

28

/

16

#

0

3)词法分析程序的功能

输入:所给文法的源程序字符串

输出:二元组(syn,token或sum)构成的序列。

syn为单词种别码;

token为存放的单词自身字符串;

sum 为整型常数。

例如:对源程序begin x:=9;if x>0 then x:=2*x+1/3;end# 经词法分析后输出如下序列:(1,begin) (10,x) (18,:=) (11,9) (26,;) (2,if)……

直接上代码

#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
using namespace std;
// Character classification helpers (defined after scan()).
bool is_digit(char ch);
bool is_letter(char ch);

// Lexer state shared between main() and scan().
char example[10000];  // input buffer holding the source text
int example_p;        // index of the current character in `example`
char ch;              // current character, i.e. example[example_p]

char token[10];       // text of the most recently scanned token
int token_p;          // write index into `token`

int syn;              // category code of the most recently scanned token
int sum;              // value of the most recently scanned integer constant

// Reserved words; the category code of keyword[i] is i + 1.
const char* keyword[10] = { "begin","if","then","while","do","end" };

// Move the cursor one position forward in `example` and reload `ch`.
static void next_char() {
    example_p++;
    ch = example[example_p];
}

// Record the current character as a one-character token with category
// `code`, then consume it.
static void take_single(int code) {
    syn = code;
    token[0] = ch;
    token_p = 1;
    next_char();
}

// If the current character equals `expected`, append it to `token`, set the
// category to `code`, consume it and return true; otherwise change nothing.
static bool take_if(char expected, int code) {
    if (ch != expected) {
        return false;
    }
    token[token_p] = ch;
    token_p++;
    syn = code;
    next_char();
    return true;
}

// Scan one token starting at the current character `ch`.
//
// Results are left in the globals:
//   syn   - category code (0 for the end marker '#', -1 for an unrecognised
//           character, 11 for an integer constant, 10 for an identifier,
//           1..6 for keywords, 13..28 for operators and delimiters);
//   token - NUL-terminated token text (left empty for integer constants);
//   sum   - numeric value when syn == 11.
// On return `ch` is the first character after the token, except for the end
// marker, which is not consumed.
void scan() {
    std::memset(token, 0, sizeof(token));  // also guarantees NUL termination
    token_p = 0;

    // Blanks, tabs and line breaks only separate tokens.
    while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
        next_char();
    }

    if (is_digit(ch)) {
        // Integer constant: digit digit*
        sum = 0;
        syn = 11;
        while (is_digit(ch)) {
            const int digit = ch - '0';
            // Saturate instead of overflowing (signed overflow is undefined).
            if (sum > (INT_MAX - digit) / 10) {
                sum = INT_MAX;
            } else {
                sum = sum * 10 + digit;
            }
            next_char();
        }
    }
    else if (is_letter(ch)) {
        // Identifier or keyword: letter (letter | digit)*
        const int max_len = static_cast<int>(sizeof(token)) - 1;
        while (is_digit(ch) || is_letter(ch)) {
            // Characters beyond the buffer capacity are consumed but dropped
            // so that `token` can never be overrun.
            if (token_p < max_len) {
                token[token_p] = ch;
                token_p++;
            }
            next_char();
        }
        token[token_p] = '\0';

        syn = 10;
        for (int i = 0; i < 6; i++) {
            if (std::strcmp(token, keyword[i]) == 0) {
                syn = i + 1;
                break;
            }
        }
    }
    else {
        switch (ch) {
        case '#':
        case '\0':
            // End of input. A NUL is handled like '#' so scanning can never
            // run past the end of the text. The marker is not consumed.
            syn = 0;
            token[0] = ch;
            break;
        case '+': take_single(13); break;
        case '-': take_single(14); break;
        case '*': take_single(15); break;
        case '/': take_single(16); break;
        case ':':
            take_single(17);       // ':'
            take_if('=', 18);      // ':='
            break;
        case '<':
            take_single(20);       // '<'
            // '<>' and '<=' are alternatives: after '<>' a following '='
            // belongs to the next token.
            if (!take_if('>', 21)) {
                take_if('=', 22);
            }
            break;
        case '>':
            take_single(23);       // '>'
            take_if('=', 24);      // '>='
            break;
        case '=': take_single(25); break;
        case ';': take_single(26); break;
        case '(': take_single(27); break;
        case ')': take_single(28); break;
        default:
            // Unrecognised character: report it with code -1 and consume it
            // so the caller's loop always makes progress.
            take_single(-1);
            break;
        }
    }
}
// Returns true when `ch` is an ASCII decimal digit ('0'..'9').
bool is_digit(char ch) {
    return ch >= '0' && ch <= '9';
}
// Returns true when `ch` is an ASCII letter ('a'..'z' or 'A'..'Z').
bool is_letter(char ch) {
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}
int main() {
    example_p = 0;
    char str;
    do {
        str = getchar();
        example[example_p] = str;
        example_p++;
    } while (str != ‘#‘);

    example_p = 0;
    ch = example[example_p];
    
    
    do {
        scan();
        switch (syn) {
        case 11:
            cout <<"("<< syn << "," << sum <<")"<< endl;
            break;
        default:
            cout <<"("<< syn << "," << token<<")"<<endl;
        }
    } while (syn != 0);
    
    
    system("pause");

    return 0;
}

相关推荐