求一个C语言词法分析器源代码

Python017

求一个C语言词法分析器源代码,第1张

我有,这是这学期刚做的:

#include <algorithm>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

using namespace std;

/// Returns true when `ch` is an ASCII letter ('A'-'Z' or 'a'-'z').
bool isLetter(char ch) {
    return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
}

/// Returns true when `ch` is an ASCII decimal digit ('0'-'9').
bool isDigit(char ch) {
    return ch >= '0' && ch <= '9';
}

/// Returns true when `ch` is one of the arithmetic operator characters
/// '+', '*', '-' or '/'.
bool isP(char ch) {
    return ch == '+' || ch == '*' || ch == '-' || ch == '/';
}

/// Returns true when `ch` is a delimiter character:
/// , ; . ( ) [ ] = : < > { } #
bool isJ(char ch) {
    switch (ch) {
        case ',':
        case ';':
        case '.':
        case '(':
        case ')':
        case '[':
        case ']':
        case '=':
        case ':':
        case '<':
        case '>':
        case '{':
        case '}':
        case '#':
            return true;
        default:
            return false;
    }
}

/// Returns true when `ch` is a space or a horizontal tab.
bool isBlank(char ch) {
    return ch == ' ' || ch == '\t';
}

int main(){

string src,ste,s

char ch0,ch,ch1[2]

char ktt[48][20]={"and","begin","const","div","do","else","end","function","if","integer",

"not","or","procedure","program","read","real","then","type","var","while","write","标识符","无符号数",

",","",":",".","(",")","[","]","..","++","--","+","-","*","/","=","<",">","<>","<="

,">=",":=","{","}","#"}

int pos=0

FILE *fp

fp=fopen("d:\\in.txt","r")

ch0=fgetc(fp)

while(ch0!=EOF)

{

//if(ch0!='\t'){src+=ch0}

src+=ch0

ch0=fgetc(fp)

}

src+='#'

cout<<src<<endl

ch=src[pos++]

ste=" "

for(int j=0j<47j++){cout<<j<<ktt[j]<<endl}

cout<<"词法分析:\n"

while(ch!='#')

{

char str[20]

if(ch!='\n')

{

if(isDigit(ch))

{ //判断常数

int i=0

while(isDigit(ch)||ch=='.')

{

str[i++]=ch

//i++

ch=src[pos++]

}

str[i]='\0'

ste=ste+"|"+"22"

cout<<str

continue

}

else if(isLetter(ch))

{ //判断字符

int i=0,j

while(isLetter(ch)||isDigit(ch))

{

str[i++]=ch

//i++

ch=src[pos++]

}

str[i]='\0'

for(j=0j<21j++){ //判断是否关键字

int t=strcmp(str,ktt[j])

if(t==0) {

stringstream ss

ste+="|"

ss<<stess<<j

ss>>ste

break

}

}

if(j==21){ste=ste+"|"+"21"}

// cout<<" "

cout<<str

continue

}

else if(isP(ch)){ ///判断是否运算符

int i=0,j

str[i++]=ch

str[i]='\0'

for(j=34j<38j++){

int t=strcmp(str,ktt[j])

if(t==0) {

stringstream ss

ste+="|"

ss<<stess<<j

ss>>ste

break

}

}

cout<<str

ch=src[pos++]

continue

}

else if(isJ(ch)) //判断是否界符

{

int i=0,j

while(isJ(ch))

{

str[i++]=ch

ch=src[pos++]

}

str[i]='\0'

for(j=23j<47j++){

int t=strcmp(str,ktt[j])

if(t==0) {

stringstream ss

ste+="|"

ss<<stess<<j

ss>>ste

break

}

}

cout<<str

continue

}

else if(isBlank(ch))

{

cout<<ch

ch=src[pos++]

continue

}

}

else{

cout<<ste<<endl

ste=" "

}

ch=src[pos++]

}

return 0

}

还有运行效果图和实验报告,你要的话留下邮箱。

简而言之就是先画一个状态图,然后根据图来编码就行

一个简单的xml的词法分析器供参考

#include

<stdio.h>

#include

<stdlib.h>

#include

<string.h>

/* A span of text: pointer to the first character plus a length in chars. */
typedef struct
{
    char *p;   /* first character of the span */
    int len;   /* number of characters in the span */
} xml_Text;

/* Kinds of token produced by the XML lexer. */
typedef enum
{
    xml_tt_U,   /* Unknown */
    xml_tt_H,   /* Head:      <?xxx?> */
    xml_tt_E,   /* End:       </xxx>  */
    xml_tt_B,   /* Begin:     <xxx>   */
    xml_tt_BE,  /* Begin End: <xxx/>  */
    xml_tt_T    /* Text:      xxx     */
} xml_TokenType;

typedef

struct

{

xml_Text

text

xml_TokenType

type

}

xml_Token

/*
 * Makes pText refer to the NUL-terminated string s (the string is not copied).
 * A NULL s yields an empty span. Always returns 0.
 */
int xml_initText(xml_Text *pText, char *s)
{
    pText->p = s;
    pText->len = (s != NULL) ? (int)strlen(s) : 0;
    return 0;
}

/*
 * Prepares pToken for scanning pText: the token becomes an empty span at the
 * start of the text with type xml_tt_U. Always returns 0.
 */
int xml_initToken(xml_Token *pToken, xml_Text *pText)
{
    pToken->text.p = pText->p;
    pToken->text.len = 0;
    pToken->type = xml_tt_U;
    return 0;
}

/* Writes the pText->len characters of the span to stdout. Always returns 0. */
int xml_print(xml_Text *pText)
{
    int i;

    for (i = 0; i < pText->len; i++)
    {
        putchar(pText->p[i]);
    }
    return 0;
}

/* Writes the span to stdout followed by a newline. Always returns 0. */
int xml_println(xml_Text *pText)
{
    xml_print(pText);
    putchar('\n');
    return 0;
}

int

xml_getToken(xml_Text

*pText,

xml_Token

*pToken)

{

char

*start

=

pToken->text.p

+

pToken->text.len

char

*p

=

start

char

*end

=

pText->p

+

pText->len

int

state

=

0

pToken->text.p

=

p

pToken->type

=

xml_tt_U

for

(

p

<

end

p++)

{

switch(state)

{

case

0:

switch(*p)

{

case

'<':

state

=

1

break

default:

state

=

7

break

}

break

case

1:

switch(*p)

{

case

'?':

state

=

2

break

case

'/':

state

=

4

break

default:

state

=

5

break

}

break

case

2:

switch(*p)

{

case

'?':

state

=

3

break

default:

state

=

2

break

}

break

case

3:

switch(*p)

{

case

'>':

pToken->text.len

=

p

-

start

+

1

pToken->type

=

xml_tt_H

return

1

default:

state

=

-1

break

}

break

case

4:

switch(*p)

{

case

'>':

pToken->text.len

=

p

-

start

+

1

pToken->type

=

xml_tt_E

return

1

default:

state

=

4

break

}

break

case

5:

switch(*p)

{

case

'>':

pToken->text.len

=

p

-

start

+

1

pToken->type

=

xml_tt_B

return

1

case

'/':

state

=

6

break

default:

state

=

5

break

}

break

case

6:

switch(*p)

{

case

'>':

pToken->text.len

=

p

-

start

+

1

pToken->type

=

xml_tt_BE

return

1

default:

state

=

-1

break

}

break

case

7:

switch(*p)

{

case

'<':

p--

pToken->text.len

=

p

-

start

+

1

pToken->type

=

xml_tt_T

return

1

default:

state

=

7

break

}

break

default:

pToken->text.len

=

p

-

start

+

1

pToken->type

=

xml_tt_T

return

1

}

}

return

0

}

int

main()

{

int

ret

=

0

xml_Text

xml

xml_initText(&xml,

"<?xml?><root>

ss

<haha>hoho</haha></root>")

xml_Token

token

xml_initToken(&token,

&xml)

ret

=

xml_getToken(&xml,

&token)

printf("ret=%dtext=",ret)

xml_print(&token.text)

printf("type=%d\n\n",

token.type)

ret

=

xml_getToken(&xml,

&token)

printf("ret=%dtext=",ret)

xml_print(&token.text)

printf("type=%d\n\n",

token.type)

ret

=

xml_getToken(&xml,

&token)

printf("ret=%dtext=",ret)

xml_print(&token.text)

printf("type=%d\n\n",

token.type)

ret

=

xml_getToken(&xml,

&token)

printf("ret=%dtext=",ret)

xml_print(&token.text)

printf("type=%d\n\n",

token.type)

ret

=

xml_getToken(&xml,

&token)

printf("ret=%dtext=",ret)

xml_print(&token.text)

printf("type=%d\n\n",

token.type)

ret

=

xml_getToken(&xml,

&token)

printf("ret=%dtext=",ret)

xml_print(&token.text)

printf("type=%d\n\n",

token.type)

ret

=

xml_getToken(&xml,

&token)

printf("ret=%dtext=",ret)

xml_print(&token.text)

printf("type=%d\n\n",

token.type)

return

0

}

#include "stdio.h"                  /*定义I/O库所用的某些宏和变量*/

#include "string.h"                 /*定义字符串库函数*/

#include "conio.h"                  /*提供有关屏幕窗口操作函数*/

#include "ctype.h"                  /*分类函数*/

char prog[80] = {'\0'},
     token[8];              /* characters that make up the current token */
char ch;
int syn,                    /* category code of the current token */
    n,
    sum,                    /* value of an integer token */
    m, p;                   /* p indexes the buffer prog, m indexes token */
/* Reserved words; scaner gives the word at index i the category code i+1. */
char *rwtab[6] = {"begin", "if", "then", "while", "do", "end"};

void scaner(){

m=0

sum=0

for(n=0n<8n++)

token[n]='\0'

ch=prog[p++]

while(ch==' ')

ch=prog[p++]

if(isalpha(ch))    /*ch为字母字符*/{

while(isalpha(ch)||isdigit(ch))    /*ch 为字母字符或者数字字符*/{

token[m++]=ch

ch=prog[p++]}

token[m++]='\0'

ch=prog[p--]

syn=10

for(n=0n<6n++)

if(strcmp(token,rwtab[n])==0)    /*字符串的比较*/{

syn=n+1

break}}

else

if(isdigit(ch))    /*ch是数字字符*/{

while(isdigit(ch))    /*ch是数字字符*/{

sum=sum*10+ch-'0'

ch=prog[p++]}

ch=prog[p--]

syn=11}

else

switch(ch){

case'<':m=0token[m++]=chch=prog[p++]

if(ch=='>'){

syn=21

token[m++]=ch}

else if(ch=='='){

syn=22

token[m++]=ch}

else{

syn=20

ch=prog[p--]}

break

case'>':m=0token[m++]=chch=prog[p++]

if(ch=='='){

syn=24

token[m++]=ch}

else{

syn=23

ch=prog[p--]}

break

case':':m=0token[m++]=chch=prog[p++]

if(ch=='='){

syn=18

token[m++]=ch}

else{

syn=17

ch=prog[p--]}

break

case'+':syn=13token[0]=chbreak

case'-':syn=14token[0]=chbreak

case'*':syn=15token[0]=chbreak

case'/':syn=16token[0]=chbreak

case'=':syn=25token[0]=chbreak

case'':syn=26token[0]=chbreak

case'(':syn=27token[0]=chbreak

case')':syn=28token[0]=chbreak

case'#':syn=0token[0]=chbreak

default:syn=-1}}

main()

{

printf("\n\nThe significance of the figures:\n"

"1.figures 1 to 6 said Keyword\n"

"2.figures 10 and 11 said Other indicators\n"

"3.figures 13 to 28 said Operators\n")

p=0

printf("\nplease input string:\n")

do {

ch=getchar()

prog[p++]=ch

}while(ch!='#')

p=0

do{

scaner()

switch(syn){

case 11: printf("(%d,%d)\n",syn,sum)break

case -1: printf("\n ERROR\n")break

default: printf("(%d,%s)\n",syn,token)

}

}while(syn!=0)

getch()

}

程序测试结果

对内容为 begin x:=9: if x>9 then x:=2*x+1/3 end # 的源文件进行词法分析后,输出如图5-1所示:

具体的你再修改修改吧。