#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <algorithm>
// Bring the standard library names into scope for the rest of this program.
using namespace std;
// Returns true when ch is an ASCII letter (A-Z or a-z).
bool isLetter(char ch){
    return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
}
// Returns true when ch is an ASCII decimal digit (0-9).
bool isDigit(char ch){
    return ch >= '0' && ch <= '9';
}
// Returns true when ch is one of the arithmetic operator characters + - * /.
// All other punctuation is classified by isJ.
bool isP(char ch){
    return ch == '+' || ch == '*' || ch == '-' || ch == '/';
}
// Returns true when ch is a delimiter character:
//   , ; . ( ) [ ] = : < > { } #
// The ';' entry had lost its character (it read as an empty literal ''),
// which is not valid C++; it is restored here to match the token table.
bool isJ(char ch){
    switch (ch) {
    case ',': case ';': case '.':
    case '(': case ')':
    case '[': case ']':
    case '=': case ':':
    case '<': case '>':
    case '{': case '}':
    case '#':
        return true;
    default:
        return false;
    }
}
// Returns true when ch is a space or a horizontal tab.
bool isBlank(char ch){
    return ch == ' ' || ch == '\t';
}
int main(){
string src,ste,s
char ch0,ch,ch1[2]
char ktt[48][20]={"and","begin","const","div","do","else","end","function","if","integer",
"not","or","procedure","program","read","real","then","type","var","while","write","标识符","无符号数",
",","",":",".","(",")","[","]","..","++","--","+","-","*","/","=","<",">","<>","<="
,">=",":=","{","}","#"}
int pos=0
FILE *fp
fp=fopen("d:\\in.txt","r")
ch0=fgetc(fp)
while(ch0!=EOF)
{
//if(ch0!='\t'){src+=ch0}
src+=ch0
ch0=fgetc(fp)
}
src+='#'
cout<<src<<endl
ch=src[pos++]
ste=" "
for(int j=0j<47j++){cout<<j<<ktt[j]<<endl}
cout<<"词法分析:\n"
while(ch!='#')
{
char str[20]
if(ch!='\n')
{
if(isDigit(ch))
{ //判断常数
int i=0
while(isDigit(ch)||ch=='.')
{
str[i++]=ch
//i++
ch=src[pos++]
}
str[i]='\0'
ste=ste+"|"+"22"
cout<<str
continue
}
else if(isLetter(ch))
{ //判断字符
int i=0,j
while(isLetter(ch)||isDigit(ch))
{
str[i++]=ch
//i++
ch=src[pos++]
}
str[i]='\0'
for(j=0j<21j++){ //判断是否关键字
int t=strcmp(str,ktt[j])
if(t==0) {
stringstream ss
ste+="|"
ss<<stess<<j
ss>>ste
break
}
}
if(j==21){ste=ste+"|"+"21"}
// cout<<" "
cout<<str
continue
}
else if(isP(ch)){ ///判断是否运算符
int i=0,j
str[i++]=ch
str[i]='\0'
for(j=34j<38j++){
int t=strcmp(str,ktt[j])
if(t==0) {
stringstream ss
ste+="|"
ss<<stess<<j
ss>>ste
break
}
}
cout<<str
ch=src[pos++]
continue
}
else if(isJ(ch)) //判断是否界符
{
int i=0,j
while(isJ(ch))
{
str[i++]=ch
ch=src[pos++]
}
str[i]='\0'
for(j=23j<47j++){
int t=strcmp(str,ktt[j])
if(t==0) {
stringstream ss
ste+="|"
ss<<stess<<j
ss>>ste
break
}
}
cout<<str
continue
}
else if(isBlank(ch))
{
cout<<ch
ch=src[pos++]
continue
}
}
else{
cout<<ste<<endl
ste=" "
}
ch=src[pos++]
}
return 0
}
还有运行效果图和实验报告,你要的话留下邮箱。
简而言之,就是先画一个状态图,然后根据图来编码就行。下面是一个简单的 XML 词法分析器,供参考:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* A view onto a run of characters: start pointer plus length.
   The characters are not copied and need not be NUL-terminated at len. */
typedef struct
{
    char *p;   /* first character of the run */
    int len;   /* number of characters in the run */
} xml_Text;
/* Kinds of token produced by xml_getToken. */
typedef enum
{
    xml_tt_U,   /* Unknown */
    xml_tt_H,   /* Head:       <?xxx?> */
    xml_tt_E,   /* End:        </xxx>  */
    xml_tt_B,   /* Begin:      <xxx>   */
    xml_tt_BE,  /* Begin+End:  <xxx/>  */
    xml_tt_T    /* Text:       xxx     */
} xml_TokenType;
typedef
struct
{
xml_Text
text
xml_TokenType
type
}
xml_Token
/* Points pText at the NUL-terminated string s and records its length.
   The string is not copied. Always returns 0. */
int xml_initText(xml_Text *pText, char *s)
{
    pText->p = s;
    pText->len = (int)strlen(s);
    return 0;
}
/* Resets pToken to an empty, unknown-type token positioned at the start of
   pText, so that the next xml_getToken call scans from the beginning.
   Always returns 0. */
int xml_initToken(xml_Token *pToken, xml_Text *pText)
{
    pToken->text.p = pText->p;
    pToken->text.len = 0;
    pToken->type = xml_tt_U;
    return 0;
}
/* Writes the pText->len characters starting at pText->p to stdout.
   Always returns 0. */
int xml_print(xml_Text *pText)
{
    int i;
    for (i = 0; i < pText->len; i++)
    {
        putchar(pText->p[i]);
    }
    return 0;
}
/* Same as xml_print, followed by a newline. Always returns 0. */
int xml_println(xml_Text *pText)
{
    xml_print(pText);
    putchar('\n');
    return 0;
}
int
xml_getToken(xml_Text
*pText,
xml_Token
*pToken)
{
char
*start
=
pToken->text.p
+
pToken->text.len
char
*p
=
start
char
*end
=
pText->p
+
pText->len
int
state
=
0
pToken->text.p
=
p
pToken->type
=
xml_tt_U
for
(
p
<
end
p++)
{
switch(state)
{
case
0:
switch(*p)
{
case
'<':
state
=
1
break
default:
state
=
7
break
}
break
case
1:
switch(*p)
{
case
'?':
state
=
2
break
case
'/':
state
=
4
break
default:
state
=
5
break
}
break
case
2:
switch(*p)
{
case
'?':
state
=
3
break
default:
state
=
2
break
}
break
case
3:
switch(*p)
{
case
'>':
pToken->text.len
=
p
-
start
+
1
pToken->type
=
xml_tt_H
return
1
default:
state
=
-1
break
}
break
case
4:
switch(*p)
{
case
'>':
pToken->text.len
=
p
-
start
+
1
pToken->type
=
xml_tt_E
return
1
default:
state
=
4
break
}
break
case
5:
switch(*p)
{
case
'>':
pToken->text.len
=
p
-
start
+
1
pToken->type
=
xml_tt_B
return
1
case
'/':
state
=
6
break
default:
state
=
5
break
}
break
case
6:
switch(*p)
{
case
'>':
pToken->text.len
=
p
-
start
+
1
pToken->type
=
xml_tt_BE
return
1
default:
state
=
-1
break
}
break
case
7:
switch(*p)
{
case
'<':
p--
pToken->text.len
=
p
-
start
+
1
pToken->type
=
xml_tt_T
return
1
default:
state
=
7
break
}
break
default:
pToken->text.len
=
p
-
start
+
1
pToken->type
=
xml_tt_T
return
1
}
}
return
0
}
int
main()
{
int
ret
=
0
xml_Text
xml
xml_initText(&xml,
"<?xml?><root>
ss
<haha>hoho</haha></root>")
xml_Token
token
xml_initToken(&token,
&xml)
ret
=
xml_getToken(&xml,
&token)
printf("ret=%dtext=",ret)
xml_print(&token.text)
printf("type=%d\n\n",
token.type)
ret
=
xml_getToken(&xml,
&token)
printf("ret=%dtext=",ret)
xml_print(&token.text)
printf("type=%d\n\n",
token.type)
ret
=
xml_getToken(&xml,
&token)
printf("ret=%dtext=",ret)
xml_print(&token.text)
printf("type=%d\n\n",
token.type)
ret
=
xml_getToken(&xml,
&token)
printf("ret=%dtext=",ret)
xml_print(&token.text)
printf("type=%d\n\n",
token.type)
ret
=
xml_getToken(&xml,
&token)
printf("ret=%dtext=",ret)
xml_print(&token.text)
printf("type=%d\n\n",
token.type)
ret
=
xml_getToken(&xml,
&token)
printf("ret=%dtext=",ret)
xml_print(&token.text)
printf("type=%d\n\n",
token.type)
ret
=
xml_getToken(&xml,
&token)
printf("ret=%dtext=",ret)
xml_print(&token.text)
printf("type=%d\n\n",
token.type)
return
0
}
#include "stdio.h" /*定义I/O库所用的某些宏和变量*/
#include "string.h" /*定义字符串库函数*/
#include "conio.h" /*提供有关屏幕窗口操作函数*/
#include "ctype.h" /*分类函数*/
/* Scanner state shared between scaner() and main(). */
char prog[80] = {'\0'};   /* input buffer; the text ends with '#' */
char token[8];            /* characters of the word being recognised */
char ch;                  /* character most recently read from prog */
int syn;                  /* category code of the recognised word */
int n;                    /* scratch loop counter */
int sum;                  /* value of an integer word */
int m;                    /* write index into token */
int p;                    /* read index into prog */
/* Reserved words; the category code of rwtab[i] is i + 1. */
const char *rwtab[6] = {"begin", "if", "then", "while", "do", "end"};
void scaner(){
m=0
sum=0
for(n=0n<8n++)
token[n]='\0'
ch=prog[p++]
while(ch==' ')
ch=prog[p++]
if(isalpha(ch)) /*ch为字母字符*/{
while(isalpha(ch)||isdigit(ch)) /*ch 为字母字符或者数字字符*/{
token[m++]=ch
ch=prog[p++]}
token[m++]='\0'
ch=prog[p--]
syn=10
for(n=0n<6n++)
if(strcmp(token,rwtab[n])==0) /*字符串的比较*/{
syn=n+1
break}}
else
if(isdigit(ch)) /*ch是数字字符*/{
while(isdigit(ch)) /*ch是数字字符*/{
sum=sum*10+ch-'0'
ch=prog[p++]}
ch=prog[p--]
syn=11}
else
switch(ch){
case'<':m=0token[m++]=chch=prog[p++]
if(ch=='>'){
syn=21
token[m++]=ch}
else if(ch=='='){
syn=22
token[m++]=ch}
else{
syn=20
ch=prog[p--]}
break
case'>':m=0token[m++]=chch=prog[p++]
if(ch=='='){
syn=24
token[m++]=ch}
else{
syn=23
ch=prog[p--]}
break
case':':m=0token[m++]=chch=prog[p++]
if(ch=='='){
syn=18
token[m++]=ch}
else{
syn=17
ch=prog[p--]}
break
case'+':syn=13token[0]=chbreak
case'-':syn=14token[0]=chbreak
case'*':syn=15token[0]=chbreak
case'/':syn=16token[0]=chbreak
case'=':syn=25token[0]=chbreak
case'':syn=26token[0]=chbreak
case'(':syn=27token[0]=chbreak
case')':syn=28token[0]=chbreak
case'#':syn=0token[0]=chbreak
default:syn=-1}}
main()
{
printf("\n\nThe significance of the figures:\n"
"1.figures 1 to 6 said Keyword\n"
"2.figures 10 and 11 said Other indicators\n"
"3.figures 13 to 28 said Operators\n")
p=0
printf("\nplease input string:\n")
do {
ch=getchar()
prog[p++]=ch
}while(ch!='#')
p=0
do{
scaner()
switch(syn){
case 11: printf("(%d,%d)\n",syn,sum)break
case -1: printf("\n ERROR\n")break
default: printf("(%d,%s)\n",syn,token)
}
}while(syn!=0)
getch()
}
程序测试结果
对源程序 begin x:=9: if x>9 then x:=2*x+1/3 end # 进行词法分析后,输出如图5-1所示:
具体的你再修改修改吧