所谓词法:源代码由字符流组成,字符流中包含关键字、变量名、方法名、括号等各种符号(单词)。词法规定的是每个单词自身的构成规则,例如变量名必须是由字母和数字组成、不含标点符号、且不以数字开头的字符串。词法分析就是按照这些规则把字符流切分成一个个单词。(至于括号是否成对出现,严格来说要到语法分析阶段才能检查。)
而语法,顾名思义,就是词的排列规则;只有词法没有问题,才能进入语法分析。比如一句中文“我吃萝卜”,里面有三个词:我、吃、萝卜。除了“我吃萝卜”之外,这三个词还可以排列成“萝卜吃我”“萝卜我吃”等,显然按照中文文法,后面两句话是不对的。语法分析器要做的,就是检查类似这样的词序是否符合文法。
/*
 * In short: draw the state diagram first, then write the code from it.
 * Below is a simple XML lexer (tokenizer), offered as a reference.
 *
 * Define XML_LEXER_NO_MAIN to compile the lexer without the demo driver,
 * e.g. when embedding it in another program or in a test harness.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* A non-owning view of a run of characters; not necessarily NUL-terminated. */
typedef struct {
    char *p;   /* first character of the run */
    int len;   /* number of characters in the run */
} xml_Text;

typedef enum {
    xml_tt_U,  /* Unknown: no token recognized      */
    xml_tt_H,  /* Head:     <?xxx?>                 */
    xml_tt_E,  /* End:      </xxx>                  */
    xml_tt_B,  /* Begin:    <xxx>                   */
    xml_tt_BE, /* BeginEnd: <xxx/>                  */
    xml_tt_T   /* Text:     xxx                     */
} xml_TokenType;

/* One token: the slice of the input it covers plus its classification. */
typedef struct {
    xml_Text text;
    xml_TokenType type;
} xml_Token;

/* States of the tokenizer's finite-state machine. */
typedef enum {
    xml_st_start,        /* nothing consumed yet                    */
    xml_st_lt,           /* consumed "<"                            */
    xml_st_head,         /* inside "<?", looking for the closing "?" */
    xml_st_head_q,       /* just saw "?" inside a head, want ">"    */
    xml_st_end,          /* inside "</", looking for ">"            */
    xml_st_begin,        /* inside "<x", looking for ">" or "/"     */
    xml_st_begin_slash,  /* just saw "/" inside a begin tag         */
    xml_st_text          /* inside character data                   */
} xml_State;

/*
 * Point pText at the NUL-terminated string s (the string is not copied, so
 * it must outlive pText). Always returns 0.
 */
int xml_initText(xml_Text *pText, char *s)
{
    pText->p = s;
    pText->len = (int)strlen(s);
    return 0;
}

/*
 * Prepare pToken for the first xml_getToken() call on pText: an empty
 * token of unknown type positioned at the start of the text.
 * Always returns 0.
 */
int xml_initToken(xml_Token *pToken, xml_Text *pText)
{
    pToken->text.p = pText->p;
    pToken->text.len = 0;
    pToken->type = xml_tt_U;
    return 0;
}

/*
 * Write the characters of pText to stdout.
 * Returns 0 on success, -1 if the write was short.
 */
int xml_print(xml_Text *pText)
{
    size_t count;

    if (pText->len <= 0) {
        return 0;
    }
    count = (size_t)pText->len;
    return fwrite(pText->p, 1, count, stdout) == count ? 0 : -1;
}

/*
 * Write the characters of pText to stdout followed by a newline.
 * Returns 0 on success, -1 on a write error.
 */
int xml_println(xml_Text *pText)
{
    if (xml_print(pText) != 0) {
        return -1;
    }
    return putchar('\n') == EOF ? -1 : 0;
}

/* Finish the current token so that it ends just before tokEnd; returns 1. */
static int xml_emit(xml_Token *pToken, const char *tokEnd, xml_TokenType type)
{
    pToken->text.len = (int)(tokEnd - pToken->text.p);
    pToken->type = type;
    return 1;
}

/*
 * Scan the next token of pText.
 *
 * The scan starts right after the token currently stored in pToken, so
 * pToken must come from xml_initToken() or from a previous call on the
 * same text.
 *
 * Returns 1 and fills pToken when a token was recognized. Returns 0 when
 * the input is exhausted or ends inside an unterminated tag; pToken is
 * then an empty xml_tt_U token at the position where scanning started, so
 * it never refers to characters outside the text.
 *
 * Limitations: tags are delimited purely by '<' and '>', so a '>' inside
 * an attribute value ends the tag, and "<!...>" constructs are scanned
 * like a begin tag.
 */
int xml_getToken(xml_Text *pText, xml_Token *pToken)
{
    char *start = pToken->text.p + pToken->text.len;
    char *end = pText->p + pText->len;
    char *p;
    xml_State state = xml_st_start;

    pToken->text.p = start;
    pToken->text.len = 0;   /* never leave the previous token's length behind */
    pToken->type = xml_tt_U;

    for (p = start; p < end; p++) {
        switch (state) {
        case xml_st_start:
            state = (*p == '<') ? xml_st_lt : xml_st_text;
            break;

        case xml_st_lt:
            if (*p == '?') {
                state = xml_st_head;
            } else if (*p == '/') {
                state = xml_st_end;
            } else if (*p == '>') {
                /* "<>": close it here instead of swallowing what follows */
                return xml_emit(pToken, p + 1, xml_tt_B);
            } else {
                state = xml_st_begin;
            }
            break;

        case xml_st_head:
            if (*p == '?') {
                state = xml_st_head_q;
            }
            break;

        case xml_st_head_q:
            if (*p == '>') {
                return xml_emit(pToken, p + 1, xml_tt_H);
            }
            /* a '?' not followed by '>' is ordinary content of the head */
            if (*p != '?') {
                state = xml_st_head;
            }
            break;

        case xml_st_end:
            if (*p == '>') {
                return xml_emit(pToken, p + 1, xml_tt_E);
            }
            break;

        case xml_st_begin:
            if (*p == '>') {
                return xml_emit(pToken, p + 1, xml_tt_B);
            }
            if (*p == '/') {
                state = xml_st_begin_slash;
            }
            break;

        case xml_st_begin_slash:
            if (*p == '>') {
                return xml_emit(pToken, p + 1, xml_tt_BE);
            }
            /* a '/' not followed by '>' belongs to the tag, e.g. a path */
            if (*p != '/') {
                state = xml_st_begin;
            }
            break;

        case xml_st_text:
            if (*p == '<') {
                /* the '<' starts the next token; stop just before it */
                return xml_emit(pToken, p, xml_tt_T);
            }
            break;

        default:
            break;
        }
    }

    /* Text that runs to the end of the input is still a complete token. */
    if (state == xml_st_text) {
        return xml_emit(pToken, end, xml_tt_T);
    }
    return 0;
}

#ifndef XML_LEXER_NO_MAIN
/* Demo: tokenize a small document and print every token with its type. */
int main(void)
{
    xml_Text xml;
    xml_Token token;
    int ret;

    xml_initText(&xml, "<?xml?><root>ss<haha>hoho</haha></root>");
    xml_initToken(&token, &xml);

    for (;;) {
        ret = xml_getToken(&xml, &token);
        if (ret == 0) {
            break;
        }
        printf("ret=%d text=", ret);
        xml_print(&token.text);
        printf(" type=%d\n\n", (int)token.type);
    }
    return 0;
}
#endif /* XML_LEXER_NO_MAIN */