$ python baidu_qa.py
Enter Context, leave by empty line
def __init__(self, datafile, default=None):
self.datafile = datafile
self.default = default
self.data = shelve.BsdDbShelf(bsddb.open(self.datafile, 'c'))
def __del__(self):
self.data.sync()
self.data.close()
def __getitem__(self, k):
return self.data.get(k, self.default)
def __setitem__(self, k, v):
self.data[k] = v
Enter and/or keywords to query
and default data
[9]
or get k v
[8, 9, 10, 11]
$
# coding: utf-8
import re

# Matches one run of word characters; used to tokenise both the indexed
# text (makeIndex) and the query lines (lnparser).
patt = re.compile(r"\w+")
def makeIndex():
    """Build a word -> line-number index from lines typed on standard input.

    Lines are read with ``raw_input`` until an empty line is entered or the
    input ends.  Non-empty lines are numbered from 1, and every token matched
    by the module-level ``patt`` is recorded once per occurrence.

    Returns:
        dict mapping each lower-cased word to the list of line numbers it
        occurs on, in reading order.
    """
    index, lineno = {}, 0
    while True:
        try:
            ln = raw_input()
        except EOFError:
            # Input ended without the terminating empty line (e.g. piped
            # stdin); treat that as the end of the context.
            break
        if not ln:
            break
        lineno += 1
        # Index words lower-cased: lnparser lower-cases the whole query line,
        # so words stored with their original capitals could never be found.
        for word in patt.findall(ln.lower()):
            index.setdefault(word, []).append(lineno)
    return index
def indexAndQuery(index, *args):
    """Return the line numbers on which every word in ``args`` occurs.

    Args:
        index: mapping of word -> list of line numbers.
        *args: words that must all occur on a line.

    Returns:
        Sorted list of line numbers shared by all words, or ``None`` when no
        line contains all of them (also when no words are given).
    """
    found = None
    for word in args:
        got = index.get(word)
        if not got:
            # A word that occurs nowhere makes the conjunction empty.
            return None
        found = set(got) if found is None else found & set(got)
        if not found:
            return None
    if found is None:
        # No words were supplied; there is nothing to intersect.
        return None
    # Sort so the output does not depend on set iteration order.
    return sorted(found)
def indexOrQuery(index, *args):
    """Return the line numbers on which at least one word in ``args`` occurs.

    Args:
        index: mapping of word -> list of line numbers.
        *args: words to look up; unknown words contribute nothing.

    Returns:
        Sorted list of line numbers (empty when nothing matches).
    """
    found = set()
    for word in args:
        found.update(index.get(word, ()))
    # Sort so the output does not depend on set iteration order.
    return sorted(found)
def lnparser(ln):
    """Split a query line into its command and the words to search for.

    The line is lower-cased and tokenised with the module-level ``patt``.

    Returns:
        ``(command, words)`` where ``command`` is ``'and'`` or ``'or'`` and
        ``words`` is the non-empty list of remaining tokens, or ``None`` when
        the line is not a valid query.
    """
    tokens = patt.findall(ln.lower())
    # Valid only when the line starts with and/or AND names at least one
    # word to look up.
    if len(tokens) >= 2 and tokens[0] in ('and', 'or'):
        return tokens[0], tokens[1:]
    return None
cmdswitch = {
'and': indexAndQuery,
'or': indexOrQuery,
}
print "Enter Context, leave by empty line"
index = makeIndex()
print "Enter and/or keywords to query"
while True:
got = lnparser(raw_input())
if not got:
break
print cmdswitch[got[0]](index, *got[1])
s = raw_input()lines = s.split('\n')
dictlines = lines[:100]
mydict = {}
# read
for i,line in enumerate(dictlines ):
for word in line.split():
mydict.setdefault(word,[]).append(i + 1)
# print indices
for word in mydict.keys():
print "%s: %s" % (word,", ".join(map(str,sorted(mydict[word]))))
def andSearch(words_list):
    """Return the set of line numbers whose line contains every given word.

    Looks words up in the module-level ``mydict`` (word -> list of line
    numbers).  The result starts from the full range 1..100 and is narrowed
    by each word, so an empty ``words_list`` returns that whole range.  A word
    that is not in ``mydict`` yields an empty set instead of raising
    ``KeyError``.
    """
    a = set(range(1, 101))
    for word in words_list:
        a = a.intersection(mydict.get(word, ()))
    return a
def orSearch(words_list):
    """Return the set of line numbers whose line contains any given word.

    Looks words up in the module-level ``mydict`` (word -> list of line
    numbers).  Words that are not in ``mydict`` contribute nothing instead of
    raising ``KeyError``; an empty ``words_list`` returns an empty set.
    """
    a = set()
    for word in words_list:
        a.update(mydict.get(word, ()))
    return a
# Query
index = 100
u = lines[index]
while index < len(lines):
words_list = u.split()
if ":" in u:
if words_list[0] == "OR:":
a = orSearch(words_list)
else:
if words_list[0] == 'AND:':
words_list = words_list[1:]
a = andSearch(words_list)
if not a:
print ", ".join(map(str,list(a)))
else:
print "None"
index += 1
大致思路就是这样。
你还可以使用正则表达式(regular expression)的 search() 和 match(),用起来更灵活:
import re
# Example 1: re.search() looks for the pattern anywhere in the string.
# The variable is named `text` so the builtin `str` is not shadowed.
text = "Welcome to my world. I have 12 apples."
# search() returns None when nothing matches, so test the match object
# itself instead of calling .group() on it.
if re.search(r"world", text):
    print("match! ")

# Example 2: re.match() only matches at the start of the string.
text = "abcABC"
if re.match(r"[a-zA-Z]+", text):
    print("match! ", re.search(r"[A-Z]+", text).group())
else:
    print("ummatch! ")