python如何高级检索

Python025

python如何高级检索,第1张

$ python baidu_qa.py

Enter Context, leave by empty line

  def __init__(self, datafile, default=None):

      self.datafile = datafile

      self.default = default

      self.data = shelve.BsdDbShelf(bsddb.open(self.datafile, 'c'))

  def __del__(self):

      self.data.sync()

      self.data.close()

  def __getitem__(self, k):

      return self.data.get(k, self.default)

  def __setitem__(self, k, v):

      self.data[k] = v

Enter and/or keywords to query

and default data

[9]

or get k v

[8, 9, 10, 11]

$

# coding: utf-8

import re

# Pattern used to split a line into words (runs of \w characters).
# Written as a raw string so the backslash reaches the regex engine as-is
# instead of relying on Python passing an unrecognised string escape through.
patt = re.compile(r"\w+")

def makeIndex():
    """Build a word -> line-number index from lines read with raw_input().

    Reading stops at the first empty line, or when the input ends.  Every
    word matched by the module-level ``patt`` is recorded, in lower case,
    against the 1-based number of the line it was found on.

    Words are lower-cased because lnparser() lower-cases the query words;
    with a case-preserving index any word containing a capital letter could
    never be found.

    Returns:
        dict mapping each lower-cased word to the list of line numbers it
        occurs on (a number is repeated if the word repeats on that line).
    """
    index = {}
    lineno = 0
    while True:
        try:
            ln = raw_input()
        except EOFError:
            # Input ended without the terminating empty line: treat it as
            # the end of the context instead of crashing with a traceback.
            break
        if not ln:
            break
        lineno += 1
        for word in patt.findall(ln.lower()):
            index.setdefault(word, []).append(lineno)
    return index

def indexAndQuery(index, *args):
    """Return the line numbers that contain every one of the given words.

    Args:
        index: dict mapping a word to the list of line numbers it occurs on.
        *args: the words that must all be present on a line.

    Returns:
        A list of line numbers in ascending order, or None when no line
        contains all of the words (also when no words are given at all).
    """
    found = None
    for word in args:
        got = index.get(word)
        if not got:
            # An unknown word cannot be on any line, so the AND fails.
            return None
        if found is None:
            found = set(got)
        else:
            found &= set(got)
        if not found:
            # The intersection is already empty; later words cannot help.
            return None
    if found is None:
        # No words were supplied; previously this fell through to list(None).
        return None
    # Sets have no defined order, so sort to report lines in reading order.
    return sorted(found)

def indexOrQuery(index, *args):
    """Return the line numbers that contain at least one of the given words.

    Args:
        index: dict mapping a word to the list of line numbers it occurs on.
        *args: the words to look for; words missing from the index are
            ignored.

    Returns:
        A list of line numbers in ascending order (empty if nothing matched).
    """
    found = set()
    for word in args:
        found.update(index.get(word, ()))
    # Sets have no defined order, so sort to report lines in reading order.
    return sorted(found)

def lnparser(ln):
    """Split a query line into its command and the words to look up.

    The line is lower-cased and tokenised with the module-level ``patt``.

    Returns:
        A ``(command, words)`` tuple, where command is 'and' or 'or' and
        words is the list of the remaining tokens, or None when the line
        does not start with and/or or gives no words to search for.
    """
    tokens = patt.findall(ln.lower())
    # A usable query starts with and/or and names at least one word.
    if len(tokens) >= 2 and tokens[0] in ('and', 'or'):
        return tokens[0], tokens[1:]
    return None

# Dispatch table: the command word returned by lnparser() -> query function.
cmdswitch = {'and': indexAndQuery, 'or': indexOrQuery}

print "Enter Context, leave by empty line"

index = makeIndex()

print "Enter and/or keywords to query"

while True:

    got = lnparser(raw_input())

    if not got:

        break

    print cmdswitch[got[0]](index, *got[1])

s = raw_input()

lines = s.split('\n')

dictlines = lines[:100]

mydict = {}

# read 

for i,line in enumerate(dictlines ):

    for word in line.split():

      mydict.setdefault(word,[]).append(i + 1)

# print indices

for word in mydict.keys():

    print "%s: %s"  % (word,", ".join(map(str,sorted(mydict[word]))))

 

def andSearch(words_list):
    """Return the set of line numbers that contain every word in words_list.

    Words are looked up in the module-level ``mydict``.  A word that is not
    in the dictionary occurs on no line, so the result is then empty rather
    than a KeyError.  With an empty words_list the result is every line
    number from 1 to 100.
    """
    # Start from all indexed line numbers (1..100) and narrow down per word.
    a = set(range(1, 101))
    for word in words_list:
        a = a.intersection(mydict.get(word, ()))
    return a

def orSearch(words_list):
    """Return the set of line numbers that contain any word in words_list.

    Words are looked up in the module-level ``mydict``; words that are not
    in the dictionary contribute nothing instead of raising KeyError.
    """
    a = set()
    for word in words_list:
        a.update(mydict.get(word, ()))
    return a

    

# Query

index = 100

u = lines[index]

while index < len(lines):

    words_list = u.split()

    if ":" in u:

        if words_list[0] == "OR:":

            a = orSearch(words_list)

        else:

            if words_list[0] == 'AND:':

               words_list = words_list[1:]

            a = andSearch(words_list)

    if not a:

        print ", ".join(map(str,list(a)))

    else:

        print "None"

    index += 1

大致思路就是这样。

你还可以使用更灵活的正则表达式(regular expression)。

re 模块的 search() 和 match() 用起来更灵活:search() 在整个字符串中查找第一处匹配,match() 只从字符串开头开始匹配。

import re

# Demo of re.search(): it scans the whole string for the pattern.
# The variable is called "text" so the builtin str is not shadowed.
text = "Welcome to my world. I have 12 apples."

# re.search() returns None when nothing matches, so test the match object
# itself instead of calling .group() on it unconditionally.
if re.search(r"world", text):
    print("match! ")

# Demo of re.match(): it only matches at the beginning of the string.
text = "abcABC"

if re.match(r"[a-zA-Z]+", text):
    upper = re.search(r"[A-Z]+", text)
    # An all-lower-case string has no upper-case run; print "" in that case.
    print("match! ", upper.group() if upper else "")
else:
    print("ummatch! ")