* 入口:
* $filename 文件名
* $tag 标签名
* 返回数组,每项为:
* tagName String
* Text String
* Attrs Array
*
* 示例:
* print_r(tags("test1.htm", "a"));
* print_r(tags("http://localhost/index.htm", "img"));
*/
/**
 * Extract every occurrence of one HTML tag from a file (or URL) and describe it.
 *
 * The document is cut into segments that each begin at an opening `<tag`, so the
 * text captured for one element can never run past the start of the next element
 * of the same name. Matching is case-insensitive.
 *
 * Each returned item always has these three keys:
 *  - 'tagName': the requested tag name, upper-cased.
 *  - 'Text':    the content between the opening tag and the first matching
 *               closing tag inside the segment, with line breaks removed;
 *               '' when the segment has no closing tag (e.g. <img>).
 *  - 'Attrs':   attribute name (upper-cased) => value. Surrounding quotes are
 *               stripped from values; an attribute written without a value
 *               (e.g. `disabled`) maps to false.
 *
 * @param string $filename Path or URL readable by file_get_contents().
 * @param string $tag      Tag name to look for, without angle brackets.
 *
 * @return array List of items shaped as described above; an empty array when the
 *               source cannot be read, the tag name is empty, or nothing matches.
 */
function tags($filename, $tag) {
    $tag = (string) $tag;
    if ($tag === '') {
        return [];
    }

    $buffer = file_get_contents($filename);
    if ($buffer === false || $buffer === '') {
        return [];
    }

    // Escape the tag name so it is matched literally inside the patterns below.
    $name = preg_quote($tag, '~');
    // An opening tag is "<name" followed by whitespace, "/" or ">", so that
    // looking for "a" does not also pick up "<abbr" or "<area".
    $open = '<' . $name . '(?=[\s/>])';

    // Zero-width split: every segment after the first starts at an opening tag.
    $segments = preg_split('~(?=' . $open . ')~i', $buffer);
    if ($segments === false) {
        return [];
    }

    // Group 1: raw attribute text. Group 2 (optional): content up to the first closing tag.
    $elementPattern = '~^' . $open . '([^>]*)>(?:(.*?)</' . $name . '\s*>)?~is';
    // Group 1: attribute name. Group 2 (optional): double-quoted, single-quoted or bare value.
    $attrPattern = '~([^\s=/"\'<>]+)(?:\s*=\s*("[^"]*"|\'[^\']*\'|[^\s>]+))?~';

    $result = [];
    foreach ($segments as $segment) {
        if (!preg_match($elementPattern, $segment, $m)) {
            continue;
        }

        // Build a fresh item for every element so nothing leaks from the previous one.
        $item = [
            'tagName' => strtoupper($tag),
            'Text'    => isset($m[2]) ? str_replace(["\r\n", "\n", "\r"], '', $m[2]) : '',
            'Attrs'   => [],
        ];

        preg_match_all($attrPattern, $m[1], $attrs, PREG_SET_ORDER);
        foreach ($attrs as $attr) {
            $value = false;
            if (isset($attr[2])) {
                $value = $attr[2];
                $length = strlen($value);
                $quoted = $length >= 2
                    && ($value[0] === '"' || $value[0] === "'")
                    && $value[$length - 1] === $value[0];
                if ($quoted) {
                    $value = substr($value, 1, -1);
                }
            }
            $item['Attrs'][strtoupper($attr[1])] = $value;
        }

        $result[] = $item;
    }

    return $result;
}
?>
如果是中间的数据直接就用bs4最简单
from bs4 import BeautifulSoup

# Parse the HTTP response body so individual tags can be looked up.
# NOTE(review): `response` must already hold the fetched page (an object with a
# `.text` attribute); it is not defined in this snippet.
html = BeautifulSoup(response.text, 'html.parser')
body = html.body  # the <body> element of the parsed document
# Find the <div> whose id is "today". The attribute filter has to be a dict
# ({'id': 'today'}); the previous {'id', 'today'} was a set literal and therefore
# did not filter on the id attribute.
div = body.find("div", {'id': 'today'})
就可以了
如果要提取标签内容比如value的值
# Locate the <input id="hidden_title"> element first, then read its `value` attribute.
hidden_input = body.find("input", id='hidden_title')
div = hidden_input['value']