java关于html匹配的正则问题

html-css012

java关于html匹配的正则问题,第1张

不要一行一行地匹配:先把整个网页的内容全部读取下来,再用正则表达式对完整的文本进行匹配即可。

URL url=new URL("http://www.weather.com.cn:80/weather/101210101.shtml")

BufferedReader bufIn=new BufferedReader(new InputStreamReader(url.openStream()))

StringBuilder builder = new StringBuilder()

String regex="<table.*"

String line=null

Pattern p=Pattern.compile(regex,Pattern.DOTALL)

while((line=bufIn.readLine())!=null){

builder.append(line)

}

line = builder.toString()

Matcher m=p.matcher(line)

//char[] buf=

while(m.find()){

//将符合规则的数据存储到集合中

list.add(m.group())

}

for(String list3 : list){

System.out.println(list3)

}

<html>

<head>

<script language="javascript">

var G=document.getElementById

// Builds a small HTML sample in memory, pulls out every <font>...</font> element
// with a regular expression, and shows the matches in an alert box.
function window_load() {
    // Sample markup, assembled piece by piece; document.body.innerHTML could be
    // used here instead to scan the live page.
    var pieces = [
        "<html>",
        " <head>",
        " </head>",
        " <body>",
        " <font color='red'>test1</font><br />",
        " <font size='18'>test2</font><br />",
        " <font >test3</font><br />",
        " <font></font>",
        " </body>",
        "</html>"
    ];
    var strHTML = pieces.join("");

    // \1 refers back to the captured tag name, so the closing tag must be the same tag.
    // Flags: i = case-insensitive, g = return all matches rather than just the first.
    var reg = /<(font)\s*[^<>]*>[^<>]*<\/\1\s*>/ig;

    var aryResult = strHTML.match(reg);
    var report = "用match方法匹配 ,结果:\n\n" + aryResult.join("\n");
    alert(report);
}

</script>

</head>

<body onload="window_load()">

<!--

<font color='red'>test1</font><br />

<font size='18'>test2</font><br />

<font >test3</font><br />

<font></font>

-->

</body>

</html>

// 这是你的HTML内容(原文中的标签在转载时被过滤掉了,这里以 font 标签为例重建)
String str = "<font>abc</font><font>def</font>";
// 这是正则表达式
String p = "(<font[^>]*>[^<]*</font>)";
Pattern pt = Pattern.compile(p);
Matcher m = pt.matcher(str);
while (m.find())
{
    // 输出匹配的内容
    System.out.println(m.group(1));
}

回答于 2022-12-11 抢首赞 已踩 0 查看全部1个回答 — 为你推荐更多精彩内容 — 正在加载 加载失败 点击重新加载

微信

微博

QQ

QQ空间

答案纠错

举报

取消 赞赏答主 5 10 50 100 200

已赞赏0财富值

合计:0 财富值

登录后赞赏 选择举报类型 侵犯版权 色情低俗 涉嫌违法犯罪 时政信息不实 垃圾广告 低质灌水 工作人员会在48小时内处理,处理结果请关注系统通知,感谢您对百度知道的支持。 确定 void function(a,b,c,d,e,f){function g(b){a.attachEvent?a.attachEvent("onload",b,!1):a.addEventListener&&a.addEventListener("load",b)}function h(a,c,d){d=d||15var e=new Datee.setTime((new Date).getTime()+1e3*d),b.cookie=a+"="+escape(c)+"path=/expires="+e.toGMTString()}function i(a){var c=b.cookie.match(new RegExp("(^| )"+a+"=([^]*)(|$)"))return null!=c?unescape(c[2]):null}function j(){var a=i("PMS_JT")if(a){h("PMS_JT","",-1)try{a=a.match(/{["']s["']:(\d+),["']r["']:["']([\s\S]+)["']}/),a=a&&a[1]&&a[2]?{s:parseInt(a[1]),r:a[2]}:{}}catch(c){a={}}a.r&&b.referrer.replace(/#.*/,"")!=a.r||alog("speed.set","wt",a.s)}}if(a.alogObjectConfig){var k=a.alogObjectConfig.sample,l=a.alogObjectConfig.randd="https:"===a.location.protocol?"https://fex.bdstatic.com"+d:"http://fex.bdstatic.com"+d,k&&l&&l>k||(g(function(){alog("speed.set","lt",+new Date),e=b.createElement(c),e.async=!0,e.src=d+"?v="+~(new Date/864e5)+~(new Date/864e5),f=b.getElementsByTagName(c)[0],f.parentNode.insertBefore(e,f)}),j())}}(window,document,"script","/hunter/alog/dp.mobile.min.js") window.tt = 1676811933