String pattern = "file=\".+jpg\""
// 创建 Pattern 对象
Pattern r = Pattern.compile(pattern)
// 现在创建 matcher 对象
Matcher m = r.matcher(str)
if(m.find()){
String file = m.group()
System.out.println(file.substring(6,file.length()-1))
}
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small demo that downloads a web page over HTTP and prints the value of every
 * {@code href} attribute found on its {@code a} tags.
 */
public class Main
{
    /**
     * Downloads the resource at {@code spec} and returns its body as text.
     *
     * <p>Lines are joined with {@code '\n'} so that markup split across lines
     * (for example a tag name on one line and its attributes on the next) does
     * not get glued together into a different token.
     *
     * @param spec        the URL to fetch; must resolve to an HTTP(S) connection
     * @param charsetName name of the charset used to decode the response body
     * @return the decoded body, or an empty string if the URL is invalid, the
     *         connection is not an {@link HttpURLConnection}, or any I/O error occurs
     */
    public static String getHTML(String spec, String charsetName)
    {
        HttpURLConnection huc = null;
        try
        {
            URL url = new URL(spec);
            huc = (HttpURLConnection) url.openConnection();
            StringBuilder builder = new StringBuilder();
            // try-with-resources closes the reader (and the wrapped stream) even
            // when readLine() throws part-way through the response.
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(huc.getInputStream(), charsetName)))
            {
                String line;
                while (null != (line = br.readLine()))
                {
                    builder.append(line).append('\n');
                }
            }
            return builder.toString();
        }
        catch (IOException | RuntimeException e)
        {
            // Best-effort contract: report the failure and hand back an empty page
            // instead of propagating (covers bad URLs, non-HTTP schemes, null
            // arguments and network errors).
            e.printStackTrace();
            return "";
        }
        finally
        {
            if (huc != null)
            {
                huc.disconnect();
            }
        }
    }

    /**
     * Collects the value of attribute {@code attr} from every {@code nodeName}
     * tag in {@code html}, in document order. Matching is case-insensitive.
     *
     * <p>{@code nodeName} and {@code attr} are concatenated into the regular
     * expression unescaped, so any regex metacharacters they contain are
     * interpreted as regex syntax.
     *
     * @param html     the markup to scan; {@code null} yields an empty list
     * @param nodeName the tag name to look for, e.g. {@code "a"}
     * @param attr     the attribute name to extract, e.g. {@code "href"}
     * @return the captured attribute values; never {@code null}
     */
    public static List<String> getAttr(String html, String nodeName, String attr)
    {
        List<String> list = new ArrayList<>();
        if (html == null)
        {
            return list;
        }
        // <nodeName, a word boundary, anything up to attr inside the same tag,
        // then the separator run (=, quotes, whitespace) and the captured value
        // up to the next quote character.
        String regex = "(?i)<" + nodeName + "\\b[^>]*" + attr + "[=\"\'\\s]+([^\"\']*)[\"\']?[^>]*>";
        Pattern pattern = Pattern.compile(regex);
        Matcher matcher = pattern.matcher(html);
        while (matcher.find())
        {
            list.add(matcher.group(1));
        }
        return list;
    }

    /**
     * Fetches a fixed Baidu Zhidao question page and prints each link target
     * found on it, one per line.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        String html = getHTML("http://zhidao.baidu.com/question/562228126305552124.html?entry=qb_ihome_tag", "UTF-8");
        for (String href : getAttr(html, "a", "href"))
        {
            System.out.println(href);
        }
    }
}