import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
/**
 * Fetches a web page and extracts the targets of its anchor ({@code <a ... href=...>}) tags.
 *
 * <p>The page is downloaded in the constructor; {@link #getAnchorTagUrls()} then scans the
 * cached source with plain string operations (no HTML parser is involved).
 */
public class GetLinks {

    /** Start of an anchor tag; must be followed by whitespace to count as a match. */
    private static final String ANCHOR_OPEN = "<a";

    /** Attribute prefix searched for inside a whitespace-normalized tag. */
    private static final String HREF_PREFIX = "href=";

    /** Source of the page, with its lines joined by a newline character. */
    private final String webSource;

    /** Address the page was loaded from; relative links are resolved against it. */
    private final String url;

    /**
     * Downloads the page behind {@code url}.
     *
     * @param url absolute address of the page
     * @throws MalformedURLException if {@code url} is not a valid URL
     * @throws IOException if the page cannot be read
     */
    public GetLinks(String url) throws MalformedURLException, IOException {
        this.url = complete(url);
        this.webSource = readAll(new URL(this.url));
    }

    /**
     * Reads everything the URL serves into one string.
     *
     * <p>Lines are re-joined with {@code '\n'} so that tokens separated only by a line break
     * (for example {@code <a} followed by {@code href=} on the next line) do not run together.
     * NOTE: the stream is decoded with the platform default charset.
     *
     * @param source location to read
     * @return the full content
     * @throws IOException if the stream cannot be opened or read
     */
    private static String readAll(URL source) throws IOException {
        StringBuilder content = new StringBuilder();
        BufferedReader reader = new BufferedReader(new InputStreamReader(source.openStream()));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append('\n');
            }
        } finally {
            // Always release the connection, including when reading fails half-way.
            reader.close();
        }
        return content.toString();
    }

    /**
     * Appends a trailing slash to {@code link} when the address with and without the slash
     * serve identical content; otherwise returns {@code link} unchanged.
     *
     * @param link absolute address to inspect
     * @return {@code link}, possibly with {@code "/"} appended
     * @throws MalformedURLException if {@code link} is not a valid URL
     */
    private static String complete(String link) throws MalformedURLException {
        // Constructed outside the try block so that a malformed address reaches the caller.
        URL plain = new URL(link);
        if (link.endsWith("/")) {
            return link;
        }
        URL slashed = new URL(link + "/");
        try {
            if (readAll(plain).equals(readAll(slashed))) {
                return link + "/";
            }
        } catch (IOException ignored) {
            // Best effort: if either variant cannot be fetched, keep the address as given.
        }
        return link;
    }

    /**
     * Turns a link found in the page into an absolute address.
     *
     * @param link relative or absolute link; may be {@code null}
     * @return {@code null} for {@code null}; the trimmed link itself when it already starts
     *         with {@code http://} or {@code https://}; otherwise the link resolved against
     *         the page address, or the trimmed link when it cannot be resolved
     */
    private String urlHandler(String link) {
        if (link == null) {
            return null;
        }
        String target = link.trim();
        String lowerTarget = target.toLowerCase();
        if (lowerTarget.startsWith("http://") || lowerTarget.startsWith("https://")) {
            return target;
        }
        try {
            // java.net.URL performs the relative resolution ("../", "/path", "//host", "#frag").
            return new URL(new URL(url), target).toString();
        } catch (MalformedURLException e) {
            // Schemes without a registered handler (e.g. "javascript:") end up here.
            return target;
        }
    }

    /**
     * Extracts the raw {@code href} value from one anchor tag.
     *
     * @param tag text from {@code <a} up to (not including) the closing {@code >}
     * @return the attribute value without surrounding quotes, or {@code null} when the tag
     *         has no {@code href} attribute
     */
    private static String extractHref(String tag) {
        // Collapse whitespace around '=' so that `href = "x"` and `href="x"` look the same.
        String normalized = tag.replaceAll("\\s*=\\s*", "=");
        int hrefIndex = findHrefAttribute(normalized.toLowerCase());
        if (hrefIndex == -1) {
            return null;
        }
        int valueStart = hrefIndex + HREF_PREFIX.length();
        if (valueStart >= normalized.length()) {
            return "";
        }
        char first = normalized.charAt(valueStart);
        if (first == '"' || first == '\'') {
            // Quoted value: only the character right after "href=" decides the quote style,
            // so quotes belonging to later attributes are never mistaken for it.
            int closing = normalized.indexOf(first, valueStart + 1);
            return normalized.substring(valueStart + 1,
                    closing == -1 ? normalized.length() : closing);
        }
        // Unquoted value: runs until the next whitespace or the end of the tag.
        int stop = valueStart;
        while (stop < normalized.length() && !Character.isWhitespace(normalized.charAt(stop))) {
            stop++;
        }
        return normalized.substring(valueStart, stop);
    }

    /**
     * Finds the first {@code href=} that is not the tail of a longer attribute name such as
     * {@code data-href=}.
     *
     * @param lowerTag lower-cased, whitespace-normalized tag text
     * @return index of {@code href=}, or -1 when absent
     */
    private static int findHrefAttribute(String lowerTag) {
        int at = lowerTag.indexOf(HREF_PREFIX);
        while (at != -1) {
            if (at == 0 || !isNameChar(lowerTag.charAt(at - 1))) {
                return at;
            }
            at = lowerTag.indexOf(HREF_PREFIX, at + 1);
        }
        return -1;
    }

    /** Returns whether {@code c} can be part of the attribute name preceding {@code href}. */
    private static boolean isNameChar(char c) {
        return Character.isLetterOrDigit(c) || c == '-' || c == '_';
    }

    /**
     * Collects the link targets of all anchor tags in the downloaded page, in document order.
     * Each resolved link is also printed to standard output.
     *
     * @return the resolved links; empty (never {@code null}) when the page has none
     */
    public List<String> getAnchorTagUrls() {
        List<String> links = new ArrayList<String>();
        if (webSource == null) {
            System.out.println("没有网页源代码");
            return links;
        }
        // Lower-case once up front instead of once per loop iteration.
        String lowerSource = webSource.toLowerCase();
        for (int index = lowerSource.indexOf(ANCHOR_OPEN);
                index != -1;
                index = lowerSource.indexOf(ANCHOR_OPEN, index + ANCHOR_OPEN.length())) {
            int afterName = index + ANCHOR_OPEN.length();
            if (afterName >= webSource.length()) {
                break;
            }
            if (!Character.isWhitespace(webSource.charAt(afterName))) {
                continue; // some other tag that merely starts with "a", e.g. <abbr>
            }
            int end = webSource.indexOf('>', afterName);
            String tag = webSource.substring(index, end == -1 ? webSource.length() : end);
            String href = extractHref(tag);
            if (href == null) {
                continue;
            }
            String resolved = urlHandler(href);
            links.add(resolved);
            System.out.println(resolved);
        }
        return links;
    }

    /**
     * Demo entry point: prints every link found on a fixed page.
     *
     * @param args unused
     * @throws Exception if the page cannot be downloaded
     */
    public static void main(String[] args) throws Exception {
        GetLinks gl = new GetLinks("http://www.baidu.com");
        List<String> list = gl.getAnchorTagUrls();
        for (String str : list) {
            System.out.println(str);
        }
    }
}
JavaScript中确定url指向最终是靠页面跳转实现的。一、跳转到新页面,并且是在新窗口中打开页面:
/**
 * Opens "xxxx.html" through window.open().
 */
function openHtml() {
    // do something here...
    var target = "xxxx.html";
    window.open(target);
}
window是一个JavaScript对象,可以用它的open方法,需要注意的是,如果这个页面不是一个相对路径,那么要加“http://”,比如:
/**
 * Opens an absolute address through window.open(); note the explicit "http://" prefix.
 */
function openHtml() {
    var target = "http://www.baidu.com";
    window.open(target);
}
二、在本页面窗口中跳转:
/**
 * Navigates the current window to "test2.html" via location.assign().
 */
function totest2() {
    var target = "test2.html";
    window.location.assign(target);
}
如果直接使用location.assign()也可以,但是window.location.assign()更合理一些,它调用的是当前窗口的location对象的assign()方法。
另外,location对象还有一个方法replace()也可以做页面跳转,它跟assign()方法的区别在于:
replace() 方法不会在 History 对象中生成一个新的纪录。当使用该方法时,新的 URL 将覆盖 History 对象中的当前纪录。
解析url,本想用正则表达式处理,但正则表达式速度较慢。用split处理一下就可以了。
package RequestPackage
import java.util.HashMap
import java.util.Map
/**
 * Helpers for splitting a URL string into its page path and its query parameters.
 *
 * <p>NOTE: every method trims and lower-cases its input before parsing, so returned paths,
 * parameter names and parameter values are all lower-case. This is kept for compatibility
 * with existing callers.
 */
public class CRequest {

    /**
     * Returns the part of the URL in front of the query string, page included.
     *
     * @param strURL URL to inspect; may be {@code null}
     * @return the lower-cased text before the first {@code '?'} (the whole URL when it has no
     *         query string), or {@code null} when {@code strURL} is {@code null} or blank
     */
    public static String UrlPage(String strURL) {
        if (strURL == null) {
            return null;
        }
        String normalized = normalize(strURL);
        if (normalized.length() == 0) {
            return null;
        }
        int queryStart = normalized.indexOf('?');
        // A URL without '?' consists of the page path only.
        return queryStart == -1 ? normalized : normalized.substring(0, queryStart);
    }

    /**
     * Strips the path from the URL and keeps the query string.
     *
     * @param strURL URL to inspect; may be {@code null}
     * @return the lower-cased text after the first {@code '?'}, or {@code null} when the URL
     *         is {@code null} or has no non-empty query string
     */
    private static String TruncateUrlPage(String strURL) {
        if (strURL == null) {
            return null;
        }
        String normalized = normalize(strURL);
        int queryStart = normalized.indexOf('?');
        if (queryStart == -1 || queryStart == normalized.length() - 1) {
            return null;
        }
        return normalized.substring(queryStart + 1);
    }

    /**
     * Trims and lower-cases a URL. {@code Locale.ROOT} keeps the result independent of the
     * JVM's default locale.
     */
    private static String normalize(String strURL) {
        return strURL.trim().toLowerCase(java.util.Locale.ROOT);
    }

    /**
     * Parses the query parameters of a URL into a map.
     *
     * <p>For example {@code "index.jsp?Action=del&id=123"} yields {@code action -> del} and
     * {@code id -> 123}. A parameter without a value ({@code "sort"} or {@code "sort="}) is
     * stored with an empty string; pairs with an empty name are skipped. Only the first
     * {@code '='} separates name and value, so values may themselves contain {@code '='}.
     *
     * @param URL URL to inspect; may be {@code null}
     * @return a mutable map of parameter names to values; empty when there is no query string
     */
    public static Map<String, String> URLRequest(String URL) {
        Map<String, String> mapRequest = new HashMap<String, String>();
        String strUrlParam = TruncateUrlPage(URL);
        if (strUrlParam == null) {
            return mapRequest;
        }
        // Each '&'-separated chunk is one name/value pair.
        for (String pair : strUrlParam.split("&")) {
            int separator = pair.indexOf('=');
            String key = separator == -1 ? pair : pair.substring(0, separator);
            if (key.length() == 0) {
                continue; // "&&" or "=value": nothing usable as a name
            }
            String value = separator == -1 ? "" : pair.substring(separator + 1);
            mapRequest.put(key, value);
        }
        return mapRequest;
    }
}
测试类
package RequestPackage
import java.util.Map
public class TestCRequest {
/**用于测试CRequest类
* @param args
*/
public static void main(String[] args) {
// 请求url
String str = "index.jsp?Action=del&id=123&sort="
//url页面路径
System.out.println(CRequest.UrlPage(str))
//url参数键值对
String strRequestKeyAndValues=""
Map<String, String>mapRequest = CRequest.URLRequest(str)
for(String strRequestKey: mapRequest.keySet()) {
String strRequestValue=mapRequest.get(strRequestKey)
strRequestKeyAndValues+="key:"+strRequestKey+",Value:"+strRequestValue+""
}
System.out.println(strRequestKeyAndValues)
//获取无效键时,输出null
System.out.println(mapRequest.get("page"))
}
}