ASP.NET改变HTML标签

html-css021

ASP.NET改变HTML标签,第1张

using System.Text.RegularExpressions;

/// <summary>
/// Strips HTML markup from a string and returns the remaining text, HTML-encoded.
/// </summary>
/// <param name="Htmlstring">Source text that may contain HTML markup.</param>
/// <returns>
/// The text with script blocks, tags and comments removed, the supported character
/// entities decoded, any remaining angle brackets and CR/LF pairs dropped, and the
/// result HTML-encoded and trimmed. Returns an empty string for null or empty input.
/// </returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script blocks. Singleline lets '.' cross line breaks so that
    // multi-line scripts are removed together with their content.
    Htmlstring = Regex.Replace(
        Htmlstring,
        @"<script[^>]*?>.*?</script>",
        "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove tags, then line breaks followed by whitespace, then comment residue.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode entities in a single pass so that text such as "&amp;lt;" is decoded
    // exactly once. The pattern requires the terminating ';' so no stray
    // semicolon is left behind, and only numeric references of the form "&#NNN;"
    // are touched (ordinary digits in the text are preserved).
    Htmlstring = Regex.Replace(
        Htmlstring,
        @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
        new MatchEvaluator(DecodeHtmlEntity),
        RegexOptions.IgnoreCase);

    // Strings are immutable: the results must be assigned back.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // WebUtility does not need an active HttpContext, so this also works
    // outside of a web request (background jobs, unit tests).
    return System.Net.WebUtility.HtmlEncode(Htmlstring).Trim();
}

/// <summary>
/// Maps one matched entity (capture group 1 holds the name or "#NNN") to its replacement text.
/// </summary>
private static string DecodeHtmlEntity(Match match)
{
    switch (match.Groups[1].Value.ToLowerInvariant())
    {
        case "quot":
        case "#34":
            return "\"";
        case "amp":
        case "#38":
            return "&";
        case "lt":
        case "#60":
            return "<";
        case "gt":
        case "#62":
            return ">";
        case "nbsp":
        case "#160":
            return " ";
        case "iexcl":
        case "#161":
            return "\u00A1";
        case "cent":
        case "#162":
            return "\u00A2";
        case "pound":
        case "#163":
            return "\u00A3";
        case "copy":
        case "#169":
            return "\u00A9";
        default:
            // Any other numeric character reference is dropped.
            return string.Empty;
    }
}

写一个静态方法

#region Remove HTML tags

/// <summary>
/// Removes every <c>&lt;...&gt;</c> sequence from the input text.
/// </summary>
/// <param name="HTMLStr">Text that may contain HTML tags.</param>
/// <returns>
/// The input with all substrings matching <c>&lt;[^&gt;]*&gt;</c> removed;
/// an empty string when the input is null or empty.
/// </returns>
public static string ParseTags(string HTMLStr)
{
    if (string.IsNullOrEmpty(HTMLStr))
    {
        return string.Empty;
    }

    return System.Text.RegularExpressions.Regex.Replace(HTMLStr, "<[^>]*>", "");
}

#endregion

#region Extract the image URL from text

/// <summary>
/// Returns the <c>src</c> value of the first <c>&lt;img&gt;</c> tag found in the text.
/// </summary>
/// <param name="HTMLStr">Text that may contain an <c>&lt;img&gt;</c> tag.</param>
/// <returns>
/// The <c>src</c> attribute value with its original casing and without surrounding
/// quotes, or an empty string when the input is null/empty or no match is found.
/// </returns>
public static string GetImgUrl(string HTMLStr)
{
    if (string.IsNullOrEmpty(HTMLStr))
    {
        return string.Empty;
    }

    // IgnoreCase replaces the former ToLower() call, which also lower-cased the
    // returned URL. The three alternatives handle double-quoted, single-quoted
    // and unquoted attribute values so no quote character ends up in the result.
    // "src" must be preceded by whitespace so that e.g. "data-src" is not matched.
    Match m = Regex.Match(
        HTMLStr,
        @"<img\s+(?:[^>]*?\s)?src\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s>]+))",
        RegexOptions.IgnoreCase);

    if (!m.Success)
    {
        return string.Empty;
    }

    return m.Groups["url"].Value;
}

#endregion

没有几种.

原理就是删掉正则匹配到的html标签

至于删除时用 Replace 还是 Remove,我觉得没那么重要。

html标签的正则,网上抄的不知道对不对:

"<(.[^>]*)>"

标签上加 runat="server" <img ID="xx" src=" " width="200" height="100" runat="server" >

后台用 this.xx.Src = "图片地址"; 试一下