在一些需要经常更新页面数据的网站中,访问量不大时,一般直接发布带后台代码的动态页面,每次访问都要与数据库交互。但是一旦访问量增加,服务器开销的成本就必须考虑进来了。例如一些文章,后台编辑后内容存入数据库,如果有1000人访问,而每次都去读取数据库,那这1000次IO访问的开销就显得比较大了。一个好的方法是:文章确定之后,把它做成静态页面,并且由程序来完成这项工作,也就是递归遍历整个网站,将网站内容都访问一遍,生成这些页面的静态文本页面,再将这些页面发布。这样对浏览者而言,他看到的还是同一个地址,同一份文章,只是这一份是静态的而已。这样就提升了网站的效率,节约了资源。
下面附上一份用C#遍历网站内容并生成静态内容页面的代码:
// URLs whose static page has already been written. SaveHtmlCode consults it so that
// each page is generated once and link cycles do not recurse forever.
private HashSet<string> htmlCreatedList = new HashSet<string>();

/// <summary>
/// Recursively turns pages into static HTML files: fetches the page at
/// <paramref name="urlString"/>, writes it to the path given by ToPhysicalPath,
/// then repeats the process for every link found in the fetched HTML.
/// </summary>
/// <param name="urlString">Address of the page to fetch and save.</param>
public void SaveHtmlCode(string urlString)
{
    // Already generated during this crawl: nothing to do.
    if (htmlCreatedList.Contains(urlString))
    {
        return;
    }

    string htmlCode = GetHtmlCodeFromUrl(urlString);
    string htmlPath = urlString.ToPhysicalPath();

    // CreateDirectory does nothing when the directory already exists,
    // so no separate existence check is required.
    Directory.CreateDirectory(Path.GetDirectoryName(htmlPath));
    File.WriteAllText(htmlPath, htmlCode);

    // Record the page before following its links so that pages linking
    // back to this one terminate at the guard above.
    htmlCreatedList.Add(urlString);

    foreach (string url in GetUrlLinkFromHtmlCode(htmlCode))
    {
        // Each item is the full attribute text (e.g. href="/a/b"); reduce it to the bare link.
        // The extraction regex is case-insensitive and accepts either quote style,
        // so the prefix must be stripped case-insensitively and both quote
        // characters removed; otherwise HREF=... or '...' would leak into the URL.
        string urlTemp = Regex.Replace(url, "href\\s*=\\s*", "", RegexOptions.IgnoreCase);
        urlTemp = urlTemp.Replace("\"", "").Replace("'", "");
        urlTemp = urlTemp.Replace("\\", "/");

        SaveHtmlCode(WebConfigInfo.UrlPrefix + urlTemp);
    }
}
/// <summary>
/// Requests a page through <see cref="HttpWebRequest"/> and returns its body as text.
/// The body is always decoded as UTF-8.
/// </summary>
/// <param name="urlString">Address of the page to request.</param>
/// <returns>The HTML text returned for the given address.</returns>
private string GetHtmlCodeFromUrl(string urlString)
{
    HttpWebRequest hwRequest = (HttpWebRequest)WebRequest.Create(urlString);
    // NOTE(review): the value itself starts with "User-Agent:" and has no closing
    // parenthesis; kept unchanged here, confirm whether that is intended.
    hwRequest.UserAgent = "User-Agent:Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705";
    hwRequest.Accept = "*/*";
    hwRequest.KeepAlive = true;
    hwRequest.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

    // using blocks release the response, stream and reader even when reading throws,
    // which the previous manual Close() calls did not guarantee.
    using (HttpWebResponse hwResponse = (HttpWebResponse)hwRequest.GetResponse())
    using (Stream streamResponse = hwResponse.GetResponseStream())
    using (StreamReader readerOfStream = new StreamReader(streamResponse, System.Text.Encoding.GetEncoding("utf-8")))
    {
        return readerOfStream.ReadToEnd();
    }
}
///<summary>
/// Finds hyperlink attributes in HTML with a regular expression.
/// Only quoted href values that contain none of the characters . # : ' " are matched.
///</summary>
///<param name="htmlCode">HTML text to search for links.</param>
///<returns>
/// The distinct matched attribute texts (including the leading href= and the quotes),
/// with the entity &amp;amp; replaced by a plain ampersand.
///</returns>
private IEnumerable<string> GetUrlLinkFromHtmlCode(string htmlCode)
{
    string strRegex = "href\\s*=\\s*(?:[\"'](?<1>[^\"'.#:]*)[\"'])";
    Regex r = new Regex(strRegex, RegexOptions.IgnoreCase);

    // Query strings inside HTML attributes carry '&' as the entity "&amp;";
    // turn it back into '&' so the link can be requested as a URL.
    // ToList() fixes the result so later enumerations do not re-run the regex.
    return r.Matches(htmlCode)
            .Cast<Match>()
            .Select(m => m.Value.Replace("&amp;", "&"))
            .Distinct()
            .ToList();
}
}
上面用到的ToPhysicalPath是给string类型扩展的一个方法,用于把页面地址转换成静态文件的物理路径:
/// <summary>
/// Maps a page URL to the file path of its static copy, placed under the "Html"
/// folder of the current web application's physical path.
/// The URL path becomes the directory. A URL without a query string maps to
/// "Index.html"; otherwise the query keys and values are concatenated into the
/// file name, followed by ".html".
/// </summary>
/// <param name="urlString">URL to convert; it is parsed with <see cref="System.Uri"/>.</param>
/// <returns>The backslash-separated file path for the static page.</returns>
public static string ToPhysicalPath(this string urlString)
{
    System.Uri uri = new System.Uri(urlString);
    string htmlPath = string.Format("{0}\\Html\\{1}\\", System.Web.HttpContext.Current.Request.PhysicalApplicationPath, uri.AbsolutePath);

    // Drop the '?', '&' and '=' delimiters so only keys and values remain in the file name.
    string[] querys = uri.Query.Split(new char[] { '?', '&', '=' }, StringSplitOptions.RemoveEmptyEntries);
    htmlPath += string.Join(string.Empty, querys);
    htmlPath += querys.Length.Equals(0) ? "Index.html" : ".html";

    htmlPath = htmlPath.Replace("/", "\\");

    // Joining the pieces can produce runs of three or more separators (for example
    // for the site root, whose AbsolutePath is "/"). One Replace pass would still
    // leave a doubled separator, so repeat until none remain.
    while (htmlPath.Contains("\\\\"))
    {
        htmlPath = htmlPath.Replace("\\\\", "\\");
    }
    return htmlPath;
}
总结
以上就是这篇文章的全部内容了,希望本文的内容对大家的学习或者工作具有一定的参考学习价值,谢谢大家对服务器之家的支持。如果你想了解更多相关内容请查看下面相关链接
原文链接:https://blog.csdn.net/chenqiangdage/article/details/49821189
dy("nrwz");