24
2013
09

.Net生成站点sitemap供百度抓取的类和使用


sitemap介绍


百度的sitemap说明地址:http://zhanzhang.baidu.com/wiki/93#01

SiteMap作用:利于百度搜索

举例说明:

http://award.kinpan.com/AwardsIntroduce/AwardNews/20130609172732828125097a79b4e4a

http://award.kinpan.com/AwardsIntroduce/AwardNews/20130624102726828125093ba40f210

http://award.kinpan.com/AwardsIntroduce/AwardNews/201306241038078125000d72ba2d9fe

http://award.kinpan.com/AwardsIntroduce/AwardNews/20130624104148890625018d09da3eb

……………………………………………………………………………………………………

像上面这类网址,只有当网站的某个列表页面中出现了对应的链接时,百度才会抓取。但列表页面一般只显示最新的几条,百度自动爬取网站时通常只能收录到列表页:http://award.kinpan.com/AwardsIntroduce/AwardNews ,大部分带有id的详细页面不会被收录。所以,我们把所有详细内容页面的地址都生成出来提供给百度抓取,这些页面的内容就能被百度收录,搜索时自然也就能搜索出来了。


如果有多个sitemap文件,需要用一个sitemap索引文件把它们列出来:每个sitemap文件对应一个<sitemap></sitemap>片断,重复该片断列明所有sitemap地址,然后把索引文件向站长平台进行提交。


如果网址超过50000条或文件大小超过10MB,就要分隔成 KinpanUrl1.xml、KinpanUrl2.xml……

那么 KinpanUrl.xml 就是索引文件,里面存放 KinpanUrl1.xml、KinpanUrl2.xml……的地址,这个文件很小。

在百度提交网址时只需要提交 KinpanUrl.xml,百度会根据 KinpanUrl.xml 里面的地址去获取 KinpanUrl1.xml、KinpanUrl2.xml……里面的网址。

 

我这里做了另外的优化:把所有子站点生成sitemap的链接放到一起,简化生成步骤;还加入了10分钟只能生成一次的限制,防止恶意生成。因为生成sitemap需要把整个数据库中要显示的页面全部查找出来告诉百度,而数据库查询比较耗资源。

//生成sitemap的类

    /// <summary>
    /// 创建站点sitemap的类
    /// </summary>
    public class CreateSiteMapTxt
    {
        public CreateSiteMapTxt() { }
        private static CreateSiteMapTxt _Instance = null;
        /// <summary>
        /// 静态对象实例
        /// </summary>
        public static CreateSiteMapTxt Instance
        {
            get
            {
                if (_Instance == null) _Instance = new CreateSiteMapTxt();
                return _Instance;
            }
        }
        #region 创建站点的sitemap网址导航文件
        /// <summary>
        /// 创建站点的sitemap网址导航文件
        /// </summary>
        /// <returns>
        /// 成功返回空,失败返回错误信息
        /// </returns>
        public string CreateTxt()
        {
            try
            {
                ConfigSiteUrl site = GetSiteByDefaultControl();
                switch (site)
                {
                    case ConfigSiteUrl.KinpanUrl:
                        {
                            #region 添加网址
                            List<string> list = new List<string>();
                            list.Add("/");//主页
                            list.Add("/InfoNews");
                            list.Add("/InfoNews?typeID=1");
                            list.Add("/InfoNews?typeID=2");
                            NewsTopicBLL bll = new NewsTopicBLL();
                            List<string> listtemp = bll.GetIDs(1);
                            foreach (string id in listtemp)
                            {
                                list.Add("/NewsDescription/Index/" + id);
                            }
                            bll.Dispose();
                            createSiteXml(list, site);
                            #endregion
                        }
                        break;
                    case ConfigSiteUrl.KinpanAwardUrl:
                        {
                            #region 添加网址
                            List<string> list = new List<string>();
                            list.Add("/");//主页
                            int sessionAward = new TangArt.GoldOnPubPlatform.App.Models.XmlHandler().GetXmlSession("~/Upload/Session.xml");
                            list.Add("/AwardsIntroduce/");//本届金盘奖页面
                            for (int i = 1; i < sessionAward; i++)
                            {
                                list.Add("/Awardsreview/Index/" + i);//历届金盘奖页面
                            }
                            list.Add("/Enroll");//评选申报表
                            list.Add("/audienceReg");//观众报名
                            list.Add("/CurrentIntroduce/");//关于金盘奖
                            NewsTopicBLL bll = new NewsTopicBLL();
                            List<string> listtemp = bll.GetIDs(2);//金盘奖新闻
                            foreach (string id in listtemp)
                            {
                                list.Add("/AwardsIntroduce/AwardNews/" + id);
                            }
                            bll.Dispose();
                            GuestinformationBLL GuestInfo_Bll = new BLL.GuestinformationBLL();
                            var guest_info = GuestInfo_Bll.GetServalRecoard(0, 1, sessionAward.ToString());//本届嘉宾
                            foreach (var item in guest_info)
                            {
                                list.Add("/Guest/Index/" + item.ID);
                            }
                            var ActivityLive = GuestInfo_Bll.GetServalRecoard(0, 2, (sessionAward - 1).ToString());//11表示获取11条数据,2表示活动现场,session表示当前届数
                            foreach (var item in ActivityLive)
                            {
                                list.Add("/Guest/Index/" + item.ID);
                            }
                            GuestInfo_Bll.Dispose();
                            AwardSubClasicBLL AC_BLL = new AwardSubClasicBLL();
                            var SubClasic = AC_BLL.GetByRefAwardClassMainID(sessionAward, 0);
                            for (int i = 0; i < SubClasic.Count; i++)
                            {
                                list.Add("/WebVote/Index/" + (i + 1));
                            }
                            AC_BLL.Dispose();
                            ActivityFlowClassificationBLL activity_bll = new BLL.ActivityFlowClassificationBLL();
                            var AwardWeek = activity_bll.GetAwardWeek(12, sessionAward);//12表示获取活动周新闻,session表示届数
                            foreach (var item in AwardWeek)
                            {
                                list.Add("/AwardsIntroduce/AwardDescription/" + item.ID);
                            }
                            activity_bll.Dispose();
                            kinpanAwardProjectBLL A_BLL = new kinpanAwardProjectBLL();
                            var kinpanAwardProjects = A_BLL.GetAll();
                            foreach (var item in kinpanAwardProjects)
                            {
                                list.Add("/WebVoteDetails/Index/" + item.ID);//所有金盘奖项目
                            }
                            A_BLL.Dispose();
                            createSiteXml(list, site);
                            #endregion
                        }
                        break;
                    case ConfigSiteUrl.TopicUrl:
                        {
                            #region 添加网址
                            List<string> list = new List<string>();
                            list.Add("/");//主页
                            list.Add("/TopicList");
                            list.Add("/TopicList?hotOrNew=1");
                            TopicInformationBLL iTopicInformationBll = new TopicInformationBLL();
                            var listtemp = iTopicInformationBll.GetAll();
                            foreach (var item in listtemp)
                            {
                                list.Add("/TopicDetail/Index/" + item.ID);
                            }
                            iTopicInformationBll.Dispose();
                            createSiteXml(list, site);
                            #endregion
                        }
                        break;
                    case ConfigSiteUrl.ExhibitionUrl:
                        {
                            #region 添加网址
                            List<string> list = new List<string>();
                            list.Add("/");//主页
                            list.Add("/Aboutus?id=1&classID=2013041714244184765625a6ac8f1f3");//展会介绍
                            list.Add("/AboutUs?id=1&classID=2013041714245473828129b784c41ac");//关于我们
                            list.Add("/ExhibitorsReg?id=2&classID=201304171425249550781926eb9025e");//参展商登记
                            list.Add("/ExhibiWeChoose?id=2&classID=2013041714251700683594951e43c45");//为什么选择我们
                            list.Add("/ActivitiesRow?id=2&classID=201304171425317929687756f9fd18e");//活动安排
                            list.Add("/ExhibiWeChoose?id=2&classID=2013041714253846191409b4a796120");//展位图
                            list.Add("/ExhibiWeChoose?id=2&classID=20130417142545844726507e95c8997");//收费标准
                            list.Add("/ExhibiWeChoose?id=2&classID=201304171425531689453bac4bc46ee");//广告赞助
                            list.Add("/ExhibitionManual?id=2&classID=201304171425598222656df75fa0cef");//参展商手册
                            list.Add("/ExhibiReg?id=3&classID=201304171426340244140e4260ea374");//参观登记
                            list.Add("/ExhibitionActivity?id=3&classID=201304171426268144531394568690f");//展会活动
                            list.Add("/hotelService?id=3&classID=20130417142648191406298eae94aa0");//酒店交通
                            list.Add("/hotelService?id=3&classID=2013041714264146289066ff9c76a91");//现场服务
                            list.Add("/ExhibitionDownLoad?id=5");//下载专区
                            list.Add("/ContactUs?id=7&classID=2013061816191867506073b036b8afc");//联系我们
                            list.Add("/NewsDynamic?id=4");//新闻媒体
                            list.Add("/NewsDynamic?id=4&newsType=1&classID=2013041714271025195311627d7a0f6");//展会动态
                            list.Add("/NewsDynamic?id=4&newsType=2&classID=201304171427171025390e6c540a6fe");//行业新闻
                            list.Add("/NewsMedia?id=4&classID=201304171427242744140985b276cbe");//合作媒体
                            ExhibitionNewsBLL exh_newsBll = new ExhibitionNewsBLL();
                            var listtemp = exh_newsBll.GetAll();//展会的新闻
                            foreach (var item in listtemp)
                            {
                                list.Add("/Exhibition/ExhibitionNewsDescription/" + item.ID);
                            }
                            exh_newsBll.Dispose();
                            createSiteXml(list, site);
                            #endregion
                        }
                        break;
                    case ConfigSiteUrl.MagazineUrl:
                        {
                            #region 添加网址
                            List<string> list = new List<string>();
                            list.Add("/");//主页
                            list.Add("/Magazine/Magazine/sdlp/");//时代楼盘
                            list.Add("/Magazine/MagazineReView/sdlp/");
                            list.Add("/Magazine/MagazineReaderBack/sdlp/");
                            list.Add("/Magazine/MagazineActivity/sdlp/");
                            list.Add("/Magazine/Magazine/sdkj/");//时代空间
                            list.Add("/Magazine/MagazineReView/sdkj/");
                            list.Add("/Magazine/MagazineReaderBack/sdkj/");
                            list.Add("/Magazine/MagazineActivity/sdkj/");
                            list.Add("/Magazine/Magazine/chyue/");//超越
                            list.Add("/Magazine/MagazineReView/chyue/");
                            list.Add("/Magazine/MagazineReaderBack/chyue/");
                            list.Add("/Magazine/MagazineActivity/chyue/");
                            MagazineBookInfoBLL magazine_bll = new BLL.MagazineBookInfoBLL();
                            var sdlp = magazine_bll.GetIDsByTag("sdlp");
                            foreach (var item in sdlp)
                            {
                                list.Add("/Magazine/MagazineEachperiod/sdlp?id=" + item);
                            }
                            var sdkj = magazine_bll.GetIDsByTag("sdkj");
                            foreach (var item in sdkj)
                            {
                                list.Add("/Magazine/MagazineEachperiod/sdkj?id=" + item);
                            }
                            var chyue = magazine_bll.GetIDsByTag("chyue");
                            foreach (var item in chyue)
                            {
                                list.Add("/Magazine/MagazineEachperiod/chyue?id=" + item);
                            }
                            magazine_bll.Dispose();
                            createSiteXml(list, site);
                            #endregion
                        }
                        break;
                    case ConfigSiteUrl.LiveUrl:
                        {
                            #region 添加网址
                            List<string> list = new List<string>();
                            list.Add("/");//主页
                            list.Add("/livevideo");//视频直播
                            AwardLivePictureBLL AP_BLL = new BLL.AwardLivePictureBLL();
                            var Videos = AP_BLL.GetOrderByTime();//直播视频列表
                            foreach (var item in Videos)
                            {
                                list.Add("/LiveVideo/index/" + item.ID);
                            }
                            AP_BLL.Dispose();
                            createSiteXml(list, site);
                            #endregion
                        }
                        break;
                }
                return "";
            }
            catch (Exception ex)
            {
                // throw ex;
                return ex.Message;
            }
        }
        private void createSiteXml(List<string> list, ConfigSiteUrl site)
        {
            string domain = ConfigurationManager.AppSettings[site.ToString()];
            string SiteMapIndexPath = HttpContext.Current.Server.MapPath("/Upload/SiteMap/" + site.ToString() + ".xml");
            if (File.Exists(SiteMapIndexPath))
            {
                FileInfo F_sitemapindex = new FileInfo(SiteMapIndexPath);
                if (F_sitemapindex.LastWriteTime.AddMinutes(10) > DateTime.Now)
                {
                    throw new Exception(string.Format("上次生成时间:{0},两次生成时间至少相隔10分钟,请不要生成太频繁!", F_sitemapindex.LastWriteTime));
                }
            }
            int allcount = list.Count;
            XmlDocument doc_index = new XmlDocument();
            XmlDeclaration dec_index = doc_index.CreateXmlDeclaration("1.0", "utf-8", null);
            doc_index.AppendChild(dec_index);
            XmlElement sitemapindex = doc_index.CreateElement("sitemapindex");
            XmlDocument doc = null;
            XmlElement Root = null;
            int cur_mod = 0;
            const int sitecountPerFile = 46000;//大概是10M,每个文件最多50000个网址
            int j = 1;
            for (int i = 0; i < allcount; i++)//循环创建xml网址文件
            {
                cur_mod = (i + 1) % sitecountPerFile;//
                if (cur_mod == 1)//第一个,创建新的xml文件
                {
                    doc = new XmlDocument();
                    XmlDeclaration dec = doc.CreateXmlDeclaration("1.0", "utf-8", null);
                    doc.AppendChild(dec);
                    Root = doc.CreateElement("urlset");
                }
                XmlElement url = doc.CreateElement("url");//一个网址的节点
                XmlElement url_1_loc = doc.CreateElement("loc");
                url_1_loc.InnerText = domain + list[i];//页面地址
                XmlElement url_2_lastmod = doc.CreateElement("lastmod");
                url_2_lastmod.InnerText = DateTime.Now.ToString();//内容最后更新时间
                XmlElement url_3_changefreq = doc.CreateElement("changefreq");
                url_3_changefreq.InnerText = "daily";//可能更新的频率
                XmlElement url_4_priority = doc.CreateElement("priority");
                url_4_priority.InnerText = "1";//权重(0~1之间)
                url.AppendChild(url_1_loc);
                url.AppendChild(url_2_lastmod);
                url.AppendChild(url_3_changefreq);
                url.AppendChild(url_4_priority);
                Root.AppendChild(url);
                if (cur_mod == 0 || i == allcount - 1)//最后一个,保存xml网址文件
                {
                    doc.AppendChild(Root);
                    string SiteMapDir = HttpContext.Current.Server.MapPath("/Upload/SiteMap");
                    Common.Upload.CreateDir(SiteMapDir);
                    string sitemap_fileName = site.ToString() + j + ".xml";
                    string path = Path.Combine(SiteMapDir, sitemap_fileName);
                    doc.Save(path);//sitemap文件
                    XmlElement sitemap = doc_index.CreateElement("sitemap");
                    XmlElement loc_index = doc_index.CreateElement("loc");
                    loc_index.InnerText = domain + "/Upload/SiteMap/" + sitemap_fileName;//sitemap地址
                    XmlElement lastmod_index = doc_index.CreateElement("lastmod");
                    lastmod_index.InnerText = DateTime.Now.ToString();//内容最后更新时间
                    sitemap.AppendChild(loc_index);
                    sitemap.AppendChild(lastmod_index);
                    sitemapindex.AppendChild(sitemap);
                    j++;
                }
            }
            doc_index.AppendChild(sitemapindex);
            doc_index.Save(SiteMapIndexPath);//sitemap索引文件
        }
        #endregion
        #region 根据配置文件获取当前网站的子站域名
        /// <summary>
        /// 根据配置文件获取当前网站的子站域名
        /// </summary>
        /// <returns></returns>
        public static ConfigSiteUrl GetSiteByDefaultControl()
        {
            string DefaultControl = ConfigurationManager.AppSettings["DefaultControl"].ToLower();
            switch (DefaultControl)
            {
                case "index":
                    return ConfigSiteUrl.KinpanUrl;
                case "awards":
                    return ConfigSiteUrl.KinpanAwardUrl;
                case "exhibition":
                    return ConfigSiteUrl.ExhibitionUrl;
                case "magazine":
                    return ConfigSiteUrl.MagazineUrl;
                case "onlinepub":
                    return ConfigSiteUrl.TopicUrl;
                case "awardlivehome":
                    return ConfigSiteUrl.LiveUrl;
                default:
                    return ConfigSiteUrl.Default;
            }
        }
        #endregion


//调用,我这个是分站生成的,在web.config中的appsettings下面配置各子站点的网址:

    bool isright = false;
    string str = "";
    if (Request.UrlReferrer != null)
    {
        string host = Request.UrlReferrer.Host.ToLower();
        if (host=="localhost"||host.EndsWith("test.com"))//自己网页上链接过来的才生成,防止恶意生成。
        {
            isright = true;
        }
    }
    if (isright)
    {
        string Info = CreateSiteMapTxt.Instance.CreateTxt();
        if (Info == "")
        {
            str = "生成成功" + DateTime.Now.ToString() + "<br/><br/>";
            ConfigSiteUrl site = CreateSiteMapTxt.GetSiteByDefaultControl();
            str = str + "当前生成的sitemap文件:" + ConfigurationManager.AppSettings[site.ToString()] + "/Upload/SiteMap/" + site.ToString() + ".xml <br/><br/><br/>";
        }
        else
        {
            str = "失败:" + Info + DateTime.Now.ToString() + "<br/><br/>";
        }
    }
    else
    {
        str = "请点击下面的链接生成sitemap文件" + DateTime.Now.ToString() + "<br/><br/>"; 
    }
    List<ConfigSiteUrl> list = new List<ConfigSiteUrl>();
    list.Add(ConfigSiteUrl.ExhibitionUrl);
    list.Add(ConfigSiteUrl.KinpanAwardUrl);
    list.Add(ConfigSiteUrl.KinpanUrl);
    list.Add(ConfigSiteUrl.LiveUrl);
    list.Add(ConfigSiteUrl.TopicUrl);
    list.Add(ConfigSiteUrl.MagazineUrl);
    str = str + string.Format("<font color='red'>提示:</font><br/>");
    foreach (var site in list)
    {
        string siteurl = ConfigurationManager.AppSettings[site.ToString()];
        str = str + string.Format("网址:{0} <br/>sitemap文件地址:{1}   <a href='{0}/index/SiteMap'>点击生成</a><br/><br/>", siteurl, siteurl + "/Upload/SiteMap/" + site.ToString() + ".xml");
    }
    string thisurl = Request.Url.AbsolutePath;
    str = str +string.Format("<a href='{0}'>点击生成当前站点sitemap</a><br/><br/>",thisurl);






版权声明:
作者:真爱无限 出处:http://www.pukuimin.top 本文为博主原创文章版权归作者所有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接.
« 上一篇下一篇 »

相关文章:

评论列表:

发表评论:

◎欢迎参与讨论,请在这里发表您的看法、交流您的观点。