Java实现爬取往期所有双色球开奖结果功能示例

本文实例讲述了用Java实现爬取往期所有双色球开奖结果的功能，分享给大家供大家参考，具体如下：

梦想还是要有的，万一实现了呢？我相信经常买双色球的朋友和我一样，都会有一个疑问：往期双色球的开奖结果是什么？我钟意的这一注双色球在往期是否开过一等奖？如果开过的话，基本上可以放弃这一注了，因为历史上应该没有出现过两期开奖结果完全一致的情况吧？那么往期的开奖结果是什么呢？我自己用Java写了一个简易的类，爬取所有双色球开奖结果。本来想开发安卓版本的，但由于UI等需要时间准备，有缘再开发吧。

								
									 import java.io.BufferedReader;
									 import java.io.BufferedWriter;
									 import java.io.File;
									 import java.io.FileWriter;
									 import java.io.IOException;
									 import java.io.InputStream;
									 import java.io.InputStreamReader;
									 import java.net.HttpURLConnection;
									 import java.net.URL;
									 import java.nio.charset.StandardCharsets;
									 import java.util.regex.Matcher;
									 import java.util.regex.Pattern;
									 import java.util.zip.GZIPInputStream;

									 /**
									  * Downloads every page of the historical "double colour ball" (SSQ) lottery result
									  * list, extracts the numbers of each draw and writes them to a text file, one draw
									  * per line with the numbers separated by single spaces.
									  *
									  * <p>The number of pages is read from the "last page" link of the first list page.
									  * Pages are requested sequentially with a pause between requests.
									  */
									 public class allballs {

									     /** Prefix of every list page URL; the 1-based page number and the suffix follow it. */
									     private static final String BASE_URL_PREFIX = "http://kaijiang.zhcw.com/zhcw/html/ssq/list_";

									     private static final String BASE_URL_SUFFIX = ".html";

									     /** Output file, created in the current working directory. */
									     private static final String OUTPUT_FILE = "双色球.txt";

									     /** Pause between two page requests, to keep the load on the remote site low. */
									     private static final long REQUEST_INTERVAL_MILLIS = 1200L;

									     /** Connect and read timeout for one page request. */
									     private static final int TIMEOUT_MILLIS = 6 * 1000;

									     /** Digits immediately preceding the closing quote of the "last page" link. */
									     private static final Pattern LAST_PAGE_PATTERN = Pattern.compile("(\\d+)\">末页");

									     /** One draw: from its first centred cell up to the cell that closes the ball list. */
									     private static final Pattern TERM_PATTERN = Pattern.compile(
									             "<td align=\"center\" style=\"padding-left:10px;\">[\\s\\S]+?</em></td>");

									     /** A run of digits that forms the entire text between two tags. */
									     private static final Pattern NUMBER_PATTERN = Pattern.compile(">(\\d+)<");

									     /**
									      * Entry point: fetches all list pages and writes the collected draws to
									      * {@value #OUTPUT_FILE}.
									      *
									      * @param args ignored
									      */
									     public static void main(String[] args) {
									         System.out.println("正在获取...");
									         String homeContent = getHtmlString(pageUrl(1));
									         int pageCount = getPageCount(homeContent);
									         if (pageCount > 0) {
									             StringBuilder results = new StringBuilder();
									             for (int i = 1; i <= pageCount; i++) {
									                 // Page 1 was already downloaded to read the page count; do not fetch it twice.
									                 String pageContent = (i == 1) ? homeContent : getHtmlString(pageUrl(i));
									                 if (pageContent != null && !pageContent.isEmpty()) {
									                     results.append(getOneTermContent(pageContent));
									                 } else {
									                     System.out.println("第" + i + "页丢失");
									                 }
									                 try {
									                     Thread.sleep(REQUEST_INTERVAL_MILLIS);
									                 } catch (InterruptedException e) {
									                     // Restore the interrupt flag and stop fetching; what was collected is still saved.
									                     Thread.currentThread().interrupt();
									                     break;
									                 }
									             }
									             writeResults(results.toString());
									         } else {
									             System.out.println("结果页数为0");
									         }
									         System.out.println("完成！");
									     }

									     /** Builds the URL of the list page with the given 1-based page number. */
									     private static String pageUrl(int page) {
									         return BASE_URL_PREFIX + page + BASE_URL_SUFFIX;
									     }

									     /** Writes the collected draws to the output file, replacing any previous content. */
									     private static void writeResults(String results) {
									         File file = new File(OUTPUT_FILE);
									         // FileWriter truncates an existing file, so no explicit delete is needed.
									         try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
									             writer.write(results);
									         } catch (IOException e) {
									             System.err.println("Failed to write " + file.getAbsolutePath() + ": " + e);
									         }
									     }

									     /**
									      * Extracts the total number of list pages from the HTML of a list page.
									      *
									      * @param result HTML of a list page; may be {@code null} when the download failed
									      * @return the number found in front of the "last page" link, or {@code 0} when the
									      *         input is {@code null}, contains no such link, or the number does not fit
									      *         in an {@code int}
									      */
									     static int getPageCount(String result) {
									         if (result == null) {
									             return 0;
									         }
									         Matcher matcher = LAST_PAGE_PATTERN.matcher(result);
									         if (!matcher.find()) {
									             return 0;
									         }
									         try {
									             return Integer.parseInt(matcher.group(1));
									         } catch (NumberFormatException e) {
									             // Digit run too long for an int: treat it as "no usable page count".
									             return 0;
									         }
									     }

									     /**
									      * Downloads a page and returns its body decoded as UTF-8.
									      *
									      * @param targetUrl absolute HTTP URL of the page
									      * @return the page body with every line terminated by {@code '\n'}, or {@code null}
									      *         when the URL is invalid, the request fails, or the response status is
									      *         not 200
									      */
									     static String getHtmlString(String targetUrl) {
									         HttpURLConnection connection = null;
									         try {
									             connection = (HttpURLConnection) new URL(targetUrl).openConnection();
									             // No request body is sent, so a plain GET is used.
									             connection.setRequestMethod("GET");
									             connection.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 7.0; Windows 7)");
									             connection.setRequestProperty("Accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-powerpoint, application/vnd.ms-excel, application/msword, */*");
									             connection.setRequestProperty("Accept-Language", "zh-cn");
									             connection.setRequestProperty("UA-CPU", "x86");
									             // Only gzip is advertised because only gzip is decoded below.
									             connection.setRequestProperty("Accept-Encoding", "gzip");
									             // Each page is requested once, so neither keep-alive nor caching helps.
									             connection.setRequestProperty("Connection", "close");
									             connection.setUseCaches(false);
									             connection.setConnectTimeout(TIMEOUT_MILLIS);
									             connection.setReadTimeout(TIMEOUT_MILLIS);
									             connection.connect();
									             if (connection.getResponseCode() != HttpURLConnection.HTTP_OK) {
									                 return null;
									             }
									             String encoding = connection.getContentEncoding();
									             // Locale.ROOT keeps the comparison independent of the default locale.
									             boolean gzipped = encoding != null
									                     && encoding.toLowerCase(java.util.Locale.ROOT).contains("gzip");
									             InputStream raw = connection.getInputStream();
									             try (InputStream body = gzipped ? new GZIPInputStream(raw) : raw;
									                     BufferedReader reader = new BufferedReader(
									                             new InputStreamReader(body, StandardCharsets.UTF_8))) {
									                 StringBuilder builder = new StringBuilder();
									                 String line;
									                 while ((line = reader.readLine()) != null) {
									                     builder.append(line).append("\n");
									                 }
									                 return builder.toString();
									             }
									         } catch (IOException | RuntimeException e) {
									             // Best effort: the caller reports a missing page and carries on.
									             System.err.println("Failed to fetch " + targetUrl + ": " + e);
									             return null;
									         } finally {
									             if (connection != null) {
									                 connection.disconnect();
									             }
									         }
									     }

									     /**
									      * Extracts all draws contained in one list page.
									      *
									      * @param pageContent HTML of a list page, not {@code null}
									      * @return one line per draw, in page order, each formatted as described by
									      *         {@link #getOneTermNumbers(String)}; empty when the page has no draw
									      */
									     static String getOneTermContent(String pageContent) {
									         StringBuilder terms = new StringBuilder();
									         Matcher matcher = TERM_PATTERN.matcher(pageContent);
									         while (matcher.find()) {
									             terms.append(getOneTermNumbers(matcher.group()));
									         }
									         return terms.toString();
									     }

									     /**
									      * Extracts the numbers of a single draw.
									      *
									      * @param oneTermContent HTML fragment of one draw, not {@code null}
									      * @return every digit run enclosed directly between {@code '>'} and {@code '<'},
									      *         each followed by one space, terminated by {@code "\r\n"}
									      */
									     static String getOneTermNumbers(String oneTermContent) {
									         StringBuilder numbers = new StringBuilder();
									         Matcher matcher = NUMBER_PATTERN.matcher(oneTermContent);
									         while (matcher.find()) {
									             numbers.append(matcher.group(1)).append(" ");
									         }
									         return numbers.append("\r\n").toString();
									     }
									 }

运行结果：

希望本文所述对大家的Java程序设计有所帮助。

原文链接：https://blog.csdn.net/ithouse/article/details/50908296

查看更多关于Java实现爬取往期所有双色球开奖结果功能示例的详细内容...

声明：本文来自网络，不代表【好得很程序员自学网】立场，转载请注明出处：http://haodehen.cn/did252728

更新时间：2023-07-28 阅读：138次