本文实例讲述了用Java实现爬取往期所有双色球开奖结果的功能。分享给大家供大家参考,具体如下:
梦想还是要有的,万一实现了呢?我相信经常买双色球的朋友和我一样,都会有一个疑问:往期双色球的开奖结果是什么?我钟意的这一注号码在往期是否开过一等奖?如果开过,基本上就可以放弃这一注了,因为历史上应该还没有出现过两期开奖结果完全一致的情况吧?那么往期的开奖结果到底是什么呢?我自己用Java写了一个简易的类,爬取所有双色球开奖结果。本来想开发安卓版本的,但由于UI等需要时间准备,有缘再开发吧。
完整代码如下:
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;

/**
 * Downloads every result-list page of the double-color-ball lottery from
 * kaijiang.zhcw.com, extracts the ball numbers of each draw and writes them,
 * one draw per line, to the file {@code 双色球.txt} in the working directory.
 *
 * <p>The page count is read from the "last page" link of the first list page;
 * pages are then fetched sequentially with a pause between requests.
 */
public class AllBalls {

    /** Every list page is addressed as prefix + page number + suffix. */
    private static final String BASE_URL_PREFIX = "http://kaijiang.zhcw.com/zhcw/html/ssq/list_";
    private static final String BASE_URL_SUFFIX = ".html";

    private static final String OUTPUT_FILE_NAME = "双色球.txt";

    /** Pause between two page requests, to keep the load on the site low. */
    private static final long PAGE_DELAY_MILLIS = 1200L;

    /** Connect and read timeout for a single page request. */
    private static final int TIMEOUT_MILLIS = 6 * 1000;

    /** The digits directly in front of the quote that closes the "last page" link tag. */
    private static final Pattern PAGE_COUNT_PATTERN = Pattern.compile("(\\d+)\">末页");

    /**
     * One draw: the table cell that holds the balls, up to its closing tag.
     * {@code [\s\S]} is required so the match can span tags and line breaks.
     */
    private static final Pattern TERM_PATTERN = Pattern.compile(
            "<td align=\"center\" style=\"padding-left:10px;\">[\\s\\S]+?</em></td>");

    /** A ball number: a run of digits that is the entire text between two tags. */
    private static final Pattern BALL_PATTERN = Pattern.compile(">(\\d+)<");

    private AllBalls() {
        // Static entry point and helpers only.
    }

    /**
     * Fetches all list pages and writes the collected draws to {@code 双色球.txt}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println("正在获取...");

        String firstPage = getHtmlString(BASE_URL_PREFIX + 1 + BASE_URL_SUFFIX);
        int pageCount = getPageCount(firstPage);

        if (pageCount > 0) {
            StringBuilder results = new StringBuilder();
            for (int page = 1; page <= pageCount; page++) {
                // Page 1 was already downloaded to learn the page count; reuse it.
                String pageContent = firstPage;
                if (page > 1) {
                    try {
                        Thread.sleep(PAGE_DELAY_MILLIS);
                    } catch (InterruptedException e) {
                        // Restore the flag and stop fetching; what was collected so far is still written.
                        Thread.currentThread().interrupt();
                        System.err.println("Interrupted before page " + page + "; stopping early.");
                        break;
                    }
                    pageContent = getHtmlString(BASE_URL_PREFIX + page + BASE_URL_SUFFIX);
                }

                if (pageContent != null && !pageContent.isEmpty()) {
                    results.append(extractDraws(pageContent));
                } else {
                    System.out.println("第" + page + "页丢失");
                }
            }

            // FileWriter truncates an existing file, so no explicit delete is needed.
            try (BufferedWriter writer = new BufferedWriter(new FileWriter(new File(OUTPUT_FILE_NAME)))) {
                writer.write(results.toString());
            } catch (IOException e) {
                System.err.println("Failed to write " + OUTPUT_FILE_NAME + ": " + e);
            }
        } else {
            System.out.println("结果页数为0");
        }

        System.out.println("完成!");
    }

    /**
     * Reads the total number of list pages from the "last page" link of a list page.
     *
     * @param result HTML of a list page; may be {@code null} when the download failed
     * @return the page count, or {@code 0} when {@code result} is {@code null}, contains
     *         no "last page" link, or the number does not fit into an {@code int}
     */
    static int getPageCount(String result) {
        if (result == null) {
            return 0;
        }
        Matcher matcher = PAGE_COUNT_PATTERN.matcher(result);
        if (!matcher.find()) {
            return 0;
        }
        try {
            return Integer.parseInt(matcher.group(1));
        } catch (NumberFormatException e) {
            // Only reachable for a digit run too large for int; treat as "unknown".
            return 0;
        }
    }

    /**
     * Downloads a page and returns its body decoded as UTF-8.
     *
     * @param targetUrl absolute HTTP URL of the page
     * @return the page source with lines terminated by {@code \n}, or {@code null}
     *         when the request fails or the response status is not 200
     */
    private static String getHtmlString(String targetUrl) {
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(targetUrl).openConnection();
            // Method names are case-sensitive; a plain page fetch is a GET without a request body.
            connection.setRequestMethod("GET");
            connection.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 7.0; Windows 7)");
            connection.setRequestProperty("Accept",
                    "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, "
                            + "application/vnd.ms-powerpoint, application/vnd.ms-excel, application/msword, */*");
            connection.setRequestProperty("Accept-Language", "zh-cn");
            connection.setRequestProperty("UA-CPU", "x86");
            // Only gzip is advertised because gzip is the only encoding decoded below.
            connection.setRequestProperty("Accept-Encoding", "gzip");
            connection.setRequestProperty("Content-Type", "text/html");
            // No keep-alive: this is a one-shot fetch per page, so release the connection right away.
            connection.setRequestProperty("Connection", "close");
            // Caching brings nothing here; the same URL is not requested repeatedly.
            connection.setUseCaches(false);
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            connection.setDoInput(true);
            connection.setRequestProperty("Charset", "utf-8");
            connection.connect();

            if (connection.getResponseCode() != HttpURLConnection.HTTP_OK) {
                return null;
            }

            String encoding = connection.getContentEncoding();
            boolean gzipped = encoding != null && encoding.toLowerCase(Locale.ROOT).contains("gzip");

            try (InputStream raw = connection.getInputStream();
                 InputStream body = gzipped ? new GZIPInputStream(raw) : raw;
                 BufferedReader reader = new BufferedReader(
                         new InputStreamReader(body, StandardCharsets.UTF_8))) {
                StringBuilder builder = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    builder.append(line).append("\n");
                }
                return builder.toString();
            }
        } catch (IOException e) {
            System.err.println("Failed to fetch " + targetUrl + ": " + e);
            return null;
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Extracts all draws found in one list page.
     *
     * @param pageContent HTML of a list page; {@code null} is treated as empty
     * @return one line per draw, each ball number followed by a single space and
     *         each line terminated by {@code \r\n}; empty when no draw is found
     */
    static String extractDraws(String pageContent) {
        StringBuilder draws = new StringBuilder();
        if (pageContent == null) {
            return draws.toString();
        }
        Matcher termMatcher = TERM_PATTERN.matcher(pageContent);
        while (termMatcher.find()) {
            Matcher ballMatcher = BALL_PATTERN.matcher(termMatcher.group());
            while (ballMatcher.find()) {
                draws.append(ballMatcher.group(1)).append(" ");
            }
            draws.append("\r\n");
        }
        return draws.toString();
    }
}
运行结果:程序运行完成后,会在当前目录生成“双色球.txt”文件,其中每一行对应一期的开奖号码。
希望本文所述对大家的Java程序设计有所帮助。
原文链接:https://blog.csdn.net/ithouse/article/details/50908296
查看更多关于Java实现爬取往期所有双色球开奖结果功能示例的详细内容...