回复内容:
http://github.com/zhu327/rss 既然你也用 Python,就直接看代码吧。
爬这里:http://service.weibo.com/widget/widget_blog.php?uid={uid} ,把 {uid} 替换成目标用户的 uid,无需登录,不会被挡。
爬手机端
http://weibo.cn
可以参考下面的代码,来自极客学院,侵删
#-*-coding:utf8-*-
import smtplib
from email.mime.text import MIMEText
import requests
from lxml import etree
import os
import time
import sys
# Python 2 idiom: reload the sys module so that setdefaultencoding() can be
# called, then make UTF-8 the process-wide default codec used for implicit
# str <-> unicode conversions (the file writes and prints below rely on it).
# NOTE(review): reload() as a builtin and sys.setdefaultencoding() do not
# exist on Python 3 - this script targets Python 2 only.
reload(sys)
sys.setdefaultencoding('utf-8')
class mailhelper(object):
    '''
    Send plain-text notification e-mails through an SMTP server.

    The host, account and sender-domain values assigned in ``__init__`` are
    placeholders and must be replaced with real settings before use.
    '''

    def __init__(self):
        self.mail_host = "smtp.xxxx.com"  # SMTP server host
        self.mail_user = "xxxx"  # login user name
        self.mail_pass = "xxxx"  # login password
        self.mail_postfix = "xxxx.com"  # domain part of the sender address

    def send_mail(self, to_list, sub, content):
        '''
        Send ``content`` as a UTF-8 plain-text mail to every address in ``to_list``.

        :param to_list: list of recipient address strings.
        :param sub: subject line of the message.
        :param content: body text of the message.
        :return: True when the message was handed to the server; False when
            connecting, logging in or sending raised (the error is printed).
        '''
        # Display name followed by the real sender address in angle brackets,
        # built from the configured user and domain. Without the bracketed
        # address the SMTP envelope sender is not a valid mailbox.
        me = "xxoohelper" + "<" + self.mail_user + "@" + self.mail_postfix + ">"
        msg = MIMEText(content, _subtype='plain', _charset='utf-8')
        msg['Subject'] = sub
        msg['From'] = me
        # Address lists in mail headers are comma separated.
        msg['To'] = ", ".join(to_list)
        server = None
        try:
            server = smtplib.SMTP()
            server.connect(self.mail_host)
            server.login(self.mail_user, self.mail_pass)
            server.sendmail(me, to_list, msg.as_string())
            return True
        except Exception as e:
            # Best effort: report the failure and let the caller decide.
            print(str(e))
            return False
        finally:
            # Always release the socket, including when login/send failed.
            if server is not None:
                server.close()
class xxoohelper(object):
    '''
    Scrape the first post of a Weibo page through the mobile site.

    The page URL, login account and password in this class are placeholders
    and must be replaced with real values before use. Posts that were already
    seen are recorded one per line in ``weibo.txt`` in the working directory.
    '''

    def __init__(self):
        self.url = 'http://weibo.cn/u/xxxxxxx'  # Weibo page to scrape (placeholder)
        self.url_login = 'https://login.weibo.cn/login/'
        # Login POST target; replaced by getData() once the form action is known.
        self.new_url = self.url_login

    def getSource(self):
        '''Return the raw response body of a GET request to ``self.url``.'''
        html = requests.get(self.url).content
        return html

    def getData(self, html):
        '''
        Build the login form payload from the page returned by getSource().

        Reads the name of the password input, the hidden ``vk`` value and the
        POST form's action from ``html``, and stores ``url_login + action`` in
        ``self.new_url`` as a side effect.

        :param html: markup of the page containing the login form.
        :return: dict of form fields to POST to ``self.new_url``.
        :raises IndexError: if any of the three form elements is missing.
        '''
        selector = etree.HTML(html)
        # The password field name is read from the page rather than hard-coded.
        password = selector.xpath('//input[@type="password"]/@name')[0]
        vk = selector.xpath('//input[@name="vk"]/@value')[0]
        action = selector.xpath('//form[@method="post"]/@action')[0]
        self.new_url = self.url_login + action
        data = {
            'mobile': 'xxxxx@xxx.com',  # login account (placeholder)
            password: 'xxxxxx',  # login password (placeholder)
            'remember': 'on',
            'backURL': 'http://weibo.cn/u/xxxxxx',  # change to the Weibo page address
            'backTitle': u'微博',
            'tryCount': '',
            'vk': vk,
            'submit': u'登录'
        }
        return data

    def getContent(self, data):
        '''
        POST the login form and extract one post from the resulting page.

        :param data: form payload produced by getData().
        :return: text of the selected post with ``http://`` removed, followed
            by the text of the first ``span.ct`` element.
        :raises IndexError: if the page has fewer than three ``span.ctt``
            elements or no ``span.ct`` text.
        '''
        newhtml = requests.post(self.new_url, data=data).content
        new_selector = etree.HTML(newhtml)
        content = new_selector.xpath('//span[@class="ctt"]')
        # NOTE(review): index 2 presumably skips two non-post "ctt" spans at
        # the top of the page - confirm against the live markup.
        newcontent = unicode(content[2].xpath('string(.)')).replace('http://', '')
        # NOTE(review): span.ct looks like the post's time/source line - confirm.
        sendtime = new_selector.xpath('//span[@class="ct"]/text()')[0]
        sendtext = newcontent + sendtime
        return sendtext

    def tosave(self, text):
        '''Append ``text`` plus a newline to ``weibo.txt``.'''
        with open('weibo.txt', 'a') as f:
            f.write(text + '\n')

    def tocheck(self, data):
        '''
        Tell whether ``data`` is new, i.e. not yet recorded in ``weibo.txt``.

        :param data: post text to look up.
        :return: True if ``weibo.txt`` does not exist or has no line equal to
            ``data``; False if such a line is already present.
        '''
        if not os.path.exists('weibo.txt'):
            return True
        # The with-block closes the file; it used to stay open on every poll.
        with open('weibo.txt', 'r') as f:
            existweibo = f.readlines()
        return data + '\n' not in existweibo
if __name__ == '__main__':
    # Poll the Weibo page every 30 seconds; when the fetched post is not yet
    # recorded, mail it to the recipients, record it and print it.
    mailto_list = ['xxxxx@qq.com']  # recipient mailbox(es) - fill in real addresses
    helper = xxoohelper()
    while True:
        try:
            source = helper.getSource()
            data = helper.getData(source)
            content = helper.getContent(data)
            if helper.tocheck(content):
                if mailhelper().send_mail(mailto_list, u"女神更新啦", content):
                    print(u"发送成功")
                else:
                    print(u"发送失败")
                # Record the post even if mailing failed, so it is not retried.
                helper.tosave(content)
                print(content)
            else:
                print(u'pass')
        except Exception as e:
            # A transient network or page-layout error must not stop the
            # poller; report it and try again on the next round.
            print(str(e))
        time.sleep(30)
据说爬手机版会有奇效。
我以前爬过,不知道现在还可不可行。爬他的移动端页面,当时限制比网页端少。
爬虫程序部署在 Google App Engine 多个节点上跑。新浪有开发者平台,有专门的 API 接口,用爬虫会被屏蔽。
查看更多关于python抓取新浪微博,求教!!?的详细内容...
声明:本文来自网络,不代表【好得很程序员自学网】立场,转载请注明出处:http://haodehen.cn/did89761