import os
import re
import time
import urllib
import urllib.parse
import urllib.request
from urllib import request

import requests
from fake_useragent import UserAgent
from lxml import etree

# import wx
# Listing pages to crawl; %d is replaced by the page number.
# NOTE(review): the host part of these URLs looks mangled by the site this
# snippet was copied from - confirm the real domain before running.
# The original author also noted the percent-encoded form of page 3:
#   https://HdhCmsTestfb532测试数据/tupian/list-%E6%B8%85%E7%BA%AF%E5%94%AF%E7%BE%8E-3.html
# and an alternative start URL:
#   https://HdhCmsTest128nm测试数据/
LIST_URL_TEMPLATE = "https://HdhCmsTestfb532测试数据/tupian/list-清纯唯美-%d.html"

# Page numbers visited by main() (2 through 6 inclusive).
PAGE_NUMBERS = range(2, 7)

# Directory (relative to the working directory) that receives the images.
DOWNLOAD_DIR = "imgs3"

# Fixed User-Agent sent by the urllib opener that downloads the image files.
DOWNLOAD_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36"
)

# Seconds to wait for a listing page before giving up on it.
REQUEST_TIMEOUT = 30

# Characters that must not appear in a file name: path separators, the
# characters Windows reserves, and ASCII control characters.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def build_page_url(page):
    """Return the listing-page URL for page number *page*."""
    return LIST_URL_TEMPLATE % page


def safe_filename(title, fallback="unnamed"):
    """Turn an image title into a string usable as a single file name.

    Unsafe characters are replaced by ``_``; surrounding whitespace and dots
    are stripped. If nothing is left, *fallback* is returned.
    """
    cleaned = _UNSAFE_FILENAME_CHARS.sub("_", title).strip().strip(". ")
    return cleaned or fallback


def image_extension(img_url, default="jpg"):
    """Return the file extension (without the dot) of *img_url*.

    Only the path component of the URL is inspected, so query strings and
    fragments cannot leak into the extension. *default* is returned when the
    path has no extension.
    """
    url_path = urllib.parse.urlsplit(img_url).path
    extension = os.path.splitext(url_path)[1].lstrip(".")
    return extension or default


def first_or_none(values):
    """Return the first element of *values*, or None when it is empty."""
    return values[0] if values else None


def download_page(page_url, headers, target_dir=DOWNLOAD_DIR):
    """Download every image listed on *page_url* into *target_dir*.

    The page is fetched with ``requests`` using *headers*; each
    ``<li>`` under ``<ul class="clearfix">`` contributes one image whose
    name comes from ``a/@title`` and whose URL comes from
    ``a/@data-original``. Failures are reported on stdout and skipped so
    one bad page or image does not stop the crawl.

    Returns the number of images saved.
    """
    print(page_url)
    try:
        response = requests.get(
            page_url, headers=headers, timeout=REQUEST_TIMEOUT
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        print("skipping page %s: %s" % (page_url, exc))
        return 0

    tree = etree.HTML(response.content.decode("utf-8", errors="replace"))
    if tree is None:
        # Defensive: nothing to query if the parser produced no document.
        return 0

    items = tree.xpath('//ul[@class="clearfix"]/li')
    print(len(items))

    saved = 0
    for item in items:
        img_name = first_or_none(item.xpath("./a/@title"))
        img_url = first_or_none(item.xpath("./a/@data-original"))
        if not img_name or not img_url:
            # Entry without a title or image URL: nothing to download.
            continue
        print(img_name, img_url)

        filename = os.path.join(
            target_dir,
            "%s.%s" % (safe_filename(img_name), image_extension(img_url)),
        )
        print(filename)
        try:
            urllib.request.urlretrieve(img_url, filename)
        except (OSError, ValueError) as exc:
            # URLError is an OSError; ValueError covers malformed URLs.
            print("failed to download %s: %s" % (img_url, exc))
            continue
        saved += 1
    return saved


def main():
    """Crawl the configured listing pages and save their images."""
    # Random User-Agent for the listing-page requests.
    headers = {"user-agent": UserAgent().random}

    # urlretrieve() uses the globally installed opener, so install it once.
    opener = urllib.request.build_opener()
    opener.addheaders = [("User-Agent", DOWNLOAD_USER_AGENT)]
    urllib.request.install_opener(opener)

    os.makedirs(DOWNLOAD_DIR, exist_ok=True)

    for page in PAGE_NUMBERS:
        download_page(build_page_url(page), headers)


if __name__ == "__main__":
    main()
# Page footer carried over from the source article: "See more details about Python-demo(photo)..."