# 01 text
"""
Variables: quantities that can change. Every variable has:
1. a value          -> value
2. a memory address -> id
3. a data type      -> type
"""
import re

# Non-greedy demo: 'a.*?c' matches from each 'a' up to the NEAREST
# following 'c' (lazy '*?'), instead of swallowing the whole string.
print(re.findall(r'a.*?c', 'a123c456dsdadac'))
# 02 Simulate a browser (fetch a page with a real browser User-Agent)
import requests

# Use a desktop-Chrome User-Agent so the server treats the request as
# coming from a browser rather than a script.
header = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
}
res = requests.get('https://dig.chouti测试数据/', headers=header)
data = res.text
print(data)
# 03 Scrape the Douban Movie Top 250 with requests + re
'''Scrape the Douban Movie Top 250 list.

Pagination pattern:
page 1: https://movie.douban测试数据/top250?start=0&filter=
page 2: https://movie.douban测试数据/top250?start=25&filter=

requests: HTTP client library
re:       regular expressions
'''
import requests
import re

# Compiled once, outside the page loop. Each match captures
# (movie url, title, rating, rater count) from one <div class="item">.
# '.*?' skips unwanted text lazily; re.S lets '.' also match newlines.
MOVIE_PATTERN = re.compile(
    '<div class="item">.*?<a href="(.*?)">.*?<span class="title">(.*?)</span>'
    '.*?<span class="rating_num" property="v:average">(.*?)</span>'
    '.*?<span>(.*?)人评价</span>',
    re.S)

# Build the URL for each page: start = 0, 25, 50, 75.
for start in range(0, 100, 25):
    page_url = f'https://movie.douban测试数据/top250?start={start}&filter='
    response = requests.get(page_url)
    # Extract all (url, title, rating, count) tuples from the page HTML.
    # Unpack into distinct names so the movie url does not shadow page_url.
    for movie_url, name, point, count in MOVIE_PATTERN.findall(response.text):
        movie_data = '''
电影名称: %s
电影地址: %s
电影评分: %s
评价人数: %s
\n
''' % (name, movie_url, point, count)
        print(movie_data)
        # 'a' (append) so results from every page accumulate in one file.
        with open('豆瓣.txt', 'a', encoding='utf-8') as f:
            f.write(movie_data)
# 04 Scrape Douban Top 250 politely: bs4 parsing, Excel output, 1s delay per page
import time

import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook

wb = Workbook()
sheet = wb.active
# Worksheet title and header row are loop-invariant: write them once up
# front (the original rewrote them on every page iteration).
sheet.title = '好评电影'
sheet['A1'].value = '序号'
sheet['B1'].value = '电影名称'
sheet['C1'].value = '电影评分'
sheet['D1'].value = '电影链接'
sheet['E1'].value = '电影图片'

row = 1  # current worksheet row; row 1 holds the header
# Pages are 25 movies apart: start = 0, 25, 50, 75.
for start in range(0, 100, 25):
    url = f'https://movie.douban测试数据/top250?start={start}&filter='
    # Request the page and parse it with the built-in html.parser.
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    # The 25 movies on a page live as <li> inside <ol class="grid_view">.
    ol = soup.find(name='ol', attrs={'class': 'grid_view'})
    for li in ol.find_all(name='li'):
        name = li.find(name='span', attrs={'class': 'title'})
        link = li.find(name='a')
        rating = li.find(name='span', attrs={'class': 'rating_num'})
        img = li.find(name='img')
        row += 1
        sheet['A%s' % row].value = row - 1   # 1-based sequence number
        sheet['B%s' % row].value = name.text
        sheet['C%s' % row].value = rating.text
        sheet['D%s' % row].value = link['href']
        sheet['E%s' % row].value = img['src']
    # Be polite: pause between page requests to avoid hammering the site.
    time.sleep(1)

wb.save('好评电影.xlsx')
# 05 Translate text via the iciba (Kingsoft PowerWord) web API
import requests
import json


def main(key=""):
    """POST *key* to the iciba translate endpoint and return the result.

    Returns the dictionary meaning ('word_mean') for a single word, or
    the machine translation ('out') for longer phrases/sentences.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
    }
    url = 'http://fy.iciba测试数据/ajax.php?a=fy'
    data = {
        'f': 'auto',  # source language: auto-detect
        't': 'auto',  # target language: auto-detect
        'w': key,     # the text to translate
    }
    # Send a POST request with the browser User-Agent and the form data.
    res = requests.post(url=url, headers=header, data=data)
    # Deserialize the JSON response body into a dict.
    data_list = json.loads(res.text)
    try:
        # Single words come back under 'word_mean' ...
        val = data_list['content']['word_mean']
    except KeyError:
        # ... sentences/phrases come back under 'out' instead.
        # (Narrowed from a bare except: only a missing key is expected here.)
        val = data_list['content']['out']
    return val


if __name__ == '__main__':
    # Read the user's input; reject empty input before hitting the API.
    key = input('请输入要翻译的词语:').strip()
    if not key:
        print('输入为空')
    else:
        print(main(key=key))
# 声明:本文来自网络,不代表【好得很程序员自学网】立场,转载请注明出处:http://haodehen.cn/did171104