重构前的版本

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import requests
from moviepy.editor import *


def get_page(url_30280, url_30077, headers):
    """Download the two media streams and mux them into ``B站视频.mp4``.

    Args:
        url_30280: URL of the stream saved as ``B站视频1.mp3`` and used as the
            audio track.
        url_30077: URL of the stream saved as ``B站视频2.mp4`` and used as the
            video track.
        headers: HTTP headers sent with both requests.
    """
    response1 = requests.get(url_30280, headers=headers).content
    response2 = requests.get(url_30077, headers=headers).content
    # The audio stream must be saved under the exact name AudioFileClip opens
    # below; writing it as '.mp4' and reading '.mp3' fails with a missing file.
    with open('B站视频1.mp3', 'wb') as f:
        f.write(response1)
    with open('B站视频2.mp4', 'wb') as f:
        f.write(response2)
    # Load the video-only file
    video = VideoFileClip('B站视频2.mp4')
    # Load the audio-only file
    audio = AudioFileClip('B站视频1.mp3')
    # Attach the audio track to the video clip
    movie = video.set_audio(audio)
    # Render the combined clip (the method is write_videofile, not weite_videofile)
    movie.write_videofile('B站视频.mp4')


def main():
    """Call ``get_page`` with the two captured stream URLs and the request headers."""
    # Headers sent with both stream requests.
    request_headers = dict([
        ('referer', 'https://www.bilibili.com/video/BV1c341187m9'),
        ('user-agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36'),
    ])
    # Stream with id 30280 in its file name (passed as the first argument).
    first_stream = 'https://xy221x131x191x56xy.mcdn.bilivideo.cn:4483/upgcxcode/95/78/439527895/439527895_nb2-1-30280.m4s?e=ig8euxZM2rNcNbdlhoNvNC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5JZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5tZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZlqNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0eN0B599M=&uipk=5&nbs=1&deadline=1636727944&gen=playurlv2&os=mcdn&oi=3748183839&trid=00015abe094f6dc94938917e8895505bead4u&platform=pc&upsig=f9b8eb9e4545f2d0d244804b9e65e780&uparams=e,uipk,nbs,deadline,gen,os,oi,trid,platform&mcdnid=9001331&mid=671157361&bvc=vod&nettype=0&orderid=0,3&agrr=0&bw=40218&logo=A0000100'
    # Stream with id 30077 in its file name (passed as the second argument).
    second_stream = 'https://xy221x131x191x56xy.mcdn.bilivideo.cn:4483/upgcxcode/95/78/439527895/439527895-1-30077.m4s?e=ig8euxZM2rNcNbdlhoNvNC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5JZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5tZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZlqNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0eN0B599M=&uipk=5&nbs=1&deadline=1636727944&gen=playurlv2&os=mcdn&oi=3748183839&trid=00015abe094f6dc94938917e8895505bead4u&platform=pc&upsig=df692a5e07e5ceb77c354bf38dea609f&uparams=e,uipk,nbs,deadline,gen,os,oi,trid,platform&mcdnid=9001331&mid=671157361&bvc=vod&nettype=0&orderid=0,3&agrr=0&bw=181576&logo=A0000100'
    get_page(first_stream, second_stream, request_headers)

之前是手动抓包的形式,如果说要爬取多个视频的话会很麻烦,这次就加强一下爬虫

首先把爬虫的基本框架搭起来

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
class Video:
    """Crawler skeleton: holds a page URL plus request headers and fetches the page."""

    def __init__(self, url):
        """Store the page URL and build the headers used for page requests.

        Args:
            url: Address of the page to fetch.
        """
        # NOTE(review): this is a hard-coded browser session cookie — confirm
        # it is meant to live in source.
        session_cookie = "CURRENT_FNVAL=4048; PVID=5; b_timer=%7B%22ffp%22%3A%7B%22333.1007.fp.risk_5BA763BD%22%3A%221825E14DE6D%22%2C%22666.7.fp.risk_5BA763BD%22%3A%221825E14EFFC%22%2C%22666.25.fp.risk_5BA763BD%22%3A%221825E17A189%22%2C%22666.4.fp.risk_5BA763BD%22%3A%221825E17D7C8%22%2C%22333.934.fp.risk_5BA763BD%22%3A%221825E197416%22%2C%22333.979.fp.risk_5BA763BD%22%3A%221825E1A122C%22%2C%22777.5.0.0.fp.risk_5BA763BD%22%3A%221825E1B15F3%22%7D%7D; fingerprint3=09a322960f9b29d88c1731cadd0dc98f; innersign=0; CURRENT_BLACKGAP=0; bp_video_offset_671157361=689759880761835600; b_lsid=D1039D10A10_1825E14DB91; i-wanna-go-back=-1; CURRENT_QUALITY=80; rpdid=0zbfVFUBEL|Ucq23nZm|3rq|3w1OiL9g; hit-dyn-v2=1; blackside_state=0; b_ut=5; buvid_fp=5905c67921a38e156c824e2d22d1b61f; LIVE_BUVID=AUTO6416594074645196; DedeUserID=671157361; DedeUserID__ckMd5=ba8273578e2f6b80; SESSDATA=a6489929%2C1674959436%2Ca4b2a*81; bili_jct=fe131d3907b8eea7c59b3abfaa60a6e2; sid=5qy9dddr; buvid_fp_plain=undefined; fingerprint=5905c67921a38e156c824e2d22d1b61f; _uuid=8E3D575D-4517-7613-7B25-C4B27FDAF510306266infoc; buvid4=49E57E44-822A-F96C-B9B5-C3CB36DEC69E07142-022080210-2vQi149BRC5hxFHhxBtSxnTgzdnGNZWCAywIAqBXeJ4aJAXCFMFEag%3D%3D; buvid3=5BA763BD-A667-A548-A705-1705D82E763306244infoc"
        browser_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
        self.url = url
        self.headers = {"cookie": session_cookie, 'user-agent': browser_agent}

    # Fetch the page source
    def get_page(self):
        """Return the response text for ``self.url``, or ``None`` unless the status is 200."""
        reply = requests.get(self.url, headers=self.headers)
        return reply.text if reply.status_code == 200 else None

    def run(self):
        """Entry point of the skeleton: fetch the page source (nothing is done with it yet)."""
        page_source = self.get_page()

还是之前的网站,首先先抓一下包

这是视频和音频,之前已经讲过了。先在元素中查找,找到位置后,再在源码中查一下

image-20220831205427024

image-20220831205600346

名字有些出入,源码为主

既然找到了位置就可以匹配:先用 XPath 中的 contains 语法获取到 script 中的内容,再用正则匹配

1
2
3
4
5
6
7
8
9
10
11
# Parse the page source to get the video title and stream links
def analysis(self, html):
    """Extract the stream URLs and the title from the page source.

    Args:
        html: Page source text as returned by ``get_page()``.

    Returns:
        A ``(video, audio, name)`` tuple: the ``baseUrl`` of the first video
        entry, the ``baseUrl`` of the first audio entry, and the text of the
        ``<h1>`` under the ``viewbox_report`` element.

    Raises:
        IndexError: If the title, the ``window.__playinfo__`` script or one
            of the stream URLs cannot be found.
    """
    tree = etree.HTML(html)
    # Video title
    name = tree.xpath('//*[@id="viewbox_report"]/h1/text()')[0]
    # Text of the inline script that holds the stream list
    video_list = tree.xpath('//script[contains(text(), "window.__playinfo__")]/text()')[0]
    # Stream links: take the first entry of each list
    video = re.findall(r'"video":\[{"id":\d+,"baseUrl":"(.*?)"', video_list)[0]
    # Accept any numeric audio id (as the video pattern already does) instead
    # of only 30280, so pages whose first audio stream uses another id work.
    audio = re.findall(r'"audio":\[{"id":\d+,"baseUrl":"(.*?)"', video_list)[0]
    return video, audio, name

分别获取到视频和音频的链接

就可以发起请求进行合并了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Request the stream links, then merge and store the result
def save(self, link):
    """Download both streams, merge them into ``<title>.mp4`` and clean up.

    Args:
        link: ``(video_url, audio_url, name)`` as returned by ``analysis()``.
    """
    # Same headers as the page request, plus a referer pointing at the page
    # (replaces a second verbatim copy of the cookie / user-agent literals).
    headers = {'referer': self.url, **self.headers}
    print('正在下载视频')
    video = requests.get(link[0], headers=headers).content
    audio = requests.get(link[1], headers=headers).content

    # Page titles can contain characters that are not allowed in file names
    # (path separators, ?, :, | ...); replace them so open() does not fail.
    title = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', link[2].strip())
    # The temporary stream files get a '1' suffix so they cannot collide
    # with the merged output file.
    name = title + '1'

    with open(f'{name}.mp4', 'wb') as f:
        f.write(video)
    with open(f'{name}.mp3', 'wb') as f:
        f.write(audio)

    # Merge
    print('合并视频')

    ffmpeg_tools.ffmpeg_merge_video_audio(f'{name}.mp4', f'{name}.mp3', f'{title}.mp4')
    print('*' * 50)
    print('下载成功')
    print()

    # Remove the temporary stream files
    os.remove(f'{name}.mp4')
    os.remove(f'{name}.mp3')

全部代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
class Video:
    """Download an ordinary video page as a single merged ``.mp4`` file.

    ``run()`` fetches the page, extracts the stream links with ``analysis()``
    and passes them to ``save()``.
    """

    def __init__(self, url):
        """Store the page URL and build the headers used for page requests.

        Args:
            url: Address of the video page to download.
        """
        self.url = url
        # NOTE(review): this is a hard-coded browser session cookie — it
        # should probably be supplied from outside the source file.
        self.headers = {
            "cookie": "CURRENT_FNVAL=4048; PVID=5; b_timer=%7B%22ffp%22%3A%7B%22333.1007.fp.risk_5BA763BD%22%3A%221825E14DE6D%22%2C%22666.7.fp.risk_5BA763BD%22%3A%221825E14EFFC%22%2C%22666.25.fp.risk_5BA763BD%22%3A%221825E17A189%22%2C%22666.4.fp.risk_5BA763BD%22%3A%221825E17D7C8%22%2C%22333.934.fp.risk_5BA763BD%22%3A%221825E197416%22%2C%22333.979.fp.risk_5BA763BD%22%3A%221825E1A122C%22%2C%22777.5.0.0.fp.risk_5BA763BD%22%3A%221825E1B15F3%22%7D%7D; fingerprint3=09a322960f9b29d88c1731cadd0dc98f; innersign=0; CURRENT_BLACKGAP=0; bp_video_offset_671157361=689759880761835600; b_lsid=D1039D10A10_1825E14DB91; i-wanna-go-back=-1; CURRENT_QUALITY=80; rpdid=0zbfVFUBEL|Ucq23nZm|3rq|3w1OiL9g; hit-dyn-v2=1; blackside_state=0; b_ut=5; buvid_fp=5905c67921a38e156c824e2d22d1b61f; LIVE_BUVID=AUTO6416594074645196; DedeUserID=671157361; DedeUserID__ckMd5=ba8273578e2f6b80; SESSDATA=a6489929%2C1674959436%2Ca4b2a*81; bili_jct=fe131d3907b8eea7c59b3abfaa60a6e2; sid=5qy9dddr; buvid_fp_plain=undefined; fingerprint=5905c67921a38e156c824e2d22d1b61f; _uuid=8E3D575D-4517-7613-7B25-C4B27FDAF510306266infoc; buvid4=49E57E44-822A-F96C-B9B5-C3CB36DEC69E07142-022080210-2vQi149BRC5hxFHhxBtSxnTgzdnGNZWCAywIAqBXeJ4aJAXCFMFEag%3D%3D; buvid3=5BA763BD-A667-A548-A705-1705D82E763306244infoc",
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
        }

    @staticmethod
    def _safe_filename(name):
        """Return *name* stripped, with characters invalid in file names replaced by ``_``."""
        return re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', name.strip())

    # Fetch the page source
    def get_page(self):
        """Return the response text for ``self.url``, or ``None`` unless the status is 200."""
        response = requests.get(self.url, headers=self.headers)
        if response.status_code == 200:
            return response.text
        return None

    # Parse the page source to get the video title and stream links
    def analysis(self, html):
        """Extract the stream URLs and the title from the page source.

        Args:
            html: Page source text as returned by ``get_page()``.

        Returns:
            A ``(video, audio, name)`` tuple: the ``baseUrl`` of the first
            video entry, the ``baseUrl`` of the first audio entry, and the
            text of the ``<h1>`` under the ``viewbox_report`` element.

        Raises:
            IndexError: If the title, the ``window.__playinfo__`` script or
                one of the stream URLs cannot be found.
        """
        tree = etree.HTML(html)
        # Video title
        name = tree.xpath('//*[@id="viewbox_report"]/h1/text()')[0]
        # Text of the inline script that holds the stream list
        video_list = tree.xpath('//script[contains(text(), "window.__playinfo__")]/text()')[0]
        # Stream links: take the first entry of each list
        video = re.findall(r'"video":\[{"id":\d+,"baseUrl":"(.*?)"', video_list)[0]
        # Accept any numeric audio id (as the video pattern already does)
        # instead of only 30280.
        audio = re.findall(r'"audio":\[{"id":\d+,"baseUrl":"(.*?)"', video_list)[0]
        return video, audio, name

    # Request the stream links, then merge and store the result
    def save(self, link):
        """Download both streams, merge them into ``<title>.mp4`` and clean up.

        Args:
            link: ``(video_url, audio_url, name)`` as returned by ``analysis()``.
        """
        # Same headers as the page request, plus a referer pointing at the page.
        headers = {'referer': self.url, **self.headers}
        print('正在下载视频')
        video = requests.get(link[0], headers=headers).content
        audio = requests.get(link[1], headers=headers).content

        # Titles may contain characters that cannot appear in a file name.
        title = self._safe_filename(link[2])
        # The temporary stream files get a '1' suffix so they cannot collide
        # with the merged output file.
        name = title + '1'

        with open(f'{name}.mp4', 'wb') as f:
            f.write(video)
        with open(f'{name}.mp3', 'wb') as f:
            f.write(audio)

        # Merge
        print('合并视频')

        ffmpeg_tools.ffmpeg_merge_video_audio(f'{name}.mp4', f'{name}.mp3', f'{title}.mp4')
        print('*' * 50)
        print('下载成功')
        print()

        # Remove the temporary stream files
        os.remove(f'{name}.mp4')
        os.remove(f'{name}.mp3')

    def run(self):
        """Fetch the page, extract the links and download the merged video."""
        html = self.get_page()
        if html is None:
            # get_page() reports a non-200 response as None; there is nothing
            # for analysis() to parse in that case.
            print('获取源码失败')
            return
        print('获取到源码')
        link = self.analysis(html)
        print('获取到链接', link)
        self.save(link)

你以为这样就结束了,还没有增强呢!

只爬取普通视频显得这个代码有些垃圾了,接下来添加爬取番剧及电影的代码

先看一下番剧的包

image-20220831211218759

包抓到了和普通视频一样

链接位置似乎有些不一样

image-20220831211348322

找到视频链接就可以写代码了,不过这次不用从头写了,前面已经写好了爬取普通视频的代码了,完全可以拿来就用,

上面用类来写的爬虫,所以我们完全可以继承他,只重写其中的解析视频链接的代码

直接上代码

1
2
3
4
5
6
7
8
9
10
class Drama(Video):
    """Crawler for bangumi / movie pages; only the link extraction differs from ``Video``."""

    def analysis(self, html):
        """Return ``(video, audio, name)`` extracted from the page source.

        The name is the page ``<title>`` text; each link is the first
        ``backupUrl`` entry found after the ``"video"`` / ``"audio"`` key in
        the ``window.__playinfo__`` script.
        """
        video_pattern = r'"video":\[.*?"backupUrl":\["(.*?)"'
        audio_pattern = r'"audio":\[.*?"backupUrl":\["(.*?)"'

        document = etree.HTML(html)
        # Page title, used as the video name
        title_nodes = document.xpath('//title/text()')
        name = title_nodes[0]
        # Text of the inline script that holds the stream list
        script_nodes = document.xpath('//script[contains(text(), "window.__playinfo__")]/text()')
        video_list = script_nodes[0]

        video_matches = re.findall(video_pattern, video_list)
        video = video_matches[0]
        audio_matches = re.findall(audio_pattern, video_list)
        audio = audio_matches[0]
        return video, audio, name

电影的结构和番剧一样可以直接调用,

再判断一下传入的链接是普通视频还是番剧又或者是电影

普通视频

https://www.bilibili.com/video/BV1c341187m9

番剧

https://www.bilibili.com/bangumi/play/ep508404?from_spmid=666.25.episode.0&from_outer_spmid=666.4.0.0

电影

https://www.bilibili.com/bangumi/play/ep673044?from_spmid=666.7.banner.0

通过以上链接可以看出普通视频的链接包含video,番剧和电影的链接包含play,再加上爬取番剧和电影的代码一样,就可以这样写

1
2
3
4
5
6
7
8
9
10
11
12
13
if __name__ == '__main__':
    # Pages to download; each one is routed by a substring check on its URL.
    url_list = [
        'https://www.bilibili.com/bangumi/play/ss42077?theme=movie'
    ]
    for page_url in url_list:
        if 'video' in page_url:
            # Ordinary video page
            crawler = Video(page_url)
        elif 'play' in page_url:
            # Bangumi or movie page
            crawler = Drama(page_url)
        else:
            # Unrecognised URL: let the user pick the crawler by hand
            error(page_url)
            continue
        crawler.run()

再写一个异常处理方法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
def error(url):
    """Ask the user which crawler to use for a URL that was not recognised.

    Keeps prompting until ``1`` or ``2`` is entered, runs the chosen crawler
    once and then returns. Without the return after a successful run, the
    prompt would be shown again forever.

    Args:
        url: Page address handed to the selected crawler.
    """
    print('自动识别错误,请手动选择')
    while True:
        choose = input('1.视频, 2.番剧或电影\n')
        if choose == '1':
            print('视频')
            p = Video(url)
            p.run()
            return
        if choose == '2':
            print('番剧或电影')
            d = Drama(url)
            d.run()
            return
        # Anything else: report it and ask again
        print('输入格式有误,请重新输入')

合并后代码如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Ordinary video crawler
class Video:
    """Download an ordinary video page as a single merged ``.mp4`` file.

    ``run()`` fetches the page, extracts the stream links with ``analysis()``
    and passes them to ``save()``.
    """

    def __init__(self, url):
        """Store the page URL and build the headers used for page requests.

        Args:
            url: Address of the video page to download.
        """
        self.url = url
        # NOTE(review): this is a hard-coded browser session cookie — it
        # should probably be supplied from outside the source file.
        self.headers = {
            "cookie": "CURRENT_FNVAL=4048; PVID=5; b_timer=%7B%22ffp%22%3A%7B%22333.1007.fp.risk_5BA763BD%22%3A%221825E14DE6D%22%2C%22666.7.fp.risk_5BA763BD%22%3A%221825E14EFFC%22%2C%22666.25.fp.risk_5BA763BD%22%3A%221825E17A189%22%2C%22666.4.fp.risk_5BA763BD%22%3A%221825E17D7C8%22%2C%22333.934.fp.risk_5BA763BD%22%3A%221825E197416%22%2C%22333.979.fp.risk_5BA763BD%22%3A%221825E1A122C%22%2C%22777.5.0.0.fp.risk_5BA763BD%22%3A%221825E1B15F3%22%7D%7D; fingerprint3=09a322960f9b29d88c1731cadd0dc98f; innersign=0; CURRENT_BLACKGAP=0; bp_video_offset_671157361=689759880761835600; b_lsid=D1039D10A10_1825E14DB91; i-wanna-go-back=-1; CURRENT_QUALITY=80; rpdid=0zbfVFUBEL|Ucq23nZm|3rq|3w1OiL9g; hit-dyn-v2=1; blackside_state=0; b_ut=5; buvid_fp=5905c67921a38e156c824e2d22d1b61f; LIVE_BUVID=AUTO6416594074645196; DedeUserID=671157361; DedeUserID__ckMd5=ba8273578e2f6b80; SESSDATA=a6489929%2C1674959436%2Ca4b2a*81; bili_jct=fe131d3907b8eea7c59b3abfaa60a6e2; sid=5qy9dddr; buvid_fp_plain=undefined; fingerprint=5905c67921a38e156c824e2d22d1b61f; _uuid=8E3D575D-4517-7613-7B25-C4B27FDAF510306266infoc; buvid4=49E57E44-822A-F96C-B9B5-C3CB36DEC69E07142-022080210-2vQi149BRC5hxFHhxBtSxnTgzdnGNZWCAywIAqBXeJ4aJAXCFMFEag%3D%3D; buvid3=5BA763BD-A667-A548-A705-1705D82E763306244infoc",
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
        }

    @staticmethod
    def _safe_filename(name):
        """Return *name* stripped, with characters invalid in file names replaced by ``_``."""
        return re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', name.strip())

    # Fetch the page source
    def get_page(self):
        """Return the response text for ``self.url``, or ``None`` unless the status is 200."""
        response = requests.get(self.url, headers=self.headers)
        if response.status_code == 200:
            return response.text
        return None

    # Parse the page source to get the video title and stream links
    def analysis(self, html):
        """Extract the stream URLs and the title from the page source.

        Args:
            html: Page source text as returned by ``get_page()``.

        Returns:
            A ``(video, audio, name)`` tuple: the ``baseUrl`` of the first
            video entry, the ``baseUrl`` of the first audio entry, and the
            text of the ``<h1>`` under the ``viewbox_report`` element.

        Raises:
            IndexError: If the title, the ``window.__playinfo__`` script or
                one of the stream URLs cannot be found.
        """
        tree = etree.HTML(html)
        # Video title
        name = tree.xpath('//*[@id="viewbox_report"]/h1/text()')[0]
        # Text of the inline script that holds the stream list
        video_list = tree.xpath('//script[contains(text(), "window.__playinfo__")]/text()')[0]
        # Stream links: take the first entry of each list
        video = re.findall(r'"video":\[{"id":\d+,"baseUrl":"(.*?)"', video_list)[0]
        # Accept any numeric audio id (as the video pattern already does)
        # instead of only 30280.
        audio = re.findall(r'"audio":\[{"id":\d+,"baseUrl":"(.*?)"', video_list)[0]
        return video, audio, name

    # Request the stream links, then merge and store the result
    def save(self, link):
        """Download both streams, merge them into ``<title>.mp4`` and clean up.

        Args:
            link: ``(video_url, audio_url, name)`` as returned by ``analysis()``.
        """
        # Same headers as the page request, plus a referer pointing at the page.
        headers = {'referer': self.url, **self.headers}
        print('正在下载视频')
        video = requests.get(link[0], headers=headers).content
        audio = requests.get(link[1], headers=headers).content

        # Titles may contain characters that cannot appear in a file name.
        title = self._safe_filename(link[2])
        # The temporary stream files get a '1' suffix so they cannot collide
        # with the merged output file.
        name = title + '1'

        with open(f'{name}.mp4', 'wb') as f:
            f.write(video)
        with open(f'{name}.mp3', 'wb') as f:
            f.write(audio)

        # Merge
        print('合并视频')

        ffmpeg_tools.ffmpeg_merge_video_audio(f'{name}.mp4', f'{name}.mp3', f'{title}.mp4')
        print('*' * 50)
        print('下载成功')
        print()

        # Remove the temporary stream files
        os.remove(f'{name}.mp4')
        os.remove(f'{name}.mp3')

    def run(self):
        """Fetch the page, extract the links and download the merged video."""
        html = self.get_page()
        if html is None:
            # get_page() reports a non-200 response as None; there is nothing
            # for analysis() to parse in that case.
            print('获取源码失败')
            return
        print('获取到源码')
        link = self.analysis(html)
        print('获取到链接', link)
        self.save(link)


# Bangumi / movie crawler
class Drama(Video):
    """Crawler for bangumi / movie pages; only the link extraction differs from ``Video``."""

    def analysis(self, html):
        """Return ``(video, audio, name)`` extracted from the page source.

        The name is the page ``<title>`` text; each link is the first
        ``backupUrl`` entry found after the ``"video"`` / ``"audio"`` key in
        the ``window.__playinfo__`` script.
        """
        video_pattern = r'"video":\[.*?"backupUrl":\["(.*?)"'
        audio_pattern = r'"audio":\[.*?"backupUrl":\["(.*?)"'

        document = etree.HTML(html)
        # Page title, used as the video name
        title_nodes = document.xpath('//title/text()')
        name = title_nodes[0]
        # Text of the inline script that holds the stream list
        script_nodes = document.xpath('//script[contains(text(), "window.__playinfo__")]/text()')
        video_list = script_nodes[0]

        video_matches = re.findall(video_pattern, video_list)
        video = video_matches[0]
        audio_matches = re.findall(audio_pattern, video_list)
        audio = audio_matches[0]
        return video, audio, name


def error(url):
    """Ask the user which crawler to use for a URL that was not recognised.

    Keeps prompting until ``1`` or ``2`` is entered, runs the chosen crawler
    once and then returns. Without the return after a successful run, the
    prompt would be shown again forever.

    Args:
        url: Page address handed to the selected crawler.
    """
    print('自动识别错误,请手动选择')
    while True:
        choose = input('1.视频, 2.番剧或电影\n')
        if choose == '1':
            print('视频')
            p = Video(url)
            p.run()
            return
        if choose == '2':
            print('番剧或电影')
            d = Drama(url)
            d.run()
            return
        # Anything else: report it and ask again
        print('输入格式有误,请重新输入')

if __name__ == '__main__':
    # Pages to download; each one is routed by a substring check on its URL.
    url_list = [
        'https://www.bilibili.com/bangumi/play/ss42077?theme=movie'
    ]
    for page_url in url_list:
        if 'video' in page_url:
            # Ordinary video page
            crawler = Video(page_url)
        elif 'play' in page_url:
            # Bangumi or movie page
            crawler = Drama(page_url)
        else:
            # Unrecognised URL: let the user pick the crawler by hand
            error(page_url)
            continue
        crawler.run()
点击下载博客: