重构前的版本
代码
import requests
from moviepy.editor import *


def get_page(url_30280, url_30077, headers):
    """Download the audio and video streams and merge them into one file.

    Args:
        url_30280: URL of the stream that is used as the audio track.
        url_30077: URL of the stream that is used as the video track.
        headers: HTTP headers sent with both download requests.
    """
    audio_data = requests.get(url_30280, headers=headers).content
    video_data = requests.get(url_30077, headers=headers).content
    # The audio has to be written to the same path that AudioFileClip opens
    # below; the original wrote 'B站视频1.mp4' but read 'B站视频1.mp3'.
    with open('B站视频1.mp3', 'wb') as f:
        f.write(audio_data)
    with open('B站视频2.mp4', 'wb') as f:
        f.write(video_data)
    video = VideoFileClip('B站视频2.mp4')
    audio = AudioFileClip('B站视频1.mp3')
    movie = video.set_audio(audio)
    movie.write_videofile('B站视频.mp4')


def main():
    """Download one video using stream URLs captured by hand from the browser."""
    url_30280 = 'https://xy221x131x191x56xy.mcdn.bilivideo.cn:4483/upgcxcode/95/78/439527895/439527895_nb2-1-30280.m4s?e=ig8euxZM2rNcNbdlhoNvNC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5JZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5tZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZlqNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0eN0B599M=&uipk=5&nbs=1&deadline=1636727944&gen=playurlv2&os=mcdn&oi=3748183839&trid=00015abe094f6dc94938917e8895505bead4u&platform=pc&upsig=f9b8eb9e4545f2d0d244804b9e65e780&uparams=e,uipk,nbs,deadline,gen,os,oi,trid,platform&mcdnid=9001331&mid=671157361&bvc=vod&nettype=0&orderid=0,3&agrr=0&bw=40218&logo=A0000100'
    url_30077 = 'https://xy221x131x191x56xy.mcdn.bilivideo.cn:4483/upgcxcode/95/78/439527895/439527895-1-30077.m4s?e=ig8euxZM2rNcNbdlhoNvNC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5JZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5tZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZlqNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0eN0B599M=&uipk=5&nbs=1&deadline=1636727944&gen=playurlv2&os=mcdn&oi=3748183839&trid=00015abe094f6dc94938917e8895505bead4u&platform=pc&upsig=df692a5e07e5ceb77c354bf38dea609f&uparams=e,uipk,nbs,deadline,gen,os,oi,trid,platform&mcdnid=9001331&mid=671157361&bvc=vod&nettype=0&orderid=0,3&agrr=0&bw=181576&logo=A0000100'
    header = {
        'referer': 'https://www.bilibili.com/video/BV1c341187m9',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36'
    }
    get_page(url_30280, url_30077, header)
之前是手动抓包的形式,如果说要爬取多个视频的话会很麻烦,这次就加强一下爬虫
首先把爬虫的基本框架搭起来
class Video:
    """Skeleton of the scraper for a single Bilibili video page."""

    def __init__(self, url):
        """Store the page URL and the headers sent when fetching it.

        Args:
            url: Address of the video page to scrape.
        """
        self.url = url
        # NOTE(review): this cookie is a hard-coded login session; consider
        # loading it from configuration instead of keeping it in the source.
        self.headers = {
            "cookie": "CURRENT_FNVAL=4048; PVID=5; b_timer=%7B%22ffp%22%3A%7B%22333.1007.fp.risk_5BA763BD%22%3A%221825E14DE6D%22%2C%22666.7.fp.risk_5BA763BD%22%3A%221825E14EFFC%22%2C%22666.25.fp.risk_5BA763BD%22%3A%221825E17A189%22%2C%22666.4.fp.risk_5BA763BD%22%3A%221825E17D7C8%22%2C%22333.934.fp.risk_5BA763BD%22%3A%221825E197416%22%2C%22333.979.fp.risk_5BA763BD%22%3A%221825E1A122C%22%2C%22777.5.0.0.fp.risk_5BA763BD%22%3A%221825E1B15F3%22%7D%7D; fingerprint3=09a322960f9b29d88c1731cadd0dc98f; innersign=0; CURRENT_BLACKGAP=0; bp_video_offset_671157361=689759880761835600; b_lsid=D1039D10A10_1825E14DB91; i-wanna-go-back=-1; CURRENT_QUALITY=80; rpdid=0zbfVFUBEL|Ucq23nZm|3rq|3w1OiL9g; hit-dyn-v2=1; blackside_state=0; b_ut=5; buvid_fp=5905c67921a38e156c824e2d22d1b61f; LIVE_BUVID=AUTO6416594074645196; DedeUserID=671157361; DedeUserID__ckMd5=ba8273578e2f6b80; SESSDATA=a6489929%2C1674959436%2Ca4b2a*81; bili_jct=fe131d3907b8eea7c59b3abfaa60a6e2; sid=5qy9dddr; buvid_fp_plain=undefined; fingerprint=5905c67921a38e156c824e2d22d1b61f; _uuid=8E3D575D-4517-7613-7B25-C4B27FDAF510306266infoc; buvid4=49E57E44-822A-F96C-B9B5-C3CB36DEC69E07142-022080210-2vQi149BRC5hxFHhxBtSxnTgzdnGNZWCAywIAqBXeJ4aJAXCFMFEag%3D%3D; buvid3=5BA763BD-A667-A548-A705-1705D82E763306244infoc",
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
        }

    # Fetch the page source
    def get_page(self):
        """Return the page HTML, or None when the status code is not 200."""
        # Without a timeout requests can wait on a stalled server forever.
        response = requests.get(self.url, headers=self.headers, timeout=10)
        if response.status_code == 200:
            return response.text
        return None

    def run(self):
        """Run the scraper; in this skeleton it only fetches the page."""
        # Skeleton only: the fetched source is not used yet.
        html = self.get_page()
还是之前的网站,首先抓一下包
这是视频和音频,之前已经讲过了,在元素中查找发现位置,再在源码中查一下
名字有些出入,以源码为准
既然找到了位置就可以匹配,用 xpath 中的 contains 语法获取到 script 中的内容,再用正则匹配
# Parse the page source to get the video title and stream links
def analysis(self, html):
    """Extract the video URL, audio URL and title from the page source.

    Args:
        html: HTML text of the video page.

    Returns:
        A ``(video, audio, name)`` tuple: the first video ``baseUrl``, the
        first audio ``baseUrl`` and the page title.

    Raises:
        IndexError: if the title, the ``window.__playinfo__`` script or a
            stream URL cannot be found in the page.
    """
    tree = etree.HTML(html)
    # Video title
    titles = tree.xpath('//*[@id="viewbox_report"]/h1/text()')
    # Script that holds the list of stream links
    scripts = tree.xpath('//script[contains(text(), "window.__playinfo__")]/text()')
    if not titles or not scripts:
        # Same exception type as the bare [0] lookup, but with a usable message.
        raise IndexError('video title or window.__playinfo__ script not found in page')
    name = titles[0]
    video_list = scripts[0]
    # Stream links. The audio id is matched with \d+ like the video id, so
    # pages whose first audio track is not 30280 are handled as well.
    videos = re.findall(r'"video":\[{"id":\d+,"baseUrl":"(.*?)"', video_list)
    audios = re.findall(r'"audio":\[{"id":\d+,"baseUrl":"(.*?)"', video_list)
    if not videos or not audios:
        raise IndexError('video or audio baseUrl not found in window.__playinfo__')
    return videos[0], audios[0], name
分别获取到视频和音频的链接
就可以发起请求进行合并了
def save(self, link):
    """Download the video and audio streams and merge them into one file.

    The streams are written to temporary ``<title>1.mp4`` / ``<title>1.mp3``
    files, merged into ``<title>.mp4`` and the temporary files are removed.

    Args:
        link: ``(video_url, audio_url, title)`` as returned by ``analysis``.

    Raises:
        requests.HTTPError: if either download answers with an error status.
    """
    video_url, audio_url, title = link[0], link[1], link[2]
    # Same cookie and user-agent as the page request, plus a referer that
    # points at the page (previously a second copy of the whole header dict).
    headers = {'referer': self.url, **self.headers}
    # Characters that are not allowed in file names (mainly on Windows)
    # would make open() fail, so they are replaced.
    title = re.sub(r'[\\/:*?"<>|]', '_', title).strip()
    name = title + '1'
    video_path = f'{name}.mp4'
    audio_path = f'{name}.mp3'

    print('正在下载视频')
    video = requests.get(video_url, headers=headers, timeout=30)
    video.raise_for_status()
    audio = requests.get(audio_url, headers=headers, timeout=30)
    audio.raise_for_status()
    try:
        with open(video_path, 'wb') as f:
            f.write(video.content)
        with open(audio_path, 'wb') as f:
            f.write(audio.content)
        # Merge
        print('合并视频')
        ffmpeg_tools.ffmpeg_merge_video_audio(video_path, audio_path, f'{title}.mp4')
        print('*' * 50)
        print('下载成功')
        print()
    finally:
        # Remove the intermediate files even when writing or merging fails.
        for path in (video_path, audio_path):
            if os.path.exists(path):
                os.remove(path)
全部代码(省略了导入语句,运行前需要先导入 os、re、requests,以及 lxml 的 etree 和 moviepy 的 ffmpeg_tools)
class Video:
    """Downloader for an ordinary Bilibili video page.

    The page is fetched, the video and audio stream URLs are read from the
    embedded ``window.__playinfo__`` script, and both streams are downloaded
    and merged into ``<title>.mp4``.
    """

    def __init__(self, url):
        """Store the page URL and the headers sent when fetching it.

        Args:
            url: Address of the video page to scrape.
        """
        self.url = url
        # NOTE(review): this cookie is a hard-coded login session; consider
        # loading it from configuration instead of keeping it in the source.
        self.headers = {
            "cookie": "CURRENT_FNVAL=4048; PVID=5; b_timer=%7B%22ffp%22%3A%7B%22333.1007.fp.risk_5BA763BD%22%3A%221825E14DE6D%22%2C%22666.7.fp.risk_5BA763BD%22%3A%221825E14EFFC%22%2C%22666.25.fp.risk_5BA763BD%22%3A%221825E17A189%22%2C%22666.4.fp.risk_5BA763BD%22%3A%221825E17D7C8%22%2C%22333.934.fp.risk_5BA763BD%22%3A%221825E197416%22%2C%22333.979.fp.risk_5BA763BD%22%3A%221825E1A122C%22%2C%22777.5.0.0.fp.risk_5BA763BD%22%3A%221825E1B15F3%22%7D%7D; fingerprint3=09a322960f9b29d88c1731cadd0dc98f; innersign=0; CURRENT_BLACKGAP=0; bp_video_offset_671157361=689759880761835600; b_lsid=D1039D10A10_1825E14DB91; i-wanna-go-back=-1; CURRENT_QUALITY=80; rpdid=0zbfVFUBEL|Ucq23nZm|3rq|3w1OiL9g; hit-dyn-v2=1; blackside_state=0; b_ut=5; buvid_fp=5905c67921a38e156c824e2d22d1b61f; LIVE_BUVID=AUTO6416594074645196; DedeUserID=671157361; DedeUserID__ckMd5=ba8273578e2f6b80; SESSDATA=a6489929%2C1674959436%2Ca4b2a*81; bili_jct=fe131d3907b8eea7c59b3abfaa60a6e2; sid=5qy9dddr; buvid_fp_plain=undefined; fingerprint=5905c67921a38e156c824e2d22d1b61f; _uuid=8E3D575D-4517-7613-7B25-C4B27FDAF510306266infoc; buvid4=49E57E44-822A-F96C-B9B5-C3CB36DEC69E07142-022080210-2vQi149BRC5hxFHhxBtSxnTgzdnGNZWCAywIAqBXeJ4aJAXCFMFEag%3D%3D; buvid3=5BA763BD-A667-A548-A705-1705D82E763306244infoc",
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
        }

    def get_page(self):
        """Return the page HTML, or None when the status code is not 200."""
        # Without a timeout requests can wait on a stalled server forever.
        response = requests.get(self.url, headers=self.headers, timeout=10)
        if response.status_code == 200:
            return response.text
        return None

    def analysis(self, html):
        """Extract the video URL, audio URL and title from the page source.

        Args:
            html: HTML text of the video page.

        Returns:
            A ``(video, audio, name)`` tuple: the first video ``baseUrl``,
            the first audio ``baseUrl`` and the page title.

        Raises:
            IndexError: if the title, the ``window.__playinfo__`` script or
                a stream URL cannot be found in the page.
        """
        tree = etree.HTML(html)
        titles = tree.xpath('//*[@id="viewbox_report"]/h1/text()')
        scripts = tree.xpath('//script[contains(text(), "window.__playinfo__")]/text()')
        if not titles or not scripts:
            # Same exception type as the bare [0] lookup, but with a usable message.
            raise IndexError('video title or window.__playinfo__ script not found in page')
        name = titles[0]
        video_list = scripts[0]
        # The audio id is matched with \d+ like the video id, so pages whose
        # first audio track is not 30280 are handled as well.
        videos = re.findall(r'"video":\[{"id":\d+,"baseUrl":"(.*?)"', video_list)
        audios = re.findall(r'"audio":\[{"id":\d+,"baseUrl":"(.*?)"', video_list)
        if not videos or not audios:
            raise IndexError('video or audio baseUrl not found in window.__playinfo__')
        return videos[0], audios[0], name

    def save(self, link):
        """Download the video and audio streams and merge them into one file.

        The streams are written to temporary ``<title>1.mp4`` /
        ``<title>1.mp3`` files, merged into ``<title>.mp4`` and the temporary
        files are removed.

        Args:
            link: ``(video_url, audio_url, title)`` as returned by ``analysis``.

        Raises:
            requests.HTTPError: if either download answers with an error status.
        """
        video_url, audio_url, title = link[0], link[1], link[2]
        # Same cookie and user-agent as the page request, plus a referer that
        # points at the page (previously a second copy of the whole dict).
        headers = {'referer': self.url, **self.headers}
        # Characters that are not allowed in file names (mainly on Windows)
        # would make open() fail, so they are replaced.
        title = re.sub(r'[\\/:*?"<>|]', '_', title).strip()
        name = title + '1'
        video_path = f'{name}.mp4'
        audio_path = f'{name}.mp3'

        print('正在下载视频')
        video = requests.get(video_url, headers=headers, timeout=30)
        video.raise_for_status()
        audio = requests.get(audio_url, headers=headers, timeout=30)
        audio.raise_for_status()
        try:
            with open(video_path, 'wb') as f:
                f.write(video.content)
            with open(audio_path, 'wb') as f:
                f.write(audio.content)
            print('合并视频')
            ffmpeg_tools.ffmpeg_merge_video_audio(video_path, audio_path, f'{title}.mp4')
            print('*' * 50)
            print('下载成功')
            print()
        finally:
            # Remove the intermediate files even when writing or merging fails.
            for path in (video_path, audio_path):
                if os.path.exists(path):
                    os.remove(path)

    def run(self):
        """Fetch the page, parse the stream links and download the video."""
        html = self.get_page()
        if html is None:
            # get_page signals a non-200 answer with None; there is nothing to parse.
            print('获取源码失败', self.url)
            return
        print('获取到源码')
        link = self.analysis(html)
        print('获取到链接', link)
        self.save(link)
你以为这样就结束了,还没有增强呢!
只爬取普通视频显得这个代码有些垃圾了,接下来添加爬取番剧及电影的代码
先看一下番剧的包
包抓到了和普通视频一样
链接位置似乎有些不一样
找到视频链接就可以写代码了,不过这次不用从头写了,前面已经写好了爬取普通视频的代码了,完全可以拿来就用,
上面是用类来写的爬虫,所以我们完全可以继承它,只重写其中解析视频链接的代码
直接上代码
class Drama(Video):
    """Downloader for bangumi (series) and movie pages.

    Only the parsing step differs from ``Video``; fetching, downloading and
    merging are inherited.
    """

    def analysis(self, html):
        """Extract the video URL, audio URL and title from the page source.

        Args:
            html: HTML text of the bangumi or movie page.

        Returns:
            A ``(video, audio, name)`` tuple: the first video ``backupUrl``
            entry, the first audio ``backupUrl`` entry and the page title.

        Raises:
            IndexError: if the title, the ``window.__playinfo__`` script or
                a stream URL cannot be found in the page.
        """
        tree = etree.HTML(html)
        # Title of the page
        titles = tree.xpath('//title/text()')
        # Script that holds the list of stream links
        scripts = tree.xpath('//script[contains(text(), "window.__playinfo__")]/text()')
        if not titles or not scripts:
            # Same exception type as the bare [0] lookup, but with a usable message.
            raise IndexError('page title or window.__playinfo__ script not found in page')
        name = titles[0]
        video_list = scripts[0]
        videos = re.findall(r'"video":\[.*?"backupUrl":\["(.*?)"', video_list)
        audios = re.findall(r'"audio":\[.*?"backupUrl":\["(.*?)"', video_list)
        if not videos or not audios:
            raise IndexError('video or audio backupUrl not found in window.__playinfo__')
        return videos[0], audios[0], name
电影的结构和番剧一样可以直接调用,
再判断一下传入的链接是普通视频还是番剧又或者是电影
普通视频
https://www.bilibili.com/video/BV1c341187m9
番剧
https://www.bilibili.com/bangumi/play/ep508404?from_spmid=666.25.episode.0&from_outer_spmid=666.4.0.0
电影
https://www.bilibili.com/bangumi/play/ep673044?from_spmid=666.7.banner.0
通过以上链接可以看出,普通视频的链接包含 video,番剧和电影的链接包含 play,再加上爬取番剧和电影的代码一样,就可以这样写
if __name__ == '__main__':
    url_list = [
        'https://www.bilibili.com/bangumi/play/ss42077?theme=movie'
    ]
    for i in url_list:
        # Classify on the path only: a query string may contain "video" or
        # "play" as well and would otherwise pick the wrong scraper.
        path = i.split('?', 1)[0]
        if '/video/' in path:
            p = Video(i)
            p.run()
        elif '/play/' in path:
            d = Drama(i)
            d.run()
        else:
            # Unknown URL shape: let the user choose the scraper.
            error(i)
再写一个异常方法
def error(url):
    """Ask the user which scraper to use when the URL type was not recognised.

    The prompt repeats until a valid choice is entered; the chosen scraper
    is run once and the function returns.

    Args:
        url: Address of the page to download.
    """
    print('自动识别错误,请手动选择')
    while True:
        choose = input('1.视频, 2.番剧或电影\n').strip()
        if choose == '1':
            print('视频')
            p = Video(url)
            p.run()
            # Leave the loop after the download; the original flag was never
            # cleared, so the prompt came back forever.
            return
        elif choose == '2':
            print('番剧或电影')
            d = Drama(url)
            d.run()
            return
        else:
            print('输入格式有误,请重新输入')
合并后代码如下(省略了导入语句,运行前需要先导入 os、re、requests,以及 lxml 的 etree 和 moviepy 的 ffmpeg_tools)
# Ordinary video scraper
class Video:
    """Downloader for an ordinary Bilibili video page.

    The page is fetched, the video and audio stream URLs are read from the
    embedded ``window.__playinfo__`` script, and both streams are downloaded
    and merged into ``<title>.mp4``.
    """

    def __init__(self, url):
        """Store the page URL and the headers sent when fetching it.

        Args:
            url: Address of the video page to scrape.
        """
        self.url = url
        # NOTE(review): this cookie is a hard-coded login session; consider
        # loading it from configuration instead of keeping it in the source.
        self.headers = {
            "cookie": "CURRENT_FNVAL=4048; PVID=5; b_timer=%7B%22ffp%22%3A%7B%22333.1007.fp.risk_5BA763BD%22%3A%221825E14DE6D%22%2C%22666.7.fp.risk_5BA763BD%22%3A%221825E14EFFC%22%2C%22666.25.fp.risk_5BA763BD%22%3A%221825E17A189%22%2C%22666.4.fp.risk_5BA763BD%22%3A%221825E17D7C8%22%2C%22333.934.fp.risk_5BA763BD%22%3A%221825E197416%22%2C%22333.979.fp.risk_5BA763BD%22%3A%221825E1A122C%22%2C%22777.5.0.0.fp.risk_5BA763BD%22%3A%221825E1B15F3%22%7D%7D; fingerprint3=09a322960f9b29d88c1731cadd0dc98f; innersign=0; CURRENT_BLACKGAP=0; bp_video_offset_671157361=689759880761835600; b_lsid=D1039D10A10_1825E14DB91; i-wanna-go-back=-1; CURRENT_QUALITY=80; rpdid=0zbfVFUBEL|Ucq23nZm|3rq|3w1OiL9g; hit-dyn-v2=1; blackside_state=0; b_ut=5; buvid_fp=5905c67921a38e156c824e2d22d1b61f; LIVE_BUVID=AUTO6416594074645196; DedeUserID=671157361; DedeUserID__ckMd5=ba8273578e2f6b80; SESSDATA=a6489929%2C1674959436%2Ca4b2a*81; bili_jct=fe131d3907b8eea7c59b3abfaa60a6e2; sid=5qy9dddr; buvid_fp_plain=undefined; fingerprint=5905c67921a38e156c824e2d22d1b61f; _uuid=8E3D575D-4517-7613-7B25-C4B27FDAF510306266infoc; buvid4=49E57E44-822A-F96C-B9B5-C3CB36DEC69E07142-022080210-2vQi149BRC5hxFHhxBtSxnTgzdnGNZWCAywIAqBXeJ4aJAXCFMFEag%3D%3D; buvid3=5BA763BD-A667-A548-A705-1705D82E763306244infoc",
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
        }

    # Fetch the page source
    def get_page(self):
        """Return the page HTML, or None when the status code is not 200."""
        # Without a timeout requests can wait on a stalled server forever.
        response = requests.get(self.url, headers=self.headers, timeout=10)
        if response.status_code == 200:
            return response.text
        return None

    # Parse the page source to get the video title and stream links
    def analysis(self, html):
        """Extract the video URL, audio URL and title from the page source.

        Args:
            html: HTML text of the video page.

        Returns:
            A ``(video, audio, name)`` tuple: the first video ``baseUrl``,
            the first audio ``baseUrl`` and the page title.

        Raises:
            IndexError: if the title, the ``window.__playinfo__`` script or
                a stream URL cannot be found in the page.
        """
        tree = etree.HTML(html)
        # Video title
        titles = tree.xpath('//*[@id="viewbox_report"]/h1/text()')
        # Script that holds the list of stream links
        scripts = tree.xpath('//script[contains(text(), "window.__playinfo__")]/text()')
        if not titles or not scripts:
            # Same exception type as the bare [0] lookup, but with a usable message.
            raise IndexError('video title or window.__playinfo__ script not found in page')
        name = titles[0]
        video_list = scripts[0]
        # Stream links. The audio id is matched with \d+ like the video id,
        # so pages whose first audio track is not 30280 are handled as well.
        videos = re.findall(r'"video":\[{"id":\d+,"baseUrl":"(.*?)"', video_list)
        audios = re.findall(r'"audio":\[{"id":\d+,"baseUrl":"(.*?)"', video_list)
        if not videos or not audios:
            raise IndexError('video or audio baseUrl not found in window.__playinfo__')
        return videos[0], audios[0], name

    # Request the stream links, then merge and store the result
    def save(self, link):
        """Download the video and audio streams and merge them into one file.

        The streams are written to temporary ``<title>1.mp4`` /
        ``<title>1.mp3`` files, merged into ``<title>.mp4`` and the temporary
        files are removed.

        Args:
            link: ``(video_url, audio_url, title)`` as returned by ``analysis``.

        Raises:
            requests.HTTPError: if either download answers with an error status.
        """
        video_url, audio_url, title = link[0], link[1], link[2]
        # Same cookie and user-agent as the page request, plus a referer that
        # points at the page (previously a second copy of the whole dict).
        headers = {'referer': self.url, **self.headers}
        # Characters that are not allowed in file names (mainly on Windows)
        # would make open() fail, so they are replaced.
        title = re.sub(r'[\\/:*?"<>|]', '_', title).strip()
        name = title + '1'
        video_path = f'{name}.mp4'
        audio_path = f'{name}.mp3'

        print('正在下载视频')
        video = requests.get(video_url, headers=headers, timeout=30)
        video.raise_for_status()
        audio = requests.get(audio_url, headers=headers, timeout=30)
        audio.raise_for_status()
        try:
            with open(video_path, 'wb') as f:
                f.write(video.content)
            with open(audio_path, 'wb') as f:
                f.write(audio.content)
            # Merge
            print('合并视频')
            ffmpeg_tools.ffmpeg_merge_video_audio(video_path, audio_path, f'{title}.mp4')
            print('*' * 50)
            print('下载成功')
            print()
        finally:
            # Delete the intermediate files, even when writing or merging fails.
            for path in (video_path, audio_path):
                if os.path.exists(path):
                    os.remove(path)

    def run(self):
        """Fetch the page, parse the stream links and download the video."""
        html = self.get_page()
        if html is None:
            # get_page signals a non-200 answer with None; there is nothing to parse.
            print('获取源码失败', self.url)
            return
        print('获取到源码')
        link = self.analysis(html)
        print('获取到链接', link)
        self.save(link)


# Bangumi / movie scraper
class Drama(Video):
    """Downloader for bangumi (series) and movie pages.

    Only the parsing step differs from ``Video``; fetching, downloading and
    merging are inherited.
    """

    def analysis(self, html):
        """Extract the video URL, audio URL and title from the page source.

        Args:
            html: HTML text of the bangumi or movie page.

        Returns:
            A ``(video, audio, name)`` tuple: the first video ``backupUrl``
            entry, the first audio ``backupUrl`` entry and the page title.

        Raises:
            IndexError: if the title, the ``window.__playinfo__`` script or
                a stream URL cannot be found in the page.
        """
        tree = etree.HTML(html)
        # Title of the page
        titles = tree.xpath('//title/text()')
        # Script that holds the list of stream links
        scripts = tree.xpath('//script[contains(text(), "window.__playinfo__")]/text()')
        if not titles or not scripts:
            raise IndexError('page title or window.__playinfo__ script not found in page')
        name = titles[0]
        video_list = scripts[0]
        videos = re.findall(r'"video":\[.*?"backupUrl":\["(.*?)"', video_list)
        audios = re.findall(r'"audio":\[.*?"backupUrl":\["(.*?)"', video_list)
        if not videos or not audios:
            raise IndexError('video or audio backupUrl not found in window.__playinfo__')
        return videos[0], audios[0], name


def error(url):
    """Ask the user which scraper to use when the URL type was not recognised.

    The prompt repeats until a valid choice is entered; the chosen scraper
    is run once and the function returns.

    Args:
        url: Address of the page to download.
    """
    print('自动识别错误,请手动选择')
    while True:
        choose = input('1.视频, 2.番剧或电影\n').strip()
        if choose == '1':
            print('视频')
            p = Video(url)
            p.run()
            # Leave the loop after the download; the original flag was never
            # cleared, so the prompt came back forever.
            return
        elif choose == '2':
            print('番剧或电影')
            d = Drama(url)
            d.run()
            return
        else:
            print('输入格式有误,请重新输入')


if __name__ == '__main__':
    url_list = [
        'https://www.bilibili.com/bangumi/play/ss42077?theme=movie'
    ]
    for i in url_list:
        # Classify on the path only: a query string may contain "video" or
        # "play" as well and would otherwise pick the wrong scraper.
        path = i.split('?', 1)[0]
        if '/video/' in path:
            p = Video(i)
            p.run()
        elif '/play/' in path:
            d = Drama(i)
            d.run()
        else:
            error(i)