下载涩图
壁纸网址: https://gelbooru.wjcodes.com/
python代码
已支持多线程,可连续下载多页
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
| ''' @author=lthero ''' import os import re import time from bs4 import BeautifulSoup import requests import threading
# Browser-style User-Agent sent with every HTTP request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
}
class myThread(threading.Thread):
    """Worker thread that downloads one image to <file_path>/<fileName>.jpg."""

    def __init__(self, url, fileName, file_path):
        threading.Thread.__init__(self)
        self.url = url              # direct URL of the image to fetch
        self.file_path = file_path  # destination directory
        self.fileName = fileName    # file name without extension

    def open_url(self, url):
        """Fetch *url* and return the response body as text.

        Bug fix: headers must be passed via the ``headers=`` keyword —
        the original positional call ``requests.get(url, headers)`` sent
        the dict as the ``params`` argument, so the User-Agent was never
        transmitted.
        """
        response = requests.get(url, headers=headers)
        return response.text

    def run(self):
        """Download ``self.url``; skip (and say so) if the file already exists."""
        # os.path.join instead of hand-built "\\" so the path works on any OS.
        file = os.path.join(self.file_path, self.fileName + ".jpg")
        # Check existence BEFORE downloading — the original fetched the image
        # first and only then discovered it was going to be skipped.
        if os.path.exists(file):
            print(file + " exist")
            return
        print("Downloading %s" % self.fileName)
        img = requests.get(self.url, headers=headers)
        with open(file, 'wb') as f:
            f.write(img.content)
class picDown():
    """Scrape gallery listing pages and download every image found,
    spawning one ``myThread`` worker per image."""

    def open_url(self, url):
        """Fetch *url* and return the response body as text.

        Bug fix: headers must be passed via the ``headers=`` keyword —
        the original positional call ``requests.get(url, headers)`` sent
        the dict as the ``params`` argument, so the User-Agent was never
        transmitted.
        """
        response = requests.get(url, headers=headers)
        return response.text

    def __init__(self, url, save_path, pageNum):
        """Validate the save directory, clamp the page count to 1..10,
        then immediately start downloading.

        url       -- listing URL, optionally ending in "&p=N" / "?p=N"
        save_path -- destination directory (created if missing)
        pageNum   -- number of consecutive pages to fetch, as a string
        """
        self.starturl = url
        self.save_path = save_path
        if not os.path.exists(self.save_path):
            print("目录不存在,已经尝试创建")
            os.makedirs(self.save_path)
        if not os.path.exists(self.save_path):
            print("目录不存在,无法创建成功,请重新输入目录")
            return  # abort: directory could not be created
        # Clamp to 1..10 pages (the usage notes say >10 risks an IP ban).
        self.pageNum = str(min(10, max(1, int(pageNum))))
        self.run()

    def run(self):
        """Iterate the listing pages and dispatch one download thread per image."""
        # Derive the numeric start page: a URL ending in "...&p=N" resumes
        # at page N; otherwise "?p=" is appended and we start at page 1.
        page = self.starturl.split('&')[-1]
        startNum = 0
        if len(page) <= 6:
            startNum += int(str(page).strip("&p="))
            self.starturl = self.starturl[0:-int(len(page))] + "p="
        else:
            self.starturl += "?p="
            startNum += 1
        # Compiled once here instead of once per page (loop-invariant).
        pattern = re.compile(r'(https?:[^:<>"]*\/)([^:<>"]*)(\.((png!thumbnail)|(png)|(jpg)|(webp)))')
        for i in range(startNum, startNum + int(self.pageNum)):
            soup = BeautifulSoup(self.open_url(self.starturl + str(i)), 'lxml')
            images = soup.find('ul', id='main-list')
            for li in images.find_all('li'):
                onclick = li.a['onclick']
                each_url = pattern.findall(str(onclick))[0][0]
                name = str(each_url).split('/')[-2]
                # Presumably the full-size image URL is the thumbnail
                # directory minus its trailing slash plus ".jpg" —
                # TODO confirm against the live site markup.
                each_url = each_url[0:-1] + ".jpg"
                myThread(each_url, name, self.save_path).start()
                time.sleep(0.1)  # stagger requests so the server isn't hammered
if __name__ == '__main__':
    # Interactive CLI: print the banner once, then prompt forever for a
    # listing URL, a destination directory and a page count, handing each
    # triple to picDown (which blocks while dispatching downloads).
    print("Author: lthero")
    print("HomePage: https://blog.lthero.cn")
    print('''使用说明: 1.点击右键可以粘贴! 2.先输入网址 3.再输入要保存到的目录完整路径,可以输入中文路径,路径一定要存在 4.如果不输入下面的页数,默认只下载一页 5.一次性最多下载10页[防止ip被封] ''')
    while True:
        target = input("输入要下载的网址: 比如 https://gelbooru.wjcodes.com/index.php?tag=&p=3 请输入: ")
        folder = input("输入要保存到的目录: ")
        pages = input("要连续下载几页?输入数字即可: ")
        picDown(url=target, save_path=folder, pageNum=pages)
        time.sleep(5)
        print("全部下载完成!")
|
打包程序下载
https://wwd.lanzouy.com/iBD1f079k2dc
密码:h3wj