时间:2022-10-07 11:26:59 | 栏目:Python代码 | 点击:次
遇到那种有很多图的微信公众号文章咋办?一个一个存很麻烦,应朋友的要求自己写了个爬虫。
2.0版本完成了!完善了生成PDF的功能:可根据图片比例自动调节大小,防止超出页面范围;还增加了序号,方便查看。
# Scrape every <img data-src=...> of a web page (written for WeChat
# official-account articles) into ./pics and merge the downloads into
# pics/merge.pdf, one captioned image per page.
import os

# ----------------- settings -----------------
# url = 'https://mp.weixin.qq.com/s/8JwB_SXQ-80uwQ9L97BMgw'
PICS_DIR = 'pics'        # download folder, created next to the working dir
PDF_NAME = 'merge.pdf'   # written inside PICS_DIR
REQUEST_TIMEOUT = 30     # seconds; keeps a stalled server from hanging the run
MAX_WIDTH = 180          # largest drawn width, in the FPDF() page unit
MAX_HEIGHT = 250         # largest drawn height, in the FPDF() page unit
# Height/width ratio above which an image is fitted by height instead of by
# width (presumably the A-series paper ratio, about 1.41 -- TODO confirm).
TALL_RATIO = 1.41


def numbered_images(names):
    """Return the ``<number>.jpg`` entries of *names*, sorted numerically.

    Anything else found in the download folder -- most importantly the
    ``merge.pdf`` left behind by a previous run -- is ignored instead of
    crashing the numeric sort.
    """
    def index_of(name):
        return int(os.path.splitext(name)[0])

    picked = []
    for name in names:
        stem, ext = os.path.splitext(name)
        if ext.lower() == '.jpg' and stem.isdecimal():
            picked.append(name)
    return sorted(picked, key=index_of)


def fit_image_size(width, height):
    """Return the ``(w, h)`` at which a *width* x *height* image is drawn.

    Tall images are pinned to MAX_HEIGHT and everything else to MAX_WIDTH;
    the other dimension follows the aspect ratio, truncated to an int.
    """
    ratio = height / width
    if ratio > TALL_RATIO:
        return int(MAX_HEIGHT / ratio), MAX_HEIGHT
    return MAX_WIDTH, int(MAX_WIDTH * ratio)


def download_images(url):
    """Save every ``data-src`` image of the page at *url* as ``<n>.jpg``.

    Files go to the current working directory and are numbered from 0 in
    page order. An image that cannot be fetched or written is reported and
    skipped. Returns the number of files saved.

    Raises:
        requests.RequestException: if the article page itself cannot be
            fetched.
    """
    # Imported here so the pure helpers above stay importable without the
    # scraping dependencies installed.
    import requests
    from bs4 import BeautifulSoup

    page = requests.get(url, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    soup = BeautifulSoup(page.text, 'html.parser')

    saved = 0
    for tag in soup.find_all('img'):
        src = tag.get('data-src')
        if not src:
            # <img> without a data-src attribute: nothing to download.
            continue
        print(src)
        filename = '{}.jpg'.format(saved)
        try:
            reply = requests.get(src, timeout=REQUEST_TIMEOUT)
            # Do not store an HTTP error body as if it were an image.
            reply.raise_for_status()
            # NOTE(review): every download gets a .jpg suffix whatever its
            # real format is; confirm the installed fpdf copes with that.
            with open(filename, 'wb') as out:
                print(filename)
                out.write(reply.content)
        except (requests.RequestException, OSError) as exc:
            print('skipped {}: {}'.format(src, exc))
            continue
        saved += 1
    return saved


def build_pdf(directory):
    """Merge the numbered images in *directory* into ``merge.pdf`` there.

    Each image gets its own page with the file name as a caption. Images
    that cannot be opened or embedded are reported and skipped.
    """
    from fpdf import FPDF
    from PIL import Image

    pdf = FPDF()
    pdf.set_auto_page_break(1)
    images = numbered_images(os.listdir(directory))
    print(images)

    for name in images:
        image_path = os.path.join(directory, name)
        try:
            # Only the dimensions are needed; close the file right away.
            with Image.open(image_path) as img:
                width, height = img.width, img.height
        except OSError as exc:
            print('skipped {}: {}'.format(name, exc))
            continue
        print(height / width)
        draw_w, draw_h = fit_image_size(width, height)
        try:
            pdf.add_page()
            pdf.set_xy(0, 0)
            pdf.set_font('arial', 'B', 14)
            pdf.cell(60)
            pdf.cell(70, 10, name, border=0, ln=1, align='C')
            pdf.image(image_path, w=draw_w, h=draw_h)
        except Exception as exc:
            # Best effort: one image the PDF library rejects must not lose
            # the rest of the document, but say which one was dropped.
            print('skipped {}: {}'.format(name, exc))

    pdf.output(os.path.join(directory, PDF_NAME), "F")


def main():
    """Ask for an article URL, download its images and build the PDF."""
    print('jd3096 for king 2.0 VIP8钻石永久会员版')
    print('愿你远离流氓软件每一天')
    url = input('请输入网址:').strip()

    # ----------------- get data -----------------
    os.makedirs(PICS_DIR, exist_ok=True)
    os.chdir(PICS_DIR)
    download_images(url)

    # ----------------- make pdf -----------------
    path = os.getcwd()
    print(path)
    build_pdf(path)


if __name__ == '__main__':
    main()
爬完了长这样:
PDF长这样,比例适中适合阅读