python - 下载小说
这里以小说网站 https://www.biqgg.cc/ 为例。其他小说类网站不能直接套用本脚本,需要具体分析;各网站的差异主要在于小说内容部分的 HTML 标签,整体思路大同小异。
import requests
from bs4 import BeautifulSoup
def save(sub_title, cont):
    """Write one chapter to ``xiaoshuo/<sub_title>.txt`` as UTF-8 text.

    The ``xiaoshuo`` directory is resolved relative to the current working
    directory and is created on demand. Characters that are not allowed in
    file names on common platforms are replaced with ``_``.

    Args:
        sub_title: Chapter title, used as the file name (without extension).
        cont: Chapter text to write.
    """
    # Local imports keep this function self-contained without touching the
    # file-level import block.
    import os
    import re

    out_dir = 'xiaoshuo'
    os.makedirs(out_dir, exist_ok=True)

    # A title such as "1/2" or "Why?" would otherwise be treated as a path
    # or be rejected by the file system.
    safe_name = re.sub(r'[\\/:*?"<>|]', '_', sub_title).strip() or 'untitled'

    # os.path.join instead of a hard-coded backslash so the file lands inside
    # the directory on every platform; explicit UTF-8 so Chinese text does not
    # depend on the platform's default codec.
    path = os.path.join(out_dir, safe_name + '.txt')
    with open(path, 'w', encoding='utf-8') as f:
        f.write(cont)
if __name__ == '__main__':
    # Download every chapter linked from the novel's index page and store
    # each one through save().
    from urllib.parse import urljoin

    url = 'https://www.biqgg.cc/book/4422/'
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36'}
    # Without a timeout a stalled connection would block the script forever.
    timeout = 10

    index_resp = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail with a clear HTTP error instead of parsing an error page.
    index_resp.raise_for_status()
    soup = BeautifulSoup(index_resp.text, "lxml")

    down_urls = soup.select(".listmain dl dd a")
    for i, down_url in enumerate(down_urls):
        # NOTE(review): the original script skips the 11th link; presumably it
        # is the site's "show all chapters" pseudo-link rather than a chapter
        # -- confirm against the live page.
        if i == 10:
            continue

        href = down_url.attrs.get('href')
        if not href:
            continue
        # urljoin handles both site-absolute ("/book/...") and relative hrefs.
        d_url = urljoin(url, href)
        if not d_url.startswith(('http://', 'https://')):
            # e.g. "javascript:" links cannot be fetched.
            continue

        chapter_resp = requests.get(url=d_url, headers=headers, timeout=timeout)
        chapter_resp.raise_for_status()
        soup_html = BeautifulSoup(chapter_resp.text, 'lxml')

        heading = soup_html.select_one(".content h1")
        body = soup_html.select_one(".content #chaptercontent")
        if heading is None or body is None:
            print('skip (unexpected page layout): ' + d_url)
            continue

        # get_text() always returns a str, whereas .string is None when the
        # tag has more than one child node.
        sub_title = heading.get_text(strip=True)
        cont = body.get_text()
        save(sub_title, cont)
python - 下载小说
https://cfx.work/index.php/archives/9/