Python 爬取笔趣阁小说

Posted by DoubleWay on May 16, 2020
# -*- coding: utf-8 -*-
import requests
import random
import urllib3
from bs4 import BeautifulSoup
import sys

# Python 2 only: site.py removes sys.setdefaultencoding at startup, so the
# module is reloaded to get it back. Setting UTF-8 makes the implicit
# unicode <-> str conversions below (str(content), f.write of unicode text)
# use UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding('utf-8')

# Seconds to wait on each HTTP request; without a timeout a stalled server
# would block the script indefinitely.
REQUEST_TIMEOUT = 30

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36'}

# Fetch the novel's index page, which carries the chapter list.
# NOTE: verify=False turns off TLS certificate verification for the request.
html = requests.get("https://www.biduo.cc/biquge/53_53723/", headers=headers,
                    verify=False, timeout=REQUEST_TIMEOUT)
soup = BeautifulSoup(html.text, 'html.parser')

# The with-block keeps the output file open across every id='list' element
# and closes it exactly once, including when a request or parse step raises.
with open('/home/android/tmp/1.txt', 'a+') as f:
    for item in soup.find_all(id='list'):
        for type_a in item.find_all('a'):
            print(type_a.text)  # chapter title
            f.write('\n' + type_a.text + '\n')
            # type_a['href'] is the chapter link from the <a> tag; it is
            # appended to the site root to build the chapter URL.
            text = requests.get('https://www.biduo.cc' + type_a['href'],
                                headers=headers, verify=False,
                                timeout=REQUEST_TIMEOUT)
            soup2 = BeautifulSoup(text.text, 'html.parser')
            content_tag = soup2.find(id='content')
            if content_tag is None:
                # find() returns None when the page has no id='content'
                # element; keep the title already written and move on.
                continue
            # Iterating the tag yields its direct children (text nodes and
            # <br/> tags). Converting each to str lets the '<br/>' markup be
            # stripped with a plain string replace.
            for content in content_tag:
                line = str(content).replace('<br/>', '')
                print(line)
                f.write(line + '\n')