

import requests
from bs4 import BeautifulSoup
from multiprocessing import Process, Queue
from threading import Thread
import os
# Browser-like HTTP header fields shared by every requests.get call in this
# file. Long values are wrapped with implicit string concatenation; the
# resulting strings are unchanged.
req_header = {
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cookie': (
        '__jsluid=ffa49d477b3fb0a1979c5482a6046d94; '
        'UM_distinctid=16385d0b4c63d6-0eab1fcdfee39d-33617f06-fa000-16385d0b4c74e8; '
        'CNZZDATA1272873873=1745087307-1526954737-%7C1526970941'
    ),
    'Host': 'www.biquge.com.tw',
    'Connection': 'keep-alive',
    'Referer': 'http://www.biquge.com.tw',
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/59.0.3071.104 Safari/537.36'
    ),
}
class StoryContent(Thread):
    """Worker thread that downloads one chapter page and saves it to disk.

    The chapter is fetched from ``BASE_URL + section_url``, decoded as
    gb2312, and its heading and body text are written (UTF-8 encoded) to
    ``SAVE_DIR + title + '/' + name + '.txt'``.

    Args:
        title: Story title; used as the name of the output sub-directory.
        name: Chapter name; used as the output file name.
        section_url: Site-relative URL of the chapter page.
        q: Queue-like object (only ``put`` is used) that receives a
            completion message after the chapter file has been written.
    """

    BASE_URL = 'http://www.biquge.com.tw'
    SAVE_DIR = '/Users/lyf/test/'
    # Seconds to wait on the server; without a timeout a stalled connection
    # would block this thread (and whoever joins it) forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, title, name, section_url, q):
        super().__init__()
        self.title = title
        self.name = name
        self.section_url = section_url
        self.q = q

    def run(self):
        """Fetch, parse and store the chapter; report the outcome.

        Failures are reported on stdout instead of propagating, so one bad
        chapter does not produce a thread traceback.
        """
        try:
            req_url = self.BASE_URL + self.section_url
            # The dict holds HTTP header fields, so it must be sent via
            # ``headers=``; ``params=`` would append it to the query string.
            r = requests.get(req_url, headers=req_header,
                             timeout=self.REQUEST_TIMEOUT)
            r.encoding = 'gb2312'
            soup = BeautifulSoup(r.text, "html.parser")
            section_name = soup.select('#wrapper .content_read .box_con .bookname h1')[0].text
            section_text = soup.select('#wrapper .content_read .box_con #content')[0].text

            path = self.SAVE_DIR + self.title
            try:
                # exist_ok avoids the check-then-create race between the
                # many chapter threads that share one story directory.
                os.makedirs(path, exist_ok=True)
            except OSError:
                # Fall through: open() below fails and is reported by the
                # outer handler.
                print('创建%s错误' % path)

            with open(path + '/' + self.name + '.txt', 'wb') as f:
                f.write(('\r章节名称:' + section_name + '\r\n').encode('utf-8'))
                f.write(('\r章节内容:\n\n' + section_text + '\r\n').encode('utf-8'))

            # Report the chapter name (not the story title) so each queued
            # message identifies which chapter finished.
            self.q.put('下载完成章节:%s' % self.name)
        except Exception as exc:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not swallowed; include the
            # cause so failures can be diagnosed.
            print('下载出错', repr(exc))
class StorySection(Thread):
    """Thread that downloads every chapter of one story.

    It fetches the story's index page, starts one ``StoryContent`` worker
    per chapter link, waits for all of them, and prints the messages the
    workers put on the shared queue.

    Args:
        title: Story title, forwarded to each chapter worker.
        href: Absolute URL of the story's chapter-index page.
    """

    # Seconds to wait on the server before giving up on the index page.
    REQUEST_TIMEOUT = 30

    def __init__(self, title, href):
        super().__init__()
        self.title = title
        self.href = href

    def run(self):
        """Download the chapter list and fan out one worker per chapter."""
        # Local import: the workers are threads, so the thread-safe
        # queue.Queue is the right channel. multiprocessing.Queue hands data
        # over through a feeder thread, so its empty() can still report True
        # right after a put() and messages would be dropped when draining.
        import queue

        # The dict holds HTTP header fields, so it must be sent via
        # ``headers=``; ``params=`` would append it to the query string.
        r = requests.get(self.href, headers=req_header,
                         timeout=self.REQUEST_TIMEOUT)
        r.encoding = 'gb2312'
        soup = BeautifulSoup(r.text, "html.parser")
        section_list = soup.select('#wrapper .box_con #list dl dd a')

        q = queue.Queue()
        workers = []
        for section in section_list:
            worker = StoryContent(self.title, section.text, section.get('href'), q)
            worker.start()
            workers.append(worker)

        for worker in workers:
            worker.join()

        # All producers have finished, so empty() is now reliable.
        while not q.empty():
            print(q.get())

        # Announce completion only after every chapter worker has been joined.
        print('下载完成《%s》' % self.title)
def get_story():
    """Crawl the site's home page and download every story linked from it.

    Each anchor that has link text and an ``http://`` href ending in ``/``
    is treated as a story index page and handed to its own ``StorySection``
    thread. The function returns once all of those threads have finished.
    """
    req_url = 'http://www.biquge.com.tw'
    # The dict holds HTTP header fields, so it must be sent via ``headers=``;
    # ``params=`` would append it to the query string. The timeout keeps a
    # stalled server from hanging the script.
    r = requests.get(req_url, headers=req_header, timeout=30)
    r.encoding = 'gb2312'
    soup = BeautifulSoup(r.text, "html.parser")

    workers = []
    seen_hrefs = set()
    for story in soup.find_all('a'):
        href = story.get('href')
        title = story.text
        if not (title and href and href.endswith('/') and href.startswith('http://')):
            continue
        # The same link can appear more than once on the page; starting two
        # workers for it would have them write the same files concurrently.
        if href in seen_hrefs:
            continue
        seen_hrefs.add(href)

        worker = StorySection(title, href)
        worker.start()
        # Keep a reference so the join loop below actually waits for it.
        workers.append(worker)

    for worker in workers:
        worker.join()
if __name__ == '__main__':
    # Run the crawl only when executed as a script, so importing this module
    # (e.g. to reuse its classes) does not start downloads or terminate the
    # importing process.
    get_story()
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # ``site`` module for interactive use and is not guaranteed to exist.
    raise SystemExit(0)