# Small demo: scrape Douban (original title: 爬取豆瓣小demo).
import os
import re
import sys
import json
from pprint import pprint
from collections import OrderedDict
import chardet
from tinydb import TinyDB, Query
# Interactive sessions (REPL, notebook cells) do not define __file__, so fall
# back to the script's own name; abspath() below then resolves it against the
# current working directory.
_vars = globals()
if '__file__' not in _vars:
    __file__ = 'selenium-douban.py'

# Both data files are located in the same directory as this script, so the
# directory is resolved once and reused.
_script_dir = os.path.dirname(os.path.abspath(__file__))
task_result_file = os.path.join(_script_dir, r'task-result.json')
store_file = os.path.join(_script_dir, r'store.json')
# Query builder used to express field conditions for db.search().
Q = Query()
# TinyDB database stored in the JSON file at store_file.
db = TinyDB(store_file)
# Load the task output. Its encoding is not known in advance, so the file is
# read as raw bytes and chardet guesses the encoding before decoding.
task_result = None
with open(task_result_file, 'rb') as f:
    bytes_data = f.read()

encoding = chardet.detect(bytes_data)['encoding']
if encoding:
    task_result = json.loads(bytes_data.decode(encoding))

# Nothing usable (no encoding detected, or the JSON value is empty/null):
# report it and stop with a non-zero exit status.
if not task_result:
    print('没有数据哟')
    sys.exit(1)
# Store every record from task_result (presumably keyed by book URL -- the
# keys themselves are not stored, only the values). insert_multiple() adds
# the whole batch in a single storage update instead of rewriting the store
# file once per record.
db.insert_multiple(task_result.values())
# Print the stored documents that have an ISBN field, then those that have an
# 作者 (author) field. The last two conditions address the same field, once
# via attribute access and once via item access on the Query object.
for field_exists in (Q.ISBN.exists(), Q.作者.exists(), Q['作者'].exists()):
    pprint(db.search(field_exists))