Skip to content

Commit

Permalink
refactor(baidu): 重构搜索结果的获取和解析逻辑,及返回的结果样式 (close #35)
Browse files Browse the repository at this point in the history
  • Loading branch information
NekoAria committed Sep 12, 2022
1 parent eb59310 commit a7908a8
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 49 deletions.
12 changes: 9 additions & 3 deletions PicImageSearch/baidu.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from json import loads as json_loads
from pathlib import Path
from typing import Any, Dict, Optional, Union
Expand Down Expand Up @@ -25,8 +26,13 @@ async def search(
)
else:
raise ValueError("url or file is required")
resp_text, resp_url, _ = await self.post(
resp_text, _, _ = await self.post(
"https://graph.baidu.com/upload", params=params, data=data
)
resp_text, resp_url, _ = await self.get((json_loads(resp_text))["data"]["url"])
return BaiDuResponse(resp_text, resp_url)
next_url = (json_loads(resp_text))["data"]["url"]
resp_text, resp_url, _ = await self.get(next_url)
next_url = (re.search(r'"firstUrl":"([^"]+)"', resp_text)[1]).replace(r"\/", "/") # type: ignore
resp_text, _, _ = await self.get(next_url)
next_url = (json_loads(resp_text))["data"]["ajaxTextUrl"]
resp_text, _, _ = await self.get(next_url)
return BaiDuResponse(json_loads(resp_text), resp_url)
44 changes: 9 additions & 35 deletions PicImageSearch/model/baidu.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,19 @@
import json
import re
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List


# One search result, populated from a single result dict.
# NOTE(review): this text is a rendered diff with indentation stripped; the assignments
# below appear to mix the removed fields with their replacements (``title`` and ``url``
# are each assigned twice) — confirm against the repository before relying on this list.
class BaiDuItem:
def __init__(self, data: Dict[str, Any]):
self.origin: Dict[str, Any] = data # raw data
self.page_title: str = data["fromPageTitle"] # page title
self.title: str = data["title"][0] # title (first element of data["title"])
self.abstract: str = data["abstract"] # descriptive text
self.image_src: str = data["image_src"] # image URL
self.url: str = data["url"] # URL of the web page containing the image
self.img_list: List[str] = data.get("imgList", []) # list of other image URLs; empty list when the key is absent
# data["simi"] is converted to float, multiplied by 100, formatted to two decimals and parsed back to float
self.similarity: float = float(f"{float(data['simi']) * 100:.2f}")
self.title: str = data["fromPageTitle"] # page title
self.thumbnail: str = data["thumbUrl"] # image URL
self.url: str = data["fromUrl"] # URL of the web page containing the image


# Container for one search response: keeps the result URL, the raw payload and the
# list of BaiDuItem objects built from it.
# NOTE(review): this text is a rendered diff with indentation stripped. Two __init__
# signatures appear back to back (one taking resp_text, one taking resp_json), and the
# body presumably interleaves the removed card-parsing logic with the added JSON-based
# construction — confirm against the repository which lines belong to which version.
class BaiDuResponse:
def __init__(self, resp_text: str, resp_url: str):
def __init__(self, resp_json: Dict[str, Any], resp_url: str):
self.url: str = resp_url # search result URL
self.similar: List[Dict[str, Any]] = [] # similar-result values
self.raw: List[BaiDuItem] = [] # source-result values
# raw data: the text captured between "cardData = " and ";window.commonData" in
# resp_text, parsed as JSON. re.search returns None when the pattern is absent, so the
# subscript would raise TypeError in that case (hence the type: ignore).
self.origin: List[Dict[str, Any]] = json.loads(
re.search(r"cardData = (.+);window\.commonData", resp_text)[1] # type: ignore
)
# Empty default; replaced below if one of the cards is named "same".
self.same: Optional[Dict[str, Any]] = {}
# Attach every card to the instance as an attribute named after its "cardName".
for i in self.origin:
setattr(self, i["cardName"], i)
if self.same:
self.raw = [BaiDuItem(x) for x in self.same["tplData"]["list"]]
info = self.same["extData"]["showInfo"]
# Drops "other_info" from the payload dict in place before iterating the remaining keys.
del info["other_info"]
# For each key y and each value z in info[y], store z under key y in the dict at the
# position of z's first occurrence in info[y]; append a new dict when that position
# does not exist yet.
for y in info:
for z in info[y]:
try:
self.similar[info[y].index(z)][y] = z
except IndexError:
self.similar.append({y: z})
# collect all card names: non-callable attribute names that do not start with
# "__", "origin", "raw", "same" or "url"
self.item: List[str] = [
attr
for attr in dir(self)
if not callable(getattr(self, attr))
and not attr.startswith(("__", "origin", "raw", "same", "url"))
]
self.origin: Dict[str, Any] = resp_json # raw data
# source-result values: one BaiDuItem per entry of resp_json["data"]["list"]
self.raw: List[BaiDuItem] = [BaiDuItem(i) for i in resp_json["data"]["list"]]
17 changes: 6 additions & 11 deletions demo/demo_baidu.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,12 @@ def test_sync() -> None:

# Log selected fields of a BaiDuResponse (and of its first item) via logger.info,
# followed by a separator line of 50 dashes.
# NOTE(review): this text is a rendered diff with indentation stripped; the
# ``if resp.same ... else`` logging presumably belongs to the removed version and the
# flat resp.url / similarity / title / url / thumbnail logging to the added one — confirm
# against the repository.
def show_result(resp: BaiDuResponse) -> None:
# logger.info(resp.origin) # raw data
logger.info(resp.item)
if resp.same: # source results exist
# logger.info(resp.raw[0].origin)
logger.info(resp.raw[0].page_title)
logger.info(resp.raw[0].title)
logger.info(resp.raw[0].abstract)
logger.info(resp.raw[0].url)
logger.info(resp.raw[0].image_src)
logger.info(resp.raw[0].img_list)
else:
logger.info(resp.similar)
logger.info(resp.url)
# logger.info(resp.raw[0].origin)
# resp.raw[0] raises IndexError when resp.raw is empty; nothing here guards against that.
logger.info(resp.raw[0].similarity)
logger.info(resp.raw[0].title)
logger.info(resp.raw[0].url)
logger.info(resp.raw[0].thumbnail)
logger.info("-" * 50)


Expand Down

0 comments on commit a7908a8

Please sign in to comment.