mirror of
https://github.com/KimigaiiWuyi/GenshinUID.git
synced 2025-05-31 04:30:29 +08:00
157 lines
5.2 KiB
Python
157 lines
5.2 KiB
Python
import re
|
|
import random
|
|
import asyncio
|
|
from pathlib import Path
|
|
|
|
import aiofiles
|
|
from bs4 import BeautifulSoup
|
|
from nonebot.log import logger
|
|
from aiohttp.client import ClientSession
|
|
|
|
from .RESOURCE_PATH import GACHA_IMG_PATH, CHAR_STAND_PATH, CHAR_NAMECARD_PATH
|
|
|
|
# Concurrency limit for image downloads: used as the semaphore size and as the
# batch threshold at which queued download tasks are awaited.
MAX_TASKS = 4
|
|
# Wiki main page that is fetched and parsed to discover the character cards.
baseurl = 'https://genshin-impact.fandom.com/wiki/Genshin_Impact_Wiki'
|
|
|
|
|
|
async def get_url(url: str, sess: ClientSession):
    """Fetch *url* with a GET request and return the raw response body.

    The response is used as an async context manager so that it is released
    once the body has been read, and also when reading fails part-way.

    Args:
        url: Address to request.
        sess: Session the request is issued on. It is not closed here.

    Returns:
        The response body as returned by the response's ``read()``.
    """
    async with sess.get(url=url) as resp:
        return await resp.read()
|
|
|
|
|
|
async def _download(
    url: str,
    sess: ClientSession,
    sem: asyncio.Semaphore,
    file_name: str,
    file_path: Path,
    log_prefix: str,
):
    """Download *url* and store the body as ``file_path / file_name``.

    Nothing is written when the server answers with a status other than 200;
    the failure is logged and the function returns. Previously the body of an
    error response was still saved and reported as a success.

    Args:
        url: Address of the file to download.
        sess: Session the request is issued on.
        sem: Semaphore held for the whole download, bounding concurrency.
        file_name: Name of the file to create inside *file_path*.
        file_path: Directory the file is written to.
        log_prefix: Text prepended to every log message of this download.
    """
    async with sem:
        logger.info(f'{log_prefix}正在下载 {file_name} ,URL为{url}')
        async with sess.get(url, timeout=60) as res:
            status = res.status
            if status != 200:
                logger.warning(f"{log_prefix}{file_name} 下载失败: {status}")
                return
            content = await res.read()

        # Random 0-3 s pause while still holding the semaphore slot. It runs
        # before the file is opened so no truncated file handle sits open
        # during the wait.
        await asyncio.sleep(random.randint(0, 3))
        async with aiofiles.open(file_path / file_name, "+wb") as f:
            await f.write(content)
        logger.info(f"{log_prefix}{file_name} 下载成功: {status}")
|
|
|
|
|
|
async def get_char_url_list():
    """Collect the media-page URL of every character card on the wiki main page.

    The page at ``baseurl`` is fetched and searched for ``div`` elements of the
    5-star, 4-star and "5a" card classes, in that order.

    Returns:
        A dict mapping the ``title`` of each card's first link to
        ``https://genshin-impact.fandom.com`` + the link's ``href`` +
        ``/Media``. The card titled "Traveler" is left out.
    """
    site_root = "https://genshin-impact.fandom.com"
    card_classes = (
        'card_container card_5 hidden',
        'card_container card_4 hidden',
        'card_container card_5a hidden',
    )

    async with ClientSession() as sess:
        page = BeautifulSoup(await get_url(baseurl, sess), 'lxml')

        cards = []
        for css_class in card_classes:
            cards += page.find_all("div", class_=css_class)

        media_urls = {}
        for card in cards:
            anchor = card.find("a")
            media_url = site_root + anchor["href"] + "/Media"
            title = anchor["title"]
            if title != "Traveler":
                media_urls[title] = media_url
        return media_urls
|
|
|
|
|
|
async def download_by_fandom(char_list: dict):
    """Download name cards and gacha images when local files look incomplete.

    A download is started when either ``CHAR_NAMECARD_PATH`` or
    ``CHAR_STAND_PATH`` contains fewer entries than *char_list* has items.

    Args:
        char_list: Mapping of character name to media-page URL.

    Returns:
        An empty string.
    """
    expected = len(char_list)
    # Work out whether name cards / gacha images have to be downloaded. The
    # generator keeps the check lazy: the second directory is only listed when
    # the first one already holds enough entries.
    incomplete = any(
        len(list(folder.iterdir())) < expected
        for folder in (CHAR_NAMECARD_PATH, CHAR_STAND_PATH)
    )

    if not incomplete:
        logger.info('[fandom] 无需下载名片和抽卡图片!')
        return ''

    logger.info('[fandom] 本次需要下载图片')
    await get_namecard_and_gacha_pic(char_list)
    return ''
|
|
|
|
|
|
async def _gather_downloads(tasks: list) -> None:
    """Await *tasks* concurrently, logging failures instead of raising them."""
    outcomes = await asyncio.gather(*tasks, return_exceptions=True)
    for outcome in outcomes:
        if isinstance(outcome, BaseException):
            logger.warning(f'[fandom] 下载任务失败: {outcome!r}')


async def get_namecard_and_gacha_pic(char_list: dict):
    """Download the name card and the "Full Wish" image of every character.

    For each entry the media page and the character page (the media URL with
    its trailing ``/Media`` cut off) are fetched. The Simplified-Chinese name
    is read from the character page and used as the file name. The name card
    is saved to ``CHAR_NAMECARD_PATH`` and the gacha image to
    ``GACHA_IMG_PATH``.

    A character whose pages lack the expected elements is logged and skipped.
    A download that fails or times out is logged. Neither aborts the
    remaining characters.

    Args:
        char_list: Mapping of character name to media-page URL.

    Returns:
        The message ``'资源下载成功!'``.
    """
    media_suffix = '/Media'
    # The dot is escaped so that only a literal ".png" ends the match; the
    # greedy prefix keeps everything up to the last ".png" in the URL.
    png_url_pattern = re.compile(r"[\s\S]+\.png")
    total = len(char_list)
    sem = asyncio.Semaphore(MAX_TASKS)
    tasks = []

    async with ClientSession() as sess:
        for index, (char_name, media_url) in enumerate(char_list.items()):
            log_prefix = f'[fandom {index + 1}/{total}] '

            char_data = await get_url(media_url, sess)
            # Same slicing as before: drop the last len('/Media') characters.
            char_info_data = await get_url(
                media_url[: -len(media_suffix)], sess
            )

            info_bs = BeautifulSoup(char_info_data, 'lxml')
            name_spans = info_bs.find_all("span", lang='zh-Hans')
            if not name_spans:
                logger.warning(f'{log_prefix}未找到{char_name}的中文名, 已跳过')
                continue
            chinese_name = name_spans[0].text
            logger.info(f'{log_prefix}正在下载{chinese_name}的图片资源...')

            char_data_bs = BeautifulSoup(char_data, 'lxml')
            gacha_imgs = char_data_bs.find_all(
                "img", {'data-caption': 'Full Wish'}
            )
            gallery_items = char_data_bs.find_all(
                "div", class_='wikia-gallery-item'
            )

            # Special case: for "Gorou" the third-from-last gallery item is
            # used instead of the second-from-last.
            # NOTE(review): presumably Gorou's gallery has one extra trailing
            # item; confirm against the live page.
            namecard_index = -3 if char_name == "Gorou" else -2
            try:
                namecard_src = (
                    gallery_items[namecard_index].find_all("img")[0]["src"]
                )
                gacha_src = gacha_imgs[0]['src']
            except (IndexError, KeyError):
                logger.warning(f'{log_prefix}{chinese_name}的页面缺少图片, 已跳过')
                continue

            gacha_match = png_url_pattern.search(gacha_src)
            namecard_match = png_url_pattern.search(namecard_src)
            if not gacha_match or not namecard_match:
                continue
            gacha_img_url = gacha_match.group(0)
            namecard_url = namecard_match.group(0)

            # Queue the download tasks.
            logger.info(f'{log_prefix}添加{chinese_name}的名片资源下载任务...')
            tasks.append(
                asyncio.wait_for(
                    _download(
                        namecard_url,
                        sess,
                        sem,
                        f'{chinese_name}.png',
                        CHAR_NAMECARD_PATH,
                        log_prefix,
                    ),
                    timeout=30,
                )
            )
            logger.info(f'{log_prefix}添加{chinese_name}的抽卡图片资源下载任务...')
            tasks.append(
                asyncio.wait_for(
                    _download(
                        gacha_img_url,
                        sess,
                        sem,
                        f'{chinese_name}.png',
                        GACHA_IMG_PATH,
                        log_prefix,
                    ),
                    timeout=30,
                )
            )

            # Run the queued downloads once a full batch has accumulated.
            if len(tasks) >= MAX_TASKS:
                await _gather_downloads(tasks)
                tasks = []

        # Flush the remainder while the session is still open.
        await _gather_downloads(tasks)

    logger.info('全部下载完成!')
    return '资源下载成功!'
|