import os
import json
import argparse
from pathlib import Path
from urllib.parse import urlparse, unquote
import requests

def load_sources(path):
    """Collect URL-like strings from the JSON file at *path*.

    Two top-level shapes are recognised:

    * a list whose entries are either plain strings or dicts;
    * a dict holding such a list under the first of ``items``, ``images``,
      ``list`` or ``files`` whose value is a list.

    For a dict entry, the first string value found under one of the common
    URL field names is taken; entries of any other type are ignored.

    Returns a list of strings. If the file cannot be read or parsed, a
    message is printed and an empty list is returned.
    """
    try:
        data = json.loads(Path(path).read_text(encoding='utf-8'))
    except Exception as e:
        print(f'读取或解析 JSON 失败: {e}')
        return []

    # Field names that commonly hold a URL, in lookup priority order.
    url_fields = ('url', 'src', 'image', 'path', 'val', 'value', 'href', 'link')

    def collect(entries):
        # Pull at most one URL string out of every entry of the list.
        found = []
        for entry in entries:
            if isinstance(entry, str):
                found.append(entry)
            elif isinstance(entry, dict):
                hit = next(
                    (v for v in map(entry.get, url_fields) if isinstance(v, str)),
                    None,
                )
                if hit is not None:
                    found.append(hit)
        return found

    if isinstance(data, list):
        return collect(data)

    if isinstance(data, dict):
        # Only the first list-valued container key is consulted.
        for container in ('items', 'images', 'list', 'files'):
            if isinstance(data.get(container), list):
                return collect(data[container])

    return []

def filename_from_url(u):
    """Derive a bare file name from the URL (or path-like string) *u*.

    The last segment of the URL path is percent-decoded (so ``%E5%8C%96``
    becomes the character it encodes). Because decoding can turn ``%2F`` or
    ``%5C`` into path separators, only the final component of the decoded
    text is kept; the result therefore never contains ``/`` or ``\\`` and
    cannot point outside the directory it is later joined to.

    If the URL has no usable path segment, or cannot be parsed, the name is
    taken from the raw string with its query and fragment removed first.

    Returns a string, which may be empty when no name can be derived.
    """
    try:
        path = urlparse(u).path
    except ValueError:
        # urlparse rejects some malformed authorities (e.g. an unbalanced
        # IPv6 bracket); fall through to the raw-string fallback below.
        path = ''

    name = os.path.basename(path)
    if name:
        # Decode first, then drop anything before a separator that only
        # appeared as a result of decoding.
        decoded = unquote(name).replace('\\', '/').rsplit('/', 1)[-1]
        if decoded:
            return decoded

    # Fallback: strip query/fragment BEFORE taking the basename so that a
    # '/' inside the query string cannot be mistaken for a path separator.
    raw = u.split('?', 1)[0].split('#', 1)[0]
    return os.path.basename(raw)

def download_file(url, dest_path, timeout=15):
    """Stream *url* to *dest_path* and report the outcome.

    The body is written to a sibling ``<dest_path>.part`` file and moved
    into place only after the whole response has been received, so a
    failed or interrupted transfer never leaves a truncated file at
    *dest_path* (callers that skip existing files would otherwise keep the
    corrupt copy forever).

    Args:
        url: Address to fetch with an HTTP GET.
        dest_path: Final location of the downloaded file (str or Path).
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        ``(True, None)`` on success, ``(False, message)`` on any failure.
        This function does not raise for ordinary errors.
    """
    tmp_path = f'{dest_path}.part'
    try:
        # The context manager releases the connection even if we bail out
        # before the streamed body is fully consumed.
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(tmp_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        os.replace(tmp_path, dest_path)
        return True, None
    except Exception as e:
        # Best-effort boundary: report the error to the caller instead of
        # raising, but make sure no partial file is left behind.
        try:
            os.remove(tmp_path)
        except OSError:
            # Nothing to clean up (or it cannot be removed); the original
            # error is the one worth reporting.
            pass
        return False, str(e)

def main():
    """Command-line entry point.

    Reads the source JSON, downloads every referenced image with an allowed
    extension into the output directory (skipping files that already exist
    locally), and writes a mapping JSON whose items have ``val`` rewritten
    to ``<base>/<file name>``.
    """
    cli = argparse.ArgumentParser(description='从 huaji_source.json 下载图片并生成与源结构相同的 huaji.json')
    cli.add_argument('--source', '-s', default='huaji_source.json', help='源 JSON 文件（默认 huaji_source.json）')
    cli.add_argument('--outdir', '-o', default='huaji', help='下载到的本地目录（默认 huaji）')
    cli.add_argument('--base', '-b', default='https://dl.awa.cool/huangsam04/huaji/', help='生成 huaji.json 时使用的站点前缀 URL')
    cli.add_argument('--outjson', default='huaji.json', help='输出的映射 JSON 文件（默认 huaji.json）')
    args = cli.parse_args()

    source_file = Path(args.source)
    if not source_file.exists():
        print(f'源文件不存在: {source_file}')
        return

    # Load the source JSON, keeping its original structure for the output.
    try:
        data = json.loads(source_file.read_text(encoding='utf-8'))
    except Exception as e:
        print(f'读取或解析 JSON 失败: {e}')
        return

    if isinstance(data, dict) and isinstance(data.get('items'), list):
        items = data['items']
    else:
        # Compatibility mode: build key/val items from whatever URL list
        # load_sources can extract from the file.
        items = [
            {'key': filename_from_url(u), 'val': u}
            for u in load_sources(source_file)
        ]

    out_dir = Path(args.outdir)
    out_dir.mkdir(parents=True, exist_ok=True)

    allowed_exts = ('.jpg', '.jpeg', '.png', '.gif')
    # 'val' is preferred as the source URL field.
    url_fields = ('val', 'url', 'src', 'image', 'path', 'href', 'link', 'value')
    site_prefix = args.base.rstrip('/') + '/'

    rewritten_items = []
    for entry in items:
        if isinstance(entry, str):
            source_url = entry
            rewritten = {'key': filename_from_url(entry)}
        elif isinstance(entry, dict):
            # Shallow copy so every other field of the item is preserved.
            rewritten = dict(entry)
            source_url = next(
                (v for v in map(entry.get, url_fields) if isinstance(v, str)),
                None,
            )
        else:
            continue

        if source_url:
            name = filename_from_url(source_url)
            # Only names with an allowed extension are downloaded, and only
            # when no local copy exists yet.
            if name.lower().endswith(allowed_exts) and not (out_dir / name).exists():
                if source_url.startswith(('http://', 'https://')):
                    ok, err = download_file(source_url, out_dir / name)
                    if not ok:
                        print(f'下载失败: {source_url} -> {err}')
                else:
                    print(f'跳过非 URL 条目（未下载）: {source_url}')
            # val is rewritten to the site prefix plus the decoded file
            # name whether or not a download took place.
            rewritten['val'] = site_prefix + name

        rewritten_items.append(rewritten)

    # Output keeps the source's top-level fields when it is a dict, with
    # 'items' replaced by the rewritten list.
    out_data = dict(data) if isinstance(data, dict) else {}
    out_data['items'] = rewritten_items

    outjson_path = Path(args.outjson)
    outjson_path.write_text(json.dumps(out_data, ensure_ascii=False, indent=2), encoding='utf-8')
    print(f'完成。已下载到: {out_dir.resolve()}，映射文件: {outjson_path.resolve()}')

# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
