import re
import json

# Default locations of the HTML table dump and of the JSON file generated from it.
INPUT_PATH = '/Users/v_sat/Documents/trae_projects/bingqi/pic/model/model.txt'
OUTPUT_PATH = '/Users/v_sat/Documents/trae_projects/bingqi/pic/model/model_data.json'

# Opening tags may carry attributes (e.g. <td class="x">), so accept anything
# up to the closing '>' instead of matching the bare tag only.
_ROW_RE = re.compile(r'<tr\b[^>]*>(.*?)</tr>', re.DOTALL)
_CELL_RE = re.compile(r'<td\b[^>]*>(.*?)</td>', re.DOTALL)
_IMG_SRC_RE = re.compile(r'src="([^"]+)"')

# Column order of a data row: name, icon, armour, fire-on-the-move,
# weapons, ammunition, movement speed.
_MIN_CELLS = 7


def _parse_icon_path(cell):
    """Return the project-relative icon path for an icon cell, or ''.

    The ``src`` attribute of the first image in *cell* is split on '/';
    element 4 is used as the type directory and the part of element 5
    before the first '-' as the icon index.  An empty string is returned
    when there is no ``src`` attribute or the path has fewer than six parts.
    """
    match = _IMG_SRC_RE.search(cell)
    if not match:
        return ''
    parts = match.group(1).split('/')
    if len(parts) < 6:
        return ''
    type_num = parts[4]
    index = parts[5].split('-')[0]
    return f'pic/model/{type_num}/{index}-0.png'


def _parse_weapons(cell):
    """Split a '/'-separated weapons cell into a list of stripped names.

    Empty segments (empty cell, leading/trailing or doubled '/') are dropped
    so an empty cell yields ``[]`` rather than ``['']``.
    """
    return [part.strip() for part in cell.split('/') if part.strip()]


def _parse_ammo(cell, unit_name):
    """Parse a 'weapon:count/weapon:count' cell into ``{weapon: int(count)}``.

    Segments without a ':' are ignored.

    Raises:
        ValueError: if a count is not an integer; the message names the
            unit and weapon so the offending table row can be located.
    """
    ammo = {}
    for item in cell.strip().rstrip('/').split('/'):
        if ':' not in item:
            continue
        key, value = item.split(':', 1)
        key = key.strip()
        value = value.strip()
        try:
            ammo[key] = int(value)
        except ValueError as err:
            raise ValueError(
                f'invalid ammo count {value!r} for weapon {key!r} '
                f'in unit {unit_name!r}'
            ) from err
    return ammo


def parse_units(html_content):
    """Extract the unit records from the HTML table in *html_content*.

    The first ``<tr>`` is treated as the header row and skipped.  Rows with
    fewer than seven ``<td>`` cells are ignored, and only the first row for
    each unit name is kept.

    Returns:
        A list of dicts, one per unique unit, in table order.  Returns an
        empty list when the input contains no data rows.

    Raises:
        ValueError: if an ammunition count or the movement speed of a row
            is not an integer.
    """
    units = []
    seen_names = set()

    # Slicing (rather than indexing rows[0]) keeps empty input from crashing.
    for row in _ROW_RE.findall(html_content)[1:]:
        cells = _CELL_RE.findall(row)
        if len(cells) < _MIN_CELLS:
            continue

        name = cells[0].strip()
        if name in seen_names:
            continue
        seen_names.add(name)

        speed_text = cells[6].strip()
        try:
            speed = int(speed_text)
        except ValueError as err:
            raise ValueError(
                f'invalid movement speed {speed_text!r} in unit {name!r}'
            ) from err

        units.append({
            '名称': name,
            '图标': _parse_icon_path(cells[1]),
            '装甲': cells[2].strip(),
            '行进间射击': cells[3].strip(),
            '携带武器': _parse_weapons(cells[4]),
            '弹药量': _parse_ammo(cells[5], name),
            '机动速度': speed,
        })

    return units


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Read the HTML table at *input_path* and write the units as JSON.

    The output file is only opened after parsing succeeded, so a malformed
    table never truncates an existing JSON file.

    Returns:
        The list of unit dicts that was written.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        html_content = f.read()

    units = parse_units(html_content)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(units, f, ensure_ascii=False, indent=2)

    print(f"解析完成，共提取 {len(units)} 个唯一算子")
    print("数据已保存到 model_data.json")
    return units


if __name__ == '__main__':
    main()
