"""Convert the unit ("operator") HTML table in model.txt into model_data.json.

Each data row of the table is expected to hold at least seven cells, in this
order: unit name, icon, armor, fire-on-the-move, weapons carried, ammunition,
mobility speed. The first table row is treated as the header and skipped.
"""
import json
import re

# Default locations, used when main() is called without arguments.
INPUT_PATH = '/Users/v_sat/Documents/trae_projects/bingqi/pic/model/model.txt'
OUTPUT_PATH = '/Users/v_sat/Documents/trae_projects/bingqi/pic/model/model_data.json'

# Table markup patterns. A bare r'(.*?)' matches only the empty string at
# every position, so the tag delimiters are required to capture anything.
# NOTE(review): assumes standard <tr>/<td>/<th> table markup -- confirm
# against the actual contents of model.txt.
_ROW_RE = re.compile(r'<tr\b[^>]*>(.*?)</tr>', re.DOTALL | re.IGNORECASE)
_CELL_RE = re.compile(r'<t[dh]\b[^>]*>(.*?)</t[dh]>', re.DOTALL | re.IGNORECASE)
_IMG_SRC_RE = re.compile(r'src="([^"]+)"')

# Minimum number of cells a row must have to be treated as a unit record.
_MIN_CELLS = 7


def _parse_icon_path(cell: str) -> str:
    """Return the local icon path derived from the cell's img src, or ''."""
    match = _IMG_SRC_RE.search(cell)
    if match is None:
        return ''
    path_parts = match.group(1).split('/')
    if len(path_parts) < 6:
        return ''
    type_num = path_parts[4]  # 5th path component is the type directory
    index = path_parts[5].split('-')[0]  # file name is "<index>-<suffix>"
    return f'pic/model/{type_num}/{index}-0.png'


def _parse_ammo(cell: str) -> dict:
    """Parse 'name:count/name:count/' into a {name: int(count)} mapping.

    Items without a ':' are ignored.

    Raises:
        ValueError: if a count is not an integer literal.
    """
    ammo = {}
    for item in cell.strip().rstrip('/').split('/'):
        if ':' in item:
            key, value = item.split(':', 1)
            ammo[key.strip()] = int(value.strip())
    return ammo


def parse_units(html_content: str) -> list:
    """Extract the unique unit records from the HTML table text.

    The first row is skipped as the header. Rows with fewer than seven cells
    are skipped, and only the first row for each unit name is kept.

    Args:
        html_content: Text containing the table markup.

    Returns:
        A list of dicts, one per unique unit, in document order.

    Raises:
        ValueError: if an ammunition count or the mobility speed of a kept
            row is not an integer literal.
    """
    units = []
    seen_names = set()
    # Slicing (rather than indexing rows[0]) keeps empty input safe.
    for row in _ROW_RE.findall(html_content)[1:]:
        cells = _CELL_RE.findall(row)
        if len(cells) < _MIN_CELLS:
            continue

        name = cells[0].strip()
        if name in seen_names:  # skip duplicate entries
            continue
        seen_names.add(name)

        units.append({
            '名称': name,
            '图标': _parse_icon_path(cells[1]),
            '装甲': cells[2].strip(),
            '行进间射击': cells[3].strip(),
            '携带武器': cells[4].strip().rstrip('/').split('/'),
            '弹药量': _parse_ammo(cells[5]),
            '机动速度': int(cells[6].strip()),
        })
    return units


def main(input_path: str = INPUT_PATH, output_path: str = OUTPUT_PATH) -> list:
    """Read the HTML table, write the parsed units as JSON, and report.

    Args:
        input_path: File holding the HTML table text (read as UTF-8).
        output_path: Destination JSON file (written as UTF-8).

    Returns:
        The list of unit dicts that was written to output_path.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        html_content = f.read()

    units = parse_units(html_content)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(units, f, ensure_ascii=False, indent=2)

    print(f"解析完成，共提取 {len(units)} 个唯一算子")
    print("数据已保存到 model_data.json")
    return units


if __name__ == '__main__':
    main()