Files
amlhc/analysis_history.py
916117771 8b2590c5b5 docs(predictV3): 添加predictV3算法优化研究文档和前端功能实现
- 完成Phase 11: predictV3算法优化研究文档,涵盖6个优化方向的技术分析
- 实现置信度评估功能,提供历史命中率、得分分布、多维度一致性置信度指标
- 扩展回测指标体系,新增NDCG@K、MRR、命中率分布等排名质量评估指标
- 优化转移概率算法,引入二阶马尔可夫链和多属性联合转移增强预测准确性
- 设计权重训练机制,支持网格搜索和遗传算法进行数据驱动的参数优化
- 集成组合特征挖掘功能,采用关联规则和序列模式发现号码间潜在关联
- 实现完整的前端交互界面,支持预测结果显示、置信度展示和回测验证功能
- 建立性能优化策略,包括预计算缓存、批量计算和降级策略保障响应速度
2026-05-01 23:17:24 +08:00

286 lines
12 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
正码与特码关联规律统计分析脚本
"""
import re
from collections import defaultdict
# Colour-wave lookup table: number 1-49 -> '红' (red), '蓝' (blue), '绿' (green).
# NOTE(review): in the copy under review only the '绿' labels were present; the
# other 33 entries were empty strings, which made red and blue numbers
# indistinguishable in every colour statistic. They are restored here from the
# standard Mark Six colour assignment (the surviving green positions match it
# exactly) - confirm against the authoritative copy of this file.
color_map = {
    1: '红', 2: '红', 3: '蓝', 4: '蓝', 5: '绿', 6: '绿',
    7: '红', 8: '红', 9: '蓝', 10: '蓝', 11: '绿', 12: '红',
    13: '红', 14: '蓝', 15: '蓝', 16: '绿', 17: '绿', 18: '红',
    19: '红', 20: '蓝', 21: '绿', 22: '绿', 23: '红', 24: '红',
    25: '蓝', 26: '蓝', 27: '绿', 28: '绿', 29: '红', 30: '红',
    31: '蓝', 32: '绿', 33: '绿', 34: '红', 35: '红', 36: '蓝',
    37: '蓝', 38: '绿', 39: '绿', 40: '红', 41: '蓝', 42: '蓝',
    43: '绿', 44: '绿', 45: '红', 46: '红', 47: '蓝', 48: '蓝',
    49: '绿',
}
# Zodiac lookup table keyed by number 1-49.
# NOTE(review): every value in this copy is the empty string. Later in the
# script the special number's label is tested for membership in the set of the
# regular numbers' labels; with identical empty labels that test is always
# true. The labels were presumably lost before this copy was made - restore
# them from the authoritative source (nothing visible here allows them to be
# inferred).
animal_map = {
1: '', 2: '', 3: '', 4: '', 5: '', 6: '',
7: '', 8: '', 9: '', 10: '', 11: '', 12: '',
13: '', 14: '', 15: '', 16: '', 17: '', 18: '',
19: '', 20: '', 21: '', 22: '', 23: '', 24: '',
25: '', 26: '', 27: '', 28: '', 29: '', 30: '',
31: '', 32: '', 33: '', 34: '', 35: '', 36: '',
37: '', 38: '', 39: '', 40: '', 41: '', 42: '',
43: '', 44: '', 45: '', 46: '', 47: '', 48: '',
49: ''
}
def get_zone(num):
    """Return the zone (1-5) whose inclusive range contains ``num``.

    The zones are 1-10, 11-20, 21-30, 31-40 and 41-49. Any value that falls
    in none of them yields 0.
    """
    zone_bounds = (
        (1, 1, 10),
        (2, 11, 20),
        (3, 21, 30),
        (4, 31, 40),
        (5, 41, 49),
    )
    for zone, low, high in zone_bounds:
        if low <= num <= high:
            return zone
    return 0
def get_min_distance(nums, num7):
    """Return the smallest absolute difference between ``num7`` and any of ``nums``.

    Raises ValueError (from ``min``) when ``nums`` is empty.
    """
    gaps = [abs(num7 - candidate) for candidate in nums]
    return min(gaps)
# Read the SQL dump and turn every matching INSERT statement into a record.
sql_file = r'C:\Users\91611\Desktop\fa_history.sql'
with open(sql_file, 'r', encoding='utf-8') as f:
    content = f.read()

# Eight integer columns followed by one single-quoted string column.
pattern = r"INSERT INTO `fa_history` VALUES \((\d+), (\d+), (\d+), (\d+), (\d+), (\d+), (\d+), (\d+), '([^']+)'\);"
matches = re.findall(pattern, content)

# One dict per matched statement, in file order; numeric columns become ints.
data = [
    {
        'expect': int(expect),
        'num1': int(n1), 'num2': int(n2), 'num3': int(n3),
        'num4': int(n4), 'num5': int(n5), 'num6': int(n6),
        'num7': int(n7), 'openTime': open_time,
    }
    for expect, n1, n2, n3, n4, n5, n6, n7, open_time in matches
]
total = len(data)
# Write the statistical report to a text file.
output_file = r'D:\code\php\amlhc\analysis_result.txt'

# Abort before the output file is opened (and truncated): the header reads
# data[0] / data[-1] and every percentage below divides by `total`.
if not data:
    raise SystemExit(f"未能从 {sql_file} 解析到任何开奖记录,无法生成统计报告")


def _pct(count):
    """Return ``count`` as a percentage of ``total``, rounded to 2 decimals."""
    return round(count / total * 100, 2)


def _count_within(counts, low, high):
    """Sum the tallies in ``counts`` whose key lies in the inclusive range."""
    return sum(n for key, n in counts.items() if low <= key <= high)


# (six regular numbers, special number) for each draw, built once and shared
# by every section instead of being rebuilt inside each loop.
draws = [
    ([row[f'num{i}'] for i in range(1, 7)], row['num7'])
    for row in data
]

with open(output_file, 'w', encoding='utf-8') as f:
    f.write("==================== 正码与特码关联规律统计分析 ====================\n")
    # The first and last issue numbers were previously written back to back
    # with nothing between them; separate them explicitly.
    f.write(f"数据总量: {total} 期 (从 {data[0]['expect']} 到 {data[-1]['expect']})\n\n")

    # ---- 1. Special number minus the mean of the regular numbers ----
    f.write("==================== 1. 正码平均值与特码差值分布 ====================\n")
    diff_counts = defaultdict(int)
    for nums, special in draws:
        diff_counts[round(special - sum(nums) / 6)] += 1
    in_range_5 = _count_within(diff_counts, -5, 5)
    in_range_10 = _count_within(diff_counts, -10, 10)
    f.write("差值分布统计:\n")
    for diff in sorted(diff_counts):
        count = diff_counts[diff]
        f.write(f" 差值 {diff}: {count} 次 ({_pct(count)}%)\n")
    f.write(f"\n差值在 [-5, +5] 范围内的概率: {_pct(in_range_5)}% ({in_range_5}/{total})\n")
    f.write(f"差值在 [-10, +10] 范围内的概率: {_pct(in_range_10)}% ({in_range_10}/{total})\n")

    # ---- 2. Is the special number inside [min, max] of the regulars? ----
    f.write("\n==================== 2. 特码是否在正码范围内 ====================\n")
    in_range = sum(
        1 for nums, special in draws if min(nums) <= special <= max(nums)
    )
    f.write("特码在正码范围内 [min(num1-6), max(num1-6)]:\n")
    f.write(f" 是: {in_range} 次 ({_pct(in_range)}%)\n")
    f.write(f" 否: {total - in_range} 次 ({_pct(total - in_range)}%)\n")

    # ---- 3. Distance from the special number to the nearest regular ----
    f.write("\n==================== 3. 特码与最近正码的距离分布 ====================\n")
    dist_counts = defaultdict(int)
    for nums, special in draws:
        dist_counts[get_min_distance(nums, special)] += 1
    f.write("距离分布统计:\n")
    for dist in sorted(dist_counts):
        count = dist_counts[dist]
        f.write(f" 距离 {dist}: {count} 次 ({_pct(count)}%)\n")
    equal_count = dist_counts.get(0, 0)
    dist_le5 = _count_within(dist_counts, 0, 5)
    dist_le10 = _count_within(dist_counts, 0, 10)
    dist_le15 = _count_within(dist_counts, 0, 15)
    f.write(f"\n特码等于某正码 (距离=0) 的概率: {_pct(equal_count)}% ({equal_count}/{total})\n")
    f.write(f"距离 <= 5 的概率: {_pct(dist_le5)}% ({dist_le5}/{total})\n")
    f.write(f"距离 <= 10 的概率: {_pct(dist_le10)}% ({dist_le10}/{total})\n")
    f.write(f"距离 <= 15 的概率: {_pct(dist_le15)}% ({dist_le15}/{total})\n")

    # ---- 4. Last digit of the regular sum vs last digit of the special ----
    f.write("\n==================== 4. 和值尾数关系 ====================\n")
    tail_diff_counts = defaultdict(int)
    for nums, special in draws:
        tail_diff_counts[abs(sum(nums) % 10 - special % 10)] += 1
    same_tail = tail_diff_counts.get(0, 0)
    tail_diff_le3 = _count_within(tail_diff_counts, 0, 3)
    f.write(f"和值尾数与特码尾数同尾概率: {_pct(same_tail)}% ({same_tail}/{total})\n")
    f.write("\n尾数差值分布:\n")
    for diff in sorted(tail_diff_counts):
        count = tail_diff_counts[diff]
        f.write(f" 尾数差 {diff}: {count} 次 ({_pct(count)}%)\n")
    f.write(f"\n尾数差 <= 3 的概率: {_pct(tail_diff_le3)}% ({tail_diff_le3}/{total})\n")

    # ---- 5. Zone coverage ----
    f.write("\n==================== 5. 区间覆盖分析 ====================\n")
    zone_covered_counts = defaultdict(int)
    zone7_covered = 0
    for nums, special in draws:
        zones = {get_zone(num) for num in nums}
        zone_covered_counts[len(zones)] += 1
        if get_zone(special) in zones:
            zone7_covered += 1
    f.write("正码覆盖区间数量分布:\n")
    for i in range(1, 6):
        count = zone_covered_counts.get(i, 0)
        f.write(f" 覆盖 {i} 个区间: {count} 次 ({_pct(count)}%)\n")
    f.write(f"\n特码所在区间被正码覆盖的概率: {_pct(zone7_covered)}% ({zone7_covered}/{total})\n")

    # ---- 6. Colour / zodiac association ----
    f.write("\n==================== 6. 波色/生肖关联 ====================\n")
    color7_in_nums = 0
    animal7_in_nums = 0
    color_match_counts = defaultdict(int)
    color7_counts = defaultdict(int)
    for nums, special in draws:
        color_counts = defaultdict(int)
        for num in nums:
            color_counts[color_map[num]] += 1
        num_colors = set(color_counts)
        num_animals = {animal_map[num] for num in nums}
        color7 = color_map[special]
        color7_counts[color7] += 1
        if color7 in num_colors:
            color7_in_nums += 1
        if animal_map[special] in num_animals:
            animal7_in_nums += 1
        # Key is "<how many regulars share the special's colour>_<colour>".
        color_match_counts[f"{color_counts[color7]}_{color7}"] += 1
    f.write("特码波色分布:\n")
    # Take the labels from color_map (first-appearance order) rather than a
    # second hard-coded list, so each distinct label is reported exactly once
    # and the two can never disagree.
    for color in dict.fromkeys(color_map.values()):
        count = color7_counts[color]
        f.write(f" {color}: {count} 次 ({_pct(count)}%)\n")
    f.write(f"\n特码波色在正码中出现的概率: {_pct(color7_in_nums)}% ({color7_in_nums}/{total})\n")
    f.write(f"特码生肖在正码中出现的概率: {_pct(animal7_in_nums)}% ({animal7_in_nums}/{total})\n")
    f.write("\n正码中特码同波色数量分布:\n")
    for key in sorted(color_match_counts):
        count = color_match_counts[key]
        f.write(f" {key}: {count} 次 ({_pct(count)}%)\n")

    # ---- Summary: rules ranked by hit rate, flagged at >= 40% ----
    f.write("\n==================== 总结: 达到40%命中率以上的规律 ====================\n")
    all_rules = {
        "距离<=15": dist_le15 / total,
        "距离<=10": dist_le10 / total,
        "特码波色在正码中出现": color7_in_nums / total,
        "距离<=5": dist_le5 / total,
        "特码区间被正码覆盖": zone7_covered / total,
        "特码在正码范围内": in_range / total,
        "尾数差<=3": tail_diff_le3 / total,
        "差值在[-10,+10]": in_range_10 / total,
        "差值在[-5,+5]": in_range_5 / total,
        "同尾": same_tail / total,
        "特码生肖在正码中出现": animal7_in_nums / total,
    }
    sorted_rules = sorted(all_rules.items(), key=lambda x: x[1], reverse=True)
    f.write("规律按命中率排序:\n")
    for rule, rate in sorted_rules:
        status = "[达标]" if rate >= 0.4 else ""
        f.write(f" - {rule}: {round(rate * 100, 2)}% {status}\n")

    # ---- Key findings ----
    # The percentages used to be literals frozen from one particular run (and
    # ranked with adjectives that contradicted each other). They are now taken
    # from the values computed above so the text always matches the data; the
    # rank adjectives were dropped because they cannot be guaranteed to hold.
    color_pct = _pct(color7_in_nums)
    near_pct = _pct(dist_le10)
    zone_pct = _pct(zone7_covered)
    range_pct = _pct(in_range)
    tail_pct = _pct(tail_diff_le3)
    animal_pct = _pct(animal7_in_nums)
    equal_pct = _pct(equal_count)
    f.write("\n==================== 关键发现 ====================\n")
    f.write(f"""
1. 【特码波色重复规律】命中率 {color_pct}%
- 特码波色在正码中出现的概率约为 {color_pct}%
- 即特码的波色通常已出现在当期正码之中
2. 【近距离规律】命中率 {near_pct}%
- 特码距离最近正码<=10的概率约为 {near_pct}%
- 特码往往不会离正码太远,基本在10个数字以内
3. 【区间覆盖规律】命中率 {zone_pct}%
- 特码所在区间被正码覆盖的概率约为 {zone_pct}%
- 将1-49分为5区间,特码有约 {zone_pct}% 概率落在正码覆盖的区间
4. 【正码范围规律】命中率 {range_pct}%
- 特码在正码[min, max]范围内的概率约为 {range_pct}%
- 特码有约 {range_pct}% 概率落在正码的最小值和最大值之间
5. 【尾数差规律】命中率 {tail_pct}%
- 和值尾数与特码尾数差<=3的概率约为 {tail_pct}%
- 特码尾数与正码和值尾数相差不超过3
6. 【生肖重复规律】命中率 {animal_pct}%
- 特码生肖在正码中出现的概率约为 {animal_pct}%
- 生肖关联性不如波色明显
7. 【特码等于正码】命中率 {equal_pct}%
- 特码等于某正码的概率为 {equal_pct}%
- 特码与正码完全不重复(六合彩规则)
""")
    f.write("\n==================== 分析完成 ====================\n")

print(f"分析完成,结果已保存到: {output_file}")