feat(history): 新增历史记录页面功能

- 实现历史记录表格展示功能,包含开奖期号、号码及时间等字段
- 添加号码球样式显示,支持颜色和生肖标识展示
- 集成遗漏号码分析功能,可查询号码遗漏情况
- 实现走势图分析功能,使用ECharts展示号码趋势
- 添加冷热分析功能,统计号码热度排行
- 实现波色、生肖、奇偶、大小等多维度分析工具
- 集成和值分析、连号分析、尾数分析等功能
- 添加特码冷热列表展示功能
- 实现综合统计面板功能
- 集成筛号器功能,支持多种筛选条件
- 添加号码预测和正码关联预测功能
- 实现尾首概率分析功能
- 集成颜色和生肖映射加载机制
This commit is contained in:
2026-05-03 23:42:58 +08:00
parent 182d322b4e
commit d92cf6cfbc
15 changed files with 1276 additions and 42 deletions
+925
View File
@@ -4914,5 +4914,930 @@ class History extends Model
];
}
// ============================================================
// V4 预测算法:贝叶斯对数似然集成 + 指数时间衰减 + 模式匹配
// ============================================================
/**
 * V4 prediction of the special number (num7): a weighted log-probability ensemble.
 *
 * Pipeline implemented by this method:
 *  1. Load the newest $periods draws, or (verification mode) the draws strictly
 *     older than the openTime of $targetExpect.
 *  2. Build exponential time-decay weights (half-life 50 draws), rescaled so that
 *     they sum to the number of loaded draws.
 *  3. Compute a per-number log-probability from six sub-models: time-decayed
 *     frequency, number-level Markov transition, empirical omission score,
 *     historical pattern matching, cyclical (ACF) detection and attribute balance.
 *  4. Optionally adapt the ensemble weights, normalise them, combine the sub-models
 *     as sum(w_i * logP_i) and convert the result to a distribution with softmax.
 *  5. Return the five most probable numbers plus backtest, confidence and, in
 *     verification mode, hit information.
 *
 * @param int    $periods       Number of historical draws to load (documented range 30-500; not clamped here).
 * @param array  $weights       Optional overrides for the sub-model weights. Only the keys
 *                              freq_ewma, markov_number, omit_empirical, pattern_match, cyclical
 *                              and attr_balance are honoured; other keys are ignored.
 * @param string $targetExpect  Optional draw id to verify against; only draws before it are used.
 * @param bool   $skipBacktest  When true, neither the backtest nor the weight adaptation runs.
 * @param int    $backtestCount Number of draws to backtest.
 * @return array Keys: predictions, last_special, last_expect, analysis, actual_result, hit_info,
 *               backtest, confidence, probabilities. On failure a shorter array with an
 *               empty 'predictions' list (and usually an 'error' message) is returned.
 */
public function getPredictionV4($periods = 200, $weights = [], $targetExpect = '', $skipBacktest = false, $backtestCount = 50)
{
    set_time_limit(120); // V4 is comparatively expensive

    $num_model = new Num();
    $colorMap  = $num_model->column('color', 'num');
    $animalMap = $num_model->column('animal', 'num');

    // Default sub-model weights (starting point; may be replaced by the adaptive step below).
    $defaultWeights = [
        'freq_ewma'      => 0.20, // time-decayed frequency
        'markov_number'  => 0.22, // number-level Markov transition
        'omit_empirical' => 0.17, // empirical omission score
        'pattern_match'  => 0.18, // historical pattern matching
        'cyclical'       => 0.09, // cyclical detection
        'attr_balance'   => 0.14, // attribute balance (odd/even, big/small, zone, colour)
    ];
    // Only recognised sub-model keys may override the defaults: an unknown key would otherwise
    // take part in the normalisation further down and silently shrink every real weight.
    $weights = is_array($weights) ? array_intersect_key($weights, $defaultWeights) : [];
    $weights = array_merge($defaultWeights, $weights);

    // ---- Load history ----
    $actualResult = null;
    $lastSpecial  = 0;
    $lastExpect   = '';
    $cutoffTime   = null;
    $allHistory   = [];

    if ($targetExpect) {
        // Verification mode: predict $targetExpect using only draws that happened before it.
        $targetRow = $this->where('expect', $targetExpect)->find();
        if (!$targetRow) {
            return ['predictions' => [], 'error' => '期号不存在', 'target_expect' => $targetExpect];
        }
        $cutoffTime   = $targetRow['openTime'];
        $actualResult = [
            'expect'   => (string)$targetRow['expect'],
            'num7'     => (int)$targetRow['num7'],
            'color'    => $colorMap[$targetRow['num7']] ?? '',
            'animal'   => $animalMap[$targetRow['num7']] ?? '',
            'openTime' => $targetRow['openTime']
        ];
        $prevRow = $this->where('openTime', '<', $cutoffTime)->order('openTime', 'desc')->limit(1)->find();
        if (!$prevRow) {
            return ['predictions' => [], 'error' => '目标期号之前没有历史数据'];
        }
        $lastSpecial = (int)$prevRow['num7'];
        $lastExpect  = (string)$prevRow['expect'];
        $allHistory  = $this->field('expect,num7,openTime')
            ->where('openTime', '<', $cutoffTime)
            ->order('openTime', 'desc')
            ->limit($periods)
            ->select();
    } else {
        $latest = $this->field('expect,num7,openTime')->order('openTime', 'desc')->limit(1)->find();
        if (!$latest) {
            return ['predictions' => [], 'last_special' => 0, 'analysis' => []];
        }
        $lastSpecial = (int)$latest['num7'];
        $lastExpect  = (string)$latest['expect'];
        $allHistory  = $this->field('expect,num7,openTime')
            ->order('openTime', 'desc')
            ->limit($periods)
            ->select();
    }

    if (empty($allHistory) || count($allHistory) < 30) {
        return ['predictions' => [], 'error' => '历史数据不足(至少需要30期)', 'last_special' => $lastSpecial];
    }

    // Oldest first: index 0 is the oldest loaded draw.
    $historyAsc = array_reverse($allHistory);
    $totalDraws = count($allHistory);

    // ---- Exponential time-decay weights ----
    $halfLife    = 50; // a draw 50 positions older than the newest one weighs half as much
    $lambda      = log(2) / $halfLife;
    $timeWeights = [];
    for ($t = 0; $t < $totalDraws; $t++) {
        // t = 0 is the oldest draw, t = totalDraws - 1 the newest
        $age = $totalDraws - 1 - $t;
        $timeWeights[$t] = exp(-$lambda * $age);
    }
    $twSum = array_sum($timeWeights);
    if ($twSum > 0) {
        // Rescale so the weights sum to the number of draws.
        foreach ($timeWeights as $k => $v) {
            $timeWeights[$k] = $v / $twSum * $totalDraws;
        }
    }

    // ---- Pre-computed attributes for every number 1..49 ----
    $zoneMap = []; $tailMap = []; $headMap = []; $isOddMap = []; $isBigMap = []; $colorKeyMap = [];
    for ($n = 1; $n <= 49; $n++) {
        $zoneMap[$n]  = $this->_getZoneIdx($n);
        $tailMap[$n]  = $n % 10;
        $headMap[$n]  = $this->_getHeadIdx($n);
        $isOddMap[$n] = ($n % 2 === 1);
        $isBigMap[$n] = ($n >= 25);
        $c = $colorMap[$n] ?? '';
        if (strpos($c, '红') !== false) $colorKeyMap[$n] = '红';
        elseif (strpos($c, '蓝') !== false) $colorKeyMap[$n] = '蓝';
        elseif (strpos($c, '绿') !== false) $colorKeyMap[$n] = '绿';
        else $colorKeyMap[$n] = '';
    }

    // ============================================================
    // Sub-model 1: time-decayed frequency
    // ============================================================
    $ewmaFreq = array_fill(1, 49, 0.0);
    foreach ($historyAsc as $idx => $row) {
        $num = (int)$row['num7'];
        if ($num >= 1 && $num <= 49) {
            $ewmaFreq[$num] += $timeWeights[$idx];
        }
    }
    // Normalise to probabilities, floored so that log() stays finite.
    $ewmaSum     = array_sum($ewmaFreq);
    $ewmaLogProb = [];
    for ($n = 1; $n <= 49; $n++) {
        $p = max($ewmaFreq[$n] / max($ewmaSum, 0.001), 0.0001);
        $ewmaLogProb[$n] = log($p);
    }

    // ============================================================
    // Sub-model 2: number-level Markov transition
    // ============================================================
    $numberMarkov = $this->_buildNumberMarkovMatrix($historyAsc, $timeWeights);
    // Log-probability of each number conditioned on the previous draw's number;
    // falls back to uniform when the previous number has no row in the matrix.
    $markovLogProb = [];
    $lastNum = $lastSpecial;
    for ($n = 1; $n <= 49; $n++) {
        $p = $numberMarkov['prob'][$lastNum][$n] ?? (1.0 / 49);
        $p = max($p, 0.0005);
        $markovLogProb[$n] = log($p);
    }

    // ============================================================
    // Sub-model 3: empirical omission score
    // ============================================================
    // Track each number's current omission (draws since it last appeared) and pool
    // every gap observed between consecutive appearances across all numbers.
    $omitCount      = array_fill(1, 49, $totalDraws);
    $lastAppear     = array_fill(1, 49, -1);
    $omitHistoryAll = [];
    foreach ($historyAsc as $idx => $row) {
        $num = (int)$row['num7'];
        if ($num < 1 || $num > 49) continue;
        if ($lastAppear[$num] >= 0) {
            $omitHistoryAll[] = $idx - $lastAppear[$num];
        } else {
            // First sighting inside the window: use its distance from the window start.
            $omitHistoryAll[] = $idx + 1;
        }
        $lastAppear[$num] = $idx;
        $omitCount[$num]  = $totalDraws - 1 - $idx;
    }
    foreach ($omitCount as $n => $v) {
        if ($lastAppear[$n] < 0) $omitCount[$n] = $totalDraws;
    }
    sort($omitHistoryAll);
    $omitLogProb = [];
    for ($n = 1; $n <= 49; $n++) {
        // The helper's score is divided by 100 here, i.e. it is treated as a 0..100 value.
        $score = $this->_calcOmitScoreEmpirical($omitCount[$n], $omitHistoryAll, []);
        $omitLogProb[$n] = log(max($score / 100.0, 0.002));
    }

    // ============================================================
    // Sub-model 4: historical pattern matching
    // ============================================================
    $patternResult = $this->_findSimilarPatternsV4($historyAsc, $totalDraws, $zoneMap, $isOddMap, $isBigMap, $colorKeyMap);

    // ============================================================
    // Sub-model 5: cyclical (autocorrelation) detection
    // ============================================================
    $cyclicalLogProb = $this->_detectCyclicalPatternsV4($historyAsc, $totalDraws);

    // ============================================================
    // Sub-model 6: attribute balance (odd/even, big/small, zone, colour)
    // ============================================================
    // Time-decayed share of each attribute value among the loaded draws.
    $attrCounts = [
        'odd' => 0, 'even' => 0, 'big' => 0, 'small' => 0,
        'zone'  => [0, 0, 0, 0, 0],
        'color' => ['红' => 0, '蓝' => 0, '绿' => 0]
    ];
    foreach ($historyAsc as $idx => $row) {
        $num = (int)$row['num7'];
        if ($num < 1 || $num > 49) continue;
        $w = $timeWeights[$idx];
        $attrCounts['odd']   += ($isOddMap[$num] ? $w : 0);
        $attrCounts['even']  += ($isOddMap[$num] ? 0 : $w);
        $attrCounts['big']   += ($isBigMap[$num] ? $w : 0);
        $attrCounts['small'] += ($isBigMap[$num] ? 0 : $w);
        $attrCounts['zone'][$zoneMap[$num]] += $w;
        $ck = $colorKeyMap[$num];
        if ($ck) $attrCounts['color'][$ck] += $w;
    }
    // These ratios do not depend on the candidate number, so compute them once.
    $oddW       = $attrCounts['odd'] + $attrCounts['even'];
    $oddRatio   = $oddW > 0 ? $attrCounts['odd'] / $oddW : 0.5;
    $bsW        = $attrCounts['big'] + $attrCounts['small'];
    $bigRatio   = $bsW > 0 ? $attrCounts['big'] / $bsW : 0.5;
    $zoneTotal  = array_sum($attrCounts['zone']);
    $colorTotal = array_sum($attrCounts['color']);

    $attrLogProb = [];
    for ($n = 1; $n <= 49; $n++) {
        $logP = 0;
        // Odd/even: the side that was seen less often recently scores higher.
        $logP += log(max($isOddMap[$n] ? (1 - $oddRatio + 0.5) : ($oddRatio + 0.5), 0.4) / 1.5);
        // Big/small: same idea.
        $logP += log(max($isBigMap[$n] ? (1 - $bigRatio + 0.5) : ($bigRatio + 0.5), 0.4) / 1.5);
        // Zone: under-represented zones score higher.
        $zoneRatio = $zoneTotal > 0 ? $attrCounts['zone'][$zoneMap[$n]] / $zoneTotal : 0.2;
        $logP += log(max(1 - $zoneRatio + 0.2, 0.3) / 1.2);
        // Colour: under-represented colours score higher.
        $colorRatio = $colorTotal > 0 ? ($attrCounts['color'][$colorKeyMap[$n]] ?? 0) / $colorTotal : 0.333;
        $logP += log(max(1 - $colorRatio + 0.33, 0.3) / 1.33);
        $attrLogProb[$n] = $logP;
    }

    // ============================================================
    // Adaptive ensemble weights (live prediction with backtest enabled only)
    // ============================================================
    $useAdaptive = !$targetExpect && !$skipBacktest && $totalDraws >= 60;
    if ($useAdaptive) {
        $weights = $this->_adaptEnsembleWeightsV4($historyAsc, $timeWeights, $totalDraws, $weights,
            $ewmaLogProb, $markovLogProb, $omitLogProb, $patternResult,
            $cyclicalLogProb, $attrLogProb);
    }

    // ---- Normalise the weights so they sum to 1 ----
    $wSum = array_sum($weights);
    if ($wSum > 0) {
        foreach ($weights as $k => $v) {
            $weights[$k] = $v / $wSum;
        }
    }

    // ============================================================
    // Log-linear fusion: logPosterior(n) = sum_i w_i * logP_i(n)
    // (normalising constant omitted), followed by softmax.
    // ============================================================
    $logPosterior = [];
    $maxLog = -INF;
    for ($n = 1; $n <= 49; $n++) {
        $lp = 0;
        $lp += $weights['freq_ewma']      * $ewmaLogProb[$n];
        $lp += $weights['markov_number']  * $markovLogProb[$n];
        $lp += $weights['omit_empirical'] * $omitLogProb[$n];
        $lp += $weights['pattern_match']  * $patternResult['logprob'][$n];
        $lp += $weights['cyclical']       * $cyclicalLogProb[$n];
        $lp += $weights['attr_balance']   * $attrLogProb[$n];
        $logPosterior[$n] = $lp;
        if ($lp > $maxLog) $maxLog = $lp;
    }

    // Softmax, shifted by the maximum for numerical stability.
    $softmaxSum = 0;
    $softmaxRaw = [];
    for ($n = 1; $n <= 49; $n++) {
        $v = exp($logPosterior[$n] - $maxLog);
        $softmaxRaw[$n] = $v;
        $softmaxSum += $v;
    }
    $probabilities = [];
    for ($n = 1; $n <= 49; $n++) {
        $probabilities[$n] = $softmaxRaw[$n] / max($softmaxSum, 0.0001);
    }

    // ---- Build the per-number result rows ----
    $scores = [];
    for ($n = 1; $n <= 49; $n++) {
        $scores[] = [
            'num'    => $n,
            'prob'   => round($probabilities[$n], 6),
            'score'  => round($probabilities[$n] * 100, 2),
            'color'  => $colorMap[$n] ?? '',
            'animal' => $animalMap[$n] ?? '',
            'detail' => [
                'freq_ewma'      => round(exp($ewmaLogProb[$n]) * 100, 2),
                'markov_number'  => round(exp($markovLogProb[$n]) * 100, 2),
                'omit_empirical' => round(exp($omitLogProb[$n]) * 100, 2),
                'pattern_match'  => round(exp($patternResult['logprob'][$n]) * 100, 2),
                'cyclical'       => round(exp($cyclicalLogProb[$n]) * 100, 2),
                'attr_balance'   => round(exp($attrLogProb[$n]) * 100, 2),
                'omit_count'     => $omitCount[$n],
                'is_odd'         => $isOddMap[$n],
                'is_big'         => $isBigMap[$n],
                'zone'           => $zoneMap[$n],
            ]
        ];
    }
    // Highest probability first; keep the top five.
    usort($scores, function ($a, $b) { return $b['prob'] <=> $a['prob']; });
    $predictions = array_slice($scores, 0, 5);

    // Backtest (re-enters this method once per tested draw with backtesting disabled).
    $backtest = $skipBacktest ? null : $this->_runBacktestV4($periods, $weights, $backtestCount, $cutoffTime);

    // Confidence
    $confidence = $this->_calculateConfidenceV4($predictions, $backtest);

    // Hit verification against the actual result (verification mode only).
    $hitInfo = null;
    if ($actualResult) {
        $hitRank = -1;
        foreach ($predictions as $idx => $p) {
            if ($p['num'] === $actualResult['num7']) { $hitRank = $idx + 1; break; }
        }
        $hitInfo = [
            'hit'           => $hitRank > 0,
            'rank'          => $hitRank,
            'actual_num'    => $actualResult['num7'],
            'actual_color'  => $actualResult['color'],
            'actual_animal' => $actualResult['animal'],
            'actual_expect' => $actualResult['expect']
        ];
    }

    // Analysis summary
    $analysis = [
        'version'            => 'V4',
        'algorithm'          => 'Bayesian Ensemble with Time-Decay',
        'last_special'       => $lastSpecial,
        'last_expect'        => $lastExpect,
        'history_count'      => $totalDraws,
        'weights'            => $weights,
        'weights_adaptive'   => $useAdaptive,
        'half_life'          => $halfLife,
        'pattern_match_info' => [
            'top_k'            => $patternResult['top_k'],
            'best_similarity'  => $patternResult['best_similarity'],
            'matched_segments' => $patternResult['segment_count'],
        ],
        'cyclical_detected'  => $this->_summarizeCyclicalV4($cyclicalLogProb),
    ];

    return [
        'predictions'   => $predictions,
        'last_special'  => $lastSpecial,
        'last_expect'   => $lastExpect,
        'analysis'      => $analysis,
        'actual_result' => $actualResult,
        'hit_info'      => $hitInfo,
        'backtest'      => $backtest,
        'confidence'    => $confidence,
        'probabilities' => $probabilities, // full distribution over all 49 numbers
    ];
}
/**
 * Build a number-level first-order transition matrix with time decay and smoothing.
 *
 * Models P(num_t = j | num_{t-1} = i) over the 49 x 49 state space. Each observed
 * transition i -> j adds the time weight of the "from" draw. Every row is then
 * smoothed by adding 30% of the average count of the adjacent "from" rows
 * (i-3 .. i+3, excluding i) and by add-one (Laplace) smoothing, so each
 * probability row sums to 1 and contains no zeros.
 *
 * @param array $historyAsc  History in ascending (oldest first) order; each row has 'num7'.
 * @param array $timeWeights Time-decay weights aligned with the indexes of $historyAsc.
 *                           A missing entry counts as weight 1.0.
 * @return array ['matrix' => weighted transition counts [from][to],
 *                'prob' => smoothed probabilities [from][to],
 *                'row_total' => weighted count of transitions leaving each number]
 */
private function _buildNumberMarkovMatrix($historyAsc, $timeWeights)
{
    $matrix   = array_fill(1, 49, array_fill(1, 49, 0.0));
    $rowTotal = array_fill(1, 49, 0.0);
    $n = count($historyAsc);

    for ($i = 0; $i < $n - 1; $i++) {
        $from = (int)$historyAsc[$i]['num7'];
        $to   = (int)$historyAsc[$i + 1]['num7'];
        if ($from < 1 || $from > 49 || $to < 1 || $to > 49) continue;
        // The weight of draw t is applied to the transition t -> t+1.
        $w = $timeWeights[$i] ?? 1.0;
        $matrix[$from][$to] += $w;
        $rowTotal[$from]    += $w;
    }

    // Neighbour smoothing radius: borrow counts from rows i-3 .. i+3.
    $k = 3;
    $prob = array_fill(1, 49, array_fill(1, 49, 0.0));
    for ($i = 1; $i <= 49; $i++) {
        // The set of neighbouring rows only depends on $i, so build it once per row.
        $neighbors = [];
        for ($di = -$k; $di <= $k; $di++) {
            $ni = $i + $di;
            if ($ni >= 1 && $ni <= 49 && $ni != $i) {
                $neighbors[] = $ni;
            }
        }
        $neighborCount = count($neighbors);

        $smoothCount = [];
        for ($j = 1; $j <= 49; $j++) {
            $neighborSum = 0;
            foreach ($neighbors as $ni) {
                $neighborSum += $matrix[$ni][$j];
            }
            $neighborAvg = $neighborCount > 0 ? $neighborSum / $neighborCount : 0;
            // Own count plus a 30% share of the neighbours' average count.
            $smoothCount[$j] = $matrix[$i][$j] + $neighborAvg * 0.3;
        }

        $smoothTotal = array_sum($smoothCount) + 49; // +49: one pseudo-count per target number
        for ($j = 1; $j <= 49; $j++) {
            $prob[$i][$j] = ($smoothCount[$j] + 1) / max($smoothTotal, 1);
        }
    }

    return [
        'matrix'    => $matrix,
        'prob'      => $prob,
        'row_total' => $rowTotal
    ];
}
/**
 * Pattern matching: find historical windows that resemble the most recent window
 * and use the numbers that followed them as a prediction.
 *
 * Each draw is described by four features (zone, odd/even, big/small, colour code);
 * a window is three consecutive draws. Every candidate window that ends at least
 * three draws before the recent window is compared with it through
 * _cosineSimilarityV4(). The 15 best candidates vote, weighted by similarity
 * (floored at 0.01), for the number that came right after them. Voted numbers get
 * 70% of their vote share plus 30% of the uniform probability; all other numbers
 * keep the uniform log-probability.
 *
 * @param array $historyAsc  History in ascending order; each row has 'num7'.
 * @param int   $totalDraws  Number of draws in $historyAsc.
 * @param array $zoneMap     number => zone index
 * @param array $isOddMap    number => bool (odd)
 * @param array $isBigMap    number => bool (big)
 * @param array $colorKeyMap number => colour key
 * @return array ['logprob' => [], 'top_k' => int, 'best_similarity' => float, 'segment_count' => int]
 */
private function _findSimilarPatternsV4($historyAsc, $totalDraws, $zoneMap, $isOddMap, $isBigMap, $colorKeyMap)
{
    $windowSize = 3;   // draws per compared window
    $keepBest   = 15;  // number of best-matching windows that vote
    $gap        = 3;   // candidates must end this far before the recent window
    $flatLog    = log(1.0 / 49);

    // Too little history: answer with the uniform distribution.
    if ($totalDraws < $windowSize + $gap + 5) {
        return [
            'logprob'         => array_fill(1, 49, $flatLog),
            'top_k'           => 0,
            'best_similarity' => 0,
            'segment_count'   => 0
        ];
    }

    // Feature rows for the window that starts at $from; out-of-range numbers are
    // skipped, which makes the window shorter (and therefore score 0 similarity).
    $describeWindow = function ($from) use ($historyAsc, $windowSize, $zoneMap, $isOddMap, $isBigMap, $colorKeyMap) {
        $features = [];
        $until = $from + $windowSize;
        for ($pos = $from; $pos < $until; $pos++) {
            $value = (int)$historyAsc[$pos]['num7'];
            if ($value < 1 || $value > 49) {
                continue;
            }
            $colour = $colorKeyMap[$value];
            if ($colour === '红') {
                $colourCode = 0;
            } elseif ($colour === '蓝') {
                $colourCode = 1;
            } else {
                $colourCode = 2;
            }
            $features[] = [
                'zone'     => $zoneMap[$value],
                'oddeven'  => $isOddMap[$value] ? 1 : 0,
                'bigsmall' => $isBigMap[$value] ? 1 : 0,
                'color'    => $colourCode,
            ];
        }
        return $features;
    };

    $recent = $describeWindow($totalDraws - $windowSize);

    // Score every admissible candidate window against the recent one.
    $scored    = [];
    $scanLimit = $totalDraws - $windowSize - $gap;
    $start     = 0;
    while ($start < $scanLimit) {
        $scored[] = [
            'start' => $start,
            'sim'   => $this->_cosineSimilarityV4($recent, $describeWindow($start)),
        ];
        $start++;
    }

    // Most similar first; keep the best few.
    usort($scored, function ($a, $b) { return $b['sim'] <=> $a['sim']; });
    $best = array_slice($scored, 0, $keepBest);

    // Similarity-weighted votes for the number following each kept window.
    $tally    = array_fill(1, 49, 0.0);
    $tallySum = 0;
    foreach ($best as $hit) {
        $followIdx = $hit['start'] + $windowSize;
        if ($followIdx >= $totalDraws) {
            continue;
        }
        $follower = (int)$historyAsc[$followIdx]['num7'];
        if ($follower < 1 || $follower > 49) {
            continue;
        }
        $vote = max($hit['sim'], 0.01);
        $tally[$follower] += $vote;
        $tallySum += $vote;
    }

    // Shrink vote shares toward uniform and move to log space.
    $logprob = [];
    foreach ($tally as $n => $votes) {
        if ($tallySum > 0 && $votes > 0) {
            $share = $votes / $tallySum;
            $blended = $share * 0.7 + (1.0 / 49) * 0.3;
            $logprob[$n] = log(max($blended, 0.0005));
        } else {
            $logprob[$n] = $flatLog;
        }
    }

    $topSimilarity = !empty($best) ? $best[0]['sim'] : 0;

    return [
        'logprob'         => $logprob,
        'top_k'           => $keepBest,
        'best_similarity' => round($topSimilarity, 4),
        'segment_count'   => count($best)
    ];
}
/**
 * Cosine similarity between two feature sequences.
 *
 * Each sequence element contributes four components to a flat vector:
 * zone * 0.2, oddeven, bigsmall and color / 2. Sequences of different length
 * (or an empty first sequence) have similarity 0. Negative results are clipped to 0.
 *
 * @param array $seq1 List of ['zone', 'oddeven', 'bigsmall', 'color'] rows.
 * @param array $seq2 Same shape as $seq1.
 * @return float|int Similarity, never below 0.
 */
private function _cosineSimilarityV4($seq1, $seq2)
{
    if (count($seq1) !== count($seq2) || empty($seq1)) {
        return 0;
    }

    // Lay the four features of every position out in one flat vector.
    $flatten = function ($sequence) {
        $flat = [];
        foreach ($sequence as $feature) {
            array_push(
                $flat,
                $feature['zone'] * 0.2,     // zone index scaled down
                $feature['oddeven'],
                $feature['bigsmall'],
                $feature['color'] / 2.0     // 0, 0.5 or 1.0
            );
        }
        return $flat;
    };
    $left  = $flatten($seq1);
    $right = $flatten($seq2);

    $dotProduct = 0;
    $leftSquares = 0;
    $rightSquares = 0;
    foreach ($left as $position => $component) {
        $other = $right[$position];
        $dotProduct   += $component * $other;
        $leftSquares  += $component * $component;
        $rightSquares += $other * $other;
    }

    // Squared norms are floored so an all-zero vector cannot cause a division by zero.
    $denominator = sqrt(max($leftSquares, 0.0001)) * sqrt(max($rightSquares, 0.0001));
    if (!($denominator > 0)) {
        return 0;
    }
    return max(0, $dotProduct / $denominator);
}
/**
 * Cyclical (autocorrelation) detection.
 *
 * For every number 1..49 a 0/1 appearance series is built and its autocorrelation
 * is evaluated for lags 5 .. min(totalDraws / 4, 60). The lag with the largest
 * absolute autocorrelation is that number's candidate period. Numbers whose best
 * value exceeds 0.12 receive a multiplier between 1 and 3 on the uniform
 * probability, plus a phase bonus when the distance since their last appearance is
 * within 30% of their own candidate period. The result is re-normalised and
 * returned in log space.
 *
 * @param array $historyAsc History in ascending order; each row has 'num7'.
 * @param int   $totalDraws Number of draws in $historyAsc.
 * @return array number (1..49) => log-probability
 */
private function _detectCyclicalPatternsV4($historyAsc, $totalDraws)
{
    $uniformLog = log(1.0 / 49);

    // Without any draws the mean below would divide by zero; nothing to detect.
    if ($totalDraws <= 0) {
        return array_fill(1, 49, $uniformLog);
    }

    // 0/1 appearance series per number.
    $appearances = array_fill(1, 49, array_fill(0, $totalDraws, 0));
    foreach ($historyAsc as $idx => $row) {
        $num = (int)$row['num7'];
        if ($num >= 1 && $num <= 49) {
            $appearances[$num][$idx] = 1;
        }
    }

    // Lag search range and significance threshold.
    $minLag = 5;
    $maxLag = min((int)($totalDraws / 4), 60);
    $acfThreshold = 0.12;

    $cyclicalScores = array_fill(1, 49, 0);
    // Best lag is kept PER NUMBER so the phase bonus below uses the right period.
    $bestLags = array_fill(1, 49, 0);

    for ($n = 1; $n <= 49; $n++) {
        $seq  = $appearances[$n];
        $mean = array_sum($seq) / $totalDraws;
        if ($mean < 0.005) {
            // Practically never drawn: no periodicity to speak of.
            continue;
        }

        $variance = 0;
        for ($t = 0; $t < $totalDraws; $t++) {
            $variance += ($seq[$t] - $mean) * ($seq[$t] - $mean);
        }
        $variance /= $totalDraws;
        if ($variance < 0.0001) {
            continue;
        }

        $bestAcf = 0;
        $bestLag = 0;
        for ($lag = $minLag; $lag <= $maxLag; $lag++) {
            $cov = 0;
            $count = 0;
            for ($t = 0; $t < $totalDraws - $lag; $t++) {
                $cov += ($seq[$t] - $mean) * ($seq[$t + $lag] - $mean);
                $count++;
            }
            if ($count > 0) {
                // Sign is discarded: strong negative correlation also counts.
                $acf = abs($cov / ($count * $variance));
                if ($acf > $bestAcf) {
                    $bestAcf = $acf;
                    $bestLag = $lag;
                }
            }
        }
        $bestLags[$n] = $bestLag;

        // Score = min(bestAcf / (3 * threshold), 1) * 100, or 0 below the threshold.
        $cyclicalScores[$n] = $bestAcf > $acfThreshold
            ? min($bestAcf / ($acfThreshold * 3), 1.0) * 100
            : 0;
    }

    // Turn scores into (unnormalised) log-probabilities.
    $logProb = [];
    for ($n = 1; $n <= 49; $n++) {
        $score = $cyclicalScores[$n];
        if ($score <= 0) {
            $logProb[$n] = $uniformLog;
            continue;
        }

        // Score maps to a multiplier in [1, 3].
        $multiplier = 1 + ($score / 100) * 2;

        // Draws elapsed since this number last appeared (0 if it is the newest draw).
        $sinceLast = 0;
        for ($t = $totalDraws - 1; $t >= 0; $t--) {
            if ($appearances[$n][$t] === 1) {
                $sinceLast = $totalDraws - 1 - $t;
                break;
            }
        }

        // Extra boost when the elapsed distance is close to this number's own period.
        $phaseBonus = 0;
        $lag = $bestLags[$n];
        if ($lag > 0 && $sinceLast > 0) {
            $phaseDiff = abs($sinceLast - $lag) / max($lag, 1);
            if ($phaseDiff < 0.3) {
                $phaseBonus = (1 - $phaseDiff) * 0.5;
            }
        }

        $pBoost = (1.0 / 49) * $multiplier * (1 + $phaseBonus);
        $logProb[$n] = log(max($pBoost, 0.0005));
    }

    // Re-normalise so the probabilities sum to 1 (each floored at 0.0005).
    $maxLp = max($logProb);
    $lpSum = 0;
    $expLp = [];
    for ($n = 1; $n <= 49; $n++) {
        $expLp[$n] = exp($logProb[$n] - $maxLp);
        $lpSum += $expLp[$n];
    }
    for ($n = 1; $n <= 49; $n++) {
        $logProb[$n] = log(max($expLp[$n] / max($lpSum, 0.0001), 0.0005));
    }

    return $logProb;
}
/**
 * Summarise the cyclical sub-model output.
 *
 * A number counts as significant when its log-probability exceeds the uniform
 * log-probability log(1/49) by more than 0.1.
 *
 * @param array $cyclicalLogProb number => log-probability
 * @return array ['significant_count' => int, 'has_cyclical' => bool (more than 3 significant numbers)]
 */
private function _summarizeCyclicalV4($cyclicalLogProb)
{
    $cutoff = log(1.0 / 49) + 0.1;

    $significant = array_filter($cyclicalLogProb, function ($value) use ($cutoff) {
        return $value > $cutoff;
    });
    $howMany = count($significant);

    return [
        'significant_count' => $howMany,
        'has_cyclical'      => $howMany > 3,
    ];
}
/**
 * Adapt the ensemble weights from each sub-model's stand-alone top-5 hit rate.
 *
 * The test window is the newest min(30, 15% of $totalDraws) draws; with fewer than
 * 10 test draws, or when the table holds fewer than $totalDraws + window rows, the
 * base weights are returned unchanged. Each sub-model's top-5 numbers are compared
 * with every test draw, and the weight is updated as
 *
 *     new = alpha * base * (hitRate / avgHitRate) + (1 - alpha) * base,  alpha = 0.3
 *
 * then clamped to [0.03, 0.40]. The result is not re-normalised here.
 *
 * NOTE: this is a simplified evaluation. The log-probabilities passed in were
 * computed once from the whole window (test draws included) and are reused for
 * every test draw, so each sub-model is scored with one fixed top-5 list.
 *
 * @param array $historyAsc      History in ascending order (not read by this method).
 * @param array $timeWeights     Time-decay weights (not read by this method).
 * @param int   $totalDraws      Number of draws in the prediction window.
 * @param array $baseWeights     Current weight per sub-model key.
 * @param array $ewmaLogProb     number => log-probability of the frequency sub-model.
 * @param array $markovLogProb   number => log-probability of the Markov sub-model.
 * @param array $omitLogProb     number => log-probability of the omission sub-model.
 * @param array $patternResult   Pattern-matching result; its 'logprob' entry is used.
 * @param array $cyclicalLogProb number => log-probability of the cyclical sub-model.
 * @param array $attrLogProb     number => log-probability of the attribute sub-model.
 * @return array Updated weights keyed by sub-model name, or $baseWeights unchanged.
 */
private function _adaptEnsembleWeightsV4($historyAsc, $timeWeights, $totalDraws, $baseWeights,
    $ewmaLogProb, $markovLogProb, $omitLogProb, $patternResult,
    $cyclicalLogProb, $attrLogProb)
{
    $testWindow = min(30, (int)($totalDraws * 0.15));
    if ($testWindow < 10) return $baseWeights;

    $subModels = ['freq_ewma', 'markov_number', 'omit_empirical', 'pattern_match', 'cyclical', 'attr_balance'];

    // Newest totalDraws + testWindow rows; only the newest testWindow of them are
    // evaluated, the rest merely prove that enough history exists.
    $fullHistory = $this->field('expect,num7,openTime')
        ->order('openTime', 'desc')
        ->limit($totalDraws + $testWindow)
        ->select();
    $fullHistoryAsc = array_reverse($fullHistory);
    $fullN = count($fullHistoryAsc);
    if ($fullN < $totalDraws + $testWindow) return $baseWeights;

    $alpha = 0.3; // blending factor between performance-scaled and base weight

    $modelLogProbs = [
        'freq_ewma'      => $ewmaLogProb,
        'markov_number'  => $markovLogProb,
        'omit_empirical' => $omitLogProb,
        'pattern_match'  => $patternResult['logprob'],
        'cyclical'       => $cyclicalLogProb,
        'attr_balance'   => $attrLogProb,
    ];

    // The log-probabilities do not change between test draws, so each sub-model's
    // top-5 list is ranked once instead of being re-sorted for every draw.
    $top5ByModel = [];
    foreach ($subModels as $model) {
        $modelScores = [];
        for ($n = 1; $n <= 49; $n++) {
            $modelScores[$n] = $modelLogProbs[$model][$n];
        }
        arsort($modelScores);
        $top5ByModel[$model] = array_slice(array_keys($modelScores), 0, 5);
    }

    // Count, per sub-model, how many test draws landed in its top-5.
    $hitCounts = array_fill_keys($subModels, 0);
    for ($t = $fullN - $testWindow; $t < $fullN; $t++) {
        $actualNum = (int)$fullHistoryAsc[$t]['num7'];
        if ($actualNum < 1 || $actualNum > 49) continue;
        foreach ($subModels as $model) {
            if (in_array($actualNum, $top5ByModel[$model], true)) {
                $hitCounts[$model]++;
            }
        }
    }

    // Hit rate per sub-model.
    $hitRates = [];
    foreach ($subModels as $model) {
        $hitRates[$model] = $hitCounts[$model] / max($testWindow, 1);
    }

    // With (almost) no hits anywhere there is no signal to adapt to.
    $avgHitRate = array_sum($hitRates) / max(count($hitRates), 1);
    if ($avgHitRate < 0.01) return $baseWeights;

    $adapted = [];
    foreach ($subModels as $model) {
        $performanceRatio = $hitRates[$model] / $avgHitRate;
        $adapted[$model] = $alpha * $baseWeights[$model] * $performanceRatio + (1 - $alpha) * $baseWeights[$model];
        // Keep every weight inside [0.03, 0.40].
        $adapted[$model] = max(0.03, min(0.40, $adapted[$model]));
    }
    return $adapted;
}
/**
 * V4 confidence estimate for the top predictions.
 *
 * The base score combines three parts:
 *  1. backtest hit rate, bucketed to 80/60/40/20 (weight 40%);
 *  2. relative probability gap between the first and the fifth prediction,
 *     bucketed to 80/60/40/20 (weight 30%);
 *  3. an entropy term (weight 30%) - currently a fixed placeholder of 50, because
 *     the full distribution is not passed to this method.
 *
 * Each prediction's score is the base score reduced by 5% per rank below the
 * first (rank 1 = base, rank 2 = base * 0.95, ...). The overall score and level
 * are the undecayed base score.
 *
 * @param array      $predictions Top predictions, best first; each row has 'num' and 'prob'.
 * @param array|null $backtest    Backtest result ('hit_rate', 'total_tests') or null.
 * @return array ['items' => per-prediction num/level/score, 'overall_score', 'overall_level', 'data_warning']
 */
private function _calculateConfidenceV4($predictions, $backtest)
{
    $levelOf = function ($score) {
        return $score >= 70 ? 'high' : ($score >= 50 ? 'medium' : 'low');
    };

    // Part 1: backtest hit rate (percent).
    $backtestHitRate = $backtest['hit_rate'] ?? 0;
    $btScore = $backtestHitRate >= 30 ? 80 : ($backtestHitRate >= 20 ? 60 : ($backtestHitRate >= 10 ? 40 : 20));

    // Part 2: how far the best prediction stands out from the fifth.
    $top1Prob = $predictions[0]['prob'] ?? 0;
    $top5Prob = $predictions[4]['prob'] ?? 0;
    $probGap  = $top1Prob > 0 ? ($top1Prob - $top5Prob) / $top1Prob : 0;
    $gapScore = $probGap > 0.3 ? 80 : ($probGap > 0.15 ? 60 : ($probGap > 0.05 ? 40 : 20));

    // Part 3: entropy of the distribution - placeholder until the distribution is passed in.
    $entropyScore = 50;

    $baseScore = $btScore * 0.4 + $gapScore * 0.3 + $entropyScore * 0.3;

    // Per-prediction scores decay with rank; the base score itself is left intact
    // so the overall figures below are not affected by the decay.
    $items = [];
    $rankScore = $baseScore;
    $position = 0;
    foreach ($predictions as $p) {
        if ($position > 0) {
            $rankScore *= 0.95;
        }
        $items[] = [
            'num'   => $p['num'],
            'level' => $levelOf($rankScore),
            'score' => round($rankScore, 1)
        ];
        $position++;
    }

    return [
        'items'         => $items,
        'overall_score' => round($baseScore, 1),
        'overall_level' => $levelOf($baseScore),
        'data_warning'  => ($backtest && ($backtest['total_tests'] ?? 0) < 30) ? '回测期数较少,置信度可能不准确' : null,
    ];
}
/**
 * V4 backtest: replay the prediction for the newest $testCount draws.
 *
 * For each tested draw getPredictionV4() is called in verification mode (backtest
 * disabled, so no recursion and no weight adaptation) and the actual number is
 * looked up in the returned top predictions. Draws for which no prediction could
 * be produced are skipped and do not count toward the statistics.
 *
 * @param int         $periods    History size handed to every prediction.
 * @param array       $weights    Sub-model weights handed to every prediction.
 * @param int         $testCount  Number of newest draws to test.
 * @param string|null $cutoffTime When set, only draws with openTime before it are tested.
 * @return array hit_rate and precision_5 in percent, avg_rank, total_tests (draws actually
 *               evaluated), total_hits, details, ndcg_5, mrr, hit_distribution, data_sufficient;
 *               or hit_rate/avg_rank/details/error when too little data exists.
 */
private function _runBacktestV4($periods, $weights, $testCount = 50, $cutoffTime = null)
{
    $query = $this->field('expect,num7,openTime');
    if ($cutoffTime) {
        $query = $query->where('openTime', '<', $cutoffTime);
    }
    // Fetched only to confirm every tested draw has a full history window behind it;
    // the loop below reads just the newest $testCount rows.
    $totalHistory = $query->order('openTime', 'desc')
        ->limit($periods + $testCount + 20)
        ->select();
    if (count($totalHistory) < $periods + $testCount) {
        return ['hit_rate' => 0, 'avg_rank' => 0, 'details' => [], 'error' => '数据不足'];
    }

    $hits = 0;
    $ranks = [];
    $details = [];
    for ($i = 0; $i < $testCount; $i++) {
        $targetRow    = $totalHistory[$i];
        $targetExpect = (string)$targetRow['expect'];
        $actualNum    = (int)$targetRow['num7'];

        $predResult = $this->getPredictionV4($periods, $weights, $targetExpect, true, 0);
        if (isset($predResult['error']) || empty($predResult['predictions'])) {
            continue; // not evaluated - excluded from every statistic below
        }

        // 1-based position of the actual number among the predictions, -1 on a miss.
        $rank = -1;
        foreach ($predResult['predictions'] as $idx => $p) {
            if ($p['num'] === $actualNum) { $rank = $idx + 1; break; }
        }
        if ($rank > 0) {
            $hits++;
            $ranks[] = $rank;
        }
        $details[] = [
            'expect'      => $targetExpect,
            'actual'      => $actualNum,
            'predictions' => array_column($predResult['predictions'], 'num'),
            'hit'         => $rank > 0,
            'rank'        => $rank
        ];
    }

    // Rates are relative to the draws that were actually evaluated; dividing by the
    // requested $testCount would count skipped draws as misses.
    $evaluated = count($details);
    $hitRate = $evaluated > 0 ? round($hits / $evaluated * 100, 2) : 0;
    $avgRank = count($ranks) > 0 ? round(array_sum($ranks) / count($ranks), 2) : 0;
    $ndcg5 = $this->_calculateNDCG($details, 5);
    $mrr = $this->_calculateMRR($details);
    $hitDistribution = $this->_calculateHitDistribution($details);
    $precision5 = $evaluated > 0 ? round($hits / ($evaluated * 5) * 100, 2) : 0;

    return [
        'hit_rate'         => $hitRate,
        'avg_rank'         => $avgRank,
        'total_tests'      => $evaluated,
        'total_hits'       => $hits,
        'details'          => $details,
        'ndcg_5'           => $ndcg5,
        'mrr'              => $mrr,
        'hit_distribution' => $hitDistribution,
        'precision_5'      => $precision5,
        'data_sufficient'  => $evaluated >= 30,
    ];
}
}