feat(history): 新增历史记录页面功能
- 实现历史记录表格展示功能,包含开奖期号、号码及时间等字段 - 添加号码球样式显示,支持颜色和生肖标识展示 - 集成遗漏号码分析功能,可查询号码遗漏情况 - 实现走势图分析功能,使用ECharts展示号码趋势 - 添加冷热分析功能,统计号码热度排行 - 实现波色、生肖、奇偶、大小等多维度分析工具 - 集成和值分析、连号分析、尾数分析等功能 - 添加特码冷热列表展示功能 - 实现综合统计面板功能 - 集成筛号器功能,支持多种筛选条件 - 添加号码预测和正码关联预测功能 - 实现尾首概率分析功能 - 集成颜色和生肖映射加载机制
This commit is contained in:
@@ -4914,5 +4914,930 @@ class History extends Model
|
||||
];
|
||||
}
|
||||
|
||||
// ============================================================
// V4 prediction algorithm: Bayesian log-likelihood ensemble
// + exponential time decay + pattern matching
// ============================================================

/**
 * V4 prediction: a weighted log-space ensemble of six sub-models.
 *
 * Pipeline implemented below:
 *  1. Exponential time decay (half-life = 50 draws) so recent draws weigh more.
 *  2. Six sub-models each yield a per-number log score:
 *     time-decayed frequency, number-level Markov transition from the last
 *     special number, empirical omission score, historical pattern matching,
 *     cyclical (autocorrelation) detection and attribute balance
 *     (odd/even, big/small, zone, colour).
 *  3. Fusion: logPosterior(n) = sum_i w_i * logP_i(n), then a numerically
 *     stable softmax turns the scores into a distribution over 1..49.
 *  4. Optional adaptive re-weighting of the sub-models and optional backtest.
 *
 * Only the special number (`num7`) of each draw is used.
 *
 * @param int    $periods       Number of historical draws to load (documented range 30-500;
 *                              fewer than 30 available draws yields an error result)
 * @param array  $weights       Optional sub-model weights keyed by sub-model name; merged over the
 *                              defaults. Keys that are not a known sub-model are ignored.
 * @param string $targetExpect  Optional draw id to validate against; only draws strictly before
 *                              its openTime are used and the real result is reported in `hit_info`
 * @param bool   $skipBacktest  When true, neither adaptive weighting nor the backtest is run
 * @param int    $backtestCount Number of draws to backtest
 * @return array Keys on success: predictions (top 5), last_special, last_expect, analysis,
 *               actual_result, hit_info, backtest, confidence, probabilities (1..49).
 *               On failure: predictions = [] plus an `error` message where applicable.
 */
public function getPredictionV4($periods = 200, $weights = [], $targetExpect = '', $skipBacktest = false, $backtestCount = 50)
{
    set_time_limit(120); // the V4 pipeline is computationally heavy

    $num_model = new Num();
    $colorMap  = $num_model->column('color', 'num');
    $animalMap = $num_model->column('animal', 'num');

    // Default sub-model weights (may be replaced by the adaptive step below).
    $defaultWeights = [
        'freq_ewma'      => 0.20, // time-decayed frequency
        'markov_number'  => 0.22, // number-level Markov transition
        'omit_empirical' => 0.17, // empirical omission score
        'pattern_match'  => 0.18, // historical pattern matching
        'cyclical'       => 0.09, // cyclical detection
        'attr_balance'   => 0.14, // attribute balance (odd/even, big/small, zone, colour)
    ];
    // Unknown keys are dropped: they would enter the normalisation sum below without
    // ever being used in the fusion, which silently flattens the final distribution.
    $weights = (is_array($weights) && $weights)
        ? array_merge($defaultWeights, array_intersect_key($weights, $defaultWeights))
        : $defaultWeights;

    // ---- Load history ----
    $actualResult = null;
    $lastSpecial  = 0;
    $lastExpect   = '';
    $cutoffTime   = null;
    $allHistory   = [];

    if ($targetExpect) {
        $targetRow = $this->where('expect', $targetExpect)->find();
        if (!$targetRow) {
            return ['predictions' => [], 'error' => '期号不存在', 'target_expect' => $targetExpect];
        }
        $cutoffTime   = $targetRow['openTime'];
        $actualResult = [
            'expect'   => (string)$targetRow['expect'],
            'num7'     => (int)$targetRow['num7'],
            'color'    => $colorMap[$targetRow['num7']] ?? '',
            'animal'   => $animalMap[$targetRow['num7']] ?? '',
            'openTime' => $targetRow['openTime']
        ];
        // The draw immediately before the target is the "last" draw for the Markov model.
        $prevRow = $this->where('openTime', '<', $cutoffTime)->order('openTime', 'desc')->limit(1)->find();
        if (!$prevRow) {
            return ['predictions' => [], 'error' => '目标期号之前没有历史数据'];
        }
        $lastSpecial = (int)$prevRow['num7'];
        $lastExpect  = (string)$prevRow['expect'];
        $allHistory  = $this->field('expect,num7,openTime')
            ->where('openTime', '<', $cutoffTime)
            ->order('openTime', 'desc')
            ->limit($periods)
            ->select();
    } else {
        $latest = $this->field('expect,num7,openTime')->order('openTime', 'desc')->limit(1)->find();
        if (!$latest) {
            return ['predictions' => [], 'last_special' => 0, 'analysis' => []];
        }
        $lastSpecial = (int)$latest['num7'];
        $lastExpect  = (string)$latest['expect'];
        $allHistory  = $this->field('expect,num7,openTime')
            ->order('openTime', 'desc')
            ->limit($periods)
            ->select();
    }

    if (empty($allHistory) || count($allHistory) < 30) {
        return ['predictions' => [], 'error' => '历史数据不足(至少需要30期)', 'last_special' => $lastSpecial];
    }

    // Oldest draw first.
    $historyAsc = array_reverse($allHistory);
    $totalDraws = count($allHistory);

    // ---- Exponential time-decay weights ----
    $halfLife = 50; // a draw 50 positions older gets half the weight
    $lambda   = log(2) / $halfLife;
    $timeWeights = [];
    for ($t = 0; $t < $totalDraws; $t++) {
        // index 0 is the oldest draw, index totalDraws-1 the newest
        $age = $totalDraws - 1 - $t;
        $timeWeights[$t] = exp(-$lambda * $age);
    }
    // Rescale so the weights sum to $totalDraws (mean weight of 1).
    $twSum = array_sum($timeWeights);
    if ($twSum > 0) {
        foreach ($timeWeights as $k => $v) {
            $timeWeights[$k] = $v / $twSum * $totalDraws;
        }
    }

    // ---- Per-number attributes ----
    $zoneMap = [];
    $isOddMap = [];
    $isBigMap = [];
    $colorKeyMap = [];
    for ($n = 1; $n <= 49; $n++) {
        // NOTE(review): _getZoneIdx is defined elsewhere; the zone counters below assume it returns 0..4.
        $zoneMap[$n]  = $this->_getZoneIdx($n);
        $isOddMap[$n] = ($n % 2 === 1);
        $isBigMap[$n] = ($n >= 25);
        $c = $colorMap[$n] ?? '';
        if (strpos($c, '红') !== false) {
            $colorKeyMap[$n] = '红';
        } elseif (strpos($c, '蓝') !== false) {
            $colorKeyMap[$n] = '蓝';
        } elseif (strpos($c, '绿') !== false) {
            $colorKeyMap[$n] = '绿';
        } else {
            $colorKeyMap[$n] = '';
        }
    }

    // ============================================================
    // Sub-model 1: time-decayed frequency
    // ============================================================
    $ewmaFreq = array_fill(1, 49, 0.0);
    foreach ($historyAsc as $idx => $row) {
        $num = (int)$row['num7'];
        if ($num >= 1 && $num <= 49) {
            $ewmaFreq[$num] += $timeWeights[$idx];
        }
    }
    $ewmaSum = array_sum($ewmaFreq);
    $ewmaLogProb = [];
    for ($n = 1; $n <= 49; $n++) {
        // Floor at 0.0001 so a number that never appeared does not produce log(0).
        $p = max($ewmaFreq[$n] / max($ewmaSum, 0.001), 0.0001);
        $ewmaLogProb[$n] = log($p);
    }

    // ============================================================
    // Sub-model 2: number-level Markov transition
    // ============================================================
    $numberMarkov = $this->_buildNumberMarkovMatrix($historyAsc, $timeWeights);

    $markovLogProb = [];
    $lastNum = $lastSpecial;
    for ($n = 1; $n <= 49; $n++) {
        // Falls back to uniform when the last number is outside 1..49.
        $p = $numberMarkov['prob'][$lastNum][$n] ?? (1.0 / 49);
        $p = max($p, 0.0005);
        $markovLogProb[$n] = log($p);
    }

    // ============================================================
    // Sub-model 3: empirical omission score
    // ============================================================
    // Current omission per number (draws since last appearance; $totalDraws when the
    // number never appeared in the window) plus the pooled gaps observed at each appearance.
    $omitCount      = array_fill(1, 49, $totalDraws);
    $lastAppear     = array_fill(1, 49, -1);
    $omitHistoryAll = [];

    foreach ($historyAsc as $idx => $row) {
        $num = (int)$row['num7'];
        if ($num < 1 || $num > 49) {
            continue;
        }
        $omitHistoryAll[] = $lastAppear[$num] >= 0
            ? $idx - $lastAppear[$num]
            : $idx + 1; // first appearance: gap measured from the start of the window
        $lastAppear[$num] = $idx;
        $omitCount[$num]  = $totalDraws - 1 - $idx;
    }

    sort($omitHistoryAll);
    $omitLogProb = [];
    for ($n = 1; $n <= 49; $n++) {
        // NOTE(review): _calcOmitScoreEmpirical is defined elsewhere; the division by 100
        // presumes it returns a 0..100 score - confirm.
        $score = $this->_calcOmitScoreEmpirical($omitCount[$n], $omitHistoryAll, []);
        $omitLogProb[$n] = log(max($score / 100.0, 0.002));
    }

    // ============================================================
    // Sub-model 4: historical pattern matching
    // ============================================================
    $patternResult = $this->_findSimilarPatternsV4($historyAsc, $totalDraws, $zoneMap, $isOddMap, $isBigMap, $colorKeyMap);

    // ============================================================
    // Sub-model 5: cyclical detection
    // ============================================================
    $cyclicalLogProb = $this->_detectCyclicalPatternsV4($historyAsc, $totalDraws);

    // ============================================================
    // Sub-model 6: attribute balance (odd/even, big/small, zone, colour)
    // ============================================================
    // Time-weighted share of each attribute value in the window; numbers whose
    // attribute values have been under-represented get a higher score.
    $attrCounts = [
        'odd' => 0, 'even' => 0, 'big' => 0, 'small' => 0,
        'zone' => [0, 0, 0, 0, 0],
        'color' => ['红' => 0, '蓝' => 0, '绿' => 0]
    ];
    foreach ($historyAsc as $idx => $row) {
        $num = (int)$row['num7'];
        if ($num < 1 || $num > 49) {
            continue;
        }
        $w = $timeWeights[$idx];
        $attrCounts['odd']   += ($isOddMap[$num] ? $w : 0);
        $attrCounts['even']  += ($isOddMap[$num] ? 0 : $w);
        $attrCounts['big']   += ($isBigMap[$num] ? $w : 0);
        $attrCounts['small'] += ($isBigMap[$num] ? 0 : $w);
        $attrCounts['zone'][$zoneMap[$num]] += $w;
        $ck = $colorKeyMap[$num];
        if ($ck) {
            $attrCounts['color'][$ck] += $w;
        }
    }

    // These aggregates do not depend on the candidate number, so compute them once.
    $oddW       = $attrCounts['odd'] + $attrCounts['even'];
    $oddRatio   = $oddW > 0 ? $attrCounts['odd'] / $oddW : 0.5;
    $bsW        = $attrCounts['big'] + $attrCounts['small'];
    $bigRatio   = $bsW > 0 ? $attrCounts['big'] / $bsW : 0.5;
    $zoneTotal  = array_sum($attrCounts['zone']);
    $colorTotal = array_sum($attrCounts['color']);

    $attrLogProb = [];
    for ($n = 1; $n <= 49; $n++) {
        $logP = 0;

        // Odd/even balance
        $logP += log(max($isOddMap[$n] ? (1 - $oddRatio + 0.5) : ($oddRatio + 0.5), 0.4) / 1.5);

        // Big/small balance
        $logP += log(max($isBigMap[$n] ? (1 - $bigRatio + 0.5) : ($bigRatio + 0.5), 0.4) / 1.5);

        // Zone balance
        $zoneRatio = $zoneTotal > 0 ? $attrCounts['zone'][$zoneMap[$n]] / $zoneTotal : 0.2;
        $logP += log(max(1 - $zoneRatio + 0.2, 0.3) / 1.2);

        // Colour balance (numbers without a recognised colour count as ratio 0)
        $colorRatio = $colorTotal > 0 ? ($attrCounts['color'][$colorKeyMap[$n]] ?? 0) / $colorTotal : 0.333;
        $logP += log(max(1 - $colorRatio + 0.33, 0.3) / 1.33);

        $attrLogProb[$n] = $logP;
    }

    // ============================================================
    // Adaptive ensemble weights
    // ============================================================
    $useAdaptiveWeights = !$targetExpect && !$skipBacktest && $totalDraws >= 60;
    if ($useAdaptiveWeights) {
        $weights = $this->_adaptEnsembleWeightsV4($historyAsc, $timeWeights, $totalDraws, $weights,
            $ewmaLogProb, $markovLogProb, $omitLogProb, $patternResult,
            $cyclicalLogProb, $attrLogProb);
    }

    // ---- Normalise weights to sum to 1 ----
    $wSum = array_sum($weights);
    if ($wSum > 0) {
        foreach ($weights as $k => $v) {
            $weights[$k] = $v / $wSum;
        }
    }

    // ============================================================
    // Log-space fusion: logPosterior(n) = sum_i w_i * logP_i(n)
    // (normalisation constant omitted), followed by a softmax.
    // ============================================================
    $logPosterior = [];
    $maxLog = -INF;
    for ($n = 1; $n <= 49; $n++) {
        $lp = 0;
        $lp += $weights['freq_ewma']      * $ewmaLogProb[$n];
        $lp += $weights['markov_number']  * $markovLogProb[$n];
        $lp += $weights['omit_empirical'] * $omitLogProb[$n];
        $lp += $weights['pattern_match']  * $patternResult['logprob'][$n];
        $lp += $weights['cyclical']       * $cyclicalLogProb[$n];
        $lp += $weights['attr_balance']   * $attrLogProb[$n];
        $logPosterior[$n] = $lp;
        if ($lp > $maxLog) {
            $maxLog = $lp;
        }
    }

    // Softmax; subtracting the maximum keeps exp() from overflowing.
    $softmaxSum = 0;
    $softmaxRaw = [];
    for ($n = 1; $n <= 49; $n++) {
        $v = exp($logPosterior[$n] - $maxLog);
        $softmaxRaw[$n] = $v;
        $softmaxSum += $v;
    }

    $probabilities = [];
    for ($n = 1; $n <= 49; $n++) {
        $probabilities[$n] = $softmaxRaw[$n] / max($softmaxSum, 0.0001);
    }

    // ---- Build the per-number result rows ----
    $scores = [];
    for ($n = 1; $n <= 49; $n++) {
        $scores[] = [
            'num'    => $n,
            'prob'   => round($probabilities[$n], 6),
            'score'  => round($probabilities[$n] * 100, 2),
            'color'  => $colorMap[$n] ?? '',
            'animal' => $animalMap[$n] ?? '',
            'detail' => [
                'freq_ewma'      => round(exp($ewmaLogProb[$n]) * 100, 2),
                'markov_number'  => round(exp($markovLogProb[$n]) * 100, 2),
                'omit_empirical' => round(exp($omitLogProb[$n]) * 100, 2),
                'pattern_match'  => round(exp($patternResult['logprob'][$n]) * 100, 2),
                'cyclical'       => round(exp($cyclicalLogProb[$n]) * 100, 2),
                'attr_balance'   => round(exp($attrLogProb[$n]) * 100, 2),
                'omit_count'     => $omitCount[$n],
                'is_odd'         => $isOddMap[$n],
                'is_big'         => $isBigMap[$n],
                'zone'           => $zoneMap[$n],
            ]
        ];
    }

    // Highest probability first; the top 5 are the predictions.
    usort($scores, function ($a, $b) { return $b['prob'] <=> $a['prob']; });
    $predictions = array_slice($scores, 0, 5);

    // Backtest (uses the normalised weights)
    $backtest = $skipBacktest ? null : $this->_runBacktestV4($periods, $weights, $backtestCount, $cutoffTime);

    // Confidence
    $confidence = $this->_calculateConfidenceV4($predictions, $backtest);

    // Hit verification against the real result (only when a target draw was given)
    $hitInfo = null;
    if ($actualResult) {
        $hitRank = -1;
        foreach ($predictions as $idx => $p) {
            if ($p['num'] === $actualResult['num7']) {
                $hitRank = $idx + 1;
                break;
            }
        }
        $hitInfo = [
            'hit'           => $hitRank > 0,
            'rank'          => $hitRank,
            'actual_num'    => $actualResult['num7'],
            'actual_color'  => $actualResult['color'],
            'actual_animal' => $actualResult['animal'],
            'actual_expect' => $actualResult['expect']
        ];
    }

    // Analysis summary
    $analysis = [
        'version'          => 'V4',
        'algorithm'        => 'Bayesian Ensemble with Time-Decay',
        'last_special'     => $lastSpecial,
        'last_expect'      => $lastExpect,
        'history_count'    => $totalDraws,
        'weights'          => $weights,
        'weights_adaptive' => $useAdaptiveWeights,
        'half_life'        => $halfLife,
        'pattern_match_info' => [
            'top_k'            => $patternResult['top_k'],
            'best_similarity'  => $patternResult['best_similarity'],
            'matched_segments' => $patternResult['segment_count'],
        ],
        'cyclical_detected' => $this->_summarizeCyclicalV4($cyclicalLogProb),
    ];

    return [
        'predictions'   => $predictions,
        'last_special'  => $lastSpecial,
        'last_expect'   => $lastExpect,
        'analysis'      => $analysis,
        'actual_result' => $actualResult,
        'hit_info'      => $hitInfo,
        'backtest'      => $backtest,
        'confidence'    => $confidence,
        'probabilities' => $probabilities, // full distribution over the 49 numbers
    ];
}
|
||||
|
||||
/**
 * Build the number-level transition table P(next = j | current = i) over 1..49.
 *
 * Each consecutive pair of draws contributes the time weight of the earlier draw.
 * Row i is then smoothed by adding 30% of the average count of up to three
 * neighbouring rows on each side, followed by add-one (Laplace) smoothing.
 *
 * @param array $historyAsc  Draws in ascending order; each entry exposes 'num7'
 * @param array $timeWeights Time-decay weights aligned with $historyAsc indexes
 * @return array ['matrix' => weighted counts, 'prob' => probabilities, 'row_total' => per-row weight sums]
 */
private function _buildNumberMarkovMatrix($historyAsc, $timeWeights)
{
    $blankRow = array_fill(1, 49, 0.0);
    $counts   = array_fill(1, 49, $blankRow);
    $rowSums  = $blankRow;

    // Accumulate weighted transitions between consecutive draws.
    $lastIndex = count($historyAsc) - 1;
    for ($pos = 0; $pos < $lastIndex; $pos++) {
        $src = (int)$historyAsc[$pos]['num7'];
        $dst = (int)$historyAsc[$pos + 1]['num7'];
        $inRange = $src >= 1 && $src <= 49 && $dst >= 1 && $dst <= 49;
        if (!$inRange) {
            continue;
        }
        // The weight of the earlier draw applies to the transition leaving it.
        $weight = $timeWeights[$pos];
        $counts[$src][$dst] += $weight;
        $rowSums[$src] += $weight;
    }

    $radius = 3; // neighbouring rows borrowed from on each side
    $probabilities = [];
    for ($src = 1; $src <= 49; $src++) {
        // Valid neighbouring source numbers, in ascending order, excluding $src itself.
        $neighbours = [];
        for ($cand = $src - $radius; $cand <= $src + $radius; $cand++) {
            if ($cand >= 1 && $cand <= 49 && $cand != $src) {
                $neighbours[] = $cand;
            }
        }
        $neighbourTotal = count($neighbours);

        // Own count plus a 0.3 share of the neighbours' average count.
        $smoothed = [];
        foreach ($counts[$src] as $dst => $ownCount) {
            $borrowed = 0;
            foreach ($neighbours as $nb) {
                $borrowed += $counts[$nb][$dst];
            }
            $average = $neighbourTotal > 0 ? $borrowed / $neighbourTotal : 0;
            $smoothed[$dst] = $ownCount + $average * 0.3;
        }

        // Add-one smoothing: +1 per target, +49 in the denominator.
        $denominator = max(array_sum($smoothed) + 49, 1);
        $row = [];
        foreach ($smoothed as $dst => $value) {
            $row[$dst] = ($value + 1) / $denominator;
        }
        $probabilities[$src] = $row;
    }

    return [
        'matrix'    => $counts,
        'prob'      => $probabilities,
        'row_total' => $rowSums
    ];
}
|
||||
|
||||
/**
 * Historical pattern matching.
 *
 * Describes the most recent 3 draws by (zone, odd/even, big/small, colour) features,
 * scans every earlier 3-draw window that ends at least 3 draws before the recent one,
 * ranks the windows by cosine similarity and uses the draw that followed each of the
 * 15 best windows (weighted by similarity) as an empirical next-number distribution.
 * The distribution is shrunk 30% towards uniform; numbers with no support get the
 * uniform log-probability.
 *
 * @param array $historyAsc  Draws in ascending order; each entry exposes 'num7'
 * @param int   $totalDraws  Number of draws in $historyAsc
 * @param array $zoneMap     number => zone index
 * @param array $isOddMap    number => bool (odd)
 * @param array $isBigMap    number => bool (big)
 * @param array $colorKeyMap number => colour key
 * @return array ['logprob' => [1..49], 'top_k' => int, 'best_similarity' => float, 'segment_count' => int]
 */
private function _findSimilarPatternsV4($historyAsc, $totalDraws, $zoneMap, $isOddMap, $isBigMap, $colorKeyMap)
{
    $windowSize = 3;  // draws per pattern window
    $keepBest   = 15; // number of best-matching windows to use
    $gap        = 3;  // candidates must end this far before the recent window

    $uniformLog = log(1.0 / 49);

    // Too little data: fall back to a uniform distribution.
    if ($totalDraws < $windowSize + $gap + 5) {
        $flat = [];
        for ($n = 1; $n <= 49; $n++) {
            $flat[$n] = log(1.0 / 49);
        }
        return ['logprob' => $flat, 'top_k' => 0, 'best_similarity' => 0, 'segment_count' => 0];
    }

    // Feature rows for the window starting at $from; draws outside 1..49 are skipped,
    // which shortens the window and makes it score 0 against a full-length window.
    $describeWindow = function ($from) use ($historyAsc, $windowSize, $zoneMap, $isOddMap, $isBigMap, $colorKeyMap) {
        $rows = [];
        $end = $from + $windowSize;
        for ($pos = $from; $pos < $end; $pos++) {
            $num = (int)$historyAsc[$pos]['num7'];
            if ($num < 1 || $num > 49) {
                continue;
            }
            if ($colorKeyMap[$num] === '红') {
                $colourCode = 0;
            } elseif ($colorKeyMap[$num] === '蓝') {
                $colourCode = 1;
            } else {
                $colourCode = 2;
            }
            $rows[] = [
                'zone'     => $zoneMap[$num],
                'oddeven'  => $isOddMap[$num] ? 1 : 0,
                'bigsmall' => $isBigMap[$num] ? 1 : 0,
                'color'    => $colourCode,
            ];
        }
        return $rows;
    };

    $recentFeatures = $describeWindow($totalDraws - $windowSize);

    // Score every admissible historical window against the recent one.
    $ranked = [];
    $scanLimit = $totalDraws - $windowSize - $gap;
    for ($from = 0; $from < $scanLimit; $from++) {
        $ranked[] = [
            'start' => $from,
            'sim'   => $this->_cosineSimilarityV4($recentFeatures, $describeWindow($from)),
        ];
    }

    // Most similar first, keep the best few.
    usort($ranked, function ($a, $b) { return $b['sim'] <=> $a['sim']; });
    $best = array_slice($ranked, 0, $keepBest);

    // Similarity-weighted tally of the number that followed each kept window.
    $followers = array_fill(1, 49, 0.0);
    $followerMass = 0;
    foreach ($best as $entry) {
        $after = $entry['start'] + $windowSize;
        if ($after >= $totalDraws) {
            continue;
        }
        $follower = (int)$historyAsc[$after]['num7'];
        if ($follower < 1 || $follower > 49) {
            continue;
        }
        $vote = max($entry['sim'], 0.01);
        $followers[$follower] += $vote;
        $followerMass += $vote;
    }

    // Convert to log-probabilities, shrinking towards uniform.
    $logprob = [];
    for ($n = 1; $n <= 49; $n++) {
        if (!($followerMass > 0 && $followers[$n] > 0)) {
            $logprob[$n] = $uniformLog;
            continue;
        }
        $share  = $followers[$n] / $followerMass;
        $shrunk = $share * 0.7 + (1.0 / 49) * 0.3;
        $logprob[$n] = log(max($shrunk, 0.0005));
    }

    $topSimilarity = 0;
    if (!empty($best)) {
        $topSimilarity = $best[0]['sim'];
    }

    return [
        'logprob'         => $logprob,
        'top_k'           => $keepBest,
        'best_similarity' => round($topSimilarity, 4),
        'segment_count'   => count($best)
    ];
}
|
||||
|
||||
/**
 * Cosine similarity between two feature sequences.
 *
 * Each sequence entry contributes four components to a flat vector:
 * zone * 0.2, oddeven, bigsmall and color / 2.0. Sequences of different
 * length, or an empty first sequence, score 0. Negative results are clamped to 0.
 *
 * @param array $seq1 List of ['zone' => .., 'oddeven' => .., 'bigsmall' => .., 'color' => ..]
 * @param array $seq2 Same shape as $seq1
 * @return float|int Similarity, never below 0
 */
private function _cosineSimilarityV4($seq1, $seq2)
{
    if (empty($seq1) || count($seq1) !== count($seq2)) {
        return 0;
    }

    // Flatten one sequence into its numeric component vector.
    $flatten = function ($sequence) {
        $flat = [];
        foreach ($sequence as $feature) {
            $flat[] = $feature['zone'] * 0.2;  // scale the zone index down
            $flat[] = $feature['oddeven'];
            $flat[] = $feature['bigsmall'];
            $flat[] = $feature['color'] / 2.0; // 0, 0.5, 1.0
        }
        return $flat;
    };

    $left  = $flatten($seq1);
    $right = $flatten($seq2);

    $dotProduct = 0;
    $leftSq     = 0;
    $rightSq    = 0;
    foreach ($left as $pos => $component) {
        $other = $right[$pos];
        $dotProduct += $component * $other;
        $leftSq     += $component * $component;
        $rightSq    += $other * $other;
    }

    // Squared norms are floored so an all-zero vector cannot divide by zero.
    $scale = sqrt(max($leftSq, 0.0001)) * sqrt(max($rightSq, 0.0001));
    if ($scale > 0) {
        return max(0, $dotProduct / $scale);
    }
    return 0;
}
|
||||
|
||||
/**
 * Cyclical detection via autocorrelation.
 *
 * For every number 1..49 a 0/1 appearance series is built over the window and the
 * absolute autocorrelation is evaluated for lags 5..min(totalDraws/4, 60). The lag
 * with the largest value is kept per number. A number whose best value exceeds the
 * threshold gets a multiplier in (1, 3] on the uniform probability, plus a phase bonus
 * when the distance since its last appearance is within 30% of its own best lag.
 * The result is renormalised and returned as log-probabilities.
 *
 * @param array $historyAsc Draws in ascending order; each entry exposes 'num7'
 * @param int   $totalDraws Number of draws in $historyAsc
 * @return array Log-probability per number, keyed 1..49
 */
private function _detectCyclicalPatternsV4($historyAsc, $totalDraws)
{
    $uniformLog = log(1.0 / 49);

    // Nothing to analyse; also avoids dividing by zero below.
    if ($totalDraws <= 0) {
        return array_fill(1, 49, $uniformLog);
    }

    // 0/1 appearance series per number.
    $appearances = array_fill(1, 49, array_fill(0, $totalDraws, 0));
    foreach ($historyAsc as $idx => $row) {
        $num = (int)$row['num7'];
        if ($num >= 1 && $num <= 49) {
            $appearances[$num][$idx] = 1;
        }
    }

    $minLag       = 5;
    $maxLag       = min((int)($totalDraws / 4), 60);
    $acfThreshold = 0.12; // minimum |ACF| to count as cyclical

    // Score and best lag are tracked PER NUMBER: the phase bonus below must use the
    // lag found for that number, not whatever lag the last analysed number produced.
    $cyclicalScores = array_fill(1, 49, 0);
    $bestLags       = array_fill(1, 49, 0);

    for ($n = 1; $n <= 49; $n++) {
        $seq  = $appearances[$n];
        $mean = array_sum($seq) / $totalDraws;
        if ($mean < 0.005) {
            continue; // (almost) never appeared: no cycle to speak of
        }

        $variance = 0;
        for ($t = 0; $t < $totalDraws; $t++) {
            $variance += ($seq[$t] - $mean) * ($seq[$t] - $mean);
        }
        $variance /= $totalDraws;
        if ($variance < 0.0001) {
            continue; // constant series
        }

        $bestAcf = 0;
        $bestLag = 0;
        for ($lag = $minLag; $lag <= $maxLag; $lag++) {
            $cov   = 0;
            $count = 0;
            for ($t = 0; $t < $totalDraws - $lag; $t++) {
                $cov += ($seq[$t] - $mean) * ($seq[$t + $lag] - $mean);
                $count++;
            }
            if ($count > 0) {
                $acf = abs($cov / ($count * $variance));
                if ($acf > $bestAcf) {
                    $bestAcf = $acf;
                    $bestLag = $lag;
                }
            }
        }

        // score = min(bestAcf / (3 * threshold), 1) * 100 once the threshold is exceeded
        $cyclicalScores[$n] = $bestAcf > $acfThreshold
            ? min($bestAcf / ($acfThreshold * 3), 1.0) * 100
            : 0;
        $bestLags[$n] = $bestLag;
    }

    // Map scores to (unnormalised) log-probabilities.
    $logProb = [];
    for ($n = 1; $n <= 49; $n++) {
        $score = $cyclicalScores[$n];
        if (!($score > 0)) {
            $logProb[$n] = $uniformLog;
            continue;
        }

        // Score maps to a probability multiplier in (1, 3].
        $multiplier = 1 + ($score / 100) * 2;

        // Draws since the most recent appearance.
        $lastAppear = 0;
        for ($t = $totalDraws - 1; $t >= 0; $t--) {
            if ($appearances[$n][$t] === 1) {
                $lastAppear = $totalDraws - 1 - $t;
                break;
            }
        }

        // Extra boost when the number is roughly one cycle away from its last appearance.
        $phaseBonus = 0;
        $lagForNumber = $bestLags[$n];
        if ($lagForNumber > 0 && $lastAppear > 0) {
            $phaseDiff = abs($lastAppear - $lagForNumber) / max($lagForNumber, 1);
            if ($phaseDiff < 0.3) {
                $phaseBonus = (1 - $phaseDiff) * 0.5;
            }
        }

        $pBoost = (1.0 / 49) * $multiplier * (1 + $phaseBonus);
        $logProb[$n] = log(max($pBoost, 0.0005));
    }

    // Renormalise so the probabilities sum to (approximately) 1.
    $maxLp = max($logProb);
    $lpSum = 0;
    $expLp = [];
    for ($n = 1; $n <= 49; $n++) {
        $expLp[$n] = exp($logProb[$n] - $maxLp);
        $lpSum += $expLp[$n];
    }
    for ($n = 1; $n <= 49; $n++) {
        $logProb[$n] = log(max($expLp[$n] / max($lpSum, 0.0001), 0.0005));
    }

    return $logProb;
}
|
||||
|
||||
/**
 * Summarise the cyclical sub-model output.
 *
 * A number counts as significant when its log-probability exceeds the uniform
 * log-probability log(1/49) by more than 0.1.
 *
 * @param array $cyclicalLogProb Log-probability per number
 * @return array ['significant_count' => int, 'has_cyclical' => bool (more than 3 significant numbers)]
 */
private function _summarizeCyclicalV4($cyclicalLogProb)
{
    $uniformLog = log(1.0 / 49);

    $significant = array_filter($cyclicalLogProb, function ($logProb) use ($uniformLog) {
        return $logProb > $uniformLog + 0.1;
    });
    $significantCount = count($significant);

    return [
        'significant_count' => $significantCount,
        'has_cyclical'      => $significantCount > 3,
    ];
}
|
||||
|
||||
/**
 * Adapt the ensemble weights from each sub-model's recent top-5 hit rate.
 *
 * The test window is the most recent min(30, 15% of totalDraws) draws. For each
 * sub-model the top-5 numbers of its (already computed) log-probabilities are checked
 * against every draw in the window. The new weight is
 *   alpha * base * (hitRate / avgHitRate) + (1 - alpha) * base,  alpha = 0.3,
 * clamped to [0.03, 0.40]. The base weights are returned unchanged when the window is
 * shorter than 10, when the database holds fewer than totalDraws + window draws, or
 * when the average hit rate is below 0.01.
 *
 * NOTE(review): the same global log-probabilities are reused for every test point and
 * they were fitted on a history that includes the test draws, so this is an in-sample
 * approximation rather than a true rolling backtest.
 *
 * @param array $historyAsc      Unused here; kept for interface compatibility
 * @param array $timeWeights     Unused here; kept for interface compatibility
 * @param int   $totalDraws      Number of draws in the prediction window
 * @param array $baseWeights     Current weights keyed by sub-model name
 * @param array $ewmaLogProb     Log scores of the time-decayed frequency model (1..49)
 * @param array $markovLogProb   Log scores of the Markov model (1..49)
 * @param array $omitLogProb     Log scores of the omission model (1..49)
 * @param array $patternResult   Pattern-matching result; its 'logprob' entry is used
 * @param array $cyclicalLogProb Log scores of the cyclical model (1..49)
 * @param array $attrLogProb     Log scores of the attribute-balance model (1..49)
 * @return array Adapted (not yet normalised) weights keyed by sub-model name
 */
private function _adaptEnsembleWeightsV4($historyAsc, $timeWeights, $totalDraws, $baseWeights,
    $ewmaLogProb, $markovLogProb, $omitLogProb, $patternResult,
    $cyclicalLogProb, $attrLogProb)
{
    $testWindow = min(30, (int)($totalDraws * 0.15));
    if ($testWindow < 10) {
        return $baseWeights;
    }

    $modelLogProbs = [
        'freq_ewma'      => $ewmaLogProb,
        'markov_number'  => $markovLogProb,
        'omit_empirical' => $omitLogProb,
        'pattern_match'  => $patternResult['logprob'],
        'cyclical'       => $cyclicalLogProb,
        'attr_balance'   => $attrLogProb,
    ];
    $subModels = array_keys($modelLogProbs);

    // Load the latest draws; the last $testWindow of them form the test set.
    $fullHistory = $this->field('expect,num7,openTime')
        ->order('openTime', 'desc')
        ->limit($totalDraws + $testWindow)
        ->select();
    $fullHistoryAsc = array_reverse($fullHistory);
    $fullN = count($fullHistoryAsc);

    if ($fullN < $totalDraws + $testWindow) {
        return $baseWeights;
    }

    // The per-model top-5 does not depend on the test point, so rank once per model
    // instead of re-sorting 49 scores for every draw in the window.
    $top5ByModel = [];
    foreach ($modelLogProbs as $model => $logProbs) {
        $modelScores = [];
        for ($n = 1; $n <= 49; $n++) {
            $modelScores[$n] = $logProbs[$n];
        }
        arsort($modelScores);
        $top5ByModel[$model] = array_slice(array_keys($modelScores), 0, 5);
    }

    $hitCounts = array_fill_keys($subModels, 0);
    for ($t = $fullN - $testWindow; $t < $fullN; $t++) {
        $actualNum = (int)$fullHistoryAsc[$t]['num7'];
        if ($actualNum < 1 || $actualNum > 49) {
            continue;
        }
        foreach ($top5ByModel as $model => $top5) {
            if (in_array($actualNum, $top5, true)) {
                $hitCounts[$model]++;
            }
        }
    }

    // Hit rate per model over the window.
    $hitRates = [];
    foreach ($subModels as $model) {
        $hitRates[$model] = $hitCounts[$model] / max($testWindow, 1);
    }

    $avgHitRate = array_sum($hitRates) / max(count($hitRates), 1);
    if ($avgHitRate < 0.01) {
        return $baseWeights; // too few hits to say anything about relative performance
    }

    $alpha = 0.3; // share of the weight that follows relative performance
    $adapted = [];
    foreach ($subModels as $model) {
        $performanceRatio = $hitRates[$model] / $avgHitRate;
        $adapted[$model] = $alpha * $baseWeights[$model] * $performanceRatio + (1 - $alpha) * $baseWeights[$model];
        // Keep every weight inside [0.03, 0.40].
        $adapted[$model] = max(0.03, min(0.40, $adapted[$model]));
    }

    return $adapted;
}
|
||||
|
||||
/**
 * V4 confidence assessment.
 *
 * The overall score combines three parts:
 *  1. Backtest hit rate, bucketed to 80/60/40/20 (weight 40%)
 *  2. Relative probability gap between the 1st and 5th prediction, bucketed to 80/60/40/20 (weight 30%)
 *  3. Entropy of the distribution (weight 30%) - currently a fixed placeholder of 50 because
 *     the full distribution is not passed in
 *
 * Each prediction's score is the overall score multiplied by 0.95 per rank below the first.
 * The overall score and level are reported undecayed.
 *
 * @param array      $predictions Top predictions; each entry exposes 'num' and 'prob'
 * @param array|null $backtest    Backtest result ('hit_rate', 'total_tests') or null when skipped
 * @return array ['items' => [['num', 'level', 'score'], ...], 'overall_score', 'overall_level', 'data_warning']
 */
private function _calculateConfidenceV4($predictions, $backtest)
{
    $levelOf = function ($score) {
        return $score >= 70 ? 'high' : ($score >= 50 ? 'medium' : 'low');
    };

    // Part 1: backtest hit rate (0 when no backtest was run)
    $backtestHitRate = is_array($backtest) ? ($backtest['hit_rate'] ?? 0) : 0;
    $btScore = $backtestHitRate >= 30 ? 80 : ($backtestHitRate >= 20 ? 60 : ($backtestHitRate >= 10 ? 40 : 20));

    // Part 2: relative gap between the 1st and 5th probability
    $top1Prob = $predictions[0]['prob'] ?? 0;
    $top5Prob = $predictions[4]['prob'] ?? 0;
    $probGap  = $top1Prob > 0 ? ($top1Prob - $top5Prob) / $top1Prob : 0;
    $gapScore = $probGap > 0.3 ? 80 : ($probGap > 0.15 ? 60 : ($probGap > 0.05 ? 40 : 20));

    // Part 3: entropy placeholder (needs the full distribution to be computed properly)
    $entropyScore = 50;

    $totalScore = $btScore * 0.4 + $gapScore * 0.3 + $entropyScore * 0.3;

    // Per-prediction scores decay with rank. The decay is applied to a separate running
    // value so the overall score reported below stays the undecayed total.
    $confidence = [];
    $itemScore  = $totalScore;
    $rank       = 0;
    foreach ($predictions as $p) {
        if ($rank > 0) {
            $itemScore *= 0.95;
        }
        $confidence[] = [
            'num'   => $p['num'],
            'level' => $levelOf($itemScore),
            'score' => round($itemScore, 1)
        ];
        $rank++;
    }

    return [
        'items'         => $confidence,
        'overall_score' => round($totalScore, 1),
        'overall_level' => $levelOf($totalScore),
        'data_warning'  => ($backtest && ($backtest['total_tests'] ?? 0) < 30) ? '回测期数较少,置信度可能不准确' : null,
    ];
}
|
||||
|
||||
/**
 * V4 backtest.
 *
 * Takes the most recent $testCount draws (before $cutoffTime when given), re-runs
 * getPredictionV4 for each of them as a validation target (backtest and adaptive
 * weighting disabled) and records whether the real special number was among the
 * predictions. Draws for which the prediction fails are skipped and do not count
 * towards the hit rate.
 *
 * @param int         $periods    History window passed through to getPredictionV4
 * @param array       $weights    Sub-model weights passed through to getPredictionV4
 * @param int         $testCount  Number of most recent draws to test
 * @param string|null $cutoffTime Only draws with openTime before this value are used
 * @return array hit_rate and precision_5 in percent, avg_rank, total_tests (draws actually
 *               evaluated), requested_tests, total_hits, details, ndcg_5, mrr,
 *               hit_distribution, data_sufficient; or an `error` entry when there is
 *               not enough history
 */
private function _runBacktestV4($periods, $weights, $testCount = 50, $cutoffTime = null)
{
    $query = $this->field('expect,num7,openTime');
    if ($cutoffTime) {
        $query->where('openTime', '<', $cutoffTime);
    }
    $totalHistory = $query->order('openTime', 'desc')
        ->limit($periods + $testCount + 20)
        ->select();

    if (count($totalHistory) < $periods + $testCount) {
        return ['hit_rate' => 0, 'avg_rank' => 0, 'details' => [], 'error' => '数据不足'];
    }

    $hits    = 0;
    $ranks   = [];
    $details = [];

    // Rows are newest first, so index 0..testCount-1 are the most recent draws.
    for ($i = 0; $i < $testCount; $i++) {
        $targetRow    = $totalHistory[$i];
        $targetExpect = (string)$targetRow['expect'];
        $actualNum    = (int)$targetRow['num7'];

        // skipBacktest = true prevents recursion back into this method.
        $predResult = $this->getPredictionV4($periods, $weights, $targetExpect, true, 0);

        if (isset($predResult['error']) || empty($predResult['predictions'])) {
            continue; // not evaluated; excluded from the statistics below
        }

        $rank = -1;
        foreach ($predResult['predictions'] as $idx => $p) {
            if ($p['num'] === $actualNum) {
                $rank = $idx + 1;
                break;
            }
        }

        if ($rank > 0) {
            $hits++;
            $ranks[] = $rank;
        }

        $details[] = [
            'expect'      => $targetExpect,
            'actual'      => $actualNum,
            'predictions' => array_column($predResult['predictions'], 'num'),
            'hit'         => $rank > 0,
            'rank'        => $rank
        ];
    }

    // Rates are relative to the draws that were actually evaluated; counting skipped
    // draws in the denominator would report them as misses.
    $evaluated = count($details);
    $hitRate   = $evaluated > 0 ? round($hits / $evaluated * 100, 2) : 0;
    $avgRank   = count($ranks) > 0 ? round(array_sum($ranks) / count($ranks), 2) : 0;

    $ndcg5           = $this->_calculateNDCG($details, 5);
    $mrr             = $this->_calculateMRR($details);
    $hitDistribution = $this->_calculateHitDistribution($details);
    $precision5      = $evaluated > 0 ? round($hits / ($evaluated * 5) * 100, 2) : 0;

    return [
        'hit_rate'         => $hitRate,
        'avg_rank'         => $avgRank,
        'total_tests'      => $evaluated,
        'requested_tests'  => $testCount,
        'total_hits'       => $hits,
        'details'          => $details,
        'ndcg_5'           => $ndcg5,
        'mrr'              => $mrr,
        'hit_distribution' => $hitDistribution,
        'precision_5'      => $precision5,
        'data_sufficient'  => $evaluated >= 30,
    ];
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user