# Full implementation of EPA kernel-weighted local median smoothing with interactive dashboard generation.
"""
EPA Kernel-Weighted Local Median Smoothing - Comprehensive Interactive Dashboard
Generates a single HTML file (pre-computed data embedded; Plotly and MathJax are loaded from CDNs) with:
- Interactive dropdown controls for bandwidth, outlier fraction, noise
- Multiple kernel options (Epanechnikov, Gaussian, Uniform, Triangular)
- Multiple signal presets (Sin+Cos, Step, Ramp, Sawtooth, Gaussian)
- Real-time plot updates
- Animation of the smoothing process
- MathJax-rendered formulas
- Metrics comparison panel
Academic Reference:
"A New EMD Approach" - Empirical Mode Decomposition with Local Median Smoothing
Authors: Yezhou Sha, Volodia Spokoiny, Wolfgang Karl Hardle,
David Siang-Li Jheng, Marc-Eduard Ionescu, Daniel Traian Pele
Affiliations: Humboldt-Universitat zu Berlin, MSCA Digital Finance,
Bucharest University of Economic Studies
This implementation follows the Local Median approach from the presentation,
using Epanechnikov kernel-weighted median for robust signal decomposition.
Related resources:
- Theory: https://digital-ai-finance.github.io/emd_local_median/
- Dashboard: https://digital-ai-finance.github.io/epa_smoothing/dashboard.html
- QuantLet: https://github.com/QuantLet/Crypto_Currency_Returns
"""
import numpy as np
import json
from pathlib import Path
# =============================================================================
# Kernel Functions
# =============================================================================
def epanechnikov_kernel(u):
    """Evaluate the Epanechnikov kernel K(u) = 0.75 * (1 - u^2) on |u| <= 1.

    Args:
        u: NumPy array of scaled distances. It is indexed with a boolean
            mask, so it must support NumPy-style mask indexing.

    Returns:
        Float array shaped like ``u``; entries with |u| > 1 are 0.
    """
    in_support = np.abs(u) <= 1
    kernel_values = np.zeros_like(u, dtype=float)
    u_in = u[in_support]
    kernel_values[in_support] = 0.75 * (1 - u_in * u_in)
    return kernel_values
def gaussian_kernel(u):
    """Evaluate the standard normal density, truncated to |u| <= 3.

    K(u) = exp(-u^2 / 2) / sqrt(2 * pi) inside the truncation window and
    0 outside it.

    Args:
        u: NumPy array of scaled distances (indexed with a boolean mask).

    Returns:
        Float array shaped like ``u`` with the kernel values.
    """
    kernel_values = np.zeros_like(u, dtype=float)
    within_three = np.abs(u) <= 3
    normaliser = np.sqrt(2 * np.pi)
    kernel_values[within_three] = np.exp(-u[within_three]**2 / 2) / normaliser
    return kernel_values
def uniform_kernel(u):
    """Evaluate the uniform (box) kernel: 0.5 where |u| <= 1, else 0.

    Args:
        u: Scaled distances.

    Returns:
        Float array shaped like ``u`` with the kernel values.
    """
    return np.where(np.abs(u) <= 1, 0.5, 0.0)
def triangular_kernel(u):
    """Evaluate the triangular kernel K(u) = 1 - |u| on |u| <= 1.

    Args:
        u: NumPy array of scaled distances (indexed with a boolean mask).

    Returns:
        Float array shaped like ``u``; entries with |u| > 1 are 0.
    """
    on_support = np.abs(u) <= 1
    kernel_values = np.zeros_like(u, dtype=float)
    distance_inside = np.abs(u[on_support])
    kernel_values[on_support] = 1 - distance_inside
    return kernel_values
def get_kernel(name):
    """Look up a kernel function by its lowercase name.

    Args:
        name: One of 'epanechnikov', 'gaussian', 'uniform', 'triangular'.

    Returns:
        The matching kernel function. Unrecognised names fall back to
        ``epanechnikov_kernel`` rather than raising.
    """
    registry = {
        'epanechnikov': epanechnikov_kernel,
        'gaussian': gaussian_kernel,
        'uniform': uniform_kernel,
        'triangular': triangular_kernel,
    }
    try:
        return registry[name]
    except KeyError:
        # Unknown kernel name: use the Epanechnikov default.
        return epanechnikov_kernel
# =============================================================================
# Signal Generation
# =============================================================================
def generate_signal(x, signal_type='sincos'):
    """Evaluate one of the noise-free test signals at the points ``x``.

    Args:
        x: NumPy array of sample locations.
        signal_type: 'sincos', 'step', 'ramp', 'sawtooth' or 'gaussian'.
            Any other value is treated like 'sincos'.

    Returns:
        Array of signal values, one per entry of ``x``.
    """
    if signal_type == 'step':
        # Jump from -0.5 to 0.5 at x = pi.
        return np.where(x < np.pi, -0.5, 0.5)
    if signal_type == 'ramp':
        return (x - np.pi) / np.pi
    if signal_type == 'sawtooth':
        cycles = x / (2 * np.pi)
        return 2 * (cycles - np.floor(cycles + 0.5))
    if signal_type == 'gaussian':
        # Bump centred at x = pi.
        return np.exp(-((x - np.pi)**2) / 2)
    # 'sincos' and every unrecognised name share the same default signal.
    return np.sin(x) + 0.5 * np.cos(2 * x)
# =============================================================================
# Core Smoothing Functions
# =============================================================================
def weighted_median(values, weights):
    """Return the weighted median of ``values`` under non-negative ``weights``.

    Samples whose weight is not strictly positive are dropped before
    sorting, so a point outside the kernel window can never be picked or
    averaged into the result. When the cumulative normalised weight hits
    one half exactly at a sample (within ``np.isclose`` tolerance), the
    lower and upper weighted medians are averaged; for equal weights this
    matches the ordinary median of an even-sized sample.

    Args:
        values: 1-D array-like of sample values.
        weights: 1-D array-like of weights, same length as ``values``.

    Returns:
        The weighted median as a float, or ``np.nan`` when there are no
        samples with positive weight.
    """
    values = np.asarray(values, dtype=float)
    weights = np.asarray(weights, dtype=float)

    has_weight = weights > 0
    if values.size == 0 or not np.any(has_weight):
        return np.nan

    # Keep only samples that actually carry weight, then sort by value.
    kept_values = values[has_weight]
    kept_weights = weights[has_weight]
    order = np.argsort(kept_values)
    sorted_values = kept_values[order]
    sorted_weights = kept_weights[order]

    # Normalise by the running total so the last entry is exactly 1.0.
    running = np.cumsum(sorted_weights)
    cumulative = running / running[-1]

    # First index whose cumulative weight reaches one half.
    last = len(sorted_values) - 1
    idx = min(int(np.searchsorted(cumulative, 0.5)), last)

    # Exact split at this sample: half the weight lies at or below it and
    # half strictly above, so average it with the next sample.
    if idx < last and np.isclose(cumulative[idx], 0.5):
        return 0.5 * (sorted_values[idx] + sorted_values[idx + 1])

    # Same split, but rounding left the previous cumulative weight a hair
    # below one half.
    if idx > 0 and np.isclose(cumulative[idx - 1], 0.5):
        return 0.5 * (sorted_values[idx - 1] + sorted_values[idx])

    return sorted_values[idx]
def epa_local_median(x, y, bandwidth, kernel_name='epanechnikov'):
    """Smooth ``y`` with a kernel-weighted local median at every point of ``x``.

    For each location ``x[i]`` the kernel is evaluated on the scaled
    distances ``(x - x[i]) / bandwidth`` and the weighted median of ``y``
    under those weights becomes the fitted value.

    Args:
        x: NumPy array of sample locations.
        y: NumPy array of observations, same length as ``x``.
        bandwidth: Kernel bandwidth h used to scale distances.
        kernel_name: Kernel name understood by ``get_kernel``.

    Returns:
        Float array with one smoothed value per entry of ``x``.
    """
    kernel = get_kernel(kernel_name)
    fitted = np.zeros(len(x))
    for position, centre in enumerate(x):
        scaled_distance = (x - centre) / bandwidth
        fitted[position] = weighted_median(y, kernel(scaled_distance))
    return fitted
def epa_local_mean(x, y, bandwidth, kernel_name='epanechnikov'):
    """Smooth ``y`` with a kernel-weighted local mean (Nadaraya-Watson).

    Args:
        x: NumPy array of sample locations.
        y: NumPy array of observations, same length as ``x``.
        bandwidth: Kernel bandwidth h used to scale distances.
        kernel_name: Kernel name understood by ``get_kernel``.

    Returns:
        Float array with one smoothed value per entry of ``x``; a location
        whose kernel weights do not sum to a positive number gets NaN.
    """
    kernel = get_kernel(kernel_name)
    point_count = len(x)
    fitted = np.zeros(point_count)
    for position in range(point_count):
        w = kernel((x - x[position]) / bandwidth)
        mass = np.sum(w)
        fitted[position] = np.sum(w * y) / mass if mass > 0 else np.nan
    return fitted
# =============================================================================
# Data Generation
# =============================================================================
def generate_data(n, noise_std, outlier_frac, signal_type='sincos', seed=42):
    """Build a noisy, outlier-contaminated sample of a test signal.

    Seeds NumPy's global random generator, samples the chosen signal on
    ``n`` evenly spaced points of [0, 2*pi], adds Gaussian noise, and then
    shifts ``int(n * outlier_frac)`` randomly chosen points by +4 or -4.

    Args:
        n: Number of sample points.
        noise_std: Standard deviation of the additive Gaussian noise.
        outlier_frac: Fraction of points turned into outliers.
        signal_type: Signal name passed to ``generate_signal``.
        seed: Seed handed to ``np.random.seed``.

    Returns:
        Tuple ``(x, y_true, y_noisy)`` of arrays of length ``n``.
    """
    np.random.seed(seed)
    grid = np.linspace(0, 2 * np.pi, n)
    clean = generate_signal(grid, signal_type)
    observed = clean + np.random.normal(0, noise_std, n)

    outlier_count = int(n * outlier_frac)
    if outlier_count <= 0:
        return grid, clean, observed

    # Draw positions first, then signs, so the random stream is consumed
    # in a fixed order.
    positions = np.random.choice(n, outlier_count, replace=False)
    signs = np.random.choice([-1, 1], outlier_count)
    observed[positions] += signs * 4.0
    return grid, clean, observed
# =============================================================================
# Pre-computation
# =============================================================================
def _js_number_str(value):
    """Format a number the way JavaScript prints it inside a template literal.

    The dashboard script rebuilds result keys from ``parseFloat`` values, so
    0.0 becomes "0" and 1.0 becomes "1" there, whereas Python's default float
    formatting yields "0.0" and "1.0". Dropping the trailing ".0" makes both
    sides agree for the plain decimal values used in the parameter grid.
    """
    text = repr(float(value))
    return text[:-2] if text.endswith('.0') else text


def precompute_all_data():
    """Pre-compute smoothed curves and error metrics for the whole grid.

    Iterates over every combination of kernel, signal, noise level, outlier
    fraction and bandwidth, runs both the local median and the local mean
    smoother, and stores the curves together with RMSE/MAE against the
    noise-free signal. Progress is printed every 500 combinations.

    Returns:
        A JSON-serialisable dict with the x grid, the true signals, the
        parameter lists and a ``results`` mapping. Result keys have the form
        ``kernel_signal_noise_outlier_bw`` with the numeric parts formatted
        as JavaScript would print them, because the dashboard script looks
        results up with keys it builds from its own numeric state.
    """
    n = 150
    kernels = ['epanechnikov', 'gaussian', 'uniform', 'triangular']
    signals = ['sincos', 'step', 'ramp', 'sawtooth', 'gaussian']
    bandwidths = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0, 1.2, 1.5]
    outlier_fracs = [0.0, 0.02, 0.05, 0.08, 0.10, 0.15, 0.20]
    noise_stds = [0.15, 0.25, 0.35, 0.5]

    # Shared x grid for every combination.
    x = np.linspace(0, 2 * np.pi, n)

    # Noise-free reference curve for each signal type.
    true_signals = {sig: generate_signal(x, sig).tolist() for sig in signals}

    data = {
        'x': x.tolist(),
        'true_signals': true_signals,
        'n': n,
        'kernels': kernels,
        'signals': signals,
        'bandwidths': bandwidths,
        'outlier_fracs': outlier_fracs,
        'noise_stds': noise_stds,
        'results': {},
    }

    total = (len(kernels) * len(signals) * len(noise_stds)
             * len(outlier_fracs) * len(bandwidths))
    count = 0

    for kernel in kernels:
        for signal in signals:
            y_true = np.array(true_signals[signal])
            for noise in noise_stds:
                for outlier in outlier_fracs:
                    # The noisy sample does not depend on the bandwidth, so
                    # generate and serialise it once per outer combination.
                    _, _, y_noisy = generate_data(n, noise, outlier, signal, seed=42)
                    y_noisy_list = y_noisy.tolist()
                    key_prefix = (
                        f"{kernel}_{signal}_"
                        f"{_js_number_str(noise)}_{_js_number_str(outlier)}"
                    )

                    for bw in bandwidths:
                        count += 1
                        if count % 500 == 0:
                            print(f"  Progress: {count}/{total} ({100*count/total:.1f}%)")

                        # Key format: kernel_signal_noise_outlier_bw
                        key = f"{key_prefix}_{_js_number_str(bw)}"

                        # Smoothed curves for this bandwidth.
                        y_median = epa_local_median(x, y_noisy, bw, kernel)
                        y_mean = epa_local_mean(x, y_noisy, bw, kernel)

                        # Error metrics against the noise-free signal.
                        err_median = y_median - y_true
                        err_mean = y_mean - y_true

                        data['results'][key] = {
                            'y_noisy': y_noisy_list,
                            'median': y_median.tolist(),
                            'mean': y_mean.tolist(),
                            'rmse_median': float(np.sqrt(np.mean(err_median**2))),
                            'rmse_mean': float(np.sqrt(np.mean(err_mean**2))),
                            'mae_median': float(np.mean(np.abs(err_median))),
                            'mae_mean': float(np.mean(np.abs(err_mean))),
                        }

    print(f"  Completed: {count} combinations")
    return data
# =============================================================================
# HTML Generation
# =============================================================================
def generate_html():
    """Generate the complete interactive HTML dashboard.

    Runs ``precompute_all_data()``, serialises the result with ``json.dumps``
    and embeds it as the ``DATA`` constant inside a single HTML page built
    from an f-string. The page's script looks results up in ``DATA.results``
    by a key assembled from the current control values and draws the plots
    with Plotly; formulas are typeset with MathJax. Both libraries are loaded
    from their CDNs.

    The former ``polyfill.io`` script tag is intentionally not emitted: the
    page's own script relies on ES6 syntax (arrow functions, template
    literals) that a polyfill cannot supply, and loading code from that
    third-party domain is an avoidable supply-chain risk.

    Inside the f-string, literal braces are doubled and backslashes are
    escaped, so the text below is not the final HTML byte for byte.

    Returns:
        The full HTML document as a string.
    """
    print("Pre-computing all parameter combinations...")
    data = precompute_all_data()
    data_json = json.dumps(data)
    html = f'''<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>EPA Kernel-Weighted Local Median Smoothing</title>
<script src="https://cdn.plot.ly/plotly-2.27.0.min.js"></script>
<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
<style>
* {{
box-sizing: border-box;
margin: 0;
padding: 0;
}}
body {{
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
padding: 20px;
}}
.container {{
max-width: 1400px;
margin: 0 auto;
}}
.header {{
text-align: center;
color: white;
margin-bottom: 20px;
}}
.header h1 {{
font-size: 2em;
margin-bottom: 10px;
text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
}}
.header p {{
font-size: 1.1em;
opacity: 0.9;
}}
.panel {{
background: white;
border-radius: 12px;
box-shadow: 0 10px 40px rgba(0,0,0,0.2);
padding: 20px;
margin-bottom: 20px;
}}
.controls {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 15px;
align-items: end;
}}
.control-group {{
display: flex;
flex-direction: column;
}}
.control-group label {{
font-weight: 600;
color: #333;
margin-bottom: 8px;
font-size: 0.9em;
}}
.control-group select {{
width: 100%;
padding: 10px 12px;
border: 2px solid #e0e0e0;
border-radius: 8px;
font-size: 1em;
background: white;
cursor: pointer;
transition: border-color 0.3s;
}}
.control-group select:hover {{
border-color: #667eea;
}}
.control-group select:focus {{
outline: none;
border-color: #667eea;
box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.2);
}}
.main-content {{
display: grid;
grid-template-columns: 1fr 320px;
gap: 20px;
}}
.metrics-panel {{
display: flex;
flex-direction: column;
gap: 15px;
}}
.metric-card {{
background: linear-gradient(135deg, #f5f7fa 0%, #e4e8ec 100%);
border-radius: 10px;
padding: 15px;
text-align: center;
}}
.metric-card h3 {{
font-size: 0.9em;
color: #666;
margin-bottom: 8px;
}}
.metric-value {{
font-size: 1.8em;
font-weight: bold;
}}
.metric-value.green {{ color: #2ca02c; }}
.metric-value.red {{ color: #d62728; }}
.metric-value.blue {{ color: #1f77b4; }}
.improvement {{
font-size: 0.85em;
margin-top: 5px;
}}
.improvement.positive {{ color: #2ca02c; }}
.improvement.negative {{ color: #d62728; }}
.math-panel {{
background: #f8f9fa;
border-left: 4px solid #667eea;
padding: 20px;
margin-top: 10px;
}}
.math-panel h3 {{
color: #333;
margin-bottom: 15px;
}}
.formula-row {{
display: flex;
justify-content: space-around;
flex-wrap: wrap;
gap: 20px;
}}
.formula-item {{
text-align: center;
padding: 10px;
}}
.formula-item .label {{
font-size: 0.85em;
color: #666;
margin-bottom: 5px;
}}
.tabs {{
display: flex;
border-bottom: 2px solid #e0e0e0;
margin-bottom: 15px;
flex-wrap: wrap;
}}
.tab {{
padding: 10px 20px;
cursor: pointer;
border-bottom: 3px solid transparent;
transition: all 0.3s;
font-weight: 500;
}}
.tab:hover {{
background: #f5f5f5;
}}
.tab.active {{
border-bottom-color: #667eea;
color: #667eea;
}}
.tab-content {{
display: none;
}}
.tab-content.active {{
display: block;
}}
.animation-controls {{
display: flex;
gap: 10px;
justify-content: center;
margin-top: 15px;
flex-wrap: wrap;
}}
.btn {{
padding: 10px 25px;
border: none;
border-radius: 6px;
cursor: pointer;
font-weight: 600;
transition: all 0.3s;
}}
.btn-primary {{
background: #667eea;
color: white;
}}
.btn-primary:hover {{
background: #5a6fd6;
}}
.btn-secondary {{
background: #e0e0e0;
color: #333;
}}
.btn-secondary:hover {{
background: #d0d0d0;
}}
.legend-container {{
text-align: center;
margin-top: 10px;
display: flex;
flex-wrap: wrap;
justify-content: center;
gap: 10px;
}}
.legend-item {{
display: inline-flex;
align-items: center;
font-size: 0.9em;
}}
.legend-color {{
width: 30px;
height: 4px;
margin-right: 8px;
border-radius: 2px;
}}
.insight-box {{
background: linear-gradient(135deg, #e8f5e9 0%, #c8e6c9 100%);
border-radius: 8px;
padding: 15px;
margin-top: 15px;
}}
.insight-box h4 {{
color: #2e7d32;
margin-bottom: 8px;
}}
.insight-box p {{
color: #1b5e20;
font-size: 0.95em;
}}
/* Responsive styles */
@media (max-width: 1000px) {{
.main-content {{
grid-template-columns: 1fr;
}}
.metrics-panel {{
display: grid;
grid-template-columns: repeat(3, 1fr);
gap: 10px;
}}
}}
@media (max-width: 768px) {{
body {{
padding: 10px;
}}
.header h1 {{
font-size: 1.4em;
}}
.header p {{
font-size: 0.95em;
}}
.controls {{
grid-template-columns: 1fr 1fr;
gap: 10px;
}}
.panel {{
padding: 15px;
}}
#main-plot {{
height: 300px !important;
}}
.metrics-panel {{
grid-template-columns: repeat(2, 1fr);
}}
.metric-card {{
padding: 10px;
}}
.metric-value {{
font-size: 1.4em;
}}
.tab {{
padding: 8px 12px;
font-size: 0.9em;
}}
.formula-row {{
flex-direction: column;
gap: 10px;
}}
}}
@media (max-width: 500px) {{
.controls {{
grid-template-columns: 1fr;
}}
.metrics-panel {{
grid-template-columns: 1fr 1fr;
}}
}}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>EPA Kernel-Weighted Local Median Smoothing</h1>
<p>Robust nonparametric smoothing that resists outliers</p>
</div>
<div class="panel">
<div class="controls">
<div class="control-group">
<label>Kernel</label>
<select id="kernel">
<option value="epanechnikov">Epanechnikov</option>
<option value="gaussian">Gaussian</option>
<option value="uniform">Uniform (Box)</option>
<option value="triangular">Triangular</option>
</select>
</div>
<div class="control-group">
<label>Signal</label>
<select id="signal">
<option value="sincos">Sin + Cos</option>
<option value="step">Step Function</option>
<option value="ramp">Ramp</option>
<option value="sawtooth">Sawtooth</option>
<option value="gaussian">Gaussian Bump</option>
</select>
</div>
<div class="control-group">
<label>Bandwidth (h)</label>
<select id="bandwidth">
<option value="0.2">0.2</option>
<option value="0.3">0.3</option>
<option value="0.4">0.4</option>
<option value="0.5" selected>0.5</option>
<option value="0.6">0.6</option>
<option value="0.7">0.7</option>
<option value="0.8">0.8</option>
<option value="1.0">1.0</option>
<option value="1.2">1.2</option>
<option value="1.5">1.5</option>
</select>
</div>
<div class="control-group">
<label>Outlier Fraction</label>
<select id="outliers">
<option value="0">0%</option>
<option value="0.02">2%</option>
<option value="0.05">5%</option>
<option value="0.08" selected>8%</option>
<option value="0.1">10%</option>
<option value="0.15">15%</option>
<option value="0.2">20%</option>
</select>
</div>
<div class="control-group">
<label>Noise Level</label>
<select id="noise">
<option value="0.15">0.15 (Low)</option>
<option value="0.25" selected>0.25 (Medium)</option>
<option value="0.35">0.35 (High)</option>
<option value="0.5">0.50 (Very High)</option>
</select>
</div>
</div>
</div>
<div class="main-content">
<div class="panel">
<div id="main-plot" style="width:100%;height:450px;"></div>
<div class="legend-container">
<span class="legend-item"><span class="legend-color" style="background:#999;"></span>Noisy Data</span>
<span class="legend-item"><span class="legend-color" style="background:#000;border-style:dashed;"></span>True Function</span>
<span class="legend-item"><span class="legend-color" style="background:#2ca02c;"></span>Kernel Median</span>
<span class="legend-item"><span class="legend-color" style="background:#d62728;"></span>Kernel Mean</span>
</div>
</div>
<div class="metrics-panel">
<div class="metric-card">
<h3>RMSE - Median</h3>
<div class="metric-value green" id="rmse-median">0.000</div>
</div>
<div class="metric-card">
<h3>RMSE - Mean</h3>
<div class="metric-value red" id="rmse-mean">0.000</div>
</div>
<div class="metric-card">
<h3>Median Improvement</h3>
<div class="metric-value blue" id="improvement">0%</div>
<div class="improvement" id="improvement-text">vs Mean smoother</div>
</div>
<div class="metric-card">
<h3>MAE - Median</h3>
<div class="metric-value green" id="mae-median">0.000</div>
</div>
<div class="metric-card">
<h3>MAE - Mean</h3>
<div class="metric-value red" id="mae-mean">0.000</div>
</div>
</div>
</div>
<div class="panel">
<div class="math-panel">
<h3>Kernel Formulas</h3>
<div class="formula-row" id="formula-row">
<div class="formula-item">
<div class="label">Epanechnikov Kernel</div>
<div>\\( K(u) = \\frac{{3}}{{4}}(1 - u^2) \\cdot \\mathbf{{1}}_{{|u| \\leq 1}} \\)</div>
</div>
<div class="formula-item">
<div class="label">Scaled Distance</div>
<div>\\( u_i = \\frac{{x_i - x_0}}{{h}} \\)</div>
</div>
<div class="formula-item">
<div class="label">Weighted Median</div>
<div>\\( \\hat{{y}}(x_0) = \\text{{wmedian}}\\{{y_i : w_i = K(u_i)\\}} \\)</div>
</div>
</div>
</div>
<div class="insight-box" id="insight-box">
<h4>Key Insight</h4>
<p id="insight-text">The weighted median ignores the magnitude of outliers - only their position in the sorted order matters. This makes it robust to extreme values that would heavily influence the weighted mean.</p>
</div>
</div>
<div class="panel">
<div class="tabs">
<div class="tab active" onclick="showTab('kernel')">Kernel Shape</div>
<div class="tab" onclick="showTab('bandwidth')">Bandwidth Comparison</div>
<div class="tab" onclick="showTab('residuals')">Residual Analysis</div>
<div class="tab" onclick="showTab('animation')">Step-by-Step Animation</div>
</div>
<div id="kernel-tab" class="tab-content active">
<div id="kernel-plot" style="width:100%;height:350px;"></div>
</div>
<div id="bandwidth-tab" class="tab-content">
<div id="bandwidth-plot" style="width:100%;height:350px;"></div>
</div>
<div id="residuals-tab" class="tab-content">
<div id="residuals-plot" style="width:100%;height:350px;"></div>
</div>
<div id="animation-tab" class="tab-content">
<div id="animation-plot" style="width:100%;height:350px;"></div>
<div class="animation-controls">
<button class="btn btn-primary" onclick="startAnimation()">Play Animation</button>
<button class="btn btn-secondary" onclick="stopAnimation()">Stop</button>
<button class="btn btn-secondary" onclick="resetAnimation()">Reset</button>
</div>
<div style="text-align:center;margin-top:10px;">
<span id="anim-status">Point: 0 / 150</span>
</div>
</div>
</div>
</div>
<script>
// Pre-computed data
const DATA = {data_json};
// Current state
let currentKernel = 'epanechnikov';
let currentSignal = 'sincos';
let currentBandwidth = 0.5;
let currentOutliers = 0.08;
let currentNoise = 0.25;
let animationInterval = null;
let animationIndex = 0;
// Kernel formulas for display
const kernelFormulas = {{
'epanechnikov': {{
name: 'Epanechnikov Kernel',
formula: '\\\\( K(u) = \\\\frac{{3}}{{4}}(1 - u^2) \\\\cdot \\\\mathbf{{1}}_{{|u| \\\\leq 1}} \\\\)'
}},
'gaussian': {{
name: 'Gaussian Kernel',
formula: '\\\\( K(u) = \\\\frac{{1}}{{\\\\sqrt{{2\\\\pi}}}} e^{{-u^2/2}} \\\\cdot \\\\mathbf{{1}}_{{|u| \\\\leq 3}} \\\\)'
}},
'uniform': {{
name: 'Uniform (Box) Kernel',
formula: '\\\\( K(u) = \\\\frac{{1}}{{2}} \\\\cdot \\\\mathbf{{1}}_{{|u| \\\\leq 1}} \\\\)'
}},
'triangular': {{
name: 'Triangular Kernel',
formula: '\\\\( K(u) = (1 - |u|) \\\\cdot \\\\mathbf{{1}}_{{|u| \\\\leq 1}} \\\\)'
}}
}};
// Get current data key
function getDataKey() {{
return `${{currentKernel}}_${{currentSignal}}_${{currentNoise}}_${{currentOutliers}}_${{currentBandwidth}}`;
}}
// Update main plot
function updateMainPlot() {{
const key = getDataKey();
const result = DATA.results[key];
const y_true = DATA.true_signals[currentSignal];
if (!result) {{
console.error('Data not found for key:', key);
return;
}}
const traces = [
{{
x: DATA.x,
y: result.y_noisy,
mode: 'markers',
name: 'Noisy Data',
marker: {{ size: 5, color: '#999', opacity: 0.6 }}
}},
{{
x: DATA.x,
y: y_true,
mode: 'lines',
name: 'True Function',
line: {{ color: 'black', width: 2, dash: 'dash' }}
}},
{{
x: DATA.x,
y: result.median,
mode: 'lines',
name: 'Kernel Median',
line: {{ color: '#2ca02c', width: 3 }}
}},
{{
x: DATA.x,
y: result.mean,
mode: 'lines',
name: 'Kernel Mean',
line: {{ color: '#d62728', width: 3 }}
}}
];
const kernelName = currentKernel.charAt(0).toUpperCase() + currentKernel.slice(1);
const signalNames = {{
'sincos': 'Sin + Cos',
'step': 'Step Function',
'ramp': 'Ramp',
'sawtooth': 'Sawtooth',
'gaussian': 'Gaussian Bump'
}};
const layout = {{
title: `${{kernelName}} Kernel: Median vs Mean (${{signalNames[currentSignal]}})`,
xaxis: {{ title: 'x' }},
yaxis: {{ title: 'y' }},
showlegend: false,
margin: {{ t: 50, b: 50, l: 50, r: 20 }}
}};
Plotly.react('main-plot', traces, layout);
// Update metrics
document.getElementById('rmse-median').textContent = result.rmse_median.toFixed(3);
document.getElementById('rmse-mean').textContent = result.rmse_mean.toFixed(3);
document.getElementById('mae-median').textContent = result.mae_median.toFixed(3);
document.getElementById('mae-mean').textContent = result.mae_mean.toFixed(3);
const improvement = ((result.rmse_mean - result.rmse_median) / result.rmse_mean * 100);
document.getElementById('improvement').textContent = improvement.toFixed(1) + '%';
const improvementEl = document.getElementById('improvement');
if (improvement > 0) {{
improvementEl.classList.remove('red');
improvementEl.classList.add('green');
document.getElementById('improvement-text').textContent = 'better than Mean';
document.getElementById('improvement-text').className = 'improvement positive';
}} else {{
improvementEl.classList.remove('green');
improvementEl.classList.add('red');
document.getElementById('improvement-text').textContent = 'worse than Mean';
document.getElementById('improvement-text').className = 'improvement negative';
}}
// Update insight
updateInsight(improvement, currentOutliers, currentSignal);
// Update formula display
updateFormulaDisplay();
}}
function updateFormulaDisplay() {{
const info = kernelFormulas[currentKernel];
const formulaRow = document.getElementById('formula-row');
formulaRow.innerHTML = `
<div class="formula-item">
<div class="label">${{info.name}}</div>
<div>${{info.formula}}</div>
</div>
<div class="formula-item">
<div class="label">Scaled Distance</div>
<div>\\\\( u_i = \\\\frac{{x_i - x_0}}{{h}} \\\\)</div>
</div>
<div class="formula-item">
<div class="label">Weighted Median</div>
<div>\\\\( \\\\hat{{y}}(x_0) = \\\\text{{wmedian}}\\\\{{y_i : w_i = K(u_i)\\\\}} \\\\)</div>
</div>
`;
// Re-render MathJax
if (window.MathJax) {{
MathJax.typesetPromise([formulaRow]);
}}
}}
function updateInsight(improvement, outlierFrac, signalType) {{
const insightEl = document.getElementById('insight-text');
const signalNotes = {{
'step': 'Step functions are challenging because they have discontinuities. ',
'ramp': 'Linear ramps are easy to smooth but boundary effects can be visible. ',
'sawtooth': 'Sawtooth waves test how smoothers handle periodic discontinuities. ',
'gaussian': 'Gaussian bumps are smooth, so both methods perform similarly. ',
'sincos': ''
}};
let baseInsight = signalNotes[signalType] || '';
if (outlierFrac < 0.02) {{
insightEl.textContent = baseInsight + "With no outliers, both methods perform similarly. The median's robustness provides no advantage here.";
}} else if (improvement > 30) {{
insightEl.textContent = baseInsight + `With ${{(outlierFrac*100).toFixed(0)}}% outliers, the median smoother shows ${{improvement.toFixed(0)}}% lower error! Outlier magnitude doesn't affect the weighted median.`;
}} else if (improvement > 10) {{
insightEl.textContent = baseInsight + `The median smoother is more robust to the ${{(outlierFrac*100).toFixed(0)}}% outliers, achieving ${{improvement.toFixed(0)}}% lower RMSE.`;
}} else {{
insightEl.textContent = baseInsight + "At this setting, both smoothers perform comparably. Try increasing the outlier fraction to see the median's robustness advantage.";
}}
}}
// Kernel shape plot - shows all kernels with current one highlighted
function plotKernel() {{
const traces = [];
const kernelColors = {{
'epanechnikov': '#667eea',
'gaussian': '#ff7f0e',
'uniform': '#2ca02c',
'triangular': '#d62728'
}};
// Generate kernel data for each type
const u = [];
for (let i = -2; i <= 2; i += 0.01) {{
u.push(i);
}}
// Epanechnikov
const k_epa = u.map(ui => Math.abs(ui) <= 1 ? 0.75 * (1 - ui*ui) : 0);
// Gaussian (truncated at 3)
const k_gauss = u.map(ui => Math.abs(ui) <= 3 ? Math.exp(-ui*ui/2) / Math.sqrt(2*Math.PI) : 0);
// Uniform
const k_uniform = u.map(ui => Math.abs(ui) <= 1 ? 0.5 : 0);
// Triangular
const k_tri = u.map(ui => Math.abs(ui) <= 1 ? 1 - Math.abs(ui) : 0);
const allKernels = {{
'epanechnikov': k_epa,
'gaussian': k_gauss,
'uniform': k_uniform,
'triangular': k_tri
}};
// Add all kernels, highlight current one
Object.keys(allKernels).forEach(kernel => {{
const isCurrent = kernel === currentKernel;
traces.push({{
x: u,
y: allKernels[kernel],
mode: 'lines',
fill: isCurrent ? 'tozeroy' : 'none',
fillcolor: isCurrent ? 'rgba(102, 126, 234, 0.3)' : 'transparent',
line: {{
color: kernelColors[kernel],
width: isCurrent ? 3 : 1.5,
dash: isCurrent ? 'solid' : 'dot'
}},
name: kernel.charAt(0).toUpperCase() + kernel.slice(1),
opacity: isCurrent ? 1 : 0.5
}});
}});
const layout = {{
title: 'Kernel Comparison (Current: ' + currentKernel.charAt(0).toUpperCase() + currentKernel.slice(1) + ')',
xaxis: {{ title: 'u = (x - x0) / h', zeroline: true, range: [-2, 2] }},
yaxis: {{ title: 'K(u)', range: [0, 1.1] }},
legend: {{ x: 0.02, y: 0.98 }},
margin: {{ t: 50, b: 50, l: 50, r: 20 }}
}};
Plotly.react('kernel-plot', traces, layout);
}}
// Bandwidth comparison plot
function plotBandwidthComparison() {{
const bws = [0.2, 0.4, 0.7, 1.0];
const colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#9467bd'];
const y_true = DATA.true_signals[currentSignal];
// Get noisy data from first bandwidth
const firstKey = `${{currentKernel}}_${{currentSignal}}_${{currentNoise}}_${{currentOutliers}}_${{bws[0]}}`;
const firstResult = DATA.results[firstKey];
const traces = [
{{
x: DATA.x,
y: firstResult ? firstResult.y_noisy : [],
mode: 'markers',
name: 'Noisy Data',
marker: {{ size: 4, color: '#ccc' }}
}},
{{
x: DATA.x,
y: y_true,
mode: 'lines',
name: 'True',
line: {{ color: 'black', width: 2, dash: 'dash' }}
}}
];
bws.forEach((bw, i) => {{
const key = `${{currentKernel}}_${{currentSignal}}_${{currentNoise}}_${{currentOutliers}}_${{bw}}`;
const result = DATA.results[key];
if (result) {{
traces.push({{
x: DATA.x,
y: result.median,
mode: 'lines',
name: `h=${{bw}} (RMSE=${{result.rmse_median.toFixed(3)}})`,
line: {{ color: colors[i], width: 2.5 }}
}});
}}
}});
const layout = {{
title: 'Effect of Bandwidth on Kernel Median',
xaxis: {{ title: 'x' }},
yaxis: {{ title: 'y' }},
legend: {{ x: 0.02, y: 0.98 }},
margin: {{ t: 50, b: 50, l: 50, r: 20 }}
}};
Plotly.react('bandwidth-plot', traces, layout);
}}
// Residuals plot
function plotResiduals() {{
const key = getDataKey();
const result = DATA.results[key];
const y_true = DATA.true_signals[currentSignal];
if (!result) return;
const residuals_median = y_true.map((y, i) => result.median[i] - y);
const residuals_mean = y_true.map((y, i) => result.mean[i] - y);
const traces = [
{{
x: DATA.x,
y: residuals_median,
mode: 'lines+markers',
name: 'Median Residuals',
line: {{ color: '#2ca02c' }},
marker: {{ size: 4 }}
}},
{{
x: DATA.x,
y: residuals_mean,
mode: 'lines+markers',
name: 'Mean Residuals',
line: {{ color: '#d62728' }},
marker: {{ size: 4 }}
}},
{{
x: DATA.x,
y: DATA.x.map(() => 0),
mode: 'lines',
name: 'Zero',
line: {{ color: 'black', dash: 'dash', width: 1 }}
}}
];
const layout = {{
title: 'Residuals: Smoothed - True Function',
xaxis: {{ title: 'x' }},
yaxis: {{ title: 'Residual' }},
legend: {{ x: 0.02, y: 0.98 }},
margin: {{ t: 50, b: 50, l: 50, r: 20 }}
}};
Plotly.react('residuals-plot', traces, layout);
}}
// Kernel function for animation
function computeKernelWeights(u) {{
if (currentKernel === 'epanechnikov') {{
return Math.abs(u) <= 1 ? 0.75 * (1 - u*u) : 0;
}} else if (currentKernel === 'gaussian') {{
return Math.abs(u) <= 3 ? Math.exp(-u*u/2) / Math.sqrt(2*Math.PI) : 0;
}} else if (currentKernel === 'uniform') {{
return Math.abs(u) <= 1 ? 0.5 : 0;
}} else if (currentKernel === 'triangular') {{
return Math.abs(u) <= 1 ? 1 - Math.abs(u) : 0;
}}
return 0;
}}
// Animation
function plotAnimation(idx) {{
const key = getDataKey();
const result = DATA.results[key];
if (!result) return;
const x0 = DATA.x[idx];
const h = currentBandwidth;
const supportWidth = currentKernel === 'gaussian' ? h * 3 : h;
// Compute weights for current point
const weights = DATA.x.map(xi => {{
const u = (xi - x0) / h;
return computeKernelWeights(u);
}});
// Size based on weights
const maxWeight = Math.max(...weights);
const sizes = weights.map(w => 5 + (w / (maxWeight || 1)) * 15);
const colors = weights.map(w => w > 0 ? `rgba(102, 126, 234, ${{0.3 + (w / (maxWeight || 1)) * 0.7}})` : 'rgba(200, 200, 200, 0.3)');
const traces = [
{{
x: DATA.x,
y: result.y_noisy,
mode: 'markers',
name: 'Data',
marker: {{ size: sizes, color: colors }}
}},
{{
x: DATA.x.slice(0, idx + 1),
y: result.median.slice(0, idx + 1),
mode: 'lines',
name: 'Kernel Median (built)',
line: {{ color: '#2ca02c', width: 3 }}
}},
{{
x: [x0],
y: [result.median[idx]],
mode: 'markers',
name: 'Current Point',
marker: {{ size: 15, color: '#d62728', symbol: 'star' }}
}}
];
const layout = {{
title: `Building Kernel Median - Point ${{idx + 1}} / ${{DATA.n}}`,
xaxis: {{ title: 'x', range: [0, 2 * Math.PI] }},
yaxis: {{ title: 'y', range: [-2.5, 3] }},
shapes: [{{
type: 'rect',
x0: x0 - supportWidth,
x1: x0 + supportWidth,
y0: -2.5,
y1: 3,
fillcolor: 'rgba(255, 255, 0, 0.15)',
line: {{ color: 'orange', dash: 'dash' }}
}}],
showlegend: false,
margin: {{ t: 50, b: 50, l: 50, r: 20 }}
}};
Plotly.react('animation-plot', traces, layout);
document.getElementById('anim-status').textContent = `Point: ${{idx + 1}} / ${{DATA.n}}`;
}}
function startAnimation() {{
if (animationInterval) return;
animationInterval = setInterval(() => {{
plotAnimation(animationIndex);
animationIndex = (animationIndex + 3) % DATA.n;
}}, 100);
}}
function stopAnimation() {{
if (animationInterval) {{
clearInterval(animationInterval);
animationInterval = null;
}}
}}
function resetAnimation() {{
stopAnimation();
animationIndex = 0;
plotAnimation(0);
}}
// Tab switching
function showTab(tabName) {{
document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
document.querySelectorAll('.tab-content').forEach(t => t.classList.remove('active'));
event.target.classList.add('active');
document.getElementById(tabName + '-tab').classList.add('active');
if (tabName === 'kernel') plotKernel();
else if (tabName === 'bandwidth') plotBandwidthComparison();
else if (tabName === 'residuals') plotResiduals();
else if (tabName === 'animation') plotAnimation(animationIndex);
}}
// Event listeners for all controls
document.getElementById('kernel').addEventListener('change', function() {{
currentKernel = this.value;
updateMainPlot();
plotKernel();
plotBandwidthComparison();
plotResiduals();
}});
document.getElementById('signal').addEventListener('change', function() {{
currentSignal = this.value;
updateMainPlot();
plotBandwidthComparison();
plotResiduals();
}});
document.getElementById('bandwidth').addEventListener('change', function() {{
currentBandwidth = parseFloat(this.value);
updateMainPlot();
plotResiduals();
}});
document.getElementById('outliers').addEventListener('change', function() {{
currentOutliers = parseFloat(this.value);
updateMainPlot();
plotBandwidthComparison();
plotResiduals();
}});
document.getElementById('noise').addEventListener('change', function() {{
currentNoise = parseFloat(this.value);
updateMainPlot();
plotBandwidthComparison();
plotResiduals();
}});
// Initialize
updateMainPlot();
plotKernel();
</script>
</body>
</html>
'''
    return html
# =============================================================================
# Main
# =============================================================================
def main():
    """Build the dashboard and write it next to this script.

    The HTML is generated in full before the output file is touched, then
    written as UTF-8 to ``epa_smoothing_dashboard.html`` in the directory
    containing this file. The saved path and file size are printed.
    """
    output_path = Path(__file__).parent / "epa_smoothing_dashboard.html"

    print("Generating comprehensive EPA Local Median Smoothing dashboard...")
    print("This will compute 5,600 parameter combinations...")

    output_path.write_text(generate_html(), encoding='utf-8')

    size_kb = output_path.stat().st_size / 1024
    print(f"\nSaved: {output_path}")
    print(f"File size: {size_kb:.1f} KB")


if __name__ == "__main__":
    main()