import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
# 1. 基本导数计算
def basic_derivatives():
    """Compare the exact and the numerical derivative of f(x) = x**2.

    For each of the sample points -2, -1, 0, 1, 2 this prints the function
    value, the analytical derivative and a finite-difference estimate.

    Returns:
        tuple: ``(f, f_prime)`` where ``f(x) = x**2`` and ``f_prime(x) = 2*x``.
    """

    def f(x):
        """Return x squared."""
        return x**2

    def f_prime(x):
        """Return the exact derivative of ``f``, i.e. 2x."""
        return 2 * x

    def numerical_derivative(func, x, h=1e-6):
        """Estimate ``func'(x)`` with a central difference of step ``h``."""
        # The central difference has O(h**2) truncation error, whereas the
        # one-sided forward difference (f(x + h) - f(x)) / h only has O(h).
        return (func(x + h) - func(x - h)) / (2 * h)

    # Integer sample points: the ``:2d`` format below requires integers.
    x_values = np.array([-2, -1, 0, 1, 2])

    print("函数值和导数:")
    for x in x_values:
        fx = f(x)
        analytical_derivative = f_prime(x)
        numerical_derivative_val = numerical_derivative(f, x)
        print(f"x = {x:2d}: f(x) = {fx:4.1f}, f'(x) = {analytical_derivative:4.1f}, "
              f"数值导数 = {numerical_derivative_val:6.4f}")

    return f, f_prime
# 2. 梯度下降可视化
def gradient_descent_visualization():
    """Minimise f(x) = x**2 + 2x + 1 with gradient descent and plot the run.

    Starts from x = 5.0, prints the start value, the final iterate, its
    function value and the number of update steps performed, then shows a
    two-panel figure: the optimisation path on the curve, and the gradient
    at each visited point.

    Returns:
        tuple: ``(optimal_x, history)`` where ``history`` is the list of all
        visited x values, starting point included.
    """

    def f(x):
        """Objective: x**2 + 2x + 1."""
        return x**2 + 2 * x + 1

    def f_prime(x):
        """Derivative of the objective: 2x + 2."""
        return 2 * x + 2

    def gradient_descent(f, f_prime, x0, learning_rate=0.1, max_iterations=100):
        """Run fixed-step gradient descent; return ``(x, history)``.

        ``f`` is accepted for signature symmetry but is not evaluated here.
        """
        x = x0
        history = [x]
        for _ in range(max_iterations):
            gradient = f_prime(x)
            x = x - learning_rate * gradient
            history.append(x)
            # Convergence is judged on the gradient taken *before* this step,
            # so the step computed from that gradient is still recorded.
            if abs(gradient) < 1e-6:
                break
        return x, history

    # Run the optimisation.
    x0 = 5.0
    optimal_x, history = gradient_descent(f, f_prime, x0)

    print(f"初始值: x = {x0}")
    print(f"最优值: x = {optimal_x:.6f}")
    print(f"函数值: f(x) = {f(optimal_x):.6f}")
    # history also holds the starting point, so the number of update steps
    # is one less than its length.
    print(f"迭代次数: {len(history) - 1}")

    # Visualisation.
    x_plot = np.linspace(-3, 7, 100)
    y_plot = f(x_plot)

    plt.figure(figsize=(12, 5))

    # Left panel: the curve with the optimisation path on top of it.
    plt.subplot(1, 2, 1)
    plt.plot(x_plot, y_plot, 'b-', label='f(x) = x² + 2x + 1')
    plt.plot(history, [f(x) for x in history], 'ro-', label='优化路径')
    plt.plot(optimal_x, f(optimal_x), 'go', markersize=10, label='最优解')
    plt.xlabel('x')
    plt.ylabel('f(x)')
    plt.title('梯度下降优化')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Right panel: gradient at every visited point.
    plt.subplot(1, 2, 2)
    gradients = [f_prime(x) for x in history]
    plt.plot(gradients, 'r-', label='梯度')
    plt.axhline(y=0, color='k', linestyle='--', alpha=0.5)
    plt.xlabel('迭代次数')
    plt.ylabel('梯度值')
    plt.title('梯度收敛')
    plt.legend()
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    return optimal_x, history
# 3. 多维梯度下降
def multidimensional_gradient_descent():
    """Run gradient descent on f(x, y) = x**2 + y**2 and draw the path.

    Starts at (3, 4), prints the start point, the final point and its
    function value, then overlays the visited points on a contour plot.

    Returns:
        tuple: ``(optimal_point, history)`` where ``optimal_point`` is the
        final iterate and ``history`` is a NumPy array holding every visited
        point (one row per point, starting point first).
    """

    def f_2d(x, y):
        """Objective: x**2 + y**2."""
        return x**2 + y**2

    def gradient_2d(x, y):
        """Gradient of the objective as an array [2x, 2y]."""
        return np.array([2*x, 2*y])

    def gradient_descent_2d(f, gradient_func, x0, learning_rate=0.1, max_iterations=100):
        """Fixed-step gradient descent; ``f`` is accepted but not evaluated."""
        point = np.array(x0, dtype=float)
        visited = [point.copy()]
        for _ in range(max_iterations):
            grad = gradient_func(point[0], point[1])
            point = point - learning_rate * grad
            visited.append(point.copy())
            # Stop once the gradient used for this step was already tiny.
            if np.linalg.norm(grad) < 1e-6:
                break
        return point, visited

    # Run the optimisation.
    x0 = np.array([3.0, 4.0])
    optimal_point, history = gradient_descent_2d(f_2d, gradient_2d, x0)

    print(f"初始点: {x0}")
    print(f"最优点: {optimal_point}")
    print(f"函数值: {f_2d(optimal_point[0], optimal_point[1]):.6f}")

    # Evaluate the objective on a 100 x 100 grid over [-5, 5] x [-5, 5].
    axis = np.linspace(-5, 5, 100)
    X, Y = np.meshgrid(axis, axis)
    Z = f_2d(X, Y)

    plt.figure(figsize=(10, 8))

    # Contour lines of the objective.
    plt.contour(X, Y, Z, levels=20, alpha=0.6)
    plt.colorbar(label='f(x, y)')

    # Optimisation path; converted to an array so columns can be sliced.
    history = np.array(history)
    plt.plot(history[:, 0], history[:, 1], 'ro-', label='优化路径')
    plt.plot(optimal_point[0], optimal_point[1], 'go', markersize=10, label='最优点')

    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('二维梯度下降')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.axis('equal')
    plt.show()

    return optimal_point, history
# 4. 线性回归中的梯度下降
def linear_regression_gradient_descent():
    """Fit y = w*x + b to synthetic data by gradient descent on the MSE.

    Generates 100 samples of y = 2x + 1 plus Gaussian noise (seed 42),
    trains for 1000 steps from w = b = 0, prints progress every 100 steps,
    and shows the fitted line next to the loss curve.

    Returns:
        tuple: ``(w_optimal, b_optimal, history)`` where ``history`` is a
        list of dicts with keys ``'iteration'``, ``'loss'``, ``'w'``, ``'b'``.
    """
    # Synthetic data; the seed makes the run reproducible.
    np.random.seed(42)
    X = np.random.randn(100, 1)
    y = 2 * X + 1 + 0.1 * np.random.randn(100, 1)

    def linear_model(X, w, b):
        """Prediction of the linear model for inputs ``X``."""
        return X * w + b

    def mse_loss(y_true, y_pred):
        """Mean squared error between targets and predictions."""
        return np.mean((y_true - y_pred) ** 2)

    def gradient_mse(X, y, w, b):
        """Gradient of the MSE with respect to (w, b) as a length-2 array."""
        residual = y - linear_model(X, w, b)
        dw = -2 * np.mean(X * residual)
        db = -2 * np.mean(residual)
        return np.array([dw, db])

    def train_linear_regression(X, y, learning_rate=0.01, max_iterations=1000):
        """Run gradient descent from w = b = 0; return ``(w, b, history)``."""
        w = 0.0
        b = 0.0
        history = []
        for step in range(max_iterations):
            # The loss is measured with the parameters *before* this update.
            loss = mse_loss(y, linear_model(X, w, b))
            dw, db = gradient_mse(X, y, w, b)
            w = w - learning_rate * dw
            b = b - learning_rate * db
            history.append({'iteration': step, 'loss': loss, 'w': w, 'b': b})
            if step % 100 == 0:
                print(f"迭代 {step}: 损失 = {loss:.6f}, w = {w:.4f}, b = {b:.4f}")
        return w, b, history

    # Train the model.
    w_optimal, b_optimal, history = train_linear_regression(X, y)

    print(f"\n最终参数: w = {w_optimal:.4f}, b = {b_optimal:.4f}")
    print(f"真实参数: w = 2.0, b = 1.0")

    plt.figure(figsize=(12, 5))

    # Left panel: data points and the fitted line.
    plt.subplot(1, 2, 1)
    plt.scatter(X, y, alpha=0.6, label='数据')
    X_plot = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
    y_plot = linear_model(X_plot, w_optimal, b_optimal)
    plt.plot(X_plot, y_plot, 'r-', linewidth=2, label=f'拟合线: y = {w_optimal:.2f}x + {b_optimal:.2f}')
    plt.xlabel('X')
    plt.ylabel('y')
    plt.title('线性回归结果')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Right panel: loss per iteration.
    plt.subplot(1, 2, 2)
    iterations = [record['iteration'] for record in history]
    losses = [record['loss'] for record in history]
    plt.plot(iterations, losses, 'b-')
    plt.xlabel('迭代次数')
    plt.ylabel('损失')
    plt.title('损失函数收敛')
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    return w_optimal, b_optimal, history
# 5. 局部最小值问题
def local_minima_example():
    """Run gradient descent on sin(x) + 0.5*x**2 from several start points.

    Starts from x = -5, 0 and 5, prints where each run ends, and plots the
    curve together with every start point and end point.

    Note: the second derivative of this objective is 1 - sin(x) >= 0, so the
    function is convex and all starts are expected to reach the same
    minimiser; it does not actually exhibit distinct local minima.

    Returns:
        list[dict]: one dict per start point with keys ``'start'``,
        ``'optimal'`` and ``'value'``.
    """

    def complex_function(x):
        """Objective: sin(x) + 0.5 * x**2."""
        return np.sin(x) + 0.5 * x**2

    def complex_function_derivative(x):
        """Derivative of the objective: cos(x) + x."""
        return np.cos(x) + x

    def gradient_descent(f, f_prime, x0, learning_rate=0.1, max_iterations=100):
        """Fixed-step gradient descent; return ``(x, history)``.

        Defined locally because no module-level ``gradient_descent`` is
        visible in this file; calling the bare name would raise NameError.
        ``f`` is accepted for signature symmetry but is not evaluated.
        """
        x = x0
        history = [x]
        for _ in range(max_iterations):
            gradient = f_prime(x)
            x = x - learning_rate * gradient
            history.append(x)
            # Stop once the gradient used for this step was already tiny.
            if abs(gradient) < 1e-6:
                break
        return x, history

    # Run gradient descent from each start point.
    starting_points = [-5, 0, 5]
    results = []
    for x0 in starting_points:
        x_opt, _ = gradient_descent(complex_function, complex_function_derivative, x0)
        value = complex_function(x_opt)
        results.append({'start': x0, 'optimal': x_opt, 'value': value})
        print(f"起点 {x0}: 收敛到 x = {x_opt:.4f}, f(x) = {value:.4f}")

    # Visualisation.
    x_plot = np.linspace(-6, 6, 200)
    y_plot = complex_function(x_plot)

    plt.figure(figsize=(12, 6))
    plt.plot(x_plot, y_plot, 'b-', label='f(x) = sin(x) + 0.5x²')
    for result in results:
        plt.plot(result['start'], complex_function(result['start']), 'ro', markersize=8, label=f'起点 {result["start"]}')
        plt.plot(result['optimal'], result['value'], 'go', markersize=8, label=f'收敛点 {result["optimal"]:.2f}')
    plt.xlabel('x')
    plt.ylabel('f(x)')
    plt.title('局部最小值问题')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()

    return results
# 运行所有示例
if __name__ == "__main__":
    # Run every demo in order. Each one prints its own results, and all but
    # the first also open a matplotlib window.

    print("=== 基本导数计算 ===")
    f, f_prime = basic_derivatives()

    print("\n=== 梯度下降可视化 ===")
    optimal_x, history = gradient_descent_visualization()

    print("\n=== 多维梯度下降 ===")
    optimal_point, history_2d = multidimensional_gradient_descent()

    print("\n=== 线性回归梯度下降 ===")
    w_opt, b_opt, history_lr = linear_regression_gradient_descent()

    print("\n=== 局部最小值问题 ===")
    results = local_minima_example()