包含Normal Equations,批量梯度下降和随机梯度下降,这里的代码跟Logistic回归的代码类似
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
class LinearRegression(object):
    """Least-squares linear regression with three interchangeable solvers.

    The model is ``y ~ X . w``.  ``X`` is stored as loaded; no bias column is
    added by this class, so the data file must already contain it
    (``draw_result`` treats ``w[0]`` as the intercept, i.e. it assumes the
    first feature column is the constant 1 -- confirm against the data set).

    Solvers: ``normal_equations`` (closed form), ``batch_gradient_descent``
    and ``stochastic_gradient_descent``.  Each stores its result in
    ``self._final_w`` and returns it.
    """

    def __init__(self):
        # Weight vector and cost recorded after every gradient update.  They
        # are appended to by both descent methods and never cleared, so
        # repeated runs on the same instance accumulate into the same lists.
        self._history_w = []
        self._cost = []
        # Filled in by load_input_data() and by the solvers respectively.
        self._input_x = None
        self._input_y = None
        self._final_w = None

    def load_input_data(self, data_file):
        """Load training samples from a whitespace-separated text file.

        Every non-blank line holds the feature values followed by the target
        value in the last column (the original three-column ``x0 x1 y``
        layout is one instance of this).  Blank lines are skipped.

        Args:
            data_file: path of the file to read.

        Raises:
            ValueError: if a field is not a number or a line has fewer than
                two columns.
        """
        input_x = []
        input_y = []
        with open(data_file) as f:
            for line in f:
                values = [float(field) for field in line.split()]
                if not values:
                    continue  # blank line, e.g. a trailing newline
                if len(values) < 2:
                    raise ValueError(
                        "expected at least one feature and a target, got: %r"
                        % line)
                input_x.append(values[:-1])
                input_y.append(values[-1])
        self._input_x = np.array(input_x)
        self._input_y = np.array(input_y)

    def normal_equations(self):
        """Solve ``(X^T X) w = X^T y`` directly and return ``w``.

        The linear system is solved instead of forming ``(X^T X)^-1``
        explicitly, which avoids the extra rounding error of the inverse.

        Raises:
            numpy.linalg.LinAlgError: if ``X^T X`` is singular.
        """
        x = self._input_x
        xtx = np.dot(x.T, x)
        xty = np.dot(x.T, self._input_y)
        self._final_w = np.linalg.solve(xtx, xty)
        return self._final_w

    def cost_function(self, w):
        """Return the sum of squared residuals of ``X . w`` against ``y``."""
        residual = np.dot(self._input_x, w) - self._input_y
        return np.dot(residual, residual)

    def batch_gradient_descent(self, iter_num, iter_rate):
        """Fit ``w`` with full-batch gradient descent, starting from all ones.

        Args:
            iter_num: number of updates; each update uses every sample.
            iter_rate: constant step size applied to ``X^T (X . w - y)``.

        Returns:
            The weight vector after the last update (also ``self._final_w``).
        """
        _, features = np.shape(self._input_x)
        w = np.ones(features)
        for _ in range(iter_num):
            residual = np.dot(self._input_x, w) - self._input_y
            w = w - iter_rate * np.dot(self._input_x.T, residual)
            self._history_w.append(w)
            self._cost.append(self.cost_function(w))
        self._final_w = w
        return w

    def stochastic_gradient_descent(self, iter_num, iter_rate):
        """Fit ``w`` with per-sample updates, starting from all ones.

        Samples are visited in file order on every pass (no shuffling).  The
        step size decays as ``4 / (1 + i + j) + iter_rate`` where ``i`` is
        the pass index and ``j`` the sample index, so ``iter_rate`` is the
        value the step approaches as the indices grow.  (Previously the
        argument was overwritten and a fixed 0.01 was used in its place.)

        Args:
            iter_num: number of passes over the data set.
            iter_rate: constant term of the decaying step size.

        Returns:
            The weight vector after the last update (also ``self._final_w``).
        """
        data_num, features = np.shape(self._input_x)
        w = np.ones(features)
        for i in range(iter_num):
            for j in range(data_num):
                # Keep the parameter intact; derive the per-update step here.
                step = 4 / (1.0 + j + i) + iter_rate
                sample = self._input_x[j]
                residual = np.dot(sample, w) - self._input_y[j]
                w = w - step * residual * sample
                self._history_w.append(w)
                self._cost.append(self.cost_function(w))
        self._final_w = w
        return w

    # Plotting helpers (the original article adds the following methods to
    # this same class).

    def draw_result(self, title):
        """Scatter-plot the samples and draw the fitted line.

        Uses feature column 1 as the horizontal axis and plots
        ``w[0] + w[1] * x`` between the smallest and largest value of that
        column.

        Args:
            title: title of the figure.
        """
        plt.figure(1)
        x1 = self._input_x[:, 1]
        plt.scatter(x1, self._input_y, color='b', s=20, marker=".")
        plt.xlabel('x')
        plt.ylabel('y')
        # Two end points are enough to draw a straight line.
        x2 = np.array([x1.min(), x1.max()])
        plt.plot(x2, self._final_w[0] + self._final_w[1] * x2, 'b-', lw=1)
        plt.title(title)
        plt.show()

    def draw_cost_function(self, title, y_limits=(-0.5, 100)):
        """Plot the recorded cost against the update index.

        Args:
            title: prefix of the figure title.
            y_limits: ``(bottom, top)`` of the displayed y-range; the default
                clips the large costs of the first updates.
        """
        plt.figure(1)
        x = np.arange(len(self._cost))
        plt.scatter(x, self._cost, label='Cost', color='g', s=10, marker=".")
        plt.xlabel('x')
        plt.ylabel('Cost function')
        plt.title(title + ' Cost trend')
        plt.ylim(*y_limits)  # restrict the displayed y-axis range
        plt.legend()
        plt.show()
数据集来自《机器学习实战》
https://github.com/apachecn/MachineLearning/blob/python-2.7/input/8.Regression/data.txt
3.1 Normal Equations
# Closed-form fit via the normal equations, then plot the fitted line.
title = "Normal Equations"
linear = LinearRegression()
linear.load_input_data("test.txt")
linear.normal_equations()
linear.draw_result(title)
3.2 批量梯度下降
# Batch gradient descent: 100 full-data updates with a step size of 0.001,
# then plot the fitted line and the recorded cost per update.
title = "Batch Gradient Descent"
linear = LinearRegression()
linear.load_input_data("test.txt")
linear.batch_gradient_descent(iter_rate=0.001, iter_num=100)
linear.draw_result(title)
linear.draw_cost_function(title)
3.3 随机梯度下降
# Per-sample gradient descent: 5 passes over the data set, then plot the
# fitted line and the recorded cost per update.
title = "Stochastic Gradient Descent"
linear = LinearRegression()
linear.load_input_data("test.txt")
linear.stochastic_gradient_descent(iter_rate=0.001, iter_num=5)
linear.draw_result(title)
linear.draw_cost_function(title)