## 机器学习基础笔记 (Machine Learning)

### Logistic回归python实现

#### 1.算法python代码

# -*- coding: utf-8 -*-

import matplotlib.pyplot as plt
import numpy as np

class Logistic(object):
    """Logistic regression trained by gradient descent (notes from《机器学习实战》ch.5).

    Loads a whitespace-separated data file where each line is ``x1 x2 y``
    (y in {0, 1}); a constant 1.0 is prepended to every sample as the bias
    feature.
    """

    def __init__(self, data_file='TestSet.txt'):
        # Fix: the pasted code used `data_file` without ever defining it.
        # It is now a constructor parameter; the default keeps the existing
        # no-argument `Logistic()` calls working.
        self._history_w = []      # w after every update, for the trend plots
        self._likelihood = []     # likelihood value after every update
        with open(data_file) as f:
            input_x = []
            input_y = []
            for line in f:
                [x1, x2, y] = line.split()
                input_x.append([1.0, float(x1), float(x2)])   # bias term first
                input_y.append(int(y))
        # Fix: np.float128 does not exist on every platform (e.g. Windows);
        # np.longdouble is the portable spelling of the same extended type.
        self._input_x = np.array(input_x, dtype=np.longdouble)
        self._input_y = np.array(input_y, dtype=np.longdouble).T

def sigmoid(self, x, w):
    """Logistic (sigmoid) function of the inner product <w, x>."""
    z = np.inner(w, x)
    return 1.0 / (1.0 + np.exp(-z))

def likelihood_function(self, w):
    """Objective value (negative log-likelihood) of the data under weights w."""
    scores = np.inner(self._input_x, w)            # one score per sample
    fit = np.inner(scores.T, self._input_y)        # scores summed over positive labels
    partition = np.sum(np.log(1 + np.exp(scores)))
    return partition - fit

def batch_gradient_descent(self, iter_num, iter_rate):
    """Train the weights by batch gradient descent.

    Fix: the pasted source lost this ``def`` line, leaving ``iter_num`` and
    ``iter_rate`` undefined; it is reconstructed here from the loop body.

    Args:
        iter_num: number of iterations over the whole data set.
        iter_rate: fixed learning rate.
    Returns:
        The learned weight vector w (also stored in ``self._final_w``).
    """
    (data_num, features) = np.shape(self._input_x)
    w = np.ones(features)      # start from the all-ones vector
    for i in range(iter_num):
        theta = self.sigmoid(self._input_x, w)
        delta = theta - self._input_y
        # Gradient step over the full batch: w <- w - rate * X^T (theta - y)
        w = w - iter_rate * np.inner(self._input_x.T, delta)
        self._history_w.append(w)
        self._likelihood.append(self.likelihood_function(w))
    self._final_w = w
    return w

def stochastic_gradient_descent(self, iter_num):
    """Train the weights by stochastic gradient descent (one sample per update).

    Fix: the pasted source lost this ``def`` line, leaving ``iter_num``
    undefined; it is reconstructed here. The unused local ``iter_range``
    was also dropped.

    Args:
        iter_num: number of passes over the data set.
    Returns:
        The learned weight vector w (also stored in ``self._final_w``).
    """
    (data_num, features) = np.shape(self._input_x)
    w = np.ones(features)      # start from the all-ones vector
    data_range = range(data_num)
    for i in range(iter_num):
        for j in data_range:
            iter_rate = 4 / (1.0 + j + i) + 0.01   # learning rate decays with the iteration count
            theta = self.sigmoid(self._input_x[j], w)
            delta = theta - self._input_y[j]
            w = w - iter_rate * delta * self._input_x[j]   # single-sample update
            self._history_w.append(w)
            self._likelihood.append(self.likelihood_function(w))
    self._final_w = w
    return w


#### 2. python数据显示

    def draw_result(self, title):
total_data = np.shape(self._input_y)[0]
self._nagtive_x = []
self._positive_x = []
for i in range(total_data):
if self._input_y[i] > 0:
self._positive_x.append(self._input_x[i])
else:
self._nagtive_x.append(self._input_x[i])

plt.figure(1)
x1 = [x[1] for x in self._positive_x]
x2 = [x[2] for x in self._positive_x]
plt.scatter(x1, x2, label='positive', color='g', s=20, marker="o") # 显示值为1的数据
x1 = [x[1] for x in self._nagtive_x]
x2 = [x[2] for x in self._nagtive_x]
plt.scatter(x1, x2, label='nagtive', color='r', s=20, marker="x") # 显示值为0的数据
plt.xlabel('x1')
plt.ylabel('x2')
def f(x):
return -(self._final_w[0] + self._final_w[1]*x)/self._final_w[2]
x = np.linspace(-4, 4, 10, endpoint=True)  # 显示学习到的直线
plt.plot(x, f(x), 'b-', lw=1)
plt.title(title)
plt.legend()
plt.show()

def draw_w_history(self, title):
    """Plot each weight component against the recorded update history."""
    fig, axes = plt.subplots(3, 1, sharex=True)
    steps = np.arange(len(self._history_w))
    axes[0].set_title(title + ' w trend')
    colors = ('b', 'g', 'r')
    for k in range(3):
        values = [w[k] for w in self._history_w]
        axes[k].set_ylabel('w[%d]' % k)
        axes[k].scatter(steps, values, label='w[%d]' % k,
                        color=colors[k], s=10, marker=".")
    plt.show()

def draw_likelihood_function(self, title):
    """Plot the recorded likelihood values over the update steps."""
    plt.figure(1)
    steps = np.arange(len(self._likelihood))
    plt.scatter(steps, self._likelihood,
                label='Likelihood', color='g', s=10, marker=".")
    plt.xlabel('x')
    plt.ylabel('Likelihood function')
    plt.title(title + ' Likelihood trend')
    plt.legend()
    plt.show()


#### 3.数据集测试

测试数据集 (TestSet.txt): https://github.com/apachecn/MachineLearning/blob/python-2.7/input/5.Logistic/TestSet.txt

##### 3.1 批量梯度下降
# Demo: batch gradient descent on the TestSet data.
# Fix: `title` was used but never defined in the pasted snippet.
# NOTE(review): the line that actually trains the model (a gradient-descent
# call between construction and plotting) was lost in the paste — restore it
# before running, otherwise `self._final_w` does not exist.
title = 'batch gradient descent'
log = Logistic()
log.draw_result(title)
log.draw_w_history(title)
log.draw_likelihood_function(title)


##### 3.2 随机梯度下降
# Demo: stochastic gradient descent on the TestSet data.
# Fix: `title` was used but never defined in the pasted snippet.
# NOTE(review): the line that actually trains the model (a gradient-descent
# call between construction and plotting) was lost in the paste — restore it
# before running, otherwise `self._final_w` does not exist.
title = 'stochastic gradient descent'
log = Logistic()
log.draw_result(title)
log.draw_w_history(title)
log.draw_likelihood_function(title)


《机器学习实战》第五章

• #### Sklearn 与 TensorFlow 机器学习实用指南

ApacheCN tensorflow 20页 2018年5月3日
916

• #### TensorFlow 官方文档中文版

jikexueyuanwiki tensorflow 33页 2018年6月5日
8767

• #### C++ 资源大全中文版

伯乐在线 cplusplus 1页 2018年6月6日
1422

• #### 吴恩达cs229

jiacheng-pan machine-learning 15页 2019年5月12日
0

• #### 命令行的艺术

jlevy linux 34页 2019年5月26日
46710

• #### Pro Git 简体中文第二版

Pro Git Book git 118页 2018年5月24日
838