2017-02-16       个评论    来源：A_Zhang的专栏
我要投稿

$$L(\theta)=\prod_{i=1}^{m}P(y_i\mid x_i;\theta)=\prod_{i=1}^{m}\left(h_\theta(x_i)\right)^{y_i}\left(1-h_\theta(x_i)\right)^{1-y_i}$$

对 $L(\theta)$ 取对数，因为 $\ln(x)$ 与 $x$ 的单调性相同

$$l(\theta)=\ln L(\theta)=\sum_{i=1}^{m}\left(y_i\ln h_\theta(x_i)+(1-y_i)\ln\left(1-h_\theta(x_i)\right)\right)$$

{

} 返回回归系数θ

```python
"""
learned on Wed Feb 15 22:11:00 2017
@author maggie

description:
load all the data from the given file, and return as mat

sigmoid function
sigmoid function in logistic regression

get the theta vector according to the gradient ascent method

classify function
using the theta vector to classify the test dataset

digitRecognition function
initialize all

"""

#/usr/bin/python
from numpy import *
from os import listdir

def loadData(dir):
    """Load every digit file in a directory into a data array and a label array.

    NOTE(review): the ``def`` line of this function was missing from the
    listing; the name ``loadData`` is a reconstruction -- confirm it against
    the upstream repository. The parameter name ``dir`` is taken from the
    body, which reads it (it shadows the builtin of the same name).

    Each file is read as 32 lines and the first 32 characters of every line
    are converted with ``int``, giving one 1024-element row per file. The
    label is the integer before the first ``_`` in the part of the file name
    that precedes the first ``.``.

    Args:
        dir: path of the directory that holds the digit files.

    Returns:
        A tuple ``(dataArray, labelArray)`` of shapes ``(m, 1024)`` and
        ``(m, 1)``, where ``m`` is the number of directory entries.

    Raises:
        ValueError: if a file name or a pixel character is not an integer.
        IndexError: if a line holds fewer than 32 characters.
    """
    trainfileList = listdir(dir)
    m = len(trainfileList)
    dataArray = zeros((m, 1024))  # one flattened 32x32 image per row
    labelArray = zeros((m, 1))    # one label per row
    for i, filename in enumerate(trainfileList):
        # The context manager closes the file; the original never closed it.
        with open('%s/%s' % (dir, filename)) as fr:
            for j in range(32):
                # The original used `linestr` without ever reading the file.
                linestr = fr.readline()
                for k in range(32):
                    dataArray[i, 32 * j + k] = int(linestr[k])
        label = filename.split('.')[0].split('_')[0]
        labelArray[i] = int(label)
    return dataArray, labelArray


def sigmoid(inX):
    """Return the logistic function ``1 / (1 + exp(-inX))``, element-wise."""
    return 1.0 / (1 + exp(-inX))


def gradAscent(dataArray, labelArray, alpha, maxCycles):
    """Fit the theta vector with batch gradient ascent.

    The name and argument order are taken from the call made in
    ``digitRecognition``; the ``def`` line itself was missing from the
    listing.

    Args:
        dataArray: ``m x n`` samples, one per row.
        labelArray: ``m x 1`` labels.
        alpha: step size of each update.
        maxCycles: number of full-batch updates to perform.

    Returns:
        The ``n x 1`` theta vector. It starts as all ones and is returned
        unchanged when ``maxCycles`` is 0.
    """
    # `asmatrix` replaces the `mat` alias, which NumPy 2.0 removed.
    dataMat = asmatrix(dataArray)    # size : m x n
    labelMat = asmatrix(labelArray)  # size : m x 1
    m, n = shape(dataMat)
    weigh = ones((n, 1))  # initialize the theta vector
    for _ in range(maxCycles):
        h = sigmoid(dataMat * weigh)
        error = labelMat - h  # size : m x 1
        # Step along the gradient of the log-likelihood, X^T (y - h).
        weigh = weigh + alpha * dataMat.transpose() * error
    return weigh


def classify(testDir, weigh):
    """Classify every sample in ``testDir`` and report the error rate.

    A sample is assigned class 1 when its sigmoid output is above 0.5 and
    class 0 otherwise. One line is printed per sample, followed by the
    overall error rate.

    Args:
        testDir: directory of test files, loaded with ``loadData``.
        weigh: ``n x 1`` theta vector, e.g. the result of ``gradAscent``.

    Returns:
        The fraction of misclassified samples, or 0.0 when the directory is
        empty. The original returned nothing.
    """
    # The original used dataArray/labelArray without loading them.
    dataArray, labelArray = loadData(testDir)
    dataMat = asmatrix(dataArray)
    labelMat = asmatrix(labelArray)
    h = sigmoid(dataMat * weigh)
    m = len(h)
    error = 0.0
    for i in range(m):
        label = int(labelMat[i, 0])
        # Compare the probability itself: the original wrapped it in int(),
        # which truncates every value below 1.0 to 0.
        if h[i, 0] > 0.5:
            print('%d is classified as : 1' % label)
            if label != 1:
                error += 1
                print('error')
        else:
            print('%d is classified as : 0' % label)
            if label != 0:
                error += 1
                print('error')
    # Guard the division so an empty test directory does not raise.
    errorRate = error / m if m else 0.0
    # Two spaces reproduce the output of the original two-item print.
    print('error rate is  %.4f' % errorRate)
    return errorRate


def digitRecognition(trainDir, testDir, alpha=0.07, maxCycles=10):
    """Train on ``trainDir``, classify ``testDir``, then print the weights.

    Args:
        trainDir: directory of training files.
        testDir: directory of test files.
        alpha: step size passed to ``gradAscent``.
        maxCycles: number of updates passed to ``gradAscent``.
    """
    # The original used data/label without loading them.
    data, label = loadData(trainDir)
    weigh = gradAscent(data, label, alpha, maxCycles)
    classify(testDir, weigh)
    print(weigh)

if __name__ == '__main__':
    digitRecognition('train','test')
```

train和test数据以及代码可以[点击此处下载](https://github.com/zjsghww/MachineLearning)。