1. 成绩通过情况的分类错误率
# Usage example: evaluate the classifier from the logRegres module.
import logRegres
# multiTest(3) runs colicTest(3) ten times (columns 0-2 are the features,
# column 3 is the label) and prints the average error rate.
logRegres. multiTest( 3 )
2. 分类预测
# Train on the first three columns of the training file, then classify one
# hand-written sample with the learned weights.
# `array` is used for the sample below; the script only imported logRegres,
# so it has to be brought into scope explicitly.
from numpy import array

trainingSet = []
trainingLabels = []
# The context manager closes the file as soon as parsing is done.
with open('horseColicTraining.txt') as frTrain:
    for line in frTrain:
        currLine = line.strip().split('\t')
        # Columns 0-2 are the features, column 3 is the class label.
        trainingSet.append([float(currLine[i]) for i in range(3)])
        trainingLabels.append(float(currLine[3]))

# Bare expressions: echoed in an interactive session, no effect in a script.
trainingSet[:5]
trainingLabels[:5]

trainWeights = logRegres.stocGradAscent1(trainingSet, trainingLabels, numIter=1000)
logRegres.classifyVector(array([3, 3, 410]), trainWeights)
logRegres.py
from numpy import *
import numpy as np
def loadDataSet(filename='testSet.txt'):
    """Read a whitespace-separated data file of two features and a label.

    Each non-blank line must hold at least three fields: two floats followed
    by an integer class label. A constant 1.0 is prepended to every feature
    row so the first weight acts as the intercept.

    Args:
        filename: Path of the file to read. Defaults to 'testSet.txt'.

    Returns:
        A tuple (dataMat, labelMat): dataMat is a list of
        [1.0, x1, x2] rows and labelMat is the list of int labels.
    """
    dataMat = []
    labelMat = []
    # The context manager guarantees the file is closed, even on a parse error.
    with open(filename) as fr:
        for line in fr:
            lineArr = line.split()
            if not lineArr:
                # Skip blank lines (e.g. a trailing newline at end of file).
                continue
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat
def sigmoid(inX):
    """Return the logistic function 1 / (1 + e^(-inX)), element-wise.

    Computed through the identity sigmoid(x) = 0.5 * (1 + tanh(x / 2)).
    The direct form evaluates exp(-inX), which overflows for large negative
    inputs; tanh saturates at -1/+1 instead, so no overflow occurs.

    Args:
        inX: A scalar, NumPy array or NumPy matrix.

    Returns:
        The sigmoid of inX, with the same shape as the input.
    """
    return 0.5 * (1.0 + np.tanh(0.5 * inX))
def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=500):
    """Fit logistic-regression weights with batch gradient ascent.

    Every iteration uses the whole data set: the prediction error
    (label - sigmoid(X * w)) is projected back through X^T and added to
    the weights, scaled by the step size.

    Args:
        dataMatIn: 2-D sequence/array of samples, one row per sample.
        classLabels: 1-D sequence of class labels, one per sample.
        alpha: Step size of each update. Defaults to 0.001.
        maxCycles: Number of full-batch updates. Defaults to 500.

    Returns:
        The weights as an (n, 1) column, where n is the number of columns
        of dataMatIn.
    """
    # np.asmatrix exists in both NumPy 1.x and 2.x; the `mat` alias was
    # removed in NumPy 2.0.
    dataMatrix = np.asmatrix(dataMatIn)
    labelMat = np.asmatrix(classLabels).transpose()  # row of labels -> column
    n = dataMatrix.shape[1]
    weights = np.ones((n, 1))
    for _ in range(maxCycles):
        # `*` is a matrix product here because dataMatrix is an np.matrix.
        h = sigmoid(dataMatrix * weights)
        error = labelMat - h
        weights = weights + alpha * dataMatrix.transpose() * error
    return weights
def plotBestFit(weights):
    """Scatter-plot the loadDataSet() samples and draw the decision line.

    Samples labelled 1 are drawn as red squares, all others as green dots.
    The line is where weights[0] + weights[1]*x1 + weights[2]*x2 == 0.

    Args:
        weights: Three weights (intercept, x1, x2). May be a flat sequence,
            an (n, 1) array, or the np.matrix returned by gradAscent.
    """
    import matplotlib.pyplot as plt

    dataMat, labelMat = loadDataSet()
    # Flatten so a column vector or np.matrix indexes like a flat sequence;
    # with an np.matrix, `weights[1] * x` would be a matrix product and fail.
    w = np.asarray(weights).ravel()

    # Split the points by class; column 0 of each row is the constant 1.0.
    xcord1 = [row[1] for row, label in zip(dataMat, labelMat) if int(label) == 1]
    ycord1 = [row[2] for row, label in zip(dataMat, labelMat) if int(label) == 1]
    xcord2 = [row[1] for row, label in zip(dataMat, labelMat) if int(label) != 1]
    ycord2 = [row[2] for row, label in zip(dataMat, labelMat) if int(label) != 1]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')

    x = np.arange(-3.0, 3.0, 0.1)
    # Solve w0 + w1*x1 + w2*x2 = 0 for x2.
    y = (-w[0] - w[1] * x) / w[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
def stocGradAscent0(dataMatrix, classLabels, alpha=0.01):
    """Fit logistic-regression weights with one pass of stochastic ascent.

    The samples are visited once, in order, and the weights are updated
    after each sample using a fixed step size.

    Args:
        dataMatrix: 2-D sequence/array of samples, one row per sample.
        classLabels: 1-D sequence of class labels, one per sample.
        alpha: Step size of each update. Defaults to 0.01.

    Returns:
        A 1-D array of n weights, where n is the number of columns of
        dataMatrix.
    """
    # Coerce to plain arrays, as stocGradAscent1 does, so that lists and
    # np.matrix inputs get element-wise `*` below.
    dataMatrix = np.asarray(dataMatrix)
    classLabels = np.asarray(classLabels).ravel()
    m, n = dataMatrix.shape
    weights = np.ones(n)
    for i in range(m):
        h = sigmoid(sum(dataMatrix[i] * weights))
        error = classLabels[i] - h
        weights = weights + alpha * error * dataMatrix[i]
    return weights
def stocGradAscent1(dataMatrix, classLabels, numIter=150):
    """Fit logistic-regression weights with randomized stochastic ascent.

    Each of the numIter passes visits every sample exactly once in a fresh
    random order. The step size decays with the pass number and the position
    within the pass, but never drops below 0.0001.

    Args:
        dataMatrix: 2-D sequence/array of samples, one row per sample.
        classLabels: 1-D sequence of class labels, one per sample.
        numIter: Number of passes over the data. Defaults to 150.

    Returns:
        A 1-D array of n weights, where n is the number of columns of
        dataMatrix.
    """
    dataMatrix = np.array(dataMatrix)
    classLabels = np.array(classLabels)
    m, n = dataMatrix.shape
    weights = np.ones(n)
    for j in range(numIter):
        # A permutation gives true sampling without replacement. The earlier
        # code drew a position in a shrinking index list but used that
        # position itself as the row number, so rows could repeat within a
        # pass and high-index rows were rarely visited.
        for i, row in enumerate(np.random.permutation(m)):
            alpha = 4 / (1.0 + j + i) + 0.0001
            h = sigmoid(sum(dataMatrix[row] * weights))
            error = classLabels[row] - h
            weights = weights + alpha * error * dataMatrix[row]
    return weights
def classifyVector(inX, weights):
    """Classify one sample with the given logistic-regression weights.

    Args:
        inX: Feature vector of the sample.
        weights: Weight vector, multiplied element-wise with inX.

    Returns:
        1.0 if the sigmoid of the weighted sum is above 0.5, else 0.0.
    """
    activation = sum(inX * weights)
    return 1.0 if sigmoid(activation) > 0.5 else 0.0
def _readSamples(path, m):
    """Parse a tab-separated file into (feature rows, labels) using m features."""
    features = []
    labels = []
    # The context manager closes the file even if a line fails to parse.
    with open(path) as fh:
        for line in fh:
            fields = line.strip().split('\t')
            if fields == ['']:
                # Skip blank lines (e.g. a trailing newline at end of file).
                continue
            features.append([float(value) for value in fields[:m]])
            # Column m holds the label; parse as float so "1" and "1.0"
            # are both accepted.
            labels.append(float(fields[m]))
    return features, labels


def colicTest(m):
    """Train on 'horseColicTraining.txt' and score on 'horseColicTest.txt'.

    Both files are tab-separated; columns 0..m-1 are the features and
    column m is the class label. Weights are trained with stocGradAscent1
    for 1000 passes, then every test row is classified and compared with
    its label. The error rate is printed as well as returned.

    Args:
        m: Number of feature columns (also the index of the label column).

    Returns:
        The fraction of test rows that were misclassified.

    Raises:
        ZeroDivisionError: If the test file contains no samples.
    """
    trainingSet, trainingLabels = _readSamples('horseColicTraining.txt', m)
    testSet, testLabels = _readSamples('horseColicTest.txt', m)

    trainWeights = stocGradAscent1(np.array(trainingSet), trainingLabels, 1000)

    errorCount = 0
    for row, label in zip(testSet, testLabels):
        if int(classifyVector(np.array(row), trainWeights)) != int(label):
            errorCount += 1

    errorRate = float(errorCount) / len(testSet)
    print("the error rate of this test is: %f" % errorRate)
    return errorRate
def multiTest(m):
    """Run colicTest(m) ten times and print the average error rate.

    Args:
        m: Number of feature columns, forwarded unchanged to colicTest.
    """
    numTests = 10
    # Collect every run first, then accumulate in the same order.
    rates = [colicTest(m) for _ in range(numTests)]
    errorSum = 0.0
    for rate in rates:
        errorSum += rate
    average = errorSum / float(numTests)
    print("after %d iterations the average error rate is: %f" % (numTests, average))