执行示例代码
def npchunk_features(sentence, i, history):
    """Build the feature set for the token at position ``i``.

    Args:
        sentence: Indexable sequence of ``(word, pos)`` pairs.
        i: Index of the token to describe.
        history: Chunk tags already assigned to the preceding tokens.
            Accepted so every feature extractor shares one signature;
            this extractor does not use it.

    Returns:
        A dict with the single feature ``"pos"`` (the token's POS tag).
    """
    # The word itself is deliberately ignored; only the POS tag is a feature.
    _word, pos = sentence[i]
    return {"pos": pos}
class ConsecutiveNPChunkTagger(nltk.TaggerI):
    """Classifier-based tagger that assigns a chunk tag to each token in turn.

    Tokens are ``(word, pos)`` pairs; the "tag" predicted for each one is
    its chunk tag. Features come from ``npchunk_features``.
    """

    def __init__(self, train_sents):
        """Train the underlying classifier.

        Args:
            train_sents: Iterable of sentences, each a sequence of
                ``((word, pos), chunk_tag)`` pairs.
        """
        train_set = []
        for tagged_sent in train_sents:
            untagged_sent = nltk.tag.untag(tagged_sent)
            history = []
            for i, (_token, chunk_tag) in enumerate(tagged_sent):
                featureset = npchunk_features(untagged_sent, i, history)
                train_set.append((featureset, chunk_tag))
                history.append(chunk_tag)
        # The MEGAM algorithm shells out to the external `megam` binary, which
        # NLTK locates through the MEGAM environment variable.
        # nltk.NaiveBayesClassifier.train(train_set) is an alternative that
        # needs no external binary.
        self.classifier = nltk.MaxentClassifier.train(
            train_set, algorithm='MEGAM', trace=0)

    def tag(self, sentence):
        """Assign a chunk tag to every token of ``sentence``.

        Args:
            sentence: Indexable sequence of ``(word, pos)`` pairs.

        Returns:
            A list of ``((word, pos), chunk_tag)`` pairs.
        """
        history = []
        for i, _token in enumerate(sentence):
            featureset = npchunk_features(sentence, i, history)
            history.append(self.classifier.classify(featureset))
        # Materialize the pairs: on Python 3 a bare zip() is a one-shot
        # iterator, which breaks callers that index or iterate twice.
        return list(zip(sentence, history))
class ConsecutiveNPChunker(nltk.ChunkParserI):
    """Chunk parser that delegates tagging to ``ConsecutiveNPChunkTagger``."""

    def __init__(self, train_sents):
        """Train the chunker.

        Args:
            train_sents: Iterable of chunk trees (anything accepted by
                ``nltk.chunk.tree2conlltags``).
        """
        # Re-shape each (word, pos, chunk) triple into ((word, pos), chunk)
        # so the tagger sees (word, pos) as the token and chunk as its tag.
        tagged_sents = [
            [((w, t), c) for (w, t, c) in nltk.chunk.tree2conlltags(sent)]
            for sent in train_sents
        ]
        self.tagger = ConsecutiveNPChunkTagger(tagged_sents)

    def parse(self, sentence):
        """Chunk a POS-tagged sentence.

        Args:
            sentence: Sequence of ``(word, pos)`` pairs.

        Returns:
            The tree built by ``nltk.chunk.conlltags2tree`` from the
            predicted chunk tags.
        """
        tagged_sent = self.tagger.tag(sentence)
        # Flatten ((word, pos), chunk) back into (word, pos, chunk) triples.
        conlltags = [(w, t, c) for ((w, t), c) in tagged_sent]
        return nltk.chunk.conlltags2tree(conlltags)
# Load the NP-only chunked sentences of the CoNLL-2000 training and test files.
train_sents = conll2000.chunked_sents('train.txt', chunk_types=['NP'])
test_sents = conll2000.chunked_sents('test.txt', chunk_types=['NP'])

# Train the chunker on the training split, then print its evaluation
# against the test split.
chunker = ConsecutiveNPChunker(train_sents)
print(chunker.evaluate(test_sents))
报错
Traceback (most recent call last):
File "E:/Python Practice/NLP/Chapter7.py", line 225, in <module>
chunker = ConsecutiveNPChunker(train_sents)
File "E:/Python Practice/NLP/Chapter7.py", line 216, in __init__
self.tagger = ConsecutiveNPChunkTagger(tagged_sents)
File "E:/Python Practice/NLP/Chapter7.py", line 201, in __init__
train_set, algorithm='MEGAM', trace=0) #
File "D:\Anaconda3\lib\site-packages\nltk\classify\maxent.py", line 335, in train
train_toks, trace, encoding, labels, gaussian_prior_sigma, **cutoffs
File "D:\Anaconda3\lib\site-packages\nltk\classify\maxent.py", line 1483, in train_maxent_classifier_with_megam
stdout = call_megam(options)
File "D:\Anaconda3\lib\site-packages\nltk\classify\megam.py", line 168, in call_megam
config_megam()
File "D:\Anaconda3\lib\site-packages\nltk\classify\megam.py", line 57, in config_megam
url="http://www.umiacs.umd.edu/~hal/megam/index.html",
File "D:\Anaconda3\lib\site-packages\nltk\internals.py", line 690, in find_binary
name, path_to_bin, env_vars, searchpath, binary_names, url, verbose
File "D:\Anaconda3\lib\site-packages\nltk\internals.py", line 674, in find_binary_iter
path_to_bin or name, env_vars, searchpath, binary_names, url, verbose
File "D:\Anaconda3\lib\site-packages\nltk\internals.py", line 632, in find_file_iter
raise LookupError("\n\n%s\n%s\n%s" % (div, msg, div))
LookupError:
===========================================================================
NLTK was unable to find the megam file!
Use software specific configuration paramaters or set the MEGAM environment variable.
For more information on megam, see:
<http://www.umiacs.umd.edu/~hal/megam/index.html>
===========================================================================
参考 StackOverflow 上的回答,在这里下载 MEGAM 源文件(Source: megam_src.tgz)。下载后解压,然后在代码中(程序的开头)为 MEGAM 设置环境变量:
import os
# Raw string: keeps the backslashes of the Windows path literal instead of
# having them parsed as (invalid) escape sequences.
# NOTE: the path must point at a megam executable built for this operating
# system; otherwise launching it fails with WinError 193.
os.environ["MEGAM"] = r'D:\Anaconda3\Lib\site-packages\MEGAM\megam-64'
再次运行,报错
File "E:/Python Practice/NLP/Chapter7.py", line 204, in __init__
train_set, algorithm='MEGAM', trace=0) #
File "D:\Anaconda3\lib\site-packages\nltk\classify\maxent.py", line 335, in train
train_toks, trace, encoding, labels, gaussian_prior_sigma, **cutoffs
File "D:\Anaconda3\lib\site-packages\nltk\classify\maxent.py", line 1483, in train_maxent_classifier_with_megam
stdout = call_megam(options)
File "D:\Anaconda3\lib\site-packages\nltk\classify\megam.py", line 172, in call_megam
p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
File "D:\Anaconda3\lib\subprocess.py", line 800, in __init__
restore_signals, start_new_session)
File "D:\Anaconda3\lib\subprocess.py", line 1207, in _execute_child
startupinfo)
OSError: [WinError 193] %1 is not a valid Win32 application
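该错误的原因是 Python 版本是 64 位,而调用的库是 32 位的,所以解决方法便是安装 32 位的 Python。在官网下载一个 32 位的 Python,这里选择 python3.7.7 Windows x86 executable installer,安装成功并更换解释器之后,还是报同样的错误……(补充说明:WinError 193 表示被启动的文件本身不是有效的 Windows 可执行文件。megam 是由 subprocess 作为独立进程启动的,与 Python 解释器的位数无关,因此更换 32 位 Python 无法解决该问题;需要使用针对 Windows 编译的 megam 可执行文件。)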
由于本人使用的是 Anaconda,又从 Anaconda 官网下载并安装了一个 32 位的 Anaconda,依旧报错……
有时间再来更新