Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 131 additions & 1 deletion pattern/text/en/wordnet/pywordnet/concordance.py
Original file line number Diff line number Diff line change
@@ -1 +1,131 @@
# some accessing of the semantic concordance data for wordnet 1.6# by Des Berry, berry@ais.itimport stringimport osfrom wordnet import binarySearchFile# Sample entries in the 'taglist' file# ordinary%1:18:01:: 1 br-a01:78,1;86,1;88,4# ordered%5:00:00:organized:01 2 br-j23:6,14;13,32;66,12# where the general form is:# lemma%ss_type:lex_filenum:lex_id:head_word:head_id sense_number[location_list]# location_list: filename:sent_num,word_num[;sent_num,word_num...]ss_type = ("NOUN", "VERB", "ADJECTIVE", "ADVERB", "ADJECTIVE SATELLITE")# given a sentence number (and the contents of a semantic concordance file)# return a string of words as the sentencedef find_sentence(snum, msg): str = "<s snum=%s>" % snum s = string.find(msg, str) if s < 0: return "<Unknown>" s = s + len(str) sentence = "" tag = "" while 1: if msg[s] == '\n': s = s + 1 n = string.find(msg, '<', s) if n < 0: break if n - s != 0: if tag == "w" and msg[s] != "'" and len(sentence) > 0: # word form sentence = sentence + " " sentence = sentence + msg[s:n] e = string.find(msg, '>', n) if e < 0: break tag = msg[n + 1] if tag == "/": # check for ending sentence if msg[n + 2] == 's': # end of sentence break s = e + 1 return sentence# given a taglist sense (one line of the tagfile) and where to find the tagfile (root)# return a tuple of# symset type ('1' .. '5')# sense (numeric character string)# list of sentences (constructed from the taglist)def tagsentence(tag, root): s = string.find(tag, '%') sentence = [] type = tag[s + 1] c = s for i in range(0, 4): c = string.find(tag, ':', c + 1) c = string.find(tag, ' ', c + 1) sense = tag[c + 1] c = c + 3 while 1: d = string.find(tag, ' ', c) # file separator if d < 0: loclist = tag[c:] else: loclist = tag[c:d] c = d + 1 e = string.find(loclist, ':') filename = loclist[:e] fh = open(root + filename, "rb") msg = fh.read() fh.close() while 1: e = e + 1 f = string.find(loclist, ';', e) if f < 0: sent_word = loclist[e:] else: sent_word = loclist[e:f] e = f g = string.find(sent_word, ',') sent = sent_word[:g] sentence.append(find_sentence(sent, msg)) if f < 0: break if d < 0: break return (type, sense, sentence)# given a word to search for and where to find the files (root)# displays the information# This could be changed to display in different ways!def sentences(word, root): cache = {} file = open(root + "taglist", "rb") key = word + "%" keylen = len(key) binarySearchFile(file, key + " ", cache, 10) print "Word '%s'" % word while 1: line = file.readline() if line[:keylen] != key: break type, sense, sentence = tagsentence(line, root + "tagfiles/") print ss_type[string.atoi(type) - 1], sense for sent in sentence: print sentdef _test(word, corpus, base): print corpus sentences("ordinary", base + corpus + "/")if __name__ == '__main__': base = "C:/win16/dict/semcor/" word = "ordinary" _test(word, "brown1", base) _test(word, "brown2", base) _test(word, "brownv", base)
# some accessing of the semantic concordance data for wordnet 1.6
# by Des Berry, berry@ais.it

import string
import os
from wordnet import binarySearchFile

# Sample entries in the 'taglist' file
# ordinary%1:18:01:: 1 br-a01:78,1;86,1;88,4
# ordered%5:00:00:organized:01 2 br-j23:6,14;13,32;66,12
# where the general form is:
# lemma%ss_type:lex_filenum:lex_id:head_word:head_id sense_number
[location_list]
# location_list: filename:sent_num,word_num[;sent_num,word_num...]

ss_type = ("NOUN", "VERB", "ADJECTIVE", "ADVERB", "ADJECTIVE SATELLITE")


# given a sentence number (and the contents of a semantic concordance file)
# return a string of words as the sentence
def find_sentence(snum, msg):
str = "<s snum=%s>" % snum
s = string.find(msg, str)
if s < 0:
return "<Unknown>"
s = s + len(str)
sentence = ""
tag = ""
while 1:
if msg[s] == '\n':
s = s + 1
n = string.find(msg, '<', s)
if n < 0:
break
if n - s != 0:
if tag == "w" and msg[s] != "'" and len(sentence) > 0: # word form
sentence = sentence + " "
sentence = sentence + msg[s:n]
e = string.find(msg, '>', n)
if e < 0:
break
tag = msg[n + 1]
if tag == "/": # check for ending sentence
if msg[n + 2] == 's':
# end of sentence
break
s = e + 1
return sentence


# given a taglist sense (one line of the tagfile) and where to find the tagfile (root)
# return a tuple of
# symset type ('1' .. '5')
# sense (numeric character string)
# list of sentences (constructed from the taglist)
def tagsentence(tag, root):
s = string.find(tag, '%')
sentence = []
type = tag[s + 1]
c = s
for i in range(0, 4):
c = string.find(tag, ':', c + 1)
c = string.find(tag, ' ', c + 1)
sense = tag[c + 1]
c = c + 3
while 1:
d = string.find(tag, ' ', c) # file separator
if d < 0:
loclist = tag[c:]
else:
loclist = tag[c:d]
c = d + 1

e = string.find(loclist, ':')
filename = loclist[:e]
fh = open(root + filename, "rb")
msg = fh.read()
fh.close()

while 1:
e = e + 1
f = string.find(loclist, ';', e)
if f < 0:
sent_word = loclist[e:]
else:
sent_word = loclist[e:f]
e = f

g = string.find(sent_word, ',')
sent = sent_word[:g]

sentence.append(find_sentence(sent, msg))

if f < 0:
break

if d < 0:
break
return (type, sense, sentence)


# given a word to search for and where to find the files (root)
# displays the information
# This could be changed to display in different ways!
def sentences(word, root):
cache = {}
file = open(root + "taglist", "rb")
key = word + "%"
keylen = len(key)
binarySearchFile(file, key + " ", cache, 10)
print("Word '%s'" % word)
while 1:
line = file.readline()
if line[:keylen] != key:
break
type, sense, sentence = tagsentence(line, root + "tagfiles/")
print(ss_type[string.atoi(type) - 1], sense)
for sent in sentence:
print(sent)


def _test(word, corpus, base):
print(corpus)
sentences("ordinary", base + corpus + "/")

if __name__ == '__main__':
base = "C:/win16/dict/semcor/"
word = "ordinary"
_test(word, "brown1", base)
_test(word, "brown2", base)
_test(word, "brownv", base)