Now outputs MySQL statements that can be piped directly into mysql
This commit is contained in:
parent
903629e333
commit
cc493cb366
|
@ -48,21 +48,26 @@ class NGramSet:
|
||||||
self.gram_set[key]['percent'] = self.gram_set[key]['count'] / float(self.total_count)
|
self.gram_set[key]['percent'] = self.gram_set[key]['count'] / float(self.total_count)
|
||||||
|
|
||||||
def print_mysql(self):
|
def print_mysql(self):
|
||||||
print "mysql"
|
table_name = re.sub('[^A-Za-z0-9]', '_', "%s_%d" % (self.filename, self.gram_size))
|
||||||
|
print "DROP TABLE IF EXISTS %s;" % (table_name)
|
||||||
|
print "CREATE TABLE %s (gram VARCHAR(255), count INT, percent FLOAT, PRIMARY KEY(gram));" % (table_name)
|
||||||
|
for key in self.gram_set:
|
||||||
|
print "INSERT INTO %s VALUES (\"%s\", %d, %f);" % (table_name, key, self.gram_set[key]['count'], self.gram_set[key]['percent'])
|
||||||
|
|
||||||
if len(sys.argv) < 3:
|
if len(sys.argv) < 3:
|
||||||
print "Usage: gramificate.py [N] [FILE]"
|
print "Usage: gramificate.py [N] [FILE]"
|
||||||
print " N Gram size"
|
print " N Gram size"
|
||||||
print " FILE Filename"
|
print " FILE Filename"
|
||||||
print "Output: Mysql commands to create a table FILE-N that contains all the grams and"
|
print "Output: Mysql commands to create a table FILE_N that contains all the grams and"
|
||||||
print " associated stats (count of gram, percent of total). Can be directly and"
|
print " associated stats (count of gram, percent of total). Can be directly and"
|
||||||
print " safely piped into mysql:"
|
print " safely piped into mysql:"
|
||||||
print " mysql -u USER -pPass -D gram_db < ./gramificate.py 2 input.txt"
|
print " ./gramificate.py 2 input.txt | mysql -u USER -pPass -D gram_db"
|
||||||
|
print " and you will end up with a table \"input_txt_2\" with the results"
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
# generate ngrams
|
# generate ngrams
|
||||||
grams = NGramSet(sys.argv[2], int(sys.argv[1]))
|
grams = NGramSet(sys.argv[2], int(sys.argv[1]))
|
||||||
grams.process()
|
grams.process()
|
||||||
print grams.gram_set
|
grams.print_mysql()
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue