diff --git a/gramificate.py b/gramificate.py
index ea5ed58..b2974df 100755
--- a/gramificate.py
+++ b/gramificate.py
@@ -48,21 +48,39 @@ class NGramSet:
             self.gram_set[key]['percent'] = self.gram_set[key]['count'] / float(self.total_count)
 
     def print_mysql(self):
-        print "mysql"
+        """Print MySQL statements that build this gram table to stdout.
+
+        The table name is the filename and gram size joined by an
+        underscore, with every character outside [A-Za-z0-9] replaced by
+        an underscore. Output is a DROP TABLE IF EXISTS, a CREATE TABLE,
+        then one INSERT per gram carrying its count and percent.
+        """
+        # Imported here because the top-of-file imports are outside this hunk.
+        import re
+
+        table_name = re.sub('[^A-Za-z0-9]', '_', "%s_%d" % (self.filename, self.gram_size))
+        print "DROP TABLE IF EXISTS %s;" % (table_name)
+        print "CREATE TABLE %s (gram VARCHAR(255), count INT, percent FLOAT, PRIMARY KEY(gram));" % (table_name)
+        for key in self.gram_set:
+            # Double backslashes and double quotes so a gram containing either
+            # cannot end the double-quoted SQL string literal early.
+            gram = ("%s" % (key,)).replace('\\', '\\\\').replace('"', '""')
+            print "INSERT INTO %s VALUES (\"%s\", %d, %f);" % (table_name, gram, self.gram_set[key]['count'], self.gram_set[key]['percent'])
 
 
 if len(sys.argv) < 3:
     print "Usage: gramificate.py [N] [FILE]"
     print " N Gram size"
     print " FILE Filename"
-    print "Output: Mysql commands to create a table FILE-N that contains all the grams and"
+    print "Output: Mysql commands to create a table FILE_N that contains all the grams and"
     print " associated stats (count of gram, percent of total). Can be directly and"
     print " safely piped into mysql:"
-    print " mysql -u USER -pPass -D gram_db < ./gramificate.py 2 input.txt"
+    print " ./gramificate.py 2 input.txt | mysql -u USER -pPass -D gram_db"
+    print " and you will end up with a table \"input_txt_2\" with the results"
     exit()
 
 
 # generate ngrams
 grams = NGramSet(sys.argv[2], int(sys.argv[1]))
 grams.process()
-print grams.gram_set
+grams.print_mysql()