Now accepts multiple files at once, so a whole directory can be processed in one call

This commit is contained in:
Dan Ballard 2013-01-11 08:05:35 -05:00
parent bc6847ea67
commit 0eea55bcad
1 changed file with 9 additions and 4 deletions

View File

@ -55,7 +55,7 @@ class NGramSet:
print "INSERT INTO %s VALUES (\"%s\", %d, %f);" % (table_name, key, self.gram_set[key]['count'], self.gram_set[key]['percent']) print "INSERT INTO %s VALUES (\"%s\", %d, %f);" % (table_name, key, self.gram_set[key]['count'], self.gram_set[key]['percent'])
if len(sys.argv) < 3: if len(sys.argv) < 3:
print "Usage: gramificate.py [N] [FILE]" print "Usage: gramificate.py [N] [FILE]*"
print " N Gram size" print " N Gram size"
print " FILE Filename" print " FILE Filename"
print "Output: Mysql commands to create a table FILE_N that contains all the grams and" print "Output: Mysql commands to create a table FILE_N that contains all the grams and"
@ -63,10 +63,15 @@ if len(sys.argv) < 3:
print " safely piped into mysql:" print " safely piped into mysql:"
print " ./gramificate.py 2 input.txt | mysql -u USER -pPass -D gram_db" print " ./gramificate.py 2 input.txt | mysql -u USER -pPass -D gram_db"
print " and you will end up with a table \"input_txt_2\" with the results" print " and you will end up with a table \"input_txt_2\" with the results"
print "Also accepts multiple files, so usage on test docs can be done as follows:"
print "./gramificate.py 1 test_docs/* | mysql -u USER -pPass -D gram_db"
exit() exit()
# generate ngrams # generate ngrams
grams = NGramSet(sys.argv[2], int(sys.argv[1])) gram_size = int(sys.argv[1])
for i in range(2,len(sys.argv)):
grams = NGramSet(sys.argv[i], int(sys.argv[1]))
grams.process() grams.process()
grams.print_mysql() grams.print_mysql()