From 0eea55bcad1d33d07061d6cd65688f6e923590bf Mon Sep 17 00:00:00 2001
From: Dan Ballard
Date: Fri, 11 Jan 2013 08:05:35 -0500
Subject: [PATCH] now accepts multiple files at once so a whole directory can be processed on one call

Every command-line argument after N is treated as an input file and is
run through its own NGramSet, so a shell glob such as test_docs/* can be
passed directly. N is parsed once and reused for every file.
---
 gramificate.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/gramificate.py b/gramificate.py
index b2974df..ae0e249 100755
--- a/gramificate.py
+++ b/gramificate.py
@@ -55,7 +55,7 @@ class NGramSet:
             print "INSERT INTO %s VALUES (\"%s\", %d, %f);" % (table_name, key, self.gram_set[key]['count'], self.gram_set[key]['percent'])
 
 if len(sys.argv) < 3:
-    print "Usage: gramificate.py [N] [FILE]"
+    print "Usage: gramificate.py [N] [FILE]*"
     print " N Gram size"
     print " FILE Filename"
     print "Output: Mysql commands to create a table FILE_N that contains all the grams and"
@@ -63,11 +63,16 @@ if len(sys.argv) < 3:
     print " safely piped into mysql:"
     print " ./gramificate.py 2 input.txt | mysql -u USER -pPass -D gram_db"
     print " and you will end up with a table \"input_txt_2\" with the results"
+    print "Also accepts multiple files, so usage on test docs can be done as follows:"
+    print "./gramificate.py 1 test_docs/* | mysql -u USER -pPass -D gram_db"
     exit()
 
 
 
 # generate ngrams
-grams = NGramSet(sys.argv[2], int(sys.argv[1]))
-grams.process()
-grams.print_mysql()
+gram_size = int(sys.argv[1])
+# every argument after N is an input file; each one gets its own NGramSet
+for filename in sys.argv[2:]:
+    grams = NGramSet(filename, gram_size)
+    grams.process()
+    grams.print_mysql()