presage  0.9.1
databaseConnector.cpp
Go to the documentation of this file.
1 
2 /******************************************************
3  * Presage, an extensible predictive text entry system
4  * ---------------------------------------------------
5  *
6  * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7 
8  This program is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 2 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License along
19  with this program; if not, write to the Free Software Foundation, Inc.,
20  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  **********(*)*/
23 
24 
25 #include "databaseConnector.h"
26 
27 #include "../../core/utility.h"
28 
29 #include <list>
30 #include <sstream>
31 #include <stdlib.h>
32 #include <assert.h>
33 
34 DatabaseConnector::DatabaseConnector(const std::string database_name,
35  const size_t cardinality,
36  const bool read_write)
37  : logger("DatabaseConnector", std::cerr)
38 {
39  set_database_filename (database_name);
40  set_cardinality (cardinality);
41  set_read_write_mode (read_write);
42 }
43 
44 DatabaseConnector::DatabaseConnector(const std::string database_name,
45  const size_t cardinality,
46  const bool read_write,
47  const std::string& log_level)
48  : logger("DatabaseConnector", std::cerr, log_level)
49 {
50  set_database_filename (database_name);
51  set_cardinality (cardinality);
52  set_read_write_mode (read_write);
53 }
54 
56 {}
57 
58 void DatabaseConnector::createNgramTable(const size_t n) const
59 {
60  if (n > 0) {
61  std::stringstream query;
62  std::stringstream unique;
63  query << "CREATE TABLE";
64 // This #ifdef does not belong here, but unfortunately SQLite 2.x does
65 // not support the IF NOT EXISTS SQL clause.
66 #ifndef HAVE_SQLITE_H
67  query << " IF NOT EXISTS";
68 #endif
69  query << " _" << n << "_gram (";
70  for (int i = n - 1; i >= 0; i--) {
71  if (i != 0) {
72  unique << "word_" << i << ", ";
73  query << "word_" << i << " TEXT, ";
74  } else {
75  unique << "word";
76  query << "word TEXT, count INTEGER, UNIQUE(" << unique.str() << ") );";
77  }
78  }
79 
80  executeSql(query.str());
81  } else {
82  // TODO
83  // throw exception
84  }
85 }
86 
88 {
89  std::string query = "SELECT SUM(count) FROM _1_gram;";
90 
91  NgramTable result = executeSql(query);
92 
93  logger << DEBUG << "NgramTable:";
94  for (size_t i = 0; i < result.size(); i++) {
95  for (size_t j = 0; j < result[i].size(); j++) {
96  logger << DEBUG << result[i][j] << '\t';
97  }
98  logger << DEBUG << endl;
99  }
100 
101  return extractFirstInteger(result);
102 }
103 
105 {
106  std::stringstream query;
107  query << "SELECT count "
108  << "FROM _" << ngram.size() << "_gram"
109  << buildWhereClause(ngram) << ";";
110 
111  NgramTable result = executeSql(query.str());
112 
113  logger << DEBUG << "NgramTable:";
114  for (size_t i = 0; i < result.size(); i++) {
115  for (size_t j = 0; j < result[i].size(); j++) {
116  logger << DEBUG << result[i][j] << '\t';
117  }
118  logger << DEBUG << endl;
119  }
120 
121  return extractFirstInteger(result);
122 }
123 
125 {
126  std::stringstream query;
127  query << "SELECT " << buildSelectLikeClause(ngram.size()) << " "
128  << "FROM _" << ngram.size() << "_gram"
129  << buildWhereLikeClause(ngram)
130  << " ORDER BY count DESC";
131  if (limit < 0) {
132  query << ";";
133  } else {
134  query << " LIMIT " << limit << ';';
135  }
136 
137  return executeSql(query.str());
138 }
139 
140 NgramTable DatabaseConnector::getNgramLikeTableFiltered(const Ngram ngram, const char** filter, int limit) const
141 {
142  std::stringstream query;
143  query << "SELECT " << buildSelectLikeClause(ngram.size()) << " "
144  << "FROM _" << ngram.size() << "_gram"
145  << buildWhereLikeClauseFiltered(ngram,filter)
146  << " ORDER BY count DESC";
147  if (limit < 0) {
148  query << ";";
149  } else {
150  query << " LIMIT " << limit << ';';
151  }
152 
153  return executeSql(query.str());
154 }
155 
157 {
158  int count = getNgramCount(ngram);
159 
160  if (count > 0) {
161  // the ngram was found in the database
162  updateNgram(ngram, ++count);
163 
164  logger << DEBUG << "Updated ngram to " << count << endl;
165 
166  } else {
167  // the ngram was not found in the database
168  count = 1;
169  insertNgram(ngram, count);
170 
171  logger << DEBUG << "Inserted ngram" << endl;
172 
173  }
174  return count;
175 }
176 
177 void DatabaseConnector::removeNgram(const Ngram ngram) const
178 {}
179 
180 void DatabaseConnector::insertNgram(const Ngram ngram, const int count) const
181 {
182  std::stringstream query;
183 
184  query << "INSERT INTO _" << ngram.size() << "_gram "
185  << buildValuesClause(ngram, count)
186  << ";";
187 
188  executeSql(query.str());
189 }
190 
191 void DatabaseConnector::updateNgram(const Ngram ngram, const int count) const
192 {
193  std::stringstream query;
194 
195  query << "UPDATE _" << ngram.size() << "_gram "
196  << "SET count = " << count
197  << buildWhereClause(ngram) << ";";
198 
199  executeSql(query.str());
200 }
201 
202 std::string DatabaseConnector::buildWhereClause(const Ngram ngram) const
203 {
204  std::stringstream where_clause;
205  where_clause << " WHERE";
206  for (size_t i = 0; i < ngram.size(); i++) {
207  if (i < ngram.size() - 1) {
208  where_clause << " word_" << ngram.size() - i - 1 << " = '"
209  << sanitizeString(ngram[i]) << "' AND";
210  } else {
211  where_clause << " word = '" << sanitizeString(ngram[ngram.size() - 1]) << "'";
212  }
213  }
214  return where_clause.str();
215 }
216 
217 // TODO REVISIT refactor: this is same as buildWhereClause, except for
218 // "word = " instead of "word LIKE "
219 std::string DatabaseConnector::buildWhereLikeClause(const Ngram ngram) const
220 {
221  std::stringstream where_clause;
222  where_clause << " WHERE";
223  for (size_t i = 0; i < ngram.size(); i++) {
224  if (i < ngram.size() - 1) {
225  where_clause << " word_" << ngram.size() - i - 1 << " = '"
226  << sanitizeString(ngram[i]) << "' AND";
227  } else {
228  where_clause << " word LIKE '" << sanitizeString(ngram[ngram.size() - 1]) << "%'";
229  }
230  }
231  return where_clause.str();
232 }
233 
234 std::string DatabaseConnector::buildWhereLikeClauseFiltered(const Ngram ngram, const char** filter) const
235 {
236  std::stringstream where_clause;
237  where_clause << " WHERE";
238  for (size_t i = 0; i < ngram.size(); i++) {
239  if (i < ngram.size() - 1) {
240  where_clause << " word_" << ngram.size() - i - 1 << " = '"
241  << sanitizeString(ngram[i]) << "' AND";
242  } else {
243  if(filter == 0)
244  where_clause << " word LIKE '" << sanitizeString(ngram[ngram.size() - 1]) << "%'";
245  else {
246  std::string true_prefix = sanitizeString(ngram[ngram.size() - 1]);
247  where_clause << " (";
248  for (int j = 0; filter[j] != 0; j++) {
249 // for(size_t j=0; j < filter.size()-1; j++)
250  if (j) {
251  where_clause << " OR ";
252  }
253  where_clause << " word LIKE '" << true_prefix << filter[j] << "%'";
254  }
255 // where_clause << " word LIKE '" << true_prefix <<"%' )";
256  where_clause << ')';
257  }
258  }
259  }
260  return where_clause.str();
261 }
262 
263 
264 std::string DatabaseConnector::buildSelectLikeClause(const int cardinality) const
265 {
266  assert(cardinality > 0);
267 
268  std::stringstream result;
269  for (int i = cardinality - 1; i >= 0; i--) {
270  if (i != 0) {
271  result << "word_" << i << ", ";
272  } else {
273  result << "word, count";
274  }
275  }
276 
277  return result.str();
278 }
279 
280 std::string DatabaseConnector::buildValuesClause(const Ngram ngram, const int count) const
281 {
282  std::stringstream values_clause;
283  values_clause << "VALUES(";
284  for (size_t i = 0; i < ngram.size(); i++) {
285  if (i < ngram.size() - 1) {
286  values_clause << "'" << sanitizeString(ngram[i]) << "', ";
287  } else {
288  values_clause << "'" << sanitizeString(ngram[i]) << "', " << count << ")";
289  }
290  }
291  return values_clause.str();
292 }
293 
294 std::string DatabaseConnector::sanitizeString(const std::string str) const
295 {
296  // Escape single quotes
297  std::size_t searchPos = 0;
298  std::size_t pos;
299  std::string result(str);
300  while((pos = result.find("'", searchPos)) != std::string::npos)
301  {
302  result.replace(pos, 1, "''");
303  searchPos = pos + 2;
304  }
305  return result;
306 }
307 
309 {
310  // Initialize count to zero and then check that we have at least
311  // an entry in the table of ngram counts returned by the
312  // executeSql() method. If so, convert it into an integer and
313  // return it.
314  //
315  // REVISIT: make conversion to integer more robust (strtol ??)
316  //
317  int count = 0;
318  if (table.size() > 0) {
319  if (table[0].size() > 0) {
320  count = atoi(table[0][0].c_str());
321  }
322  }
323 
324  logger << DEBUG << "table: ";
325  for (size_t i = 0; i < table.size(); i++) {
326  for (size_t j = 0; j < table[i].size(); j++) {
327  logger << DEBUG << table[i][j] << '\t';
328  }
329  logger << DEBUG << endl;
330  }
331 
332  return (count > 0 ? count : 0);
333 }
334 
336 {
337  executeSql("BEGIN TRANSACTION;");
338 }
339 
341 {
342  executeSql("END TRANSACTION;");
343 }
344 
346 {
347  executeSql("ROLLBACK TRANSACTION;");
348 }
349 
351 {
352  return database_filename;
353 }
354 
355 std::string DatabaseConnector::set_database_filename (const std::string& filename)
356 {
357  std::string prev_filename = database_filename;
358 
359  database_filename = expand_variables (filename);
360 
361  // make an attempt at determining whether directory where language
362  // model database is located exists and try to create it if it
363  // does not... only cater for one directory level to create it.
364  //
365  std::string dir = Utility::dirname (database_filename);
366  if (! dir.empty()) {
367  // check that specified directory exists and accessible
368  if (! Utility::is_directory_usable (dir)) {
369  // create it if not
371  }
372  }
373 
374  return prev_filename;
375 }
376 
377 std::string DatabaseConnector::expand_variables (std::string filepath) const
378 {
379  // scan the filepath for variables, which follow the same pattern
380  // as shell variables - strings enclosed in '${' and '}'
381  //
382  const std::string start_marker = "${";
383  const std::string end_marker = "}";
384 
385  std::list<std::string> variables;
386 
387  std::string::size_type pos_start = filepath.find (start_marker);
388  while (pos_start != std::string::npos)
389  {
390  std::string::size_type pos_end = filepath.find (end_marker, pos_start);
391  if (pos_end != std::string::npos) {
392  variables.push_back (filepath.substr(pos_start + start_marker.size(), pos_end - end_marker.size() - pos_start - 1));
393  }
394 
395  pos_start = filepath.find (start_marker, pos_end);
396  }
397 
398  for (std::list<std::string>::const_iterator it = variables.begin();
399  it != variables.end();
400  it++)
401  {
402  substitute_variable_in_string(*it, filepath);
403  }
404 
405  return filepath;
406 }
407 
408 void DatabaseConnector::substitute_variable_in_string (const std::string& variable_name, std::string& filepath) const
409 {
410  std::string variable_token = "${" + variable_name + "}";
411 
412  for (std::string::size_type pos = filepath.find (variable_token);
413  pos != std::string::npos;
414  pos = filepath.find (variable_token, pos))
415  {
416  const char* value = getenv(variable_name.c_str());
417  if (value)
418  {
419  filepath.replace (pos,
420  variable_token.size(),
421  value);
422  }
423  else
424  {
425  // handle "special" variables
426  if (variable_name == "HOME")
427  {
428  value = getenv("USERPROFILE");
429  if (value)
430  {
431  filepath.replace (pos,
432  variable_token.size(),
433  value);
434  }
435  }
436  else
437  {
438  // FIXME: maybe throw exception instead of leaving
439  // variable name in string?
440  //
441  filepath.replace (pos,
442  variable_token.size(),
443  variable_name);
444  }
445  }
446  }
447 }
448 
449 void DatabaseConnector::set_cardinality (const size_t card)
450 {
451  cardinality = card;
452 }
453 
455 {
456  return cardinality;
457 }
458 
459 void DatabaseConnector::set_read_write_mode (const bool read_write)
460 {
461  read_write_mode = read_write;
462 }
463 
465 {
466  return read_write_mode;
467 }
std::string get_database_filename() const
virtual void beginTransaction() const
std::string set_database_filename(const std::string &filename)
std::string buildSelectLikeClause(const int cardinality) const
NgramTable getNgramLikeTableFiltered(const Ngram ngram, const char **filter, int limit=-1) const
std::string buildWhereLikeClauseFiltered(const Ngram ngram, const char **filter) const
int getUnigramCountsSum() const
std::string buildWhereLikeClause(const Ngram ngram) const
size_t get_cardinality() const
void createNgramTable(const size_t cardinality) const
int getNgramCount(const Ngram ngram) const
void set_read_write_mode(const bool read_write)
void removeNgram(const Ngram ngram) const
std::string sanitizeString(const std::string) const
void set_cardinality(const size_t cardinality)
std::string database_filename
DatabaseConnector(const std::string database_name, const size_t cardinality, const bool read_write)
virtual void endTransaction() const
std::string buildValuesClause(const Ngram ngram, const int count) const
static void create_directory(const std::string &dir)
Definition: utility.cpp:330
void updateNgram(const Ngram ngram, const int count) const
virtual NgramTable executeSql(const std::string query) const =0
std::vector< Ngram > NgramTable
int extractFirstInteger(const NgramTable &) const
void insertNgram(const Ngram ngram, const int count) const
void substitute_variable_in_string(const std::string &variable_name, std::string &filepath) const
Logger< char > logger
bool get_read_write_mode() const
static std::string dirname(const std::string &)
Definition: utility.cpp:275
virtual void rollbackTransaction() const
Definition: ngram.h:33
int incrementNgramCount(const Ngram ngram) const
NgramTable getNgramLikeTable(const Ngram ngram, int limit=-1) const
std::string expand_variables(std::string filename) const
static bool is_directory_usable(const std::string &dir)
Definition: utility.cpp:307
std::string buildWhereClause(const Ngram ngram) const
const Logger< _charT, _Traits > & endl(const Logger< _charT, _Traits > &lgr)
Definition: logger.h:278