Drizzled Public API Documentation

key_field.cc
1 /* - mode: c; c-basic-offset: 2; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
3  *
4  * Copyright (C) 2008-2009 Sun Microsystems, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <config.h>
22 
23 #include <drizzled/sql_select.h>
24 #include <drizzled/nested_join.h>
25 #include <drizzled/item/cmpfunc.h>
26 #include <drizzled/table.h>
27 #include <drizzled/optimizer/key_field.h>
28 #include <drizzled/optimizer/key_use.h>
29 #include <drizzled/sql_lex.h>
30 #include <drizzled/item/subselect.h>
31 
32 #include <vector>
33 
34 using namespace std;
35 
36 namespace drizzled
37 {
38 
39 void optimizer::add_key_part(DYNAMIC_ARRAY *keyuse_array,
40  optimizer::KeyField *key_field)
41 {
42  Field *field= key_field->getField();
43  Table *form= field->getTable();
44 
45  if (key_field->isEqualityCondition() &&
46  ! (key_field->getOptimizeFlags() & KEY_OPTIMIZE_EXISTS))
47  {
48  for (uint32_t key= 0; key < form->sizeKeys(); key++)
49  {
50  if (! (form->keys_in_use_for_query.test(key)))
51  continue;
52 
53  uint32_t key_parts= (uint32_t) form->key_info[key].key_parts;
54  for (uint32_t part= 0; part < key_parts; part++)
55  {
56  if (field->eq(form->key_info[key].key_part[part].field))
57  {
58  optimizer::KeyUse keyuse(field->getTable(),
59  key_field->getValue(),
60  key_field->getValue()->used_tables(),
61  key,
62  part,
63  key_field->getOptimizeFlags() & KEY_OPTIMIZE_REF_OR_NULL,
64  1 << part,
65  0,
66  key_field->rejectNullValues(),
67  key_field->getConditionalGuard());
68  keyuse_array->push_back(&keyuse);
69  }
70  }
71  }
72  }
73 }
74 
75 void optimizer::add_key_fields_for_nj(Join *join,
76  TableList *nested_join_table,
77  optimizer::KeyField **end,
78  uint32_t *and_level,
79  vector<optimizer::SargableParam> &sargables)
80 {
81  List<TableList>::iterator li(nested_join_table->getNestedJoin()->join_list.begin());
82  List<TableList>::iterator li2(nested_join_table->getNestedJoin()->join_list.begin());
83  bool have_another= false;
84  table_map tables= 0;
85  TableList *table;
86  assert(nested_join_table->getNestedJoin());
87 
88  while ((table= li++) || (have_another && (li=li2, have_another=false,
89  (table= li++))))
90  {
91  if (table->getNestedJoin())
92  {
93  if (! table->on_expr)
94  {
95  /* It's a semi-join nest. Walk into it as if it wasn't a nest */
96  have_another= true;
97  li2= li;
98  li= List<TableList>::iterator(table->getNestedJoin()->join_list.begin());
99  }
100  else
101  add_key_fields_for_nj(join, table, end, and_level, sargables);
102  }
103  else
104  if (! table->on_expr)
105  tables|= table->table->map;
106  }
107  if (nested_join_table->on_expr)
108  {
109  add_key_fields(join,
110  end,
111  and_level,
112  nested_join_table->on_expr,
113  tables,
114  sargables);
115  }
116 }
117 
118 optimizer::KeyField *optimizer::merge_key_fields(optimizer::KeyField *start,
119  optimizer::KeyField *new_fields,
120  optimizer::KeyField *end,
121  uint32_t and_level)
122 {
123  if (start == new_fields)
124  return start; // Impossible or
125  if (new_fields == end)
126  return start; // No new fields, skip all
127 
128  optimizer::KeyField *first_free= new_fields;
129 
130  /* Mark all found fields in old array */
131  for (; new_fields != end; new_fields++)
132  {
133  for (optimizer::KeyField *old= start; old != first_free; old++)
134  {
135  if (old->getField() == new_fields->getField())
136  {
137  /*
138  NOTE: below const_item() call really works as "!used_tables()", i.e.
139  it can return false where it is feasible to make it return true.
140 
141  The cause is as follows: Some of the tables are already known to be
142  const tables (the detection code is in make_join_statistics(),
143  above the update_ref_and_keys() call), but we didn't propagate
144  information about this: Table::const_table is not set to true, and
145  Item::update_used_tables() hasn't been called for each item.
146  The result of this is that we're missing some 'ref' accesses.
147  TODO: OptimizerTeam: Fix this
148  */
149  if (! new_fields->getValue()->const_item())
150  {
151  /*
152  If the value matches, we can use the key reference.
153  If not, we keep it until we have examined all new values
154  */
155  if (old->getValue()->eq(new_fields->getValue(), old->getField()->binary()))
156  {
157  old->setLevel(and_level);
158  old->setOptimizeFlags(((old->getOptimizeFlags() &
159  new_fields->getOptimizeFlags() &
160  KEY_OPTIMIZE_EXISTS) |
161  ((old->getOptimizeFlags() |
162  new_fields->getOptimizeFlags()) &
163  KEY_OPTIMIZE_REF_OR_NULL)));
164  old->setRejectNullValues(old->rejectNullValues() &&
165  new_fields->rejectNullValues());
166  }
167  }
168  else if (old->isEqualityCondition() &&
169  new_fields->isEqualityCondition() &&
170  old->getValue()->eq_by_collation(new_fields->getValue(),
171  old->getField()->binary(),
172  old->getField()->charset()))
173 
174  {
175  old->setLevel(and_level);
176  old->setOptimizeFlags(((old->getOptimizeFlags() &
177  new_fields->getOptimizeFlags() &
178  KEY_OPTIMIZE_EXISTS) |
179  ((old->getOptimizeFlags() |
180  new_fields->getOptimizeFlags()) &
181  KEY_OPTIMIZE_REF_OR_NULL)));
182  old->setRejectNullValues(old->rejectNullValues() &&
183  new_fields->rejectNullValues());
184  }
185  else if (old->isEqualityCondition() &&
186  new_fields->isEqualityCondition() &&
187  ((old->getValue()->const_item() &&
188  old->getValue()->is_null()) ||
189  new_fields->getValue()->is_null()))
190  {
191  /* field = expression OR field IS NULL */
192  old->setLevel(and_level);
193  old->setOptimizeFlags(KEY_OPTIMIZE_REF_OR_NULL);
194  /*
195  Remember the NOT NULL value unless the value does not depend
196  on other tables.
197  */
198  if (! old->getValue()->used_tables() &&
199  old->getValue()->is_null())
200  {
201  old->setValue(new_fields->getValue());
202  }
203  /* The referred expression can be NULL: */
204  old->setRejectNullValues(false);
205  }
206  else
207  {
208  /*
209  We are comparing two different const. In this case we can't
210  use a key-lookup on this so it's better to remove the value
211  and let the range optimzier handle it
212  */
213  if (old == --first_free) // If last item
214  break;
215  *old= *first_free; // Remove old value
216  old--; // Retry this value
217  }
218  }
219  }
220  }
221  /* Remove all not used items */
222  for (optimizer::KeyField *old= start; old != first_free;)
223  {
224  if (old->getLevel() != and_level)
225  { // Not used in all levels
226  if (old == --first_free)
227  break;
228  *old= *first_free; // Remove old value
229  continue;
230  }
231  old++;
232  }
233  return first_free;
234 }
235 
236 void optimizer::add_key_field(optimizer::KeyField **key_fields,
237  uint32_t and_level,
238  Item_func *cond,
239  Field *field,
240  bool eq_func,
241  Item **value,
242  uint32_t num_values,
243  table_map usable_tables,
244  vector<optimizer::SargableParam> &sargables)
245 {
246  uint32_t exists_optimize= 0;
247  if (! (field->flags & PART_KEY_FLAG))
248  {
249  // Don't remove column IS NULL on a LEFT JOIN table
250  if (! eq_func || (*value)->type() != Item::NULL_ITEM ||
251  ! field->getTable()->maybe_null || field->null_ptr)
252  return; // Not a key. Skip it
253  exists_optimize= KEY_OPTIMIZE_EXISTS;
254  assert(num_values == 1);
255  }
256  else
257  {
258  table_map used_tables= 0;
259  bool optimizable= 0;
260  for (uint32_t i= 0; i < num_values; i++)
261  {
262  used_tables|= (value[i])->used_tables();
263  if (! ((value[i])->used_tables() & (field->getTable()->map | RAND_TABLE_BIT)))
264  optimizable= 1;
265  }
266  if (! optimizable)
267  return;
268  if (! (usable_tables & field->getTable()->map))
269  {
270  if (! eq_func || (*value)->type() != Item::NULL_ITEM ||
271  ! field->getTable()->maybe_null || field->null_ptr)
272  return; // Can't use left join optimize
273  exists_optimize= KEY_OPTIMIZE_EXISTS;
274  }
275  else
276  {
277  JoinTable *stat= field->getTable()->reginfo.join_tab;
278  key_map possible_keys= field->key_start;
279  possible_keys&= field->getTable()->keys_in_use_for_query;
280  stat[0].keys|= possible_keys; // Add possible keys
281 
282  /*
283  Save the following cases:
284  Field op constant
285  Field LIKE constant where constant doesn't start with a wildcard
286  Field = field2 where field2 is in a different table
287  Field op formula
288  Field IS NULL
289  Field IS NOT NULL
290  Field BETWEEN ...
291  Field IN ...
292  */
293  stat[0].key_dependent|= used_tables;
294 
295  bool is_const= 1;
296  for (uint32_t i= 0; i < num_values; i++)
297  {
298  if (! (is_const&= value[i]->const_item()))
299  break;
300  }
301  if (is_const)
302  stat[0].const_keys|= possible_keys;
303  else if (! eq_func)
304  {
305  /*
306  Save info to be able check whether this predicate can be
307  considered as sargable for range analisis after reading const tables.
308  We do not save info about equalities as update_const_equal_items
309  will take care of updating info on keys from sargable equalities.
310  */
311  optimizer::SargableParam tmp(field, value, num_values);
312  sargables.push_back(tmp);
313  }
314  /*
315  We can't always use indexes when comparing a string index to a
316  number. cmp_type() is checked to allow compare of dates to numbers.
317  eq_func is NEVER true when num_values > 1
318  */
319  if (! eq_func)
320  {
321  /*
322  Additional optimization: if we're processing
323  "t.key BETWEEN c1 AND c1" then proceed as if we were processing
324  "t.key = c1".
325  TODO: This is a very limited fix. A more generic fix is possible.
326  There are 2 options:
327  A) Make equality propagation code be able to handle BETWEEN
328  (including cases like t1.key BETWEEN t2.key AND t3.key)
329  B) Make range optimizer to infer additional "t.key = c" equalities
330  and use them in equality propagation process (see details in
331  OptimizerKBAndTodo)
332  */
333  if ((cond->functype() != Item_func::BETWEEN) ||
334  ((Item_func_between*) cond)->negated ||
335  ! value[0]->eq(value[1], field->binary()))
336  return;
337  eq_func= true;
338  }
339 
340  if (field->result_type() == STRING_RESULT)
341  {
342  if ((*value)->result_type() != STRING_RESULT)
343  {
344  if (field->cmp_type() != (*value)->result_type())
345  return;
346  }
347  else
348  {
349  /*
350  We can't use indexes if the effective collation
351  of the operation differ from the field collation.
352  */
353  if (field->cmp_type() == STRING_RESULT &&
354  ((Field_str*)field)->charset() != cond->compare_collation())
355  return;
356  }
357  }
358  }
359  }
360  /*
361  For the moment eq_func is always true. This slot is reserved for future
362  extensions where we want to remembers other things than just eq comparisons
363  */
364  assert(eq_func);
365  /* Store possible eq field */
366  (*key_fields)->setField(field);
367  (*key_fields)->setEqualityConditionUsed(eq_func);
368  (*key_fields)->setValue(*value);
369  (*key_fields)->setLevel(and_level);
370  (*key_fields)->setOptimizeFlags(exists_optimize);
371  /*
372  If the condition has form "tbl.keypart = othertbl.field" and
373  othertbl.field can be NULL, there will be no matches if othertbl.field
374  has NULL value.
375  We use null_rejecting in add_not_null_conds() to add
376  'othertbl.field IS NOT NULL' to tab->select_cond.
377  */
378  (*key_fields)->setRejectNullValues((cond->functype() == Item_func::EQ_FUNC ||
379  cond->functype() == Item_func::MULT_EQUAL_FUNC) &&
380  ((*value)->type() == Item::FIELD_ITEM) &&
381  ((Item_field*)*value)->field->maybe_null());
382  (*key_fields)->setConditionalGuard(NULL);
383  (*key_fields)++;
384 }
385 
386 void optimizer::add_key_equal_fields(optimizer::KeyField **key_fields,
387  uint32_t and_level,
388  Item_func *cond,
389  Item_field *field_item,
390  bool eq_func,
391  Item **val,
392  uint32_t num_values,
393  table_map usable_tables,
394  vector<optimizer::SargableParam> &sargables)
395 {
396  Field *field= field_item->field;
397  add_key_field(key_fields, and_level, cond, field,
398  eq_func, val, num_values, usable_tables, sargables);
399  Item_equal *item_equal= field_item->item_equal;
400  if (item_equal)
401  {
402  /*
403  Add to the set of possible key values every substitution of
404  the field for an equal field included into item_equal
405  */
406  Item_equal_iterator it(item_equal->begin());
407  Item_field *item;
408  while ((item= it++))
409  {
410  if (! field->eq(item->field))
411  {
412  add_key_field(key_fields, and_level, cond, item->field,
413  eq_func, val, num_values, usable_tables,
414  sargables);
415  }
416  }
417  }
418 }
419 
420 void optimizer::add_key_fields(Join *join,
421  optimizer::KeyField **key_fields,
422  uint32_t *and_level,
423  COND *cond,
424  table_map usable_tables,
425  vector<optimizer::SargableParam> &sargables)
426 {
427  if (cond->type() == Item_func::COND_ITEM)
428  {
429  List<Item>::iterator li(((Item_cond*) cond)->argument_list()->begin());
430  optimizer::KeyField *org_key_fields= *key_fields;
431 
432  if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
433  {
434  Item *item;
435  while ((item= li++))
436  {
437  add_key_fields(join,
438  key_fields,
439  and_level,
440  item,
441  usable_tables,
442  sargables);
443  }
444  for (; org_key_fields != *key_fields; org_key_fields++)
445  org_key_fields->setLevel(*and_level);
446  }
447  else
448  {
449  (*and_level)++;
450  add_key_fields(join,
451  key_fields,
452  and_level,
453  li++,
454  usable_tables,
455  sargables);
456  Item *item;
457  while ((item= li++))
458  {
459  optimizer::KeyField *start_key_fields= *key_fields;
460  (*and_level)++;
461  add_key_fields(join,
462  key_fields,
463  and_level,
464  item,
465  usable_tables,
466  sargables);
467  *key_fields= merge_key_fields(org_key_fields, start_key_fields,
468  *key_fields, ++(*and_level));
469  }
470  }
471  return;
472  }
473 
474  /*
475  Subquery optimization: Conditions that are pushed down into subqueries
476  are wrapped into Item_func_trig_cond. We process the wrapped condition
477  but need to set cond_guard for KeyUse elements generated from it.
478  */
479  {
480  if (cond->type() == Item::FUNC_ITEM &&
481  ((Item_func*)cond)->functype() == Item_func::TRIG_COND_FUNC)
482  {
483  Item *cond_arg= ((Item_func*)cond)->arguments()[0];
484  if (! join->group_list &&
485  ! join->order &&
486  join->unit->item &&
487  join->unit->item->substype() == Item_subselect::IN_SUBS &&
488  ! join->unit->is_union())
489  {
490  optimizer::KeyField *save= *key_fields;
491  add_key_fields(join,
492  key_fields,
493  and_level,
494  cond_arg,
495  usable_tables,
496  sargables);
497  /* Indicate that this ref access candidate is for subquery lookup */
498  for (; save != *key_fields; save++)
499  save->setConditionalGuard(((Item_func_trig_cond*)cond)->get_trig_var());
500  }
501  return;
502  }
503  }
504 
505  /* If item is of type 'field op field/constant' add it to key_fields */
506  if (cond->type() != Item::FUNC_ITEM)
507  return;
508  Item_func *cond_func= (Item_func*) cond;
509  switch (cond_func->select_optimize())
510  {
511  case Item_func::OPTIMIZE_NONE:
512  break;
513  case Item_func::OPTIMIZE_KEY:
514  {
515  Item **values;
516  // BETWEEN, IN, NE
517  if (cond_func->key_item()->real_item()->type() == Item::FIELD_ITEM &&
518  ! (cond_func->used_tables() & OUTER_REF_TABLE_BIT))
519  {
520  values= cond_func->arguments() + 1;
521  if (cond_func->functype() == Item_func::NE_FUNC &&
522  cond_func->arguments()[1]->real_item()->type() == Item::FIELD_ITEM &&
523  ! (cond_func->arguments()[0]->used_tables() & OUTER_REF_TABLE_BIT))
524  values--;
525  assert(cond_func->functype() != Item_func::IN_FUNC ||
526  cond_func->argument_count() != 2);
527  add_key_equal_fields(key_fields, *and_level, cond_func,
528  (Item_field*) (cond_func->key_item()->real_item()),
529  0, values,
530  cond_func->argument_count()-1,
531  usable_tables, sargables);
532  }
533  if (cond_func->functype() == Item_func::BETWEEN)
534  {
535  values= cond_func->arguments();
536  for (uint32_t i= 1 ; i < cond_func->argument_count(); i++)
537  {
538  Item_field *field_item;
539  if (cond_func->arguments()[i]->real_item()->type() == Item::FIELD_ITEM
540  &&
541  ! (cond_func->arguments()[i]->used_tables() & OUTER_REF_TABLE_BIT))
542  {
543  field_item= (Item_field *) (cond_func->arguments()[i]->real_item());
544  add_key_equal_fields(key_fields, *and_level, cond_func,
545  field_item, 0, values, 1, usable_tables,
546  sargables);
547  }
548  }
549  }
550  break;
551  }
552  case Item_func::OPTIMIZE_OP:
553  {
554  bool equal_func= (cond_func->functype() == Item_func::EQ_FUNC ||
555  cond_func->functype() == Item_func::EQUAL_FUNC);
556 
557  if (cond_func->arguments()[0]->real_item()->type() == Item::FIELD_ITEM &&
558  ! (cond_func->arguments()[0]->used_tables() & OUTER_REF_TABLE_BIT))
559  {
560  add_key_equal_fields(key_fields, *and_level, cond_func,
561  (Item_field*) (cond_func->arguments()[0])->real_item(),
562  equal_func,
563  cond_func->arguments()+1, 1, usable_tables,
564  sargables);
565  }
566  if (cond_func->arguments()[1]->real_item()->type() == Item::FIELD_ITEM &&
567  cond_func->functype() != Item_func::LIKE_FUNC &&
568  ! (cond_func->arguments()[1]->used_tables() & OUTER_REF_TABLE_BIT))
569  {
570  add_key_equal_fields(key_fields, *and_level, cond_func,
571  (Item_field*) (cond_func->arguments()[1])->real_item(), equal_func,
572  cond_func->arguments(),1,usable_tables,
573  sargables);
574  }
575  break;
576  }
577  case Item_func::OPTIMIZE_NULL:
578  /* column_name IS [NOT] NULL */
579  if (cond_func->arguments()[0]->real_item()->type() == Item::FIELD_ITEM &&
580  ! (cond_func->used_tables() & OUTER_REF_TABLE_BIT))
581  {
582  Item *tmp= new Item_null;
583  if (unlikely(! tmp)) // Should never be true
584  return;
585  add_key_equal_fields(key_fields, *and_level, cond_func,
586  (Item_field*) (cond_func->arguments()[0])->real_item(),
587  cond_func->functype() == Item_func::ISNULL_FUNC,
588  &tmp, 1, usable_tables, sargables);
589  }
590  break;
591  case Item_func::OPTIMIZE_EQUAL:
592  Item_equal *item_equal= (Item_equal *) cond;
593  Item *const_item= item_equal->get_const();
594  Item_equal_iterator it(item_equal->begin());
595  Item_field *item;
596  if (const_item)
597  {
598  /*
599  For each field field1 from item_equal consider the equality
600  field1=const_item as a condition allowing an index access of the table
601  with field1 by the keys value of field1.
602  */
603  while ((item= it++))
604  {
605  add_key_field(key_fields, *and_level, cond_func, item->field,
606  true, &const_item, 1, usable_tables, sargables);
607  }
608  }
609  else
610  {
611  /*
612  Consider all pairs of different fields included into item_equal.
613  For each of them (field1, field1) consider the equality
614  field1=field2 as a condition allowing an index access of the table
615  with field1 by the keys value of field2.
616  */
617  Item_equal_iterator fi(item_equal->begin());
618  while ((item= fi++))
619  {
620  Field *field= item->field;
621  while ((item= it++))
622  {
623  if (! field->eq(item->field))
624  {
625  add_key_field(key_fields, *and_level, cond_func, field,
626  true, (Item **) &item, 1, usable_tables,
627  sargables);
628  }
629  }
630  it= item_equal->begin();
631  }
632  }
633  break;
634  }
635 }
636 
637 } /* namespace drizzled */