Lucene++ - a full-featured, c++ search engine
API Documentation


 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
SegmentMerger.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef SEGMENTMERGER_H
8 #define SEGMENTMERGER_H
9 
10 #include "LuceneObject.h"
11 
12 namespace Lucene {
13 
// Combines two or more segments, each represented by an IndexReader supplied via add().
// NOTE(review): this listing is a documentation rendering with comment lines stripped and some
// numbered lines missing; the symbol index further down the page lists additional members
// (e.g. directory, termIndexInterval, readers, fieldInfos, checkAbort, mergeDocStores, queue,
// docMaps, delCounts) that are not visible here -- consult the real header before editing.
20 class SegmentMerger : public LuceneObject {
21 public:
    // Constructs a merger for the segment `name`; `dir` is the directory that merge() writes into.
22  SegmentMerger(const DirectoryPtr& dir, const String& name);
    // Variant taking the owning writer and the merge being performed.
    // NOTE(review): how `writer` and `merge` are used is not visible in this listing -- confirm in the .cpp.
23  SegmentMerger(const IndexWriterPtr& writer, const String& name, const OneMergePtr& merge);
24  virtual ~SegmentMerger();
25 
27 
28 protected:
30  String segment;
32 
35 
36  int32_t mergedDocs;
38 
42 
    // Maximum number of contiguous documents to bulk-copy when merging stored fields.
44  static const int32_t MAX_RAW_MERGE_DOCS;
45 
49 
52 
53  ByteArray payloadBuffer;
56 
57 public:
    // Norms header placeholder.
59  static const uint8_t NORMS_HEADER[];
    // NOTE(review): presumably the byte length of NORMS_HEADER -- confirm against the definition.
60  static const int32_t NORMS_HEADER_LENGTH;
61 
62 public:
63  bool hasProx();
64 
    // Add an IndexReader to the collection of readers that are to be merged.
66  void add(const IndexReaderPtr& reader);
67 
    // NOTE(review): presumably returns the i-th reader previously passed to add() -- confirm.
70  IndexReaderPtr segmentReader(int32_t i);
71 
    // Merges the readers specified by the add method into the directory passed to the constructor.
    // NOTE(review): the meaning of the returned int32_t is not stated here (likely a document count) -- confirm.
74  int32_t merge();
75 
    // Overload of merge() taking an explicit mergeDocStores flag; the index describes mergeDocStores as
    // "whether we should merge doc stores (stored fields and vectors files)".
79  int32_t merge(bool mergeDocStores);
80 
    // Close all IndexReaders that have been added. Should not be called before merge().
82  void closeReaders();
83 
85  HashSet<String> createCompoundFile(const String& fileName);
86 
88  int32_t mergeFields();
89 
92 
93 protected:
94  void addIndexed(const IndexReaderPtr& reader, const FieldInfosPtr& fInfos, HashSet<String> names, bool storeTermVectors,
95  bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads,
96  bool omitTFAndPositions);
97 
99  int32_t copyFieldsWithDeletions(const FieldsWriterPtr& fieldsWriter, const IndexReaderPtr& reader, const FieldsReaderPtr& matchingFieldsReader);
100  int32_t copyFieldsNoDeletions(const FieldsWriterPtr& fieldsWriter, const IndexReaderPtr& reader, const FieldsReaderPtr& matchingFieldsReader);
101 
    // Merge the TermVectors from each of the segments into the new one.
103  void mergeVectors();
104 
105  void copyVectorsWithDeletions(const TermVectorsWriterPtr& termVectorsWriter, const TermVectorsReaderPtr& matchingVectorsReader, const IndexReaderPtr& reader);
106  void copyVectorsNoDeletions(const TermVectorsWriterPtr& termVectorsWriter, const TermVectorsReaderPtr& matchingVectorsReader, const IndexReaderPtr& reader);
107 
108  void mergeTerms();
109 
110  void mergeTermInfos(const FormatPostingsFieldsConsumerPtr& consumer);
111 
    // Process postings from multiple segments all positioned on the same term.
    // NOTE(review): the rest of this brief is truncated in the generated index; the roles of `smis`, `n`
    // and the return value are not stated here -- confirm in the .cpp.
117  int32_t appendPostings(const FormatPostingsTermsConsumerPtr& termsConsumer, Collection<SegmentMergeInfoPtr> smis, int32_t n);
118 
119  void mergeNorms();
120 };
121 
// Tracks work done on behalf of a merge, constructed from the merge and a directory.
// NOTE(review): the generated index lists further members (`merge`, `_dir` held as a DirectoryWeakPtr)
// on lines that are not visible in this listing.
122 class CheckAbort : public LuceneObject {
123 public:
124  CheckAbort(const OneMergePtr& merge, const DirectoryPtr& dir);
125  virtual ~CheckAbort();
126 
128 
129 protected:
    // NOTE(review): presumably the running total of units passed to work() -- confirm in the .cpp.
130  double workCount;
133 
134 public:
    // Records the fact that roughly `units` amount of work have been done since this method was last called.
    // NOTE(review): the remainder of this brief is truncated in the generated index.
138  virtual void work(double units);
139 };
// CheckAbort subclass that is default-constructible (takes no merge or directory).
141 class CheckAbortNull : public CheckAbort {
142 public:
143  CheckAbortNull();
144  virtual ~CheckAbortNull();
145 
147 
148 public:
    // Overrides CheckAbort::work.
    // NOTE(review): the generated index carries a "do nothing" brief for one work() entry, presumably this one -- confirm.
150  virtual void work(double units);
151 };
152 
153 }
154 
155 #endif
Definition: SegmentMerger.h:122
OneMergePtr merge
Definition: SegmentMerger.h:131
CheckAbortPtr checkAbort
Definition: SegmentMerger.h:37
virtual void work(double units)
do nothing
Collection< SegmentReaderPtr > matchingSegmentReaders
Definition: SegmentMerger.h:46
int32_t mergedDocs
Definition: SegmentMerger.h:36
boost::shared_ptr< FieldsWriter > FieldsWriterPtr
Definition: LuceneTypes.h:133
boost::shared_ptr< OneMerge > OneMergePtr
Definition: LuceneTypes.h:192
void copyVectorsWithDeletions(const TermVectorsWriterPtr &termVectorsWriter, const TermVectorsReaderPtr &matchingVectorsReader, const IndexReaderPtr &reader)
void mergeTermInfos(const FormatPostingsFieldsConsumerPtr &consumer)
SegmentMerger(const DirectoryPtr &dir, const String &name)
boost::shared_ptr< IndexWriter > IndexWriterPtr
Definition: LuceneTypes.h:160
int32_t appendPostings(const FormatPostingsTermsConsumerPtr &termsConsumer, Collection< SegmentMergeInfoPtr > smis, int32_t n)
Process postings from multiple segments all positioned on the same term. Writes out merged entries in...
virtual void work(double units)
Records the fact that roughly units amount of work have been done since this method was last called...
static const int32_t MAX_RAW_MERGE_DOCS
Maximum number of contiguous documents to bulk-copy when merging stored fields.
Definition: SegmentMerger.h:44
boost::weak_ptr< Directory > DirectoryWeakPtr
Definition: LuceneTypes.h:489
virtual ~CheckAbort()
void copyVectorsNoDeletions(const TermVectorsWriterPtr &termVectorsWriter, const TermVectorsReaderPtr &matchingVectorsReader, const IndexReaderPtr &reader)
String segment
Definition: SegmentMerger.h:30
static const int32_t NORMS_HEADER_LENGTH
Definition: SegmentMerger.h:60
ByteArray payloadBuffer
Definition: SegmentMerger.h:53
int32_t merge()
Merges the readers specified by the add method into the directory passed to the constructor.
bool omitTermFreqAndPositions
Definition: SegmentMerger.h:51
boost::shared_ptr< TermVectorsWriter > TermVectorsWriterPtr
Definition: LuceneTypes.h:263
void mergeVectors()
Merge the TermVectors from each of the segments into the new one.
boost::shared_ptr< IndexReader > IndexReaderPtr
Definition: LuceneTypes.h:157
boost::shared_ptr< Directory > DirectoryPtr
Definition: LuceneTypes.h:489
boost::shared_ptr< FormatPostingsFieldsConsumer > FormatPostingsFieldsConsumerPtr
Definition: LuceneTypes.h:141
SegmentMergeQueuePtr queue
Definition: SegmentMerger.h:50
Base class for all Lucene classes.
Definition: LuceneObject.h:31
boost::shared_ptr< CheckAbort > CheckAbortPtr
Definition: LuceneTypes.h:94
int32_t termIndexInterval
Definition: SegmentMerger.h:31
Definition: SegmentMerger.h:141
int32_t copyFieldsNoDeletions(const FieldsWriterPtr &fieldsWriter, const IndexReaderPtr &reader, const FieldsReaderPtr &matchingFieldsReader)
void setMatchingSegmentReaders()
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
Collection< int32_t > rawDocLengths2
Definition: SegmentMerger.h:48
Definition: AbstractAllTermDocs.h:12
HashSet< String > getMergedFiles()
Collection< int32_t > delCounts
Definition: SegmentMerger.h:55
void closeReaders()
Close all IndexReaders that have been added. Should not be called before merge(). ...
void addIndexed(const IndexReaderPtr &reader, const FieldInfosPtr &fInfos, HashSet< String > names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, bool omitTFAndPositions)
The SegmentMerger class combines two or more Segments, represented by an IndexReader (add...
Definition: SegmentMerger.h:20
DirectoryWeakPtr _dir
Definition: SegmentMerger.h:132
bool mergeDocStores
Whether we should merge doc stores (stored fields and vectors files). When all segments we are mergin...
Definition: SegmentMerger.h:41
DirectoryPtr directory
Definition: SegmentMerger.h:26
boost::shared_ptr< TermVectorsReader > TermVectorsReaderPtr
Definition: LuceneTypes.h:256
Collection< Collection< int32_t > > getDocMaps()
Collection< Collection< int32_t > > docMaps
Definition: SegmentMerger.h:54
HashSet< String > createCompoundFile(const String &fileName)
boost::shared_ptr< SegmentMergeQueue > SegmentMergeQueuePtr
Definition: LuceneTypes.h:213
static const uint8_t NORMS_HEADER[]
norms header placeholder
Definition: SegmentMerger.h:59
boost::shared_ptr< FormatPostingsTermsConsumer > FormatPostingsTermsConsumerPtr
Definition: LuceneTypes.h:145
IndexReaderPtr segmentReader(int32_t i)
Collection< int32_t > rawDocLengths
Definition: SegmentMerger.h:47
boost::shared_ptr< FieldsReader > FieldsReaderPtr
Definition: LuceneTypes.h:131
Collection< int32_t > getDelCounts()
CheckAbort(const OneMergePtr &merge, const DirectoryPtr &dir)
void add(const IndexReaderPtr &reader)
Add an IndexReader to the collection of readers that are to be merged.
int32_t copyFieldsWithDeletions(const FieldsWriterPtr &fieldsWriter, const IndexReaderPtr &reader, const FieldsReaderPtr &matchingFieldsReader)
Collection< IndexReaderPtr > readers
Definition: SegmentMerger.h:33
FieldInfosPtr fieldInfos
Definition: SegmentMerger.h:34
boost::shared_ptr< FieldInfos > FieldInfosPtr
Definition: LuceneTypes.h:127
double workCount
Definition: SegmentMerger.h:127

clucene.sourceforge.net