Lucene++ - a full-featured, c++ search engine
API Documentation


SegmentMerger.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef SEGMENTMERGER_H
8 #define SEGMENTMERGER_H
9 
10 #include "LuceneObject.h"
11 
12 namespace Lucene {
13 
/// Combines two or more segments, each represented by an IndexReader (the class summary on
/// this page is truncated after that point).
///
/// NOTE(review): this is a rendered listing, not the raw header. The leading number on each
/// line is the header's own line number, and gaps in the numbering are lines the rendering
/// dropped. The cross-reference index on this page places further data members inside this
/// class's line range (directory, termIndexInterval, readers, fieldInfos, checkAbort,
/// mergeDocStores, matchingSegmentReaders, rawDocLengths, rawDocLengths2, queue,
/// omitTermFreqAndPositions, docMaps, delCounts) and also names setMatchingSegmentReaders(),
/// getMergedFiles(), getDocMaps() and getDelCounts(), presumably members of this class too.
/// Consult the real header before relying on this listing as the full declaration.
20 class SegmentMerger : public LuceneObject {
21 public:
    /// Constructs a merger for the given directory and name. Per the merge() summary, merge()
    /// writes into "the directory passed to the constructor".
22  SegmentMerger(const DirectoryPtr& dir, const String& name);
    /// Constructs a merger from an IndexWriter, a name and a OneMerge.
    /// NOTE(review): how writer and merge are used is not visible here - confirm in the .cpp.
23  SegmentMerger(const IndexWriterPtr& writer, const String& name, const OneMergePtr& merge);
24  virtual ~SegmentMerger();
25 
27 
28 protected:
    // NOTE(review): presumably the segment name handed to the constructor - confirm.
30  String segment;
32 
35 
    // NOTE(review): presumably the document count produced by the last merge - confirm.
36  int32_t mergedDocs;
38 
42 
    /// Maximum number of contiguous documents to bulk-copy when merging stored fields.
44  static const int32_t MAX_RAW_MERGE_DOCS;
45 
49 
52 
53  ByteArray payloadBuffer;
56 
57 public:
    /// Norms header placeholder.
59  static const uint8_t NORMS_HEADER[];
    // NOTE(review): presumably the element count of NORMS_HEADER - confirm in the .cpp.
60  static const int32_t NORMS_HEADER_LENGTH;
61 
62 public:
63  bool hasProx();
64 
    /// Add an IndexReader to the collection of readers that are to be merged.
66  void add(const IndexReaderPtr& reader);
67 
    // NOTE(review): presumably returns the i-th reader registered through add() - confirm.
70  IndexReaderPtr segmentReader(int32_t i);
71 
    /// Merges the readers specified by the add method into the directory passed to the
    /// constructor.
    // NOTE(review): the int32_t result is presumably the number of merged documents - confirm.
74  int32_t merge();
75 
    /// Overload of merge() taking an explicit flag. The like-named member is summarised on this
    /// page as "whether we should merge doc stores (stored fields and vectors files)".
79  int32_t merge(bool mergeDocStores);
80 
    /// Close all IndexReaders that have been added. Should not be called before merge().
82  void closeReaders();
83 
85  HashSet<String> createCompoundFile(const String& fileName);
86 
88  int32_t mergeFields();
89 
92 
93 protected:
94  void addIndexed(const IndexReaderPtr& reader, const FieldInfosPtr& fInfos, HashSet<String> names, bool storeTermVectors,
95  bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads,
96  bool omitTFAndPositions);
97 
    // Two stored-field copy variants with identical signatures. Going by the names, one is for
    // readers with deletions and one for readers without - confirm in the .cpp.
99  int32_t copyFieldsWithDeletions(const FieldsWriterPtr& fieldsWriter, const IndexReaderPtr& reader, const FieldsReaderPtr& matchingFieldsReader);
100  int32_t copyFieldsNoDeletions(const FieldsWriterPtr& fieldsWriter, const IndexReaderPtr& reader, const FieldsReaderPtr& matchingFieldsReader);
101 
    /// Merge the TermVectors from each of the segments into the new one.
103  void mergeVectors();
104 
    // Term-vector counterparts of the copyFields* pair above, again with identical signatures.
105  void copyVectorsWithDeletions(const TermVectorsWriterPtr& termVectorsWriter, const TermVectorsReaderPtr& matchingVectorsReader, const IndexReaderPtr& reader);
106  void copyVectorsNoDeletions(const TermVectorsWriterPtr& termVectorsWriter, const TermVectorsReaderPtr& matchingVectorsReader, const IndexReaderPtr& reader);
107 
108  void mergeTerms();
109 
110  void mergeTermInfos(const FormatPostingsFieldsConsumerPtr& consumer);
111 
    /// Process postings from multiple segments all positioned on the same term. Writes out
    /// merged entries (the summary on this page is truncated after that point).
    // NOTE(review): meaning of n and of the return value is not shown here - confirm.
117  int32_t appendPostings(const FormatPostingsTermsConsumerPtr& termsConsumer, Collection<SegmentMergeInfoPtr> smis, int32_t n);
118 
119  void mergeNorms();
120 };
121 
/// LuceneObject subclass built from a OneMerge and a Directory. It holds a workCount value and
/// declares the virtual work(double units) hook that CheckAbortNull overrides.
///
/// NOTE(review): this page gives no summary for the class. The name suggests it checks whether
/// a running merge has been aborted as work is reported - confirm in SegmentMerger.cpp.
/// The page's cross-reference index also lists "OneMergePtr merge" (line 131) and
/// "DirectoryWeakPtr _dir" (line 132), which fall inside this class's line range but are not
/// shown in the listing below.
122 class CheckAbort : public LuceneObject {
123 public:
124  CheckAbort(const OneMergePtr& merge, const DirectoryPtr& dir);
125  virtual ~CheckAbort();
126 
128 
129 protected:
    // NOTE(review): presumably accumulates the units reported through work() - confirm.
130  double workCount;
133 
134 public:
    /// Virtual hook taking an amount of work in `units`.
    // NOTE(review): the scale of `units` and any side effects are not visible here - confirm.
138  virtual void work(double units);
139 };
140 
/// CheckAbort subclass that is default-constructible and overrides work(double units).
///
/// NOTE(review): the "Null" suffix suggests a do-nothing stand-in for CheckAbort, but the
/// implementation is not visible on this page - confirm in SegmentMerger.cpp.
141 class CheckAbortNull : public CheckAbort {
142 public:
143  CheckAbortNull();
144  virtual ~CheckAbortNull();
145 
147 
148 public:
    /// Overrides CheckAbort::work(double).
150  virtual void work(double units);
151 };
152 
153 }
154 
155 #endif
Definition: SegmentMerger.h:122
OneMergePtr merge
Definition: SegmentMerger.h:131
CheckAbortPtr checkAbort
Definition: SegmentMerger.h:37
Collection< SegmentReaderPtr > matchingSegmentReaders
Definition: SegmentMerger.h:46
int32_t mergedDocs
Definition: SegmentMerger.h:36
boost::shared_ptr< FieldsWriter > FieldsWriterPtr
Definition: LuceneTypes.h:133
boost::shared_ptr< OneMerge > OneMergePtr
Definition: LuceneTypes.h:192
void copyVectorsWithDeletions(const TermVectorsWriterPtr &termVectorsWriter, const TermVectorsReaderPtr &matchingVectorsReader, const IndexReaderPtr &reader)
void mergeTermInfos(const FormatPostingsFieldsConsumerPtr &consumer)
SegmentMerger(const DirectoryPtr &dir, const String &name)
boost::shared_ptr< IndexWriter > IndexWriterPtr
Definition: LuceneTypes.h:160
int32_t appendPostings(const FormatPostingsTermsConsumerPtr &termsConsumer, Collection< SegmentMergeInfoPtr > smis, int32_t n)
Process postings from multiple segments all positioned on the same term. Writes out merged entries in...
static const int32_t MAX_RAW_MERGE_DOCS
Maximum number of contiguous documents to bulk-copy when merging stored fields.
Definition: SegmentMerger.h:44
boost::weak_ptr< Directory > DirectoryWeakPtr
Definition: LuceneTypes.h:489
void copyVectorsNoDeletions(const TermVectorsWriterPtr &termVectorsWriter, const TermVectorsReaderPtr &matchingVectorsReader, const IndexReaderPtr &reader)
String segment
Definition: SegmentMerger.h:30
static const int32_t NORMS_HEADER_LENGTH
Definition: SegmentMerger.h:60
ByteArray payloadBuffer
Definition: SegmentMerger.h:53
int32_t merge()
Merges the readers specified by the add method into the directory passed to the constructor.
bool omitTermFreqAndPositions
Definition: SegmentMerger.h:51
boost::shared_ptr< TermVectorsWriter > TermVectorsWriterPtr
Definition: LuceneTypes.h:263
void mergeVectors()
Merge the TermVectors from each of the segments into the new one.
boost::shared_ptr< IndexReader > IndexReaderPtr
Definition: LuceneTypes.h:157
boost::shared_ptr< Directory > DirectoryPtr
Definition: LuceneTypes.h:489
boost::shared_ptr< FormatPostingsFieldsConsumer > FormatPostingsFieldsConsumerPtr
Definition: LuceneTypes.h:141
SegmentMergeQueuePtr queue
Definition: SegmentMerger.h:50
Base class for all Lucene classes.
Definition: LuceneObject.h:31
boost::shared_ptr< CheckAbort > CheckAbortPtr
Definition: LuceneTypes.h:94
int32_t termIndexInterval
Definition: SegmentMerger.h:31
Definition: SegmentMerger.h:141
int32_t copyFieldsNoDeletions(const FieldsWriterPtr &fieldsWriter, const IndexReaderPtr &reader, const FieldsReaderPtr &matchingFieldsReader)
void setMatchingSegmentReaders()
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
Collection< int32_t > rawDocLengths2
Definition: SegmentMerger.h:48
Definition: AbstractAllTermDocs.h:12
HashSet< String > getMergedFiles()
Collection< int32_t > delCounts
Definition: SegmentMerger.h:55
void closeReaders()
Close all IndexReaders that have been added. Should not be called before merge(). ...
void addIndexed(const IndexReaderPtr &reader, const FieldInfosPtr &fInfos, HashSet< String > names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, bool omitTFAndPositions)
The SegmentMerger class combines two or more Segments, represented by an IndexReader (add...
Definition: SegmentMerger.h:20
DirectoryWeakPtr _dir
Definition: SegmentMerger.h:132
bool mergeDocStores
Whether we should merge doc stores (stored fields and vectors files). When all segments we are mergin...
Definition: SegmentMerger.h:41
DirectoryPtr directory
Definition: SegmentMerger.h:26
boost::shared_ptr< TermVectorsReader > TermVectorsReaderPtr
Definition: LuceneTypes.h:256
Collection< Collection< int32_t > > getDocMaps()
Collection< Collection< int32_t > > docMaps
Definition: SegmentMerger.h:54
HashSet< String > createCompoundFile(const String &fileName)
boost::shared_ptr< SegmentMergeQueue > SegmentMergeQueuePtr
Definition: LuceneTypes.h:213
static const uint8_t NORMS_HEADER[]
norms header placeholder
Definition: SegmentMerger.h:59
boost::shared_ptr< FormatPostingsTermsConsumer > FormatPostingsTermsConsumerPtr
Definition: LuceneTypes.h:145
IndexReaderPtr segmentReader(int32_t i)
Collection< int32_t > rawDocLengths
Definition: SegmentMerger.h:47
boost::shared_ptr< FieldsReader > FieldsReaderPtr
Definition: LuceneTypes.h:131
Collection< int32_t > getDelCounts()
void add(const IndexReaderPtr &reader)
Add an IndexReader to the collection of readers that are to be merged.
int32_t copyFieldsWithDeletions(const FieldsWriterPtr &fieldsWriter, const IndexReaderPtr &reader, const FieldsReaderPtr &matchingFieldsReader)
Collection< IndexReaderPtr > readers
Definition: SegmentMerger.h:33
FieldInfosPtr fieldInfos
Definition: SegmentMerger.h:34
boost::shared_ptr< FieldInfos > FieldInfosPtr
Definition: LuceneTypes.h:127
double workCount
Definition: SegmentMerger.h:130

clucene.sourceforge.net