Adding upstream version 2.5.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>

parent c71cb8b61d, commit 982828099e
783 changed files with 150650 additions and 0 deletions
index/scorch/README.md (new file, 367 lines)
@@ -0,0 +1,367 @@
# scorch

## Definitions

Batch
- A collection of Documents to mutate in the index.

Document
- Has a unique identifier (arbitrary bytes).
- Is comprised of a list of fields.

Field
- Has a name (string).
- Has a type (text, number, date, geopoint).
- Has a value (depending on type).
- Can be indexed, stored, or both.
- If indexed, can be analyzed.
- If indexed, can optionally store term vectors.

## Scope

Scorch *MUST* implement the bleve.index API without requiring any changes to this API.

Scorch *MAY* introduce new interfaces, which can be discovered to allow use of new capabilities not in the current API.

## Implementation

The scorch implementation starts with the concept of a segmented index.

A segment is simply a slice, subset, or portion of the entire index. A segmented index is one which is composed of one or more segments. Although segments are created in a particular order, knowing this ordering is not required to achieve correct semantics when querying. Because there is no ordering, when searching an index you can (and should) search all the segments concurrently.

### Internal Wrapper

In order to accommodate the existing APIs while also improving the implementation, the scorch implementation includes some wrapper functionality that must be described.

#### \_id field

In scorch, field 0 is prearranged to be named \_id. All documents have a value for this field, which is the document's external identifier. In this version the field *MUST* be both indexed AND stored. The scorch wrapper adds this field, as it will not be present in the Document from the calling bleve code.

NOTE: If a document already contains a field \_id, it will be replaced. If this is problematic, the caller must ensure such a scenario does not happen.
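
In code terms, the wrapper step is small. The sketch below assumes the bleve_index_api Document contract, whose AddIDField() is what the offline builder later in this change calls before analysis; the helper name itself is illustrative:

```
package scorch

import index "github.com/blevesearch/bleve_index_api"

// prepareForIndexing is an illustrative helper, not part of scorch itself.
// It shows where the reserved _id field gets injected.
func prepareForIndexing(doc index.Document) index.Document {
    // AddIDField (re)writes field 0, the indexed+stored _id field, from the
    // document's external identifier, replacing any caller-supplied _id as
    // the NOTE above warns.
    doc.AddIDField()
    return doc
}
```
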
### Proposed Structures

```
type Segment interface {

  Dictionary(field string) TermDictionary

}

type TermDictionary interface {

  PostingsList(term string, excluding PostingsList) PostingsList

}

type PostingsList interface {

  Next() Posting

  And(other PostingsList) PostingsList
  Or(other PostingsList) PostingsList

}

type Posting interface {
  Number() uint64

  Frequency() uint64
  Norm() float64

  Locations() Locations
}

type Locations interface {
  Start() uint64
  End() uint64
  Pos() uint64
  ArrayPositions() ...
}

type DeletedDocs {

}

type SegmentSnapshot struct {
  segment Segment
  deleted PostingsList
}

type IndexSnapshot struct {
  segment []SegmentSnapshot
}
```

**What about errors?**

**What about memory management or context?**

**Postings List: separate iterator to separate stateful from stateless?**
### Mutating the Index

The bleve.index API has methods for directly making individual mutations (Update/Delete/SetInternal/DeleteInternal); however, for this first implementation we assume that all of these calls can simply be turned into a Batch of size 1. This may be highly inefficient, but it will be correct. This decision is made based on the fact that Couchbase FTS always uses Batches.

NOTE: As a side-effect of this decision, it should be clear that performance tuning may depend on the batch size, which may in turn require changes in FTS.

From this point forward, only Batch mutations will be discussed.

Sequence of Operations:

1. For each document in the batch, search through all existing segments. The goal is to build up a per-segment bitset which tells us which documents in that segment are obsoleted by the addition of the new segment we're currently building. NOTE: we're not ready for this change to take effect yet, so rather than mutating anything, these operations simply return bitsets, which we can apply later. Logically, this is something like:

```
foreach segment {
    dict := segment.Dictionary("_id")
    postings := empty postings list
    foreach docID {
        postings = postings.Or(dict.PostingsList(docID, nil))
    }
}
```

NOTE: it is illustrated above as nested for loops, but some or all of these could be done concurrently. The end result is that for each segment, we have a (possibly empty) bitset.

2. Also concurrent with 1, the documents in the batch are analyzed. This analysis proceeds using the existing analyzer pool.

3. (after 2 completes) Analyzed documents are fed into a function which builds a new Segment representing this information.

4. We now have everything we need to update the state of the system to include this new snapshot (a condensed Go sketch follows this list):

- Acquire a lock
- Create a new IndexSnapshot
- For each SegmentSnapshot in the IndexSnapshot, take the deleted PostingsList and OR it with the new postings list for this Segment. Construct a new SegmentSnapshot for the segment using this new deleted PostingsList. Append this SegmentSnapshot to the IndexSnapshot.
- Create a new SegmentSnapshot wrapping our new segment with nil deleted docs.
- Append the new SegmentSnapshot to the IndexSnapshot
- Release the lock
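
A condensed Go sketch of step 4, assuming roaring bitmaps for the deleted postings (which is what the introducer code elsewhere in this change uses); names are simplified relative to the real implementation:

```
package scorch

import (
    "github.com/RoaringBitmap/roaring/v2"
    segment "github.com/blevesearch/scorch_segment_api/v2"
)

// introduce applies a freshly built segment plus the per-segment delete
// deltas computed in step 1. Illustrative sketch only; the real code also
// tracks ids, offsets, reference counts, and persistence notifications.
func (s *Scorch) introduce(newSeg segment.Segment, obsoletes map[uint64]*roaring.Bitmap) {
    s.rootLock.Lock()
    defer s.rootLock.Unlock()

    next := &IndexSnapshot{}
    for _, prev := range s.root.segment {
        deleted := prev.deleted
        if delta := obsoletes[prev.id]; delta != nil && !delta.IsEmpty() {
            if deleted == nil {
                deleted = delta
            } else {
                // never mutate the bitmap held by older snapshots
                deleted = roaring.Or(deleted, delta)
            }
        }
        next.segment = append(next.segment, &SegmentSnapshot{
            id:      prev.id,
            segment: prev.segment,
            deleted: deleted,
        })
    }
    // the new segment enters with no deletions yet
    next.segment = append(next.segment, &SegmentSnapshot{segment: newSeg})
    s.root = next
}
```
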
An ASCII art example:

```
0 - Empty Index

No segments

IndexSnapshot
  segments []
  deleted  []


1 - Index Batch [ A B C ]

segment 0
numbers [ 1 2 3 ]
_id     [ A B C ]

IndexSnapshot
  segments [ 0 ]
  deleted  [ nil ]


2 - Index Batch [ B' ]

segment 0           1
numbers [ 1 2 3 ]   [ 1 ]
_id     [ A B C ]   [ B ]

Compute bitset segment-0-deleted-by-1:
  [ 0 1 0 ]

OR it with previous (nil) (call it 0-1)
  [ 0 1 0 ]

IndexSnapshot
  segments [ 0   1 ]
  deleted  [ 0-1 nil ]


3 - Index Batch [ C' ]

segment 0           1       2
numbers [ 1 2 3 ]   [ 1 ]   [ 1 ]
_id     [ A B C ]   [ B ]   [ C ]

Compute bitset segment-0-deleted-by-2:
  [ 0 0 1 ]

OR it with previous ([ 0 1 0 ]) (call it 0-12)
  [ 0 1 1 ]

Compute bitset segment-1-deleted-by-2:
  [ 0 ]

OR it with previous (nil)
  still just nil

IndexSnapshot
  segments [ 0    1   2 ]
  deleted  [ 0-12 nil nil ]
```

**Is there an opportunity to stop early when a doc is found in one segment?**

**Also, is there a more efficient way to find bits for long lists of ids?**
### Searching

In the bleve.index API all searching starts by getting an IndexReader, which represents a snapshot of the index at a point in time.

As described in the section above, our index implementation maintains a pointer to the current IndexSnapshot. When a caller gets an IndexReader, they get a copy of this pointer, and can use it as long as they like. The IndexSnapshot contains SegmentSnapshots, which only contain pointers to immutable segments. The deleted posting lists associated with a segment change over time, but the particular deleted posting list in YOUR snapshot is immutable. This gives a stable view of the data.

#### Term Search

Term search is the only searching primitive exposed in today's bleve.index API. This ultimately could limit our ability to take advantage of the indexing improvements, but it also means it will be easier to get a first version of this working.

A term search for term T in field F will look something like this:

```
searchResultPostings = empty
foreach segment {
    dict := segment.Dictionary(F)
    segmentResultPostings = dict.PostingsList(T, segmentSnapshotDeleted)
    // make segment-local numbers into global numbers, and flip bits in searchResultPostings
}
```

The searchResultPostings will be a new implementation of the TermFieldReader interface.

As a reminder, this interface is:

```
// TermFieldReader is the interface exposing the enumeration of documents
// containing a given term in a given field. Documents are returned in byte
// lexicographic order over their identifiers.
type TermFieldReader interface {

    // Next returns the next document containing the term in this field, or nil
    // when it reaches the end of the enumeration. The preAlloced TermFieldDoc
    // is optional, and when non-nil, will be used instead of allocating memory.
    Next(preAlloced *TermFieldDoc) (*TermFieldDoc, error)

    // Advance resets the enumeration at specified document or its immediate
    // follower.
    Advance(ID IndexInternalID, preAlloced *TermFieldDoc) (*TermFieldDoc, error)

    // Count returns the number of documents containing the term in this field.
    Count() uint64

    Close() error
}
```
At first glance this appears problematic: we have no way to return documents in order of their identifiers. But it turns out the wording of this is perhaps too strong, or a bit ambiguous. Originally, this referred to the external identifiers, but with the introduction of a distinction between internal/external identifiers, returning them in order of their internal identifiers is also acceptable. **ASIDE**: the reason for this is that most callers just use Next() and literally don't care what the order is; they could be in any order and it would be fine. There is only one search that cares, and that is the ConjunctionSearcher, which relies on Next/Advance having very specific semantics. Later in this document we will have a proposal to split into multiple interfaces:

- The weakest interface only supports Next(); no ordering at all.
- Ordered, supporting Advance().
- And/Or-capable, able to perform these operations efficiently internally against like interfaces (if not capable, it can always fall back to external walking).

But the good news is that we don't even have to do that for our first implementation. As long as the global numbers we use for internal identifiers are consistent within this IndexSnapshot, then Next() will be ordered by ascending document number, and Advance() will still work correctly.

NOTE: there is another place where we rely on the ordering of these hits, and that is in the "\_id" sort order. Previously this was the natural order and a NOOP for the collector; now it must be implemented by actually sorting on the "\_id" field. We probably should introduce at least a marker interface to detect this.

An ASCII art example:
```
Let's start with the IndexSnapshot we ended with earlier:

3 - Index Batch [ C' ]

segment 0           1       2
numbers [ 1 2 3 ]   [ 1 ]   [ 1 ]
_id     [ A B C ]   [ B ]   [ C ]

Compute bitset segment-0-deleted-by-2:
  [ 0 0 1 ]

OR it with previous ([ 0 1 0 ]) (call it 0-12)
  [ 0 1 1 ]

Compute bitset segment-1-deleted-by-2:
  [ 0 ]

OR it with previous (nil)
  still just nil

IndexSnapshot
  segments [ 0    1   2 ]
  deleted  [ 0-12 nil nil ]

Now let's search for the term 'cat' in the field 'desc', and let's assume that Document C (both versions) would match it.

Concurrently:

- Segment 0
  - Get Term Dictionary For Field 'desc'
  - From it get Postings List for term 'cat' EXCLUDING 0-12
  - raw segment matches [ 0 0 1 ] but excluding [ 0 1 1 ] gives [ 0 0 0 ]
- Segment 1
  - Get Term Dictionary For Field 'desc'
  - From it get Postings List for term 'cat' excluding nil
  - [ 0 ]
- Segment 2
  - Get Term Dictionary For Field 'desc'
  - From it get Postings List for term 'cat' excluding nil
  - [ 1 ]

Map local bitsets into global number space (global meaning cross-segment but still unique to this snapshot)

IndexSnapshot should already have a mapping something like:
  0 - Offset 0
  1 - Offset 3 (because segment 0 had 3 docs)
  2 - Offset 4 (because segment 1 had 1 doc)

This maps to search result bitset:

  [ 0 0 0 0 1 ]

Caller would call Next() and get doc number 5 (assuming 1-based indexing for now)

Caller could then ask to get term locations, stored fields, external doc ID for document number 5. Internally in the IndexSnapshot, we can now convert that back, and realize doc number 5 comes from segment 2; 5-4=1, so we're looking for doc number 1 in segment 2. That happens to be C...
```
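
A hedged Go sketch of the offset bookkeeping used above (the real IndexSnapshot keeps a parallel offsets slice of running document counts, as the introducer code elsewhere in this change shows; the helper names here are illustrative):

```
package scorch

import "sort"

// globalDocNum converts a segment-local document number into the
// snapshot-global number space using the per-segment offsets.
func globalDocNum(offsets []uint64, segmentIndex int, localDocNum uint64) uint64 {
    return offsets[segmentIndex] + localDocNum
}

// localDocNum reverses the mapping, following the 1-based numbering of the
// example above: the owning segment is the last one whose offset is below
// the global number, and the remainder is the local document number.
func localDocNum(offsets []uint64, global uint64) (segmentIndex int, local uint64) {
    segmentIndex = sort.Search(len(offsets), func(i int) bool {
        return offsets[i] >= global
    }) - 1
    return segmentIndex, global - offsets[segmentIndex]
}
```

With the offsets from the example ([ 0 3 4 ]), globalDocNum(offsets, 2, 1) yields 5, and localDocNum(offsets, 5) yields segment 2, local doc 1, matching the walkthrough above.
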
#### Future improvements

In the future, interfaces to detect these non-serially operating TermFieldReaders could expose their own And() and Or() up to the higher level Conjunction/Disjunction searchers. Doing this alone offers some win, but it also means there would be a greater burden on the Searcher code to rewrite logical expressions for maximum performance.

Another related topic is that of peak memory usage. With serially operating TermFieldReaders it was necessary to start them all at the same time and operate in unison. However, with these non-serially operating TermFieldReaders we have the option of doing a few at a time, consolidating them, disposing of the intermediaries, and then doing a few more. For very complex queries with many clauses this could reduce peak memory usage.

### Memory Tracking

All segments must be able to produce two statistics: an estimate of their explicit memory usage, and their actual size on disk (if any). For in-memory segments, disk usage could be zero, and the memory usage represents the entire information content. For mmap-based disk segments, the memory could be as low as the size of the tracking structure itself (say, just a few pointers).

This would allow the implementation to throttle or block incoming mutations when a threshold memory usage has (or would be) exceeded.
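
A hedged sketch of what that accounting could look like (both the interface and the threshold check are illustrative, not the actual scorch_segment_api contract):

```
package scorch

// memoryAccounting is an illustrative interface for the two statistics
// described above; the real segment API differs in names and detail.
type memoryAccounting interface {
    // Size estimates explicit in-memory usage, in bytes.
    Size() int
    // DiskSize reports bytes on disk; zero for purely in-memory segments.
    DiskSize() int64
}

// shouldThrottle sketches the gating decision: slow or block incoming
// mutations once estimated memory for unpersisted data crosses a threshold.
func shouldThrottle(segments []memoryAccounting, maxUnpersistedBytes int) bool {
    var inMemory int
    for _, s := range segments {
        if s.DiskSize() == 0 {
            inMemory += s.Size()
        }
    }
    return inMemory >= maxUnpersistedBytes
}
```
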
### Persistence

Obviously, we want to support (but maybe not require) asynchronous persistence of segments. My expectation is that segments are initially built in memory. At some point they are persisted to disk. This poses some interesting challenges.

At runtime, the state of an index (its IndexSnapshot) is not only the contents of the segments, but also the bitmasks of deleted documents. These bitmasks indirectly encode an ordering in which the segments were added, because they encode which items have been obsoleted by other, subsequent segments. In the runtime implementation we compute bitmask deltas and then merge them at the same time we bring the new segment in. One idea is that we could take a similar approach on disk: when we persist a segment, we persist the bitmask deltas of segments known to exist at that time, and eventually these can get merged up into a base segment deleted bitmask.

This also relates to the topic of rollback, addressed next...

### Rollback

One desirable property in the Couchbase ecosystem is the ability to rollback to some previous (though typically not long ago) state. One idea for keeping this property in this design is to protect some of the most recent segments from merging. Then, if necessary, they could be "undone" to reveal previous states of the system. In these scenarios "undone" has to properly undo the deleted bitmasks on the other segments. Again, the current thinking is that rather than "undo" anything, it could be work that was deferred in the first place, thus making it easier to logically undo.

Another possibly related approach would be to tie this into our existing snapshot mechanism. Perhaps simulating a slow reader (holding onto index snapshots) for some period of time can be the mechanism to achieve the desired end goal.

### Internal Storage

The bleve.index API has support for "internal storage": the ability to store information under a separate namespace.

This is not used for high-volume storage, so it is tempting to think we could just put a small k/v store alongside the rest of the index. But the reality is that this storage is used to maintain key information related to the rollback scenario. Because of this, it's crucial that ordering and overwriting of key/value pairs correspond with actual segment persistence in the index. Based on this, I believe it's important to put the internal key/value pairs inside the segments themselves. But this also means that they must follow a similar "deleted" bitmask approach to obsolete values in older segments. This seems to substantially increase the complexity of the solution: because of the separate namespace, it would appear to require its own bitmask, and further, keys aren't numeric, which then implies yet another mapping from internal key to number, etc.

More thought is required here.
### Merging

The segmented index approach requires merging to prevent the number of segments from growing too large.

Recent experience with LSMs has taught us that having the correct merge strategy can make a huge difference in the overall performance of the system. In particular, a simple merge strategy which merges segments too aggressively can lead to high write amplification and can unnecessarily render cached data useless.

A few simple principles have been identified (a minimal selection-policy sketch follows this list):

- Roughly, we merge multiple smaller segments into a single larger one.
- The larger a segment gets, the less likely we should be to ever merge it.
- Segments with large numbers of deleted/obsoleted items are good candidates, as the merge will result in a space savings.
- Segments with all items deleted/obsoleted can be dropped.
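
A minimal sketch of a candidate-selection policy that follows these principles (purely illustrative; the real scorch merge planner is considerably more sophisticated):

```
package scorch

// candidateSegment is an illustrative per-segment summary.
type candidateSegment struct {
    liveDocs    uint64
    deletedDocs uint64
}

// mergeScore ranks segments for merging: segments with nothing live can be
// dropped outright, small segments and segments with a high deleted ratio
// score highest, and very large mostly-live segments score low so they are
// rarely rewritten.
func mergeScore(c candidateSegment, largeSegmentDocs uint64) float64 {
    total := c.liveDocs + c.deletedDocs
    if total == 0 || c.liveDocs == 0 {
        return 0 // droppable, not a merge candidate
    }
    deletedRatio := float64(c.deletedDocs) / float64(total)
    sizePenalty := float64(c.liveDocs) / float64(largeSegmentDocs)
    return deletedRatio + (1 - sizePenalty)
}
```
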
Merging of a segment should be able to proceed even if that segment is held by an ongoing snapshot; the snapshot should only delay the removal of the old segment.
index/scorch/builder.go (new file, 332 lines)
@@ -0,0 +1,332 @@
// Copyright (c) 2019 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment "github.com/blevesearch/scorch_segment_api/v2"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
const DefaultBuilderBatchSize = 1000
|
||||
const DefaultBuilderMergeMax = 10
|
||||
|
||||
type Builder struct {
|
||||
m sync.Mutex
|
||||
segCount uint64
|
||||
path string
|
||||
buildPath string
|
||||
segPaths []string
|
||||
batchSize int
|
||||
mergeMax int
|
||||
batch *index.Batch
|
||||
internal map[string][]byte
|
||||
segPlugin SegmentPlugin
|
||||
}
|
||||
|
||||
func NewBuilder(config map[string]interface{}) (*Builder, error) {
|
||||
path, ok := config["path"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify path")
|
||||
}
|
||||
|
||||
buildPathPrefix, _ := config["buildPathPrefix"].(string)
|
||||
buildPath, err := os.MkdirTemp(buildPathPrefix, "scorch-offline-build")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rv := &Builder{
|
||||
path: path,
|
||||
buildPath: buildPath,
|
||||
mergeMax: DefaultBuilderMergeMax,
|
||||
batchSize: DefaultBuilderBatchSize,
|
||||
batch: index.NewBatch(),
|
||||
segPlugin: defaultSegmentPlugin,
|
||||
}
|
||||
|
||||
err = rv.parseConfig(config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing builder config: %v", err)
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (o *Builder) parseConfig(config map[string]interface{}) (err error) {
|
||||
if v, ok := config["mergeMax"]; ok {
|
||||
var t int
|
||||
if t, err = parseToInteger(v); err != nil {
|
||||
return fmt.Errorf("mergeMax parse err: %v", err)
|
||||
}
|
||||
if t > 0 {
|
||||
o.mergeMax = t
|
||||
}
|
||||
}
|
||||
|
||||
if v, ok := config["batchSize"]; ok {
|
||||
var t int
|
||||
if t, err = parseToInteger(v); err != nil {
|
||||
return fmt.Errorf("batchSize parse err: %v", err)
|
||||
}
|
||||
if t > 0 {
|
||||
o.batchSize = t
|
||||
}
|
||||
}
|
||||
|
||||
if v, ok := config["internal"]; ok {
|
||||
if vinternal, ok := v.(map[string][]byte); ok {
|
||||
o.internal = vinternal
|
||||
}
|
||||
}
|
||||
|
||||
forcedSegmentType, forcedSegmentVersion, err := configForceSegmentTypeVersion(config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if forcedSegmentType != "" && forcedSegmentVersion != 0 {
|
||||
segPlugin, err := chooseSegmentPlugin(forcedSegmentType,
|
||||
uint32(forcedSegmentVersion))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
o.segPlugin = segPlugin
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Index will place the document into the index.
|
||||
// It is invalid to index the same document multiple times.
|
||||
func (o *Builder) Index(doc index.Document) error {
|
||||
o.m.Lock()
|
||||
defer o.m.Unlock()
|
||||
|
||||
o.batch.Update(doc)
|
||||
|
||||
return o.maybeFlushBatchLOCKED(o.batchSize)
|
||||
}
|
||||
|
||||
func (o *Builder) maybeFlushBatchLOCKED(moreThan int) error {
|
||||
if len(o.batch.IndexOps) >= moreThan {
|
||||
defer o.batch.Reset()
|
||||
return o.executeBatchLOCKED(o.batch)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *Builder) executeBatchLOCKED(batch *index.Batch) (err error) {
|
||||
analysisResults := make([]index.Document, 0, len(batch.IndexOps))
|
||||
for _, doc := range batch.IndexOps {
|
||||
if doc != nil {
|
||||
// insert _id field
|
||||
doc.AddIDField()
|
||||
// perform analysis directly
|
||||
analyze(doc, nil)
|
||||
analysisResults = append(analysisResults, doc)
|
||||
}
|
||||
}
|
||||
|
||||
seg, _, err := o.segPlugin.New(analysisResults)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error building segment base: %v", err)
|
||||
}
|
||||
|
||||
filename := zapFileName(o.segCount)
|
||||
o.segCount++
|
||||
path := o.buildPath + string(os.PathSeparator) + filename
|
||||
|
||||
if segUnpersisted, ok := seg.(segment.UnpersistedSegment); ok {
|
||||
err = segUnpersisted.Persist(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error persisting segment base to %s: %v", path, err)
|
||||
}
|
||||
|
||||
o.segPaths = append(o.segPaths, path)
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("new segment does not implement unpersisted: %T", seg)
|
||||
}
|
||||
|
||||
func (o *Builder) doMerge() error {
|
||||
// as long as we have more than 1 segment, keep merging
|
||||
for len(o.segPaths) > 1 {
|
||||
|
||||
// merge the next <mergeMax> number of segments into one new one
|
||||
// or, if there are fewer than <mergeMax> remaining, merge them all
|
||||
mergeCount := o.mergeMax
|
||||
if mergeCount > len(o.segPaths) {
|
||||
mergeCount = len(o.segPaths)
|
||||
}
|
||||
|
||||
mergePaths := o.segPaths[0:mergeCount]
|
||||
o.segPaths = o.segPaths[mergeCount:]
|
||||
|
||||
// open each of the segments to be merged
|
||||
mergeSegs := make([]segment.Segment, 0, mergeCount)
|
||||
|
||||
// closeOpenedSegs attempts to close all opened
|
||||
// segments even if an error occurs, in which case
|
||||
// the first error is returned
|
||||
closeOpenedSegs := func() error {
|
||||
var err error
|
||||
for _, seg := range mergeSegs {
|
||||
clErr := seg.Close()
|
||||
if clErr != nil && err == nil {
|
||||
err = clErr
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
for _, mergePath := range mergePaths {
|
||||
seg, err := o.segPlugin.Open(mergePath)
|
||||
if err != nil {
|
||||
_ = closeOpenedSegs()
|
||||
return fmt.Errorf("error opening segment (%s) for merge: %v", mergePath, err)
|
||||
}
|
||||
mergeSegs = append(mergeSegs, seg)
|
||||
}
|
||||
|
||||
// do the merge
|
||||
mergedSegPath := o.buildPath + string(os.PathSeparator) + zapFileName(o.segCount)
|
||||
drops := make([]*roaring.Bitmap, mergeCount)
|
||||
_, _, err := o.segPlugin.Merge(mergeSegs, drops, mergedSegPath, nil, nil)
|
||||
if err != nil {
|
||||
_ = closeOpenedSegs()
|
||||
return fmt.Errorf("error merging segments (%v): %v", mergePaths, err)
|
||||
}
|
||||
o.segCount++
|
||||
o.segPaths = append(o.segPaths, mergedSegPath)
|
||||
|
||||
// close segments opened for merge
|
||||
err = closeOpenedSegs()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error closing opened segments: %v", err)
|
||||
}
|
||||
|
||||
// remove merged segments
|
||||
for _, mergePath := range mergePaths {
|
||||
err = os.RemoveAll(mergePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error removing segment %s after merge: %v", mergePath, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *Builder) Close() error {
|
||||
o.m.Lock()
|
||||
defer o.m.Unlock()
|
||||
|
||||
// see if there is a partial batch
|
||||
err := o.maybeFlushBatchLOCKED(1)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error flushing batch before close: %v", err)
|
||||
}
|
||||
|
||||
// perform all the merging
|
||||
err = o.doMerge()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error while merging: %v", err)
|
||||
}
|
||||
|
||||
// ensure the store path exists
|
||||
err = os.MkdirAll(o.path, 0700)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// move final segment into place
|
||||
// segment id 2 is chosen to match the behavior of a scorch
|
||||
// index which indexes a single batch of data
|
||||
finalSegPath := o.path + string(os.PathSeparator) + zapFileName(2)
|
||||
err = os.Rename(o.segPaths[0], finalSegPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error moving final segment into place: %v", err)
|
||||
}
|
||||
|
||||
// remove the buildPath, as it is no longer needed
|
||||
err = os.RemoveAll(o.buildPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error removing build path: %v", err)
|
||||
}
|
||||
|
||||
// prepare wrapping
|
||||
seg, err := o.segPlugin.Open(finalSegPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening final segment")
|
||||
}
|
||||
|
||||
// create a segment snapshot for this segment
|
||||
ss := &SegmentSnapshot{
|
||||
segment: seg,
|
||||
}
|
||||
is := &IndexSnapshot{
|
||||
epoch: 3, // chosen to match scorch behavior when indexing a single batch
|
||||
segment: []*SegmentSnapshot{ss},
|
||||
creator: "scorch-builder",
|
||||
internal: o.internal,
|
||||
}
|
||||
|
||||
// create the root bolt
|
||||
rootBoltPath := o.path + string(os.PathSeparator) + "root.bolt"
|
||||
rootBolt, err := bolt.Open(rootBoltPath, 0600, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// start a write transaction
|
||||
tx, err := rootBolt.Begin(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// fill the root bolt with this fake index snapshot
|
||||
_, _, err = prepareBoltSnapshot(is, tx, o.path, o.segPlugin, nil, nil)
|
||||
if err != nil {
|
||||
_ = tx.Rollback()
|
||||
_ = rootBolt.Close()
|
||||
return fmt.Errorf("error preparing bolt snapshot in root.bolt: %v", err)
|
||||
}
|
||||
|
||||
// commit bolt data
|
||||
err = tx.Commit()
|
||||
if err != nil {
|
||||
_ = rootBolt.Close()
|
||||
return fmt.Errorf("error committing bolt tx in root.bolt: %v", err)
|
||||
}
|
||||
|
||||
// close bolt
|
||||
err = rootBolt.Close()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error closing root.bolt: %v", err)
|
||||
}
|
||||
|
||||
// close final segment
|
||||
err = seg.Close()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error closing final segment: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
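
A hedged usage sketch of the offline Builder above (mirroring what builder_test.go below does; the config keys shown are the ones parseConfig reads):

```
package scorch

import "github.com/blevesearch/bleve/v2/document"

// buildOffline is an illustrative helper, not part of the upstream file.
func buildOffline(indexPath string, ids []string) error {
    b, err := NewBuilder(map[string]interface{}{
        "path":      indexPath, // required: where the finished index is placed
        "batchSize": 1000,      // optional: documents per flushed segment
        "mergeMax":  10,        // optional: segments merged per round
    })
    if err != nil {
        return err
    }
    for _, id := range ids {
        doc := document.NewDocument(id)
        doc.AddField(document.NewTextField("name", nil, []byte("hello")))
        if err := b.Index(doc); err != nil {
            return err
        }
    }
    // Close flushes any partial batch, merges segments, and writes root.bolt.
    return b.Close()
}
```
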
index/scorch/builder_test.go (new file, 159 lines)
@@ -0,0 +1,159 @@
// Copyright (c) 2019 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/document"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
func TestBuilder(t *testing.T) {
|
||||
tmpDir, err := os.MkdirTemp("", "scorch-builder-test")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err = os.RemoveAll(tmpDir)
|
||||
if err != nil {
|
||||
t.Fatalf("error cleaning up test index: %v", err)
|
||||
}
|
||||
}()
|
||||
options := map[string]interface{}{
|
||||
"path": tmpDir,
|
||||
"batchSize": 2,
|
||||
"mergeMax": 2,
|
||||
}
|
||||
b, err := NewBuilder(options)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for i := 0; i < 10; i++ {
|
||||
doc := document.NewDocument(fmt.Sprintf("%d", i))
|
||||
doc.AddField(document.NewTextField("name", nil, []byte("hello")))
|
||||
err = b.Index(doc)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
err = b.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
checkIndex(t, tmpDir, []byte("hello"), "name", 10)
|
||||
}
|
||||
|
||||
func checkIndex(t *testing.T, path string, term []byte, field string, expectCount int) {
|
||||
cfg := make(map[string]interface{})
|
||||
cfg["path"] = path
|
||||
analysisQueue := index.NewAnalysisQueue(1)
|
||||
idx, err := NewScorch(Name, cfg, analysisQueue)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = idx.Open()
|
||||
if err != nil {
|
||||
t.Fatalf("error opening index: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err = idx.Close()
|
||||
if err != nil {
|
||||
t.Fatalf("error closing index: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
r, err := idx.Reader()
|
||||
if err != nil {
|
||||
t.Fatalf("error accessing index reader: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err = r.Close()
|
||||
if err != nil {
|
||||
t.Fatalf("error closing reader: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// check the count, expect 10 docs
|
||||
count, err := r.DocCount()
|
||||
if err != nil {
|
||||
t.Errorf("error accessing index doc count: %v", err)
|
||||
} else if count != uint64(expectCount) {
|
||||
t.Errorf("expected %d docs, got %d", expectCount, count)
|
||||
}
|
||||
|
||||
// run a search for hello
|
||||
tfr, err := r.TermFieldReader(context.TODO(), term, field, false, false, false)
|
||||
if err != nil {
|
||||
t.Errorf("error accessing term field reader: %v", err)
|
||||
} else {
|
||||
var rows int
|
||||
tfd, err := tfr.Next(nil)
|
||||
for err == nil && tfd != nil {
|
||||
rows++
|
||||
tfd, err = tfr.Next(nil)
|
||||
}
|
||||
if err != nil {
|
||||
t.Errorf("error calling next on term field reader: %v", err)
|
||||
}
|
||||
if rows != expectCount {
|
||||
t.Errorf("expected %d rows for term hello, field name, got %d", expectCount, rows)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuilderFlushFinalBatch(t *testing.T) {
|
||||
tmpDir, err := os.MkdirTemp("", "scorch-builder-test")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err = os.RemoveAll(tmpDir)
|
||||
if err != nil {
|
||||
t.Fatalf("error cleaning up test index: %v", err)
|
||||
}
|
||||
}()
|
||||
options := map[string]interface{}{
|
||||
"path": tmpDir,
|
||||
"batchSize": 2,
|
||||
"mergeMax": 2,
|
||||
}
|
||||
b, err := NewBuilder(options)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for i := 0; i < 9; i++ {
|
||||
doc := document.NewDocument(fmt.Sprintf("%d", i))
|
||||
doc.AddField(document.NewTextField("name", nil, []byte("hello")))
|
||||
err = b.Index(doc)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
err = b.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
checkIndex(t, tmpDir, []byte("hello"), "name", 9)
|
||||
}
|
index/scorch/empty.go (new file, 41 lines)
@@ -0,0 +1,41 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scorch

import segment "github.com/blevesearch/scorch_segment_api/v2"

type emptyPostingsIterator struct{}

func (e *emptyPostingsIterator) Next() (segment.Posting, error) {
    return nil, nil
}

func (e *emptyPostingsIterator) Advance(uint64) (segment.Posting, error) {
    return nil, nil
}

func (e *emptyPostingsIterator) Size() int {
    return 0
}

func (e *emptyPostingsIterator) BytesRead() uint64 {
    return 0
}

func (e *emptyPostingsIterator) ResetBytesRead(uint64) {}

func (e *emptyPostingsIterator) BytesWritten() uint64 { return 0 }

var anEmptyPostingsIterator = &emptyPostingsIterator{}
index/scorch/event.go (new file, 77 lines)
@@ -0,0 +1,77 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scorch

import "time"

// RegistryAsyncErrorCallbacks should be treated as read-only after
// process init()'ialization.
var RegistryAsyncErrorCallbacks = map[string]func(error, string){}

// RegistryEventCallbacks should be treated as read-only after
// process init()'ialization.
// In the event of not having a callback, these return true.
var RegistryEventCallbacks = map[string]func(Event) bool{}

// Event represents the information provided in an OnEvent() callback.
type Event struct {
    Kind     EventKind
    Scorch   *Scorch
    Duration time.Duration
}

// EventKind represents an event code for OnEvent() callbacks.
type EventKind int

// EventKindCloseStart is fired when a Scorch.Close() has begun.
var EventKindCloseStart = EventKind(1)

// EventKindClose is fired when a scorch index has been fully closed.
var EventKindClose = EventKind(2)

// EventKindMergerProgress is fired when the merger has completed a
// round of merge processing.
var EventKindMergerProgress = EventKind(3)

// EventKindPersisterProgress is fired when the persister has completed
// a round of persistence processing.
var EventKindPersisterProgress = EventKind(4)

// EventKindBatchIntroductionStart is fired when Batch() is invoked which
// introduces a new segment.
var EventKindBatchIntroductionStart = EventKind(5)

// EventKindBatchIntroduction is fired when Batch() completes.
var EventKindBatchIntroduction = EventKind(6)

// EventKindMergeTaskIntroductionStart is fired when the merger is about to
// start the introduction of merged segment from a single merge task.
var EventKindMergeTaskIntroductionStart = EventKind(7)

// EventKindMergeTaskIntroduction is fired when the merger has completed
// the introduction of merged segment from a single merge task.
var EventKindMergeTaskIntroduction = EventKind(8)

// EventKindPreMergeCheck is fired before the merge begins to check if
// the caller should proceed with the merge.
var EventKindPreMergeCheck = EventKind(9)

// EventKindIndexStart is fired when Index() is invoked which
// creates a new Document object from an interface using the index mapping.
var EventKindIndexStart = EventKind(10)

// EventKindPurgerCheck is fired before the purge code is invoked and decides
// whether to execute or not. For unit test purposes
var EventKindPurgerCheck = EventKind(11)
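
A minimal sketch of how these hooks are wired up (mirroring what event_test.go below does: register a named callback, then point a scorch config at it via the "eventCallbackName" option used in that test):

```
package scorch

import "log"

func init() {
    // Register a named callback; a scorch config selects it by name
    // with the "eventCallbackName" option (see event_test.go).
    RegistryEventCallbacks["logging"] = func(e Event) bool {
        if e.Kind == EventKindMergerProgress {
            log.Printf("merge round finished in %v", e.Duration)
        }
        return true // returning true lets the triggering operation proceed
    }
}
```
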
index/scorch/event_test.go (new file, 79 lines)
@@ -0,0 +1,79 @@
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/document"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
func TestEventBatchIntroductionStart(t *testing.T) {
|
||||
testConfig := CreateConfig("TestEventBatchIntroductionStart")
|
||||
err := InitTest(testConfig)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := DestroyTest(testConfig)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
var count int
|
||||
RegistryEventCallbacks["test"] = func(e Event) bool {
|
||||
if e.Kind == EventKindBatchIntroductionStart {
|
||||
count++
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
ourConfig := make(map[string]interface{}, len(testConfig))
|
||||
for k, v := range testConfig {
|
||||
ourConfig[k] = v
|
||||
}
|
||||
ourConfig["eventCallbackName"] = "test"
|
||||
|
||||
analysisQueue := index.NewAnalysisQueue(1)
|
||||
idx, err := NewScorch(Name, ourConfig, analysisQueue)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
err = idx.Open()
|
||||
if err != nil {
|
||||
t.Fatalf("error opening index: %v", err)
|
||||
}
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
err := idx.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
if count != 1 {
|
||||
t.Fatalf("expected to see 1 batch introduction event event, saw %d", count)
|
||||
}
|
||||
}
|
index/scorch/field_dict_test.go (new file, 186 lines)
@@ -0,0 +1,186 @@
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/document"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
func TestIndexFieldDict(t *testing.T) {
|
||||
cfg := CreateConfig("TestIndexFieldDict")
|
||||
err := InitTest(cfg)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := DestroyTest(cfg)
|
||||
if err != nil {
|
||||
t.Log(err)
|
||||
}
|
||||
}()
|
||||
|
||||
analysisQueue := index.NewAnalysisQueue(1)
|
||||
idx, err := NewScorch(Name, cfg, analysisQueue)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = idx.Open()
|
||||
if err != nil {
|
||||
t.Fatalf("error opening index: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
cerr := idx.Close()
|
||||
if cerr != nil {
|
||||
t.Fatal(cerr)
|
||||
}
|
||||
}()
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer))
|
||||
doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), index.IndexField|index.IncludeTermVectors, testAnalyzer))
|
||||
doc.AddField(document.NewTextFieldCustom("prefix", []uint64{}, []byte("bob cat cats catting dog doggy zoo"), index.IndexField|index.IncludeTermVectors, testAnalyzer))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
|
||||
indexReader, err := idx.Reader()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
defer func() {
|
||||
err := indexReader.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
dict, err := indexReader.FieldDict("name")
|
||||
if err != nil {
|
||||
t.Errorf("error creating reader: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := dict.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
termCount := 0
|
||||
curr, err := dict.Next()
|
||||
for err == nil && curr != nil {
|
||||
termCount++
|
||||
if curr.Term != "test" {
|
||||
t.Errorf("expected term to be 'test', got '%s'", curr.Term)
|
||||
}
|
||||
curr, err = dict.Next()
|
||||
}
|
||||
if termCount != 1 {
|
||||
t.Errorf("expected 1 term for this field, got %d", termCount)
|
||||
}
|
||||
|
||||
dict2, err := indexReader.FieldDict("desc")
|
||||
if err != nil {
|
||||
t.Fatalf("error creating reader: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := dict2.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
termCount = 0
|
||||
terms := make([]string, 0)
|
||||
curr, err = dict2.Next()
|
||||
for err == nil && curr != nil {
|
||||
termCount++
|
||||
terms = append(terms, curr.Term)
|
||||
curr, err = dict2.Next()
|
||||
}
|
||||
if termCount != 3 {
|
||||
t.Errorf("expected 3 term for this field, got %d", termCount)
|
||||
}
|
||||
expectedTerms := []string{"eat", "more", "rice"}
|
||||
if !reflect.DeepEqual(expectedTerms, terms) {
|
||||
t.Errorf("expected %#v, got %#v", expectedTerms, terms)
|
||||
}
|
||||
// test start and end range
|
||||
dict3, err := indexReader.FieldDictRange("desc", []byte("fun"), []byte("nice"))
|
||||
if err != nil {
|
||||
t.Errorf("error creating reader: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := dict3.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
termCount = 0
|
||||
terms = make([]string, 0)
|
||||
curr, err = dict3.Next()
|
||||
for err == nil && curr != nil {
|
||||
termCount++
|
||||
terms = append(terms, curr.Term)
|
||||
curr, err = dict3.Next()
|
||||
}
|
||||
if termCount != 1 {
|
||||
t.Errorf("expected 1 term for this field, got %d", termCount)
|
||||
}
|
||||
expectedTerms = []string{"more"}
|
||||
if !reflect.DeepEqual(expectedTerms, terms) {
|
||||
t.Errorf("expected %#v, got %#v", expectedTerms, terms)
|
||||
}
|
||||
|
||||
// test use case for prefix
|
||||
dict4, err := indexReader.FieldDictPrefix("prefix", []byte("cat"))
|
||||
if err != nil {
|
||||
t.Errorf("error creating reader: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := dict4.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
termCount = 0
|
||||
terms = make([]string, 0)
|
||||
curr, err = dict4.Next()
|
||||
for err == nil && curr != nil {
|
||||
termCount++
|
||||
terms = append(terms, curr.Term)
|
||||
curr, err = dict4.Next()
|
||||
}
|
||||
if termCount != 3 {
|
||||
t.Errorf("expected 3 term for this field, got %d", termCount)
|
||||
}
|
||||
expectedTerms = []string{"cat", "cats", "catting"}
|
||||
if !reflect.DeepEqual(expectedTerms, terms) {
|
||||
t.Errorf("expected %#v, got %#v", expectedTerms, terms)
|
||||
}
|
||||
}
|
index/scorch/int.go (new file, 92 lines)
@@ -0,0 +1,92 @@
// Copyright 2014 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.

// This code originated from:
// https://github.com/cockroachdb/cockroach/blob/2dd65dde5d90c157f4b93f92502ca1063b904e1d/pkg/util/encoding/encoding.go

// Modified to not use pkg/errors

package scorch

import "fmt"

const (
    // intMin is chosen such that the range of int tags does not overlap the
    // ascii character set that is frequently used in testing.
    intMin      = 0x80 // 128
    intMaxWidth = 8
    intZero     = intMin + intMaxWidth           // 136
    intSmall    = intMax - intZero - intMaxWidth // 109
    // intMax is the maximum int tag value.
    intMax = 0xfd // 253
)

// encodeUvarintAscending encodes the uint64 value using a variable length
// (length-prefixed) representation. The length is encoded as a single
// byte indicating the number of encoded bytes (-8) to follow. See
// EncodeVarintAscending for rationale. The encoded bytes are appended to the
// supplied buffer and the final buffer is returned.
func encodeUvarintAscending(b []byte, v uint64) []byte {
    switch {
    case v <= intSmall:
        return append(b, intZero+byte(v))
    case v <= 0xff:
        return append(b, intMax-7, byte(v))
    case v <= 0xffff:
        return append(b, intMax-6, byte(v>>8), byte(v))
    case v <= 0xffffff:
        return append(b, intMax-5, byte(v>>16), byte(v>>8), byte(v))
    case v <= 0xffffffff:
        return append(b, intMax-4, byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
    case v <= 0xffffffffff:
        return append(b, intMax-3, byte(v>>32), byte(v>>24), byte(v>>16), byte(v>>8),
            byte(v))
    case v <= 0xffffffffffff:
        return append(b, intMax-2, byte(v>>40), byte(v>>32), byte(v>>24), byte(v>>16),
            byte(v>>8), byte(v))
    case v <= 0xffffffffffffff:
        return append(b, intMax-1, byte(v>>48), byte(v>>40), byte(v>>32), byte(v>>24),
            byte(v>>16), byte(v>>8), byte(v))
    default:
        return append(b, intMax, byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32),
            byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
    }
}

// decodeUvarintAscending decodes a varint encoded uint64 from the input
// buffer. The remainder of the input buffer and the decoded uint64
// are returned.
func decodeUvarintAscending(b []byte) ([]byte, uint64, error) {
    if len(b) == 0 {
        return nil, 0, fmt.Errorf("insufficient bytes to decode uvarint value")
    }
    length := int(b[0]) - intZero
    b = b[1:] // skip length byte
    if length <= intSmall {
        return b, uint64(length), nil
    }
    length -= intSmall
    if length < 0 || length > 8 {
        return nil, 0, fmt.Errorf("invalid uvarint length of %d", length)
    } else if len(b) < length {
        return nil, 0, fmt.Errorf("insufficient bytes to decode uvarint value: %q", b)
    }
    var v uint64
    // It is faster to range over the elements in a slice than to index
    // into the slice on each loop iteration.
    for _, t := range b[:length] {
        v = (v << 8) | uint64(t)
    }
    return b[length:], v, nil
}
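
For orientation, a hedged sketch of how these two helpers compose; the expected byte values match the cases exercised in int_test.go below, and the function itself is illustrative rather than part of the upstream file:

```
package scorch

import "fmt"

// uvarintRoundTrip shows the append-style API: values <= 109 occupy a single
// byte (value + intZero); larger values get a length-prefix byte followed by
// big-endian bytes.
func uvarintRoundTrip() {
    buf := encodeUvarintAscending(nil, 109) // appends 0xf5 (single byte)
    buf = encodeUvarintAscending(buf, 1<<8) // appends 0xf7 0x01 0x00

    rest, v, err := decodeUvarintAscending(buf)
    if err != nil {
        panic(err)
    }
    fmt.Println(v) // 109
    _, v, _ = decodeUvarintAscending(rest)
    fmt.Println(v) // 256
}
```
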
index/scorch/int_test.go (new file, 96 lines)
@@ -0,0 +1,96 @@
// Copyright 2014 The Cockroach Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
// implied. See the License for the specific language governing
|
||||
// permissions and limitations under the License.
|
||||
|
||||
// This code originated from:
|
||||
// https://github.com/cockroachdb/cockroach/blob/2dd65dde5d90c157f4b93f92502ca1063b904e1d/pkg/util/encoding/encoding_test.go
|
||||
|
||||
// Modified to only test the parts we borrowed
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"math"
|
||||
"testing"
|
||||
)
|
||||
|
||||
type testCaseUint64 struct {
|
||||
value uint64
|
||||
expEnc []byte
|
||||
}
|
||||
|
||||
func TestEncodeDecodeUvarint(t *testing.T) {
|
||||
testBasicEncodeDecodeUint64(encodeUvarintAscending, decodeUvarintAscending, false, t)
|
||||
testCases := []testCaseUint64{
|
||||
{0, []byte{0x88}},
|
||||
{1, []byte{0x89}},
|
||||
{109, []byte{0xf5}},
|
||||
{110, []byte{0xf6, 0x6e}},
|
||||
{1 << 8, []byte{0xf7, 0x01, 0x00}},
|
||||
{math.MaxUint64, []byte{0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}},
|
||||
}
|
||||
testCustomEncodeUint64(testCases, encodeUvarintAscending, t)
|
||||
}
|
||||
|
||||
func testBasicEncodeDecodeUint64(
|
||||
encFunc func([]byte, uint64) []byte,
|
||||
decFunc func([]byte) ([]byte, uint64, error),
|
||||
descending bool, t *testing.T,
|
||||
) {
|
||||
testCases := []uint64{
|
||||
0, 1,
|
||||
1<<8 - 1, 1 << 8,
|
||||
1<<16 - 1, 1 << 16,
|
||||
1<<24 - 1, 1 << 24,
|
||||
1<<32 - 1, 1 << 32,
|
||||
1<<40 - 1, 1 << 40,
|
||||
1<<48 - 1, 1 << 48,
|
||||
1<<56 - 1, 1 << 56,
|
||||
math.MaxUint64 - 1, math.MaxUint64,
|
||||
}
|
||||
|
||||
var lastEnc []byte
|
||||
for i, v := range testCases {
|
||||
enc := encFunc(nil, v)
|
||||
if i > 0 {
|
||||
if (descending && bytes.Compare(enc, lastEnc) >= 0) ||
|
||||
(!descending && bytes.Compare(enc, lastEnc) < 0) {
|
||||
t.Errorf("ordered constraint violated for %d: [% x] vs. [% x]", v, enc, lastEnc)
|
||||
}
|
||||
}
|
||||
b, decode, err := decFunc(enc)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
continue
|
||||
}
|
||||
if len(b) != 0 {
|
||||
t.Errorf("leftover bytes: [% x]", b)
|
||||
}
|
||||
if decode != v {
|
||||
t.Errorf("decode yielded different value than input: %d vs. %d", decode, v)
|
||||
}
|
||||
lastEnc = enc
|
||||
}
|
||||
}
|
||||
|
||||
func testCustomEncodeUint64(
|
||||
testCases []testCaseUint64, encFunc func([]byte, uint64) []byte, t *testing.T,
|
||||
) {
|
||||
for _, test := range testCases {
|
||||
enc := encFunc(nil, test.value)
|
||||
if !bytes.Equal(enc, test.expEnc) {
|
||||
t.Errorf("expected [% x]; got [% x] (value: %d)", test.expEnc, enc, test.value)
|
||||
}
|
||||
}
|
||||
}
|
index/scorch/introducer.go (new file, 515 lines)
@@ -0,0 +1,515 @@
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment "github.com/blevesearch/scorch_segment_api/v2"
|
||||
)
|
||||
|
||||
type segmentIntroduction struct {
|
||||
id uint64
|
||||
data segment.Segment
|
||||
obsoletes map[uint64]*roaring.Bitmap
|
||||
ids []string
|
||||
internal map[string][]byte
|
||||
stats *fieldStats
|
||||
|
||||
applied chan error
|
||||
persisted chan error
|
||||
persistedCallback index.BatchCallback
|
||||
}
|
||||
|
||||
type persistIntroduction struct {
|
||||
persisted map[uint64]segment.Segment
|
||||
applied notificationChan
|
||||
}
|
||||
|
||||
type epochWatcher struct {
|
||||
epoch uint64
|
||||
notifyCh notificationChan
|
||||
}
|
||||
|
||||
func (s *Scorch) introducerLoop() {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
s.fireAsyncError(&AsyncPanicError{
|
||||
Source: "introducer",
|
||||
Path: s.path,
|
||||
})
|
||||
}
|
||||
|
||||
s.asyncTasks.Done()
|
||||
}()
|
||||
|
||||
var epochWatchers []*epochWatcher
|
||||
OUTER:
|
||||
for {
|
||||
atomic.AddUint64(&s.stats.TotIntroduceLoop, 1)
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
|
||||
case epochWatcher := <-s.introducerNotifier:
|
||||
epochWatchers = append(epochWatchers, epochWatcher)
|
||||
|
||||
case nextMerge := <-s.merges:
|
||||
s.introduceMerge(nextMerge)
|
||||
|
||||
case next := <-s.introductions:
|
||||
err := s.introduceSegment(next)
|
||||
if err != nil {
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
case persist := <-s.persists:
|
||||
s.introducePersist(persist)
|
||||
|
||||
}
|
||||
|
||||
var epochCurr uint64
|
||||
s.rootLock.RLock()
|
||||
if s.root != nil {
|
||||
epochCurr = s.root.epoch
|
||||
}
|
||||
s.rootLock.RUnlock()
|
||||
var epochWatchersNext []*epochWatcher
|
||||
for _, w := range epochWatchers {
|
||||
if w.epoch < epochCurr {
|
||||
close(w.notifyCh)
|
||||
} else {
|
||||
epochWatchersNext = append(epochWatchersNext, w)
|
||||
}
|
||||
}
|
||||
epochWatchers = epochWatchersNext
|
||||
}
|
||||
}
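As a sketch of how the epochWatcher mechanism above is consumed (hypothetical helper, using only the fields and channels shown in this file): a component registers a watcher for the epoch it last observed and waits for the introducer to close notifyCh once the root epoch has advanced past it.

func waitForEpochAdvance(s *Scorch, epoch uint64) bool {
	ew := &epochWatcher{
		epoch:    epoch,
		notifyCh: make(notificationChan, 1),
	}
	// hand the watcher to the introducer loop
	select {
	case s.introducerNotifier <- ew:
	case <-s.closeCh:
		return false
	}
	// the introducer closes notifyCh once s.root.epoch > epoch
	select {
	case <-ew.notifyCh:
		return true
	case <-s.closeCh:
		return false
	}
}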
|
||||
|
||||
func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
|
||||
atomic.AddUint64(&s.stats.TotIntroduceSegmentBeg, 1)
|
||||
defer atomic.AddUint64(&s.stats.TotIntroduceSegmentEnd, 1)
|
||||
|
||||
s.rootLock.RLock()
|
||||
root := s.root
|
||||
root.AddRef()
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
defer func() { _ = root.DecRef() }()
|
||||
|
||||
nsegs := len(root.segment)
|
||||
|
||||
// prepare new index snapshot
|
||||
newSnapshot := &IndexSnapshot{
|
||||
parent: s,
|
||||
segment: make([]*SegmentSnapshot, 0, nsegs+1),
|
||||
offsets: make([]uint64, 0, nsegs+1),
|
||||
internal: make(map[string][]byte, len(root.internal)),
|
||||
refs: 1,
|
||||
creator: "introduceSegment",
|
||||
}
|
||||
|
||||
// iterate through current segments
|
||||
var running uint64
|
||||
var docsToPersistCount, memSegments, fileSegments uint64
|
||||
var droppedSegmentFiles []string
|
||||
for i := range root.segment {
|
||||
// see if optimistic work included this segment
|
||||
delta, ok := next.obsoletes[root.segment[i].id]
|
||||
if !ok {
|
||||
var err error
|
||||
delta, err = root.segment[i].segment.DocNumbers(next.ids)
|
||||
if err != nil {
|
||||
next.applied <- fmt.Errorf("error computing doc numbers: %v", err)
|
||||
close(next.applied)
|
||||
_ = newSnapshot.DecRef()
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
newss := &SegmentSnapshot{
|
||||
id: root.segment[i].id,
|
||||
segment: root.segment[i].segment,
|
||||
stats: root.segment[i].stats,
|
||||
cachedDocs: root.segment[i].cachedDocs,
|
||||
cachedMeta: root.segment[i].cachedMeta,
|
||||
creator: root.segment[i].creator,
|
||||
}
|
||||
|
||||
// apply new obsoletions
|
||||
if root.segment[i].deleted == nil {
|
||||
newss.deleted = delta
|
||||
} else {
|
||||
if delta.IsEmpty() {
|
||||
newss.deleted = root.segment[i].deleted
|
||||
} else {
|
||||
newss.deleted = roaring.Or(root.segment[i].deleted, delta)
|
||||
}
|
||||
}
|
||||
if newss.deleted.IsEmpty() {
|
||||
newss.deleted = nil
|
||||
}
|
||||
|
||||
// check for live size before copying
|
||||
if newss.LiveSize() > 0 {
|
||||
newSnapshot.segment = append(newSnapshot.segment, newss)
|
||||
root.segment[i].segment.AddRef()
|
||||
newSnapshot.offsets = append(newSnapshot.offsets, running)
|
||||
running += newss.segment.Count()
|
||||
} else if seg, ok := newss.segment.(segment.PersistedSegment); ok {
|
||||
droppedSegmentFiles = append(droppedSegmentFiles,
|
||||
filepath.Base(seg.Path()))
|
||||
}
|
||||
|
||||
if isMemorySegment(root.segment[i]) {
|
||||
docsToPersistCount += root.segment[i].Count()
|
||||
memSegments++
|
||||
} else {
|
||||
fileSegments++
|
||||
}
|
||||
}
|
||||
|
||||
atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
|
||||
atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
|
||||
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
|
||||
|
||||
// append new segment, if any, to end of the new index snapshot
|
||||
if next.data != nil {
|
||||
newSegmentSnapshot := &SegmentSnapshot{
|
||||
id: next.id,
|
||||
segment: next.data, // take ownership of next.data's ref-count
|
||||
stats: next.stats,
|
||||
cachedDocs: &cachedDocs{cache: nil},
|
||||
cachedMeta: &cachedMeta{meta: nil},
|
||||
creator: "introduceSegment",
|
||||
}
|
||||
newSnapshot.segment = append(newSnapshot.segment, newSegmentSnapshot)
|
||||
newSnapshot.offsets = append(newSnapshot.offsets, running)
|
||||
|
||||
// increment TotIntroducedItems, which tracks the number of items
// queued for persistence.
|
||||
atomic.AddUint64(&s.stats.TotIntroducedItems, newSegmentSnapshot.Count())
|
||||
atomic.AddUint64(&s.stats.TotIntroducedSegmentsBatch, 1)
|
||||
}
|
||||
// copy old values
|
||||
for key, oldVal := range root.internal {
|
||||
newSnapshot.internal[key] = oldVal
|
||||
}
|
||||
// set new values and apply deletes
|
||||
for key, newVal := range next.internal {
|
||||
if newVal != nil {
|
||||
newSnapshot.internal[key] = newVal
|
||||
} else {
|
||||
delete(newSnapshot.internal, key)
|
||||
}
|
||||
}
|
||||
|
||||
newSnapshot.updateSize()
|
||||
s.rootLock.Lock()
|
||||
if next.persisted != nil {
|
||||
s.rootPersisted = append(s.rootPersisted, next.persisted)
|
||||
}
|
||||
if next.persistedCallback != nil {
|
||||
s.persistedCallbacks = append(s.persistedCallbacks, next.persistedCallback)
|
||||
}
|
||||
// swap in new index snapshot
|
||||
newSnapshot.epoch = s.nextSnapshotEpoch
|
||||
s.nextSnapshotEpoch++
|
||||
rootPrev := s.root
|
||||
s.root = newSnapshot
|
||||
atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
|
||||
// release lock
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if rootPrev != nil {
|
||||
_ = rootPrev.DecRef()
|
||||
}
|
||||
|
||||
// update the removal eligibility for those segment files
|
||||
// that are not a part of the latest root.
|
||||
for _, filename := range droppedSegmentFiles {
|
||||
s.unmarkIneligibleForRemoval(filename)
|
||||
}
|
||||
|
||||
close(next.applied)
|
||||
|
||||
return nil
|
||||
}
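A small illustration (assumed values, hypothetical function name) of how the deleted-doc bitmaps above combine: deletions already recorded on the segment snapshot and the delta computed for this introduction are unioned, so a document deleted in either remains deleted in the new snapshot. Both "fmt" and the roaring package are already imported in this file.

func exampleCombineDeletes() {
	prev := roaring.BitmapOf(1, 5)      // deletions already on the segment snapshot
	delta := roaring.BitmapOf(5, 9)     // deletions introduced by this batch
	combined := roaring.Or(prev, delta) // union -> {1, 5, 9}
	fmt.Println(combined.GetCardinality()) // prints 3
}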
|
||||
|
||||
func (s *Scorch) introducePersist(persist *persistIntroduction) {
|
||||
atomic.AddUint64(&s.stats.TotIntroducePersistBeg, 1)
|
||||
defer atomic.AddUint64(&s.stats.TotIntroducePersistEnd, 1)
|
||||
|
||||
s.rootLock.Lock()
|
||||
root := s.root
|
||||
root.AddRef()
|
||||
nextSnapshotEpoch := s.nextSnapshotEpoch
|
||||
s.nextSnapshotEpoch++
|
||||
s.rootLock.Unlock()
|
||||
|
||||
defer func() { _ = root.DecRef() }()
|
||||
|
||||
newIndexSnapshot := &IndexSnapshot{
|
||||
parent: s,
|
||||
epoch: nextSnapshotEpoch,
|
||||
segment: make([]*SegmentSnapshot, len(root.segment)),
|
||||
offsets: make([]uint64, len(root.offsets)),
|
||||
internal: make(map[string][]byte, len(root.internal)),
|
||||
refs: 1,
|
||||
creator: "introducePersist",
|
||||
}
|
||||
|
||||
var docsToPersistCount, memSegments, fileSegments uint64
|
||||
for i, segmentSnapshot := range root.segment {
|
||||
// see if this segment has been replaced
|
||||
if replacement, ok := persist.persisted[segmentSnapshot.id]; ok {
|
||||
newSegmentSnapshot := &SegmentSnapshot{
|
||||
id: segmentSnapshot.id,
|
||||
segment: replacement,
|
||||
deleted: segmentSnapshot.deleted,
|
||||
stats: segmentSnapshot.stats,
|
||||
cachedDocs: segmentSnapshot.cachedDocs,
|
||||
cachedMeta: segmentSnapshot.cachedMeta,
|
||||
creator: "introducePersist",
|
||||
mmaped: 1,
|
||||
}
|
||||
newIndexSnapshot.segment[i] = newSegmentSnapshot
|
||||
delete(persist.persisted, segmentSnapshot.id)
|
||||
|
||||
// update items persisted in case of a new segment snapshot
|
||||
atomic.AddUint64(&s.stats.TotPersistedItems, newSegmentSnapshot.Count())
|
||||
atomic.AddUint64(&s.stats.TotPersistedSegments, 1)
|
||||
fileSegments++
|
||||
} else {
|
||||
newIndexSnapshot.segment[i] = root.segment[i]
|
||||
newIndexSnapshot.segment[i].segment.AddRef()
|
||||
|
||||
if isMemorySegment(root.segment[i]) {
|
||||
docsToPersistCount += root.segment[i].Count()
|
||||
memSegments++
|
||||
} else {
|
||||
fileSegments++
|
||||
}
|
||||
}
|
||||
newIndexSnapshot.offsets[i] = root.offsets[i]
|
||||
}
|
||||
|
||||
for k, v := range root.internal {
|
||||
newIndexSnapshot.internal[k] = v
|
||||
}
|
||||
|
||||
atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
|
||||
atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
|
||||
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
|
||||
newIndexSnapshot.updateSize()
|
||||
s.rootLock.Lock()
|
||||
rootPrev := s.root
|
||||
s.root = newIndexSnapshot
|
||||
atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if rootPrev != nil {
|
||||
_ = rootPrev.DecRef()
|
||||
}
|
||||
|
||||
close(persist.applied)
|
||||
}
|
||||
|
||||
// The introducer should definitely handle the segmentMerge.notify
|
||||
// channel before exiting the introduceMerge.
|
||||
func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
||||
atomic.AddUint64(&s.stats.TotIntroduceMergeBeg, 1)
|
||||
defer atomic.AddUint64(&s.stats.TotIntroduceMergeEnd, 1)
|
||||
|
||||
s.rootLock.RLock()
|
||||
root := s.root
|
||||
root.AddRef()
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
defer func() { _ = root.DecRef() }()
|
||||
|
||||
newSnapshot := &IndexSnapshot{
|
||||
parent: s,
|
||||
internal: root.internal,
|
||||
refs: 1,
|
||||
creator: "introduceMerge",
|
||||
}
|
||||
|
||||
var running, docsToPersistCount, memSegments, fileSegments uint64
|
||||
var droppedSegmentFiles []string
|
||||
newSegmentDeleted := make([]*roaring.Bitmap, len(nextMerge.new))
|
||||
for i := range newSegmentDeleted {
|
||||
// create a bitmap per newly merged segment to track its obsoleted docs
|
||||
newSegmentDeleted[i] = roaring.NewBitmap()
|
||||
}
|
||||
|
||||
// iterate through current segments
|
||||
for i := range root.segment {
|
||||
segmentID := root.segment[i].id
|
||||
if segSnapAtMerge, ok := nextMerge.mergedSegHistory[segmentID]; ok {
|
||||
// this segment is going away, see if anything else was deleted since we started the merge
|
||||
if segSnapAtMerge != nil && root.segment[i].deleted != nil {
|
||||
// assume all these deletes are new
|
||||
deletedSince := root.segment[i].deleted
|
||||
// if we already knew about some of them, remove
|
||||
if segSnapAtMerge.oldSegment.deleted != nil {
|
||||
deletedSince = roaring.AndNot(root.segment[i].deleted, segSnapAtMerge.oldSegment.deleted)
|
||||
}
|
||||
deletedSinceItr := deletedSince.Iterator()
|
||||
for deletedSinceItr.HasNext() {
|
||||
oldDocNum := deletedSinceItr.Next()
|
||||
newDocNum := segSnapAtMerge.oldNewDocIDs[oldDocNum]
|
||||
newSegmentDeleted[segSnapAtMerge.workerID].Add(uint32(newDocNum))
|
||||
}
|
||||
}
|
||||
|
||||
// remove this segment from the merged-segment history map; whatever
// entries remain after the root segments are processed identify the
// segments that became obsolete relative to the root in the meantime
|
||||
delete(nextMerge.mergedSegHistory, segmentID)
|
||||
} else if root.segment[i].LiveSize() > 0 {
|
||||
// this segment is staying
|
||||
newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{
|
||||
id: root.segment[i].id,
|
||||
segment: root.segment[i].segment,
|
||||
deleted: root.segment[i].deleted,
|
||||
stats: root.segment[i].stats,
|
||||
cachedDocs: root.segment[i].cachedDocs,
|
||||
cachedMeta: root.segment[i].cachedMeta,
|
||||
creator: root.segment[i].creator,
|
||||
})
|
||||
root.segment[i].segment.AddRef()
|
||||
newSnapshot.offsets = append(newSnapshot.offsets, running)
|
||||
running += root.segment[i].segment.Count()
|
||||
|
||||
if isMemorySegment(root.segment[i]) {
|
||||
docsToPersistCount += root.segment[i].Count()
|
||||
memSegments++
|
||||
} else {
|
||||
fileSegments++
|
||||
}
|
||||
} else if root.segment[i].LiveSize() == 0 {
|
||||
if seg, ok := root.segment[i].segment.(segment.PersistedSegment); ok {
|
||||
droppedSegmentFiles = append(droppedSegmentFiles,
|
||||
filepath.Base(seg.Path()))
|
||||
}
|
||||
}
|
||||
}
|
||||
// before the new merge is introduced, reconcile the newly merged
// segments with the current root: any source segments that have already
// dropped out of the root are obsolete, so their doc numbers are applied
// as deletions to the newly merged segments
|
||||
for _, ss := range nextMerge.mergedSegHistory {
|
||||
obsoleted := ss.oldSegment.DocNumbersLive()
|
||||
if obsoleted != nil {
|
||||
obsoletedIter := obsoleted.Iterator()
|
||||
for obsoletedIter.HasNext() {
|
||||
oldDocNum := obsoletedIter.Next()
|
||||
newDocNum := ss.oldNewDocIDs[oldDocNum]
|
||||
newSegmentDeleted[ss.workerID].Add(uint32(newDocNum))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
skipped := true
|
||||
// make the newly merged segments part of the newSnapshot being constructed
|
||||
for i, newMergedSegment := range nextMerge.new {
|
||||
// checking if this newly merged segment is worth keeping based on
|
||||
// obsoleted doc count since the merge intro started
|
||||
if newMergedSegment != nil &&
|
||||
newMergedSegment.Count() > newSegmentDeleted[i].GetCardinality() {
|
||||
stats := newFieldStats()
|
||||
if fsr, ok := newMergedSegment.(segment.FieldStatsReporter); ok {
|
||||
fsr.UpdateFieldStats(stats)
|
||||
}
|
||||
|
||||
// put the merged segment at the end of newSnapshot
|
||||
newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{
|
||||
id: nextMerge.id[i],
|
||||
segment: newMergedSegment, // take ownership for nextMerge.new's ref-count
|
||||
deleted: newSegmentDeleted[i],
|
||||
stats: stats,
|
||||
cachedDocs: &cachedDocs{cache: nil},
|
||||
cachedMeta: &cachedMeta{meta: nil},
|
||||
creator: "introduceMerge",
|
||||
mmaped: nextMerge.mmaped,
|
||||
})
|
||||
newSnapshot.offsets = append(newSnapshot.offsets, running)
|
||||
running += newMergedSegment.Count()
|
||||
|
||||
switch newMergedSegment.(type) {
|
||||
case segment.PersistedSegment:
|
||||
fileSegments++
|
||||
default:
|
||||
docsToPersistCount += newMergedSegment.Count() - newSegmentDeleted[i].GetCardinality()
|
||||
memSegments++
|
||||
}
|
||||
skipped = false
|
||||
}
|
||||
}
|
||||
|
||||
if skipped {
|
||||
atomic.AddUint64(&s.stats.TotFileMergeIntroductionsObsoleted, 1)
|
||||
} else {
|
||||
atomic.AddUint64(&s.stats.TotIntroducedSegmentsMerge, uint64(len(nextMerge.new)))
|
||||
}
|
||||
|
||||
atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
|
||||
atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
|
||||
atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
|
||||
|
||||
newSnapshot.AddRef() // 1 ref for the nextMerge.notify response
|
||||
|
||||
newSnapshot.updateSize()
|
||||
s.rootLock.Lock()
|
||||
// swap in new index snapshot
|
||||
newSnapshot.epoch = s.nextSnapshotEpoch
|
||||
s.nextSnapshotEpoch++
|
||||
rootPrev := s.root
|
||||
s.root = newSnapshot
|
||||
atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
|
||||
// release lock
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if rootPrev != nil {
|
||||
_ = rootPrev.DecRef()
|
||||
}
|
||||
|
||||
// update the removal eligibility for those segment files
|
||||
// that are not a part of the latest root.
|
||||
for _, filename := range droppedSegmentFiles {
|
||||
s.unmarkIneligibleForRemoval(filename)
|
||||
}
|
||||
|
||||
// notify requester that we incorporated this
|
||||
nextMerge.notifyCh <- &mergeTaskIntroStatus{
|
||||
indexSnapshot: newSnapshot,
|
||||
skipped: skipped}
|
||||
close(nextMerge.notifyCh)
|
||||
}
|
||||
|
||||
func isMemorySegment(s *SegmentSnapshot) bool {
|
||||
switch s.segment.(type) {
|
||||
case segment.PersistedSegment:
|
||||
return false
|
||||
default:
|
||||
return true
|
||||
}
|
||||
}
|
637
index/scorch/merge.go
Normal file
@@ -0,0 +1,637 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
"github.com/blevesearch/bleve/v2/index/scorch/mergeplan"
|
||||
"github.com/blevesearch/bleve/v2/util"
|
||||
segment "github.com/blevesearch/scorch_segment_api/v2"
|
||||
)
|
||||
|
||||
func (s *Scorch) mergerLoop() {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
s.fireAsyncError(&AsyncPanicError{
|
||||
Source: "merger",
|
||||
Path: s.path,
|
||||
})
|
||||
}
|
||||
|
||||
s.asyncTasks.Done()
|
||||
}()
|
||||
|
||||
var lastEpochMergePlanned uint64
|
||||
var ctrlMsg *mergerCtrl
|
||||
mergePlannerOptions, err := s.parseMergePlannerOptions()
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("mergePlannerOption json parsing err: %v", err))
|
||||
return
|
||||
}
|
||||
ctrlMsgDflt := &mergerCtrl{ctx: context.Background(),
|
||||
options: mergePlannerOptions,
|
||||
doneCh: nil}
|
||||
|
||||
OUTER:
|
||||
for {
|
||||
atomic.AddUint64(&s.stats.TotFileMergeLoopBeg, 1)
|
||||
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
|
||||
default:
|
||||
// check to see if there is a new snapshot to persist
|
||||
s.rootLock.Lock()
|
||||
ourSnapshot := s.root
|
||||
ourSnapshot.AddRef()
|
||||
atomic.StoreUint64(&s.iStats.mergeSnapshotSize, uint64(ourSnapshot.Size()))
|
||||
atomic.StoreUint64(&s.iStats.mergeEpoch, ourSnapshot.epoch)
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if ctrlMsg == nil && ourSnapshot.epoch != lastEpochMergePlanned {
|
||||
ctrlMsg = ctrlMsgDflt
|
||||
}
|
||||
if ctrlMsg != nil {
|
||||
continueMerge := s.fireEvent(EventKindPreMergeCheck, 0)
|
||||
// The default, if there's no handler, is to continue the merge.
|
||||
if !continueMerge {
|
||||
// If it's decided that this merge can't take place now,
|
||||
// begin the merge process all over again.
|
||||
// Retry instead of blocking/waiting here since a long wait
|
||||
// can result in more segments introduced i.e. s.root will
|
||||
// be updated.
|
||||
|
||||
// decrement the ref count since it's no longer needed in this
|
||||
// iteration
|
||||
_ = ourSnapshot.DecRef()
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
startTime := time.Now()
|
||||
|
||||
// lets get started
|
||||
err := s.planMergeAtSnapshot(ctrlMsg.ctx, ctrlMsg.options,
|
||||
ourSnapshot)
|
||||
if err != nil {
|
||||
atomic.StoreUint64(&s.iStats.mergeEpoch, 0)
|
||||
if err == segment.ErrClosed {
|
||||
// index has been closed
|
||||
_ = ourSnapshot.DecRef()
|
||||
|
||||
// continue the workloop on a user triggered cancel
|
||||
if ctrlMsg.doneCh != nil {
|
||||
close(ctrlMsg.doneCh)
|
||||
ctrlMsg = nil
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
// exit the workloop on index closure
|
||||
ctrlMsg = nil
|
||||
break OUTER
|
||||
}
|
||||
s.fireAsyncError(fmt.Errorf("merging err: %v", err))
|
||||
_ = ourSnapshot.DecRef()
|
||||
atomic.AddUint64(&s.stats.TotFileMergeLoopErr, 1)
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
if ctrlMsg.doneCh != nil {
|
||||
close(ctrlMsg.doneCh)
|
||||
}
|
||||
ctrlMsg = nil
|
||||
|
||||
lastEpochMergePlanned = ourSnapshot.epoch
|
||||
|
||||
atomic.StoreUint64(&s.stats.LastMergedEpoch, ourSnapshot.epoch)
|
||||
|
||||
s.fireEvent(EventKindMergerProgress, time.Since(startTime))
|
||||
}
|
||||
_ = ourSnapshot.DecRef()
|
||||
|
||||
// tell the persister we're waiting for changes
|
||||
// first make an epochWatcher chan
|
||||
ew := &epochWatcher{
|
||||
epoch: lastEpochMergePlanned,
|
||||
notifyCh: make(notificationChan, 1),
|
||||
}
|
||||
|
||||
// give it to the persister
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
case s.persisterNotifier <- ew:
|
||||
case ctrlMsg = <-s.forceMergeRequestCh:
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
// now wait for persister (but also detect close)
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
break OUTER
|
||||
case <-ew.notifyCh:
|
||||
case ctrlMsg = <-s.forceMergeRequestCh:
|
||||
}
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergeLoopEnd, 1)
|
||||
}
|
||||
}
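The EventKindPreMergeCheck hook above lets an embedder defer merge planning; when the callback returns false, the merger simply retries on a later iteration rather than proceeding or blocking. A hedged sketch using the RegistryEventCallbacks mechanism that appears in this package's tests (systemNotBusy is a hypothetical predicate supplied by the embedder, and the callback name would be wired up via config["eventCallbackName"]):

// registerMergeThrottle is a hypothetical helper an embedder might call
// before opening the index.
func registerMergeThrottle(systemNotBusy func() bool) {
	RegistryEventCallbacks["throttle-merges"] = func(e Event) bool {
		if e.Kind == EventKindPreMergeCheck {
			return systemNotBusy() // false => skip this round; the merger retries later
		}
		return true // let all other events proceed
	}
}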
|
||||
|
||||
type mergerCtrl struct {
|
||||
ctx context.Context
|
||||
options *mergeplan.MergePlanOptions
|
||||
doneCh chan struct{}
|
||||
}
|
||||
|
||||
// ForceMerge helps users trigger a merge operation on
|
||||
// an online scorch index.
|
||||
func (s *Scorch) ForceMerge(ctx context.Context,
|
||||
mo *mergeplan.MergePlanOptions) error {
|
||||
// check whether a force merge is already in progress
|
||||
s.rootLock.Lock()
|
||||
if s.stats.TotFileMergeForceOpsStarted >
|
||||
s.stats.TotFileMergeForceOpsCompleted {
|
||||
s.rootLock.Unlock()
|
||||
return fmt.Errorf("force merge already in progress")
|
||||
}
|
||||
|
||||
s.stats.TotFileMergeForceOpsStarted++
|
||||
s.rootLock.Unlock()
|
||||
|
||||
if mo != nil {
|
||||
err := mergeplan.ValidateMergePlannerOptions(mo)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// assume the default single segment merge policy
|
||||
mo = &mergeplan.SingleSegmentMergePlanOptions
|
||||
}
|
||||
msg := &mergerCtrl{options: mo,
|
||||
doneCh: make(chan struct{}),
|
||||
ctx: ctx,
|
||||
}
|
||||
|
||||
// request the merger perform a force merge
|
||||
select {
|
||||
case s.forceMergeRequestCh <- msg:
|
||||
case <-s.closeCh:
|
||||
return nil
|
||||
}
|
||||
|
||||
// wait for the force merge operation completion
|
||||
select {
|
||||
case <-msg.doneCh:
|
||||
atomic.AddUint64(&s.stats.TotFileMergeForceOpsCompleted, 1)
|
||||
case <-s.closeCh:
|
||||
}
|
||||
|
||||
return nil
|
||||
}
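A brief usage sketch of the API above (hypothetical wrapper name): passing nil options falls back to mergeplan.SingleSegmentMergePlanOptions, so this compacts an online index down toward a single segment; the call returns nil early if the index is closed while waiting.

func compactToSingleSegment(ctx context.Context, s *Scorch) error {
	// nil selects mergeplan.SingleSegmentMergePlanOptions, per ForceMerge above
	return s.ForceMerge(ctx, nil)
}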
|
||||
|
||||
func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions,
|
||||
error) {
|
||||
mergePlannerOptions := mergeplan.DefaultMergePlanOptions
|
||||
|
||||
po, err := s.parsePersisterOptions()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// by default use the MaxSizeInMemoryMergePerWorker from the persister option
|
||||
// as the FloorSegmentFileSize for the merge planner which would be the
|
||||
// first tier size in the planning. If the value is 0, then we don't use the
|
||||
// file size in the planning.
|
||||
mergePlannerOptions.FloorSegmentFileSize = int64(po.MaxSizeInMemoryMergePerWorker)
|
||||
|
||||
if v, ok := s.config["scorchMergePlanOptions"]; ok {
|
||||
b, err := util.MarshalJSON(v)
|
||||
if err != nil {
|
||||
return &mergePlannerOptions, err
|
||||
}
|
||||
|
||||
err = util.UnmarshalJSON(b, &mergePlannerOptions)
|
||||
if err != nil {
|
||||
return &mergePlannerOptions, err
|
||||
}
|
||||
|
||||
err = mergeplan.ValidateMergePlannerOptions(&mergePlannerOptions)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return &mergePlannerOptions, nil
|
||||
}
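A sketch of feeding planner options through the scorch config parsed above (the values shown are assumptions, not recommendations): the value stored under "scorchMergePlanOptions" is marshalled to JSON and unmarshalled into mergeplan.MergePlanOptions, so the keys only need to match that struct's exported fields (encoding/json matches them case-insensitively).

var exampleScorchConfig = map[string]interface{}{
	"scorchMergePlanOptions": map[string]interface{}{
		"MaxSegmentsPerTier":   10,
		"MaxSegmentSize":       5000000,
		"SegmentsPerMergeTask": 10,
		"FloorSegmentSize":     2000,
	},
}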
|
||||
|
||||
type closeChWrapper struct {
|
||||
ch1 chan struct{}
|
||||
ctx context.Context
|
||||
closeCh chan struct{}
|
||||
cancelCh chan struct{}
|
||||
}
|
||||
|
||||
func newCloseChWrapper(ch1 chan struct{},
|
||||
ctx context.Context) *closeChWrapper {
|
||||
return &closeChWrapper{
|
||||
ch1: ch1,
|
||||
ctx: ctx,
|
||||
closeCh: make(chan struct{}),
|
||||
cancelCh: make(chan struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
func (w *closeChWrapper) close() {
|
||||
close(w.closeCh)
|
||||
}
|
||||
|
||||
func (w *closeChWrapper) listen() {
|
||||
select {
|
||||
case <-w.ch1:
|
||||
close(w.cancelCh)
|
||||
case <-w.ctx.Done():
|
||||
close(w.cancelCh)
|
||||
case <-w.closeCh:
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scorch) planMergeAtSnapshot(ctx context.Context,
|
||||
options *mergeplan.MergePlanOptions, ourSnapshot *IndexSnapshot) error {
|
||||
// build list of persisted segments in this snapshot
|
||||
var onlyPersistedSnapshots []mergeplan.Segment
|
||||
for _, segmentSnapshot := range ourSnapshot.segment {
|
||||
if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok {
|
||||
onlyPersistedSnapshots = append(onlyPersistedSnapshots, segmentSnapshot)
|
||||
}
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlan, 1)
|
||||
|
||||
// give this list to the planner
|
||||
resultMergePlan, err := mergeplan.Plan(onlyPersistedSnapshots, options)
|
||||
if err != nil {
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanErr, 1)
|
||||
return fmt.Errorf("merge planning err: %v", err)
|
||||
}
|
||||
if resultMergePlan == nil {
|
||||
// nothing to do
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1)
|
||||
return nil
|
||||
}
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1)
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks)))
|
||||
|
||||
// process tasks in serial for now
|
||||
var filenames []string
|
||||
|
||||
cw := newCloseChWrapper(s.closeCh, ctx)
|
||||
defer cw.close()
|
||||
|
||||
go cw.listen()
|
||||
|
||||
for _, task := range resultMergePlan.Tasks {
|
||||
if len(task.Segments) == 0 {
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
|
||||
continue
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegments, uint64(len(task.Segments)))
|
||||
|
||||
oldMap := make(map[uint64]*SegmentSnapshot, len(task.Segments))
|
||||
newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
|
||||
segmentsToMerge := make([]segment.Segment, 0, len(task.Segments))
|
||||
docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments))
|
||||
mergedSegHistory := make(map[uint64]*mergedSegmentHistory, len(task.Segments))
|
||||
|
||||
for _, planSegment := range task.Segments {
|
||||
if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok {
|
||||
oldMap[segSnapshot.id] = segSnapshot
|
||||
mergedSegHistory[segSnapshot.id] = &mergedSegmentHistory{
|
||||
workerID: 0,
|
||||
oldSegment: segSnapshot,
|
||||
}
|
||||
if persistedSeg, ok := segSnapshot.segment.(segment.PersistedSegment); ok {
|
||||
if segSnapshot.LiveSize() == 0 {
|
||||
atomic.AddUint64(&s.stats.TotFileMergeSegmentsEmpty, 1)
|
||||
oldMap[segSnapshot.id] = nil
|
||||
delete(mergedSegHistory, segSnapshot.id)
|
||||
} else {
|
||||
segmentsToMerge = append(segmentsToMerge, segSnapshot.segment)
|
||||
docsToDrop = append(docsToDrop, segSnapshot.deleted)
|
||||
}
|
||||
// track the files being merged so that their removal ineligibility can
// be cleared afterwards. This keeps files from staying flagged even in
// fast-merger, slow-persister workflows.
|
||||
path := persistedSeg.Path()
|
||||
filenames = append(filenames,
|
||||
strings.TrimPrefix(path, s.path+string(os.PathSeparator)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var seg segment.Segment
|
||||
var filename string
|
||||
if len(segmentsToMerge) > 0 {
|
||||
filename = zapFileName(newSegmentID)
|
||||
s.markIneligibleForRemoval(filename)
|
||||
path := s.path + string(os.PathSeparator) + filename
|
||||
|
||||
fileMergeZapStartTime := time.Now()
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1)
|
||||
prevBytesReadTotal := cumulateBytesRead(segmentsToMerge)
|
||||
newDocNums, _, err := s.segPlugin.Merge(segmentsToMerge, docsToDrop, path,
|
||||
cw.cancelCh, s)
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1)
|
||||
|
||||
fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime))
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime)
|
||||
if atomic.LoadUint64(&s.stats.MaxFileMergeZapTime) < fileMergeZapTime {
|
||||
atomic.StoreUint64(&s.stats.MaxFileMergeZapTime, fileMergeZapTime)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
s.unmarkIneligibleForRemoval(filename)
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
|
||||
if err == segment.ErrClosed {
|
||||
return err
|
||||
}
|
||||
return fmt.Errorf("merging failed: %v", err)
|
||||
}
|
||||
|
||||
seg, err = s.segPlugin.Open(path)
|
||||
if err != nil {
|
||||
s.unmarkIneligibleForRemoval(filename)
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
|
||||
return err
|
||||
}
|
||||
|
||||
totalBytesRead := seg.BytesRead() + prevBytesReadTotal
|
||||
seg.ResetBytesRead(totalBytesRead)
|
||||
|
||||
for i, segNewDocNums := range newDocNums {
|
||||
if mergedSegHistory[task.Segments[i].Id()] != nil {
|
||||
mergedSegHistory[task.Segments[i].Id()].oldNewDocIDs = segNewDocNums
|
||||
}
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergeSegments, uint64(len(segmentsToMerge)))
|
||||
}
|
||||
|
||||
sm := &segmentMerge{
|
||||
id: []uint64{newSegmentID},
|
||||
mergedSegHistory: mergedSegHistory,
|
||||
new: []segment.Segment{seg},
|
||||
newCount: seg.Count(),
|
||||
notifyCh: make(chan *mergeTaskIntroStatus),
|
||||
mmaped: 1,
|
||||
}
|
||||
|
||||
s.fireEvent(EventKindMergeTaskIntroductionStart, 0)
|
||||
|
||||
// give it to the introducer
|
||||
select {
|
||||
case <-s.closeCh:
|
||||
_ = seg.Close()
|
||||
return segment.ErrClosed
|
||||
case s.merges <- sm:
|
||||
atomic.AddUint64(&s.stats.TotFileMergeIntroductions, 1)
|
||||
}
|
||||
|
||||
introStartTime := time.Now()
|
||||
// it is safe to blockingly wait for the merge introduction
|
||||
// here as the introducer is bound to handle the notify channel.
|
||||
introStatus := <-sm.notifyCh
|
||||
introTime := uint64(time.Since(introStartTime))
|
||||
atomic.AddUint64(&s.stats.TotFileMergeZapIntroductionTime, introTime)
|
||||
if atomic.LoadUint64(&s.stats.MaxFileMergeZapIntroductionTime) < introTime {
|
||||
atomic.StoreUint64(&s.stats.MaxFileMergeZapIntroductionTime, introTime)
|
||||
}
|
||||
atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1)
|
||||
if introStatus != nil && introStatus.indexSnapshot != nil {
|
||||
_ = introStatus.indexSnapshot.DecRef()
|
||||
if introStatus.skipped {
|
||||
// close the segment on skipping introduction.
|
||||
s.unmarkIneligibleForRemoval(filename)
|
||||
_ = seg.Close()
|
||||
}
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1)
|
||||
|
||||
s.fireEvent(EventKindMergeTaskIntroduction, 0)
|
||||
}
|
||||
|
||||
// once all the newly merged segment introductions are done,
|
||||
// it's safe to unflip the removal ineligibility for the replaced
|
||||
// older segments
|
||||
for _, f := range filenames {
|
||||
s.unmarkIneligibleForRemoval(f)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type mergeTaskIntroStatus struct {
|
||||
indexSnapshot *IndexSnapshot
|
||||
skipped bool
|
||||
}
|
||||
|
||||
// mergedSegmentHistory matters when introducing multiple merged segments in a
// single introducer channel push: it ties each replaced segment to the worker
// that merged it, providing a check so that the file count doesn't explode
// over the index's lifetime.
|
||||
type mergedSegmentHistory struct {
|
||||
workerID uint64
|
||||
oldNewDocIDs []uint64
|
||||
oldSegment *SegmentSnapshot
|
||||
}
|
||||
|
||||
type segmentMerge struct {
|
||||
id []uint64
|
||||
new []segment.Segment
|
||||
mergedSegHistory map[uint64]*mergedSegmentHistory
|
||||
notifyCh chan *mergeTaskIntroStatus
|
||||
mmaped uint32
|
||||
newCount uint64
|
||||
}
|
||||
|
||||
func cumulateBytesRead(sbs []segment.Segment) uint64 {
|
||||
var rv uint64
|
||||
for _, seg := range sbs {
|
||||
rv += seg.BytesRead()
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func closeNewMergedSegments(segs []segment.Segment) error {
|
||||
for _, seg := range segs {
|
||||
if seg != nil {
|
||||
_ = seg.DecRef()
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// mergeAndPersistInMemorySegments takes an IndexSnapshot and a list of in-memory segments,
|
||||
// which are merged and persisted to disk concurrently. These are then introduced as
|
||||
// the new root snapshot in one-shot.
|
||||
func (s *Scorch) mergeAndPersistInMemorySegments(snapshot *IndexSnapshot,
|
||||
flushableObjs []*flushable) (*IndexSnapshot, []uint64, error) {
|
||||
atomic.AddUint64(&s.stats.TotMemMergeBeg, 1)
|
||||
|
||||
memMergeZapStartTime := time.Now()
|
||||
|
||||
atomic.AddUint64(&s.stats.TotMemMergeZapBeg, 1)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
// we're tracking the merged segments and their doc number per worker
|
||||
// to be able to introduce them all at once, so the first dimension of the
|
||||
// slices here corresponds to the workerID
|
||||
newDocIDsSet := make([][][]uint64, len(flushableObjs))
|
||||
newMergedSegments := make([]segment.Segment, len(flushableObjs))
|
||||
newMergedSegmentIDs := make([]uint64, len(flushableObjs))
|
||||
numFlushes := len(flushableObjs)
|
||||
var numSegments, newMergedCount uint64
|
||||
var em sync.Mutex
|
||||
var errs []error
|
||||
|
||||
// deploy the workers to merge and flush the batches of segments concurrently
|
||||
// and create a new file segment
|
||||
for i := 0; i < numFlushes; i++ {
|
||||
wg.Add(1)
|
||||
go func(segsBatch []segment.Segment, dropsBatch []*roaring.Bitmap, id int) {
|
||||
defer wg.Done()
|
||||
newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
|
||||
filename := zapFileName(newSegmentID)
|
||||
path := s.path + string(os.PathSeparator) + filename
|
||||
|
||||
// the newly merged segment is already flushed out to disk, just needs
|
||||
// to be opened using mmap.
|
||||
newDocIDs, _, err :=
|
||||
s.segPlugin.Merge(segsBatch, dropsBatch, path, s.closeCh, s)
|
||||
if err != nil {
|
||||
em.Lock()
|
||||
errs = append(errs, err)
|
||||
em.Unlock()
|
||||
atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
|
||||
return
|
||||
}
|
||||
// To prevent accidental cleanup of this newly created file, mark it
// as ineligible for removal. This is flipped back when the bolt
// is updated - which is valid, since the snapshot recorded in bolt is
// cleaned up only once it's zero ref'd (MB-66163 for more details).
|
||||
s.markIneligibleForRemoval(filename)
|
||||
newMergedSegmentIDs[id] = newSegmentID
|
||||
newDocIDsSet[id] = newDocIDs
|
||||
newMergedSegments[id], err = s.segPlugin.Open(path)
|
||||
if err != nil {
|
||||
em.Lock()
|
||||
errs = append(errs, err)
|
||||
em.Unlock()
|
||||
atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
|
||||
return
|
||||
}
|
||||
atomic.AddUint64(&newMergedCount, newMergedSegments[id].Count())
|
||||
atomic.AddUint64(&numSegments, uint64(len(segsBatch)))
|
||||
}(flushableObjs[i].segments, flushableObjs[i].drops, i)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
if errs != nil {
|
||||
// close the new merged segments
|
||||
_ = closeNewMergedSegments(newMergedSegments)
|
||||
var errf error
|
||||
for _, err := range errs {
|
||||
if err == segment.ErrClosed {
|
||||
// the index snapshot was closed which will be handled gracefully
|
||||
// by retrying the whole merge+flush operation in a later iteration
|
||||
// so it's safe to exit early with the same error.
|
||||
return nil, nil, err
|
||||
}
|
||||
errf = fmt.Errorf("%w; %v", errf, err)
|
||||
}
|
||||
return nil, nil, errf
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1)
|
||||
|
||||
memMergeZapTime := uint64(time.Since(memMergeZapStartTime))
|
||||
atomic.AddUint64(&s.stats.TotMemMergeZapTime, memMergeZapTime)
|
||||
if atomic.LoadUint64(&s.stats.MaxMemMergeZapTime) < memMergeZapTime {
|
||||
atomic.StoreUint64(&s.stats.MaxMemMergeZapTime, memMergeZapTime)
|
||||
}
|
||||
|
||||
// update the segmentMerge task with the newly merged + flushed segments which
|
||||
// are to be introduced atomically.
|
||||
sm := &segmentMerge{
|
||||
id: newMergedSegmentIDs,
|
||||
new: newMergedSegments,
|
||||
mergedSegHistory: make(map[uint64]*mergedSegmentHistory, numSegments),
|
||||
notifyCh: make(chan *mergeTaskIntroStatus),
|
||||
newCount: newMergedCount,
|
||||
}
|
||||
|
||||
// create a history map which maps the old in-memory segments with the specific
|
||||
// persister worker (also the specific file segment its going to be part of)
|
||||
// which flushed it out. This map will be used on the introducer side to out-ref
|
||||
// the in-memory segments and also track the new tombstones if present.
|
||||
for i, flushable := range flushableObjs {
|
||||
for j, idx := range flushable.sbIdxs {
|
||||
ss := snapshot.segment[idx]
|
||||
// oldSegmentSnapshot.id -> {workerID, oldSegmentSnapshot, docIDs}
|
||||
sm.mergedSegHistory[ss.id] = &mergedSegmentHistory{
|
||||
workerID: uint64(i),
|
||||
oldNewDocIDs: newDocIDsSet[i][j],
|
||||
oldSegment: ss,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
select { // send to introducer
|
||||
case <-s.closeCh:
|
||||
_ = closeNewMergedSegments(newMergedSegments)
|
||||
return nil, nil, segment.ErrClosed
|
||||
case s.merges <- sm:
|
||||
}
|
||||
|
||||
// blockingly wait for the introduction to complete
|
||||
var newSnapshot *IndexSnapshot
|
||||
introStatus := <-sm.notifyCh
|
||||
if introStatus != nil && introStatus.indexSnapshot != nil {
|
||||
newSnapshot = introStatus.indexSnapshot
|
||||
atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(numSegments))
|
||||
atomic.AddUint64(&s.stats.TotMemMergeDone, 1)
|
||||
if introStatus.skipped {
|
||||
// close the segment on skipping introduction.
|
||||
_ = newSnapshot.DecRef()
|
||||
_ = closeNewMergedSegments(newMergedSegments)
|
||||
newSnapshot = nil
|
||||
}
|
||||
}
|
||||
|
||||
return newSnapshot, newMergedSegmentIDs, nil
|
||||
}
|
||||
|
||||
func (s *Scorch) ReportBytesWritten(bytesWritten uint64) {
|
||||
atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, bytesWritten)
|
||||
}
|
157
index/scorch/merge_test.go
Normal file
@@ -0,0 +1,157 @@
|
|||
// Copyright (c) 2020 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/document"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
func TestObsoleteSegmentMergeIntroduction(t *testing.T) {
|
||||
testConfig := CreateConfig("TestObsoleteSegmentMergeIntroduction")
|
||||
err := InitTest(testConfig)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := DestroyTest(testConfig)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
var introComplete, mergeIntroStart, mergeIntroComplete sync.WaitGroup
|
||||
introComplete.Add(1)
|
||||
mergeIntroStart.Add(1)
|
||||
mergeIntroComplete.Add(1)
|
||||
var segIntroCompleted int
|
||||
RegistryEventCallbacks["test"] = func(e Event) bool {
|
||||
switch e.Kind {
|
||||
case EventKindBatchIntroduction:
|
||||
segIntroCompleted++
|
||||
if segIntroCompleted == 3 {
|
||||
// all 3 segments introduced
|
||||
introComplete.Done()
|
||||
}
|
||||
case EventKindMergeTaskIntroductionStart:
|
||||
// signal the start of merge task introduction so that
|
||||
// we can introduce a new batch which obsoletes the
|
||||
// merged segment's contents.
|
||||
mergeIntroStart.Done()
|
||||
// hold the merge task introduction until the merged segment contents
|
||||
// are obsoleted with the next batch/segment introduction.
|
||||
introComplete.Wait()
|
||||
case EventKindMergeTaskIntroduction:
|
||||
// signal the completion of the merge task introduction.
|
||||
mergeIntroComplete.Done()
|
||||
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
ourConfig := make(map[string]interface{}, len(testConfig))
|
||||
for k, v := range testConfig {
|
||||
ourConfig[k] = v
|
||||
}
|
||||
ourConfig["eventCallbackName"] = "test"
|
||||
|
||||
analysisQueue := index.NewAnalysisQueue(1)
|
||||
idx, err := NewScorch(Name, ourConfig, analysisQueue)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
err = idx.Open()
|
||||
if err != nil {
|
||||
t.Fatalf("error opening index: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := idx.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
// first introduce two documents over two batches.
|
||||
batch := index.NewBatch()
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3")))
|
||||
batch.Update(doc)
|
||||
err = idx.Batch(batch)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
batch.Reset()
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2updated")))
|
||||
batch.Update(doc)
|
||||
err = idx.Batch(batch)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
// wait until the merger tries to introduce the new merged segment.
|
||||
mergeIntroStart.Wait()
|
||||
|
||||
// execute another batch which obsoletes the contents of the new merged
|
||||
// segment awaiting introduction.
|
||||
batch.Reset()
|
||||
batch.Delete("1")
|
||||
batch.Delete("2")
|
||||
doc = document.NewDocument("3")
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3updated")))
|
||||
batch.Update(doc)
|
||||
err = idx.Batch(batch)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
// wait until the merge task introduction completes.
|
||||
mergeIntroComplete.Wait()
|
||||
|
||||
idxr, err := idx.Reader()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
numSegments := len(idxr.(*IndexSnapshot).segment)
|
||||
if numSegments != 1 {
|
||||
t.Errorf("expected one segment at the root, got: %d", numSegments)
|
||||
}
|
||||
|
||||
skipIntroCount := atomic.LoadUint64(&idxr.(*IndexSnapshot).parent.stats.TotFileMergeIntroductionsObsoleted)
|
||||
if skipIntroCount != 1 {
|
||||
t.Errorf("expected one obsolete merge segment skipping the introduction, got: %d", skipIntroCount)
|
||||
}
|
||||
|
||||
docCount, err := idxr.DocCount()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if docCount != 1 {
|
||||
t.Errorf("Expected document count to be %d got %d", 1, docCount)
|
||||
}
|
||||
|
||||
err = idxr.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
454
index/scorch/mergeplan/merge_plan.go
Normal file
@@ -0,0 +1,454 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package mergeplan provides a segment merge planning approach that's
|
||||
// inspired by Lucene's TieredMergePolicy.java and descriptions like
|
||||
// http://blog.mikemccandless.com/2011/02/visualizing-lucenes-segment-merges.html
|
||||
package mergeplan
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// A Segment represents the information that the planner needs to
|
||||
// calculate segment merging.
|
||||
type Segment interface {
|
||||
// Unique id of the segment -- used for sorting.
|
||||
Id() uint64
|
||||
|
||||
// Full segment size (the size before any logical deletions).
|
||||
FullSize() int64
|
||||
|
||||
// Size of the live data of the segment; i.e., FullSize() minus
|
||||
// any logical deletions.
|
||||
LiveSize() int64
|
||||
|
||||
HasVector() bool
|
||||
|
||||
// Size of the persisted segment file.
|
||||
FileSize() int64
|
||||
}
|
||||
|
||||
// Plan() will functionally compute a merge plan. A segment will be
|
||||
// assigned to at most a single MergeTask in the output MergePlan. A
|
||||
// segment not assigned to any MergeTask means the segment should
|
||||
// remain unmerged.
|
||||
func Plan(segments []Segment, o *MergePlanOptions) (*MergePlan, error) {
|
||||
return plan(segments, o)
|
||||
}
|
||||
|
||||
// A MergePlan is the result of the Plan() API.
|
||||
//
|
||||
// The planner doesn’t know how or whether these tasks are executed --
|
||||
// that’s up to a separate merge execution system, which might execute
|
||||
// these tasks concurrently or not, and which might execute all the
|
||||
// tasks or not.
|
||||
type MergePlan struct {
|
||||
Tasks []*MergeTask
|
||||
}
|
||||
|
||||
// A MergeTask represents several segments that should be merged
|
||||
// together into a single segment.
|
||||
type MergeTask struct {
|
||||
Segments []Segment
|
||||
}
|
||||
|
||||
// The MergePlanOptions is designed to be reusable between planning calls.
|
||||
type MergePlanOptions struct {
|
||||
// Max # segments per logarithmic tier, or max width of any
|
||||
// logarithmic “step”. Smaller values mean more merging but fewer
|
||||
// segments. Should be >= SegmentsPerMergeTask, else you'll have
|
||||
// too much merging.
|
||||
MaxSegmentsPerTier int
|
||||
|
||||
// Max size of any segment produced after merging. Actual
|
||||
// merging, however, may produce segment sizes different than the
|
||||
// planner’s predicted sizes.
|
||||
MaxSegmentSize int64
|
||||
|
||||
// Max size (in bytes) of the persisted segment file that contains the
|
||||
// vectors. This is used to prevent merging of segments that
|
||||
// contain vectors that are too large.
|
||||
MaxSegmentFileSize int64
|
||||
|
||||
// The growth factor for each tier in a staircase of idealized
|
||||
// segments computed by CalcBudget().
|
||||
TierGrowth float64
|
||||
|
||||
// The number of segments in any resulting MergeTask. e.g.,
|
||||
// len(result.Tasks[ * ].Segments) == SegmentsPerMergeTask.
|
||||
SegmentsPerMergeTask int
|
||||
|
||||
// Small segments are rounded up to this size, i.e., treated as
|
||||
// equal (floor) size for consideration. This is to prevent lots
|
||||
// of tiny segments from resulting in a long tail in the index.
|
||||
FloorSegmentSize int64
|
||||
|
||||
// Small segments' file sizes are rounded up to this size to prevent lots
// of tiny segments causing a long tail in the index.
|
||||
FloorSegmentFileSize int64
|
||||
|
||||
// Controls how aggressively merges that reclaim more deletions
|
||||
// are favored. Higher values will more aggressively target
|
||||
// merges that reclaim deletions, but be careful not to go so high
|
||||
// that way too much merging takes place; a value of 3.0 is
|
||||
// probably nearly too high. A value of 0.0 means deletions don't
|
||||
// impact merge selection.
|
||||
ReclaimDeletesWeight float64
|
||||
|
||||
// Optional, defaults to mergeplan.CalcBudget().
|
||||
CalcBudget func(totalSize int64, firstTierSize int64,
|
||||
o *MergePlanOptions) (budgetNumSegments int)
|
||||
|
||||
// Optional, defaults to mergeplan.ScoreSegments().
|
||||
ScoreSegments func(segments []Segment, o *MergePlanOptions) float64
|
||||
|
||||
// Optional.
|
||||
Logger func(string)
|
||||
}
|
||||
|
||||
// Returns the higher of the input or FloorSegmentSize.
|
||||
func (o *MergePlanOptions) RaiseToFloorSegmentSize(s int64) int64 {
|
||||
if s > o.FloorSegmentSize {
|
||||
return s
|
||||
}
|
||||
return o.FloorSegmentSize
|
||||
}
|
||||
|
||||
func (o *MergePlanOptions) RaiseToFloorSegmentFileSize(s int64) int64 {
|
||||
if s > o.FloorSegmentFileSize {
|
||||
return s
|
||||
}
|
||||
return o.FloorSegmentFileSize
|
||||
}
|
||||
|
||||
// MaxSegmentSizeLimit represents the maximum size of a segment;
// this limit comes from the hit-1 optimisation / max encoding limit of uint31.
|
||||
const MaxSegmentSizeLimit = 1<<31 - 1
|
||||
|
||||
// ErrMaxSegmentSizeTooLarge is returned when the size of the segment
|
||||
// exceeds the MaxSegmentSizeLimit
|
||||
var ErrMaxSegmentSizeTooLarge = errors.New("MaxSegmentSize exceeds the size limit")
|
||||
|
||||
// DefaultMergePlanOptions suggests the default options.
|
||||
var DefaultMergePlanOptions = MergePlanOptions{
|
||||
MaxSegmentsPerTier: 10,
|
||||
MaxSegmentSize: 5000000,
|
||||
MaxSegmentFileSize: 4000000000, // 4GB
|
||||
TierGrowth: 10.0,
|
||||
SegmentsPerMergeTask: 10,
|
||||
FloorSegmentSize: 2000,
|
||||
ReclaimDeletesWeight: 2.0,
|
||||
}
|
||||
|
||||
// SingleSegmentMergePlanOptions helps in creating a
|
||||
// single segment index.
|
||||
var SingleSegmentMergePlanOptions = MergePlanOptions{
|
||||
MaxSegmentsPerTier: 1,
|
||||
MaxSegmentSize: 1 << 30,
|
||||
MaxSegmentFileSize: 1 << 40,
|
||||
TierGrowth: 1.0,
|
||||
SegmentsPerMergeTask: 10,
|
||||
FloorSegmentSize: 1 << 30,
|
||||
ReclaimDeletesWeight: 2.0,
|
||||
FloorSegmentFileSize: 1 << 40,
|
||||
}
|
||||
|
||||
// -------------------------------------------
|
||||
|
||||
func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
|
||||
if len(segmentsIn) <= 1 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if o == nil {
|
||||
o = &DefaultMergePlanOptions
|
||||
}
|
||||
|
||||
segments := append([]Segment(nil), segmentsIn...) // Copy.
|
||||
|
||||
sort.Sort(byLiveSizeDescending(segments))
|
||||
|
||||
var minLiveSize int64 = math.MaxInt64
|
||||
|
||||
var eligibles []Segment
|
||||
var eligiblesLiveSize int64
|
||||
var eligiblesFileSize int64
|
||||
var minFileSize int64 = math.MaxInt64
|
||||
|
||||
for _, segment := range segments {
|
||||
if minLiveSize > segment.LiveSize() {
|
||||
minLiveSize = segment.LiveSize()
|
||||
}
|
||||
|
||||
if minFileSize > segment.FileSize() {
|
||||
minFileSize = segment.FileSize()
|
||||
}
|
||||
|
||||
isEligible := segment.LiveSize() < o.MaxSegmentSize/2
|
||||
// An eligible segment (based on #documents) may be too large
|
||||
// and thus need a stricter check based on the file size.
|
||||
// This is particularly important for segments that contain
|
||||
// vectors.
|
||||
if isEligible && segment.HasVector() && o.MaxSegmentFileSize > 0 {
|
||||
isEligible = segment.FileSize() < o.MaxSegmentFileSize/2
|
||||
}
|
||||
|
||||
// Only small-enough segments are eligible.
|
||||
if isEligible {
|
||||
eligibles = append(eligibles, segment)
|
||||
eligiblesLiveSize += segment.LiveSize()
|
||||
eligiblesFileSize += segment.FileSize()
|
||||
}
|
||||
}
|
||||
|
||||
calcBudget := o.CalcBudget
|
||||
if calcBudget == nil {
|
||||
calcBudget = CalcBudget
|
||||
}
|
||||
|
||||
var budgetNumSegments int
|
||||
if o.FloorSegmentFileSize > 0 {
|
||||
minFileSize = o.RaiseToFloorSegmentFileSize(minFileSize)
|
||||
budgetNumSegments = calcBudget(eligiblesFileSize, minFileSize, o)
|
||||
|
||||
} else {
|
||||
minLiveSize = o.RaiseToFloorSegmentSize(minLiveSize)
|
||||
budgetNumSegments = calcBudget(eligiblesLiveSize, minLiveSize, o)
|
||||
}
|
||||
|
||||
scoreSegments := o.ScoreSegments
|
||||
if scoreSegments == nil {
|
||||
scoreSegments = ScoreSegments
|
||||
}
|
||||
|
||||
rv := &MergePlan{}
|
||||
|
||||
var empties []Segment
|
||||
for _, eligible := range eligibles {
|
||||
if eligible.LiveSize() <= 0 {
|
||||
empties = append(empties, eligible)
|
||||
}
|
||||
}
|
||||
if len(empties) > 0 {
|
||||
rv.Tasks = append(rv.Tasks, &MergeTask{Segments: empties})
|
||||
eligibles = removeSegments(eligibles, empties)
|
||||
}
|
||||
|
||||
// While we’re over budget, keep looping, which might produce
|
||||
// another MergeTask.
|
||||
for len(eligibles) > 0 && (len(eligibles)+len(rv.Tasks)) > budgetNumSegments {
|
||||
// Track a current best roster as we examine and score
|
||||
// potential rosters of merges.
|
||||
var bestRoster []Segment
|
||||
var bestRosterScore float64 // Lower score is better.
|
||||
|
||||
for startIdx := 0; startIdx < len(eligibles); startIdx++ {
|
||||
var roster []Segment
|
||||
var rosterLiveSize int64
|
||||
var rosterFileSize int64 // useful for segments with vectors
|
||||
|
||||
for idx := startIdx; idx < len(eligibles) && len(roster) < o.SegmentsPerMergeTask; idx++ {
|
||||
eligible := eligibles[idx]
|
||||
|
||||
if rosterLiveSize+eligible.LiveSize() >= o.MaxSegmentSize {
|
||||
continue
|
||||
}
|
||||
|
||||
if eligible.HasVector() {
|
||||
efs := eligible.FileSize()
|
||||
if rosterFileSize+efs >= o.MaxSegmentFileSize {
|
||||
continue
|
||||
}
|
||||
rosterFileSize += efs
|
||||
}
|
||||
|
||||
roster = append(roster, eligible)
|
||||
rosterLiveSize += eligible.LiveSize()
|
||||
}
|
||||
|
||||
if len(roster) > 0 {
|
||||
rosterScore := scoreSegments(roster, o)
|
||||
|
||||
if len(bestRoster) == 0 || rosterScore < bestRosterScore {
|
||||
bestRoster = roster
|
||||
bestRosterScore = rosterScore
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(bestRoster) == 0 {
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
rv.Tasks = append(rv.Tasks, &MergeTask{Segments: bestRoster})
|
||||
|
||||
eligibles = removeSegments(eligibles, bestRoster)
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// Compute the number of segments that would be needed to cover the
|
||||
// totalSize, by climbing up a logarithmically growing staircase of
|
||||
// segment tiers.
|
||||
func CalcBudget(totalSize int64, firstTierSize int64, o *MergePlanOptions) (
|
||||
budgetNumSegments int) {
|
||||
tierSize := firstTierSize
|
||||
if tierSize < 1 {
|
||||
tierSize = 1
|
||||
}
|
||||
|
||||
maxSegmentsPerTier := o.MaxSegmentsPerTier
|
||||
if maxSegmentsPerTier < 1 {
|
||||
maxSegmentsPerTier = 1
|
||||
}
|
||||
|
||||
tierGrowth := o.TierGrowth
|
||||
if tierGrowth < 1.0 {
|
||||
tierGrowth = 1.0
|
||||
}
|
||||
|
||||
for totalSize > 0 {
|
||||
segmentsInTier := float64(totalSize) / float64(tierSize)
|
||||
if segmentsInTier < float64(maxSegmentsPerTier) {
|
||||
budgetNumSegments += int(math.Ceil(segmentsInTier))
|
||||
break
|
||||
}
|
||||
|
||||
budgetNumSegments += maxSegmentsPerTier
|
||||
totalSize -= int64(maxSegmentsPerTier) * tierSize
|
||||
tierSize = int64(float64(tierSize) * tierGrowth)
|
||||
}
|
||||
|
||||
return budgetNumSegments
|
||||
}
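A worked example of the staircase above, using the defaults (MaxSegmentsPerTier=10, TierGrowth=10) and assumed sizes: with totalSize=250,000 and firstTierSize=2,000, the budget works out to 10 segments at the 2,000 tier, 10 at the 20,000 tier, and ceil(30,000/200,000)=1 at the 200,000 tier, i.e. 21. The function name below is hypothetical; "fmt" is already imported in this file.

func exampleCalcBudget() {
	budget := CalcBudget(250000, 2000, &DefaultMergePlanOptions)
	fmt.Println(budget) // 21 = 10 + 10 + 1 across the 2k, 20k and 200k tiers
}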
|
||||
|
||||
// Of note, removeSegments() keeps the ordering of the results stable.
|
||||
func removeSegments(segments []Segment, toRemove []Segment) []Segment {
|
||||
rv := make([]Segment, 0, len(segments)-len(toRemove))
|
||||
OUTER:
|
||||
for _, segment := range segments {
|
||||
for _, r := range toRemove {
|
||||
if segment == r {
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
rv = append(rv, segment)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
// Smaller result score is better.
|
||||
func ScoreSegments(segments []Segment, o *MergePlanOptions) float64 {
|
||||
var totBeforeSize int64
|
||||
var totAfterSize int64
|
||||
var totAfterSizeFloored int64
|
||||
|
||||
for _, segment := range segments {
|
||||
totBeforeSize += segment.FullSize()
|
||||
totAfterSize += segment.LiveSize()
|
||||
totAfterSizeFloored += o.RaiseToFloorSegmentSize(segment.LiveSize())
|
||||
}
|
||||
|
||||
if totBeforeSize <= 0 || totAfterSize <= 0 || totAfterSizeFloored <= 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Roughly guess the "balance" of the segments -- whether the
|
||||
// segments are about the same size.
|
||||
balance :=
|
||||
float64(o.RaiseToFloorSegmentSize(segments[0].LiveSize())) /
|
||||
float64(totAfterSizeFloored)
|
||||
|
||||
// Gently favor smaller merges over bigger ones. We don't want to
|
||||
// make the exponent too large else we end up with poor merges of
|
||||
// small segments in order to avoid the large merges.
|
||||
score := balance * math.Pow(float64(totAfterSize), 0.05)
|
||||
|
||||
// Strongly favor merges that reclaim deletes.
|
||||
nonDelRatio := float64(totAfterSize) / float64(totBeforeSize)
|
||||
|
||||
score *= math.Pow(nonDelRatio, o.ReclaimDeletesWeight)
|
||||
|
||||
return score
|
||||
}
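To make the "favor merges that reclaim deletes" behaviour above concrete, here is a hedged, self-contained sketch (planSegment is a hypothetical stand-in for a real segment snapshot): two rosters of identical full size, one with half of its docs deleted. The deletion-heavy roster scores lower, and since lower scores are better, it is the one the planner prefers.

type planSegment struct {
	id         uint64
	full, live int64
}

func (s *planSegment) Id() uint64      { return s.id }
func (s *planSegment) FullSize() int64 { return s.full }
func (s *planSegment) LiveSize() int64 { return s.live }
func (s *planSegment) HasVector() bool { return false }
func (s *planSegment) FileSize() int64 { return s.live }

func exampleScoreSegments() {
	noDeletes := []Segment{
		&planSegment{id: 1, full: 2000, live: 2000},
		&planSegment{id: 2, full: 2000, live: 2000},
	}
	halfDeleted := []Segment{
		&planSegment{id: 3, full: 2000, live: 1000},
		&planSegment{id: 4, full: 2000, live: 1000},
	}
	o := &DefaultMergePlanOptions
	// lower is better: the roster reclaiming deletions wins
	fmt.Println(ScoreSegments(halfDeleted, o) < ScoreSegments(noDeletes, o)) // true
}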

// ------------------------------------------

// ToBarChart returns an ASCII rendering of the segments and the plan.
// The barMax is the max width of the bars in the bar chart.
func ToBarChart(prefix string, barMax int, segments []Segment, plan *MergePlan) string {
	rv := make([]string, 0, len(segments))

	var maxFullSize int64
	for _, segment := range segments {
		if maxFullSize < segment.FullSize() {
			maxFullSize = segment.FullSize()
		}
	}
	if maxFullSize < 0 {
		maxFullSize = 1
	}

	for _, segment := range segments {
		barFull := int(segment.FullSize())
		barLive := int(segment.LiveSize())

		if maxFullSize > int64(barMax) {
			barFull = int(float64(barMax) * float64(barFull) / float64(maxFullSize))
			barLive = int(float64(barMax) * float64(barLive) / float64(maxFullSize))
		}

		barKind := " "
		barChar := "."

		if plan != nil {
		TASK_LOOP:
			for taski, task := range plan.Tasks {
				for _, taskSegment := range task.Segments {
					if taskSegment == segment {
						barKind = "*"
						barChar = fmt.Sprintf("%d", taski)
						break TASK_LOOP
					}
				}
			}
		}

		bar :=
			strings.Repeat(barChar, barLive)[0:barLive] +
				strings.Repeat("x", barFull-barLive)[0:barFull-barLive]

		rv = append(rv, fmt.Sprintf("%s %5d: %5d /%5d - %s %s", prefix,
			segment.Id(),
			segment.LiveSize(),
			segment.FullSize(),
			barKind, bar))
	}

	return strings.Join(rv, "\n")
}

// ValidateMergePlannerOptions validates the merge planner options
func ValidateMergePlannerOptions(options *MergePlanOptions) error {
	if options.MaxSegmentSize > MaxSegmentSizeLimit {
		return ErrMaxSegmentSizeTooLarge
	}
	return nil
}
721
index/scorch/mergeplan/merge_plan_test.go
Normal file
@@ -0,0 +1,721 @@
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package mergeplan
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"os"
|
||||
"reflect"
|
||||
"sort"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Implements the Segment interface for testing,
|
||||
type segment struct {
|
||||
MyId uint64
|
||||
MyFullSize int64
|
||||
MyLiveSize int64
|
||||
|
||||
MyHasVector bool
|
||||
MyFileSize int64
|
||||
}
|
||||
|
||||
func (s *segment) Id() uint64 { return s.MyId }
|
||||
func (s *segment) FullSize() int64 { return s.MyFullSize }
|
||||
func (s *segment) LiveSize() int64 { return s.MyLiveSize }
|
||||
func (s *segment) HasVector() bool { return s.MyHasVector }
|
||||
func (s *segment) FileSize() int64 { return s.MyFileSize }
|
||||
|
||||
func makeLinearSegments(n int) (rv []Segment) {
|
||||
for i := 0; i < n; i++ {
|
||||
rv = append(rv, &segment{
|
||||
MyId: uint64(i),
|
||||
MyFullSize: int64(i),
|
||||
MyLiveSize: int64(i),
|
||||
})
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
// ----------------------------------------
|
||||
|
||||
func TestSimplePlan(t *testing.T) {
|
||||
segs := makeLinearSegments(10)
|
||||
|
||||
tests := []struct {
|
||||
Desc string
|
||||
Segments []Segment
|
||||
Options *MergePlanOptions
|
||||
ExpectPlan *MergePlan
|
||||
ExpectErr error
|
||||
}{
|
||||
{
|
||||
"nil segments",
|
||||
nil, nil, nil, nil,
|
||||
},
|
||||
{
|
||||
"empty segments",
|
||||
[]Segment{},
|
||||
nil, nil, nil,
|
||||
},
|
||||
{
|
||||
"1 segment",
|
||||
[]Segment{segs[1]},
|
||||
nil,
|
||||
nil,
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"2 segments",
|
||||
[]Segment{
|
||||
segs[1],
|
||||
segs[2],
|
||||
},
|
||||
nil,
|
||||
&MergePlan{
|
||||
Tasks: []*MergeTask{
|
||||
{
|
||||
Segments: []Segment{
|
||||
segs[2],
|
||||
segs[1],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"3 segments",
|
||||
[]Segment{
|
||||
segs[1],
|
||||
segs[2],
|
||||
segs[9],
|
||||
},
|
||||
nil,
|
||||
&MergePlan{
|
||||
Tasks: []*MergeTask{
|
||||
{
|
||||
Segments: []Segment{
|
||||
segs[9],
|
||||
segs[2],
|
||||
segs[1],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"many segments",
|
||||
[]Segment{
|
||||
segs[1],
|
||||
segs[2],
|
||||
segs[3],
|
||||
segs[4],
|
||||
segs[5],
|
||||
segs[6],
|
||||
},
|
||||
&MergePlanOptions{
|
||||
MaxSegmentsPerTier: 1,
|
||||
MaxSegmentSize: 1000,
|
||||
TierGrowth: 2.0,
|
||||
SegmentsPerMergeTask: 2,
|
||||
FloorSegmentSize: 1,
|
||||
},
|
||||
&MergePlan{
|
||||
Tasks: []*MergeTask{
|
||||
{
|
||||
Segments: []Segment{
|
||||
segs[6],
|
||||
segs[5],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
nil,
|
||||
},
|
||||
}
|
||||
|
||||
for testi, test := range tests {
|
||||
plan, err := Plan(test.Segments, test.Options)
|
||||
|
||||
if err != test.ExpectErr {
|
||||
testj, _ := json.Marshal(&test)
|
||||
|
||||
t.Errorf("testi: %d, test: %s, got err: %v", testi, testj, err)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(plan, test.ExpectPlan) {
|
||||
testj, _ := json.Marshal(&test)
|
||||
|
||||
planj, _ := json.Marshal(&plan)
|
||||
|
||||
t.Errorf("testi: %d, test: %s, got plan: %s",
|
||||
testi, testj, planj)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------
|
||||
|
||||
func TestSort(t *testing.T) {
|
||||
segs := makeLinearSegments(10)
|
||||
|
||||
sort.Sort(byLiveSizeDescending(segs))
|
||||
|
||||
for i := 1; i < len(segs); i++ {
|
||||
if segs[i].LiveSize() >= segs[i-1].LiveSize() {
|
||||
t.Errorf("not descending")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------
|
||||
|
||||
func TestCalcBudget(t *testing.T) {
|
||||
tests := []struct {
|
||||
totalSize int64
|
||||
firstTierSize int64
|
||||
o MergePlanOptions
|
||||
expect int
|
||||
}{
|
||||
{0, 0, MergePlanOptions{}, 0},
|
||||
{1, 0, MergePlanOptions{}, 1},
|
||||
{9, 0, MergePlanOptions{}, 9},
|
||||
{
|
||||
1, 1,
|
||||
MergePlanOptions{
|
||||
MaxSegmentsPerTier: 1,
|
||||
MaxSegmentSize: 1000,
|
||||
TierGrowth: 2.0,
|
||||
SegmentsPerMergeTask: 2,
|
||||
FloorSegmentSize: 1,
|
||||
},
|
||||
1,
|
||||
},
|
||||
{
|
||||
21, 1,
|
||||
MergePlanOptions{
|
||||
MaxSegmentsPerTier: 1,
|
||||
MaxSegmentSize: 1000,
|
||||
TierGrowth: 2.0,
|
||||
SegmentsPerMergeTask: 2,
|
||||
FloorSegmentSize: 1,
|
||||
},
|
||||
5,
|
||||
},
|
||||
{
|
||||
21, 1,
|
||||
MergePlanOptions{
|
||||
MaxSegmentsPerTier: 2,
|
||||
MaxSegmentSize: 1000,
|
||||
TierGrowth: 2.0,
|
||||
SegmentsPerMergeTask: 2,
|
||||
FloorSegmentSize: 1,
|
||||
},
|
||||
7,
|
||||
},
|
||||
{
|
||||
1000, 2000, DefaultMergePlanOptions,
|
||||
1,
|
||||
},
|
||||
{
|
||||
5000, 2000, DefaultMergePlanOptions,
|
||||
3,
|
||||
},
|
||||
{
|
||||
10000, 2000, DefaultMergePlanOptions,
|
||||
5,
|
||||
},
|
||||
{
|
||||
30000, 2000, DefaultMergePlanOptions,
|
||||
11,
|
||||
},
|
||||
{
|
||||
1000000, 2000, DefaultMergePlanOptions,
|
||||
24,
|
||||
},
|
||||
{
|
||||
1000000000, 2000, DefaultMergePlanOptions,
|
||||
54,
|
||||
},
|
||||
}
|
||||
|
||||
for testi, test := range tests {
|
||||
res := CalcBudget(test.totalSize, test.firstTierSize, &test.o)
|
||||
if res != test.expect {
|
||||
t.Errorf("testi: %d, test: %#v, res: %v",
|
||||
testi, test, res)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCalcBudgetForSingleSegmentMergePolicy(t *testing.T) {
|
||||
mpolicy := MergePlanOptions{
|
||||
MaxSegmentsPerTier: 1,
|
||||
MaxSegmentSize: 1 << 30, // ~ 1 Billion
|
||||
SegmentsPerMergeTask: 10,
|
||||
FloorSegmentSize: 1 << 30,
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
totalSize int64
|
||||
firstTierSize int64
|
||||
o MergePlanOptions
|
||||
expect int
|
||||
}{
|
||||
{0, mpolicy.RaiseToFloorSegmentSize(0), mpolicy, 0},
|
||||
{1, mpolicy.RaiseToFloorSegmentSize(1), mpolicy, 1},
|
||||
{9, mpolicy.RaiseToFloorSegmentSize(0), mpolicy, 1},
|
||||
{1, mpolicy.RaiseToFloorSegmentSize(1), mpolicy, 1},
|
||||
{21, mpolicy.RaiseToFloorSegmentSize(21), mpolicy, 1},
|
||||
{21, mpolicy.RaiseToFloorSegmentSize(21), mpolicy, 1},
|
||||
{1000, mpolicy.RaiseToFloorSegmentSize(2000), mpolicy, 1},
|
||||
{5000, mpolicy.RaiseToFloorSegmentSize(5000), mpolicy, 1},
|
||||
{10000, mpolicy.RaiseToFloorSegmentSize(10000), mpolicy, 1},
|
||||
{30000, mpolicy.RaiseToFloorSegmentSize(30000), mpolicy, 1},
|
||||
{1000000, mpolicy.RaiseToFloorSegmentSize(1000000), mpolicy, 1},
|
||||
{1000000000, 1 << 30, mpolicy, 1},
|
||||
{1013423541, 1 << 30, mpolicy, 1},
|
||||
{98765442, 1 << 30, mpolicy, 1},
|
||||
}
|
||||
|
||||
for testi, test := range tests {
|
||||
res := CalcBudget(test.totalSize, test.firstTierSize, &test.o)
|
||||
if res != test.expect {
|
||||
t.Errorf("testi: %d, test: %#v, res: %v",
|
||||
testi, test, res)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------
|
||||
|
||||
func TestInsert1SameSizedSegmentBetweenMerges(t *testing.T) {
|
||||
o := &MergePlanOptions{
|
||||
MaxSegmentSize: 1000,
|
||||
MaxSegmentsPerTier: 3,
|
||||
TierGrowth: 3.0,
|
||||
SegmentsPerMergeTask: 3,
|
||||
}
|
||||
|
||||
spec := testCyclesSpec{
|
||||
descrip: "i1sssbm",
|
||||
verbose: os.Getenv("VERBOSE") == "i1sssbm" || os.Getenv("VERBOSE") == "y",
|
||||
n: 200,
|
||||
o: o,
|
||||
beforePlan: func(spec *testCyclesSpec) {
|
||||
spec.segments = append(spec.segments, &segment{
|
||||
MyId: spec.nextSegmentId,
|
||||
MyFullSize: 1,
|
||||
MyLiveSize: 1,
|
||||
})
|
||||
spec.nextSegmentId++
|
||||
},
|
||||
}
|
||||
|
||||
spec.runCycles(t)
|
||||
}
|
||||
|
||||
func TestInsertManySameSizedSegmentsBetweenMerges(t *testing.T) {
|
||||
o := &MergePlanOptions{
|
||||
MaxSegmentSize: 1000,
|
||||
MaxSegmentsPerTier: 3,
|
||||
TierGrowth: 3.0,
|
||||
SegmentsPerMergeTask: 3,
|
||||
}
|
||||
|
||||
spec := testCyclesSpec{
|
||||
descrip: "imsssbm",
|
||||
verbose: os.Getenv("VERBOSE") == "imsssbm" || os.Getenv("VERBOSE") == "y",
|
||||
n: 20,
|
||||
o: o,
|
||||
beforePlan: func(spec *testCyclesSpec) {
|
||||
for i := 0; i < 10; i++ {
|
||||
spec.segments = append(spec.segments, &segment{
|
||||
MyId: spec.nextSegmentId,
|
||||
MyFullSize: 1,
|
||||
MyLiveSize: 1,
|
||||
})
|
||||
spec.nextSegmentId++
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
spec.runCycles(t)
|
||||
}
|
||||
|
||||
func TestInsertManySameSizedSegmentsWithDeletionsBetweenMerges(t *testing.T) {
|
||||
o := &MergePlanOptions{
|
||||
MaxSegmentSize: 1000,
|
||||
MaxSegmentsPerTier: 3,
|
||||
TierGrowth: 3.0,
|
||||
SegmentsPerMergeTask: 3,
|
||||
}
|
||||
|
||||
spec := testCyclesSpec{
|
||||
descrip: "imssswdbm",
|
||||
verbose: os.Getenv("VERBOSE") == "imssswdbm" || os.Getenv("VERBOSE") == "y",
|
||||
n: 20,
|
||||
o: o,
|
||||
beforePlan: func(spec *testCyclesSpec) {
|
||||
for i := 0; i < 10; i++ {
|
||||
// Deletions are a shrinking of the live size.
|
||||
for i, seg := range spec.segments {
|
||||
if (spec.cycle+i)%5 == 0 {
|
||||
s := seg.(*segment)
|
||||
if s.MyLiveSize > 0 {
|
||||
s.MyLiveSize -= 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
spec.segments = append(spec.segments, &segment{
|
||||
MyId: spec.nextSegmentId,
|
||||
MyFullSize: 1,
|
||||
MyLiveSize: 1,
|
||||
})
|
||||
spec.nextSegmentId++
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
spec.runCycles(t)
|
||||
}
|
||||
|
||||
func TestInsertManyDifferentSizedSegmentsBetweenMerges(t *testing.T) {
|
||||
o := &MergePlanOptions{
|
||||
MaxSegmentSize: 1000,
|
||||
MaxSegmentsPerTier: 3,
|
||||
TierGrowth: 3.0,
|
||||
SegmentsPerMergeTask: 3,
|
||||
}
|
||||
|
||||
spec := testCyclesSpec{
|
||||
descrip: "imdssbm",
|
||||
verbose: os.Getenv("VERBOSE") == "imdssbm" || os.Getenv("VERBOSE") == "y",
|
||||
n: 20,
|
||||
o: o,
|
||||
beforePlan: func(spec *testCyclesSpec) {
|
||||
for i := 0; i < 10; i++ {
|
||||
spec.segments = append(spec.segments, &segment{
|
||||
MyId: spec.nextSegmentId,
|
||||
MyFullSize: int64(1 + (i % 5)),
|
||||
MyLiveSize: int64(1 + (i % 5)),
|
||||
})
|
||||
spec.nextSegmentId++
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
spec.runCycles(t)
|
||||
}
|
||||
|
||||
func TestManySameSizedSegmentsWithDeletesBetweenMerges(t *testing.T) {
|
||||
o := &MergePlanOptions{
|
||||
MaxSegmentSize: 1000,
|
||||
MaxSegmentsPerTier: 3,
|
||||
TierGrowth: 3.0,
|
||||
SegmentsPerMergeTask: 3,
|
||||
}
|
||||
|
||||
var numPlansWithTasks int
|
||||
|
||||
spec := testCyclesSpec{
|
||||
descrip: "mssswdbm",
|
||||
verbose: os.Getenv("VERBOSE") == "mssswdbm" || os.Getenv("VERBOSE") == "y",
|
||||
n: 20,
|
||||
o: o,
|
||||
beforePlan: func(spec *testCyclesSpec) {
|
||||
// Deletions are a shrinking of the live size.
|
||||
for i, seg := range spec.segments {
|
||||
if (spec.cycle+i)%5 == 0 {
|
||||
s := seg.(*segment)
|
||||
if s.MyLiveSize > 0 {
|
||||
s.MyLiveSize -= 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < 10; i++ {
|
||||
spec.segments = append(spec.segments, &segment{
|
||||
MyId: spec.nextSegmentId,
|
||||
MyFullSize: 1,
|
||||
MyLiveSize: 1,
|
||||
})
|
||||
spec.nextSegmentId++
|
||||
}
|
||||
},
|
||||
afterPlan: func(spec *testCyclesSpec, plan *MergePlan) {
|
||||
if plan != nil && len(plan.Tasks) > 0 {
|
||||
numPlansWithTasks++
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
spec.runCycles(t)
|
||||
|
||||
if numPlansWithTasks <= 0 {
|
||||
t.Errorf("expected some plans with tasks")
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateMergePlannerOptions(t *testing.T) {
|
||||
o := &MergePlanOptions{
|
||||
MaxSegmentSize: 1 << 32,
|
||||
MaxSegmentsPerTier: 3,
|
||||
TierGrowth: 3.0,
|
||||
SegmentsPerMergeTask: 3,
|
||||
}
|
||||
err := ValidateMergePlannerOptions(o)
|
||||
if err != ErrMaxSegmentSizeTooLarge {
|
||||
t.Error("Validation expected to fail as the MaxSegmentSize exceeds limit")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPlanMaxSegmentSizeLimit(t *testing.T) {
|
||||
o := &MergePlanOptions{
|
||||
MaxSegmentSize: 20,
|
||||
MaxSegmentsPerTier: 5,
|
||||
TierGrowth: 3.0,
|
||||
SegmentsPerMergeTask: 5,
|
||||
FloorSegmentSize: 5,
|
||||
}
|
||||
segments := makeLinearSegments(20)
|
||||
|
||||
s := rand.NewSource(time.Now().UnixNano())
|
||||
r := rand.New(s)
|
||||
|
||||
max := 20
|
||||
min := 5
|
||||
randomInRange := func() int64 {
|
||||
return int64(r.Intn(max-min) + min)
|
||||
}
|
||||
for i := 1; i < 20; i++ {
|
||||
o.MaxSegmentSize = randomInRange()
|
||||
plans, err := Plan(segments, o)
|
||||
if err != nil {
|
||||
t.Errorf("Plan failed, err: %v", err)
|
||||
}
|
||||
if len(plans.Tasks) == 0 {
|
||||
t.Errorf("expected some plans with tasks")
|
||||
}
|
||||
|
||||
for _, task := range plans.Tasks {
|
||||
var totalLiveSize int64
|
||||
for _, segs := range task.Segments {
|
||||
totalLiveSize += segs.LiveSize()
|
||||
}
|
||||
if totalLiveSize >= o.MaxSegmentSize {
|
||||
t.Errorf("merged segments size: %d exceeding the MaxSegmentSize"+
|
||||
"limit: %d", totalLiveSize, o.MaxSegmentSize)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------
|
||||
|
||||
type testCyclesSpec struct {
|
||||
descrip string
|
||||
verbose bool
|
||||
|
||||
n int // Number of cycles to run.
|
||||
o *MergePlanOptions
|
||||
|
||||
beforePlan func(*testCyclesSpec)
|
||||
afterPlan func(*testCyclesSpec, *MergePlan)
|
||||
|
||||
cycle int
|
||||
segments []Segment
|
||||
nextSegmentId uint64
|
||||
}
|
||||
|
||||
func (spec *testCyclesSpec) runCycles(t *testing.T) {
|
||||
numPlansWithTasks := 0
|
||||
|
||||
for spec.cycle < spec.n {
|
||||
if spec.verbose {
|
||||
emit(spec.descrip, spec.cycle, 0, spec.segments, nil)
|
||||
}
|
||||
|
||||
if spec.beforePlan != nil {
|
||||
spec.beforePlan(spec)
|
||||
}
|
||||
|
||||
if spec.verbose {
|
||||
emit(spec.descrip, spec.cycle, 1, spec.segments, nil)
|
||||
}
|
||||
|
||||
plan, err := Plan(spec.segments, spec.o)
|
||||
if err != nil {
|
||||
t.Fatalf("expected no err, got: %v", err)
|
||||
}
|
||||
|
||||
if spec.afterPlan != nil {
|
||||
spec.afterPlan(spec, plan)
|
||||
}
|
||||
|
||||
if spec.verbose {
|
||||
emit(spec.descrip, spec.cycle, 2, spec.segments, plan)
|
||||
}
|
||||
|
||||
if plan != nil {
|
||||
if len(plan.Tasks) > 0 {
|
||||
numPlansWithTasks++
|
||||
}
|
||||
|
||||
for _, task := range plan.Tasks {
|
||||
spec.segments = removeSegments(spec.segments, task.Segments)
|
||||
|
||||
var totLiveSize int64
|
||||
for _, segment := range task.Segments {
|
||||
totLiveSize += segment.LiveSize()
|
||||
}
|
||||
|
||||
if totLiveSize > 0 {
|
||||
spec.segments = append(spec.segments, &segment{
|
||||
MyId: spec.nextSegmentId,
|
||||
MyFullSize: totLiveSize,
|
||||
MyLiveSize: totLiveSize,
|
||||
})
|
||||
spec.nextSegmentId++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
spec.cycle++
|
||||
}
|
||||
|
||||
if numPlansWithTasks <= 0 {
|
||||
t.Errorf("expected some plans with tasks")
|
||||
}
|
||||
}
|
||||
|
||||
func emit(descrip string, cycle int, step int, segments []Segment, plan *MergePlan) {
|
||||
if os.Getenv("VERBOSE") == "" {
|
||||
return
|
||||
}
|
||||
|
||||
suffix := ""
|
||||
if plan != nil && len(plan.Tasks) > 0 {
|
||||
suffix = "hasPlan"
|
||||
}
|
||||
|
||||
fmt.Printf("%s %d.%d ---------- %s\n", descrip, cycle, step, suffix)
|
||||
fmt.Printf("%s\n", ToBarChart(descrip, 100, segments, plan))
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Test Vector Segment Merging
|
||||
|
||||
func TestPlanMaxSegmentFileSize(t *testing.T) {
|
||||
tests := []struct {
|
||||
segments []Segment
|
||||
o *MergePlanOptions
|
||||
|
||||
expectedTasks [][]uint64
|
||||
}{
|
||||
{
|
||||
[]Segment{
|
||||
&segment{ // ineligible
|
||||
MyId: 1,
|
||||
MyFullSize: 4000,
|
||||
MyLiveSize: 3900,
|
||||
|
||||
MyHasVector: true,
|
||||
MyFileSize: 3900 * 1000 * 4, // > 2MB
|
||||
},
|
||||
&segment{ // ineligible
|
||||
MyId: 2,
|
||||
MyFullSize: 6000,
|
||||
MyLiveSize: 5500, // > 5000
|
||||
|
||||
MyHasVector: true,
|
||||
MyFileSize: 5500 * 1000 * 4, // > 2MB
|
||||
},
|
||||
&segment{ // eligible
|
||||
MyId: 3,
|
||||
MyFullSize: 500,
|
||||
MyLiveSize: 490,
|
||||
|
||||
MyHasVector: true,
|
||||
MyFileSize: 490 * 1000 * 4,
|
||||
},
|
||||
&segment{ // eligible
|
||||
MyId: 4,
|
||||
MyFullSize: 500,
|
||||
MyLiveSize: 480,
|
||||
|
||||
MyHasVector: true,
|
||||
MyFileSize: 480 * 1000 * 4,
|
||||
},
|
||||
&segment{ // eligible
|
||||
MyId: 5,
|
||||
MyFullSize: 500,
|
||||
MyLiveSize: 300,
|
||||
|
||||
MyHasVector: true,
|
||||
MyFileSize: 300 * 1000 * 4,
|
||||
},
|
||||
&segment{ // eligible
|
||||
MyId: 6,
|
||||
MyFullSize: 500,
|
||||
MyLiveSize: 400,
|
||||
|
||||
MyHasVector: true,
|
||||
MyFileSize: 400 * 1000 * 4,
|
||||
},
|
||||
},
|
||||
&MergePlanOptions{
|
||||
MaxSegmentSize: 5000, // number of documents
|
||||
// considering vector dimension as 1000
|
||||
// vectorBytes = 5000 * 1000 * 4 = 20MB, which is too large
|
||||
// So, let's set the fileSize limit to 4MB
|
||||
MaxSegmentFileSize: 4000000, // 4MB
|
||||
MaxSegmentsPerTier: 1,
|
||||
SegmentsPerMergeTask: 2,
|
||||
TierGrowth: 2.0,
|
||||
FloorSegmentSize: 1,
|
||||
},
|
||||
[][]uint64{
|
||||
{3, 4},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for testi, test := range tests {
|
||||
t.Run(fmt.Sprintf("Test-%d", testi), func(t *testing.T) {
|
||||
plans, err := Plan(test.segments, test.o)
|
||||
if err != nil {
|
||||
t.Fatalf("Plan failed, err: %v", err)
|
||||
}
|
||||
|
||||
for i, task := range plans.Tasks {
|
||||
var segIDs []uint64
|
||||
for _, seg := range task.Segments {
|
||||
segIDs = append(segIDs, seg.Id())
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(segIDs, test.expectedTasks[i]) {
|
||||
t.Errorf("expected task segments: %v, got: %v", test.expectedTasks[i], segIDs)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
28
index/scorch/mergeplan/sort.go
Normal file
@@ -0,0 +1,28 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package mergeplan

type byLiveSizeDescending []Segment

func (a byLiveSizeDescending) Len() int { return len(a) }

func (a byLiveSizeDescending) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

func (a byLiveSizeDescending) Less(i, j int) bool {
	if a[i].LiveSize() != a[j].LiveSize() {
		return a[i].LiveSize() > a[j].LiveSize()
	}
	return a[i].Id() < a[j].Id()
}
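
// Hypothetical sketch, not part of the upstream file: the planner orders
// candidate segments largest-live-first before walking the budget, which
// is the ordering TestSort in merge_plan_test.go verifies. This helper is
// invented and assumes the standard library "sort" package is imported.
func exampleSortByLiveSize(segs []Segment) {
	sort.Sort(byLiveSizeDescending(segs))
	// segs[0] now has the largest LiveSize(); ties break on ascending Id().
}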
|
397
index/scorch/optimize.go
Normal file
@@ -0,0 +1,397 @@
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment "github.com/blevesearch/scorch_segment_api/v2"
|
||||
)
|
||||
|
||||
var OptimizeConjunction = true
|
||||
var OptimizeConjunctionUnadorned = true
|
||||
var OptimizeDisjunctionUnadorned = true
|
||||
|
||||
func (s *IndexSnapshotTermFieldReader) Optimize(kind string,
|
||||
octx index.OptimizableContext) (index.OptimizableContext, error) {
|
||||
if OptimizeConjunction && kind == "conjunction" {
|
||||
return s.optimizeConjunction(octx)
|
||||
}
|
||||
|
||||
if OptimizeConjunctionUnadorned && kind == "conjunction:unadorned" {
|
||||
return s.optimizeConjunctionUnadorned(octx)
|
||||
}
|
||||
|
||||
if OptimizeDisjunctionUnadorned && kind == "disjunction:unadorned" {
|
||||
return s.optimizeDisjunctionUnadorned(octx)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
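
// Hypothetical sketch, not part of the upstream file: a caller holding
// several term field readers from the same snapshot can thread them
// through Optimize and then Finish the shared context. The kind strings
// recognized here are exactly the three checked above; the helper name
// is invented.
func exampleOptimizeConjunction(tfrs []*IndexSnapshotTermFieldReader) (index.Optimized, error) {
	var octx index.OptimizableContext
	var err error
	for _, tfr := range tfrs {
		octx, err = tfr.Optimize("conjunction", octx)
		if err != nil || octx == nil {
			return nil, err
		}
	}
	return octx.Finish()
}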
|
||||
|
||||
var OptimizeDisjunctionUnadornedMinChildCardinality = uint64(256)
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
func (s *IndexSnapshotTermFieldReader) optimizeConjunction(
|
||||
octx index.OptimizableContext) (index.OptimizableContext, error) {
|
||||
if octx == nil {
|
||||
octx = &OptimizeTFRConjunction{snapshot: s.snapshot}
|
||||
}
|
||||
|
||||
o, ok := octx.(*OptimizeTFRConjunction)
|
||||
if !ok {
|
||||
return octx, nil
|
||||
}
|
||||
|
||||
if o.snapshot != s.snapshot {
|
||||
return nil, fmt.Errorf("tried to optimize conjunction across different snapshots")
|
||||
}
|
||||
|
||||
o.tfrs = append(o.tfrs, s)
|
||||
|
||||
return o, nil
|
||||
}
|
||||
|
||||
type OptimizeTFRConjunction struct {
|
||||
snapshot *IndexSnapshot
|
||||
|
||||
tfrs []*IndexSnapshotTermFieldReader
|
||||
}
|
||||
|
||||
func (o *OptimizeTFRConjunction) Finish() (index.Optimized, error) {
|
||||
if len(o.tfrs) <= 1 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
for i := range o.snapshot.segment {
|
||||
itr0, ok := o.tfrs[0].iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if !ok || itr0.ActualBitmap() == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
itr1, ok := o.tfrs[1].iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if !ok || itr1.ActualBitmap() == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
bm := roaring.And(itr0.ActualBitmap(), itr1.ActualBitmap())
|
||||
|
||||
for _, tfr := range o.tfrs[2:] {
|
||||
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if !ok || itr.ActualBitmap() == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
bm.And(itr.ActualBitmap())
|
||||
}
|
||||
|
||||
// in this conjunction optimization, the postings iterators
|
||||
// will all share the same AND'ed together actual bitmap. The
|
||||
// regular conjunction searcher machinery will still be used,
|
||||
// but the underlying bitmap will be smaller.
|
||||
for _, tfr := range o.tfrs {
|
||||
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if ok && itr.ActualBitmap() != nil {
|
||||
itr.ReplaceActual(bm)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
// An "unadorned" conjunction optimization is appropriate when
|
||||
// additional or subsidiary information like freq-norm's and
|
||||
// term-vectors are not required, and instead only the internal-id's
|
||||
// are needed.
|
||||
func (s *IndexSnapshotTermFieldReader) optimizeConjunctionUnadorned(
|
||||
octx index.OptimizableContext) (index.OptimizableContext, error) {
|
||||
if octx == nil {
|
||||
octx = &OptimizeTFRConjunctionUnadorned{snapshot: s.snapshot}
|
||||
}
|
||||
|
||||
o, ok := octx.(*OptimizeTFRConjunctionUnadorned)
|
||||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if o.snapshot != s.snapshot {
|
||||
return nil, fmt.Errorf("tried to optimize unadorned conjunction across different snapshots")
|
||||
}
|
||||
|
||||
o.tfrs = append(o.tfrs, s)
|
||||
|
||||
return o, nil
|
||||
}
|
||||
|
||||
type OptimizeTFRConjunctionUnadorned struct {
|
||||
snapshot *IndexSnapshot
|
||||
|
||||
tfrs []*IndexSnapshotTermFieldReader
|
||||
}
|
||||
|
||||
var OptimizeTFRConjunctionUnadornedTerm = []byte("<conjunction:unadorned>")
|
||||
var OptimizeTFRConjunctionUnadornedField = "*"
|
||||
|
||||
// Finish of an unadorned conjunction optimization will compute a
|
||||
// termFieldReader with an "actual" bitmap that represents the
|
||||
// constituent bitmaps AND'ed together. This termFieldReader cannot
|
||||
// provide any freq-norm or termVector associated information.
|
||||
func (o *OptimizeTFRConjunctionUnadorned) Finish() (rv index.Optimized, err error) {
|
||||
if len(o.tfrs) <= 1 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// We use an artificial term and field because the optimized
|
||||
// termFieldReader can represent multiple terms and fields.
|
||||
oTFR := o.snapshot.unadornedTermFieldReader(
|
||||
OptimizeTFRConjunctionUnadornedTerm, OptimizeTFRConjunctionUnadornedField)
|
||||
|
||||
var actualBMs []*roaring.Bitmap // Collected from regular posting lists.
|
||||
|
||||
OUTER:
|
||||
for i := range o.snapshot.segment {
|
||||
actualBMs = actualBMs[:0]
|
||||
|
||||
var docNum1HitLast uint64
|
||||
var docNum1HitLastOk bool
|
||||
|
||||
for _, tfr := range o.tfrs {
|
||||
if _, ok := tfr.iterators[i].(*emptyPostingsIterator); ok {
|
||||
// An empty postings iterator means the entire AND is empty.
|
||||
oTFR.iterators[i] = anEmptyPostingsIterator
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if !ok {
|
||||
// We only optimize postings iterators that support this operation.
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// If the postings iterator is "1-hit" optimized, then we
|
||||
// can perform several optimizations up-front here.
|
||||
docNum1Hit, ok := itr.DocNum1Hit()
|
||||
if ok {
|
||||
if docNum1HitLastOk && docNum1HitLast != docNum1Hit {
|
||||
// The docNum1Hit doesn't match the previous
|
||||
// docNum1HitLast, so the entire AND is empty.
|
||||
oTFR.iterators[i] = anEmptyPostingsIterator
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
docNum1HitLast = docNum1Hit
|
||||
docNum1HitLastOk = true
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
if itr.ActualBitmap() == nil {
|
||||
// An empty actual bitmap means the entire AND is empty.
|
||||
oTFR.iterators[i] = anEmptyPostingsIterator
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
// Collect the actual bitmap for more processing later.
|
||||
actualBMs = append(actualBMs, itr.ActualBitmap())
|
||||
}
|
||||
|
||||
if docNum1HitLastOk {
|
||||
// We reach here if all the 1-hit optimized posting
|
||||
// iterators had the same 1-hit docNum, so we can check if
|
||||
// our collected actual bitmaps also have that docNum.
|
||||
for _, bm := range actualBMs {
|
||||
if !bm.Contains(uint32(docNum1HitLast)) {
|
||||
// The docNum1Hit isn't in one of our actual
|
||||
// bitmaps, so the entire AND is empty.
|
||||
oTFR.iterators[i] = anEmptyPostingsIterator
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
|
||||
// The actual bitmaps and docNum1Hits all contain or have
|
||||
// the same 1-hit docNum, so that's our AND'ed result.
|
||||
oTFR.iterators[i] = newUnadornedPostingsIteratorFrom1Hit(docNum1HitLast)
|
||||
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
if len(actualBMs) == 0 {
|
||||
// If we've collected no actual bitmaps at this point,
|
||||
// then the entire AND is empty.
|
||||
oTFR.iterators[i] = anEmptyPostingsIterator
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
if len(actualBMs) == 1 {
|
||||
// If we've only 1 actual bitmap, then that's our result.
|
||||
oTFR.iterators[i] = newUnadornedPostingsIteratorFromBitmap(actualBMs[0])
|
||||
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
// Else, AND together our collected bitmaps as our result.
|
||||
bm := roaring.And(actualBMs[0], actualBMs[1])
|
||||
|
||||
for _, actualBM := range actualBMs[2:] {
|
||||
bm.And(actualBM)
|
||||
}
|
||||
|
||||
oTFR.iterators[i] = newUnadornedPostingsIteratorFromBitmap(bm)
|
||||
}
|
||||
|
||||
atomic.AddUint64(&o.snapshot.parent.stats.TotTermSearchersStarted, uint64(1))
|
||||
return oTFR, nil
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
// An "unadorned" disjunction optimization is appropriate when
|
||||
// additional or subsidiary information like freq-norm's and
|
||||
// term-vectors are not required, and instead only the internal-id's
|
||||
// are needed.
|
||||
func (s *IndexSnapshotTermFieldReader) optimizeDisjunctionUnadorned(
|
||||
octx index.OptimizableContext) (index.OptimizableContext, error) {
|
||||
if octx == nil {
|
||||
octx = &OptimizeTFRDisjunctionUnadorned{
|
||||
snapshot: s.snapshot,
|
||||
}
|
||||
}
|
||||
|
||||
o, ok := octx.(*OptimizeTFRDisjunctionUnadorned)
|
||||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if o.snapshot != s.snapshot {
|
||||
return nil, fmt.Errorf("tried to optimize unadorned disjunction across different snapshots")
|
||||
}
|
||||
|
||||
o.tfrs = append(o.tfrs, s)
|
||||
|
||||
return o, nil
|
||||
}
|
||||
|
||||
type OptimizeTFRDisjunctionUnadorned struct {
|
||||
snapshot *IndexSnapshot
|
||||
|
||||
tfrs []*IndexSnapshotTermFieldReader
|
||||
}
|
||||
|
||||
var OptimizeTFRDisjunctionUnadornedTerm = []byte("<disjunction:unadorned>")
|
||||
var OptimizeTFRDisjunctionUnadornedField = "*"
|
||||
|
||||
// Finish of an unadorned disjunction optimization will compute a
|
||||
// termFieldReader with an "actual" bitmap that represents the
|
||||
// constituent bitmaps OR'ed together. This termFieldReader cannot
|
||||
// provide any freq-norm or termVector associated information.
|
||||
func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err error) {
|
||||
if len(o.tfrs) <= 1 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
for i := range o.snapshot.segment {
|
||||
var cMax uint64
|
||||
|
||||
for _, tfr := range o.tfrs {
|
||||
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if itr.ActualBitmap() != nil {
|
||||
c := itr.ActualBitmap().GetCardinality()
|
||||
if cMax < c {
|
||||
cMax = c
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We use an artificial term and field because the optimized
|
||||
// termFieldReader can represent multiple terms and fields.
|
||||
oTFR := o.snapshot.unadornedTermFieldReader(
|
||||
OptimizeTFRDisjunctionUnadornedTerm, OptimizeTFRDisjunctionUnadornedField)
|
||||
|
||||
var docNums []uint32 // Collected docNum's from 1-hit posting lists.
|
||||
var actualBMs []*roaring.Bitmap // Collected from regular posting lists.
|
||||
|
||||
for i := range o.snapshot.segment {
|
||||
docNums = docNums[:0]
|
||||
actualBMs = actualBMs[:0]
|
||||
|
||||
for _, tfr := range o.tfrs {
|
||||
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
|
||||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
docNum, ok := itr.DocNum1Hit()
|
||||
if ok {
|
||||
docNums = append(docNums, uint32(docNum))
|
||||
continue
|
||||
}
|
||||
|
||||
if itr.ActualBitmap() != nil {
|
||||
actualBMs = append(actualBMs, itr.ActualBitmap())
|
||||
}
|
||||
}
|
||||
|
||||
var bm *roaring.Bitmap
|
||||
if len(actualBMs) > 2 {
|
||||
bm = roaring.HeapOr(actualBMs...)
|
||||
} else if len(actualBMs) == 2 {
|
||||
bm = roaring.Or(actualBMs[0], actualBMs[1])
|
||||
} else if len(actualBMs) == 1 {
|
||||
bm = actualBMs[0].Clone()
|
||||
}
|
||||
|
||||
if bm == nil {
|
||||
bm = roaring.New()
|
||||
}
|
||||
|
||||
bm.AddMany(docNums)
|
||||
|
||||
oTFR.iterators[i] = newUnadornedPostingsIteratorFromBitmap(bm)
|
||||
}
|
||||
|
||||
atomic.AddUint64(&o.snapshot.parent.stats.TotTermSearchersStarted, uint64(1))
|
||||
return oTFR, nil
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
func (i *IndexSnapshot) unadornedTermFieldReader(
|
||||
term []byte, field string) *IndexSnapshotTermFieldReader {
|
||||
// This IndexSnapshotTermFieldReader will not be recycled, more
|
||||
// conversation here: https://github.com/blevesearch/bleve/pull/1438
|
||||
return &IndexSnapshotTermFieldReader{
|
||||
term: term,
|
||||
field: field,
|
||||
snapshot: i,
|
||||
iterators: make([]segment.PostingsIterator, len(i.segment)),
|
||||
segmentOffset: 0,
|
||||
includeFreq: false,
|
||||
includeNorm: false,
|
||||
includeTermVectors: false,
|
||||
recycle: false,
|
||||
}
|
||||
}
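
// Hypothetical sketch, not part of the upstream file: the unadorned
// optimizations above reduce to plain roaring bitmap algebra over the
// per-segment "actual" bitmaps, AND for conjunctions and OR (HeapOr) for
// disjunctions. Only the helper name is invented.
func exampleUnadornedBitmapAlgebra() (conj, disj *roaring.Bitmap) {
	a := roaring.BitmapOf(1, 3, 5, 7)
	b := roaring.BitmapOf(3, 7, 9)
	conj = roaring.And(a, b)    // {3, 7}: what conjunction:unadorned keeps
	disj = roaring.HeapOr(a, b) // {1, 3, 5, 7, 9}: what disjunction:unadorned keeps
	return conj, disj
}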
|
207
index/scorch/optimize_knn.go
Normal file
@@ -0,0 +1,207 @@
// Copyright (c) 2023 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build vectors
|
||||
// +build vectors
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment_api "github.com/blevesearch/scorch_segment_api/v2"
|
||||
)
|
||||
|
||||
type OptimizeVR struct {
|
||||
ctx context.Context
|
||||
snapshot *IndexSnapshot
|
||||
totalCost uint64
|
||||
// maps field to vector readers
|
||||
vrs map[string][]*IndexSnapshotVectorReader
|
||||
// if at least one of the vector readers requires filtered kNN.
|
||||
requiresFiltering bool
|
||||
}
|
||||
|
||||
// This setting _MUST_ only be changed during init and not after.
|
||||
var BleveMaxKNNConcurrency = 10
|
||||
|
||||
func (o *OptimizeVR) invokeSearcherEndCallback() {
|
||||
if o.ctx != nil {
|
||||
if cb := o.ctx.Value(search.SearcherEndCallbackKey); cb != nil {
|
||||
if cbF, ok := cb.(search.SearcherEndCallbackFn); ok {
|
||||
if o.totalCost > 0 {
|
||||
// notify the callback that the searcher creation etc. is finished
|
||||
// and report back the total cost for it to track and take actions
|
||||
// appropriately.
|
||||
_ = cbF(o.totalCost)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (o *OptimizeVR) Finish() error {
|
||||
// for each field, get the vector index --> invoke the zap func.
|
||||
// for each VR, populate postings list and iterators
|
||||
// by passing the obtained vector index and getting similar vectors.
|
||||
// defer close index - just once.
|
||||
var errorsM sync.Mutex
|
||||
var errors []error
|
||||
|
||||
defer o.invokeSearcherEndCallback()
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
semaphore := make(chan struct{}, BleveMaxKNNConcurrency)
|
||||
// Launch goroutines to get vector index for each segment
|
||||
for i, seg := range o.snapshot.segment {
|
||||
if sv, ok := seg.segment.(segment_api.VectorSegment); ok {
|
||||
wg.Add(1)
|
||||
semaphore <- struct{}{} // Acquire a semaphore slot
|
||||
go func(index int, segment segment_api.VectorSegment, origSeg *SegmentSnapshot) {
|
||||
defer func() {
|
||||
<-semaphore // Release the semaphore slot
|
||||
wg.Done()
|
||||
}()
|
||||
for field, vrs := range o.vrs {
|
||||
vecIndex, err := segment.InterpretVectorIndex(field,
|
||||
o.requiresFiltering, origSeg.deleted)
|
||||
if err != nil {
|
||||
errorsM.Lock()
|
||||
errors = append(errors, err)
|
||||
errorsM.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
// update the vector index size as a meta value in the segment snapshot
|
||||
vectorIndexSize := vecIndex.Size()
|
||||
origSeg.cachedMeta.updateMeta(field, vectorIndexSize)
|
||||
for _, vr := range vrs {
|
||||
var pl segment_api.VecPostingsList
|
||||
var err error
|
||||
|
||||
// for each VR, populate postings list and iterators
|
||||
// by passing the obtained vector index and getting similar vectors.
|
||||
|
||||
// check if the vector reader is configured to use a pre-filter
|
||||
// to filter out ineligible documents before performing
|
||||
// kNN search.
|
||||
if vr.eligibleSelector != nil {
|
||||
pl, err = vecIndex.SearchWithFilter(vr.vector, vr.k,
|
||||
vr.eligibleSelector.SegmentEligibleDocs(index), vr.searchParams)
|
||||
} else {
|
||||
pl, err = vecIndex.Search(vr.vector, vr.k, vr.searchParams)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
errorsM.Lock()
|
||||
errors = append(errors, err)
|
||||
errorsM.Unlock()
|
||||
go vecIndex.Close()
|
||||
return
|
||||
}
|
||||
|
||||
atomic.AddUint64(&o.snapshot.parent.stats.TotKNNSearches, uint64(1))
|
||||
|
||||
// postings and iterators are already alloc'ed when
|
||||
// IndexSnapshotVectorReader is created
|
||||
vr.postings[index] = pl
|
||||
vr.iterators[index] = pl.Iterator(vr.iterators[index])
|
||||
}
|
||||
go vecIndex.Close()
|
||||
}
|
||||
}(i, sv, seg)
|
||||
}
|
||||
}
|
||||
wg.Wait()
|
||||
close(semaphore)
|
||||
if len(errors) > 0 {
|
||||
return errors[0]
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *IndexSnapshotVectorReader) VectorOptimize(ctx context.Context,
|
||||
octx index.VectorOptimizableContext,
|
||||
) (index.VectorOptimizableContext, error) {
|
||||
if s.snapshot.parent.segPlugin.Version() < VectorSearchSupportedSegmentVersion {
|
||||
return nil, fmt.Errorf("vector search not supported for this index, "+
|
||||
"index's segment version %v, supported segment version for vector search %v",
|
||||
s.snapshot.parent.segPlugin.Version(), VectorSearchSupportedSegmentVersion)
|
||||
}
|
||||
|
||||
if octx == nil {
|
||||
octx = &OptimizeVR{
|
||||
snapshot: s.snapshot,
|
||||
vrs: make(map[string][]*IndexSnapshotVectorReader),
|
||||
}
|
||||
}
|
||||
|
||||
o, ok := octx.(*OptimizeVR)
|
||||
if !ok {
|
||||
return octx, nil
|
||||
}
|
||||
o.ctx = ctx
|
||||
if !o.requiresFiltering {
|
||||
o.requiresFiltering = s.eligibleSelector != nil
|
||||
}
|
||||
|
||||
if o.snapshot != s.snapshot {
|
||||
o.invokeSearcherEndCallback()
|
||||
return nil, fmt.Errorf("tried to optimize KNN across different snapshots")
|
||||
}
|
||||
|
||||
// for every searcher creation, consult the segment snapshot to see
|
||||
// what's the vector index size and since you're anyways going
|
||||
// to use this vector index to perform the search etc. as part of the Finish()
|
||||
// perform a check as to whether we allow the searcher creation (the downstream)
|
||||
// Finish() logic to even occur or not.
|
||||
var sumVectorIndexSize uint64
|
||||
for _, seg := range o.snapshot.segment {
|
||||
vecIndexSize := seg.cachedMeta.fetchMeta(s.field)
|
||||
if vecIndexSize != nil {
|
||||
sumVectorIndexSize += vecIndexSize.(uint64)
|
||||
}
|
||||
}
|
||||
|
||||
if o.ctx != nil {
|
||||
if cb := o.ctx.Value(search.SearcherStartCallbackKey); cb != nil {
|
||||
if cbF, ok := cb.(search.SearcherStartCallbackFn); ok {
|
||||
err := cbF(sumVectorIndexSize)
|
||||
if err != nil {
|
||||
// it's important to invoke the end callback at this point since
|
||||
// if the earlier searchers of this optimize struct were successful
|
||||
// the cost corresponding to it would be incremented and if the
|
||||
// current searcher fails the check then we end up erroring out
|
||||
// the overall optimized searcher creation, the cost needs to be
|
||||
// handled appropriately.
|
||||
o.invokeSearcherEndCallback()
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// total cost is essentially the sum of the vector indexes' size across all the
|
||||
// searchers - all of them end up reading and maintaining a vector index.
|
||||
// misaccounting this value would end up calling the "end" callback with a value
|
||||
// not equal to the value passed to "start" callback.
|
||||
o.totalCost += sumVectorIndexSize
|
||||
o.vrs[s.field] = append(o.vrs[s.field], s)
|
||||
return o, nil
|
||||
}
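
// Hypothetical standalone sketch, not part of the upstream file, of the
// bounded-concurrency pattern Finish() uses above: a buffered channel of
// BleveMaxKNNConcurrency slots gates how many per-segment searches run at
// once, and a WaitGroup joins them before any error is reported. The
// helper name and the work-function slice are invented.
func exampleBoundedConcurrency(work []func() error) error {
	var mu sync.Mutex
	var firstErr error
	sem := make(chan struct{}, BleveMaxKNNConcurrency)
	var wg sync.WaitGroup
	for _, w := range work {
		wg.Add(1)
		sem <- struct{}{} // acquire a slot
		go func(fn func() error) {
			defer func() {
				<-sem // release the slot
				wg.Done()
			}()
			if err := fn(); err != nil {
				mu.Lock()
				if firstErr == nil {
					firstErr = err
				}
				mu.Unlock()
			}
		}(w)
	}
	wg.Wait()
	return firstErr
}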
|
1445
index/scorch/persister.go
Normal file
File diff suppressed because it is too large
697
index/scorch/reader_test.go
Normal file
@@ -0,0 +1,697 @@
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/document"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
func TestIndexReader(t *testing.T) {
|
||||
cfg := CreateConfig("TestIndexReader")
|
||||
err := InitTest(cfg)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := DestroyTest(cfg)
|
||||
if err != nil {
|
||||
t.Log(err)
|
||||
}
|
||||
}()
|
||||
|
||||
analysisQueue := index.NewAnalysisQueue(1)
|
||||
idx, err := NewScorch(Name, cfg, analysisQueue)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = idx.Open()
|
||||
if err != nil {
|
||||
t.Fatalf("error opening index: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := idx.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
var expectedCount uint64
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
expectedCount++
|
||||
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer))
|
||||
doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), index.IndexField|index.IncludeTermVectors, testAnalyzer))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
expectedCount++
|
||||
|
||||
indexReader, err := idx.Reader()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
defer func() {
|
||||
err := indexReader.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
// first look for a term that doesn't exist
|
||||
reader, err := indexReader.TermFieldReader(context.TODO(), []byte("nope"), "name", true, true, true)
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing term field reader: %v", err)
|
||||
}
|
||||
count := reader.Count()
|
||||
if count != 0 {
|
||||
t.Errorf("Expected doc count to be: %d got: %d", 0, count)
|
||||
}
|
||||
err = reader.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
reader, err = indexReader.TermFieldReader(context.TODO(), []byte("test"), "name", true, true, true)
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing term field reader: %v", err)
|
||||
}
|
||||
|
||||
count = reader.Count()
|
||||
if count != expectedCount {
|
||||
t.Errorf("Expected doc count to be: %d got: %d", expectedCount, count)
|
||||
}
|
||||
|
||||
var match *index.TermFieldDoc
|
||||
var actualCount uint64
|
||||
match, err = reader.Next(nil)
|
||||
for err == nil && match != nil {
|
||||
match, err = reader.Next(nil)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error reading next")
|
||||
}
|
||||
actualCount++
|
||||
}
|
||||
if actualCount != count {
|
||||
t.Errorf("count was 2, but only saw %d", actualCount)
|
||||
}
|
||||
|
||||
internalIDBogus, err := indexReader.InternalID("a-bogus-docId")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if internalIDBogus != nil {
|
||||
t.Errorf("expected bogus docId to have nil InternalID")
|
||||
}
|
||||
|
||||
internalID2, err := indexReader.InternalID("2")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
expectedMatch := &index.TermFieldDoc{
|
||||
ID: internalID2,
|
||||
Freq: 1,
|
||||
Norm: 0.5773502588272095,
|
||||
Vectors: []*index.TermFieldVector{
|
||||
{
|
||||
Field: "desc",
|
||||
Pos: 3,
|
||||
Start: 9,
|
||||
End: 13,
|
||||
},
|
||||
},
|
||||
}
|
||||
tfr, err := indexReader.TermFieldReader(context.TODO(), []byte("rice"), "desc", true, true, true)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
match, err = tfr.Next(nil)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(expectedMatch, match) {
|
||||
t.Errorf("got %#v, expected %#v", match, expectedMatch)
|
||||
}
|
||||
err = reader.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// now test usage of advance
|
||||
reader, err = indexReader.TermFieldReader(context.TODO(), []byte("test"), "name", true, true, true)
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing term field reader: %v", err)
|
||||
}
|
||||
|
||||
match, err = reader.Advance(internalID2, nil)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if match == nil {
|
||||
t.Fatalf("Expected match, got nil")
|
||||
}
|
||||
if !match.ID.Equals(internalID2) {
|
||||
t.Errorf("Expected ID '2', got '%s'", match.ID)
|
||||
}
|
||||
// have to manually construct bogus id, because it doesn't exist
|
||||
internalID3 := make([]byte, 8)
|
||||
binary.BigEndian.PutUint64(internalID3, 3)
|
||||
match, err = reader.Advance(index.IndexInternalID(internalID3), nil)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if match != nil {
|
||||
t.Errorf("expected nil, got %v", match)
|
||||
}
|
||||
err = reader.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// now test creating a reader for a field that doesn't exist
|
||||
reader, err = indexReader.TermFieldReader(context.TODO(), []byte("water"), "doesnotexist", true, true, true)
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing term field reader: %v", err)
|
||||
}
|
||||
count = reader.Count()
|
||||
if count != 0 {
|
||||
t.Errorf("expected count 0 for reader of non-existent field")
|
||||
}
|
||||
match, err = reader.Next(nil)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if match != nil {
|
||||
t.Errorf("expected nil, got %v", match)
|
||||
}
|
||||
match, err = reader.Advance(index.IndexInternalID("anywhere"), nil)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if match != nil {
|
||||
t.Errorf("expected nil, got %v", match)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIndexDocIdReader(t *testing.T) {
|
||||
cfg := CreateConfig("TestIndexDocIdReader")
|
||||
err := InitTest(cfg)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := DestroyTest(cfg)
|
||||
if err != nil {
|
||||
t.Log(err)
|
||||
}
|
||||
}()
|
||||
|
||||
analysisQueue := index.NewAnalysisQueue(1)
|
||||
idx, err := NewScorch(Name, cfg, analysisQueue)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = idx.Open()
|
||||
if err != nil {
|
||||
t.Fatalf("error opening index: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := idx.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
var expectedCount uint64
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
expectedCount++
|
||||
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test test test")))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []uint64{}, []byte("eat more rice"), index.IndexField|index.IncludeTermVectors))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
expectedCount++
|
||||
|
||||
indexReader, err := idx.Reader()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
defer func() {
|
||||
err := indexReader.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
// first get all doc ids
|
||||
reader, err := indexReader.DocIDReaderAll()
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing doc id reader: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := reader.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
id, err := reader.Next()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
count := uint64(0)
|
||||
for id != nil {
|
||||
count++
|
||||
id, err = reader.Next()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
if count != expectedCount {
|
||||
t.Errorf("expected %d, got %d", expectedCount, count)
|
||||
}
|
||||
|
||||
// try it again, but jump to the second doc this time
|
||||
reader2, err := indexReader.DocIDReaderAll()
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing doc id reader: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := reader2.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
internalID2, err := indexReader.InternalID("2")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
id, err = reader2.Advance(internalID2)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if !id.Equals(internalID2) {
|
||||
t.Errorf("expected to find id '2', got '%s'", id)
|
||||
}
|
||||
|
||||
// again 3 doesn't exist cannot use internal id for 3 as there is none
|
||||
// the important aspect is that this id doesn't exist, so its ok
|
||||
id, err = reader2.Advance(index.IndexInternalID("3"))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if id != nil {
|
||||
t.Errorf("expected to find id '', got '%s'", id)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIndexDocIdOnlyReader(t *testing.T) {
|
||||
cfg := CreateConfig("TestIndexDocIdOnlyReader")
|
||||
err := InitTest(cfg)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := DestroyTest(cfg)
|
||||
if err != nil {
|
||||
t.Log(err)
|
||||
}
|
||||
}()
|
||||
|
||||
analysisQueue := index.NewAnalysisQueue(1)
|
||||
idx, err := NewScorch(Name, cfg, analysisQueue)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = idx.Open()
|
||||
if err != nil {
|
||||
t.Fatalf("error opening index: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := idx.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	doc = document.NewDocument("3")
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	doc = document.NewDocument("5")
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	doc = document.NewDocument("7")
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	doc = document.NewDocument("9")
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	indexReader, err := idx.Reader()
	if err != nil {
		t.Error(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	onlyIds := []string{"1", "5", "9"}
	reader, err := indexReader.DocIDReaderOnly(onlyIds)
	if err != nil {
		t.Errorf("Error accessing doc id reader: %v", err)
	}
	defer func() {
		err := reader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	id, err := reader.Next()
	if err != nil {
		t.Fatal(err)
	}

	count := uint64(0)
	for id != nil {
		count++
		id, err = reader.Next()
		if err != nil {
			t.Fatal(err)
		}
	}
	if count != 3 {
		t.Errorf("expected 3, got %d", count)
	}

	// commented out because advance works with internal ids
	// this test presumes we see items in external doc id order
	// which is no longer the case, so simply converting external ids
	// to internal ones is not logically correct
	// not removing though because we need some way to test Advance()

	// // try it again, but jump
	// reader2, err := indexReader.DocIDReaderOnly(onlyIds)
	// if err != nil {
	// 	t.Errorf("Error accessing doc id reader: %v", err)
	// }
	// defer func() {
	// 	err := reader2.Close()
	// 	if err != nil {
	// 		t.Error(err)
	// 	}
	// }()
	//
	// id, err = reader2.Advance(index.IndexInternalID("5"))
	// if err != nil {
	// 	t.Error(err)
	// }
	// if !id.Equals(index.IndexInternalID("5")) {
	// 	t.Errorf("expected to find id '5', got '%s'", id)
	// }
	//
	// id, err = reader2.Advance(index.IndexInternalID("a"))
	// if err != nil {
	// 	t.Error(err)
	// }
	// if id != nil {
	// 	t.Errorf("expected to find id '', got '%s'", id)
	// }

	// some keys aren't actually there
	onlyIds = []string{"0", "2", "4", "5", "6", "8", "a"}
	reader3, err := indexReader.DocIDReaderOnly(onlyIds)
	if err != nil {
		t.Errorf("Error accessing doc id reader: %v", err)
	}
	defer func() {
		err := reader3.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	id, err = reader3.Next()
	if err != nil {
		t.Fatal(err)
	}

	count = uint64(0)
	for id != nil {
		count++
		id, err = reader3.Next()
		if err != nil {
			t.Fatal(err)
		}
	}
	if count != 1 {
		t.Errorf("expected 1, got %d", count)
	}

	// commented out because advance works with internal ids
	// this test presumes we see items in external doc id order
	// which is no longer the case, so simply converting external ids
	// to internal ones is not logically correct
	// not removing though because we need some way to test Advance()

	// // mix advance and next
	// onlyIds = []string{"0", "1", "3", "5", "6", "9"}
	// reader4, err := indexReader.DocIDReaderOnly(onlyIds)
	// if err != nil {
	// 	t.Errorf("Error accessing doc id reader: %v", err)
	// }
	// defer func() {
	// 	err := reader4.Close()
	// 	if err != nil {
	// 		t.Error(err)
	// 	}
	// }()
	//
	// // first key is "1"
	// id, err = reader4.Next()
	// if err != nil {
	// 	t.Error(err)
	// }
	// if !id.Equals(index.IndexInternalID("1")) {
	// 	t.Errorf("expected to find id '1', got '%s'", id)
	// }
	//
	// // advancing to key we dont have gives next
	// id, err = reader4.Advance(index.IndexInternalID("2"))
	// if err != nil {
	// 	t.Error(err)
	// }
	// if !id.Equals(index.IndexInternalID("3")) {
	// 	t.Errorf("expected to find id '3', got '%s'", id)
	// }
	//
	// // next after advance works
	// id, err = reader4.Next()
	// if err != nil {
	// 	t.Error(err)
	// }
	// if !id.Equals(index.IndexInternalID("5")) {
	// 	t.Errorf("expected to find id '5', got '%s'", id)
	// }
	//
	// // advancing to key we do have works
	// id, err = reader4.Advance(index.IndexInternalID("9"))
	// if err != nil {
	// 	t.Error(err)
	// }
	// if !id.Equals(index.IndexInternalID("9")) {
	// 	t.Errorf("expected to find id '9', got '%s'", id)
	// }
	//
	// // advance backwards at end
	// id, err = reader4.Advance(index.IndexInternalID("4"))
	// if err != nil {
	// 	t.Error(err)
	// }
	// if !id.Equals(index.IndexInternalID("5")) {
	// 	t.Errorf("expected to find id '5', got '%s'", id)
	// }
	//
	// // next after advance works
	// id, err = reader4.Next()
	// if err != nil {
	// 	t.Error(err)
	// }
	// if !id.Equals(index.IndexInternalID("9")) {
	// 	t.Errorf("expected to find id '9', got '%s'", id)
	// }
	//
	// // advance backwards to key that exists, but not in only set
	// id, err = reader4.Advance(index.IndexInternalID("7"))
	// if err != nil {
	// 	t.Error(err)
	// }
	// if !id.Equals(index.IndexInternalID("9")) {
	// 	t.Errorf("expected to find id '9', got '%s'", id)
	// }
}

func TestSegmentIndexAndLocalDocNumFromGlobal(t *testing.T) {
	tests := []struct {
		offsets      []uint64
		globalDocNum uint64
		segmentIndex int
		localDocNum  uint64
	}{
		// just 1 segment
		{
			offsets:      []uint64{0},
			globalDocNum: 0,
			segmentIndex: 0,
			localDocNum:  0,
		},
		{
			offsets:      []uint64{0},
			globalDocNum: 1,
			segmentIndex: 0,
			localDocNum:  1,
		},
		{
			offsets:      []uint64{0},
			globalDocNum: 25,
			segmentIndex: 0,
			localDocNum:  25,
		},
		// now 2 segments, 30 docs in first
		{
			offsets:      []uint64{0, 30},
			globalDocNum: 0,
			segmentIndex: 0,
			localDocNum:  0,
		},
		{
			offsets:      []uint64{0, 30},
			globalDocNum: 1,
			segmentIndex: 0,
			localDocNum:  1,
		},
		{
			offsets:      []uint64{0, 30},
			globalDocNum: 25,
			segmentIndex: 0,
			localDocNum:  25,
		},
		{
			offsets:      []uint64{0, 30},
			globalDocNum: 30,
			segmentIndex: 1,
			localDocNum:  0,
		},
		{
			offsets:      []uint64{0, 30},
			globalDocNum: 35,
			segmentIndex: 1,
			localDocNum:  5,
		},
		// lots of segments
		{
			offsets:      []uint64{0, 30, 40, 70, 99, 172, 800, 25000},
			globalDocNum: 0,
			segmentIndex: 0,
			localDocNum:  0,
		},
		{
			offsets:      []uint64{0, 30, 40, 70, 99, 172, 800, 25000},
			globalDocNum: 25,
			segmentIndex: 0,
			localDocNum:  25,
		},
		{
			offsets:      []uint64{0, 30, 40, 70, 99, 172, 800, 25000},
			globalDocNum: 35,
			segmentIndex: 1,
			localDocNum:  5,
		},
		{
			offsets:      []uint64{0, 30, 40, 70, 99, 172, 800, 25000},
			globalDocNum: 100,
			segmentIndex: 4,
			localDocNum:  1,
		},
		{
			offsets:      []uint64{0, 30, 40, 70, 99, 172, 800, 25000},
			globalDocNum: 825,
			segmentIndex: 6,
			localDocNum:  25,
		},
	}

	for _, test := range tests {
		i := &IndexSnapshot{
			offsets: test.offsets,
			refs:    1,
		}
		gotSegmentIndex, gotLocalDocNum := i.segmentIndexAndLocalDocNumFromGlobal(test.globalDocNum)
		if gotSegmentIndex != test.segmentIndex {
			t.Errorf("got segment index %d expected %d for offsets %v globalDocNum %d", gotSegmentIndex, test.segmentIndex, test.offsets, test.globalDocNum)
		}
		if gotLocalDocNum != test.localDocNum {
			t.Errorf("got localDocNum %d expected %d for offsets %v globalDocNum %d", gotLocalDocNum, test.localDocNum, test.offsets, test.globalDocNum)
		}
		err := i.DecRef()
		if err != nil {
			t.Errorf("expected no err, got: %v", err)
		}
	}
}
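The table above encodes a simple piece of offset arithmetic: `offsets` holds the cumulative document count at the start of each segment, so a global doc number is resolved by finding the last offset not greater than it and subtracting. The following is a minimal, standalone sketch of that mapping; the helper name `segmentIndexAndLocalDocNum` is hypothetical and only mirrors what `IndexSnapshot.segmentIndexAndLocalDocNumFromGlobal` is expected to compute in these cases.

```
package main

import (
	"fmt"
	"sort"
)

// segmentIndexAndLocalDocNum finds the segment whose starting offset is the
// largest one not greater than globalDocNum, then derives the local doc
// number by subtracting that offset.
func segmentIndexAndLocalDocNum(offsets []uint64, globalDocNum uint64) (int, uint64) {
	segmentIndex := sort.Search(len(offsets), func(x int) bool {
		return offsets[x] > globalDocNum
	}) - 1
	return segmentIndex, globalDocNum - offsets[segmentIndex]
}

func main() {
	offsets := []uint64{0, 30, 40, 70, 99, 172, 800, 25000}
	fmt.Println(segmentIndexAndLocalDocNum(offsets, 825)) // prints: 6 25
}
```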
63
index/scorch/regexp.go
Normal file
@@ -0,0 +1,63 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scorch

import (
	"regexp/syntax"

	"github.com/blevesearch/vellum/regexp"
)

func parseRegexp(pattern string) (a *regexp.Regexp, prefixBeg, prefixEnd []byte, err error) {
	// TODO: potential optimization where syntax.Regexp supports a Simplify() API?

	parsed, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		return nil, nil, nil, err
	}

	re, err := regexp.NewParsedWithLimit(pattern, parsed, regexp.DefaultLimit)
	if err != nil {
		return nil, nil, nil, err
	}

	prefix := literalPrefix(parsed)
	if prefix != "" {
		prefixBeg := []byte(prefix)
		prefixEnd := calculateExclusiveEndFromPrefix(prefixBeg)
		return re, prefixBeg, prefixEnd, nil
	}

	return re, nil, nil, nil
}

// Returns the literal prefix given the parse tree for a regexp
func literalPrefix(s *syntax.Regexp) string {
	// traverse the left-most branch in the parse tree as long as the
	// node represents a concatenation
	for s != nil && s.Op == syntax.OpConcat {
		if len(s.Sub) < 1 {
			return ""
		}

		s = s.Sub[0]
	}

	if s.Op == syntax.OpLiteral && (s.Flags&syntax.FoldCase == 0) {
		return string(s.Rune)
	}

	return "" // no literal prefix
}
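As a quick illustration of the prefix extraction above, the sketch below parses a pattern with the standard library, walks the left-most concatenation branch the same way `literalPrefix` does, and derives an exclusive end key by incrementing the last byte of the prefix. The end-key step is a simplification for illustration only; `calculateExclusiveEndFromPrefix` elsewhere in this package remains the authoritative version.

```
package main

import (
	"fmt"
	"regexp/syntax"
)

func main() {
	parsed, err := syntax.Parse("hello.*world", syntax.Perl)
	if err != nil {
		panic(err)
	}

	// walk the left-most concatenation branch, mirroring literalPrefix above
	for parsed != nil && parsed.Op == syntax.OpConcat && len(parsed.Sub) > 0 {
		parsed = parsed.Sub[0]
	}

	prefix := ""
	if parsed.Op == syntax.OpLiteral && parsed.Flags&syntax.FoldCase == 0 {
		prefix = string(parsed.Rune)
	}

	// one simple way to bound a dictionary range scan: increment the last
	// byte of the prefix (ignores 0xff edge cases for brevity)
	end := []byte(prefix)
	if len(end) > 0 {
		end[len(end)-1]++
	}
	fmt.Printf("prefix=%q exclusive end=%q\n", prefix, end) // prefix="hello" exclusive end="hellp"
}
```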
57
index/scorch/regexp_test.go
Normal file
@@ -0,0 +1,57 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scorch

import (
	"regexp/syntax"
	"testing"
)

func TestLiteralPrefix(t *testing.T) {
	tests := []struct {
		input, expected string
	}{
		{"", ""},
		{"hello", "hello"},
		{"hello.?", "hello"},
		{"hello$", "hello"},
		{`[h][e][l][l][o].*world`, "hello"},
		{`[h-h][e-e][l-l][l-l][o-o].*world`, "hello"},
		{".*", ""},
		{"h.*", "h"},
		{"h.?", "h"},
		{"h[a-z]", "h"},
		{`h\s`, "h"},
		{`(hello)world`, ""},
		{`日本語`, "日本語"},
		{`日本語\w`, "日本語"},
		{`^hello`, ""},
		{`^`, ""},
		{`$`, ""},
		{`(?i)mArTy`, ""},
	}

	for i, test := range tests {
		s, err := syntax.Parse(test.input, syntax.Perl)
		if err != nil {
			t.Fatalf("expected no syntax.Parse error, got: %v", err)
		}

		got := literalPrefix(s)
		if test.expected != got {
			t.Fatalf("test: %d, %+v, got: %s", i, test, got)
		}
	}
}
216
index/scorch/rollback.go
Normal file
@@ -0,0 +1,216 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scorch

import (
	"fmt"
	"log"
	"os"

	bolt "go.etcd.io/bbolt"
)

type RollbackPoint struct {
	epoch uint64
	meta  map[string][]byte
}

func (r *RollbackPoint) GetInternal(key []byte) []byte {
	return r.meta[string(key)]
}

// RollbackPoints returns an array of rollback points available for
// the application to rollback to, with more recent rollback points
// (higher epochs) coming first.
func RollbackPoints(path string) ([]*RollbackPoint, error) {
	if len(path) == 0 {
		return nil, fmt.Errorf("RollbackPoints: invalid path")
	}

	rootBoltPath := path + string(os.PathSeparator) + "root.bolt"
	rootBoltOpt := &bolt.Options{
		ReadOnly: true,
	}
	rootBolt, err := bolt.Open(rootBoltPath, 0600, rootBoltOpt)
	if err != nil || rootBolt == nil {
		return nil, err
	}

	// start a read-only bolt transaction
	tx, err := rootBolt.Begin(false)
	if err != nil {
		return nil, fmt.Errorf("RollbackPoints: failed to start" +
			" read-only transaction")
	}

	// read-only bolt transactions to be rolled back
	defer func() {
		_ = tx.Rollback()
		_ = rootBolt.Close()
	}()

	snapshots := tx.Bucket(boltSnapshotsBucket)
	if snapshots == nil {
		return nil, nil
	}

	rollbackPoints := []*RollbackPoint{}

	c1 := snapshots.Cursor()
	for k, _ := c1.Last(); k != nil; k, _ = c1.Prev() {
		_, snapshotEpoch, err := decodeUvarintAscending(k)
		if err != nil {
			log.Printf("RollbackPoints:"+
				" unable to parse segment epoch %x, continuing", k)
			continue
		}

		snapshot := snapshots.Bucket(k)
		if snapshot == nil {
			log.Printf("RollbackPoints:"+
				" snapshot key, but bucket missing %x, continuing", k)
			continue
		}

		meta := map[string][]byte{}
		c2 := snapshot.Cursor()
		for j, _ := c2.First(); j != nil; j, _ = c2.Next() {
			if j[0] == boltInternalKey[0] {
				internalBucket := snapshot.Bucket(j)
				if internalBucket == nil {
					err = fmt.Errorf("internal bucket missing")
					break
				}
				err = internalBucket.ForEach(func(key []byte, val []byte) error {
					copiedVal := append([]byte(nil), val...)
					meta[string(key)] = copiedVal
					return nil
				})
				if err != nil {
					break
				}
			}
		}

		if err != nil {
			log.Printf("RollbackPoints:"+
				" failed in fetching internal data: %v", err)
			continue
		}

		rollbackPoints = append(rollbackPoints, &RollbackPoint{
			epoch: snapshotEpoch,
			meta:  meta,
		})
	}

	return rollbackPoints, nil
}

// Rollback atomically and durably brings the store back to the point
// in time as represented by the RollbackPoint.
// Rollback() should only be passed a RollbackPoint that came from the
// same store using the RollbackPoints() API along with the index path.
func Rollback(path string, to *RollbackPoint) error {
	if to == nil {
		return fmt.Errorf("Rollback: RollbackPoint is nil")
	}
	if len(path) == 0 {
		return fmt.Errorf("Rollback: index path is empty")
	}

	rootBoltPath := path + string(os.PathSeparator) + "root.bolt"
	rootBoltOpt := &bolt.Options{
		ReadOnly: false,
	}
	rootBolt, err := bolt.Open(rootBoltPath, 0600, rootBoltOpt)
	if err != nil || rootBolt == nil {
		return err
	}
	defer func() {
		err1 := rootBolt.Close()
		if err1 != nil && err == nil {
			err = err1
		}
	}()

	// pick all the younger persisted epochs in bolt store
	// including the target one.
	var found bool
	var eligibleEpochs []uint64
	err = rootBolt.View(func(tx *bolt.Tx) error {
		snapshots := tx.Bucket(boltSnapshotsBucket)
		if snapshots == nil {
			return nil
		}
		sc := snapshots.Cursor()
		for sk, _ := sc.Last(); sk != nil && !found; sk, _ = sc.Prev() {
			_, snapshotEpoch, err := decodeUvarintAscending(sk)
			if err != nil {
				continue
			}
			if snapshotEpoch == to.epoch {
				found = true
			}
			eligibleEpochs = append(eligibleEpochs, snapshotEpoch)
		}
		return nil
	})

	if len(eligibleEpochs) == 0 {
		return fmt.Errorf("Rollback: no persisted epochs found in bolt")
	}
	if !found {
		return fmt.Errorf("Rollback: target epoch %d not found in bolt", to.epoch)
	}

	// start a write transaction
	tx, err := rootBolt.Begin(true)
	if err != nil {
		return err
	}

	defer func() {
		if err == nil {
			err = tx.Commit()
		} else {
			_ = tx.Rollback()
		}
		if err == nil {
			err = rootBolt.Sync()
		}
	}()

	snapshots := tx.Bucket(boltSnapshotsBucket)
	if snapshots == nil {
		return nil
	}
	for _, epoch := range eligibleEpochs {
		k := encodeUvarintAscending(nil, epoch)
		if err != nil {
			continue
		}
		if epoch == to.epoch {
			// return here as it already processed until the given epoch
			return nil
		}
		err = snapshots.DeleteBucket(k)
		if err == bolt.ErrBucketNotFound {
			err = nil
		}
	}

	return err
}
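The two exported functions above are meant to be used together: list the available rollback points for an index path, pick one, and roll the index back to it while the index is closed. The following is a minimal sketch of that flow as a hypothetical helper, written as if inside the scorch package the way the tests below are, and assuming the "fmt" and "log" imports are available.

```
// rollbackToNewest is a hypothetical helper illustrating the flow; the
// index at indexPath must be closed while RollbackPoints/Rollback run,
// since both read root.bolt directly.
func rollbackToNewest(indexPath string) error {
	points, err := RollbackPoints(indexPath)
	if err != nil {
		return err
	}
	if len(points) == 0 {
		return fmt.Errorf("no rollback points available")
	}

	// points are ordered with the most recent epoch first
	target := points[0]
	log.Printf("rolling back %s to epoch %d", indexPath, target.epoch)

	return Rollback(indexPath, target)
}
```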
623
index/scorch/rollback_test.go
Normal file
@@ -0,0 +1,623 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scorch

import (
	"fmt"
	"io"
	"os"
	"path/filepath"
	"testing"
	"time"

	"github.com/blevesearch/bleve/v2/document"
	index "github.com/blevesearch/bleve_index_api"
)

func TestIndexRollback(t *testing.T) {
	cfg := CreateConfig("TestIndexRollback")
	numSnapshotsToKeepOrig := NumSnapshotsToKeep
	NumSnapshotsToKeep = 1000

	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		NumSnapshotsToKeep = numSnapshotsToKeepOrig

		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}

	_, ok := idx.(*Scorch)
	if !ok {
		t.Fatalf("Not a scorch index?")
	}

	indexPath, _ := cfg["path"].(string)
	// should have no rollback points initially
	rollbackPoints, err := RollbackPoints(indexPath)
	if err == nil {
		t.Fatalf("expected an error before the index is first opened, got nil (%d rollback points)", len(rollbackPoints))
	}
	if len(rollbackPoints) != 0 {
		t.Fatalf("expected no rollbackPoints, got %d", len(rollbackPoints))
	}

	err = idx.Open()
	if err != nil {
		t.Fatal(err)
	}
	// create a batch, insert 2 new documents
	batch := index.NewBatch()
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test1")))
	batch.Update(doc)
	doc = document.NewDocument("2")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2")))
	batch.Update(doc)

	err = idx.Batch(batch)
	if err != nil {
		t.Fatal(err)
	}

	readerSlow, err := idx.Reader() // keep snapshot around so it's not cleaned up
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		_ = readerSlow.Close()
	}()

	err = idx.Close()
	if err != nil {
		t.Fatal(err)
	}

	// fetch rollback points after first batch
	rollbackPoints, err = RollbackPoints(indexPath)
	if err != nil {
		t.Fatalf("expected no err, got: %v, %d", err, len(rollbackPoints))
	}
	if len(rollbackPoints) == 0 {
		t.Fatalf("expected some rollbackPoints, got none")
	}

	// set this as a rollback point for the future
	rollbackPoint := rollbackPoints[0]

	err = idx.Open()
	if err != nil {
		t.Fatal(err)
	}
	// create another batch, insert 2 new documents, and delete an existing one
	batch = index.NewBatch()
	doc = document.NewDocument("3")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3")))
	batch.Update(doc)
	doc = document.NewDocument("4")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test4")))
	batch.Update(doc)
	batch.Delete("1")

	err = idx.Batch(batch)
	if err != nil {
		t.Fatal(err)
	}

	err = idx.Close()
	if err != nil {
		t.Fatal(err)
	}

	rollbackPointsB, err := RollbackPoints(indexPath)
	if err != nil || len(rollbackPointsB) <= len(rollbackPoints) {
		t.Fatalf("expected no err, got: %v, %d", err, len(rollbackPointsB))
	}

	found := false
	for _, p := range rollbackPointsB {
		if rollbackPoint.epoch == p.epoch {
			found = true
		}
	}
	if !found {
		t.Fatalf("expected rollbackPoint epoch to still be available")
	}

	err = idx.Open()
	if err != nil {
		t.Fatal(err)
	}

	reader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}

	docCount, err := reader.DocCount()
	if err != nil {
		t.Fatal(err)
	}

	// expect docs 2, 3, 4
	if docCount != 3 {
		t.Fatalf("unexpected doc count: %v", docCount)
	}
	ret, err := reader.Document("1")
	if err != nil || ret != nil {
		t.Fatal(ret, err)
	}
	ret, err = reader.Document("2")
	if err != nil || ret == nil {
		t.Fatal(ret, err)
	}
	ret, err = reader.Document("3")
	if err != nil || ret == nil {
		t.Fatal(ret, err)
	}
	ret, err = reader.Document("4")
	if err != nil || ret == nil {
		t.Fatal(ret, err)
	}

	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}

	err = idx.Close()
	if err != nil {
		t.Fatal(err)
	}

	// rollback to a non existing rollback point
	err = Rollback(indexPath, &RollbackPoint{epoch: 100})
	if err == nil {
		t.Fatalf("expected err: Rollback: target epoch 100 not found in bolt")
	}

	// rollback to the selected rollback point
	err = Rollback(indexPath, rollbackPoint)
	if err != nil {
		t.Fatal(err)
	}

	err = idx.Open()
	if err != nil {
		t.Fatal(err)
	}

	reader, err = idx.Reader()
	if err != nil {
		t.Fatal(err)
	}

	docCount, err = reader.DocCount()
	if err != nil {
		t.Fatal(err)
	}

	// expect only docs 1, 2
	if docCount != 2 {
		t.Fatalf("unexpected doc count: %v", docCount)
	}
	ret, err = reader.Document("1")
	if err != nil || ret == nil {
		t.Fatal(ret, err)
	}
	ret, err = reader.Document("2")
	if err != nil || ret == nil {
		t.Fatal(ret, err)
	}
	ret, err = reader.Document("3")
	if err != nil || ret != nil {
		t.Fatal(ret, err)
	}
	ret, err = reader.Document("4")
	if err != nil || ret != nil {
		t.Fatal(ret, err)
	}

	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}

	err = idx.Close()
	if err != nil {
		t.Fatal(err)
	}
}

func TestGetProtectedSnapshots(t *testing.T) {
	origRollbackSamplingInterval := RollbackSamplingInterval
	defer func() {
		RollbackSamplingInterval = origRollbackSamplingInterval
	}()
	RollbackSamplingInterval = 10 * time.Minute
	currentTimeStamp := time.Now()
	tests := []struct {
		title              string
		metaData           []*snapshotMetaData
		numSnapshotsToKeep int
		expCount           int
		expEpochs          []uint64
	}{
		{
			title: "epochs that have exact timestamps as per expectation for protecting",
			metaData: []*snapshotMetaData{
				{epoch: 100, timeStamp: currentTimeStamp},
				{epoch: 99, timeStamp: currentTimeStamp.Add(-(RollbackSamplingInterval / 12))},
				{epoch: 88, timeStamp: currentTimeStamp.Add(-(RollbackSamplingInterval / 6))},
				{epoch: 50, timeStamp: currentTimeStamp.Add(-(RollbackSamplingInterval))},
				{epoch: 35, timeStamp: currentTimeStamp.Add(-(6 * RollbackSamplingInterval / 5))},
				{epoch: 10, timeStamp: currentTimeStamp.Add(-(2 * RollbackSamplingInterval))},
			},
			numSnapshotsToKeep: 3,
			expCount:           3,
			expEpochs:          []uint64{100, 50, 10},
		},
		{
			title: "epochs that have exact timestamps as per expectation for protecting",
			metaData: []*snapshotMetaData{
				{epoch: 100, timeStamp: currentTimeStamp},
				{epoch: 99, timeStamp: currentTimeStamp.Add(-(RollbackSamplingInterval / 12))},
				{epoch: 88, timeStamp: currentTimeStamp.Add(-(RollbackSamplingInterval / 6))},
				{epoch: 50, timeStamp: currentTimeStamp.Add(-(RollbackSamplingInterval))},
			},
			numSnapshotsToKeep: 2,
			expCount:           2,
			expEpochs:          []uint64{100, 50},
		},
		{
			title: "epochs that have timestamps approximated to the expected value, " +
				"always retain the latest one",
			metaData: []*snapshotMetaData{
				{epoch: 100, timeStamp: currentTimeStamp},
				{epoch: 99, timeStamp: currentTimeStamp.Add(-(RollbackSamplingInterval / 12))},
				{epoch: 88, timeStamp: currentTimeStamp.Add(-(RollbackSamplingInterval / 6))},
				{epoch: 50, timeStamp: currentTimeStamp.Add(-(3 * RollbackSamplingInterval / 4))},
				{epoch: 35, timeStamp: currentTimeStamp.Add(-(6 * RollbackSamplingInterval / 5))},
				{epoch: 10, timeStamp: currentTimeStamp.Add(-(2 * RollbackSamplingInterval))},
			},
			numSnapshotsToKeep: 3,
			expCount:           3,
			expEpochs:          []uint64{100, 35, 10},
		},
		{
			title: "protecting epochs when we don't have enough snapshots with RollbackSamplingInterval" +
				" separated timestamps",
			metaData: []*snapshotMetaData{
				{epoch: 100, timeStamp: currentTimeStamp},
				{epoch: 99, timeStamp: currentTimeStamp.Add(-(RollbackSamplingInterval / 12))},
				{epoch: 88, timeStamp: currentTimeStamp.Add(-(RollbackSamplingInterval / 6))},
				{epoch: 50, timeStamp: currentTimeStamp.Add(-(3 * RollbackSamplingInterval / 4))},
				{epoch: 35, timeStamp: currentTimeStamp.Add(-(5 * RollbackSamplingInterval / 6))},
				{epoch: 10, timeStamp: currentTimeStamp.Add(-(7 * RollbackSamplingInterval / 8))},
			},
			numSnapshotsToKeep: 4,
			expCount:           4,
			expEpochs:          []uint64{100, 99, 88, 10},
		},
		{
			title: "epochs of which some are approximated to the expected timestamps, and" +
				" we don't have enough snapshots with RollbackSamplingInterval separated timestamps",
			metaData: []*snapshotMetaData{
				{epoch: 100, timeStamp: currentTimeStamp},
				{epoch: 99, timeStamp: currentTimeStamp.Add(-(RollbackSamplingInterval / 12))},
				{epoch: 88, timeStamp: currentTimeStamp.Add(-(RollbackSamplingInterval / 6))},
				{epoch: 50, timeStamp: currentTimeStamp.Add(-(3 * RollbackSamplingInterval / 4))},
				{epoch: 35, timeStamp: currentTimeStamp.Add(-(8 * RollbackSamplingInterval / 7))},
				{epoch: 10, timeStamp: currentTimeStamp.Add(-(6 * RollbackSamplingInterval / 5))},
			},
			numSnapshotsToKeep: 3,
			expCount:           3,
			expEpochs:          []uint64{100, 50, 10},
		},
	}

	for i, test := range tests {
		protectedEpochs := getProtectedSnapshots(RollbackSamplingInterval,
			test.numSnapshotsToKeep, test.metaData)
		if len(protectedEpochs) != test.expCount {
			t.Errorf("%d test: %s, getProtectedSnapshots expected to return %d "+
				"snapshots, but got: %d", i, test.title, test.expCount, len(protectedEpochs))
		}
		for _, e := range test.expEpochs {
			if _, found := protectedEpochs[e]; !found {
				t.Errorf("%d test: %s, %d epoch expected to be protected, "+
					"but missing from protected list: %v", i, test.title, e, protectedEpochs)
			}
		}
	}
}

func indexDummyData(t *testing.T, scorchi *Scorch, i int) {
	// create a batch, insert 2 new documents
	batch := index.NewBatch()
	doc := document.NewDocument(fmt.Sprintf("%d", i))
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test1")))
	batch.Update(doc)
	doc = document.NewDocument(fmt.Sprintf("%d", i+1))
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2")))
	batch.Update(doc)

	err := scorchi.Batch(batch)
	if err != nil {
		t.Fatal(err)
	}
}

type testFSDirector string

func (f testFSDirector) GetWriter(filePath string) (io.WriteCloser,
	error) {
	dir, file := filepath.Split(filePath)
	if dir != "" {
		err := os.MkdirAll(filepath.Join(string(f), dir), os.ModePerm)
		if err != nil {
			return nil, err
		}
	}

	return os.OpenFile(filepath.Join(string(f), dir, file),
		os.O_RDWR|os.O_CREATE, 0600)
}

func TestLatestSnapshotProtected(t *testing.T) {
	cfg := CreateConfig("TestLatestSnapshotProtected")
	numSnapshotsToKeepOrig := NumSnapshotsToKeep
	NumSnapshotsToKeep = 3
	rollbackSamplingIntervalOrig := RollbackSamplingInterval
	RollbackSamplingInterval = 10 * time.Second

	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		NumSnapshotsToKeep = numSnapshotsToKeepOrig
		RollbackSamplingInterval = rollbackSamplingIntervalOrig
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	// disable merger and purger
	RegistryEventCallbacks["test"] = func(e Event) bool {
		if e.Kind == EventKindPreMergeCheck || e.Kind == EventKindPurgerCheck {
			return false
		}
		return true
	}
	cfg["eventCallbackName"] = "test"
	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}

	scorchi, ok := idx.(*Scorch)
	if !ok {
		t.Fatalf("Not a scorch index?")
	}

	err = scorchi.Open()
	if err != nil {
		t.Fatal(err)
	}

	// replicate the following scenario of persistence of snapshots
	// tc, tc - d/12, tc - d/6, tc - 3d/4, tc - 5d/6, tc - 6d/5
	// approximate timestamps where there's a chance that the latest snapshot
	// might not fit into the time-series
	indexDummyData(t, scorchi, 1)
	persistedSnapshots, err := scorchi.rootBoltSnapshotMetaData()
	if err != nil {
		t.Fatal(err)
	}

	if len(persistedSnapshots) != 1 {
		t.Fatalf("expected 1 persisted snapshot, got %d", len(persistedSnapshots))
	}
	time.Sleep(4 * RollbackSamplingInterval / 5)
	indexDummyData(t, scorchi, 3)
	time.Sleep(9 * RollbackSamplingInterval / 20)
	indexDummyData(t, scorchi, 5)
	time.Sleep(7 * RollbackSamplingInterval / 12)
	indexDummyData(t, scorchi, 7)
	time.Sleep(1 * RollbackSamplingInterval / 12)
	indexDummyData(t, scorchi, 9)

	persistedSnapshots, err = scorchi.rootBoltSnapshotMetaData()
	if err != nil {
		t.Fatal(err)
	}

	protectedSnapshots := getProtectedSnapshots(RollbackSamplingInterval, NumSnapshotsToKeep, persistedSnapshots)
	if len(protectedSnapshots) != 3 {
		t.Fatalf("expected %d protected snapshots, got %d", NumSnapshotsToKeep, len(protectedSnapshots))
	}
	if _, ok := protectedSnapshots[persistedSnapshots[0].epoch]; !ok {
		t.Fatalf("expected %d to be protected, but not found", persistedSnapshots[0].epoch)
	}
}

func TestBackupRacingWithPurge(t *testing.T) {
	cfg := CreateConfig("TestBackupRacingWithPurge")
	numSnapshotsToKeepOrig := NumSnapshotsToKeep
	NumSnapshotsToKeep = 3
	rollbackSamplingIntervalOrig := RollbackSamplingInterval
	RollbackSamplingInterval = 10 * time.Second
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		NumSnapshotsToKeep = numSnapshotsToKeepOrig
		RollbackSamplingInterval = rollbackSamplingIntervalOrig
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	// disable merger and purger
	RegistryEventCallbacks["test"] = func(e Event) bool {
		if e.Kind == EventKindPreMergeCheck || e.Kind == EventKindPurgerCheck {
			return false
		}
		return true
	}
	cfg["eventCallbackName"] = "test"
	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	defer idx.Close()

	scorchi, ok := idx.(*Scorch)
	if !ok {
		t.Fatalf("Not a scorch index?")
	}

	err = scorchi.Open()
	if err != nil {
		t.Fatal(err)
	}

	// replicate the following scenario of persistence of snapshots
	// tc, tc - d/12, tc - d/6, tc - 3d/4, tc - 5d/6, tc - 6d/5
	// approximate timestamps where there's a chance that the latest snapshot
	// might not fit into the time-series
	indexDummyData(t, scorchi, 1)
	time.Sleep(4 * RollbackSamplingInterval / 5)
	indexDummyData(t, scorchi, 3)
	time.Sleep(9 * RollbackSamplingInterval / 20)
	indexDummyData(t, scorchi, 5)
	time.Sleep(7 * RollbackSamplingInterval / 12)
	indexDummyData(t, scorchi, 7)
	time.Sleep(1 * RollbackSamplingInterval / 12)
	indexDummyData(t, scorchi, 9)

	// now if the purge code is invoked, there's a possibility of the latest snapshot
	// being removed from bolt and the corresponding file segment getting cleaned up.
	scorchi.removeOldData()

	copyReader := scorchi.CopyReader()
	defer func() { copyReader.CloseCopyReader() }()

	backupidxConfig := CreateConfig("backup-directory")
	err = InitTest(backupidxConfig)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(backupidxConfig)
		if err != nil {
			t.Log(err)
		}
	}()

	// if the latest snapshot was purged, the following will return error
	err = copyReader.CopyTo(testFSDirector(backupidxConfig["path"].(string)))
	if err != nil {
		t.Fatalf("error copying the index: %v", err)
	}
}

func TestSparseMutationCheckpointing(t *testing.T) {
	cfg := CreateConfig("TestSparseMutationCheckpointing")
	numSnapshotsToKeepOrig := NumSnapshotsToKeep
	NumSnapshotsToKeep = 3
	rollbackSamplingIntervalOrig := RollbackSamplingInterval
	RollbackSamplingInterval = 2 * time.Second

	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		NumSnapshotsToKeep = numSnapshotsToKeepOrig
		RollbackSamplingInterval = rollbackSamplingIntervalOrig
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	// disable merger and purger
	RegistryEventCallbacks["test"] = func(e Event) bool {
		if e.Kind == EventKindPreMergeCheck {
			return false
		}
		return true
	}
	cfg["eventCallbackName"] = "test"
	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}

	scorchi, ok := idx.(*Scorch)
	if !ok {
		t.Fatalf("Not a scorch index?")
	}

	err = scorchi.Open()
	if err != nil {
		t.Fatal(err)
	}

	// create 4 snapshots every 2 seconds
	indexDummyData(t, scorchi, 1)
	time.Sleep(RollbackSamplingInterval)
	indexDummyData(t, scorchi, 3)
	time.Sleep(RollbackSamplingInterval)
	indexDummyData(t, scorchi, 5)
	time.Sleep(RollbackSamplingInterval)
	indexDummyData(t, scorchi, 7)

	// now another snapshot is persisted outside of the checkpointing window,
	// so we should be able to retain some older checkpoints as well, along with
	// the latest one
	time.Sleep(time.Duration(NumSnapshotsToKeep) * RollbackSamplingInterval)
	indexDummyData(t, scorchi, 9)

	persistedSnapshots, err := scorchi.rootBoltSnapshotMetaData()
	if err != nil {
		t.Fatal(err)
	}

	// should have more than 1 snapshot
	protectedSnapshots := getProtectedSnapshots(RollbackSamplingInterval, NumSnapshotsToKeep, persistedSnapshots)
	if len(protectedSnapshots) <= 1 {
		t.Fatalf("expected %d protected snapshots, got %d", NumSnapshotsToKeep, len(protectedSnapshots))
	}
}
942
index/scorch/scorch.go
Normal file
@@ -0,0 +1,942 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scorch

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"sync"
	"sync/atomic"
	"time"

	"github.com/RoaringBitmap/roaring/v2"
	"github.com/blevesearch/bleve/v2/registry"
	index "github.com/blevesearch/bleve_index_api"
	segment "github.com/blevesearch/scorch_segment_api/v2"
	bolt "go.etcd.io/bbolt"
)

const Name = "scorch"

const Version uint8 = 2

var ErrClosed = fmt.Errorf("scorch closed")

type Scorch struct {
	nextSegmentID uint64
	stats         Stats
	iStats        internalStats

	readOnly      bool
	version       uint8
	config        map[string]interface{}
	analysisQueue *index.AnalysisQueue
	path          string

	unsafeBatch bool

	rootLock sync.RWMutex

	root                 *IndexSnapshot // holds 1 ref-count on the root
	rootPersisted        []chan error   // closed when root is persisted
	persistedCallbacks   []index.BatchCallback
	nextSnapshotEpoch    uint64
	eligibleForRemoval   []uint64        // Index snapshot epochs that are safe to GC.
	ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet.

	// keeps track of segments scheduled for online copy/backup operation. Each segment's filename maps to
	// the count of copy schedules. Segments with non-zero counts are protected from removal by the cleanup
	// operation. Counts decrement upon successful copy, allowing removal of segments with zero or absent counts.
	// must be accessed within the rootLock as it is accessed by the asynchronous cleanup routine.
	copyScheduled map[string]int

	numSnapshotsToKeep       int
	rollbackRetentionFactor  float64
	checkPoints              []*snapshotMetaData
	rollbackSamplingInterval time.Duration
	closeCh                  chan struct{}
	introductions            chan *segmentIntroduction
	persists                 chan *persistIntroduction
	merges                   chan *segmentMerge
	introducerNotifier       chan *epochWatcher
	persisterNotifier        chan *epochWatcher
	rootBolt                 *bolt.DB
	asyncTasks               sync.WaitGroup

	onEvent      func(event Event) bool
	onAsyncError func(err error, path string)

	forceMergeRequestCh chan *mergerCtrl

	segPlugin SegmentPlugin

	spatialPlugin index.SpatialAnalyzerPlugin
}

// AsyncPanicError is passed to scorch asyncErrorHandler when panic occurs in scorch background process
type AsyncPanicError struct {
	Source string
	Path   string
}

func (e *AsyncPanicError) Error() string {
	return fmt.Sprintf("%s panic when processing %s", e.Source, e.Path)
}

type internalStats struct {
	persistEpoch          uint64
	persistSnapshotSize   uint64
	mergeEpoch            uint64
	mergeSnapshotSize     uint64
	newSegBufBytesAdded   uint64
	newSegBufBytesRemoved uint64
	analysisBytesAdded    uint64
	analysisBytesRemoved  uint64
}

func NewScorch(storeName string,
	config map[string]interface{},
	analysisQueue *index.AnalysisQueue,
) (index.Index, error) {
	rv := &Scorch{
		version:              Version,
		config:               config,
		analysisQueue:        analysisQueue,
		nextSnapshotEpoch:    1,
		closeCh:              make(chan struct{}),
		ineligibleForRemoval: map[string]bool{},
		forceMergeRequestCh:  make(chan *mergerCtrl, 1),
		segPlugin:            defaultSegmentPlugin,
		copyScheduled:        map[string]int{},
	}

	forcedSegmentType, forcedSegmentVersion, err := configForceSegmentTypeVersion(config)
	if err != nil {
		return nil, err
	}
	if forcedSegmentType != "" && forcedSegmentVersion != 0 {
		err := rv.loadSegmentPlugin(forcedSegmentType,
			uint32(forcedSegmentVersion))
		if err != nil {
			return nil, err
		}
	}

	typ, ok := config["spatialPlugin"].(string)
	if ok {
		if err := rv.loadSpatialAnalyzerPlugin(typ); err != nil {
			return nil, err
		}
	}

	rv.root = &IndexSnapshot{parent: rv, refs: 1, creator: "NewScorch"}
	ro, ok := config["read_only"].(bool)
	if ok {
		rv.readOnly = ro
	}
	ub, ok := config["unsafe_batch"].(bool)
	if ok {
		rv.unsafeBatch = ub
	}
	ecbName, ok := config["eventCallbackName"].(string)
	if ok {
		rv.onEvent = RegistryEventCallbacks[ecbName]
	}
	aecbName, ok := config["asyncErrorCallbackName"].(string)
	if ok {
		rv.onAsyncError = RegistryAsyncErrorCallbacks[aecbName]
	}
	// validate any custom persistor options to
	// prevent an async error in the persistor routine
	_, err = rv.parsePersisterOptions()
	if err != nil {
		return nil, err
	}
	// validate any custom merge planner options to
	// prevent an async error in the merger routine
	_, err = rv.parseMergePlannerOptions()
	if err != nil {
		return nil, err
	}

	return rv, nil
}

// configForceSegmentTypeVersion checks if the caller has requested a
// specific segment type/version
func configForceSegmentTypeVersion(config map[string]interface{}) (string, uint32, error) {
	forcedSegmentVersion, err := parseToInteger(config["forceSegmentVersion"])
	if err != nil {
		return "", 0, nil
	}

	forcedSegmentType, ok := config["forceSegmentType"].(string)
	if !ok {
		return "", 0, fmt.Errorf(
			"forceSegmentVersion set to %d, must also specify forceSegmentType", forcedSegmentVersion)
	}

	return forcedSegmentType, uint32(forcedSegmentVersion), nil
}
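configForceSegmentTypeVersion above only treats the override as present when both keys are supplied, so a config that wants to pin the on-disk segment format has to set them together. A minimal sketch of such a config map follows, written as if inside the scorch package the way the tests are; the path and the specific type/version values ("zap", 15) are placeholders and assumptions, since which pairs are valid depends on the segment plugins registered with this package.

```
cfg := map[string]interface{}{
	"path": "/path/to/index", // placeholder
	// both keys must be provided; forceSegmentVersion alone is rejected
	"forceSegmentType":    "zap", // assumed plugin name
	"forceSegmentVersion": 15,    // assumed version number
}
idx, err := NewScorch(Name, cfg, index.NewAnalysisQueue(1))
if err != nil {
	// an unsupported type/version pair is reported here, before Open()
	panic(err)
}
_ = idx
```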
func (s *Scorch) NumEventsBlocking() uint64 {
	eventsCompleted := atomic.LoadUint64(&s.stats.TotEventTriggerCompleted)
	eventsStarted := atomic.LoadUint64(&s.stats.TotEventTriggerStarted)
	return eventsStarted - eventsCompleted
}

func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) bool {
	res := true
	if s.onEvent != nil {
		atomic.AddUint64(&s.stats.TotEventTriggerStarted, 1)
		res = s.onEvent(Event{Kind: kind, Scorch: s, Duration: dur})
		atomic.AddUint64(&s.stats.TotEventTriggerCompleted, 1)
	}
	return res
}

func (s *Scorch) fireAsyncError(err error) {
	if s.onAsyncError != nil {
		s.onAsyncError(err, s.path)
	}
	atomic.AddUint64(&s.stats.TotOnErrors, 1)
}

func (s *Scorch) Open() error {
	err := s.openBolt()
	if err != nil {
		return err
	}

	s.asyncTasks.Add(1)
	go s.introducerLoop()

	if !s.readOnly && s.path != "" {
		s.asyncTasks.Add(1)
		go s.persisterLoop()
		s.asyncTasks.Add(1)
		go s.mergerLoop()
	}

	return nil
}

func (s *Scorch) openBolt() error {
	var ok bool
	s.path, ok = s.config["path"].(string)
	if !ok {
		return fmt.Errorf("must specify path")
	}
	if s.path == "" {
		s.unsafeBatch = true
	}

	rootBoltOpt := *bolt.DefaultOptions
	if s.readOnly {
		rootBoltOpt.ReadOnly = true
		rootBoltOpt.OpenFile = func(path string, flag int, mode os.FileMode) (*os.File, error) {
			// Bolt appends an O_CREATE flag regardless.
			// See - https://github.com/etcd-io/bbolt/blob/v1.3.5/db.go#L210
			// Use os.O_RDONLY only if path exists (#1623)
			if _, err := os.Stat(path); os.IsNotExist(err) {
				return os.OpenFile(path, flag, mode)
			}
			return os.OpenFile(path, os.O_RDONLY, mode)
		}
	} else {
		if s.path != "" {
			err := os.MkdirAll(s.path, 0o700)
			if err != nil {
				return err
			}
		}
	}

	if boltTimeoutStr, ok := s.config["bolt_timeout"].(string); ok {
		var err error
		boltTimeout, err := time.ParseDuration(boltTimeoutStr)
		if err != nil {
			return fmt.Errorf("invalid duration specified for bolt_timeout: %v", err)
		}
		rootBoltOpt.Timeout = boltTimeout
	}

	rootBoltPath := s.path + string(os.PathSeparator) + "root.bolt"
	var err error
	if s.path != "" {
		s.rootBolt, err = bolt.Open(rootBoltPath, 0o600, &rootBoltOpt)
		if err != nil {
			return err
		}

		// now see if there is any existing state to load
		err = s.loadFromBolt()
		if err != nil {
			_ = s.Close()
			return err
		}
	}

	atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, uint64(len(s.root.segment)))

	s.introductions = make(chan *segmentIntroduction)
	s.persists = make(chan *persistIntroduction)
	s.merges = make(chan *segmentMerge)
	s.introducerNotifier = make(chan *epochWatcher, 1)
	s.persisterNotifier = make(chan *epochWatcher, 1)
	s.closeCh = make(chan struct{})
	s.forceMergeRequestCh = make(chan *mergerCtrl, 1)

	if !s.readOnly && s.path != "" {
		err := s.removeOldZapFiles() // Before persister or merger create any new files.
		if err != nil {
			_ = s.Close()
			return err
		}
	}

	s.numSnapshotsToKeep = NumSnapshotsToKeep
	if v, ok := s.config["numSnapshotsToKeep"]; ok {
		var t int
		if t, err = parseToInteger(v); err != nil {
			return fmt.Errorf("numSnapshotsToKeep parse err: %v", err)
		}
		if t > 0 {
			s.numSnapshotsToKeep = t
		}
	}

	s.rollbackSamplingInterval = RollbackSamplingInterval
	if v, ok := s.config["rollbackSamplingInterval"]; ok {
		var t time.Duration
		if t, err = parseToTimeDuration(v); err != nil {
			return fmt.Errorf("rollbackSamplingInterval parse err: %v", err)
		}
		s.rollbackSamplingInterval = t
	}

	s.rollbackRetentionFactor = RollbackRetentionFactor
	if v, ok := s.config["rollbackRetentionFactor"]; ok {
		var r float64
		if r, ok = v.(float64); !ok {
			return fmt.Errorf("rollbackRetentionFactor parse err: %v", v)
		}
		s.rollbackRetentionFactor = r
	}

typ, ok := s.config["spatialPlugin"].(string)
|
||||
if ok {
|
||||
if err := s.loadSpatialAnalyzerPlugin(typ); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Scorch) Close() (err error) {
|
||||
startTime := time.Now()
|
||||
defer func() {
|
||||
s.fireEvent(EventKindClose, time.Since(startTime))
|
||||
}()
|
||||
|
||||
s.fireEvent(EventKindCloseStart, 0)
|
||||
|
||||
// signal to async tasks we want to close
|
||||
close(s.closeCh)
|
||||
// wait for them to close
|
||||
s.asyncTasks.Wait()
|
||||
// now close the root bolt
|
||||
if s.rootBolt != nil {
|
||||
err = s.rootBolt.Close()
|
||||
s.rootLock.Lock()
|
||||
if s.root != nil {
|
||||
err2 := s.root.DecRef()
|
||||
if err == nil {
|
||||
err = err2
|
||||
}
|
||||
}
|
||||
s.root = nil
|
||||
s.rootLock.Unlock()
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (s *Scorch) Update(doc index.Document) error {
|
||||
b := index.NewBatch()
|
||||
b.Update(doc)
|
||||
return s.Batch(b)
|
||||
}
|
||||
|
||||
func (s *Scorch) Delete(id string) error {
|
||||
b := index.NewBatch()
|
||||
b.Delete(id)
|
||||
return s.Batch(b)
|
||||
}
|
||||
|
||||
// Batch applies a batch of changes to the index atomically
func (s *Scorch) Batch(batch *index.Batch) (err error) {
	start := time.Now()

	// notify handlers that we're about to index a batch of data
	s.fireEvent(EventKindBatchIntroductionStart, 0)
	defer func() {
		s.fireEvent(EventKindBatchIntroduction, time.Since(start))
	}()

	resultChan := make(chan index.Document, len(batch.IndexOps))

	var numUpdates uint64
	var numDeletes uint64
	var numPlainTextBytes uint64
	var ids []string
	for docID, doc := range batch.IndexOps {
		if doc != nil {
			// insert _id field
			doc.AddIDField()
			numUpdates++
			numPlainTextBytes += doc.NumPlainTextBytes()
		} else {
			numDeletes++
		}
		ids = append(ids, docID)
	}

	// FIXME could sort ids list concurrent with analysis?

	if numUpdates > 0 {
		go func() {
			for k := range batch.IndexOps {
				doc := batch.IndexOps[k]
				if doc != nil {
					// put the work on the queue
					s.analysisQueue.Queue(func() {
						analyze(doc, s.setSpatialAnalyzerPlugin)
						resultChan <- doc
					})
				}
			}
		}()
	}

	// wait for analysis result
	analysisResults := make([]index.Document, int(numUpdates))
	var itemsDeQueued uint64
	var totalAnalysisSize int
	for itemsDeQueued < numUpdates {
		result := <-resultChan
		resultSize := result.Size()
		// check if the document is searchable by the index;
		// if not, count it as a filtered mutation
		if !result.Indexed() {
			atomic.AddUint64(&s.stats.TotMutationsFiltered, 1)
		}
		atomic.AddUint64(&s.iStats.analysisBytesAdded, uint64(resultSize))
		totalAnalysisSize += resultSize
		analysisResults[itemsDeQueued] = result
		itemsDeQueued++
	}
	close(resultChan)
	defer atomic.AddUint64(&s.iStats.analysisBytesRemoved, uint64(totalAnalysisSize))

	atomic.AddUint64(&s.stats.TotAnalysisTime, uint64(time.Since(start)))

	indexStart := time.Now()

	var newSegment segment.Segment
	var bufBytes uint64
	stats := newFieldStats()

	if len(analysisResults) > 0 {
		newSegment, bufBytes, err = s.segPlugin.New(analysisResults)
		if err != nil {
			return err
		}
		if segB, ok := newSegment.(segment.DiskStatsReporter); ok {
			atomic.AddUint64(&s.stats.TotBytesWrittenAtIndexTime,
				segB.BytesWritten())
		}
		atomic.AddUint64(&s.iStats.newSegBufBytesAdded, bufBytes)
		if fsr, ok := newSegment.(segment.FieldStatsReporter); ok {
			fsr.UpdateFieldStats(stats)
		}
	} else {
		atomic.AddUint64(&s.stats.TotBatchesEmpty, 1)
	}

	err = s.prepareSegment(newSegment, ids, batch.InternalOps, batch.PersistedCallback(), stats)
	if err != nil {
		if newSegment != nil {
			_ = newSegment.Close()
		}
		atomic.AddUint64(&s.stats.TotOnErrors, 1)
	} else {
		atomic.AddUint64(&s.stats.TotUpdates, numUpdates)
		atomic.AddUint64(&s.stats.TotDeletes, numDeletes)
		atomic.AddUint64(&s.stats.TotBatches, 1)
		atomic.AddUint64(&s.stats.TotIndexedPlainTextBytes, numPlainTextBytes)
	}

	atomic.AddUint64(&s.iStats.newSegBufBytesRemoved, bufBytes)
	atomic.AddUint64(&s.stats.TotIndexTime, uint64(time.Since(indexStart)))

	return err
}
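Since Update and Delete above are just single-operation wrappers around Batch, callers that mutate more than one document usually build a batch themselves. The sketch below shows that pattern, written as if inside the scorch package the way the tests in this commit are, using only calls that also appear in those tests; the document ids and the internal key are placeholders.

```
// a minimal sketch; idx is an already-opened index obtained from NewScorch
batch := index.NewBatch()

doc := document.NewDocument("user-1") // hypothetical document id
doc.AddField(document.NewTextField("name", []uint64{}, []byte("alice")))
batch.Update(doc)

batch.Delete("user-0")                                // remove a previously indexed document
batch.SetInternal([]byte("checkpoint"), []byte("42")) // hypothetical internal key/value

if err := idx.Batch(batch); err != nil {
	// the whole batch is introduced together
	panic(err)
}
```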
func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
	internalOps map[string][]byte, persistedCallback index.BatchCallback, stats *fieldStats,
) error {
	// new introduction
	introduction := &segmentIntroduction{
		id:                atomic.AddUint64(&s.nextSegmentID, 1),
		data:              newSegment,
		ids:               ids,
		internal:          internalOps,
		stats:             stats,
		applied:           make(chan error),
		persistedCallback: persistedCallback,
	}

	if !s.unsafeBatch {
		introduction.persisted = make(chan error, 1)
	}

	// optimistically prepare obsoletes outside of rootLock
	s.rootLock.RLock()
	root := s.root
	root.AddRef()
	s.rootLock.RUnlock()

	defer func() { _ = root.DecRef() }()

	introduction.obsoletes = make(map[uint64]*roaring.Bitmap, len(root.segment))

	for _, seg := range root.segment {
		delta, err := seg.segment.DocNumbers(ids)
		if err != nil {
			return err
		}
		introduction.obsoletes[seg.id] = delta
	}

	introStartTime := time.Now()

	s.introductions <- introduction

	// block until this segment is applied
	err := <-introduction.applied
	if err != nil {
		return err
	}

	if introduction.persisted != nil {
		err = <-introduction.persisted
	}

	introTime := uint64(time.Since(introStartTime))
	atomic.AddUint64(&s.stats.TotBatchIntroTime, introTime)
	if atomic.LoadUint64(&s.stats.MaxBatchIntroTime) < introTime {
		atomic.StoreUint64(&s.stats.MaxBatchIntroTime, introTime)
	}

	return err
}

func (s *Scorch) SetInternal(key, val []byte) error {
	b := index.NewBatch()
	b.SetInternal(key, val)
	return s.Batch(b)
}

func (s *Scorch) DeleteInternal(key []byte) error {
	b := index.NewBatch()
	b.DeleteInternal(key)
	return s.Batch(b)
}

// Reader returns a low-level accessor on the index data. Close it to
// release associated resources.
func (s *Scorch) Reader() (index.IndexReader, error) {
	return s.currentSnapshot(), nil
}

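The reader returned here is a snapshot: it keeps the segments it was opened on referenced until it is closed, which is how the rollback tests in this commit hold an older snapshot alive. A short hedged sketch of typical usage, again written as if inside the scorch package and using only reader calls that appear in the tests; the document id is a placeholder.

```
// idx is an opened index obtained from NewScorch
reader, err := idx.Reader()
if err != nil {
	panic(err)
}
defer func() {
	_ = reader.Close() // releases the snapshot's segment references
}()

count, err := reader.DocCount()
if err != nil {
	panic(err)
}
fmt.Printf("snapshot sees %d documents\n", count)

// stored fields for one document; nil result if it is not in this snapshot
doc, err := reader.Document("user-1") // hypothetical external id
_, _ = doc, err
```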
func (s *Scorch) currentSnapshot() *IndexSnapshot {
|
||||
s.rootLock.RLock()
|
||||
rv := s.root
|
||||
if rv != nil {
|
||||
rv.AddRef()
|
||||
}
|
||||
s.rootLock.RUnlock()
|
||||
return rv
|
||||
}
|
||||
|
||||
func (s *Scorch) Stats() json.Marshaler {
|
||||
return &s.stats
|
||||
}
|
||||
|
||||
func (s *Scorch) BytesReadQueryTime() uint64 {
|
||||
return s.stats.TotBytesReadAtQueryTime
|
||||
}
|
||||
|
||||
func (s *Scorch) diskFileStats(rootSegmentPaths map[string]struct{}) (uint64,
|
||||
uint64, uint64,
|
||||
) {
|
||||
var numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot uint64
|
||||
if s.path != "" {
|
||||
files, err := os.ReadDir(s.path)
|
||||
if err == nil {
|
||||
for _, f := range files {
|
||||
if !f.IsDir() {
|
||||
if finfo, err := f.Info(); err == nil {
|
||||
numBytesUsedDisk += uint64(finfo.Size())
|
||||
numFilesOnDisk++
|
||||
if rootSegmentPaths != nil {
|
||||
fname := s.path + string(os.PathSeparator) + finfo.Name()
|
||||
if _, fileAtRoot := rootSegmentPaths[fname]; fileAtRoot {
|
||||
numBytesOnDiskByRoot += uint64(finfo.Size())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// if no root files path given, then consider all disk files.
|
||||
if rootSegmentPaths == nil {
|
||||
return numFilesOnDisk, numBytesUsedDisk, numBytesUsedDisk
|
||||
}
|
||||
|
||||
return numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot
|
||||
}
|
||||
|
||||
func (s *Scorch) StatsMap() map[string]interface{} {
|
||||
m := s.stats.ToMap()
|
||||
|
||||
indexSnapshot := s.currentSnapshot()
|
||||
if indexSnapshot == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
defer func() {
|
||||
_ = indexSnapshot.Close()
|
||||
}()
|
||||
|
||||
rootSegPaths := indexSnapshot.diskSegmentsPaths()
|
||||
|
||||
s.rootLock.RLock()
|
||||
m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval))
|
||||
s.rootLock.RUnlock()
|
||||
|
||||
numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot := s.diskFileStats(rootSegPaths)
|
||||
|
||||
m["CurOnDiskBytes"] = numBytesUsedDisk
|
||||
m["CurOnDiskFiles"] = numFilesOnDisk
|
||||
|
||||
// TODO: consider one day removing these backwards compatible
|
||||
// names for apps using the old names
|
||||
m["updates"] = m["TotUpdates"]
|
||||
m["deletes"] = m["TotDeletes"]
|
||||
m["batches"] = m["TotBatches"]
|
||||
m["errors"] = m["TotOnErrors"]
|
||||
m["analysis_time"] = m["TotAnalysisTime"]
|
||||
m["index_time"] = m["TotIndexTime"]
|
||||
m["term_searchers_started"] = m["TotTermSearchersStarted"]
|
||||
m["term_searchers_finished"] = m["TotTermSearchersFinished"]
|
||||
m["knn_searches"] = m["TotKNNSearches"]
|
||||
m["synonym_searches"] = m["TotSynonymSearches"]
|
||||
m["total_mutations_filtered"] = m["TotMutationsFiltered"]
|
||||
|
||||
m["num_bytes_read_at_query_time"] = m["TotBytesReadAtQueryTime"]
|
||||
m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"]
|
||||
m["num_bytes_written_at_index_time"] = m["TotBytesWrittenAtIndexTime"]
|
||||
m["num_items_introduced"] = m["TotIntroducedItems"]
|
||||
m["num_items_persisted"] = m["TotPersistedItems"]
|
||||
m["num_recs_to_persist"] = m["TotItemsToPersist"]
|
||||
// total disk bytes found in index directory inclusive of older snapshots
|
||||
m["num_bytes_used_disk"] = numBytesUsedDisk
|
||||
// total disk bytes by the latest root index, exclusive of older snapshots
|
||||
m["num_bytes_used_disk_by_root"] = numBytesOnDiskByRoot
|
||||
// num_bytes_used_disk_by_root_reclaimable is an approximation about the
|
||||
// reclaimable disk space in an index. (eg: from a full compaction)
|
||||
m["num_bytes_used_disk_by_root_reclaimable"] = uint64(float64(numBytesOnDiskByRoot) *
|
||||
indexSnapshot.reClaimableDocsRatio())
|
||||
m["num_files_on_disk"] = numFilesOnDisk
|
||||
m["num_root_memorysegments"] = m["TotMemorySegmentsAtRoot"]
|
||||
m["num_root_filesegments"] = m["TotFileSegmentsAtRoot"]
|
||||
m["num_persister_nap_pause_completed"] = m["TotPersisterNapPauseCompleted"]
|
||||
m["num_persister_nap_merger_break"] = m["TotPersisterMergerNapBreak"]
|
||||
m["total_compaction_written_bytes"] = m["TotFileMergeWrittenBytes"]
|
||||
|
||||
// the bool stat `index_bgthreads_active` indicates whether the background routines
|
||||
// (which are responsible for the index to attain a steady state) are still
|
||||
// doing some work.
|
||||
if rootEpoch, ok := m["CurRootEpoch"].(uint64); ok {
|
||||
if lastMergedEpoch, ok := m["LastMergedEpoch"].(uint64); ok {
|
||||
if lastPersistedEpoch, ok := m["LastPersistedEpoch"].(uint64); ok {
|
||||
m["index_bgthreads_active"] = !(lastMergedEpoch == rootEpoch && lastPersistedEpoch == rootEpoch)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// calculate the aggregate of all the segment's field stats
|
||||
aggFieldStats := newFieldStats()
|
||||
for _, segmentSnapshot := range indexSnapshot.Segments() {
|
||||
if segmentSnapshot.stats != nil {
|
||||
aggFieldStats.Aggregate(segmentSnapshot.stats)
|
||||
}
|
||||
}
|
||||
|
||||
aggFieldStatsMap := aggFieldStats.Fetch()
|
||||
for statName, stats := range aggFieldStatsMap {
|
||||
for fieldName, val := range stats {
|
||||
m["field:"+fieldName+":"+statName] = val
|
||||
}
|
||||
}
|
||||
return m
|
||||
}
|
||||
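
// Illustrative sketch (not part of the original file): how a caller might read
// a couple of the values exposed by StatsMap above. The key names are the ones
// set in StatsMap; the type assertions assume the uint64 values stored above,
// so a defensive caller should check the ok flags.
func exampleReadDiskStats(s *Scorch) (onDisk, byRoot uint64) {
	m := s.StatsMap()
	if m == nil {
		// StatsMap returns nil when no current snapshot is available
		return 0, 0
	}
	onDisk, _ = m["num_bytes_used_disk"].(uint64)
	byRoot, _ = m["num_bytes_used_disk_by_root"].(uint64)
	return onDisk, byRoot
}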

func (s *Scorch) Analyze(d index.Document) {
	analyze(d, s.setSpatialAnalyzerPlugin)
}

type customAnalyzerPluginInitFunc func(field index.Field)

func (s *Scorch) setSpatialAnalyzerPlugin(f index.Field) {
	if s.segPlugin != nil {
		// check whether the current field is a custom tokenizable
		// spatial field; if so, set the spatial analyser plugin to
		// override the tokenisation during the analysis stage.
		if sf, ok := f.(index.TokenizableSpatialField); ok {
			sf.SetSpatialAnalyzerPlugin(s.spatialPlugin)
		}
	}
}

func analyze(d index.Document, fn customAnalyzerPluginInitFunc) {
	d.VisitFields(func(field index.Field) {
		if field.Options().IsIndexed() {
			if fn != nil {
				fn(field)
			}

			field.Analyze()

			if d.HasComposite() && field.Name() != "_id" {
				// see if any of the composite fields need this
				d.VisitComposite(func(cf index.CompositeField) {
					cf.Compose(field.Name(), field.AnalyzedLength(), field.AnalyzedTokenFrequencies())
				})
				// Since the encoded geoShape is only necessary within the doc values
				// of the geoShapeField, it has been removed from the field's term dictionary.
				// However, the '_all' field uses its term dictionary as its docValues, so it
				// becomes necessary to add the geoShape into the '_all' field's term dictionary.
				if f, ok := field.(index.GeoShapeField); ok {
					d.VisitComposite(func(cf index.CompositeField) {
						geoshape := f.EncodedShape()
						cf.Compose(field.Name(), 1, index.TokenFrequencies{
							string(geoshape): &index.TokenFreq{
								Term: geoshape,
								Locations: []*index.TokenLocation{
									{
										Start:    0,
										End:      len(geoshape),
										Position: 1,
									},
								},
							},
						})
					})
				}
			}
		}
	})
}

func (s *Scorch) AddEligibleForRemoval(epoch uint64) {
	s.rootLock.Lock()
	if s.root == nil || s.root.epoch != epoch {
		s.eligibleForRemoval = append(s.eligibleForRemoval, epoch)
	}
	s.rootLock.Unlock()
}

func (s *Scorch) MemoryUsed() (memUsed uint64) {
	indexSnapshot := s.currentSnapshot()
	if indexSnapshot == nil {
		return
	}

	defer func() {
		_ = indexSnapshot.Close()
	}()

	// Account for current root snapshot overhead
	memUsed += uint64(indexSnapshot.Size())

	// Account for snapshot that the persister may be working on
	persistEpoch := atomic.LoadUint64(&s.iStats.persistEpoch)
	persistSnapshotSize := atomic.LoadUint64(&s.iStats.persistSnapshotSize)
	if persistEpoch != 0 && indexSnapshot.epoch > persistEpoch {
		// the snapshot that the persister is working on isn't the same as
		// the current snapshot
		memUsed += persistSnapshotSize
	}

	// Account for snapshot that the merger may be working on
	mergeEpoch := atomic.LoadUint64(&s.iStats.mergeEpoch)
	mergeSnapshotSize := atomic.LoadUint64(&s.iStats.mergeSnapshotSize)
	if mergeEpoch != 0 && indexSnapshot.epoch > mergeEpoch {
		// the snapshot that the merger is working on isn't the same as
		// the current snapshot
		memUsed += mergeSnapshotSize
	}

	memUsed += (atomic.LoadUint64(&s.iStats.newSegBufBytesAdded) -
		atomic.LoadUint64(&s.iStats.newSegBufBytesRemoved))

	memUsed += (atomic.LoadUint64(&s.iStats.analysisBytesAdded) -
		atomic.LoadUint64(&s.iStats.analysisBytesRemoved))

	return memUsed
}

func (s *Scorch) markIneligibleForRemoval(filename string) {
	s.rootLock.Lock()
	s.ineligibleForRemoval[filename] = true
	s.rootLock.Unlock()
}

func (s *Scorch) unmarkIneligibleForRemoval(filename string) {
	s.rootLock.Lock()
	delete(s.ineligibleForRemoval, filename)
	s.rootLock.Unlock()
}

func init() {
	err := registry.RegisterIndexType(Name, NewScorch)
	if err != nil {
		panic(err)
	}
}

func parseToTimeDuration(i interface{}) (time.Duration, error) {
	switch v := i.(type) {
	case string:
		return time.ParseDuration(v)

	default:
		return 0, fmt.Errorf("expects a duration string")
	}
}

func parseToInteger(i interface{}) (int, error) {
	switch v := i.(type) {
	case float64:
		return int(v), nil
	case int:
		return v, nil

	default:
		return 0, fmt.Errorf("expects int or float64 value")
	}
}
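
// Illustrative sketch (not part of the original file): why parseToInteger
// accepts float64 as well as int. Config options typically arrive as a
// map[string]interface{} decoded from JSON, where all numbers are float64
// and durations are plain strings. The option names below are hypothetical.
func exampleParseConfig(config map[string]interface{}) error {
	if v, ok := config["exampleNumSnapshotsToKeep"]; ok {
		n, err := parseToInteger(v)
		if err != nil {
			return err
		}
		_ = n // use the parsed integer
	}
	if v, ok := config["exampleNapDuration"]; ok {
		d, err := parseToTimeDuration(v) // e.g. "200ms"
		if err != nil {
			return err
		}
		_ = d // use the parsed duration
	}
	return nil
}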

// Holds Zap's field-level stats at a segment level
type fieldStats struct {
	// StatName -> FieldName -> value
	statMap map[string]map[string]uint64
}

// Add the data into the map, initializing the inner map for the stat name if needed
func (fs *fieldStats) Store(statName, fieldName string, value uint64) {
	if _, exists := fs.statMap[statName]; !exists {
		fs.statMap[statName] = make(map[string]uint64)
	}
	fs.statMap[statName][fieldName] = value
}

// Combine the given stats map with the existing map
func (fs *fieldStats) Aggregate(stats segment.FieldStats) {
	statMap := stats.Fetch()
	if statMap == nil {
		return
	}
	for statName, statMap := range statMap {
		if _, exists := fs.statMap[statName]; !exists {
			fs.statMap[statName] = make(map[string]uint64)
		}
		for fieldName, val := range statMap {
			if _, exists := fs.statMap[statName][fieldName]; !exists {
				fs.statMap[statName][fieldName] = 0
			}
			fs.statMap[statName][fieldName] += val
		}
	}
}

// Returns the stats map
func (fs *fieldStats) Fetch() map[string]map[string]uint64 {
	if fs == nil {
		return nil
	}

	return fs.statMap
}

// Initializes an empty stats map
func newFieldStats() *fieldStats {
	rv := &fieldStats{
		statMap: map[string]map[string]uint64{},
	}
	return rv
}
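
// Illustrative sketch (not part of the original file): the intended flow for
// fieldStats, mirroring how StatsMap aggregates per-segment stats above.
func exampleAggregateFieldStats(perSegment []segment.FieldStats) map[string]map[string]uint64 {
	agg := newFieldStats()
	for _, fs := range perSegment {
		agg.Aggregate(fs)
	}
	// e.g. agg.Fetch()["num_vectors"]["some_vector_field"] would then hold the
	// total across segments, if such a stat were present.
	return agg.Fetch()
}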

// CopyReader returns a low-level accessor for index data, ensuring persisted segments
// remain on disk for backup, preventing race conditions with the persister/merger cleanup.
// Close the reader after backup to allow segment removal by the persister/merger.
func (s *Scorch) CopyReader() index.CopyReader {
	s.rootLock.Lock()
	rv := s.root
	if rv != nil {
		rv.AddRef()
		var fileName string
		// schedule a backup for all the segments from the root. Note that both
		// the unpersisted and persisted segments are scheduled for backup,
		// because during the backup the unpersisted segments may get persisted,
		// and hence we need to protect both kinds of segments from removal by
		// the cleanup routine during the online backup
		for _, seg := range rv.segment {
			if perSeg, ok := seg.segment.(segment.PersistedSegment); ok {
				// segment is persisted
				fileName = filepath.Base(perSeg.Path())
			} else {
				// segment is not persisted
				// the name of the segment file that would be generated if the
				// segment is persisted in the future.
				fileName = zapFileName(seg.id)
			}
			rv.parent.copyScheduled[fileName]++
		}
	}
	s.rootLock.Unlock()
	return rv
}
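
// Illustrative sketch (not part of the original file): the shape of an online
// backup built around CopyReader above. doBackup is a placeholder for whatever
// copies the protected segment files to the backup location, and the close
// call is assumed from the doc comment above; the exact methods available on
// index.CopyReader are defined in the bleve_index_api package.
func exampleBackup(s *Scorch, doBackup func(r index.CopyReader) error) error {
	r := s.CopyReader()
	if r == nil {
		return fmt.Errorf("no index snapshot available to back up")
	}
	// While r is open, the files scheduled above stay protected from the
	// persister/merger cleanup routines.
	err := doBackup(r)
	// Per the doc comment above, close the reader once the backup completes
	// so the protected segment files become removable again.
	if cerr := r.Close(); err == nil {
		err = cerr
	}
	return err
}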

// FireIndexEvent is an external API that lets bleve fire a scorch event
// (EventKindIndexStart) from outside this package.
func (s *Scorch) FireIndexEvent() {
	s.fireEvent(EventKindIndexStart, 0)
}
2812
index/scorch/scorch_test.go
Normal file
File diff suppressed because it is too large
144
index/scorch/segment_plugin.go
Normal file
@@ -0,0 +1,144 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scorch

import (
	"fmt"

	"github.com/RoaringBitmap/roaring/v2"
	"github.com/blevesearch/bleve/v2/geo"
	index "github.com/blevesearch/bleve_index_api"
	segment "github.com/blevesearch/scorch_segment_api/v2"

	zapv11 "github.com/blevesearch/zapx/v11"
	zapv12 "github.com/blevesearch/zapx/v12"
	zapv13 "github.com/blevesearch/zapx/v13"
	zapv14 "github.com/blevesearch/zapx/v14"
	zapv15 "github.com/blevesearch/zapx/v15"
	zapv16 "github.com/blevesearch/zapx/v16"
)

// SegmentPlugin represents the essential functions required by a package to plug in
// its segment implementation
type SegmentPlugin interface {

	// Type is the name for this segment plugin
	Type() string

	// Version is a numeric value identifying a specific version of this type.
	// When incompatible changes are made to a particular type of plugin, the
	// version must be incremented.
	Version() uint32

	// New takes a set of Documents and turns them into a new Segment
	New(results []index.Document) (segment.Segment, uint64, error)

	// Open attempts to open the file at the specified path and
	// returns the corresponding Segment
	Open(path string) (segment.Segment, error)

	// Merge takes a set of Segments, and creates a new segment on disk at
	// the specified path.
	// Drops is a set of bitmaps (one for each segment) indicating which
	// documents can be dropped from the segments during the merge.
	// If the closeCh channel is closed, Merge will cease doing work at
	// the next opportunity, and return an error (closed).
	// StatsReporter can optionally be provided, in which case progress
	// made during the merge is reported while operation continues.
	// Returns:
	// A slice of new document numbers (one for each input segment),
	// this allows the caller to know a particular document's new
	// document number in the newly merged segment.
	// The number of bytes written to the new segment file.
	// An error, if any occurred.
	Merge(segments []segment.Segment, drops []*roaring.Bitmap, path string,
		closeCh chan struct{}, s segment.StatsReporter) (
		[][]uint64, uint64, error)
}

var supportedSegmentPlugins map[string]map[uint32]SegmentPlugin
var defaultSegmentPlugin SegmentPlugin

func init() {
	ResetSegmentPlugins()
	RegisterSegmentPlugin(&zapv16.ZapPlugin{}, true)
	RegisterSegmentPlugin(&zapv15.ZapPlugin{}, false)
	RegisterSegmentPlugin(&zapv14.ZapPlugin{}, false)
	RegisterSegmentPlugin(&zapv13.ZapPlugin{}, false)
	RegisterSegmentPlugin(&zapv12.ZapPlugin{}, false)
	RegisterSegmentPlugin(&zapv11.ZapPlugin{}, false)
}

func ResetSegmentPlugins() {
	supportedSegmentPlugins = map[string]map[uint32]SegmentPlugin{}
}

func RegisterSegmentPlugin(plugin SegmentPlugin, makeDefault bool) {
	if _, ok := supportedSegmentPlugins[plugin.Type()]; !ok {
		supportedSegmentPlugins[plugin.Type()] = map[uint32]SegmentPlugin{}
	}
	supportedSegmentPlugins[plugin.Type()][plugin.Version()] = plugin
	if makeDefault {
		defaultSegmentPlugin = plugin
	}
}

func SupportedSegmentTypes() (rv []string) {
	for k := range supportedSegmentPlugins {
		rv = append(rv, k)
	}
	return
}

func SupportedSegmentTypeVersions(typ string) (rv []uint32) {
	for k := range supportedSegmentPlugins[typ] {
		rv = append(rv, k)
	}
	return rv
}

func chooseSegmentPlugin(forcedSegmentType string,
	forcedSegmentVersion uint32) (SegmentPlugin, error) {
	if versions, ok := supportedSegmentPlugins[forcedSegmentType]; ok {
		if segPlugin, ok := versions[uint32(forcedSegmentVersion)]; ok {
			return segPlugin, nil
		}
		return nil, fmt.Errorf(
			"unsupported version %d for segment type: %s, supported: %v",
			forcedSegmentVersion, forcedSegmentType,
			SupportedSegmentTypeVersions(forcedSegmentType))
	}
	return nil, fmt.Errorf("unsupported segment type: %s, supported: %v",
		forcedSegmentType, SupportedSegmentTypes())
}
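
// Illustrative sketch (not part of the original file): how a forced segment
// type/version pair, for example carried in index config, resolves to a plugin
// via chooseSegmentPlugin. With the registrations in init() above, an existing
// type/version resolves to the matching zapx plugin, while an unknown version
// returns an error listing the supported versions.
func exampleChoosePlugin(forcedType string, forcedVersion uint32) {
	plugin, err := chooseSegmentPlugin(forcedType, forcedVersion)
	if err != nil {
		fmt.Printf("falling back to default plugin: %v\n", err)
		plugin = defaultSegmentPlugin
	}
	fmt.Printf("using segment plugin %s v%d\n", plugin.Type(), plugin.Version())
}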

func (s *Scorch) loadSegmentPlugin(forcedSegmentType string,
	forcedSegmentVersion uint32) error {
	segPlugin, err := chooseSegmentPlugin(forcedSegmentType,
		forcedSegmentVersion)
	if err != nil {
		return err
	}
	s.segPlugin = segPlugin
	return nil
}

func (s *Scorch) loadSpatialAnalyzerPlugin(typ string) error {
	s.spatialPlugin = geo.GetSpatialAnalyzerPlugin(typ)
	if s.spatialPlugin == nil {
		return fmt.Errorf("unsupported spatial plugin type: %s", typ)
	}
	return nil
}
1163
index/scorch/snapshot_index.go
Normal file
File diff suppressed because it is too large
119
index/scorch/snapshot_index_dict.go
Normal file
@@ -0,0 +1,119 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scorch

import (
	"container/heap"

	index "github.com/blevesearch/bleve_index_api"
	segment "github.com/blevesearch/scorch_segment_api/v2"
)

type segmentDictCursor struct {
	dict segment.TermDictionary
	itr  segment.DictionaryIterator
	curr index.DictEntry
}

type IndexSnapshotFieldDict struct {
	cardinality int
	bytesRead   uint64

	snapshot *IndexSnapshot
	cursors  []*segmentDictCursor
	entry    index.DictEntry
}

func (i *IndexSnapshotFieldDict) BytesRead() uint64 {
	return i.bytesRead
}

func (i *IndexSnapshotFieldDict) Len() int { return len(i.cursors) }
func (i *IndexSnapshotFieldDict) Less(a, b int) bool {
	return i.cursors[a].curr.Term < i.cursors[b].curr.Term
}
func (i *IndexSnapshotFieldDict) Swap(a, b int) {
	i.cursors[a], i.cursors[b] = i.cursors[b], i.cursors[a]
}

func (i *IndexSnapshotFieldDict) Push(x interface{}) {
	i.cursors = append(i.cursors, x.(*segmentDictCursor))
}

func (i *IndexSnapshotFieldDict) Pop() interface{} {
	n := len(i.cursors)
	x := i.cursors[n-1]
	i.cursors = i.cursors[0 : n-1]
	return x
}

func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
	if len(i.cursors) == 0 {
		return nil, nil
	}
	i.entry = i.cursors[0].curr
	next, err := i.cursors[0].itr.Next()
	if err != nil {
		return nil, err
	}
	if next == nil {
		// at end of this cursor, remove it
		heap.Pop(i)
	} else {
		// modified heap, fix it
		i.cursors[0].curr = *next
		heap.Fix(i, 0)
	}
	// look for any other entries with the exact same term
	for len(i.cursors) > 0 && i.cursors[0].curr.Term == i.entry.Term {
		i.entry.Count += i.cursors[0].curr.Count
		next, err := i.cursors[0].itr.Next()
		if err != nil {
			return nil, err
		}
		if next == nil {
			// at end of this cursor, remove it
			heap.Pop(i)
		} else {
			// modified heap, fix it
			i.cursors[0].curr = *next
			heap.Fix(i, 0)
		}
	}

	return &i.entry, nil
}
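
// Illustrative sketch (not part of the original file): draining the merged
// dictionary produced by Next() above. Because the per-segment cursors sit in
// a min-heap ordered by term (see Len/Less/Swap and the heap.Pop/heap.Fix
// calls), Next() yields each distinct term once, in ascending order, with the
// counts summed across segments. Assumes the heap was initialized when the
// IndexSnapshotFieldDict was built.
func exampleDrainFieldDict(fd *IndexSnapshotFieldDict) error {
	for {
		entry, err := fd.Next()
		if err != nil {
			return err
		}
		if entry == nil {
			return nil // dictionary exhausted
		}
		_ = entry.Term  // ascending across the whole snapshot
		_ = entry.Count // aggregated across segments
	}
}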

func (i *IndexSnapshotFieldDict) Cardinality() int {
	return i.cardinality
}

func (i *IndexSnapshotFieldDict) Close() error {
	return nil
}

func (i *IndexSnapshotFieldDict) Contains(key []byte) (bool, error) {
	if len(i.cursors) == 0 {
		return false, nil
	}

	for _, cursor := range i.cursors {
		if found, _ := cursor.dict.Contains(key); found {
			return true, nil
		}
	}

	return false, nil
}
80
index/scorch/snapshot_index_doc.go
Normal file
@@ -0,0 +1,80 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scorch

import (
	"bytes"
	"reflect"

	"github.com/RoaringBitmap/roaring/v2"
	"github.com/blevesearch/bleve/v2/size"
	index "github.com/blevesearch/bleve_index_api"
)

var reflectStaticSizeIndexSnapshotDocIDReader int

func init() {
	var isdr IndexSnapshotDocIDReader
	reflectStaticSizeIndexSnapshotDocIDReader = int(reflect.TypeOf(isdr).Size())
}

type IndexSnapshotDocIDReader struct {
	snapshot      *IndexSnapshot
	iterators     []roaring.IntIterable
	segmentOffset int
}

func (i *IndexSnapshotDocIDReader) Size() int {
	return reflectStaticSizeIndexSnapshotDocIDReader + size.SizeOfPtr
}

func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) {
	for i.segmentOffset < len(i.iterators) {
		if !i.iterators[i.segmentOffset].HasNext() {
			i.segmentOffset++
			continue
		}
		next := i.iterators[i.segmentOffset].Next()
		// make segment number into global number by adding offset
		globalOffset := i.snapshot.offsets[i.segmentOffset]
		return docNumberToBytes(nil, uint64(next)+globalOffset), nil
	}
	return nil, nil
}

func (i *IndexSnapshotDocIDReader) Advance(ID index.IndexInternalID) (index.IndexInternalID, error) {
	// FIXME do something better
	next, err := i.Next()
	if err != nil {
		return nil, err
	}
	if next == nil {
		return nil, nil
	}
	for bytes.Compare(next, ID) < 0 {
		next, err = i.Next()
		if err != nil {
			return nil, err
		}
		if next == nil {
			break
		}
	}
	return next, nil
}

func (i *IndexSnapshotDocIDReader) Close() error {
	return nil
}
79
index/scorch/snapshot_index_str.go
Normal file
@@ -0,0 +1,79 @@
// Copyright (c) 2024 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scorch

import (
	"reflect"

	"github.com/blevesearch/bleve/v2/size"
	segment "github.com/blevesearch/scorch_segment_api/v2"
)

var reflectStaticSizeIndexSnapshotThesaurusTermReader int

func init() {
	var istr IndexSnapshotThesaurusTermReader
	reflectStaticSizeIndexSnapshotThesaurusTermReader = int(reflect.TypeOf(istr).Size())
}

type IndexSnapshotThesaurusTermReader struct {
	name          string
	snapshot      *IndexSnapshot
	thesauri      []segment.Thesaurus
	postings      []segment.SynonymsList
	iterators     []segment.SynonymsIterator
	segmentOffset int
}

func (i *IndexSnapshotThesaurusTermReader) Size() int {
	sizeInBytes := reflectStaticSizeIndexSnapshotThesaurusTermReader + size.SizeOfPtr +
		len(i.name) + size.SizeOfString

	for _, postings := range i.postings {
		if postings != nil {
			sizeInBytes += postings.Size()
		}
	}

	for _, iterator := range i.iterators {
		if iterator != nil {
			sizeInBytes += iterator.Size()
		}
	}

	return sizeInBytes
}

func (i *IndexSnapshotThesaurusTermReader) Next() (string, error) {
	// find the next hit
	for i.segmentOffset < len(i.iterators) {
		if i.iterators[i.segmentOffset] != nil {
			next, err := i.iterators[i.segmentOffset].Next()
			if err != nil {
				return "", err
			}
			if next != nil {
				synTerm := next.Term()
				return synTerm, nil
			}
		}
		i.segmentOffset++
	}
	return "", nil
}

func (i *IndexSnapshotThesaurusTermReader) Close() error {
	return nil
}
90
index/scorch/snapshot_index_test.go
Normal file
@@ -0,0 +1,90 @@
package scorch

import (
	"testing"

	"github.com/blevesearch/vellum"
)

func TestIndexSnapshot_getLevAutomaton(t *testing.T) {
	// Create a dummy IndexSnapshot (parent doesn't matter for this method)
	is := &IndexSnapshot{}

	tests := []struct {
		name        string
		term        string
		fuzziness   uint8
		expectError bool
		errorMsg    string // Optional: check specific error message
	}{
		{
			name:        "fuzziness 1",
			term:        "test",
			fuzziness:   1,
			expectError: false,
		},
		{
			name:        "fuzziness 2",
			term:        "another",
			fuzziness:   2,
			expectError: false,
		},
		{
			name:        "fuzziness 0",
			term:        "zero",
			fuzziness:   0,
			expectError: true,
			errorMsg:    "fuzziness exceeds the max limit",
		},
		{
			name:        "fuzziness 3",
			term:        "three",
			fuzziness:   3,
			expectError: true,
			errorMsg:    "fuzziness exceeds the max limit",
		},
		{
			name:        "empty term fuzziness 1",
			term:        "",
			fuzziness:   1,
			expectError: false,
		},
		{
			name:        "empty term fuzziness 2",
			term:        "",
			fuzziness:   2,
			expectError: false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			gotAutomaton, err := is.getLevAutomaton(tt.term, tt.fuzziness)

			if tt.expectError {
				if err == nil {
					t.Errorf("getLevAutomaton() expected an error but got nil")
				} else if tt.errorMsg != "" && err.Error() != tt.errorMsg {
					t.Errorf("getLevAutomaton() expected error msg %q but got %q", tt.errorMsg, err.Error())
				}
				if gotAutomaton != nil {
					t.Errorf("getLevAutomaton() expected nil automaton on error but got %v", gotAutomaton)
				}
			} else {
				if err != nil {
					t.Errorf("getLevAutomaton() got unexpected error: %v", err)
				}
				if gotAutomaton == nil {
					t.Errorf("getLevAutomaton() expected a valid automaton but got nil")
				}
				// Optional: Check type if needed, though non-nil is usually sufficient
				_, ok := gotAutomaton.(vellum.Automaton)
				if !ok {
					t.Errorf("getLevAutomaton() returned type is not vellum.Automaton")
				}
			}
		})
	}
}

// Add other tests for snapshot_index.go below if needed...
216
index/scorch/snapshot_index_tfr.go
Normal file
@@ -0,0 +1,216 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scorch

import (
	"bytes"
	"context"
	"fmt"
	"reflect"
	"sync/atomic"

	"github.com/blevesearch/bleve/v2/search"
	"github.com/blevesearch/bleve/v2/size"
	index "github.com/blevesearch/bleve_index_api"
	segment "github.com/blevesearch/scorch_segment_api/v2"
)

var reflectStaticSizeIndexSnapshotTermFieldReader int

func init() {
	var istfr IndexSnapshotTermFieldReader
	reflectStaticSizeIndexSnapshotTermFieldReader = int(reflect.TypeOf(istfr).Size())
}

type IndexSnapshotTermFieldReader struct {
	term               []byte
	field              string
	snapshot           *IndexSnapshot
	dicts              []segment.TermDictionary
	postings           []segment.PostingsList
	iterators          []segment.PostingsIterator
	segmentOffset      int
	includeFreq        bool
	includeNorm        bool
	includeTermVectors bool
	currPosting        segment.Posting
	currID             index.IndexInternalID
	recycle            bool
	bytesRead          uint64
	ctx                context.Context
}

func (i *IndexSnapshotTermFieldReader) incrementBytesRead(val uint64) {
	i.bytesRead += val
}

func (i *IndexSnapshotTermFieldReader) Size() int {
	sizeInBytes := reflectStaticSizeIndexSnapshotTermFieldReader + size.SizeOfPtr +
		len(i.term) +
		len(i.field) +
		len(i.currID)

	for _, entry := range i.postings {
		sizeInBytes += entry.Size()
	}

	for _, entry := range i.iterators {
		sizeInBytes += entry.Size()
	}

	if i.currPosting != nil {
		sizeInBytes += i.currPosting.Size()
	}

	return sizeInBytes
}

func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
	rv := preAlloced
	if rv == nil {
		rv = &index.TermFieldDoc{}
	}
	// find the next hit
	for i.segmentOffset < len(i.iterators) {
		prevBytesRead := i.iterators[i.segmentOffset].BytesRead()
		next, err := i.iterators[i.segmentOffset].Next()
		if err != nil {
			return nil, err
		}
		if next != nil {
			// make segment number into global number by adding offset
			globalOffset := i.snapshot.offsets[i.segmentOffset]
			nnum := next.Number()
			rv.ID = docNumberToBytes(rv.ID, nnum+globalOffset)
			i.postingToTermFieldDoc(next, rv)

			i.currID = rv.ID
			i.currPosting = next
			// the postings iterators maintain the bytesRead stat cumulatively,
			// because a single Next() may involve a series of loadChunk calls
			// whose bytes have to be added together before the delta is
			// reported upstream at this point.
			bytesRead := i.iterators[i.segmentOffset].BytesRead()
			if bytesRead > prevBytesRead {
				i.incrementBytesRead(bytesRead - prevBytesRead)
			}
			return rv, nil
		}
		i.segmentOffset++
	}
	return nil, nil
}

func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Posting, rv *index.TermFieldDoc) {
	if i.includeFreq {
		rv.Freq = next.Frequency()
	}
	if i.includeNorm {
		rv.Norm = next.Norm()
	}
	if i.includeTermVectors {
		locs := next.Locations()
		if cap(rv.Vectors) < len(locs) {
			rv.Vectors = make([]*index.TermFieldVector, len(locs))
			backing := make([]index.TermFieldVector, len(locs))
			for i := range backing {
				rv.Vectors[i] = &backing[i]
			}
		}
		rv.Vectors = rv.Vectors[:len(locs)]
		for i, loc := range locs {
			*rv.Vectors[i] = index.TermFieldVector{
				Start:          loc.Start(),
				End:            loc.End(),
				Pos:            loc.Pos(),
				ArrayPositions: loc.ArrayPositions(),
				Field:          loc.Field(),
			}
		}
	}
}

func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
	// FIXME do something better
	// for now, if we need to seek backwards, then restart from the beginning
	if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 {
		i2, err := i.snapshot.TermFieldReader(context.TODO(), i.term, i.field,
			i.includeFreq, i.includeNorm, i.includeTermVectors)
		if err != nil {
			return nil, err
		}
		// close the current term field reader before replacing it with a new one
		_ = i.Close()
		*i = *(i2.(*IndexSnapshotTermFieldReader))
	}
	num, err := docInternalToNumber(ID)
	if err != nil {
		return nil, fmt.Errorf("error converting to doc number % x - %v", ID, err)
	}
	segIndex, ldocNum := i.snapshot.segmentIndexAndLocalDocNumFromGlobal(num)
	if segIndex >= len(i.snapshot.segment) {
		return nil, fmt.Errorf("computed segment index %d out of bounds %d",
			segIndex, len(i.snapshot.segment))
	}
	// skip directly to the target segment
	i.segmentOffset = segIndex
	next, err := i.iterators[i.segmentOffset].Advance(ldocNum)
	if err != nil {
		return nil, err
	}
	if next == nil {
		// we jumped directly to the segment that should have contained it
		// but it wasn't there, so reuse Next() which should correctly
		// get the next hit after it (we moved i.segmentOffset)
		return i.Next(preAlloced)
	}

	if preAlloced == nil {
		preAlloced = &index.TermFieldDoc{}
	}
	preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+
		i.snapshot.offsets[segIndex])
	i.postingToTermFieldDoc(next, preAlloced)
	i.currID = preAlloced.ID
	i.currPosting = next
	return preAlloced, nil
}

func (i *IndexSnapshotTermFieldReader) Count() uint64 {
	var rv uint64
	for _, posting := range i.postings {
		rv += posting.Count()
	}
	return rv
}

func (i *IndexSnapshotTermFieldReader) Close() error {
	if i.ctx != nil {
		statsCallbackFn := i.ctx.Value(search.SearchIOStatsCallbackKey)
		if statsCallbackFn != nil {
			// before closing the TFR, report this reader's bytesRead value
			// upstream via the registered callback
			statsCallbackFn.(search.SearchIOStatsCallbackFunc)(i.bytesRead)
		}

		search.RecordSearchCost(i.ctx, search.AddM, i.bytesRead)
	}

	if i.snapshot != nil {
		atomic.AddUint64(&i.snapshot.parent.stats.TotTermSearchersFinished, uint64(1))
		i.snapshot.recycleTermFieldReader(i)
	}
	return nil
}
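
// Illustrative sketch (not part of the original file): how the bytesRead
// reported in Close() above can be observed by a caller. A callback is placed
// on the context under search.SearchIOStatsCallbackKey; when the reader is
// closed, Close() invokes it with the bytes read during this search. The
// conversion assumes search.SearchIOStatsCallbackFunc has the func(uint64)
// shape implied by the call in Close(); the code that builds the reader with
// this context is assumed to exist elsewhere.
func exampleIOStatsContext(parent context.Context, totalBytes *uint64) context.Context {
	return context.WithValue(parent, search.SearchIOStatsCallbackKey,
		search.SearchIOStatsCallbackFunc(func(n uint64) {
			atomic.AddUint64(totalBytes, n)
		}))
}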
107
index/scorch/snapshot_index_thes.go
Normal file
@@ -0,0 +1,107 @@
// Copyright (c) 2024 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scorch

import (
	"container/heap"

	index "github.com/blevesearch/bleve_index_api"
	segment "github.com/blevesearch/scorch_segment_api/v2"
)

type segmentThesCursor struct {
	thes segment.Thesaurus
	itr  segment.ThesaurusIterator
	curr index.ThesaurusEntry
}

type IndexSnapshotThesaurusKeys struct {
	snapshot *IndexSnapshot
	cursors  []*segmentThesCursor
	entry    index.ThesaurusEntry
}

func (i *IndexSnapshotThesaurusKeys) Len() int { return len(i.cursors) }
func (i *IndexSnapshotThesaurusKeys) Less(a, b int) bool {
	return i.cursors[a].curr.Term < i.cursors[b].curr.Term
}
func (i *IndexSnapshotThesaurusKeys) Swap(a, b int) {
	i.cursors[a], i.cursors[b] = i.cursors[b], i.cursors[a]
}

func (i *IndexSnapshotThesaurusKeys) Push(x interface{}) {
	i.cursors = append(i.cursors, x.(*segmentThesCursor))
}

func (i *IndexSnapshotThesaurusKeys) Pop() interface{} {
	n := len(i.cursors)
	x := i.cursors[n-1]
	i.cursors = i.cursors[0 : n-1]
	return x
}

func (i *IndexSnapshotThesaurusKeys) Next() (*index.ThesaurusEntry, error) {
	if len(i.cursors) == 0 {
		return nil, nil
	}
	i.entry = i.cursors[0].curr
	next, err := i.cursors[0].itr.Next()
	if err != nil {
		return nil, err
	}
	if next == nil {
		// at end of this cursor, remove it
		heap.Pop(i)
	} else {
		// modified heap, fix it
		i.cursors[0].curr = *next
		heap.Fix(i, 0)
	}
	// look for any other entries with the exact same term
	for len(i.cursors) > 0 && i.cursors[0].curr.Term == i.entry.Term {
		next, err := i.cursors[0].itr.Next()
		if err != nil {
			return nil, err
		}
		if next == nil {
			// at end of this cursor, remove it
			heap.Pop(i)
		} else {
			// modified heap, fix it
			i.cursors[0].curr = *next
			heap.Fix(i, 0)
		}
	}

	return &i.entry, nil
}

func (i *IndexSnapshotThesaurusKeys) Close() error {
	return nil
}

func (i *IndexSnapshotThesaurusKeys) Contains(key []byte) (bool, error) {
	if len(i.cursors) == 0 {
		return false, nil
	}

	for _, cursor := range i.cursors {
		if found, _ := cursor.thes.Contains(key); found {
			return true, nil
		}
	}

	return false, nil
}
165
index/scorch/snapshot_index_vr.go
Normal file
@@ -0,0 +1,165 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build vectors
// +build vectors

package scorch

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"reflect"

	"github.com/blevesearch/bleve/v2/size"
	index "github.com/blevesearch/bleve_index_api"
	segment_api "github.com/blevesearch/scorch_segment_api/v2"
)

const VectorSearchSupportedSegmentVersion = 16

var reflectStaticSizeIndexSnapshotVectorReader int

func init() {
	var istfr IndexSnapshotVectorReader
	reflectStaticSizeIndexSnapshotVectorReader = int(reflect.TypeOf(istfr).Size())
}

type IndexSnapshotVectorReader struct {
	vector        []float32
	field         string
	k             int64
	snapshot      *IndexSnapshot
	postings      []segment_api.VecPostingsList
	iterators     []segment_api.VecPostingsIterator
	segmentOffset int
	currPosting   segment_api.VecPosting
	currID        index.IndexInternalID
	ctx           context.Context

	searchParams     json.RawMessage
	eligibleSelector index.EligibleDocumentSelector
}

func (i *IndexSnapshotVectorReader) Size() int {
	sizeInBytes := reflectStaticSizeIndexSnapshotVectorReader + size.SizeOfPtr +
		len(i.vector)*size.SizeOfFloat32 +
		len(i.field) +
		len(i.currID)

	for _, entry := range i.postings {
		sizeInBytes += entry.Size()
	}

	for _, entry := range i.iterators {
		sizeInBytes += entry.Size()
	}

	if i.currPosting != nil {
		sizeInBytes += i.currPosting.Size()
	}

	return sizeInBytes
}

func (i *IndexSnapshotVectorReader) Next(preAlloced *index.VectorDoc) (
	*index.VectorDoc, error) {
	rv := preAlloced
	if rv == nil {
		rv = &index.VectorDoc{}
	}

	for i.segmentOffset < len(i.iterators) {
		next, err := i.iterators[i.segmentOffset].Next()
		if err != nil {
			return nil, err
		}
		if next != nil {
			// make segment number into global number by adding offset
			globalOffset := i.snapshot.offsets[i.segmentOffset]
			nnum := next.Number()
			rv.ID = docNumberToBytes(rv.ID, nnum+globalOffset)
			rv.Score = float64(next.Score())

			i.currID = rv.ID
			i.currPosting = next

			return rv, nil
		}
		i.segmentOffset++
	}

	return nil, nil
}

func (i *IndexSnapshotVectorReader) Advance(ID index.IndexInternalID,
	preAlloced *index.VectorDoc) (*index.VectorDoc, error) {

	if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 {
		i2, err := i.snapshot.VectorReader(i.ctx, i.vector, i.field, i.k,
			i.searchParams, i.eligibleSelector)
		if err != nil {
			return nil, err
		}
		// close the current vector reader before replacing it with a new one
		_ = i.Close()
		*i = *(i2.(*IndexSnapshotVectorReader))
	}

	num, err := docInternalToNumber(ID)
	if err != nil {
		return nil, fmt.Errorf("error converting to doc number % x - %v", ID, err)
	}
	segIndex, ldocNum := i.snapshot.segmentIndexAndLocalDocNumFromGlobal(num)
	if segIndex >= len(i.snapshot.segment) {
		return nil, fmt.Errorf("computed segment index %d out of bounds %d",
			segIndex, len(i.snapshot.segment))
	}
	// skip directly to the target segment
	i.segmentOffset = segIndex
	next, err := i.iterators[i.segmentOffset].Advance(ldocNum)
	if err != nil {
		return nil, err
	}
	if next == nil {
		// we jumped directly to the segment that should have contained it
		// but it wasn't there, so reuse Next() which should correctly
		// get the next hit after it (we moved i.segmentOffset)
		return i.Next(preAlloced)
	}

	if preAlloced == nil {
		preAlloced = &index.VectorDoc{}
	}
	preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+
		i.snapshot.offsets[segIndex])
	i.currID = preAlloced.ID
	i.currPosting = next
	return preAlloced, nil
}

func (i *IndexSnapshotVectorReader) Count() uint64 {
	var rv uint64
	for _, posting := range i.postings {
		rv += posting.Count()
	}
	return rv
}

func (i *IndexSnapshotVectorReader) Close() error {
	// TODO: consider whether there is any scope for recycling here.
	return nil
}
340
index/scorch/snapshot_segment.go
Normal file
@@ -0,0 +1,340 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scorch

import (
	"bytes"
	"os"
	"sync"
	"sync/atomic"

	"github.com/RoaringBitmap/roaring/v2"
	"github.com/blevesearch/bleve/v2/size"
	index "github.com/blevesearch/bleve_index_api"
	segment "github.com/blevesearch/scorch_segment_api/v2"
)

var TermSeparator byte = 0xff

var TermSeparatorSplitSlice = []byte{TermSeparator}

type SegmentSnapshot struct {
	// this flag is needed to identify whether this
	// segment was mmaped recently, in which case
	// we consider the loading cost of the metadata
	// as part of IO stats.
	mmaped  uint32
	id      uint64
	segment segment.Segment
	deleted *roaring.Bitmap
	creator string
	stats   *fieldStats

	cachedMeta *cachedMeta

	cachedDocs *cachedDocs
}

func (s *SegmentSnapshot) Segment() segment.Segment {
	return s.segment
}

func (s *SegmentSnapshot) Deleted() *roaring.Bitmap {
	return s.deleted
}

func (s *SegmentSnapshot) Id() uint64 {
	return s.id
}

func (s *SegmentSnapshot) FullSize() int64 {
	return int64(s.segment.Count())
}

func (s *SegmentSnapshot) LiveSize() int64 {
	return int64(s.Count())
}

func (s *SegmentSnapshot) HasVector() bool {
	// number of vectors, for each vector field in the segment
	numVecs := s.stats.Fetch()["num_vectors"]
	return len(numVecs) > 0
}

func (s *SegmentSnapshot) FileSize() int64 {
	ps, ok := s.segment.(segment.PersistedSegment)
	if !ok {
		return 0
	}

	path := ps.Path()
	if path == "" {
		return 0
	}

	fi, err := os.Stat(path)
	if err != nil {
		return 0
	}

	return fi.Size()
}

func (s *SegmentSnapshot) Close() error {
	return s.segment.Close()
}

func (s *SegmentSnapshot) VisitDocument(num uint64, visitor segment.StoredFieldValueVisitor) error {
	return s.segment.VisitStoredFields(num, visitor)
}

func (s *SegmentSnapshot) DocID(num uint64) ([]byte, error) {
	return s.segment.DocID(num)
}

func (s *SegmentSnapshot) Count() uint64 {
	rv := s.segment.Count()
	if s.deleted != nil {
		rv -= s.deleted.GetCardinality()
	}
	return rv
}

func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) {
	rv, err := s.segment.DocNumbers(docIDs)
	if err != nil {
		return nil, err
	}
	if s.deleted != nil {
		rv.AndNot(s.deleted)
	}
	return rv, nil
}

// DocNumbersLive returns a bitmap containing doc numbers for all live docs
func (s *SegmentSnapshot) DocNumbersLive() *roaring.Bitmap {
	rv := roaring.NewBitmap()
	rv.AddRange(0, s.segment.Count())
	if s.deleted != nil {
		rv.AndNot(s.deleted)
	}
	return rv
}

func (s *SegmentSnapshot) Fields() []string {
	return s.segment.Fields()
}

func (s *SegmentSnapshot) Size() (rv int) {
	rv = s.segment.Size()
	if s.deleted != nil {
		rv += int(s.deleted.GetSizeInBytes())
	}
	rv += s.cachedDocs.Size()
	return
}

type cachedFieldDocs struct {
	m       sync.Mutex
	readyCh chan struct{}     // closed when the cachedFieldDocs.docs is ready to be used.
	err     error             // Non-nil if there was an error when preparing this cachedFieldDocs.
	docs    map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF.
	size    uint64
}

func (cfd *cachedFieldDocs) Size() int {
	var rv int
	cfd.m.Lock()
	for _, entry := range cfd.docs {
		rv += 8 /* size of uint64 */ + len(entry)
	}
	cfd.m.Unlock()
	return rv
}

func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) {
	cfd.m.Lock()
	defer func() {
		close(cfd.readyCh)
		cfd.m.Unlock()
	}()

	cfd.size += uint64(size.SizeOfUint64) /* size field */
	dict, err := ss.segment.Dictionary(field)
	if err != nil {
		cfd.err = err
		return
	}

	var postings segment.PostingsList
	var postingsItr segment.PostingsIterator

	dictItr := dict.AutomatonIterator(nil, nil, nil)
	next, err := dictItr.Next()
	for err == nil && next != nil {
		var err1 error
		postings, err1 = dict.PostingsList([]byte(next.Term), nil, postings)
		if err1 != nil {
			cfd.err = err1
			return
		}

		cfd.size += uint64(size.SizeOfUint64) /* map key */
		postingsItr = postings.Iterator(false, false, false, postingsItr)
		nextPosting, err2 := postingsItr.Next()
		for err2 == nil && nextPosting != nil {
			docNum := nextPosting.Number()
			cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...)
			cfd.docs[docNum] = append(cfd.docs[docNum], TermSeparator)
			cfd.size += uint64(len(next.Term) + 1) // map value
			nextPosting, err2 = postingsItr.Next()
		}

		if err2 != nil {
			cfd.err = err2
			return
		}

		next, err = dictItr.Next()
	}

	if err != nil {
		cfd.err = err
		return
	}
}

type cachedDocs struct {
	size  uint64
	m     sync.Mutex                  // As the cache is asynchronously prepared, need a lock
	cache map[string]*cachedFieldDocs // Keyed by field
}

func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error {
	c.m.Lock()

	if c.cache == nil {
		c.cache = make(map[string]*cachedFieldDocs, len(ss.Fields()))
	}

	for _, field := range wantedFields {
		_, exists := c.cache[field]
		if !exists {
			c.cache[field] = &cachedFieldDocs{
				readyCh: make(chan struct{}),
				docs:    make(map[uint64][]byte),
			}

			go c.cache[field].prepareField(field, ss)
		}
	}

	for _, field := range wantedFields {
		cachedFieldDocs := c.cache[field]
		c.m.Unlock()
		<-cachedFieldDocs.readyCh

		if cachedFieldDocs.err != nil {
			return cachedFieldDocs.err
		}
		c.m.Lock()
	}

	c.updateSizeLOCKED()

	c.m.Unlock()
	return nil
}

// hasFields returns true if the cache has all the given fields
func (c *cachedDocs) hasFields(fields []string) bool {
	c.m.Lock()
	for _, field := range fields {
		if _, exists := c.cache[field]; !exists {
			c.m.Unlock()
			return false // found a field not in cache
		}
	}
	c.m.Unlock()
	return true
}

func (c *cachedDocs) Size() int {
	return int(atomic.LoadUint64(&c.size))
}

func (c *cachedDocs) updateSizeLOCKED() {
	sizeInBytes := 0
	for k, v := range c.cache { // cachedFieldDocs
		sizeInBytes += len(k)
		if v != nil {
			sizeInBytes += v.Size()
		}
	}
	atomic.StoreUint64(&c.size, uint64(sizeInBytes))
}

func (c *cachedDocs) visitDoc(localDocNum uint64,
	fields []string, visitor index.DocValueVisitor) {
	c.m.Lock()

	for _, field := range fields {
		if cachedFieldDocs, exists := c.cache[field]; exists {
			c.m.Unlock()
			<-cachedFieldDocs.readyCh
			c.m.Lock()

			if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists {
				for {
					i := bytes.Index(tlist, TermSeparatorSplitSlice)
					if i < 0 {
						break
					}
					visitor(field, tlist[0:i])
					tlist = tlist[i+1:]
				}
			}
		}
	}

	c.m.Unlock()
}

// cachedMeta allows the user of this type to record and cache certain
// segment-specific metadata, so it can be reused across calls instead of
// being recomputed each time.
// For example, searcher creations on the same index snapshot can use this
// struct to fetch the backing index size, which feeds into the memory usage
// calculation that decides whether to allow a query or not.
type cachedMeta struct {
	m    sync.RWMutex
	meta map[string]interface{}
}

func (c *cachedMeta) updateMeta(field string, val interface{}) {
	c.m.Lock()
	if c.meta == nil {
		c.meta = make(map[string]interface{})
	}
	c.meta[field] = val
	c.m.Unlock()
}

func (c *cachedMeta) fetchMeta(field string) (rv interface{}) {
	c.m.RLock()
	rv = c.meta[field]
	c.m.RUnlock()
	return rv
}
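
// Illustrative sketch (not part of the original file): the intended use of
// cachedMeta above. A derived, segment-specific value is computed once,
// recorded with updateMeta, and later calls fetch it instead of recomputing.
// The key name and the derived value are made up for illustration, and a
// non-nil cachedMeta is assumed to have been set on the snapshot.
func exampleCachedMeta(s *SegmentSnapshot) int64 {
	if s.cachedMeta == nil {
		return s.FileSize()
	}
	if v, ok := s.cachedMeta.fetchMeta("exampleFileSize").(int64); ok {
		return v // cached on a previous call
	}
	fileSize := s.FileSize() // derive it once
	s.cachedMeta.updateMeta("exampleFileSize", fileSize)
	return fileSize
}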
85
index/scorch/snapshot_vector_index.go
Normal file
@@ -0,0 +1,85 @@
// Copyright (c) 2023 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build vectors
|
||||
// +build vectors
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment_api "github.com/blevesearch/scorch_segment_api/v2"
|
||||
)
|
||||
|
||||
func (is *IndexSnapshot) VectorReader(ctx context.Context, vector []float32,
|
||||
field string, k int64, searchParams json.RawMessage,
|
||||
eligibleSelector index.EligibleDocumentSelector) (
|
||||
index.VectorReader, error) {
|
||||
rv := &IndexSnapshotVectorReader{
|
||||
vector: vector,
|
||||
field: field,
|
||||
k: k,
|
||||
snapshot: is,
|
||||
searchParams: searchParams,
|
||||
eligibleSelector: eligibleSelector,
|
||||
}
|
||||
|
||||
if rv.postings == nil {
|
||||
rv.postings = make([]segment_api.VecPostingsList, len(is.segment))
|
||||
}
|
||||
if rv.iterators == nil {
|
||||
rv.iterators = make([]segment_api.VecPostingsIterator, len(is.segment))
|
||||
}
|
||||
// initialize postings and iterators within the OptimizeVR's Finish()
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// eligibleDocumentSelector is used to filter out documents that are eligible for
|
||||
// the KNN search from a pre-filter query.
|
||||
type eligibleDocumentSelector struct {
|
||||
// segment ID -> segment local doc nums
|
||||
eligibleDocNums map[int][]uint64
|
||||
is *IndexSnapshot
|
||||
}
|
||||
|
||||
// SegmentEligibleDocs returns the list of eligible local doc numbers for the given segment.
|
||||
func (eds *eligibleDocumentSelector) SegmentEligibleDocs(segmentID int) []uint64 {
|
||||
return eds.eligibleDocNums[segmentID]
|
||||
}
|
||||
|
||||
// AddEligibleDocumentMatch adds a document match to the list of eligible documents.
|
||||
func (eds *eligibleDocumentSelector) AddEligibleDocumentMatch(id index.IndexInternalID) error {
|
||||
if eds.is == nil {
|
||||
return fmt.Errorf("eligibleDocumentSelector is not initialized with IndexSnapshot")
|
||||
}
|
||||
// Get the segment number and the local doc number for this document.
|
||||
segIdx, docNum, err := eds.is.segmentIndexAndLocalDocNum(id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Add the local doc number to the list of eligible doc numbers for this segment.
|
||||
eds.eligibleDocNums[segIdx] = append(eds.eligibleDocNums[segIdx], docNum)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (is *IndexSnapshot) NewEligibleDocumentSelector() index.EligibleDocumentSelector {
|
||||
return &eligibleDocumentSelector{
|
||||
eligibleDocNums: map[int][]uint64{},
|
||||
is: is,
|
||||
}
|
||||
}
|
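A sketch of how the selector above is meant to be wired in, illustrative only and not part of the imported file; it assumes it lives inside the `scorch` package, is built with the `vectors` tag, and that `preFilterHits` comes from running a pre-filter query:

```go
//go:build vectors
// +build vectors

package scorch

import index "github.com/blevesearch/bleve_index_api"

// exampleEligibleSelector is a hypothetical helper: it records every
// pre-filter hit in the selector, which is then passed to VectorReader so
// the KNN search only considers those documents.
func exampleEligibleSelector(is *IndexSnapshot,
    preFilterHits []index.IndexInternalID) (index.EligibleDocumentSelector, error) {
    sel := is.NewEligibleDocumentSelector()
    for _, id := range preFilterHits {
        if err := sel.AddEligibleDocumentMatch(id); err != nil {
            return nil, err
        }
    }
    // sel is now suitable as the eligibleSelector argument of VectorReader.
    return sel, nil
}
```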
160
index/scorch/stats.go
Normal file
@@ -0,0 +1,160 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scorch

import (
    "reflect"
    "sync/atomic"

    "github.com/blevesearch/bleve/v2/util"
)

// Stats tracks statistics about the index, fields that are
// prefixed like CurXxxx are gauges (can go up and down),
// and fields that are prefixed like TotXxxx are monotonically
// increasing counters.
type Stats struct {
    TotUpdates uint64
    TotDeletes uint64

    TotBatches uint64
    TotBatchesEmpty uint64
    TotBatchIntroTime uint64
    MaxBatchIntroTime uint64

    CurRootEpoch uint64
    LastPersistedEpoch uint64
    LastMergedEpoch uint64

    TotOnErrors uint64

    TotAnalysisTime uint64
    TotIndexTime uint64

    TotIndexedPlainTextBytes uint64

    TotBytesReadAtQueryTime uint64
    TotBytesWrittenAtIndexTime uint64

    TotTermSearchersStarted uint64
    TotTermSearchersFinished uint64

    TotKNNSearches uint64
    TotSynonymSearches uint64

    TotEventTriggerStarted uint64
    TotEventTriggerCompleted uint64

    TotIntroduceLoop uint64
    TotIntroduceSegmentBeg uint64
    TotIntroduceSegmentEnd uint64
    TotIntroducePersistBeg uint64
    TotIntroducePersistEnd uint64
    TotIntroduceMergeBeg uint64
    TotIntroduceMergeEnd uint64
    TotIntroduceRevertBeg uint64
    TotIntroduceRevertEnd uint64

    TotIntroducedItems uint64
    TotIntroducedSegmentsBatch uint64
    TotIntroducedSegmentsMerge uint64

    TotPersistLoopBeg uint64
    TotPersistLoopErr uint64
    TotPersistLoopProgress uint64
    TotPersistLoopWait uint64
    TotPersistLoopWaitNotified uint64
    TotPersistLoopEnd uint64

    TotPersistedItems uint64
    TotItemsToPersist uint64
    TotPersistedSegments uint64
    TotMutationsFiltered uint64

    TotPersisterSlowMergerPause uint64
    TotPersisterSlowMergerResume uint64

    TotPersisterNapPauseCompleted uint64
    TotPersisterMergerNapBreak uint64

    TotFileMergeLoopBeg uint64
    TotFileMergeLoopErr uint64
    TotFileMergeLoopEnd uint64

    TotFileMergeForceOpsStarted uint64
    TotFileMergeForceOpsCompleted uint64

    TotFileMergePlan uint64
    TotFileMergePlanErr uint64
    TotFileMergePlanNone uint64
    TotFileMergePlanOk uint64

    TotFileMergePlanTasks uint64
    TotFileMergePlanTasksDone uint64
    TotFileMergePlanTasksErr uint64
    TotFileMergePlanTasksSegments uint64
    TotFileMergePlanTasksSegmentsEmpty uint64

    TotFileMergeSegmentsEmpty uint64
    TotFileMergeSegments uint64
    TotFileSegmentsAtRoot uint64
    TotFileMergeWrittenBytes uint64

    TotFileMergeZapBeg uint64
    TotFileMergeZapEnd uint64
    TotFileMergeZapTime uint64
    MaxFileMergeZapTime uint64
    TotFileMergeZapIntroductionTime uint64
    MaxFileMergeZapIntroductionTime uint64

    TotFileMergeIntroductions uint64
    TotFileMergeIntroductionsDone uint64
    TotFileMergeIntroductionsSkipped uint64
    TotFileMergeIntroductionsObsoleted uint64

    CurFilesIneligibleForRemoval uint64
    TotSnapshotsRemovedFromMetaStore uint64

    TotMemMergeBeg uint64
    TotMemMergeErr uint64
    TotMemMergeDone uint64
    TotMemMergeZapBeg uint64
    TotMemMergeZapEnd uint64
    TotMemMergeZapTime uint64
    MaxMemMergeZapTime uint64
    TotMemMergeSegments uint64
    TotMemorySegmentsAtRoot uint64
}

// atomically populates the returned map
func (s *Stats) ToMap() map[string]interface{} {
    m := map[string]interface{}{}
    sve := reflect.ValueOf(s).Elem()
    svet := sve.Type()
    for i := 0; i < svet.NumField(); i++ {
        svef := sve.Field(i)
        if svef.CanAddr() {
            svefp := svef.Addr().Interface()
            m[svet.Field(i).Name] = atomic.LoadUint64(svefp.(*uint64))
        }
    }
    return m
}

// MarshalJSON implements json.Marshaler, and in contrast to standard
// json marshaling provides atomic safety
func (s *Stats) MarshalJSON() ([]byte, error) {
    return util.MarshalJSON(s.ToMap())
}
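Because every field of `Stats` is read with `atomic.LoadUint64`, callers can snapshot the counters while indexing continues. A minimal sketch, illustrative only and not part of the imported file (the zero-value `Stats` and the printed field are just placeholders for stats obtained from a live scorch index):

```go
package main

import (
    "encoding/json"
    "fmt"

    "github.com/blevesearch/bleve/v2/index/scorch"
)

func main() {
    var s scorch.Stats // placeholder; normally obtained from a live scorch index

    m := s.ToMap() // atomic snapshot of every counter
    fmt.Println("updates:", m["TotUpdates"])

    b, err := json.Marshal(&s) // routed through Stats.MarshalJSON, same atomic path
    if err != nil {
        panic(err)
    }
    fmt.Println(string(b))
}
```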
182
index/scorch/unadorned.go
Normal file
@@ -0,0 +1,182 @@
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scorch

import (
    "math"
    "reflect"

    "github.com/RoaringBitmap/roaring/v2"
    segment "github.com/blevesearch/scorch_segment_api/v2"
)

var reflectStaticSizeUnadornedPostingsIteratorBitmap int
var reflectStaticSizeUnadornedPostingsIterator1Hit int
var reflectStaticSizeUnadornedPosting int

func init() {
    var pib unadornedPostingsIteratorBitmap
    reflectStaticSizeUnadornedPostingsIteratorBitmap = int(reflect.TypeOf(pib).Size())
    var pi1h unadornedPostingsIterator1Hit
    reflectStaticSizeUnadornedPostingsIterator1Hit = int(reflect.TypeOf(pi1h).Size())
    var up UnadornedPosting
    reflectStaticSizeUnadornedPosting = int(reflect.TypeOf(up).Size())
}

type unadornedPostingsIteratorBitmap struct {
    actual roaring.IntPeekable
    actualBM *roaring.Bitmap
}

func (i *unadornedPostingsIteratorBitmap) Next() (segment.Posting, error) {
    return i.nextAtOrAfter(0)
}

func (i *unadornedPostingsIteratorBitmap) Advance(docNum uint64) (segment.Posting, error) {
    return i.nextAtOrAfter(docNum)
}

func (i *unadornedPostingsIteratorBitmap) nextAtOrAfter(atOrAfter uint64) (segment.Posting, error) {
    docNum, exists := i.nextDocNumAtOrAfter(atOrAfter)
    if !exists {
        return nil, nil
    }
    return UnadornedPosting(docNum), nil
}

func (i *unadornedPostingsIteratorBitmap) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool) {
    if i.actual == nil || !i.actual.HasNext() {
        return 0, false
    }
    i.actual.AdvanceIfNeeded(uint32(atOrAfter))

    if !i.actual.HasNext() {
        return 0, false // couldn't find anything
    }

    return uint64(i.actual.Next()), true
}

func (i *unadornedPostingsIteratorBitmap) Size() int {
    return reflectStaticSizeUnadornedPostingsIteratorBitmap
}

func (i *unadornedPostingsIteratorBitmap) BytesRead() uint64 {
    return 0
}

func (i *unadornedPostingsIteratorBitmap) BytesWritten() uint64 {
    return 0
}

func (i *unadornedPostingsIteratorBitmap) ResetBytesRead(uint64) {}

func (i *unadornedPostingsIteratorBitmap) ActualBitmap() *roaring.Bitmap {
    return i.actualBM
}

func (i *unadornedPostingsIteratorBitmap) DocNum1Hit() (uint64, bool) {
    return 0, false
}

func (i *unadornedPostingsIteratorBitmap) ReplaceActual(actual *roaring.Bitmap) {
    i.actualBM = actual
    i.actual = actual.Iterator()
}

func newUnadornedPostingsIteratorFromBitmap(bm *roaring.Bitmap) segment.PostingsIterator {
    return &unadornedPostingsIteratorBitmap{
        actualBM: bm,
        actual: bm.Iterator(),
    }
}

const docNum1HitFinished = math.MaxUint64

type unadornedPostingsIterator1Hit struct {
    docNum uint64
}

func (i *unadornedPostingsIterator1Hit) Next() (segment.Posting, error) {
    return i.nextAtOrAfter(0)
}

func (i *unadornedPostingsIterator1Hit) Advance(docNum uint64) (segment.Posting, error) {
    return i.nextAtOrAfter(docNum)
}

func (i *unadornedPostingsIterator1Hit) nextAtOrAfter(atOrAfter uint64) (segment.Posting, error) {
    docNum, exists := i.nextDocNumAtOrAfter(atOrAfter)
    if !exists {
        return nil, nil
    }
    return UnadornedPosting(docNum), nil
}

func (i *unadornedPostingsIterator1Hit) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool) {
    if i.docNum == docNum1HitFinished {
        return 0, false
    }
    if i.docNum < atOrAfter {
        // advanced past our 1-hit
        i.docNum = docNum1HitFinished // consume our 1-hit docNum
        return 0, false
    }
    docNum := i.docNum
    i.docNum = docNum1HitFinished // consume our 1-hit docNum
    return docNum, true
}

func (i *unadornedPostingsIterator1Hit) Size() int {
    return reflectStaticSizeUnadornedPostingsIterator1Hit
}

func (i *unadornedPostingsIterator1Hit) BytesRead() uint64 {
    return 0
}

func (i *unadornedPostingsIterator1Hit) BytesWritten() uint64 {
    return 0
}

func (i *unadornedPostingsIterator1Hit) ResetBytesRead(uint64) {}

func newUnadornedPostingsIteratorFrom1Hit(docNum1Hit uint64) segment.PostingsIterator {
    return &unadornedPostingsIterator1Hit{
        docNum1Hit,
    }
}

type UnadornedPosting uint64

func (p UnadornedPosting) Number() uint64 {
    return uint64(p)
}

func (p UnadornedPosting) Frequency() uint64 {
    return 0
}

func (p UnadornedPosting) Norm() float64 {
    return 0
}

func (p UnadornedPosting) Locations() []segment.Location {
    return nil
}

func (p UnadornedPosting) Size() int {
    return reflectStaticSizeUnadornedPosting
}
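The unadorned iterators walk doc numbers with no frequency, norm, or location data, and signal exhaustion by returning a nil posting. A minimal iteration sketch, illustrative only and not part of the imported file, assuming it lives inside the `scorch` package:

```go
package scorch

import (
    "fmt"

    "github.com/RoaringBitmap/roaring/v2"
)

// exampleUnadornedIteration is a hypothetical helper: Next returns (nil, nil)
// when the underlying bitmap is exhausted, so both values must be checked.
func exampleUnadornedIteration() error {
    bm := roaring.BitmapOf(3, 7, 42)
    it := newUnadornedPostingsIteratorFromBitmap(bm)
    for {
        p, err := it.Next()
        if err != nil {
            return err
        }
        if p == nil {
            return nil // exhausted
        }
        fmt.Println("doc:", p.Number()) // prints 3, 7, 42
    }
}
```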
129
index/upsidedown/analysis.go
Normal file
@@ -0,0 +1,129 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package upsidedown

import (
    index "github.com/blevesearch/bleve_index_api"
)

type IndexRow interface {
    KeySize() int
    KeyTo([]byte) (int, error)
    Key() []byte

    ValueSize() int
    ValueTo([]byte) (int, error)
    Value() []byte
}

type AnalysisResult struct {
    DocID string
    Rows []IndexRow
}

func (udc *UpsideDownCouch) Analyze(d index.Document) *AnalysisResult {
    return udc.analyze(d)
}

func (udc *UpsideDownCouch) analyze(d index.Document) *AnalysisResult {
    rv := &AnalysisResult{
        DocID: d.ID(),
        Rows: make([]IndexRow, 0, 100),
    }

    docIDBytes := []byte(d.ID())

    // track our back index entries
    backIndexStoredEntries := make([]*BackIndexStoreEntry, 0)

    // information we collate as we merge fields with same name
    fieldTermFreqs := make(map[uint16]index.TokenFrequencies)
    fieldLengths := make(map[uint16]int)
    fieldIncludeTermVectors := make(map[uint16]bool)
    fieldNames := make(map[uint16]string)

    analyzeField := func(field index.Field, storable bool) {
        fieldIndex, newFieldRow := udc.fieldIndexOrNewRow(field.Name())
        if newFieldRow != nil {
            rv.Rows = append(rv.Rows, newFieldRow)
        }
        fieldNames[fieldIndex] = field.Name()

        if field.Options().IsIndexed() {
            field.Analyze()
            fieldLength := field.AnalyzedLength()
            tokenFreqs := field.AnalyzedTokenFrequencies()
            existingFreqs := fieldTermFreqs[fieldIndex]
            if existingFreqs == nil {
                fieldTermFreqs[fieldIndex] = tokenFreqs
            } else {
                existingFreqs.MergeAll(field.Name(), tokenFreqs)
                fieldTermFreqs[fieldIndex] = existingFreqs
            }
            fieldLengths[fieldIndex] += fieldLength
            fieldIncludeTermVectors[fieldIndex] = field.Options().IncludeTermVectors()
        }

        if storable && field.Options().IsStored() {
            rv.Rows, backIndexStoredEntries = udc.storeField(docIDBytes, field, fieldIndex, rv.Rows, backIndexStoredEntries)
        }
    }

    // walk all the fields, record stored fields now
    // place information about indexed fields into map
    // this collates information across fields with
    // same names (arrays)
    d.VisitFields(func(field index.Field) {
        analyzeField(field, true)
    })

    if d.HasComposite() {
        for fieldIndex, tokenFreqs := range fieldTermFreqs {
            // see if any of the composite fields need this
            d.VisitComposite(func(field index.CompositeField) {
                field.Compose(fieldNames[fieldIndex], fieldLengths[fieldIndex], tokenFreqs)
            })
        }

        d.VisitComposite(func(field index.CompositeField) {
            analyzeField(field, false)
        })
    }

    rowsCapNeeded := len(rv.Rows) + 1
    for _, tokenFreqs := range fieldTermFreqs {
        rowsCapNeeded += len(tokenFreqs)
    }

    rv.Rows = append(make([]IndexRow, 0, rowsCapNeeded), rv.Rows...)

    backIndexTermsEntries := make([]*BackIndexTermsEntry, 0, len(fieldTermFreqs))

    // walk through the collated information and process
    // once for each indexed field (unique name)
    for fieldIndex, tokenFreqs := range fieldTermFreqs {
        fieldLength := fieldLengths[fieldIndex]
        includeTermVectors := fieldIncludeTermVectors[fieldIndex]

        // encode this field
        rv.Rows, backIndexTermsEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermsEntries)
    }

    // build the back index row
    backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermsEntries, backIndexStoredEntries)
    rv.Rows = append(rv.Rows, backIndexRow)

    return rv
}
115
index/upsidedown/analysis_test.go
Normal file
@@ -0,0 +1,115 @@
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package upsidedown

import (
    "testing"

    "github.com/blevesearch/bleve/v2/analysis/analyzer/standard"
    "github.com/blevesearch/bleve/v2/document"
    "github.com/blevesearch/bleve/v2/index/upsidedown/store/null"
    "github.com/blevesearch/bleve/v2/registry"
    index "github.com/blevesearch/bleve_index_api"
)

func TestAnalysisBug328(t *testing.T) {
    cache := registry.NewCache()
    analyzer, err := cache.AnalyzerNamed(standard.Name)
    if err != nil {
        t.Fatal(err)
    }

    analysisQueue := index.NewAnalysisQueue(1)
    idx, err := NewUpsideDownCouch(null.Name, nil, analysisQueue)
    if err != nil {
        t.Fatal(err)
    }

    d := document.NewDocument("1")
    f := document.NewTextFieldCustom("title", nil, []byte("bleve"), index.IndexField|index.IncludeTermVectors, analyzer)
    d.AddField(f)
    f = document.NewTextFieldCustom("body", nil, []byte("bleve"), index.IndexField|index.IncludeTermVectors, analyzer)
    d.AddField(f)
    cf := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, []string{}, index.IndexField|index.IncludeTermVectors)
    d.AddField(cf)

    rv := idx.(*UpsideDownCouch).analyze(d)
    fieldIndexes := make(map[uint16]string)
    for _, row := range rv.Rows {
        if row, ok := row.(*FieldRow); ok {
            fieldIndexes[row.index] = row.name
        }
        if row, ok := row.(*TermFrequencyRow); ok && string(row.term) == "bleve" {
            for _, vec := range row.vectors {
                if vec.field != row.field {
                    if fieldIndexes[row.field] != "_all" {
                        t.Errorf("row named %s field %d - vector field %d", fieldIndexes[row.field], row.field, vec.field)
                    }
                }
            }
        }
    }
}

func BenchmarkAnalyze(b *testing.B) {

    cache := registry.NewCache()
    analyzer, err := cache.AnalyzerNamed(standard.Name)
    if err != nil {
        b.Fatal(err)
    }

    analysisQueue := index.NewAnalysisQueue(1)
    idx, err := NewUpsideDownCouch(null.Name, nil, analysisQueue)
    if err != nil {
        b.Fatal(err)
    }

    d := document.NewDocument("1")
    f := document.NewTextFieldWithAnalyzer("desc", nil, bleveWikiArticle1K, analyzer)
    d.AddField(f)

    b.ResetTimer()

    for i := 0; i < b.N; i++ {
        rv := idx.(*UpsideDownCouch).analyze(d)
        if len(rv.Rows) < 92 || len(rv.Rows) > 93 {
            b.Fatalf("expected 92-93 rows, got %d", len(rv.Rows))
        }
    }
}

var bleveWikiArticle1K = []byte(`Boiling liquid expanding vapor explosion
From Wikipedia, the free encyclopedia
See also: Boiler explosion and Steam explosion

Flames subsequent to a flammable liquid BLEVE from a tanker. BLEVEs do not necessarily involve fire.

This article's tone or style may not reflect the encyclopedic tone used on Wikipedia. See Wikipedia's guide to writing better articles for suggestions. (July 2013)
A boiling liquid expanding vapor explosion (BLEVE, /ˈblɛviː/ blev-ee) is an explosion caused by the rupture of a vessel containing a pressurized liquid above its boiling point.[1]
Contents [hide]
1 Mechanism
1.1 Water example
1.2 BLEVEs without chemical reactions
2 Fires
3 Incidents
4 Safety measures
5 See also
6 References
7 External links
Mechanism[edit]

This section needs additional citations for verification. Please help improve this article by adding citations to reliable sources. Unsourced material may be challenged and removed. (July 2013)
There are three characteristics of liquids which are relevant to the discussion of a BLEVE:`)
8
index/upsidedown/benchmark_all.sh
Executable file
@@ -0,0 +1,8 @@
#!/bin/sh

BENCHMARKS=`grep "func Benchmark" *_test.go | sed 's/.*func //' | sed s/\(.*{//`

for BENCHMARK in $BENCHMARKS
do
  go test -v -run=xxx -bench=^$BENCHMARK$ -benchtime=10s -tags 'forestdb leveldb' | grep -v ok | grep -v PASS
done
75
index/upsidedown/benchmark_boltdb_test.go
Normal file
@@ -0,0 +1,75 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package upsidedown

import (
    "testing"

    "github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb"
)

var boltTestConfig = map[string]interface{}{
    "path": "test",
}

func BenchmarkBoltDBIndexing1Workers(b *testing.B) {
    CommonBenchmarkIndex(b, boltdb.Name, boltTestConfig, DestroyTest, 1)
}

func BenchmarkBoltDBIndexing2Workers(b *testing.B) {
    CommonBenchmarkIndex(b, boltdb.Name, boltTestConfig, DestroyTest, 2)
}

func BenchmarkBoltDBIndexing4Workers(b *testing.B) {
    CommonBenchmarkIndex(b, boltdb.Name, boltTestConfig, DestroyTest, 4)
}

// batches

func BenchmarkBoltDBIndexing1Workers10Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 1, 10)
}

func BenchmarkBoltDBIndexing2Workers10Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 2, 10)
}

func BenchmarkBoltDBIndexing4Workers10Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 4, 10)
}

func BenchmarkBoltDBIndexing1Workers100Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 1, 100)
}

func BenchmarkBoltDBIndexing2Workers100Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 2, 100)
}

func BenchmarkBoltDBIndexing4Workers100Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 4, 100)
}

func BenchmarkBoltBIndexing1Workers1000Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 1, 1000)
}

func BenchmarkBoltBIndexing2Workers1000Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 2, 1000)
}

func BenchmarkBoltBIndexing4Workers1000Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 4, 1000)
}
149
index/upsidedown/benchmark_common_test.go
Normal file
@@ -0,0 +1,149 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package upsidedown

import (
    "os"
    "strconv"
    "testing"

    _ "github.com/blevesearch/bleve/v2/analysis/analyzer/standard"
    "github.com/blevesearch/bleve/v2/document"
    "github.com/blevesearch/bleve/v2/registry"
    index "github.com/blevesearch/bleve_index_api"
)

var benchmarkDocBodies = []string{
    "A boiling liquid expanding vapor explosion (BLEVE, /ˈblɛviː/ blev-ee) is an explosion caused by the rupture of a vessel containing a pressurized liquid above its boiling point.",
    "A boiler explosion is a catastrophic failure of a boiler. As seen today, boiler explosions are of two kinds. One kind is a failure of the pressure parts of the steam and water sides. There can be many different causes, such as failure of the safety valve, corrosion of critical parts of the boiler, or low water level. Corrosion along the edges of lap joints was a common cause of early boiler explosions.",
    "A boiler is a closed vessel in which water or other fluid is heated. The fluid does not necessarily boil. (In North America the term \"furnace\" is normally used if the purpose is not actually to boil the fluid.) The heated or vaporized fluid exits the boiler for use in various processes or heating applications,[1][2] including central heating, boiler-based power generation, cooking, and sanitation.",
    "A pressure vessel is a closed container designed to hold gases or liquids at a pressure substantially different from the ambient pressure.",
    "Pressure (symbol: p or P) is the ratio of force to the area over which that force is distributed.",
    "Liquid is one of the four fundamental states of matter (the others being solid, gas, and plasma), and is the only state with a definite volume but no fixed shape.",
    "The boiling point of a substance is the temperature at which the vapor pressure of the liquid equals the pressure surrounding the liquid[1][2] and the liquid changes into a vapor.",
    "Vapor pressure or equilibrium vapor pressure is defined as the pressure exerted by a vapor in thermodynamic equilibrium with its condensed phases (solid or liquid) at a given temperature in a closed system.",
    "Industrial gases are a group of gases that are specifically manufactured for use in a wide range of industries, which include oil and gas, petrochemicals, chemicals, power, mining, steelmaking, metals, environmental protection, medicine, pharmaceuticals, biotechnology, food, water, fertilizers, nuclear power, electronics and aerospace.",
    "The expansion ratio of a liquefied and cryogenic substance is the volume of a given amount of that substance in liquid form compared to the volume of the same amount of substance in gaseous form, at room temperature and normal atmospheric pressure.",
}

type KVStoreDestroy func() error

func DestroyTest() error {
    return os.RemoveAll("test")
}

func CommonBenchmarkIndex(b *testing.B, storeName string, storeConfig map[string]interface{}, destroy KVStoreDestroy, analysisWorkers int) {

    cache := registry.NewCache()
    analyzer, err := cache.AnalyzerNamed("standard")
    if err != nil {
        b.Fatal(err)
    }

    indexDocument := document.NewDocument("").
        AddField(document.NewTextFieldWithAnalyzer("body", []uint64{}, []byte(benchmarkDocBodies[0]), analyzer))

    b.ResetTimer()
    b.StopTimer()
    for i := 0; i < b.N; i++ {
        analysisQueue := index.NewAnalysisQueue(analysisWorkers)
        idx, err := NewUpsideDownCouch(storeName, storeConfig, analysisQueue)
        if err != nil {
            b.Fatal(err)
        }

        err = idx.Open()
        if err != nil {
            b.Fatal(err)
        }
        indexDocument.SetID(strconv.Itoa(i))
        // just time the indexing portion
        b.StartTimer()
        err = idx.Update(indexDocument)
        if err != nil {
            b.Fatal(err)
        }
        b.StopTimer()
        err = idx.Close()
        if err != nil {
            b.Fatal(err)
        }
        err = destroy()
        if err != nil {
            b.Fatal(err)
        }
        analysisQueue.Close()
    }
}

func CommonBenchmarkIndexBatch(b *testing.B, storeName string, storeConfig map[string]interface{}, destroy KVStoreDestroy, analysisWorkers, batchSize int) {

    cache := registry.NewCache()
    analyzer, err := cache.AnalyzerNamed("standard")
    if err != nil {
        b.Fatal(err)
    }

    b.ResetTimer()
    b.StopTimer()
    for i := 0; i < b.N; i++ {

        analysisQueue := index.NewAnalysisQueue(analysisWorkers)
        idx, err := NewUpsideDownCouch(storeName, storeConfig, analysisQueue)
        if err != nil {
            b.Fatal(err)
        }

        err = idx.Open()
        if err != nil {
            b.Fatal(err)
        }

        b.StartTimer()
        batch := index.NewBatch()
        for j := 0; j < 1000; j++ {
            if j%batchSize == 0 {
                if len(batch.IndexOps) > 0 {
                    err := idx.Batch(batch)
                    if err != nil {
                        b.Fatal(err)
                    }
                }
                batch = index.NewBatch()
            }
            indexDocument := document.NewDocument("").
                AddField(document.NewTextFieldWithAnalyzer("body", []uint64{}, []byte(benchmarkDocBodies[j%10]), analyzer))
            indexDocument.SetID(strconv.Itoa(i) + "-" + strconv.Itoa(j))
            batch.Update(indexDocument)
        }
        // close last batch
        if len(batch.IndexOps) > 0 {
            err := idx.Batch(batch)
            if err != nil {
                b.Fatal(err)
            }
        }
        b.StopTimer()
        err = idx.Close()
        if err != nil {
            b.Fatal(err)
        }
        err = destroy()
        if err != nil {
            b.Fatal(err)
        }
        analysisQueue.Close()
    }
}
71
index/upsidedown/benchmark_gtreap_test.go
Normal file
@@ -0,0 +1,71 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package upsidedown

import (
    "testing"

    "github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
)

func BenchmarkGTreapIndexing1Workers(b *testing.B) {
    CommonBenchmarkIndex(b, gtreap.Name, nil, DestroyTest, 1)
}

func BenchmarkGTreapIndexing2Workers(b *testing.B) {
    CommonBenchmarkIndex(b, gtreap.Name, nil, DestroyTest, 2)
}

func BenchmarkGTreapIndexing4Workers(b *testing.B) {
    CommonBenchmarkIndex(b, gtreap.Name, nil, DestroyTest, 4)
}

// batches

func BenchmarkGTreapIndexing1Workers10Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 1, 10)
}

func BenchmarkGTreapIndexing2Workers10Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 2, 10)
}

func BenchmarkGTreapIndexing4Workers10Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 4, 10)
}

func BenchmarkGTreapIndexing1Workers100Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 1, 100)
}

func BenchmarkGTreapIndexing2Workers100Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 2, 100)
}

func BenchmarkGTreapIndexing4Workers100Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 4, 100)
}

func BenchmarkGTreapIndexing1Workers1000Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 1, 1000)
}

func BenchmarkGTreapIndexing2Workers1000Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 2, 1000)
}

func BenchmarkGTreapIndexing4Workers1000Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 4, 1000)
}
71
index/upsidedown/benchmark_null_test.go
Normal file
@@ -0,0 +1,71 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package upsidedown

import (
    "testing"

    "github.com/blevesearch/bleve/v2/index/upsidedown/store/null"
)

func BenchmarkNullIndexing1Workers(b *testing.B) {
    CommonBenchmarkIndex(b, null.Name, nil, DestroyTest, 1)
}

func BenchmarkNullIndexing2Workers(b *testing.B) {
    CommonBenchmarkIndex(b, null.Name, nil, DestroyTest, 2)
}

func BenchmarkNullIndexing4Workers(b *testing.B) {
    CommonBenchmarkIndex(b, null.Name, nil, DestroyTest, 4)
}

// batches

func BenchmarkNullIndexing1Workers10Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 1, 10)
}

func BenchmarkNullIndexing2Workers10Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 2, 10)
}

func BenchmarkNullIndexing4Workers10Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 4, 10)
}

func BenchmarkNullIndexing1Workers100Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 1, 100)
}

func BenchmarkNullIndexing2Workers100Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 2, 100)
}

func BenchmarkNullIndexing4Workers100Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 4, 100)
}

func BenchmarkNullIndexing1Workers1000Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 1, 1000)
}

func BenchmarkNullIndexing2Workers1000Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 2, 1000)
}

func BenchmarkNullIndexing4Workers1000Batch(b *testing.B) {
    CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 4, 1000)
}
174
index/upsidedown/dump.go
Normal file
@@ -0,0 +1,174 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package upsidedown

import (
    "bytes"
    "sort"

    "github.com/blevesearch/upsidedown_store_api"
)

// the functions in this file are only intended to be used by
// the bleve_dump utility and the debug http handlers
// if your application relies on them, you're doing something wrong
// they may change or be removed at any time

func dumpPrefix(kvreader store.KVReader, rv chan interface{}, prefix []byte) {
    start := prefix
    if start == nil {
        start = []byte{0}
    }
    it := kvreader.PrefixIterator(start)
    defer func() {
        cerr := it.Close()
        if cerr != nil {
            rv <- cerr
        }
    }()
    key, val, valid := it.Current()
    for valid {
        ck := make([]byte, len(key))
        copy(ck, key)
        cv := make([]byte, len(val))
        copy(cv, val)
        row, err := ParseFromKeyValue(ck, cv)
        if err != nil {
            rv <- err
            return
        }
        rv <- row

        it.Next()
        key, val, valid = it.Current()
    }
}

func dumpRange(kvreader store.KVReader, rv chan interface{}, start, end []byte) {
    it := kvreader.RangeIterator(start, end)
    defer func() {
        cerr := it.Close()
        if cerr != nil {
            rv <- cerr
        }
    }()
    key, val, valid := it.Current()
    for valid {
        ck := make([]byte, len(key))
        copy(ck, key)
        cv := make([]byte, len(val))
        copy(cv, val)
        row, err := ParseFromKeyValue(ck, cv)
        if err != nil {
            rv <- err
            return
        }
        rv <- row

        it.Next()
        key, val, valid = it.Current()
    }
}

func (i *IndexReader) DumpAll() chan interface{} {
    rv := make(chan interface{})
    go func() {
        defer close(rv)
        dumpRange(i.kvreader, rv, nil, nil)
    }()
    return rv
}

func (i *IndexReader) DumpFields() chan interface{} {
    rv := make(chan interface{})
    go func() {
        defer close(rv)
        dumpPrefix(i.kvreader, rv, []byte{'f'})
    }()
    return rv
}

type keyset [][]byte

func (k keyset) Len() int { return len(k) }
func (k keyset) Swap(i, j int) { k[i], k[j] = k[j], k[i] }
func (k keyset) Less(i, j int) bool { return bytes.Compare(k[i], k[j]) < 0 }

// DumpDoc returns all rows in the index related to this doc id
func (i *IndexReader) DumpDoc(id string) chan interface{} {
    idBytes := []byte(id)

    rv := make(chan interface{})

    go func() {
        defer close(rv)

        back, err := backIndexRowForDoc(i.kvreader, []byte(id))
        if err != nil {
            rv <- err
            return
        }

        // no such doc
        if back == nil {
            return
        }
        // build sorted list of term keys
        keys := make(keyset, 0)
        for _, entry := range back.termsEntries {
            for i := range entry.Terms {
                tfr := NewTermFrequencyRow([]byte(entry.Terms[i]), uint16(*entry.Field), idBytes, 0, 0)
                key := tfr.Key()
                keys = append(keys, key)
            }
        }
        sort.Sort(keys)

        // first add all the stored rows
        storedRowPrefix := NewStoredRow(idBytes, 0, []uint64{}, 'x', []byte{}).ScanPrefixForDoc()
        dumpPrefix(i.kvreader, rv, storedRowPrefix)

        // now walk term keys in order and add them as well
        if len(keys) > 0 {
            it := i.kvreader.RangeIterator(keys[0], nil)
            defer func() {
                cerr := it.Close()
                if cerr != nil {
                    rv <- cerr
                }
            }()

            for _, key := range keys {
                it.Seek(key)
                rkey, rval, valid := it.Current()
                if !valid {
                    break
                }
                rck := make([]byte, len(rkey))
                copy(rck, key)
                rcv := make([]byte, len(rval))
                copy(rcv, rval)
                row, err := ParseFromKeyValue(rck, rcv)
                if err != nil {
                    rv <- err
                    return
                }
                rv <- row
            }
        }
    }()

    return rv
}
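The dump channels above interleave rows and errors on a single `chan interface{}`. A minimal consumer sketch, illustrative only and not part of the imported file, assuming it lives inside the `upsidedown` package with an already-open `*IndexReader`:

```go
package upsidedown

import "fmt"

// exampleDumpDoc is a hypothetical helper: it drains the channel fully and
// type-checks each value, since errors arrive on the same channel as rows.
func exampleDumpDoc(r *IndexReader, id string) error {
    for item := range r.DumpDoc(id) {
        if err, ok := item.(error); ok {
            return err
        }
        fmt.Printf("%T: %v\n", item, item) // one index row per value
    }
    return nil
}
```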
155
index/upsidedown/dump_test.go
Normal file
@@ -0,0 +1,155 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package upsidedown

import (
    "testing"
    "time"

    "github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb"
    index "github.com/blevesearch/bleve_index_api"

    "github.com/blevesearch/bleve/v2/document"
)

func TestDump(t *testing.T) {
    defer func() {
        err := DestroyTest()
        if err != nil {
            t.Fatal(err)
        }
    }()

    analysisQueue := index.NewAnalysisQueue(1)
    idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
    if err != nil {
        t.Fatal(err)
    }
    err = idx.Open()
    if err != nil {
        t.Errorf("error opening index: %v", err)
    }
    defer func() {
        err := idx.Close()
        if err != nil {
            t.Fatal(err)
        }
    }()

    var expectedCount uint64
    reader, err := idx.Reader()
    if err != nil {
        t.Fatal(err)
    }
    docCount, err := reader.DocCount()
    if err != nil {
        t.Error(err)
    }
    if docCount != expectedCount {
        t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
    }
    err = reader.Close()
    if err != nil {
        t.Fatal(err)
    }

    doc := document.NewDocument("1")
    doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), index.IndexField|index.StoreField))
    doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 35.99, index.IndexField|index.StoreField))
    dateField, err := document.NewDateTimeFieldWithIndexingOptions("unixEpoch", []uint64{}, time.Unix(0, 0), time.RFC3339, index.IndexField|index.StoreField)
    if err != nil {
        t.Error(err)
    }
    doc.AddField(dateField)
    err = idx.Update(doc)
    if err != nil {
        t.Errorf("Error updating index: %v", err)
    }

    doc = document.NewDocument("2")
    doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test2"), index.IndexField|index.StoreField))
    doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 35.99, index.IndexField|index.StoreField))
    dateField, err = document.NewDateTimeFieldWithIndexingOptions("unixEpoch", []uint64{}, time.Unix(0, 0), time.RFC3339, index.IndexField|index.StoreField)
    if err != nil {
        t.Error(err)
    }
    doc.AddField(dateField)
    err = idx.Update(doc)
    if err != nil {
        t.Errorf("Error updating index: %v", err)
    }

    fieldsCount := 0
    reader, err = idx.Reader()
    if err != nil {
        t.Fatal(err)
    }
    upsideDownReader, ok := reader.(*IndexReader)
    if !ok {
        t.Fatal("dump is only supported by index type upsidedown")
    }
    fieldsRows := upsideDownReader.DumpFields()
    for range fieldsRows {
        fieldsCount++
    }
    if fieldsCount != 3 {
        t.Errorf("expected 3 fields, got %d", fieldsCount)
    }

    // 1 text term
    // 16 numeric terms
    // 16 date terms
    // 3 stored fields
    expectedDocRowCount := int(1 + (2 * (64 / document.DefaultPrecisionStep)) + 3)
    docRowCount := 0
    docRows := upsideDownReader.DumpDoc("1")
    for range docRows {
        docRowCount++
    }
    if docRowCount != expectedDocRowCount {
        t.Errorf("expected %d rows for document, got %d", expectedDocRowCount, docRowCount)
    }

    docRowCount = 0
    docRows = upsideDownReader.DumpDoc("2")
    for range docRows {
        docRowCount++
    }
    if docRowCount != expectedDocRowCount {
        t.Errorf("expected %d rows for document, got %d", expectedDocRowCount, docRowCount)
    }

    // 1 version
    // fieldsCount field rows
    // 2 docs * expectedDocRowCount
    // 2 back index rows
    // 2 text term row count (2 different text terms)
    // 16 numeric term row counts (shared for both docs, same numeric value)
    // 16 date term row counts (shared for both docs, same date value)
    expectedAllRowCount := int(1 + fieldsCount + (2 * expectedDocRowCount) + 2 + 2 + int((2 * (64 / document.DefaultPrecisionStep))))
    allRowCount := 0
    allRows := upsideDownReader.DumpAll()
    for range allRows {
        allRowCount++
    }
    if allRowCount != expectedAllRowCount {
        t.Errorf("expected %d rows for all, got %d", expectedAllRowCount, allRowCount)
    }

    err = reader.Close()
    if err != nil {
        t.Fatal(err)
    }
}
88
index/upsidedown/field_cache.go
Normal file
@@ -0,0 +1,88 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package upsidedown

import (
    "sync"
)

type FieldCache struct {
    fieldIndexes map[string]uint16
    indexFields []string
    lastFieldIndex int
    mutex sync.RWMutex
}

func NewFieldCache() *FieldCache {
    return &FieldCache{
        fieldIndexes: make(map[string]uint16),
        lastFieldIndex: -1,
    }
}

func (f *FieldCache) AddExisting(field string, index uint16) {
    f.mutex.Lock()
    f.addLOCKED(field, index)
    f.mutex.Unlock()
}

func (f *FieldCache) addLOCKED(field string, index uint16) uint16 {
    f.fieldIndexes[field] = index
    if len(f.indexFields) < int(index)+1 {
        prevIndexFields := f.indexFields
        f.indexFields = make([]string, int(index)+16)
        copy(f.indexFields, prevIndexFields)
    }
    f.indexFields[int(index)] = field
    if int(index) > f.lastFieldIndex {
        f.lastFieldIndex = int(index)
    }
    return index
}

// FieldNamed returns the index of the field, and whether or not it existed
// before this call. If createIfMissing is true, a new field index is assigned,
// but the second return value will still be false.
func (f *FieldCache) FieldNamed(field string, createIfMissing bool) (uint16, bool) {
    f.mutex.RLock()
    if index, ok := f.fieldIndexes[field]; ok {
        f.mutex.RUnlock()
        return index, true
    } else if !createIfMissing {
        f.mutex.RUnlock()
        return 0, false
    }
    // trade read lock for write lock
    f.mutex.RUnlock()
    f.mutex.Lock()
    // need to check again with write lock
    if index, ok := f.fieldIndexes[field]; ok {
        f.mutex.Unlock()
        return index, true
    }
    // assign next field id
    index := f.addLOCKED(field, uint16(f.lastFieldIndex+1))
    f.mutex.Unlock()
    return index, false
}

func (f *FieldCache) FieldIndexed(index uint16) (field string) {
    f.mutex.RLock()
    if int(index) < len(f.indexFields) {
        field = f.indexFields[int(index)]
    }
    f.mutex.RUnlock()
    return field
}
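A short usage sketch of the cache above, illustrative only and not part of the imported file; the field name is made up:

```go
package main

import (
    "fmt"

    "github.com/blevesearch/bleve/v2/index/upsidedown"
)

func main() {
    fc := upsidedown.NewFieldCache()

    idx, existed := fc.FieldNamed("title", true) // assigns index 0, existed == false
    fmt.Println(idx, existed)

    idx, existed = fc.FieldNamed("title", false) // already known now, existed == true
    fmt.Println(idx, existed)

    fmt.Println(fc.FieldIndexed(idx)) // "title"
}
```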
86
index/upsidedown/field_dict.go
Normal file
@@ -0,0 +1,86 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package upsidedown

import (
    "fmt"

    index "github.com/blevesearch/bleve_index_api"
    store "github.com/blevesearch/upsidedown_store_api"
)

type UpsideDownCouchFieldDict struct {
    indexReader *IndexReader
    iterator store.KVIterator
    dictRow *DictionaryRow
    dictEntry *index.DictEntry
    field uint16
}

func newUpsideDownCouchFieldDict(indexReader *IndexReader, field uint16, startTerm, endTerm []byte) (*UpsideDownCouchFieldDict, error) {

    startKey := NewDictionaryRow(startTerm, field, 0).Key()
    if endTerm == nil {
        endTerm = []byte{ByteSeparator}
    } else {
        endTerm = incrementBytes(endTerm)
    }
    endKey := NewDictionaryRow(endTerm, field, 0).Key()

    it := indexReader.kvreader.RangeIterator(startKey, endKey)

    return &UpsideDownCouchFieldDict{
        indexReader: indexReader,
        iterator: it,
        dictRow: &DictionaryRow{}, // Pre-alloced, reused row.
        dictEntry: &index.DictEntry{}, // Pre-alloced, reused entry.
        field: field,
    }, nil

}

func (r *UpsideDownCouchFieldDict) BytesRead() uint64 {
    return 0
}

func (r *UpsideDownCouchFieldDict) Next() (*index.DictEntry, error) {
    key, val, valid := r.iterator.Current()
    if !valid {
        return nil, nil
    }

    err := r.dictRow.parseDictionaryK(key)
    if err != nil {
        return nil, fmt.Errorf("unexpected error parsing dictionary row key: %v", err)
    }
    err = r.dictRow.parseDictionaryV(val)
    if err != nil {
        return nil, fmt.Errorf("unexpected error parsing dictionary row val: %v", err)
    }
    r.dictEntry.Term = string(r.dictRow.term)
    r.dictEntry.Count = r.dictRow.count
    // advance the iterator to the next term
    r.iterator.Next()
    return r.dictEntry, nil

}

func (r *UpsideDownCouchFieldDict) Cardinality() int {
    return 0
}

func (r *UpsideDownCouchFieldDict) Close() error {
    return r.iterator.Close()
}
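`Next` above returns `(nil, nil)` once the range is exhausted, so callers must check both return values; the test that follows exercises the same loop against a real index. A minimal drain helper, illustrative only and not part of the imported file, written against the public `index.FieldDict` interface:

```go
package upsidedown

import (
    "fmt"

    index "github.com/blevesearch/bleve_index_api"
)

// exampleDrainFieldDict is a hypothetical helper: it prints every term and
// count, then closes the dictionary when Next signals exhaustion.
func exampleDrainFieldDict(d index.FieldDict) error {
    for {
        entry, err := d.Next()
        if err != nil {
            return err
        }
        if entry == nil {
            return d.Close()
        }
        fmt.Printf("%s (%d)\n", entry.Term, entry.Count)
    }
}
```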
183
index/upsidedown/field_dict_test.go
Normal file
@@ -0,0 +1,183 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package upsidedown

import (
    "reflect"
    "testing"

    "github.com/blevesearch/bleve/v2/document"
    "github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb"
    index "github.com/blevesearch/bleve_index_api"
)

func TestIndexFieldDict(t *testing.T) {
    defer func() {
        err := DestroyTest()
        if err != nil {
            t.Fatal(err)
        }
    }()

    analysisQueue := index.NewAnalysisQueue(1)
    idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
    if err != nil {
        t.Fatal(err)
    }
    err = idx.Open()
    if err != nil {
        t.Errorf("error opening index: %v", err)
    }
    defer func() {
        err := idx.Close()
        if err != nil {
            t.Fatal(err)
        }
    }()

    doc := document.NewDocument("1")
    doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
    err = idx.Update(doc)
    if err != nil {
        t.Errorf("Error updating index: %v", err)
    }

    doc = document.NewDocument("2")
    doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer))
    doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), index.IndexField|index.IncludeTermVectors, testAnalyzer))
    doc.AddField(document.NewTextFieldCustom("prefix", []uint64{}, []byte("bob cat cats catting dog doggy zoo"), index.IndexField|index.IncludeTermVectors, testAnalyzer))
    err = idx.Update(doc)
    if err != nil {
        t.Errorf("Error updating index: %v", err)
    }

    indexReader, err := idx.Reader()
    if err != nil {
        t.Error(err)
    }
    defer func() {
        err := indexReader.Close()
        if err != nil {
            t.Fatal(err)
        }
    }()

    dict, err := indexReader.FieldDict("name")
    if err != nil {
        t.Errorf("error creating reader: %v", err)
    }
    defer func() {
        err := dict.Close()
        if err != nil {
            t.Fatal(err)
        }
    }()

    termCount := 0
    curr, err := dict.Next()
    for err == nil && curr != nil {
        termCount++
        if curr.Term != "test" {
            t.Errorf("expected term to be 'test', got '%s'", curr.Term)
        }
        curr, err = dict.Next()
    }
    if termCount != 1 {
        t.Errorf("expected 1 term for this field, got %d", termCount)
    }

    dict2, err := indexReader.FieldDict("desc")
    if err != nil {
        t.Errorf("error creating reader: %v", err)
    }
    defer func() {
        err := dict2.Close()
        if err != nil {
            t.Fatal(err)
        }
    }()

    termCount = 0
    terms := make([]string, 0)
    curr, err = dict2.Next()
    for err == nil && curr != nil {
        termCount++
        terms = append(terms, curr.Term)
        curr, err = dict2.Next()
    }
    if termCount != 3 {
        t.Errorf("expected 3 term for this field, got %d", termCount)
    }
    expectedTerms := []string{"eat", "more", "rice"}
    if !reflect.DeepEqual(expectedTerms, terms) {
        t.Errorf("expected %#v, got %#v", expectedTerms, terms)
    }

    // test start and end range
    dict3, err := indexReader.FieldDictRange("desc", []byte("fun"), []byte("nice"))
    if err != nil {
        t.Errorf("error creating reader: %v", err)
    }
    defer func() {
        err := dict3.Close()
        if err != nil {
            t.Fatal(err)
        }
    }()

    termCount = 0
    terms = make([]string, 0)
    curr, err = dict3.Next()
    for err == nil && curr != nil {
        termCount++
        terms = append(terms, curr.Term)
        curr, err = dict3.Next()
    }
    if termCount != 1 {
        t.Errorf("expected 1 term for this field, got %d", termCount)
    }
    expectedTerms = []string{"more"}
    if !reflect.DeepEqual(expectedTerms, terms) {
        t.Errorf("expected %#v, got %#v", expectedTerms, terms)
    }

    // test use case for prefix
    dict4, err := indexReader.FieldDictPrefix("prefix", []byte("cat"))
    if err != nil {
        t.Errorf("error creating reader: %v", err)
    }
    defer func() {
        err := dict4.Close()
        if err != nil {
            t.Fatal(err)
        }
    }()

    termCount = 0
    terms = make([]string, 0)
    curr, err = dict4.Next()
    for err == nil && curr != nil {
        termCount++
        terms = append(terms, curr.Term)
        curr, err = dict4.Next()
    }
    if termCount != 3 {
        t.Errorf("expected 3 term for this field, got %d", termCount)
    }
    expectedTerms = []string{"cat", "cats", "catting"}
    if !reflect.DeepEqual(expectedTerms, terms) {
        t.Errorf("expected %#v, got %#v", expectedTerms, terms)
    }
||||
}
|
228 index/upsidedown/index_reader.go Normal file
@@ -0,0 +1,228 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown
|
||||
|
||||
import (
|
||||
"context"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/document"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeIndexReader int
|
||||
|
||||
func init() {
|
||||
var ir IndexReader
|
||||
reflectStaticSizeIndexReader = int(reflect.TypeOf(ir).Size())
|
||||
}
|
||||
|
||||
type IndexReader struct {
|
||||
index *UpsideDownCouch
|
||||
kvreader store.KVReader
|
||||
docCount uint64
|
||||
}
|
||||
|
||||
func (i *IndexReader) TermFieldReader(ctx context.Context, term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
|
||||
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false)
|
||||
if fieldExists {
|
||||
return newUpsideDownCouchTermFieldReader(i, term, uint16(fieldIndex), includeFreq, includeNorm, includeTermVectors)
|
||||
}
|
||||
return newUpsideDownCouchTermFieldReader(i, []byte{ByteSeparator}, ^uint16(0), includeFreq, includeNorm, includeTermVectors)
|
||||
}
|
||||
|
||||
func (i *IndexReader) FieldDict(fieldName string) (index.FieldDict, error) {
|
||||
return i.FieldDictRange(fieldName, nil, nil)
|
||||
}
|
||||
|
||||
func (i *IndexReader) FieldDictRange(fieldName string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
|
||||
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false)
|
||||
if fieldExists {
|
||||
return newUpsideDownCouchFieldDict(i, uint16(fieldIndex), startTerm, endTerm)
|
||||
}
|
||||
return newUpsideDownCouchFieldDict(i, ^uint16(0), []byte{ByteSeparator}, []byte{})
|
||||
}
|
||||
|
||||
func (i *IndexReader) FieldDictPrefix(fieldName string, termPrefix []byte) (index.FieldDict, error) {
|
||||
return i.FieldDictRange(fieldName, termPrefix, termPrefix)
|
||||
}
|
||||
|
||||
func (i *IndexReader) DocIDReaderAll() (index.DocIDReader, error) {
|
||||
return newUpsideDownCouchDocIDReader(i)
|
||||
}
|
||||
|
||||
func (i *IndexReader) DocIDReaderOnly(ids []string) (index.DocIDReader, error) {
|
||||
return newUpsideDownCouchDocIDReaderOnly(i, ids)
|
||||
}
|
||||
|
||||
func (i *IndexReader) Document(id string) (doc index.Document, err error) {
|
||||
// first hit the back index to confirm doc exists
|
||||
var backIndexRow *BackIndexRow
|
||||
backIndexRow, err = backIndexRowForDoc(i.kvreader, []byte(id))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if backIndexRow == nil {
|
||||
return
|
||||
}
|
||||
rvd := document.NewDocument(id)
|
||||
storedRow := NewStoredRow([]byte(id), 0, []uint64{}, 'x', nil)
|
||||
storedRowScanPrefix := storedRow.ScanPrefixForDoc()
|
||||
it := i.kvreader.PrefixIterator(storedRowScanPrefix)
|
||||
defer func() {
|
||||
if cerr := it.Close(); err == nil && cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
key, val, valid := it.Current()
|
||||
for valid {
|
||||
safeVal := make([]byte, len(val))
|
||||
copy(safeVal, val)
|
||||
var row *StoredRow
|
||||
row, err = NewStoredRowKV(key, safeVal)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if row != nil {
|
||||
fieldName := i.index.fieldCache.FieldIndexed(row.field)
|
||||
field := decodeFieldType(row.typ, fieldName, row.arrayPositions, row.value)
|
||||
if field != nil {
|
||||
rvd.AddField(field)
|
||||
}
|
||||
}
|
||||
|
||||
it.Next()
|
||||
key, val, valid = it.Current()
|
||||
}
|
||||
return rvd, nil
|
||||
}
|
||||
|
||||
func (i *IndexReader) documentVisitFieldTerms(id index.IndexInternalID, fields []string, visitor index.DocValueVisitor) error {
|
||||
fieldsMap := make(map[uint16]string, len(fields))
|
||||
for _, f := range fields {
|
||||
id, ok := i.index.fieldCache.FieldNamed(f, false)
|
||||
if ok {
|
||||
fieldsMap[id] = f
|
||||
}
|
||||
}
|
||||
|
||||
tempRow := BackIndexRow{
|
||||
doc: id,
|
||||
}
|
||||
|
||||
keyBuf := GetRowBuffer()
|
||||
if tempRow.KeySize() > len(keyBuf.buf) {
|
||||
keyBuf.buf = make([]byte, 2*tempRow.KeySize())
|
||||
}
|
||||
defer PutRowBuffer(keyBuf)
|
||||
keySize, err := tempRow.KeyTo(keyBuf.buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
value, err := i.kvreader.Get(keyBuf.buf[:keySize])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if value == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return visitBackIndexRow(value, func(field uint32, term []byte) {
|
||||
if field, ok := fieldsMap[uint16(field)]; ok {
|
||||
visitor(field, term)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func (i *IndexReader) Fields() (fields []string, err error) {
|
||||
fields = make([]string, 0)
|
||||
it := i.kvreader.PrefixIterator([]byte{'f'})
|
||||
defer func() {
|
||||
if cerr := it.Close(); err == nil && cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
key, val, valid := it.Current()
|
||||
for valid {
|
||||
var row UpsideDownCouchRow
|
||||
row, err = ParseFromKeyValue(key, val)
|
||||
if err != nil {
|
||||
fields = nil
|
||||
return
|
||||
}
|
||||
if row != nil {
|
||||
fieldRow, ok := row.(*FieldRow)
|
||||
if ok {
|
||||
fields = append(fields, fieldRow.name)
|
||||
}
|
||||
}
|
||||
|
||||
it.Next()
|
||||
key, val, valid = it.Current()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (i *IndexReader) GetInternal(key []byte) ([]byte, error) {
|
||||
internalRow := NewInternalRow(key, nil)
|
||||
return i.kvreader.Get(internalRow.Key())
|
||||
}
|
||||
|
||||
func (i *IndexReader) DocCount() (uint64, error) {
|
||||
return i.docCount, nil
|
||||
}
|
||||
|
||||
func (i *IndexReader) Close() error {
|
||||
return i.kvreader.Close()
|
||||
}
|
||||
|
||||
func (i *IndexReader) ExternalID(id index.IndexInternalID) (string, error) {
|
||||
return string(id), nil
|
||||
}
|
||||
|
||||
func (i *IndexReader) InternalID(id string) (index.IndexInternalID, error) {
|
||||
return index.IndexInternalID(id), nil
|
||||
}
|
||||
|
||||
func incrementBytes(in []byte) []byte {
|
||||
rv := make([]byte, len(in))
|
||||
copy(rv, in)
|
||||
for i := len(rv) - 1; i >= 0; i-- {
|
||||
rv[i] = rv[i] + 1
|
||||
if rv[i] != 0 {
|
||||
// didn't overflow, so stop
|
||||
break
|
||||
}
|
||||
}
|
||||
return rv
|
||||
}
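incrementBytes computes the smallest byte string strictly greater than its input by adding one with carry, which the only-mode doc ID reader below uses as an exclusive end key. A small illustrative test of that carry behaviour (the test name is hypothetical):

```
package upsidedown

import (
	"bytes"
	"testing"
)

// Illustrative only: expected carry behaviour of incrementBytes above.
func TestIncrementBytesSketch(t *testing.T) {
	cases := []struct{ in, want []byte }{
		{[]byte{0x01}, []byte{0x02}},
		{[]byte{0x01, 0xff}, []byte{0x02, 0x00}}, // overflow carries into the previous byte
		{[]byte{0xff, 0xff}, []byte{0x00, 0x00}}, // every byte overflows and wraps
	}
	for _, c := range cases {
		if got := incrementBytes(c.in); !bytes.Equal(got, c.want) {
			t.Errorf("incrementBytes(%v) = %v, want %v", c.in, got, c.want)
		}
	}
}
```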
|
||||
|
||||
func (i *IndexReader) DocValueReader(fields []string) (index.DocValueReader, error) {
|
||||
return &DocValueReader{i: i, fields: fields}, nil
|
||||
}
|
||||
|
||||
type DocValueReader struct {
|
||||
i *IndexReader
|
||||
fields []string
|
||||
}
|
||||
|
||||
func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID,
|
||||
visitor index.DocValueVisitor) error {
|
||||
return dvr.i.documentVisitFieldTerms(id, dvr.fields, visitor)
|
||||
}
|
||||
|
||||
func (dvr *DocValueReader) BytesRead() uint64 { return 0 }
|
376 index/upsidedown/reader.go Normal file
@@ -0,0 +1,376 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"reflect"
|
||||
"sort"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
"github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeUpsideDownCouchTermFieldReader int
|
||||
var reflectStaticSizeUpsideDownCouchDocIDReader int
|
||||
|
||||
func init() {
|
||||
var tfr UpsideDownCouchTermFieldReader
|
||||
reflectStaticSizeUpsideDownCouchTermFieldReader =
|
||||
int(reflect.TypeOf(tfr).Size())
|
||||
var cdr UpsideDownCouchDocIDReader
|
||||
reflectStaticSizeUpsideDownCouchDocIDReader =
|
||||
int(reflect.TypeOf(cdr).Size())
|
||||
}
|
||||
|
||||
type UpsideDownCouchTermFieldReader struct {
|
||||
count uint64
|
||||
indexReader *IndexReader
|
||||
iterator store.KVIterator
|
||||
term []byte
|
||||
tfrNext *TermFrequencyRow
|
||||
tfrPrealloc TermFrequencyRow
|
||||
keyBuf []byte
|
||||
field uint16
|
||||
includeTermVectors bool
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Size() int {
|
||||
sizeInBytes := reflectStaticSizeUpsideDownCouchTermFieldReader + size.SizeOfPtr +
|
||||
len(r.term) +
|
||||
r.tfrPrealloc.Size() +
|
||||
len(r.keyBuf)
|
||||
|
||||
if r.tfrNext != nil {
|
||||
sizeInBytes += r.tfrNext.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
|
||||
bufNeeded := termFrequencyRowKeySize(term, nil)
|
||||
if bufNeeded < dictionaryRowKeySize(term) {
|
||||
bufNeeded = dictionaryRowKeySize(term)
|
||||
}
|
||||
buf := make([]byte, bufNeeded)
|
||||
|
||||
bufUsed := dictionaryRowKeyTo(buf, field, term)
|
||||
val, err := indexReader.kvreader.Get(buf[:bufUsed])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if val == nil {
|
||||
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
|
||||
rv := &UpsideDownCouchTermFieldReader{
|
||||
count: 0,
|
||||
term: term,
|
||||
field: field,
|
||||
includeTermVectors: includeTermVectors,
|
||||
}
|
||||
rv.tfrNext = &rv.tfrPrealloc
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
count, err := dictionaryRowParseV(val)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
bufUsed = termFrequencyRowKeyTo(buf, field, term, nil)
|
||||
it := indexReader.kvreader.PrefixIterator(buf[:bufUsed])
|
||||
|
||||
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
|
||||
return &UpsideDownCouchTermFieldReader{
|
||||
indexReader: indexReader,
|
||||
iterator: it,
|
||||
count: count,
|
||||
term: term,
|
||||
field: field,
|
||||
includeTermVectors: includeTermVectors,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Count() uint64 {
|
||||
return r.count
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
|
||||
if r.iterator != nil {
|
||||
// We treat tfrNext also like an initialization flag, which
|
||||
// tells us whether we need to invoke the underlying
|
||||
// iterator.Next(). The first time, don't call iterator.Next().
|
||||
if r.tfrNext != nil {
|
||||
r.iterator.Next()
|
||||
} else {
|
||||
r.tfrNext = &r.tfrPrealloc
|
||||
}
|
||||
key, val, valid := r.iterator.Current()
|
||||
if valid {
|
||||
tfr := r.tfrNext
|
||||
err := tfr.parseKDoc(key, r.term)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = tfr.parseV(val, r.includeTermVectors)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := preAlloced
|
||||
if rv == nil {
|
||||
rv = &index.TermFieldDoc{}
|
||||
}
|
||||
rv.ID = append(rv.ID, tfr.doc...)
|
||||
rv.Freq = tfr.freq
|
||||
rv.Norm = float64(tfr.norm)
|
||||
if tfr.vectors != nil {
|
||||
rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors)
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Advance(docID index.IndexInternalID, preAlloced *index.TermFieldDoc) (rv *index.TermFieldDoc, err error) {
|
||||
if r.iterator != nil {
|
||||
if r.tfrNext == nil {
|
||||
r.tfrNext = &TermFrequencyRow{}
|
||||
}
|
||||
tfr := InitTermFrequencyRow(r.tfrNext, r.term, r.field, docID, 0, 0)
|
||||
r.keyBuf, err = tfr.KeyAppendTo(r.keyBuf[:0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r.iterator.Seek(r.keyBuf)
|
||||
key, val, valid := r.iterator.Current()
|
||||
if valid {
|
||||
err := tfr.parseKDoc(key, r.term)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = tfr.parseV(val, r.includeTermVectors)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv = preAlloced
|
||||
if rv == nil {
|
||||
rv = &index.TermFieldDoc{}
|
||||
}
|
||||
rv.ID = append(rv.ID, tfr.doc...)
|
||||
rv.Freq = tfr.freq
|
||||
rv.Norm = float64(tfr.norm)
|
||||
if tfr.vectors != nil {
|
||||
rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors)
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Close() error {
|
||||
if r.indexReader != nil {
|
||||
atomic.AddUint64(&r.indexReader.index.stats.termSearchersFinished, uint64(1))
|
||||
}
|
||||
if r.iterator != nil {
|
||||
return r.iterator.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type UpsideDownCouchDocIDReader struct {
|
||||
indexReader *IndexReader
|
||||
iterator store.KVIterator
|
||||
only []string
|
||||
onlyPos int
|
||||
onlyMode bool
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchDocIDReader) Size() int {
|
||||
sizeInBytes := reflectStaticSizeUpsideDownCouchDocIDReader +
|
||||
reflectStaticSizeIndexReader + size.SizeOfPtr
|
||||
|
||||
for _, entry := range r.only {
|
||||
sizeInBytes += size.SizeOfString + len(entry)
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
|
||||
startBytes := []byte{0x0}
|
||||
endBytes := []byte{0xff}
|
||||
|
||||
bisr := NewBackIndexRow(startBytes, nil, nil)
|
||||
bier := NewBackIndexRow(endBytes, nil, nil)
|
||||
it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key())
|
||||
|
||||
return &UpsideDownCouchDocIDReader{
|
||||
indexReader: indexReader,
|
||||
iterator: it,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func newUpsideDownCouchDocIDReaderOnly(indexReader *IndexReader, ids []string) (*UpsideDownCouchDocIDReader, error) {
|
||||
// we don't actually own the list of ids, so before we sort we must copy it
|
||||
idsCopy := make([]string, len(ids))
|
||||
copy(idsCopy, ids)
|
||||
// ensure ids are sorted
|
||||
sort.Strings(idsCopy)
|
||||
startBytes := []byte{0x0}
|
||||
if len(idsCopy) > 0 {
|
||||
startBytes = []byte(idsCopy[0])
|
||||
}
|
||||
endBytes := []byte{0xff}
|
||||
if len(idsCopy) > 0 {
|
||||
endBytes = incrementBytes([]byte(idsCopy[len(idsCopy)-1]))
|
||||
}
|
||||
bisr := NewBackIndexRow(startBytes, nil, nil)
|
||||
bier := NewBackIndexRow(endBytes, nil, nil)
|
||||
it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key())
|
||||
|
||||
return &UpsideDownCouchDocIDReader{
|
||||
indexReader: indexReader,
|
||||
iterator: it,
|
||||
only: idsCopy,
|
||||
onlyMode: true,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchDocIDReader) Next() (index.IndexInternalID, error) {
|
||||
key, val, valid := r.iterator.Current()
|
||||
|
||||
if r.onlyMode {
|
||||
var rv index.IndexInternalID
|
||||
for valid && r.onlyPos < len(r.only) {
|
||||
br, err := NewBackIndexRowKV(key, val)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) {
|
||||
ok := r.nextOnly()
|
||||
if !ok {
|
||||
return nil, nil
|
||||
}
|
||||
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
|
||||
key, val, valid = r.iterator.Current()
|
||||
continue
|
||||
} else {
|
||||
rv = append([]byte(nil), br.doc...)
|
||||
break
|
||||
}
|
||||
}
|
||||
if valid && r.onlyPos < len(r.only) {
|
||||
ok := r.nextOnly()
|
||||
if ok {
|
||||
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
} else {
|
||||
if valid {
|
||||
br, err := NewBackIndexRowKV(key, val)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := append([]byte(nil), br.doc...)
|
||||
r.iterator.Next()
|
||||
return rv, nil
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchDocIDReader) Advance(docID index.IndexInternalID) (index.IndexInternalID, error) {
|
||||
|
||||
if r.onlyMode {
|
||||
r.onlyPos = sort.SearchStrings(r.only, string(docID))
|
||||
if r.onlyPos >= len(r.only) {
|
||||
// advanced to key after our last only key
|
||||
return nil, nil
|
||||
}
|
||||
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
|
||||
key, val, valid := r.iterator.Current()
|
||||
|
||||
var rv index.IndexInternalID
|
||||
for valid && r.onlyPos < len(r.only) {
|
||||
br, err := NewBackIndexRowKV(key, val)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) {
|
||||
// the only key we seeked to didn't exist
|
||||
// now look for the closest key that did exist in only
|
||||
r.onlyPos = sort.SearchStrings(r.only, string(br.doc))
|
||||
if r.onlyPos >= len(r.only) {
|
||||
// advanced to key after our last only key
|
||||
return nil, nil
|
||||
}
|
||||
// now seek to this new only key
|
||||
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
|
||||
key, val, valid = r.iterator.Current()
|
||||
continue
|
||||
} else {
|
||||
rv = append([]byte(nil), br.doc...)
|
||||
break
|
||||
}
|
||||
}
|
||||
if valid && r.onlyPos < len(r.only) {
|
||||
ok := r.nextOnly()
|
||||
if ok {
|
||||
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
} else {
|
||||
bir := NewBackIndexRow(docID, nil, nil)
|
||||
r.iterator.Seek(bir.Key())
|
||||
key, val, valid := r.iterator.Current()
|
||||
if valid {
|
||||
br, err := NewBackIndexRowKV(key, val)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := append([]byte(nil), br.doc...)
|
||||
r.iterator.Next()
|
||||
return rv, nil
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchDocIDReader) Close() error {
|
||||
return r.iterator.Close()
|
||||
}
|
||||
|
||||
// move the r.only pos forward one, skipping duplicates
|
||||
// return true if there is more data, or false if we got to the end of the list
|
||||
func (r *UpsideDownCouchDocIDReader) nextOnly() bool {
|
||||
|
||||
// advance 1 position, until we see a different key
|
||||
// it's already sorted, so this skips duplicates
|
||||
start := r.onlyPos
|
||||
r.onlyPos++
|
||||
for r.onlyPos < len(r.only) && r.only[r.onlyPos] == r.only[start] {
|
||||
start = r.onlyPos
|
||||
r.onlyPos++
|
||||
}
|
||||
// indicate whether there is still more data in the list
|
||||
return r.onlyPos < len(r.only)
|
||||
}
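The only-mode reader above is essentially a leapfrog intersection between the caller's sorted id list and the back-index rows. A simplified, self-contained sketch of that pattern over plain string slices, with a binary search standing in for the KV iterator seek (intersectSorted is illustrative):

```
package example

import "sort"

// intersectSorted sketches the leapfrog pattern used above: walk the sorted
// list of requested ids and a sorted set of stored ids together, seeking each
// side forward past the other, and keep only ids present in both.
func intersectSorted(only, stored []string) []string {
	sort.Strings(only)
	sort.Strings(stored)
	var out []string
	i, j := 0, 0
	for i < len(only) && j < len(stored) {
		switch {
		case only[i] == stored[j]:
			out = append(out, only[i])
			i++
			j++
		case only[i] < stored[j]:
			i++ // requested id is not stored; skip it
		default:
			// stored id was not requested; seek the stored side forward
			// (stands in for the iterator Seek in the real reader)
			j = sort.SearchStrings(stored, only[i])
		}
	}
	return out
}
```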
|
548 index/upsidedown/reader_test.go Normal file
@@ -0,0 +1,548 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown
|
||||
|
||||
import (
|
||||
"context"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/document"
|
||||
"github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
func TestIndexReader(t *testing.T) {
|
||||
defer func() {
|
||||
err := DestroyTest()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
analysisQueue := index.NewAnalysisQueue(1)
|
||||
idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = idx.Open()
|
||||
if err != nil {
|
||||
t.Errorf("error opening index: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := idx.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
var expectedCount uint64
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
expectedCount++
|
||||
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer))
|
||||
doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), index.IndexField|index.IncludeTermVectors, testAnalyzer))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
expectedCount++
|
||||
|
||||
indexReader, err := idx.Reader()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
defer func() {
|
||||
err := indexReader.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
// first look for a term that doesn't exist
|
||||
reader, err := indexReader.TermFieldReader(context.TODO(), []byte("nope"), "name", true, true, true)
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing term field reader: %v", err)
|
||||
}
|
||||
count := reader.Count()
|
||||
if count != 0 {
|
||||
t.Errorf("Expected doc count to be: %d got: %d", 0, count)
|
||||
}
|
||||
err = reader.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
reader, err = indexReader.TermFieldReader(context.TODO(), []byte("test"), "name", true, true, true)
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing term field reader: %v", err)
|
||||
}
|
||||
|
||||
count = reader.Count()
|
||||
if count != expectedCount {
|
||||
t.Errorf("Expected doc count to be: %d got: %d", expectedCount, count)
|
||||
}
|
||||
|
||||
var match *index.TermFieldDoc
|
||||
var actualCount uint64
|
||||
match, err = reader.Next(nil)
|
||||
for err == nil && match != nil {
|
||||
match, err = reader.Next(nil)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error reading next")
|
||||
}
|
||||
actualCount++
|
||||
}
|
||||
if actualCount != count {
|
||||
t.Errorf("count was 2, but only saw %d", actualCount)
|
||||
}
|
||||
|
||||
expectedMatch := &index.TermFieldDoc{
|
||||
ID: index.IndexInternalID("2"),
|
||||
Freq: 1,
|
||||
Norm: 0.5773502588272095,
|
||||
Vectors: []*index.TermFieldVector{
|
||||
{
|
||||
Field: "desc",
|
||||
Pos: 3,
|
||||
Start: 9,
|
||||
End: 13,
|
||||
},
|
||||
},
|
||||
}
|
||||
tfr, err := indexReader.TermFieldReader(context.TODO(), []byte("rice"), "desc", true, true, true)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
match, err = tfr.Next(nil)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if !reflect.DeepEqual(expectedMatch, match) {
|
||||
t.Errorf("got %#v, expected %#v", match, expectedMatch)
|
||||
}
|
||||
err = reader.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// now test usage of advance
|
||||
reader, err = indexReader.TermFieldReader(context.TODO(), []byte("test"), "name", true, true, true)
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing term field reader: %v", err)
|
||||
}
|
||||
|
||||
match, err = reader.Advance(index.IndexInternalID("2"), nil)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if match == nil {
|
||||
t.Fatalf("Expected match, got nil")
|
||||
}
|
||||
if !match.ID.Equals(index.IndexInternalID("2")) {
|
||||
t.Errorf("Expected ID '2', got '%s'", match.ID)
|
||||
}
|
||||
match, err = reader.Advance(index.IndexInternalID("3"), nil)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if match != nil {
|
||||
t.Errorf("expected nil, got %v", match)
|
||||
}
|
||||
err = reader.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// now test creating a reader for a field that doesn't exist
|
||||
reader, err = indexReader.TermFieldReader(context.TODO(), []byte("water"), "doesnotexist", true, true, true)
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing term field reader: %v", err)
|
||||
}
|
||||
count = reader.Count()
|
||||
if count != 0 {
|
||||
t.Errorf("expected count 0 for reader of non-existent field")
|
||||
}
|
||||
match, err = reader.Next(nil)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if match != nil {
|
||||
t.Errorf("expected nil, got %v", match)
|
||||
}
|
||||
match, err = reader.Advance(index.IndexInternalID("anywhere"), nil)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
if match != nil {
|
||||
t.Errorf("expected nil, got %v", match)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIndexDocIdReader(t *testing.T) {
|
||||
defer func() {
|
||||
err := DestroyTest()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
analysisQueue := index.NewAnalysisQueue(1)
|
||||
idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = idx.Open()
|
||||
if err != nil {
|
||||
t.Errorf("error opening index: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := idx.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
var expectedCount uint64
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
expectedCount++
|
||||
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test test test")))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []uint64{}, []byte("eat more rice"), index.IndexField|index.IncludeTermVectors))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
expectedCount++
|
||||
|
||||
indexReader, err := idx.Reader()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
defer func() {
|
||||
err := indexReader.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
// first get all doc ids
|
||||
reader, err := indexReader.DocIDReaderAll()
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing doc id reader: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := reader.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
id, err := reader.Next()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
count := uint64(0)
|
||||
for id != nil {
|
||||
count++
|
||||
id, err = reader.Next()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
if count != expectedCount {
|
||||
t.Errorf("expected %d, got %d", expectedCount, count)
|
||||
}
|
||||
|
||||
// try it again, but jump to the second doc this time
|
||||
reader2, err := indexReader.DocIDReaderAll()
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing doc id reader: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := reader2.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
id, err = reader2.Advance(index.IndexInternalID("2"))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if !id.Equals(index.IndexInternalID("2")) {
|
||||
t.Errorf("expected to find id '2', got '%s'", id)
|
||||
}
|
||||
|
||||
id, err = reader2.Advance(index.IndexInternalID("3"))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if id != nil {
|
||||
t.Errorf("expected to find id '', got '%s'", id)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrashBadBackIndexRow(t *testing.T) {
|
||||
br, err := NewBackIndexRowKV([]byte{byte('b'), byte('a'), ByteSeparator}, []byte{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if string(br.doc) != "a" {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIndexDocIdOnlyReader(t *testing.T) {
|
||||
defer func() {
|
||||
err := DestroyTest()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
analysisQueue := index.NewAnalysisQueue(1)
|
||||
idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = idx.Open()
|
||||
if err != nil {
|
||||
t.Errorf("error opening index: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := idx.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
|
||||
doc = document.NewDocument("3")
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
|
||||
doc = document.NewDocument("5")
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
|
||||
doc = document.NewDocument("7")
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
|
||||
doc = document.NewDocument("9")
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
|
||||
indexReader, err := idx.Reader()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
defer func() {
|
||||
err := indexReader.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
onlyIds := []string{"1", "5", "9"}
|
||||
reader, err := indexReader.DocIDReaderOnly(onlyIds)
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing doc id reader: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := reader.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
id, err := reader.Next()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
count := uint64(0)
|
||||
for id != nil {
|
||||
count++
|
||||
id, err = reader.Next()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
if count != 3 {
|
||||
t.Errorf("expected 3, got %d", count)
|
||||
}
|
||||
|
||||
// try it again, but jump
|
||||
reader2, err := indexReader.DocIDReaderOnly(onlyIds)
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing doc id reader: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := reader2.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
id, err = reader2.Advance(index.IndexInternalID("5"))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if !id.Equals(index.IndexInternalID("5")) {
|
||||
t.Errorf("expected to find id '5', got '%s'", id)
|
||||
}
|
||||
|
||||
id, err = reader2.Advance(index.IndexInternalID("a"))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if id != nil {
|
||||
t.Errorf("expected to find id '', got '%s'", id)
|
||||
}
|
||||
|
||||
// some keys aren't actually there
|
||||
onlyIds = []string{"0", "2", "4", "5", "6", "8", "a"}
|
||||
reader3, err := indexReader.DocIDReaderOnly(onlyIds)
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing doc id reader: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := reader3.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
id, err = reader3.Next()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
count = uint64(0)
|
||||
for id != nil {
|
||||
count++
|
||||
id, err = reader3.Next()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
if count != 1 {
|
||||
t.Errorf("expected 1, got %d", count)
|
||||
}
|
||||
|
||||
// mix advance and next
|
||||
onlyIds = []string{"0", "1", "3", "5", "6", "9"}
|
||||
reader4, err := indexReader.DocIDReaderOnly(onlyIds)
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing doc id reader: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
err := reader4.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
// first key is "1"
|
||||
id, err = reader4.Next()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if !id.Equals(index.IndexInternalID("1")) {
|
||||
t.Errorf("expected to find id '1', got '%s'", id)
|
||||
}
|
||||
|
||||
// advancing to a key we don't have gives the next one
|
||||
id, err = reader4.Advance(index.IndexInternalID("2"))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if !id.Equals(index.IndexInternalID("3")) {
|
||||
t.Errorf("expected to find id '3', got '%s'", id)
|
||||
}
|
||||
|
||||
// next after advance works
|
||||
id, err = reader4.Next()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if !id.Equals(index.IndexInternalID("5")) {
|
||||
t.Errorf("expected to find id '5', got '%s'", id)
|
||||
}
|
||||
|
||||
// advancing to key we do have works
|
||||
id, err = reader4.Advance(index.IndexInternalID("9"))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if !id.Equals(index.IndexInternalID("9")) {
|
||||
t.Errorf("expected to find id '9', got '%s'", id)
|
||||
}
|
||||
|
||||
// advance backwards at end
|
||||
id, err = reader4.Advance(index.IndexInternalID("4"))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if !id.Equals(index.IndexInternalID("5")) {
|
||||
t.Errorf("expected to find id '5', got '%s'", id)
|
||||
}
|
||||
|
||||
// next after advance works
|
||||
id, err = reader4.Next()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if !id.Equals(index.IndexInternalID("9")) {
|
||||
t.Errorf("expected to find id '9', got '%s'", id)
|
||||
}
|
||||
|
||||
// advance backwards to key that exists, but not in only set
|
||||
id, err = reader4.Advance(index.IndexInternalID("7"))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if !id.Equals(index.IndexInternalID("9")) {
|
||||
t.Errorf("expected to find id '9', got '%s'", id)
|
||||
}
|
||||
}
|
1144 index/upsidedown/row.go Normal file
File diff suppressed because it is too large
76 index/upsidedown/row_merge.go Normal file
@@ -0,0 +1,76 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
var mergeOperator upsideDownMerge
|
||||
|
||||
var dictionaryTermIncr []byte
|
||||
var dictionaryTermDecr []byte
|
||||
|
||||
func init() {
|
||||
dictionaryTermIncr = make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(dictionaryTermIncr, uint64(1))
|
||||
dictionaryTermDecr = make([]byte, 8)
|
||||
var negOne = int64(-1)
|
||||
binary.LittleEndian.PutUint64(dictionaryTermDecr, uint64(negOne))
|
||||
}
|
||||
|
||||
type upsideDownMerge struct{}
|
||||
|
||||
func (m *upsideDownMerge) FullMerge(key, existingValue []byte, operands [][]byte) ([]byte, bool) {
|
||||
// set up record based on key
|
||||
dr, err := NewDictionaryRowK(key)
|
||||
if err != nil {
|
||||
return nil, false
|
||||
}
|
||||
if len(existingValue) > 0 {
|
||||
// if existing value, parse it
|
||||
err = dr.parseDictionaryV(existingValue)
|
||||
if err != nil {
|
||||
return nil, false
|
||||
}
|
||||
}
|
||||
|
||||
// now process operands
|
||||
for _, operand := range operands {
|
||||
next := int64(binary.LittleEndian.Uint64(operand))
|
||||
if next < 0 && uint64(-next) > dr.count {
|
||||
// subtracting next from existing would overflow
|
||||
dr.count = 0
|
||||
} else if next < 0 {
|
||||
dr.count -= uint64(-next)
|
||||
} else {
|
||||
dr.count += uint64(next)
|
||||
}
|
||||
}
|
||||
|
||||
return dr.Value(), true
|
||||
}
|
||||
|
||||
func (m *upsideDownMerge) PartialMerge(key, leftOperand, rightOperand []byte) ([]byte, bool) {
|
||||
left := int64(binary.LittleEndian.Uint64(leftOperand))
|
||||
right := int64(binary.LittleEndian.Uint64(rightOperand))
|
||||
rv := make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(rv, uint64(left+right))
|
||||
return rv, true
|
||||
}
|
||||
|
||||
func (m *upsideDownMerge) Name() string {
|
||||
return "upsideDownMerge"
|
||||
}
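The counting arithmetic in FullMerge, stripped of the key parsing and value encoding, reduces to folding signed little-endian deltas into an unsigned count that clamps at zero on underflow. An illustrative sketch (applyDeltas is hypothetical):

```
package example

import "encoding/binary"

// applyDeltas sketches the arithmetic in upsideDownMerge.FullMerge: fold
// signed little-endian deltas into an unsigned count, clamping at zero
// whenever a decrement would underflow.
func applyDeltas(count uint64, operands [][]byte) uint64 {
	for _, op := range operands {
		delta := int64(binary.LittleEndian.Uint64(op))
		switch {
		case delta < 0 && uint64(-delta) > count:
			count = 0 // decrement larger than the current count
		case delta < 0:
			count -= uint64(-delta)
		default:
			count += uint64(delta)
		}
	}
	return count
}

// e.g. applying deltas [+1, +1, -1] to a count of 3 yields 4; applying [-5] to 3 clamps to 0.
```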
|
57 index/upsidedown/row_merge_test.go Normal file
@@ -0,0 +1,57 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestPartialMerge(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
in [][]byte
|
||||
out uint64
|
||||
}{
|
||||
{
|
||||
in: [][]byte{dictionaryTermIncr, dictionaryTermIncr, dictionaryTermIncr, dictionaryTermIncr, dictionaryTermIncr},
|
||||
out: 5,
|
||||
},
|
||||
}
|
||||
|
||||
mo := &upsideDownMerge{}
|
||||
for _, test := range tests {
|
||||
curr := test.in[0]
|
||||
for _, next := range test.in[1:] {
|
||||
var ok bool
|
||||
curr, ok = mo.PartialMerge([]byte("key"), curr, next)
|
||||
if !ok {
|
||||
t.Errorf("expected partial merge ok")
|
||||
}
|
||||
}
|
||||
actual := decodeCount(curr)
|
||||
if actual != test.out {
|
||||
t.Errorf("expected %d, got %d", test.out, actual)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func decodeCount(in []byte) uint64 {
|
||||
buf := bytes.NewBuffer(in)
|
||||
count, _ := binary.ReadUvarint(buf)
|
||||
return count
|
||||
}
|
382 index/upsidedown/row_test.go Normal file
@@ -0,0 +1,382 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown
|
||||
|
||||
import (
|
||||
"math"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/golang/protobuf/proto"
|
||||
)
|
||||
|
||||
func TestRows(t *testing.T) {
|
||||
tests := []struct {
|
||||
input UpsideDownCouchRow
|
||||
outKey []byte
|
||||
outVal []byte
|
||||
}{
|
||||
{
|
||||
NewVersionRow(1),
|
||||
[]byte{'v'},
|
||||
[]byte{0x1},
|
||||
},
|
||||
{
|
||||
NewFieldRow(0, "name"),
|
||||
[]byte{'f', 0, 0},
|
||||
[]byte{'n', 'a', 'm', 'e', ByteSeparator},
|
||||
},
|
||||
{
|
||||
NewFieldRow(1, "desc"),
|
||||
[]byte{'f', 1, 0},
|
||||
[]byte{'d', 'e', 's', 'c', ByteSeparator},
|
||||
},
|
||||
{
|
||||
NewFieldRow(513, "style"),
|
||||
[]byte{'f', 1, 2},
|
||||
[]byte{'s', 't', 'y', 'l', 'e', ByteSeparator},
|
||||
},
|
||||
{
|
||||
NewDictionaryRow([]byte{'b', 'e', 'e', 'r'}, 0, 27),
|
||||
[]byte{'d', 0, 0, 'b', 'e', 'e', 'r'},
|
||||
[]byte{27},
|
||||
},
|
||||
{
|
||||
NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, []byte("catz"), 3, 3.14),
|
||||
[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'c', 'a', 't', 'z'},
|
||||
[]byte{3, 195, 235, 163, 130, 4},
|
||||
},
|
||||
{
|
||||
NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, []byte("budweiser"), 3, 3.14),
|
||||
[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{3, 195, 235, 163, 130, 4},
|
||||
},
|
||||
{
|
||||
NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, []byte("budweiser"), 3, 3.14, []*TermVector{{field: 0, pos: 1, start: 3, end: 11}, {field: 0, pos: 2, start: 23, end: 31}, {field: 0, pos: 3, start: 43, end: 51}}),
|
||||
[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{3, 195, 235, 163, 130, 4, 0, 1, 3, 11, 0, 0, 2, 23, 31, 0, 0, 3, 43, 51, 0},
|
||||
},
|
||||
// test larger varints
|
||||
{
|
||||
NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, []byte("budweiser"), 25896, 3.14, []*TermVector{{field: 255, pos: 1, start: 3, end: 11}, {field: 0, pos: 2198, start: 23, end: 31}, {field: 0, pos: 3, start: 43, end: 51}}),
|
||||
[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{168, 202, 1, 195, 235, 163, 130, 4, 255, 1, 1, 3, 11, 0, 0, 150, 17, 23, 31, 0, 0, 3, 43, 51, 0},
|
||||
},
|
||||
// test vectors with arrayPositions
|
||||
{
|
||||
NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, []byte("budweiser"), 25896, 3.14, []*TermVector{{field: 255, pos: 1, start: 3, end: 11, arrayPositions: []uint64{0}}, {field: 0, pos: 2198, start: 23, end: 31, arrayPositions: []uint64{1, 2}}, {field: 0, pos: 3, start: 43, end: 51, arrayPositions: []uint64{3, 4, 5}}}),
|
||||
[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{168, 202, 1, 195, 235, 163, 130, 4, 255, 1, 1, 3, 11, 1, 0, 0, 150, 17, 23, 31, 2, 1, 2, 0, 3, 43, 51, 3, 3, 4, 5},
|
||||
},
|
||||
{
|
||||
NewBackIndexRow([]byte("budweiser"), []*BackIndexTermsEntry{{Field: proto.Uint32(0), Terms: []string{"beer"}}}, nil),
|
||||
[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{10, 8, 8, 0, 18, 4, 'b', 'e', 'e', 'r'},
|
||||
},
|
||||
{
|
||||
NewBackIndexRow([]byte("budweiser"), []*BackIndexTermsEntry{{Field: proto.Uint32(0), Terms: []string{"beer"}}, {Field: proto.Uint32(1), Terms: []string{"beat"}}}, nil),
|
||||
[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{10, 8, 8, 0, 18, 4, 'b', 'e', 'e', 'r', 10, 8, 8, 1, 18, 4, 'b', 'e', 'a', 't'},
|
||||
},
|
||||
{
|
||||
NewBackIndexRow([]byte("budweiser"), []*BackIndexTermsEntry{{Field: proto.Uint32(0), Terms: []string{"beer"}}, {Field: proto.Uint32(1), Terms: []string{"beat"}}}, []*BackIndexStoreEntry{{Field: proto.Uint32(3)}, {Field: proto.Uint32(4)}, {Field: proto.Uint32(5)}}),
|
||||
[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{10, 8, 8, 0, 18, 4, 'b', 'e', 'e', 'r', 10, 8, 8, 1, 18, 4, 'b', 'e', 'a', 't', 18, 2, 8, 3, 18, 2, 8, 4, 18, 2, 8, 5},
|
||||
},
|
||||
{
|
||||
NewStoredRow([]byte("budweiser"), 0, []uint64{}, byte('t'), []byte("an american beer")),
|
||||
[]byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', ByteSeparator, 0, 0},
|
||||
[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
|
||||
},
|
||||
{
|
||||
NewStoredRow([]byte("budweiser"), 0, []uint64{2, 294, 3078}, byte('t'), []byte("an american beer")),
|
||||
[]byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', ByteSeparator, 0, 0, 2, 166, 2, 134, 24},
|
||||
[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
|
||||
},
|
||||
{
|
||||
NewInternalRow([]byte("mapping"), []byte(`{"mapping":"json content"}`)),
|
||||
[]byte{'i', 'm', 'a', 'p', 'p', 'i', 'n', 'g'},
|
||||
[]byte{'{', '"', 'm', 'a', 'p', 'p', 'i', 'n', 'g', '"', ':', '"', 'j', 's', 'o', 'n', ' ', 'c', 'o', 'n', 't', 'e', 'n', 't', '"', '}'},
|
||||
},
|
||||
}
|
||||
|
||||
// test going from struct to k/v bytes
|
||||
for i, test := range tests {
|
||||
rk := test.input.Key()
|
||||
if !reflect.DeepEqual(rk, test.outKey) {
|
||||
t.Errorf("Expected key to be %v got: %v", test.outKey, rk)
|
||||
}
|
||||
rv := test.input.Value()
|
||||
if !reflect.DeepEqual(rv, test.outVal) {
|
||||
t.Errorf("Expected value to be %v got: %v for %d", test.outVal, rv, i)
|
||||
}
|
||||
}
|
||||
|
||||
// now test going back from k/v bytes to struct
|
||||
for i, test := range tests {
|
||||
row, err := ParseFromKeyValue(test.outKey, test.outVal)
|
||||
if err != nil {
|
||||
t.Errorf("error parsking key/value: %v", err)
|
||||
}
|
||||
if !reflect.DeepEqual(row, test.input) {
|
||||
t.Errorf("Expected: %#v got: %#v for %d", test.input, row, i)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestInvalidRows(t *testing.T) {
|
||||
tests := []struct {
|
||||
key []byte
|
||||
val []byte
|
||||
}{
|
||||
// empty key
|
||||
{
|
||||
[]byte{},
|
||||
[]byte{},
|
||||
},
|
||||
// no such type q
|
||||
{
|
||||
[]byte{'q'},
|
||||
[]byte{},
|
||||
},
|
||||
// type v, invalid empty value
|
||||
{
|
||||
[]byte{'v'},
|
||||
[]byte{},
|
||||
},
|
||||
// type f, invalid key
|
||||
{
|
||||
[]byte{'f'},
|
||||
[]byte{},
|
||||
},
|
||||
// type f, valid key, invalid value
|
||||
{
|
||||
[]byte{'f', 0, 0},
|
||||
[]byte{},
|
||||
},
|
||||
// type t, invalid key (missing field)
|
||||
{
|
||||
[]byte{'t'},
|
||||
[]byte{},
|
||||
},
|
||||
// type t, invalid key (missing term)
|
||||
{
|
||||
[]byte{'t', 0, 0},
|
||||
[]byte{},
|
||||
},
|
||||
// type t, invalid key (missing id)
|
||||
{
|
||||
[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator},
|
||||
[]byte{},
|
||||
},
|
||||
// type t, invalid val (missing freq)
|
||||
{
|
||||
[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{},
|
||||
},
|
||||
// type t, invalid val (missing norm)
|
||||
{
|
||||
[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{3},
|
||||
},
|
||||
// type t, invalid val (term vector field only partially present; a fully absent field is valid and means no term vectors)
|
||||
{
|
||||
[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{3, 25, 255},
|
||||
},
|
||||
// type t, invalid val (missing tv pos)
|
||||
{
|
||||
[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{3, 25, 0},
|
||||
},
|
||||
// type t, invalid val (missing tv start)
|
||||
{
|
||||
[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{3, 25, 0, 0},
|
||||
},
|
||||
// type t, invalid val (missing tv end)
|
||||
{
|
||||
[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{3, 25, 0, 0, 0},
|
||||
},
|
||||
// type b, invalid key (missing id)
|
||||
{
|
||||
[]byte{'b'},
|
||||
[]byte{'b', 'e', 'e', 'r', ByteSeparator, 0, 0},
|
||||
},
|
||||
// type b, invalid val (missing field)
|
||||
{
|
||||
[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{'g', 'a', 'r', 'b', 'a', 'g', 'e'},
|
||||
},
|
||||
// type s, invalid key (missing id)
|
||||
{
|
||||
[]byte{'s'},
|
||||
[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
|
||||
},
|
||||
// type b, invalid val (missing field)
|
||||
{
|
||||
[]byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', ByteSeparator},
|
||||
[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
_, err := ParseFromKeyValue(test.key, test.val)
|
||||
if err == nil {
|
||||
t.Errorf("expected error, got nil")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDictionaryRowValueBug197(t *testing.T) {
|
||||
// this was the smallest value that would trigger a crash
|
||||
dr := &DictionaryRow{
|
||||
field: 0,
|
||||
term: []byte("marty"),
|
||||
count: 72057594037927936,
|
||||
}
|
||||
dr.Value()
|
||||
// this is the maximum possible value
|
||||
dr = &DictionaryRow{
|
||||
field: 0,
|
||||
term: []byte("marty"),
|
||||
count: math.MaxUint64,
|
||||
}
|
||||
dr.Value()
|
||||
// neither of these should panic
|
||||
}
|
||||
|
||||
func BenchmarkTermFrequencyRowEncode(b *testing.B) {
|
||||
row := NewTermFrequencyRowWithTermVectors(
|
||||
[]byte{'b', 'e', 'e', 'r'},
|
||||
0,
|
||||
[]byte("budweiser"),
|
||||
3,
|
||||
3.14,
|
||||
[]*TermVector{
|
||||
{
|
||||
field: 0,
|
||||
pos: 1,
|
||||
start: 3,
|
||||
end: 11,
|
||||
},
|
||||
{
|
||||
field: 0,
|
||||
pos: 2,
|
||||
start: 23,
|
||||
end: 31,
|
||||
},
|
||||
{
|
||||
field: 0,
|
||||
pos: 3,
|
||||
start: 43,
|
||||
end: 51,
|
||||
},
|
||||
})
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
row.Key()
|
||||
row.Value()
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkTermFrequencyRowDecode(b *testing.B) {
|
||||
k := []byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}
|
||||
v := []byte{3, 195, 235, 163, 130, 4, 0, 1, 3, 11, 0, 0, 2, 23, 31, 0, 0, 3, 43, 51, 0}
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_, err := NewTermFrequencyRowKV(k, v)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkBackIndexRowEncode(b *testing.B) {
|
||||
field := uint32(1)
|
||||
t1 := "term1"
|
||||
row := NewBackIndexRow([]byte("beername"),
|
||||
[]*BackIndexTermsEntry{
|
||||
{
|
||||
Field: &field,
|
||||
Terms: []string{t1},
|
||||
},
|
||||
},
|
||||
[]*BackIndexStoreEntry{
|
||||
{
|
||||
Field: &field,
|
||||
},
|
||||
})
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
row.Key()
|
||||
row.Value()
|
||||
b.Logf("%#v", row.Value())
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkBackIndexRowDecode(b *testing.B) {
|
||||
k := []byte{0x62, 0x62, 0x65, 0x65, 0x72, 0x6e, 0x61, 0x6d, 0x65}
|
||||
v := []byte{0xa, 0x9, 0x8, 0x1, 0x12, 0x5, 0x74, 0x65, 0x72, 0x6d, 0x31, 0x12, 0x2, 0x8, 0x1}
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_, err := NewBackIndexRowKV(k, v)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkStoredRowEncode(b *testing.B) {
|
||||
row := NewStoredRow([]byte("budweiser"), 0, []uint64{}, byte('t'), []byte("an american beer"))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
row.Key()
|
||||
row.Value()
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkStoredRowDecode(b *testing.B) {
|
||||
k := []byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', ByteSeparator, 0, 0}
|
||||
v := []byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'}
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_, err := NewStoredRowKV(k, v)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestVisitBackIndexRow(t *testing.T) {
|
||||
expected := map[uint32][]byte{
|
||||
0: []byte("beer"),
|
||||
1: []byte("beat"),
|
||||
}
|
||||
val := []byte{10, 8, 8, 0, 18, 4, 'b', 'e', 'e', 'r', 10, 8, 8, 1, 18, 4, 'b', 'e', 'a', 't', 18, 2, 8, 3, 18, 2, 8, 4, 18, 2, 8, 5}
|
||||
err := visitBackIndexRow(val, func(field uint32, term []byte) {
|
||||
if reflect.DeepEqual(expected[field], term) {
|
||||
delete(expected, field)
|
||||
}
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(expected) > 0 {
|
||||
t.Errorf("expected visitor to see these but did not %v", expected)
|
||||
}
|
||||
}
55
index/upsidedown/stats.go
Normal file
@@ -0,0 +1,55 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown
|
||||
|
||||
import (
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/util"
|
||||
"github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
type indexStat struct {
|
||||
updates, deletes, batches, errors uint64
|
||||
analysisTime, indexTime uint64
|
||||
termSearchersStarted uint64
|
||||
termSearchersFinished uint64
|
||||
numPlainTextBytesIndexed uint64
|
||||
i *UpsideDownCouch
|
||||
}
|
||||
|
||||
func (i *indexStat) statsMap() map[string]interface{} {
|
||||
m := map[string]interface{}{}
|
||||
m["updates"] = atomic.LoadUint64(&i.updates)
|
||||
m["deletes"] = atomic.LoadUint64(&i.deletes)
|
||||
m["batches"] = atomic.LoadUint64(&i.batches)
|
||||
m["errors"] = atomic.LoadUint64(&i.errors)
|
||||
m["analysis_time"] = atomic.LoadUint64(&i.analysisTime)
|
||||
m["index_time"] = atomic.LoadUint64(&i.indexTime)
|
||||
m["term_searchers_started"] = atomic.LoadUint64(&i.termSearchersStarted)
|
||||
m["term_searchers_finished"] = atomic.LoadUint64(&i.termSearchersFinished)
|
||||
m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&i.numPlainTextBytesIndexed)
|
||||
|
||||
if o, ok := i.i.store.(store.KVStoreStats); ok {
|
||||
m["kv"] = o.StatsMap()
|
||||
}
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
func (i *indexStat) MarshalJSON() ([]byte, error) {
|
||||
m := i.statsMap()
|
||||
return util.MarshalJSON(m)
|
||||
}
85
index/upsidedown/store/boltdb/iterator.go
Normal file
@@ -0,0 +1,85 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package boltdb
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
type Iterator struct {
|
||||
store *Store
|
||||
tx *bolt.Tx
|
||||
cursor *bolt.Cursor
|
||||
prefix []byte
|
||||
start []byte
|
||||
end []byte
|
||||
valid bool
|
||||
key []byte
|
||||
val []byte
|
||||
}
|
||||
|
||||
func (i *Iterator) updateValid() {
|
||||
i.valid = (i.key != nil)
|
||||
if i.valid {
|
||||
if i.prefix != nil {
|
||||
i.valid = bytes.HasPrefix(i.key, i.prefix)
|
||||
} else if i.end != nil {
|
||||
i.valid = bytes.Compare(i.key, i.end) < 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (i *Iterator) Seek(k []byte) {
|
||||
if i.start != nil && bytes.Compare(k, i.start) < 0 {
|
||||
k = i.start
|
||||
}
|
||||
if i.prefix != nil && !bytes.HasPrefix(k, i.prefix) {
|
||||
if bytes.Compare(k, i.prefix) < 0 {
|
||||
k = i.prefix
|
||||
} else {
|
||||
i.valid = false
|
||||
return
|
||||
}
|
||||
}
|
||||
i.key, i.val = i.cursor.Seek(k)
|
||||
i.updateValid()
|
||||
}
|
||||
|
||||
func (i *Iterator) Next() {
|
||||
i.key, i.val = i.cursor.Next()
|
||||
i.updateValid()
|
||||
}
|
||||
|
||||
func (i *Iterator) Current() ([]byte, []byte, bool) {
|
||||
return i.key, i.val, i.valid
|
||||
}
|
||||
|
||||
func (i *Iterator) Key() []byte {
|
||||
return i.key
|
||||
}
|
||||
|
||||
func (i *Iterator) Value() []byte {
|
||||
return i.val
|
||||
}
|
||||
|
||||
func (i *Iterator) Valid() bool {
|
||||
return i.valid
|
||||
}
|
||||
|
||||
func (i *Iterator) Close() error {
|
||||
return nil
|
||||
}
73
index/upsidedown/store/boltdb/reader.go
Normal file
@@ -0,0 +1,73 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package boltdb
|
||||
|
||||
import (
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
type Reader struct {
|
||||
store *Store
|
||||
tx *bolt.Tx
|
||||
bucket *bolt.Bucket
|
||||
}
|
||||
|
||||
func (r *Reader) Get(key []byte) ([]byte, error) {
|
||||
var rv []byte
|
||||
v := r.bucket.Get(key)
|
||||
if v != nil {
|
||||
rv = make([]byte, len(v))
|
||||
copy(rv, v)
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) {
|
||||
return store.MultiGet(r, keys)
|
||||
}
|
||||
|
||||
func (r *Reader) PrefixIterator(prefix []byte) store.KVIterator {
|
||||
cursor := r.bucket.Cursor()
|
||||
|
||||
rv := &Iterator{
|
||||
store: r.store,
|
||||
tx: r.tx,
|
||||
cursor: cursor,
|
||||
prefix: prefix,
|
||||
}
|
||||
|
||||
rv.Seek(prefix)
|
||||
return rv
|
||||
}
|
||||
|
||||
func (r *Reader) RangeIterator(start, end []byte) store.KVIterator {
|
||||
cursor := r.bucket.Cursor()
|
||||
|
||||
rv := &Iterator{
|
||||
store: r.store,
|
||||
tx: r.tx,
|
||||
cursor: cursor,
|
||||
start: start,
|
||||
end: end,
|
||||
}
|
||||
|
||||
rv.Seek(start)
|
||||
return rv
|
||||
}
|
||||
|
||||
func (r *Reader) Close() error {
|
||||
return r.tx.Rollback()
|
||||
}
28
index/upsidedown/store/boltdb/stats.go
Normal file
@@ -0,0 +1,28 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package boltdb

import (
	"github.com/blevesearch/bleve/v2/util"
)

type stats struct {
	s *Store
}

func (s *stats) MarshalJSON() ([]byte, error) {
	bs := s.s.db.Stats()
	return util.MarshalJSON(bs)
}
184
index/upsidedown/store/boltdb/store.go
Normal file
@@ -0,0 +1,184 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package boltdb implements a store.KVStore on top of BoltDB. It supports the
|
||||
// following options:
|
||||
//
|
||||
// "bucket" (string): the name of BoltDB bucket to use, defaults to "bleve".
|
||||
//
|
||||
// "nosync" (bool): if true, set boltdb.DB.NoSync to true. It speeds up index
|
||||
// operations in exchange for losing integrity guarantees if indexing aborts
// without closing the index. Use it when rebuilding indexes from scratch.
|
||||
package boltdb
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
const (
|
||||
Name = "boltdb"
|
||||
defaultCompactBatchSize = 100
|
||||
)
|
||||
|
||||
type Store struct {
|
||||
path string
|
||||
bucket string
|
||||
db *bolt.DB
|
||||
noSync bool
|
||||
fillPercent float64
|
||||
mo store.MergeOperator
|
||||
}
|
||||
|
||||
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) {
|
||||
path, ok := config["path"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify path")
|
||||
}
|
||||
if path == "" {
|
||||
return nil, os.ErrInvalid
|
||||
}
|
||||
|
||||
bucket, ok := config["bucket"].(string)
|
||||
if !ok {
|
||||
bucket = "bleve"
|
||||
}
|
||||
|
||||
noSync, _ := config["nosync"].(bool)
|
||||
|
||||
fillPercent, ok := config["fillPercent"].(float64)
|
||||
if !ok {
|
||||
fillPercent = bolt.DefaultFillPercent
|
||||
}
|
||||
|
||||
bo := &bolt.Options{}
|
||||
ro, ok := config["read_only"].(bool)
|
||||
if ok {
|
||||
bo.ReadOnly = ro
|
||||
}
|
||||
|
||||
if initialMmapSize, ok := config["initialMmapSize"].(int); ok {
|
||||
bo.InitialMmapSize = initialMmapSize
|
||||
} else if initialMmapSize, ok := config["initialMmapSize"].(float64); ok {
|
||||
bo.InitialMmapSize = int(initialMmapSize)
|
||||
}
|
||||
|
||||
db, err := bolt.Open(path, 0600, bo)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
db.NoSync = noSync
|
||||
|
||||
if !bo.ReadOnly {
|
||||
err = db.Update(func(tx *bolt.Tx) error {
|
||||
_, err := tx.CreateBucketIfNotExists([]byte(bucket))
|
||||
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
rv := Store{
|
||||
path: path,
|
||||
bucket: bucket,
|
||||
db: db,
|
||||
mo: mo,
|
||||
noSync: noSync,
|
||||
fillPercent: fillPercent,
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func (bs *Store) Close() error {
|
||||
return bs.db.Close()
|
||||
}
|
||||
|
||||
func (bs *Store) Reader() (store.KVReader, error) {
|
||||
tx, err := bs.db.Begin(false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &Reader{
|
||||
store: bs,
|
||||
tx: tx,
|
||||
bucket: tx.Bucket([]byte(bs.bucket)),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (bs *Store) Writer() (store.KVWriter, error) {
|
||||
return &Writer{
|
||||
store: bs,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (bs *Store) Stats() json.Marshaler {
|
||||
return &stats{
|
||||
s: bs,
|
||||
}
|
||||
}
|
||||
|
||||
// CompactWithBatchSize removes DictionaryTerm entries with a count of zero (in batchSize batches)
|
||||
// Removing entries is a workaround for github issue #374.
|
||||
func (bs *Store) CompactWithBatchSize(batchSize int) error {
|
||||
for {
|
||||
cnt := 0
|
||||
err := bs.db.Batch(func(tx *bolt.Tx) error {
|
||||
c := tx.Bucket([]byte(bs.bucket)).Cursor()
|
||||
prefix := []byte("d")
|
||||
|
||||
for k, v := c.Seek(prefix); bytes.HasPrefix(k, prefix); k, v = c.Next() {
|
||||
if bytes.Equal(v, []byte{0}) {
|
||||
cnt++
|
||||
if err := c.Delete(); err != nil {
|
||||
return err
|
||||
}
|
||||
if cnt == batchSize {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if cnt == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Compact calls CompactWithBatchSize with a default batch size of 100. This is a workaround
|
||||
// for github issue #374.
|
||||
func (bs *Store) Compact() error {
|
||||
return bs.CompactWithBatchSize(defaultCompactBatchSize)
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterKVStore(Name, New)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
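// A minimal usage sketch (illustrative only, not part of the upstream sources):
// it shows how the configuration keys handled by New above -- "path", "bucket",
// "nosync", "fillPercent" -- can be supplied when opening the boltdb store.
// The file path and option values below are placeholders.
package main

import (
	"fmt"

	boltdb "github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb"
)

func main() {
	// A nil MergeOperator is acceptable here because the sketch performs no merges.
	kv, err := boltdb.New(nil, map[string]interface{}{
		"path":        "example.bolt", // required; placeholder name
		"bucket":      "bleve",        // same as the default
		"nosync":      true,           // faster indexing, weaker durability (see package comment)
		"fillPercent": 0.9,            // forwarded to bolt.Bucket.FillPercent on writes
	})
	if err != nil {
		panic(err)
	}
	defer kv.Close()
	fmt.Println("boltdb KVStore opened")
}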
148
index/upsidedown/store/boltdb/store_test.go
Normal file
@@ -0,0 +1,148 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build !darwin || !arm64
|
||||
|
||||
package boltdb
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
"github.com/blevesearch/upsidedown_store_api/test"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
func open(t *testing.T, mo store.MergeOperator) store.KVStore {
|
||||
rv, err := New(mo, map[string]interface{}{"path": "test"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func cleanup(t *testing.T, s store.KVStore) {
|
||||
err := s.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = os.RemoveAll("test")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBoltDBKVCrud(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestKVCrud(t, s)
|
||||
}
|
||||
|
||||
func TestBoltDBReaderIsolation(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestReaderIsolation(t, s)
|
||||
}
|
||||
|
||||
func TestBoltDBReaderOwnsGetBytes(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestReaderOwnsGetBytes(t, s)
|
||||
}
|
||||
|
||||
func TestBoltDBWriterOwnsBytes(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestWriterOwnsBytes(t, s)
|
||||
}
|
||||
|
||||
func TestBoltDBPrefixIterator(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestPrefixIterator(t, s)
|
||||
}
|
||||
|
||||
func TestBoltDBPrefixIteratorSeek(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestPrefixIteratorSeek(t, s)
|
||||
}
|
||||
|
||||
func TestBoltDBRangeIterator(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestRangeIterator(t, s)
|
||||
}
|
||||
|
||||
func TestBoltDBRangeIteratorSeek(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestRangeIteratorSeek(t, s)
|
||||
}
|
||||
|
||||
func TestBoltDBMerge(t *testing.T) {
|
||||
s := open(t, &test.TestMergeCounter{})
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestMerge(t, s)
|
||||
}
|
||||
|
||||
func TestBoltDBConfig(t *testing.T) {
|
||||
var tests = []struct {
|
||||
in map[string]interface{}
|
||||
path string
|
||||
bucket string
|
||||
noSync bool
|
||||
fillPercent float64
|
||||
}{
|
||||
{
|
||||
map[string]interface{}{"path": "test", "bucket": "mybucket", "nosync": true, "fillPercent": 0.75},
|
||||
"test",
|
||||
"mybucket",
|
||||
true,
|
||||
0.75,
|
||||
},
|
||||
{
|
||||
map[string]interface{}{"path": "test"},
|
||||
"test",
|
||||
"bleve",
|
||||
false,
|
||||
bolt.DefaultFillPercent,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
kv, err := New(nil, test.in)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
bs, ok := kv.(*Store)
|
||||
if !ok {
|
||||
t.Fatal("failed type assertion to *boltdb.Store")
|
||||
}
|
||||
if bs.path != test.path {
|
||||
t.Fatalf("path: expected %q, got %q", test.path, bs.path)
|
||||
}
|
||||
if bs.bucket != test.bucket {
|
||||
t.Fatalf("bucket: expected %q, got %q", test.bucket, bs.bucket)
|
||||
}
|
||||
if bs.noSync != test.noSync {
|
||||
t.Fatalf("noSync: expected %t, got %t", test.noSync, bs.noSync)
|
||||
}
|
||||
if bs.fillPercent != test.fillPercent {
|
||||
t.Fatalf("fillPercent: expected %f, got %f", test.fillPercent, bs.fillPercent)
|
||||
}
|
||||
cleanup(t, kv)
|
||||
}
|
||||
}
95
index/upsidedown/store/boltdb/writer.go
Normal file
@@ -0,0 +1,95 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package boltdb
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
type Writer struct {
|
||||
store *Store
|
||||
}
|
||||
|
||||
func (w *Writer) NewBatch() store.KVBatch {
|
||||
return store.NewEmulatedBatch(w.store.mo)
|
||||
}
|
||||
|
||||
func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) {
|
||||
return make([]byte, options.TotalBytes), w.NewBatch(), nil
|
||||
}
|
||||
|
||||
func (w *Writer) ExecuteBatch(batch store.KVBatch) (err error) {
|
||||
|
||||
emulatedBatch, ok := batch.(*store.EmulatedBatch)
|
||||
if !ok {
|
||||
return fmt.Errorf("wrong type of batch")
|
||||
}
|
||||
|
||||
tx, err := w.store.db.Begin(true)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
// defer function to ensure that once started,
|
||||
// we either Commit tx or Rollback
|
||||
defer func() {
|
||||
// if nothing went wrong, commit
|
||||
if err == nil {
|
||||
// careful to catch error here too
|
||||
err = tx.Commit()
|
||||
} else {
|
||||
// caller should see error that caused abort,
|
||||
// not success or failure of Rollback itself
|
||||
_ = tx.Rollback()
|
||||
}
|
||||
}()
|
||||
|
||||
bucket := tx.Bucket([]byte(w.store.bucket))
|
||||
bucket.FillPercent = w.store.fillPercent
|
||||
|
||||
for k, mergeOps := range emulatedBatch.Merger.Merges {
|
||||
kb := []byte(k)
|
||||
existingVal := bucket.Get(kb)
|
||||
mergedVal, fullMergeOk := w.store.mo.FullMerge(kb, existingVal, mergeOps)
|
||||
if !fullMergeOk {
|
||||
err = fmt.Errorf("merge operator returned failure")
|
||||
return
|
||||
}
|
||||
err = bucket.Put(kb, mergedVal)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
for _, op := range emulatedBatch.Ops {
|
||||
if op.V != nil {
|
||||
err = bucket.Put(op.K, op.V)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
} else {
|
||||
err = bucket.Delete(op.K)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (w *Writer) Close() error {
|
||||
return nil
|
||||
}
50
index/upsidedown/store/goleveldb/batch.go
Normal file
@@ -0,0 +1,50 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package goleveldb
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/goleveldb/leveldb"
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
type Batch struct {
|
||||
store *Store
|
||||
merge *store.EmulatedMerge
|
||||
batch *leveldb.Batch
|
||||
}
|
||||
|
||||
func (b *Batch) Set(key, val []byte) {
|
||||
b.batch.Put(key, val)
|
||||
}
|
||||
|
||||
func (b *Batch) Delete(key []byte) {
|
||||
b.batch.Delete(key)
|
||||
}
|
||||
|
||||
func (b *Batch) Merge(key, val []byte) {
|
||||
b.merge.Merge(key, val)
|
||||
}
|
||||
|
||||
func (b *Batch) Reset() {
|
||||
b.batch.Reset()
|
||||
b.merge = store.NewEmulatedMerge(b.store.mo)
|
||||
}
|
||||
|
||||
func (b *Batch) Close() error {
|
||||
b.batch.Reset()
|
||||
b.batch = nil
|
||||
b.merge = nil
|
||||
return nil
|
||||
}
66
index/upsidedown/store/goleveldb/config.go
Normal file
@@ -0,0 +1,66 @@
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package goleveldb
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/goleveldb/leveldb/filter"
|
||||
"github.com/blevesearch/goleveldb/leveldb/opt"
|
||||
)
|
||||
|
||||
func applyConfig(o *opt.Options, config map[string]interface{}) (*opt.Options, error) {
|
||||
|
||||
ro, ok := config["read_only"].(bool)
|
||||
if ok {
|
||||
o.ReadOnly = ro
|
||||
}
|
||||
|
||||
cim, ok := config["create_if_missing"].(bool)
|
||||
if ok {
|
||||
o.ErrorIfMissing = !cim
|
||||
}
|
||||
|
||||
eie, ok := config["error_if_exists"].(bool)
|
||||
if ok {
|
||||
o.ErrorIfExist = eie
|
||||
}
|
||||
|
||||
wbs, ok := config["write_buffer_size"].(float64)
|
||||
if ok {
|
||||
o.WriteBuffer = int(wbs)
|
||||
}
|
||||
|
||||
bs, ok := config["block_size"].(float64)
|
||||
if ok {
|
||||
o.BlockSize = int(bs)
|
||||
}
|
||||
|
||||
bri, ok := config["block_restart_interval"].(float64)
|
||||
if ok {
|
||||
o.BlockRestartInterval = int(bri)
|
||||
}
|
||||
|
||||
lcc, ok := config["lru_cache_capacity"].(float64)
|
||||
if ok {
|
||||
o.BlockCacheCapacity = int(lcc)
|
||||
}
|
||||
|
||||
bfbpk, ok := config["bloom_filter_bits_per_key"].(float64)
|
||||
if ok {
|
||||
bf := filter.NewBloomFilter(int(bfbpk))
|
||||
o.Filter = bf
|
||||
}
|
||||
|
||||
return o, nil
|
||||
}
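// A minimal usage sketch (illustrative only, not part of the upstream sources):
// the keys recognized by applyConfig above are passed through the config map
// given to New in store.go. Numeric options are asserted as float64, which is
// how JSON-decoded configuration arrives, so the values below are float64.
// The path and the numbers are placeholders.
package main

import (
	"github.com/blevesearch/bleve/v2/index/upsidedown/store/goleveldb"
)

func main() {
	kv, err := goleveldb.New(nil, map[string]interface{}{
		"path":                      "example.ldb",
		"create_if_missing":         true,
		"write_buffer_size":         float64(8 << 20),
		"block_size":                float64(4096),
		"lru_cache_capacity":        float64(16 << 20),
		"bloom_filter_bits_per_key": float64(10),
	})
	if err != nil {
		panic(err)
	}
	defer kv.Close()
}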
54
index/upsidedown/store/goleveldb/iterator.go
Normal file
@@ -0,0 +1,54 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package goleveldb
|
||||
|
||||
import "github.com/blevesearch/goleveldb/leveldb/iterator"
|
||||
|
||||
type Iterator struct {
|
||||
store *Store
|
||||
iterator iterator.Iterator
|
||||
}
|
||||
|
||||
func (ldi *Iterator) Seek(key []byte) {
|
||||
ldi.iterator.Seek(key)
|
||||
}
|
||||
|
||||
func (ldi *Iterator) Next() {
|
||||
ldi.iterator.Next()
|
||||
}
|
||||
|
||||
func (ldi *Iterator) Current() ([]byte, []byte, bool) {
|
||||
if ldi.Valid() {
|
||||
return ldi.Key(), ldi.Value(), true
|
||||
}
|
||||
return nil, nil, false
|
||||
}
|
||||
|
||||
func (ldi *Iterator) Key() []byte {
|
||||
return ldi.iterator.Key()
|
||||
}
|
||||
|
||||
func (ldi *Iterator) Value() []byte {
|
||||
return ldi.iterator.Value()
|
||||
}
|
||||
|
||||
func (ldi *Iterator) Valid() bool {
|
||||
return ldi.iterator.Valid()
|
||||
}
|
||||
|
||||
func (ldi *Iterator) Close() error {
|
||||
ldi.iterator.Release()
|
||||
return nil
|
||||
}
68
index/upsidedown/store/goleveldb/reader.go
Normal file
@@ -0,0 +1,68 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package goleveldb
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/goleveldb/leveldb"
|
||||
"github.com/blevesearch/goleveldb/leveldb/util"
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
type Reader struct {
|
||||
store *Store
|
||||
snapshot *leveldb.Snapshot
|
||||
}
|
||||
|
||||
func (r *Reader) Get(key []byte) ([]byte, error) {
|
||||
b, err := r.snapshot.Get(key, r.store.defaultReadOptions)
|
||||
if err == leveldb.ErrNotFound {
|
||||
return nil, nil
|
||||
}
|
||||
return b, err
|
||||
}
|
||||
|
||||
func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) {
|
||||
return store.MultiGet(r, keys)
|
||||
}
|
||||
|
||||
func (r *Reader) PrefixIterator(prefix []byte) store.KVIterator {
|
||||
byteRange := util.BytesPrefix(prefix)
|
||||
iter := r.snapshot.NewIterator(byteRange, r.store.defaultReadOptions)
|
||||
iter.First()
|
||||
rv := Iterator{
|
||||
store: r.store,
|
||||
iterator: iter,
|
||||
}
|
||||
return &rv
|
||||
}
|
||||
|
||||
func (r *Reader) RangeIterator(start, end []byte) store.KVIterator {
|
||||
byteRange := &util.Range{
|
||||
Start: start,
|
||||
Limit: end,
|
||||
}
|
||||
iter := r.snapshot.NewIterator(byteRange, r.store.defaultReadOptions)
|
||||
iter.First()
|
||||
rv := Iterator{
|
||||
store: r.store,
|
||||
iterator: iter,
|
||||
}
|
||||
return &rv
|
||||
}
|
||||
|
||||
func (r *Reader) Close() error {
|
||||
r.snapshot.Release()
|
||||
return nil
|
||||
}
152
index/upsidedown/store/goleveldb/store.go
Normal file
@@ -0,0 +1,152 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package goleveldb
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/goleveldb/leveldb"
|
||||
"github.com/blevesearch/goleveldb/leveldb/opt"
|
||||
"github.com/blevesearch/goleveldb/leveldb/util"
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
const (
|
||||
Name = "goleveldb"
|
||||
defaultCompactBatchSize = 250
|
||||
)
|
||||
|
||||
type Store struct {
|
||||
path string
|
||||
opts *opt.Options
|
||||
db *leveldb.DB
|
||||
mo store.MergeOperator
|
||||
|
||||
defaultWriteOptions *opt.WriteOptions
|
||||
defaultReadOptions *opt.ReadOptions
|
||||
}
|
||||
|
||||
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) {
|
||||
|
||||
path, ok := config["path"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify path")
|
||||
}
|
||||
if path == "" {
|
||||
return nil, os.ErrInvalid
|
||||
}
|
||||
|
||||
opts, err := applyConfig(&opt.Options{}, config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
db, err := leveldb.OpenFile(path, opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rv := Store{
|
||||
path: path,
|
||||
opts: opts,
|
||||
db: db,
|
||||
mo: mo,
|
||||
defaultReadOptions: &opt.ReadOptions{},
|
||||
defaultWriteOptions: &opt.WriteOptions{},
|
||||
}
|
||||
rv.defaultWriteOptions.Sync = true
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func (ldbs *Store) Close() error {
|
||||
return ldbs.db.Close()
|
||||
}
|
||||
|
||||
func (ldbs *Store) Reader() (store.KVReader, error) {
|
||||
snapshot, _ := ldbs.db.GetSnapshot()
|
||||
return &Reader{
|
||||
store: ldbs,
|
||||
snapshot: snapshot,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (ldbs *Store) Writer() (store.KVWriter, error) {
|
||||
return &Writer{
|
||||
store: ldbs,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// CompactWithBatchSize removes DictionaryTerm entries with a count of zero (in batchSize batches), then
|
||||
// compacts the underlying goleveldb store. Removing entries is a workaround for github issue #374.
|
||||
func (ldbs *Store) CompactWithBatchSize(batchSize int) error {
|
||||
// workaround for github issue #374 - remove DictionaryTerm keys with count=0
|
||||
batch := &leveldb.Batch{}
|
||||
for {
|
||||
t, err := ldbs.db.OpenTransaction()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
iter := t.NewIterator(util.BytesPrefix([]byte("d")), ldbs.defaultReadOptions)
|
||||
|
||||
for iter.Next() {
|
||||
if bytes.Equal(iter.Value(), []byte{0}) {
|
||||
k := append([]byte{}, iter.Key()...)
|
||||
batch.Delete(k)
|
||||
}
|
||||
if batch.Len() == batchSize {
|
||||
break
|
||||
}
|
||||
}
|
||||
iter.Release()
|
||||
if iter.Error() != nil {
|
||||
t.Discard()
|
||||
return iter.Error()
|
||||
}
|
||||
|
||||
if batch.Len() > 0 {
|
||||
err := t.Write(batch, ldbs.defaultWriteOptions)
|
||||
if err != nil {
|
||||
t.Discard()
|
||||
return err
|
||||
}
|
||||
err = t.Commit()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
t.Discard()
|
||||
break
|
||||
}
|
||||
batch.Reset()
|
||||
}
|
||||
|
||||
return ldbs.db.CompactRange(util.Range{Start: nil, Limit: nil})
|
||||
}
|
||||
|
||||
// Compact compacts the underlying goleveldb store. The current implementation includes a workaround
|
||||
// for github issue #374 (see CompactWithBatchSize).
|
||||
func (ldbs *Store) Compact() error {
|
||||
return ldbs.CompactWithBatchSize(defaultCompactBatchSize)
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterKVStore(Name, New)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
99
index/upsidedown/store/goleveldb/store_test.go
Normal file
@@ -0,0 +1,99 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package goleveldb
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
"github.com/blevesearch/upsidedown_store_api/test"
|
||||
)
|
||||
|
||||
func open(t *testing.T, mo store.MergeOperator) store.KVStore {
|
||||
rv, err := New(mo, map[string]interface{}{
|
||||
"path": "test",
|
||||
"create_if_missing": true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func cleanup(t *testing.T, s store.KVStore) {
|
||||
err := s.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = os.RemoveAll("test")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGoLevelDBKVCrud(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestKVCrud(t, s)
|
||||
}
|
||||
|
||||
func TestGoLevelDBReaderIsolation(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestReaderIsolation(t, s)
|
||||
}
|
||||
|
||||
func TestGoLevelDBReaderOwnsGetBytes(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestReaderOwnsGetBytes(t, s)
|
||||
}
|
||||
|
||||
func TestGoLevelDBWriterOwnsBytes(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestWriterOwnsBytes(t, s)
|
||||
}
|
||||
|
||||
func TestGoLevelDBPrefixIterator(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestPrefixIterator(t, s)
|
||||
}
|
||||
|
||||
func TestGoLevelDBPrefixIteratorSeek(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestPrefixIteratorSeek(t, s)
|
||||
}
|
||||
|
||||
func TestGoLevelDBRangeIterator(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestRangeIterator(t, s)
|
||||
}
|
||||
|
||||
func TestGoLevelDBRangeIteratorSeek(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestRangeIteratorSeek(t, s)
|
||||
}
|
||||
|
||||
func TestGoLevelDBMerge(t *testing.T) {
|
||||
s := open(t, &test.TestMergeCounter{})
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestMerge(t, s)
|
||||
}
68
index/upsidedown/store/goleveldb/writer.go
Normal file
@@ -0,0 +1,68 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package goleveldb
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/goleveldb/leveldb"
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
type Writer struct {
|
||||
store *Store
|
||||
}
|
||||
|
||||
func (w *Writer) NewBatch() store.KVBatch {
|
||||
rv := Batch{
|
||||
store: w.store,
|
||||
merge: store.NewEmulatedMerge(w.store.mo),
|
||||
batch: new(leveldb.Batch),
|
||||
}
|
||||
return &rv
|
||||
}
|
||||
|
||||
func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) {
|
||||
return make([]byte, options.TotalBytes), w.NewBatch(), nil
|
||||
}
|
||||
|
||||
func (w *Writer) ExecuteBatch(b store.KVBatch) error {
|
||||
batch, ok := b.(*Batch)
|
||||
if !ok {
|
||||
return fmt.Errorf("wrong type of batch")
|
||||
}
|
||||
|
||||
// first process merges
|
||||
for k, mergeOps := range batch.merge.Merges {
|
||||
kb := []byte(k)
|
||||
existingVal, err := w.store.db.Get(kb, w.store.defaultReadOptions)
|
||||
if err != nil && err != leveldb.ErrNotFound {
|
||||
return err
|
||||
}
|
||||
mergedVal, fullMergeOk := w.store.mo.FullMerge(kb, existingVal, mergeOps)
|
||||
if !fullMergeOk {
|
||||
return fmt.Errorf("merge operator returned failure")
|
||||
}
|
||||
// add the final merge to this batch
|
||||
batch.batch.Put(kb, mergedVal)
|
||||
}
|
||||
|
||||
// now execute the batch
|
||||
return w.store.db.Write(batch.batch, w.store.defaultWriteOptions)
|
||||
}
|
||||
|
||||
func (w *Writer) Close() error {
|
||||
return nil
|
||||
}
152
index/upsidedown/store/gtreap/iterator.go
Normal file
@@ -0,0 +1,152 @@
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package gtreap provides an in-memory implementation of the
|
||||
// KVStore interfaces using the gtreap balanced-binary treap,
|
||||
// copy-on-write data structure.
|
||||
package gtreap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"sync"
|
||||
|
||||
"github.com/blevesearch/gtreap"
|
||||
)
|
||||
|
||||
type Iterator struct {
|
||||
t *gtreap.Treap
|
||||
|
||||
m sync.Mutex
|
||||
cancelCh chan struct{}
|
||||
nextCh chan *Item
|
||||
curr *Item
|
||||
currOk bool
|
||||
|
||||
prefix []byte
|
||||
start []byte
|
||||
end []byte
|
||||
}
|
||||
|
||||
func (w *Iterator) Seek(k []byte) {
|
||||
if w.start != nil && bytes.Compare(k, w.start) < 0 {
|
||||
k = w.start
|
||||
}
|
||||
if w.prefix != nil && !bytes.HasPrefix(k, w.prefix) {
|
||||
if bytes.Compare(k, w.prefix) < 0 {
|
||||
k = w.prefix
|
||||
} else {
|
||||
var end []byte
|
||||
for i := len(w.prefix) - 1; i >= 0; i-- {
|
||||
c := w.prefix[i]
|
||||
if c < 0xff {
|
||||
end = make([]byte, i+1)
|
||||
copy(end, w.prefix)
|
||||
end[i] = c + 1
|
||||
break
|
||||
}
|
||||
}
|
||||
k = end
|
||||
}
|
||||
}
|
||||
w.restart(&Item{k: k})
|
||||
}
|
||||
|
||||
func (w *Iterator) restart(start *Item) *Iterator {
|
||||
cancelCh := make(chan struct{})
|
||||
nextCh := make(chan *Item, 1)
|
||||
|
||||
w.m.Lock()
|
||||
if w.cancelCh != nil {
|
||||
close(w.cancelCh)
|
||||
}
|
||||
w.cancelCh = cancelCh
|
||||
w.nextCh = nextCh
|
||||
w.curr = nil
|
||||
w.currOk = false
|
||||
w.m.Unlock()
|
||||
|
||||
go func() {
|
||||
if start != nil {
|
||||
w.t.VisitAscend(start, func(itm gtreap.Item) bool {
|
||||
select {
|
||||
case <-cancelCh:
|
||||
return false
|
||||
case nextCh <- itm.(*Item):
|
||||
return true
|
||||
}
|
||||
})
|
||||
}
|
||||
close(nextCh)
|
||||
}()
|
||||
|
||||
w.Next()
|
||||
|
||||
return w
|
||||
}
|
||||
|
||||
func (w *Iterator) Next() {
|
||||
w.m.Lock()
|
||||
nextCh := w.nextCh
|
||||
w.m.Unlock()
|
||||
w.curr, w.currOk = <-nextCh
|
||||
}
|
||||
|
||||
func (w *Iterator) Current() ([]byte, []byte, bool) {
|
||||
w.m.Lock()
|
||||
defer w.m.Unlock()
|
||||
if !w.currOk || w.curr == nil {
|
||||
return nil, nil, false
|
||||
}
|
||||
if w.prefix != nil && !bytes.HasPrefix(w.curr.k, w.prefix) {
|
||||
return nil, nil, false
|
||||
} else if w.end != nil && bytes.Compare(w.curr.k, w.end) >= 0 {
|
||||
return nil, nil, false
|
||||
}
|
||||
return w.curr.k, w.curr.v, w.currOk
|
||||
}
|
||||
|
||||
func (w *Iterator) Key() []byte {
|
||||
k, _, ok := w.Current()
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return k
|
||||
}
|
||||
|
||||
func (w *Iterator) Value() []byte {
|
||||
_, v, ok := w.Current()
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
func (w *Iterator) Valid() bool {
|
||||
_, _, ok := w.Current()
|
||||
return ok
|
||||
}
|
||||
|
||||
func (w *Iterator) Close() error {
|
||||
w.m.Lock()
|
||||
if w.cancelCh != nil {
|
||||
close(w.cancelCh)
|
||||
}
|
||||
w.cancelCh = nil
|
||||
w.nextCh = nil
|
||||
w.curr = nil
|
||||
w.currOk = false
|
||||
w.m.Unlock()
|
||||
|
||||
return nil
|
||||
}
66
index/upsidedown/store/gtreap/reader.go
Normal file
@@ -0,0 +1,66 @@
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package gtreap provides an in-memory implementation of the
|
||||
// KVStore interfaces using the gtreap balanced-binary treap,
|
||||
// copy-on-write data structure.
|
||||
package gtreap
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/upsidedown_store_api"
|
||||
|
||||
"github.com/blevesearch/gtreap"
|
||||
)
|
||||
|
||||
type Reader struct {
|
||||
t *gtreap.Treap
|
||||
}
|
||||
|
||||
func (w *Reader) Get(k []byte) (v []byte, err error) {
|
||||
var rv []byte
|
||||
itm := w.t.Get(&Item{k: k})
|
||||
if itm != nil {
|
||||
rv = make([]byte, len(itm.(*Item).v))
|
||||
copy(rv, itm.(*Item).v)
|
||||
return rv, nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) {
|
||||
return store.MultiGet(r, keys)
|
||||
}
|
||||
|
||||
func (w *Reader) PrefixIterator(k []byte) store.KVIterator {
|
||||
rv := Iterator{
|
||||
t: w.t,
|
||||
prefix: k,
|
||||
}
|
||||
rv.restart(&Item{k: k})
|
||||
return &rv
|
||||
}
|
||||
|
||||
func (w *Reader) RangeIterator(start, end []byte) store.KVIterator {
|
||||
rv := Iterator{
|
||||
t: w.t,
|
||||
start: start,
|
||||
end: end,
|
||||
}
|
||||
rv.restart(&Item{k: start})
|
||||
return &rv
|
||||
}
|
||||
|
||||
func (w *Reader) Close() error {
|
||||
return nil
|
||||
}
85
index/upsidedown/store/gtreap/store.go
Normal file
@@ -0,0 +1,85 @@
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package gtreap provides an in-memory implementation of the
|
||||
// KVStore interfaces using the gtreap balanced-binary treap,
|
||||
// copy-on-write data structure.
|
||||
|
||||
package gtreap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/gtreap"
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
const Name = "gtreap"
|
||||
|
||||
type Store struct {
|
||||
m sync.Mutex
|
||||
t *gtreap.Treap
|
||||
mo store.MergeOperator
|
||||
}
|
||||
|
||||
type Item struct {
|
||||
k []byte
|
||||
v []byte
|
||||
}
|
||||
|
||||
func itemCompare(a, b interface{}) int {
|
||||
return bytes.Compare(a.(*Item).k, b.(*Item).k)
|
||||
}
|
||||
|
||||
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) {
|
||||
path, ok := config["path"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify path")
|
||||
}
|
||||
if path != "" {
|
||||
return nil, os.ErrInvalid
|
||||
}
|
||||
|
||||
rv := Store{
|
||||
t: gtreap.NewTreap(itemCompare),
|
||||
mo: mo,
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func (s *Store) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Store) Reader() (store.KVReader, error) {
|
||||
s.m.Lock()
|
||||
t := s.t
|
||||
s.m.Unlock()
|
||||
return &Reader{t: t}, nil
|
||||
}
|
||||
|
||||
func (s *Store) Writer() (store.KVWriter, error) {
|
||||
return &Writer{s: s}, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterKVStore(Name, New)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
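// A minimal usage sketch (illustrative only, not part of the upstream sources)
// of the in-memory store described in the package comment: "path" must be
// present but empty, since nothing is persisted. The key/value pair is a placeholder.
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
)

func main() {
	kv, err := gtreap.New(nil, map[string]interface{}{"path": ""})
	if err != nil {
		panic(err)
	}
	defer kv.Close()

	// Write through a batch, as the KVWriter interface requires.
	w, err := kv.Writer()
	if err != nil {
		panic(err)
	}
	batch := w.NewBatch()
	batch.Set([]byte("k1"), []byte("v1"))
	if err := w.ExecuteBatch(batch); err != nil {
		panic(err)
	}
	_ = batch.Close()
	_ = w.Close()

	// Read back from a reader, which captures the treap as a snapshot.
	r, err := kv.Reader()
	if err != nil {
		panic(err)
	}
	v, _ := r.Get([]byte("k1"))
	fmt.Printf("k1 = %s\n", v)
	_ = r.Close()
}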
93
index/upsidedown/store/gtreap/store_test.go
Normal file
@@ -0,0 +1,93 @@
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package gtreap
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
"github.com/blevesearch/upsidedown_store_api/test"
|
||||
)
|
||||
|
||||
func open(t *testing.T, mo store.MergeOperator) store.KVStore {
|
||||
rv, err := New(mo, map[string]interface{}{
|
||||
"path": "",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func cleanup(t *testing.T, s store.KVStore) {
|
||||
err := s.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGTreapKVCrud(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestKVCrud(t, s)
|
||||
}
|
||||
|
||||
func TestGTreapReaderIsolation(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestReaderIsolation(t, s)
|
||||
}
|
||||
|
||||
func TestGTreapReaderOwnsGetBytes(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestReaderOwnsGetBytes(t, s)
|
||||
}
|
||||
|
||||
func TestGTreapWriterOwnsBytes(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestWriterOwnsBytes(t, s)
|
||||
}
|
||||
|
||||
func TestGTreapPrefixIterator(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestPrefixIterator(t, s)
|
||||
}
|
||||
|
||||
func TestGTreapPrefixIteratorSeek(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestPrefixIteratorSeek(t, s)
|
||||
}
|
||||
|
||||
func TestGTreapRangeIterator(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestRangeIterator(t, s)
|
||||
}
|
||||
|
||||
func TestGTreapRangeIteratorSeek(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestRangeIteratorSeek(t, s)
|
||||
}
|
||||
|
||||
func TestGTreapMerge(t *testing.T) {
|
||||
s := open(t, &test.TestMergeCounter{})
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestMerge(t, s)
|
||||
}
76
index/upsidedown/store/gtreap/writer.go
Normal file
@@ -0,0 +1,76 @@
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package gtreap provides an in-memory implementation of the
|
||||
// KVStore interfaces using the gtreap balanced-binary treap,
|
||||
// copy-on-write data structure.
|
||||
package gtreap
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
|
||||
"github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
type Writer struct {
|
||||
s *Store
|
||||
}
|
||||
|
||||
func (w *Writer) NewBatch() store.KVBatch {
|
||||
return store.NewEmulatedBatch(w.s.mo)
|
||||
}
|
||||
|
||||
func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) {
|
||||
return make([]byte, options.TotalBytes), w.NewBatch(), nil
|
||||
}
|
||||
|
||||
func (w *Writer) ExecuteBatch(batch store.KVBatch) error {
|
||||
|
||||
emulatedBatch, ok := batch.(*store.EmulatedBatch)
|
||||
if !ok {
|
||||
return fmt.Errorf("wrong type of batch")
|
||||
}
|
||||
|
||||
w.s.m.Lock()
|
||||
for k, mergeOps := range emulatedBatch.Merger.Merges {
|
||||
kb := []byte(k)
|
||||
var existingVal []byte
|
||||
existingItem := w.s.t.Get(&Item{k: kb})
|
||||
if existingItem != nil {
|
||||
existingVal = w.s.t.Get(&Item{k: kb}).(*Item).v
|
||||
}
|
||||
mergedVal, fullMergeOk := w.s.mo.FullMerge(kb, existingVal, mergeOps)
|
||||
if !fullMergeOk {
|
||||
return fmt.Errorf("merge operator returned failure")
|
||||
}
|
||||
w.s.t = w.s.t.Upsert(&Item{k: kb, v: mergedVal}, rand.Int())
|
||||
}
|
||||
|
||||
for _, op := range emulatedBatch.Ops {
|
||||
if op.V != nil {
|
||||
w.s.t = w.s.t.Upsert(&Item{k: op.K, v: op.V}, rand.Int())
|
||||
} else {
|
||||
w.s.t = w.s.t.Delete(&Item{k: op.K})
|
||||
}
|
||||
}
|
||||
w.s.m.Unlock()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (w *Writer) Close() error {
|
||||
w.s = nil
|
||||
return nil
|
||||
}
46
index/upsidedown/store/metrics/batch.go
Normal file
@@ -0,0 +1,46 @@
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package metrics
|
||||
|
||||
import store "github.com/blevesearch/upsidedown_store_api"
|
||||
|
||||
type Batch struct {
|
||||
s *Store
|
||||
o store.KVBatch
|
||||
}
|
||||
|
||||
func (b *Batch) Set(key, val []byte) {
|
||||
b.o.Set(key, val)
|
||||
}
|
||||
|
||||
func (b *Batch) Delete(key []byte) {
|
||||
b.o.Delete(key)
|
||||
}
|
||||
|
||||
func (b *Batch) Merge(key, val []byte) {
|
||||
b.s.timerBatchMerge.Time(func() {
|
||||
b.o.Merge(key, val)
|
||||
})
|
||||
}
|
||||
|
||||
func (b *Batch) Reset() {
|
||||
b.o.Reset()
|
||||
}
|
||||
|
||||
func (b *Batch) Close() error {
|
||||
err := b.o.Close()
|
||||
b.o = nil
|
||||
return err
|
||||
}
58
index/upsidedown/store/metrics/iterator.go
Normal file
@@ -0,0 +1,58 @@
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package metrics
|
||||
|
||||
import store "github.com/blevesearch/upsidedown_store_api"
|
||||
|
||||
type Iterator struct {
|
||||
s *Store
|
||||
o store.KVIterator
|
||||
}
|
||||
|
||||
func (i *Iterator) Seek(x []byte) {
|
||||
i.s.timerIteratorSeek.Time(func() {
|
||||
i.o.Seek(x)
|
||||
})
|
||||
}
|
||||
|
||||
func (i *Iterator) Next() {
|
||||
i.s.timerIteratorNext.Time(func() {
|
||||
i.o.Next()
|
||||
})
|
||||
}
|
||||
|
||||
func (i *Iterator) Current() ([]byte, []byte, bool) {
|
||||
return i.o.Current()
|
||||
}
|
||||
|
||||
func (i *Iterator) Key() []byte {
|
||||
return i.o.Key()
|
||||
}
|
||||
|
||||
func (i *Iterator) Value() []byte {
|
||||
return i.o.Value()
|
||||
}
|
||||
|
||||
func (i *Iterator) Valid() bool {
|
||||
return i.o.Valid()
|
||||
}
|
||||
|
||||
func (i *Iterator) Close() error {
|
||||
err := i.o.Close()
|
||||
if err != nil {
|
||||
i.s.AddError("Iterator.Close", err, nil)
|
||||
}
|
||||
return err
|
||||
}
141
index/upsidedown/store/metrics/metrics_test.go
Normal file
@@ -0,0 +1,141 @@
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
|
||||
)
|
||||
|
||||
func TestMetricsStore(t *testing.T) {
|
||||
_, err := New(nil, map[string]interface{}{})
|
||||
if err == nil {
|
||||
t.Errorf("expected err when bad config")
|
||||
}
|
||||
|
||||
_, err = New(nil, map[string]interface{}{
|
||||
"kvStoreName_actual": "some-invalid-kvstore-name",
|
||||
})
|
||||
if err == nil {
|
||||
t.Errorf("expected err when unknown kvStoreName_actual")
|
||||
}
|
||||
|
||||
s, err := New(nil, map[string]interface{}{
|
||||
"kvStoreName_actual": gtreap.Name,
|
||||
"path": "",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
b := bytes.NewBuffer(nil)
|
||||
err = s.(*Store).WriteJSON(b)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if b.Len() <= 0 {
|
||||
t.Errorf("expected some output from WriteJSON")
|
||||
}
|
||||
var m map[string]interface{}
|
||||
err = json.Unmarshal(b.Bytes(), &m)
|
||||
if err != nil {
|
||||
t.Errorf("expected WriteJSON to be unmarshallable")
|
||||
}
|
||||
if len(m) == 0 {
|
||||
t.Errorf("expected some entries")
|
||||
}
|
||||
|
||||
b = bytes.NewBuffer(nil)
|
||||
s.(*Store).WriteCSVHeader(b)
|
||||
if b.Len() <= 0 {
|
||||
t.Errorf("expected some output from WriteCSVHeader")
|
||||
}
|
||||
|
||||
b = bytes.NewBuffer(nil)
|
||||
s.(*Store).WriteCSV(b)
|
||||
if b.Len() <= 0 {
|
||||
t.Errorf("expected some output from WriteCSV")
|
||||
}
|
||||
}
|
||||
|
||||
func TestErrors(t *testing.T) {
|
||||
s, err := New(nil, map[string]interface{}{
|
||||
"kvStoreName_actual": gtreap.Name,
|
||||
"path": "",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
x, ok := s.(*Store)
|
||||
if !ok {
|
||||
t.Errorf("expecting a Store")
|
||||
}
|
||||
|
||||
x.AddError("foo", fmt.Errorf("Foo"), []byte("fooKey"))
|
||||
x.AddError("bar", fmt.Errorf("Bar"), nil)
|
||||
x.AddError("baz", fmt.Errorf("Baz"), []byte("bazKey"))
|
||||
|
||||
b := bytes.NewBuffer(nil)
|
||||
err = x.WriteJSON(b)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
var m map[string]interface{}
|
||||
err = json.Unmarshal(b.Bytes(), &m)
|
||||
if err != nil {
|
||||
t.Errorf("expected unmarshallable writeJSON, err: %v, b: %s",
|
||||
err, b.Bytes())
|
||||
}
|
||||
|
||||
errorsi, ok := m["Errors"]
|
||||
if !ok || errorsi == nil {
|
||||
t.Errorf("expected errorsi")
|
||||
}
|
||||
errors, ok := errorsi.([]interface{})
|
||||
if !ok || errors == nil {
|
||||
t.Errorf("expected errorsi is array")
|
||||
}
|
||||
if len(errors) != 3 {
|
||||
t.Errorf("expected errors len 3")
|
||||
}
|
||||
|
||||
e := errors[0].(map[string]interface{})
|
||||
if e["Op"].(string) != "foo" ||
|
||||
e["Err"].(string) != "Foo" ||
|
||||
len(e["Time"].(string)) < 10 ||
|
||||
e["Key"].(string) != "fooKey" {
|
||||
t.Errorf("expected foo, %#v", e)
|
||||
}
|
||||
e = errors[1].(map[string]interface{})
|
||||
if e["Op"].(string) != "bar" ||
|
||||
e["Err"].(string) != "Bar" ||
|
||||
len(e["Time"].(string)) < 10 ||
|
||||
e["Key"].(string) != "" {
|
||||
t.Errorf("expected bar, %#v", e)
|
||||
}
|
||||
e = errors[2].(map[string]interface{})
|
||||
if e["Op"].(string) != "baz" ||
|
||||
e["Err"].(string) != "Baz" ||
|
||||
len(e["Time"].(string)) < 10 ||
|
||||
e["Key"].(string) != "bazKey" {
|
||||
t.Errorf("expected baz, %#v", e)
|
||||
}
|
||||
}
|
64
index/upsidedown/store/metrics/reader.go
Normal file
@@ -0,0 +1,64 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package metrics

import store "github.com/blevesearch/upsidedown_store_api"

type Reader struct {
	s *Store
	o store.KVReader
}

func (r *Reader) Get(key []byte) (v []byte, err error) {
	r.s.timerReaderGet.Time(func() {
		v, err = r.o.Get(key)
		if err != nil {
			r.s.AddError("Reader.Get", err, key)
		}
	})
	return
}

func (r *Reader) MultiGet(keys [][]byte) (vals [][]byte, err error) {
	r.s.timerReaderMultiGet.Time(func() {
		vals, err = r.o.MultiGet(keys)
		if err != nil {
			r.s.AddError("Reader.MultiGet", err, nil)
		}
	})
	return
}

func (r *Reader) PrefixIterator(prefix []byte) (i store.KVIterator) {
	r.s.timerReaderPrefixIterator.Time(func() {
		i = &Iterator{s: r.s, o: r.o.PrefixIterator(prefix)}
	})
	return
}

func (r *Reader) RangeIterator(start, end []byte) (i store.KVIterator) {
	r.s.timerReaderRangeIterator.Time(func() {
		i = &Iterator{s: r.s, o: r.o.RangeIterator(start, end)}
	})
	return
}

func (r *Reader) Close() error {
	err := r.o.Close()
	if err != nil {
		r.s.AddError("Reader.Close", err, nil)
	}
	return err
}
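Reader.Get runs inside timerReaderGet and records failures through AddError. A minimal sketch of the read path through any store built by this package's New (the key and the helper name are placeholders):

```
package example

import (
	store "github.com/blevesearch/upsidedown_store_api"
)

// getOne shows the read path through the metrics wrapper: Reader.Get runs
// inside timerReaderGet and failed gets are recorded via AddError.
func getOne(kv store.KVStore, key []byte) ([]byte, error) {
	r, err := kv.Reader()
	if err != nil {
		return nil, err
	}
	defer func() { _ = r.Close() }()

	return r.Get(key)
}
```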
50
index/upsidedown/store/metrics/stats.go
Normal file
@@ -0,0 +1,50 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package metrics

import (
	"github.com/blevesearch/bleve/v2/util"
	store "github.com/blevesearch/upsidedown_store_api"
)

type stats struct {
	s *Store
}

func (s *stats) statsMap() map[string]interface{} {
	ms := map[string]interface{}{}

	ms["metrics"] = map[string]interface{}{
		"reader_get":             TimerMap(s.s.timerReaderGet),
		"reader_multi_get":       TimerMap(s.s.timerReaderMultiGet),
		"reader_prefix_iterator": TimerMap(s.s.timerReaderPrefixIterator),
		"reader_range_iterator":  TimerMap(s.s.timerReaderRangeIterator),
		"writer_execute_batch":   TimerMap(s.s.timerWriterExecuteBatch),
		"iterator_seek":          TimerMap(s.s.timerIteratorSeek),
		"iterator_next":          TimerMap(s.s.timerIteratorNext),
		"batch_merge":            TimerMap(s.s.timerBatchMerge),
	}

	if o, ok := s.s.o.(store.KVStoreStats); ok {
		ms["kv"] = o.StatsMap()
	}

	return ms
}

func (s *stats) MarshalJSON() ([]byte, error) {
	m := s.statsMap()
	return util.MarshalJSON(m)
}
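statsMap nests one TimerMap per instrumented operation under the "metrics" key and, when the wrapped store implements store.KVStoreStats, adds its own stats under "kv". A hedged sketch of inspecting that output through the StatsMap accessor defined later in store.go:

```
package example

import (
	"fmt"

	store "github.com/blevesearch/upsidedown_store_api"
)

// dumpTimerStats prints the per-operation timer maps built by statsMap.
// It assumes kv was created by this package's New, so the KVStoreStats
// assertion succeeds.
func dumpTimerStats(kv store.KVStore) {
	ss, ok := kv.(store.KVStoreStats)
	if !ok {
		return
	}
	m := ss.StatsMap()
	if timers, ok := m["metrics"].(map[string]interface{}); ok {
		for op, stats := range timers {
			fmt.Println(op, stats)
		}
	}
}
```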
277
index/upsidedown/store/metrics/store.go
Normal file
@@ -0,0 +1,277 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package metrics provides a bleve.store.KVStore implementation that
|
||||
// wraps another, real KVStore implementation, and uses go-metrics to
|
||||
// track runtime performance metrics.
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/bleve/v2/util"
|
||||
"github.com/blevesearch/go-metrics"
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
const Name = "metrics"
|
||||
|
||||
type Store struct {
|
||||
o store.KVStore
|
||||
|
||||
timerReaderGet metrics.Timer
|
||||
timerReaderMultiGet metrics.Timer
|
||||
timerReaderPrefixIterator metrics.Timer
|
||||
timerReaderRangeIterator metrics.Timer
|
||||
timerWriterExecuteBatch metrics.Timer
|
||||
timerIteratorSeek metrics.Timer
|
||||
timerIteratorNext metrics.Timer
|
||||
timerBatchMerge metrics.Timer
|
||||
|
||||
m sync.Mutex // Protects the fields that follow.
|
||||
errors *list.List // Capped list of StoreError's.
|
||||
|
||||
s *stats
|
||||
}
|
||||
|
||||
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) {
|
||||
|
||||
name, ok := config["kvStoreName_actual"].(string)
|
||||
if !ok || name == "" {
|
||||
return nil, fmt.Errorf("metrics: missing kvStoreName_actual,"+
|
||||
" config: %#v", config)
|
||||
}
|
||||
|
||||
if name == Name {
|
||||
return nil, fmt.Errorf("metrics: circular kvStoreName_actual")
|
||||
}
|
||||
|
||||
ctr := registry.KVStoreConstructorByName(name)
|
||||
if ctr == nil {
|
||||
return nil, fmt.Errorf("metrics: no kv store constructor,"+
|
||||
" kvStoreName_actual: %s", name)
|
||||
}
|
||||
|
||||
kvs, err := ctr(mo, config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rv := &Store{
|
||||
o: kvs,
|
||||
|
||||
timerReaderGet: metrics.NewTimer(),
|
||||
timerReaderMultiGet: metrics.NewTimer(),
|
||||
timerReaderPrefixIterator: metrics.NewTimer(),
|
||||
timerReaderRangeIterator: metrics.NewTimer(),
|
||||
timerWriterExecuteBatch: metrics.NewTimer(),
|
||||
timerIteratorSeek: metrics.NewTimer(),
|
||||
timerIteratorNext: metrics.NewTimer(),
|
||||
timerBatchMerge: metrics.NewTimer(),
|
||||
|
||||
errors: list.New(),
|
||||
}
|
||||
|
||||
rv.s = &stats{s: rv}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterKVStore(Name, New)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Store) Close() error {
|
||||
return s.o.Close()
|
||||
}
|
||||
|
||||
func (s *Store) Reader() (store.KVReader, error) {
|
||||
o, err := s.o.Reader()
|
||||
if err != nil {
|
||||
s.AddError("Reader", err, nil)
|
||||
return nil, err
|
||||
}
|
||||
return &Reader{s: s, o: o}, nil
|
||||
}
|
||||
|
||||
func (s *Store) Writer() (store.KVWriter, error) {
|
||||
o, err := s.o.Writer()
|
||||
if err != nil {
|
||||
s.AddError("Writer", err, nil)
|
||||
return nil, err
|
||||
}
|
||||
return &Writer{s: s, o: o}, nil
|
||||
}
|
||||
|
||||
// Metric specific code below:
|
||||
|
||||
const MaxErrors = 100
|
||||
|
||||
type StoreError struct {
|
||||
Time string
|
||||
Op string
|
||||
Err string
|
||||
Key string
|
||||
}
|
||||
|
||||
func (s *Store) AddError(op string, err error, key []byte) {
|
||||
e := &StoreError{
|
||||
Time: time.Now().Format(time.RFC3339Nano),
|
||||
Op: op,
|
||||
Err: fmt.Sprintf("%v", err),
|
||||
Key: string(key),
|
||||
}
|
||||
|
||||
s.m.Lock()
|
||||
for s.errors.Len() >= MaxErrors {
|
||||
s.errors.Remove(s.errors.Front())
|
||||
}
|
||||
s.errors.PushBack(e)
|
||||
s.m.Unlock()
|
||||
}
|
||||
|
||||
func (s *Store) WriteJSON(w io.Writer) (err error) {
|
||||
_, err = w.Write([]byte(`{"TimerReaderGet":`))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
WriteTimerJSON(w, s.timerReaderGet)
|
||||
_, err = w.Write([]byte(`,"TimerReaderMultiGet":`))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
WriteTimerJSON(w, s.timerReaderMultiGet)
|
||||
_, err = w.Write([]byte(`,"TimerReaderPrefixIterator":`))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
WriteTimerJSON(w, s.timerReaderPrefixIterator)
|
||||
_, err = w.Write([]byte(`,"TimerReaderRangeIterator":`))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
WriteTimerJSON(w, s.timerReaderRangeIterator)
|
||||
_, err = w.Write([]byte(`,"TimerWriterExecuteBatch":`))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
WriteTimerJSON(w, s.timerWriterExecuteBatch)
|
||||
_, err = w.Write([]byte(`,"TimerIteratorSeek":`))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
WriteTimerJSON(w, s.timerIteratorSeek)
|
||||
_, err = w.Write([]byte(`,"TimerIteratorNext":`))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
WriteTimerJSON(w, s.timerIteratorNext)
|
||||
_, err = w.Write([]byte(`,"TimerBatchMerge":`))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
WriteTimerJSON(w, s.timerBatchMerge)
|
||||
|
||||
_, err = w.Write([]byte(`,"Errors":[`))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
s.m.Lock()
|
||||
defer s.m.Unlock()
|
||||
e := s.errors.Front()
|
||||
i := 0
|
||||
for e != nil {
|
||||
se, ok := e.Value.(*StoreError)
|
||||
if ok && se != nil {
|
||||
if i > 0 {
|
||||
_, err = w.Write([]byte(","))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
var buf []byte
|
||||
buf, err = util.MarshalJSON(se)
|
||||
if err == nil {
|
||||
_, err = w.Write(buf)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
e = e.Next()
|
||||
i = i + 1
|
||||
}
|
||||
_, err = w.Write([]byte(`]`))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// see if the underlying implementation has its own stats
|
||||
if o, ok := s.o.(store.KVStoreStats); ok {
|
||||
storeStats := o.Stats()
|
||||
var storeBytes []byte
|
||||
storeBytes, err = util.MarshalJSON(storeStats)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
_, err = fmt.Fprintf(w, `, "store": %s`, string(storeBytes))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
_, err = w.Write([]byte(`}`))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (s *Store) WriteCSVHeader(w io.Writer) {
|
||||
WriteTimerCSVHeader(w, "TimerReaderGet")
|
||||
WriteTimerCSVHeader(w, "TimerReaderPrefixIterator")
|
||||
WriteTimerCSVHeader(w, "TimerReaderRangeIterator")
|
||||
WriteTimerCSVHeader(w, "TimerWtierExecuteBatch")
|
||||
WriteTimerCSVHeader(w, "TimerIteratorSeek")
|
||||
WriteTimerCSVHeader(w, "TimerIteratorNext")
|
||||
WriteTimerCSVHeader(w, "TimerBatchMerge")
|
||||
}
|
||||
|
||||
func (s *Store) WriteCSV(w io.Writer) {
|
||||
WriteTimerCSV(w, s.timerReaderGet)
|
||||
WriteTimerCSV(w, s.timerReaderPrefixIterator)
|
||||
WriteTimerCSV(w, s.timerReaderRangeIterator)
|
||||
WriteTimerCSV(w, s.timerWriterExecuteBatch)
|
||||
WriteTimerCSV(w, s.timerIteratorSeek)
|
||||
WriteTimerCSV(w, s.timerIteratorNext)
|
||||
WriteTimerCSV(w, s.timerBatchMerge)
|
||||
}
|
||||
|
||||
func (s *Store) Stats() json.Marshaler {
|
||||
return s.s
|
||||
}
|
||||
|
||||
func (s *Store) StatsMap() map[string]interface{} {
|
||||
return s.s.statsMap()
|
||||
}
|
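New resolves the real store through the registry using the "kvStoreName_actual" config key, refusing to wrap itself, and the Store keeps a capped list of the last MaxErrors StoreError entries. The sketch below mirrors metrics_test.go: it wraps the in-memory gtreap store and dumps the timer JSON (the helper name is illustrative).

```
package example

import (
	"bytes"
	"fmt"

	"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
	"github.com/blevesearch/bleve/v2/index/upsidedown/store/metrics"
)

// openMetricsOverGtreap wraps the in-memory gtreap store with the metrics
// store, then dumps the accumulated timer JSON, mirroring metrics_test.go.
func openMetricsOverGtreap() error {
	kv, err := metrics.New(nil, map[string]interface{}{
		"kvStoreName_actual": gtreap.Name,
		"path":               "",
	})
	if err != nil {
		return err
	}
	defer func() { _ = kv.Close() }()

	var buf bytes.Buffer
	if err := kv.(*metrics.Store).WriteJSON(&buf); err != nil {
		return err
	}
	fmt.Println(buf.String())
	return nil
}
```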
95
index/upsidedown/store/metrics/store_test.go
Normal file
@@ -0,0 +1,95 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
"github.com/blevesearch/upsidedown_store_api/test"
|
||||
)
|
||||
|
||||
func open(t *testing.T, mo store.MergeOperator) store.KVStore {
|
||||
rv, err := New(mo, map[string]interface{}{
|
||||
"kvStoreName_actual": gtreap.Name,
|
||||
"path": "",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func cleanup(t *testing.T, s store.KVStore) {
|
||||
err := s.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricsKVCrud(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestKVCrud(t, s)
|
||||
}
|
||||
|
||||
func TestMetricsReaderIsolation(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestReaderIsolation(t, s)
|
||||
}
|
||||
|
||||
func TestMetricsReaderOwnsGetBytes(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestReaderOwnsGetBytes(t, s)
|
||||
}
|
||||
|
||||
func TestMetricsWriterOwnsBytes(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestWriterOwnsBytes(t, s)
|
||||
}
|
||||
|
||||
func TestMetricsPrefixIterator(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestPrefixIterator(t, s)
|
||||
}
|
||||
|
||||
func TestMetricsPrefixIteratorSeek(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestPrefixIteratorSeek(t, s)
|
||||
}
|
||||
|
||||
func TestMetricsRangeIterator(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestRangeIterator(t, s)
|
||||
}
|
||||
|
||||
func TestMetricsRangeIteratorSeek(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestRangeIteratorSeek(t, s)
|
||||
}
|
||||
|
||||
func TestMetricsMerge(t *testing.T) {
|
||||
s := open(t, &test.TestMergeCounter{})
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestMerge(t, s)
|
||||
}
|
135
index/upsidedown/store/metrics/util.go
Normal file
@@ -0,0 +1,135 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
|
||||
"github.com/blevesearch/go-metrics"
|
||||
)
|
||||
|
||||
// NOTE: This is copy & pasted from cbft as otherwise there
|
||||
// would be an import cycle.
|
||||
|
||||
var timerPercentiles = []float64{0.5, 0.75, 0.95, 0.99, 0.999}
|
||||
|
||||
func TimerMap(timer metrics.Timer) map[string]interface{} {
|
||||
|
||||
rv := make(map[string]interface{})
|
||||
t := timer.Snapshot()
|
||||
p := t.Percentiles(timerPercentiles)
|
||||
|
||||
percentileKeys := []string{"median", "75%", "95%", "99%", "99.9%"}
|
||||
percentiles := make(map[string]interface{})
|
||||
for i, pi := range p {
|
||||
if !isNanOrInf(pi) {
|
||||
percentileKey := percentileKeys[i]
|
||||
percentiles[percentileKey] = pi
|
||||
}
|
||||
}
|
||||
|
||||
rateKeys := []string{"1-min", "5-min", "15-min", "mean"}
|
||||
rates := make(map[string]interface{})
|
||||
for i, ri := range []float64{t.Rate1(), t.Rate5(), t.Rate15(), t.RateMean()} {
|
||||
if !isNanOrInf(ri) {
|
||||
rateKey := rateKeys[i]
|
||||
rates[rateKey] = ri
|
||||
}
|
||||
}
|
||||
|
||||
rv["count"] = t.Count()
|
||||
rv["min"] = t.Min()
|
||||
rv["max"] = t.Max()
|
||||
mean := t.Mean()
|
||||
if !isNanOrInf(mean) {
|
||||
rv["mean"] = mean
|
||||
}
|
||||
stddev := t.StdDev()
|
||||
if !isNanOrInf(stddev) {
|
||||
rv["stddev"] = stddev
|
||||
}
|
||||
rv["percentiles"] = percentiles
|
||||
rv["rates"] = rates
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func isNanOrInf(v float64) bool {
|
||||
if math.IsNaN(v) || math.IsInf(v, 0) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func WriteTimerJSON(w io.Writer, timer metrics.Timer) {
|
||||
t := timer.Snapshot()
|
||||
p := t.Percentiles(timerPercentiles)
|
||||
|
||||
fmt.Fprintf(w, `{"count":%9d,`, t.Count())
|
||||
fmt.Fprintf(w, `"min":%9d,`, t.Min())
|
||||
fmt.Fprintf(w, `"max":%9d,`, t.Max())
|
||||
fmt.Fprintf(w, `"mean":%12.2f,`, t.Mean())
|
||||
fmt.Fprintf(w, `"stddev":%12.2f,`, t.StdDev())
|
||||
fmt.Fprintf(w, `"percentiles":{`)
|
||||
fmt.Fprintf(w, `"median":%12.2f,`, p[0])
|
||||
fmt.Fprintf(w, `"75%%":%12.2f,`, p[1])
|
||||
fmt.Fprintf(w, `"95%%":%12.2f,`, p[2])
|
||||
fmt.Fprintf(w, `"99%%":%12.2f,`, p[3])
|
||||
fmt.Fprintf(w, `"99.9%%":%12.2f},`, p[4])
|
||||
fmt.Fprintf(w, `"rates":{`)
|
||||
fmt.Fprintf(w, `"1-min":%12.2f,`, t.Rate1())
|
||||
fmt.Fprintf(w, `"5-min":%12.2f,`, t.Rate5())
|
||||
fmt.Fprintf(w, `"15-min":%12.2f,`, t.Rate15())
|
||||
fmt.Fprintf(w, `"mean":%12.2f}}`, t.RateMean())
|
||||
}
|
||||
|
||||
func WriteTimerCSVHeader(w io.Writer, prefix string) {
|
||||
fmt.Fprintf(w, "%s-count,", prefix)
|
||||
fmt.Fprintf(w, "%s-min,", prefix)
|
||||
fmt.Fprintf(w, "%s-max,", prefix)
|
||||
fmt.Fprintf(w, "%s-mean,", prefix)
|
||||
fmt.Fprintf(w, "%s-stddev,", prefix)
|
||||
fmt.Fprintf(w, "%s-percentile-50%%,", prefix)
|
||||
fmt.Fprintf(w, "%s-percentile-75%%,", prefix)
|
||||
fmt.Fprintf(w, "%s-percentile-95%%,", prefix)
|
||||
fmt.Fprintf(w, "%s-percentile-99%%,", prefix)
|
||||
fmt.Fprintf(w, "%s-percentile-99.9%%,", prefix)
|
||||
fmt.Fprintf(w, "%s-rate-1-min,", prefix)
|
||||
fmt.Fprintf(w, "%s-rate-5-min,", prefix)
|
||||
fmt.Fprintf(w, "%s-rate-15-min,", prefix)
|
||||
fmt.Fprintf(w, "%s-rate-mean", prefix)
|
||||
}
|
||||
|
||||
func WriteTimerCSV(w io.Writer, timer metrics.Timer) {
|
||||
t := timer.Snapshot()
|
||||
p := t.Percentiles(timerPercentiles)
|
||||
|
||||
fmt.Fprintf(w, `%d,`, t.Count())
|
||||
fmt.Fprintf(w, `%d,`, t.Min())
|
||||
fmt.Fprintf(w, `%d,`, t.Max())
|
||||
fmt.Fprintf(w, `%f,`, t.Mean())
|
||||
fmt.Fprintf(w, `%f,`, t.StdDev())
|
||||
fmt.Fprintf(w, `%f,`, p[0])
|
||||
fmt.Fprintf(w, `%f,`, p[1])
|
||||
fmt.Fprintf(w, `%f,`, p[2])
|
||||
fmt.Fprintf(w, `%f,`, p[3])
|
||||
fmt.Fprintf(w, `%f,`, p[4])
|
||||
fmt.Fprintf(w, `%f,`, t.Rate1())
|
||||
fmt.Fprintf(w, `%f,`, t.Rate5())
|
||||
fmt.Fprintf(w, `%f,`, t.Rate15())
|
||||
fmt.Fprintf(w, `%f`, t.RateMean())
|
||||
}
|
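TimerMap and WriteTimerJSON accept any go-metrics Timer, so they can be exercised on their own. A small sketch, using the same Time() call the wrappers above use to record durations (the package aliases are illustrative):

```
package example

import (
	"fmt"
	"os"
	"time"

	gometrics "github.com/blevesearch/go-metrics"

	storemetrics "github.com/blevesearch/bleve/v2/index/upsidedown/store/metrics"
)

// timerDemo records two observations with Time(), then renders the timer
// both as a map (NaN/Inf values filtered out) and as JSON.
func timerDemo() {
	t := gometrics.NewTimer()
	t.Time(func() { time.Sleep(2 * time.Millisecond) })
	t.Time(func() { time.Sleep(5 * time.Millisecond) })

	fmt.Println(storemetrics.TimerMap(t)["count"]) // 2

	storemetrics.WriteTimerJSON(os.Stdout, t)
}
```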
60
index/upsidedown/store/metrics/writer.go
Normal file
@@ -0,0 +1,60 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package metrics

import (
	"fmt"

	store "github.com/blevesearch/upsidedown_store_api"
)

type Writer struct {
	s *Store
	o store.KVWriter
}

func (w *Writer) Close() error {
	err := w.o.Close()
	if err != nil {
		w.s.AddError("Writer.Close", err, nil)
	}
	return err
}

func (w *Writer) NewBatch() store.KVBatch {
	return &Batch{s: w.s, o: w.o.NewBatch()}
}

func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) {
	buf, b, err := w.o.NewBatchEx(options)
	if err != nil {
		return nil, nil, err
	}
	return buf, &Batch{s: w.s, o: b}, nil
}

func (w *Writer) ExecuteBatch(b store.KVBatch) (err error) {
	batch, ok := b.(*Batch)
	if !ok {
		return fmt.Errorf("wrong type of batch")
	}
	w.s.timerWriterExecuteBatch.Time(func() {
		err = w.o.ExecuteBatch(batch.o)
		if err != nil {
			w.s.AddError("Writer.ExecuteBatch", err, nil)
		}
	})
	return
}
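ExecuteBatch type-asserts its argument back to this package's *Batch, so batches must come from the same writer's NewBatch. A short sketch of the intended write path (keys and values are placeholders):

```
package example

import (
	store "github.com/blevesearch/upsidedown_store_api"
)

// writeTwo shows the intended write path; the batch must come from this
// writer's NewBatch, because ExecuteBatch type-asserts it back to *Batch.
func writeTwo(kv store.KVStore) error {
	w, err := kv.Writer()
	if err != nil {
		return err
	}
	defer func() { _ = w.Close() }()

	b := w.NewBatch()
	defer func() { _ = b.Close() }()

	b.Set([]byte("k1"), []byte("v1"))
	b.Delete([]byte("k2"))

	return w.ExecuteBatch(b) // timed by timerWriterExecuteBatch
}
```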
87
index/upsidedown/store/moss/batch.go
Normal file
@@ -0,0 +1,87 @@
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package moss

import (
	"github.com/couchbase/moss"

	store "github.com/blevesearch/upsidedown_store_api"
)

type Batch struct {
	store   *Store
	merge   *store.EmulatedMerge
	batch   moss.Batch
	buf     []byte // Non-nil when using pre-alloc'ed / NewBatchEx().
	bufUsed int
}

func (b *Batch) Set(key, val []byte) {
	var err error
	if b.buf != nil {
		b.bufUsed += len(key) + len(val)
		err = b.batch.AllocSet(key, val)
	} else {
		err = b.batch.Set(key, val)
	}

	if err != nil {
		b.store.Logf("bleve moss batch.Set err: %v", err)
	}
}

func (b *Batch) Delete(key []byte) {
	var err error
	if b.buf != nil {
		b.bufUsed += len(key)
		err = b.batch.AllocDel(key)
	} else {
		err = b.batch.Del(key)
	}

	if err != nil {
		b.store.Logf("bleve moss batch.Delete err: %v", err)
	}
}

func (b *Batch) Merge(key, val []byte) {
	if b.buf != nil {
		b.bufUsed += len(key) + len(val)
	}
	b.merge.Merge(key, val)
}

func (b *Batch) Reset() {
	err := b.Close()
	if err != nil {
		b.store.Logf("bleve moss batch.Close err: %v", err)
		return
	}

	batch, err := b.store.ms.NewBatch(0, 0)
	if err == nil {
		b.batch = batch
		b.merge = store.NewEmulatedMerge(b.store.mo)
		b.buf = nil
		b.bufUsed = 0
	}
}

func (b *Batch) Close() error {
	b.merge = nil
	err := b.batch.Close()
	b.batch = nil
	return err
}
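When a batch comes from NewBatchEx, buf is non-nil and Set/Delete switch to moss's AllocSet/AllocDel while bufUsed tracks how much of the pre-allocated space has been consumed. A hedged sketch of sizing such a batch up front; the KVBatchOptions field names (TotalBytes, NumSets) follow the upsidedown store API and are an assumption here, not something this diff defines:

```
package example

import (
	store "github.com/blevesearch/upsidedown_store_api"
)

// preSizedBatch fills a pre-allocated batch from NewBatchEx; in the moss
// wrapper this routes Set through AllocSet and grows bufUsed. The
// KVBatchOptions field names are assumed from the upsidedown store API.
func preSizedBatch(w store.KVWriter, keys, vals [][]byte) error {
	total := 0
	for i := range keys {
		total += len(keys[i]) + len(vals[i])
	}

	_, b, err := w.NewBatchEx(store.KVBatchOptions{
		TotalBytes: total,
		NumSets:    len(keys),
	})
	if err != nil {
		return err
	}
	defer func() { _ = b.Close() }()

	for i := range keys {
		b.Set(keys[i], vals[i])
	}
	return w.ExecuteBatch(b)
}
```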
87
index/upsidedown/store/moss/iterator.go
Normal file
@@ -0,0 +1,87 @@
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package moss

import (
	"github.com/couchbase/moss"
)

type Iterator struct {
	store *Store
	ss    moss.Snapshot
	iter  moss.Iterator
	start []byte
	end   []byte
	k     []byte
	v     []byte
	err   error
}

func (x *Iterator) Seek(seekToKey []byte) {
	_ = x.iter.SeekTo(seekToKey)

	x.k, x.v, x.err = x.iter.Current()
}

func (x *Iterator) Next() {
	_ = x.iter.Next()

	x.k, x.v, x.err = x.iter.Current()
}

func (x *Iterator) Current() ([]byte, []byte, bool) {
	return x.k, x.v, x.err == nil
}

func (x *Iterator) Key() []byte {
	if x.err != nil {
		return nil
	}

	return x.k
}

func (x *Iterator) Value() []byte {
	if x.err != nil {
		return nil
	}

	return x.v
}

func (x *Iterator) Valid() bool {
	return x.err == nil
}

func (x *Iterator) Close() error {
	var err error

	x.ss = nil

	if x.iter != nil {
		err = x.iter.Close()
		x.iter = nil
	}

	x.k = nil
	x.v = nil
	x.err = moss.ErrIteratorDone

	return err
}

func (x *Iterator) current() {
	x.k, x.v, x.err = x.iter.Current()
}
571
index/upsidedown/store/moss/lower.go
Normal file
@@ -0,0 +1,571 @@
|
|||
// Copyright (c) 2016 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package moss provides a KVStore implementation based on the
|
||||
// github.com/couchbase/moss library.
|
||||
|
||||
package moss
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"github.com/couchbase/moss"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/bleve/v2/util"
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
func initLowerLevelStore(
|
||||
config map[string]interface{},
|
||||
lowerLevelStoreName string,
|
||||
lowerLevelStoreConfig map[string]interface{},
|
||||
lowerLevelMaxBatchSize uint64,
|
||||
options moss.CollectionOptions,
|
||||
) (moss.Snapshot, moss.LowerLevelUpdate, store.KVStore, statsFunc, error) {
|
||||
if lowerLevelStoreConfig == nil {
|
||||
lowerLevelStoreConfig = map[string]interface{}{}
|
||||
}
|
||||
|
||||
for k, v := range config {
|
||||
_, exists := lowerLevelStoreConfig[k]
|
||||
if !exists {
|
||||
lowerLevelStoreConfig[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
if lowerLevelStoreName == "mossStore" {
|
||||
return InitMossStore(lowerLevelStoreConfig, options)
|
||||
}
|
||||
|
||||
constructor := registry.KVStoreConstructorByName(lowerLevelStoreName)
|
||||
if constructor == nil {
|
||||
return nil, nil, nil, nil, fmt.Errorf("moss store, initLowerLevelStore,"+
|
||||
" could not find lower level store: %s", lowerLevelStoreName)
|
||||
}
|
||||
|
||||
kvStore, err := constructor(options.MergeOperator, lowerLevelStoreConfig)
|
||||
if err != nil {
|
||||
return nil, nil, nil, nil, err
|
||||
}
|
||||
|
||||
llStore := &llStore{
|
||||
refs: 0,
|
||||
config: config,
|
||||
llConfig: lowerLevelStoreConfig,
|
||||
kvStore: kvStore,
|
||||
logf: options.Log,
|
||||
}
|
||||
|
||||
llUpdate := func(ssHigher moss.Snapshot) (ssLower moss.Snapshot, err error) {
|
||||
return llStore.update(ssHigher, lowerLevelMaxBatchSize)
|
||||
}
|
||||
|
||||
llSnapshot, err := llUpdate(nil)
|
||||
if err != nil {
|
||||
_ = kvStore.Close()
|
||||
return nil, nil, nil, nil, err
|
||||
}
|
||||
|
||||
return llSnapshot, llUpdate, kvStore, nil, nil // llStore.refs is now 1.
|
||||
}
|
||||
|
||||
// ------------------------------------------------
|
||||
|
||||
// llStore is a lower level store and provides ref-counting around a
|
||||
// bleve store.KVStore.
|
||||
type llStore struct {
|
||||
kvStore store.KVStore
|
||||
|
||||
config map[string]interface{}
|
||||
llConfig map[string]interface{}
|
||||
|
||||
logf func(format string, a ...interface{})
|
||||
|
||||
m sync.Mutex // Protects fields that follow.
|
||||
refs int
|
||||
}
|
||||
|
||||
// llSnapshot represents a lower-level snapshot, wrapping a bleve
|
||||
// store.KVReader, and implements the moss.Snapshot interface.
|
||||
type llSnapshot struct {
|
||||
llStore *llStore // Holds 1 ref on the llStore.
|
||||
kvReader store.KVReader
|
||||
childSnapshots map[string]*llSnapshot
|
||||
|
||||
m sync.Mutex // Protects fields that follow.
|
||||
refs int
|
||||
}
|
||||
|
||||
// llIterator represents a lower-level iterator, wrapping a bleve
|
||||
// store.KVIterator, and implements the moss.Iterator interface.
|
||||
type llIterator struct {
|
||||
llSnapshot *llSnapshot // Holds 1 ref on the llSnapshot.
|
||||
|
||||
// Some lower-level KVReader implementations need a separate
|
||||
// KVReader clone, because a KVReader is single-threaded.
|
||||
kvReader store.KVReader
|
||||
|
||||
kvIterator store.KVIterator
|
||||
}
|
||||
|
||||
type readerSource interface {
|
||||
Reader() (store.KVReader, error)
|
||||
}
|
||||
|
||||
// ------------------------------------------------
|
||||
|
||||
func (s *llStore) addRef() *llStore {
|
||||
s.m.Lock()
|
||||
s.refs += 1
|
||||
s.m.Unlock()
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
func (s *llStore) decRef() {
|
||||
s.m.Lock()
|
||||
s.refs -= 1
|
||||
if s.refs <= 0 {
|
||||
err := s.kvStore.Close()
|
||||
if err != nil {
|
||||
s.logf("llStore kvStore.Close err: %v", err)
|
||||
}
|
||||
}
|
||||
s.m.Unlock()
|
||||
}
|
||||
|
||||
// update() mutates this lower level store with latest data from the
|
||||
// given higher level moss.Snapshot and returns a new moss.Snapshot
|
||||
// that the higher level can use which represents this lower level
|
||||
// store.
|
||||
func (s *llStore) update(ssHigher moss.Snapshot, maxBatchSize uint64) (
|
||||
ssLower moss.Snapshot, err error,
|
||||
) {
|
||||
if ssHigher != nil {
|
||||
iter, err := ssHigher.StartIterator(nil, nil, moss.IteratorOptions{
|
||||
IncludeDeletions: true,
|
||||
SkipLowerLevel: true,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
err = iter.Close()
|
||||
if err != nil {
|
||||
s.logf("llStore iter.Close err: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
kvWriter, err := s.kvStore.Writer()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
err = kvWriter.Close()
|
||||
if err != nil {
|
||||
s.logf("llStore kvWriter.Close err: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
batch := kvWriter.NewBatch()
|
||||
|
||||
defer func() {
|
||||
if batch != nil {
|
||||
err = batch.Close()
|
||||
if err != nil {
|
||||
s.logf("llStore batch.Close err: %v", err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
var readOptions moss.ReadOptions
|
||||
|
||||
i := uint64(0)
|
||||
for {
|
||||
if i%1000000 == 0 {
|
||||
s.logf("llStore.update, i: %d", i)
|
||||
}
|
||||
|
||||
ex, key, val, err := iter.CurrentEx()
|
||||
if err == moss.ErrIteratorDone {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch ex.Operation {
|
||||
case moss.OperationSet:
|
||||
batch.Set(key, val)
|
||||
|
||||
case moss.OperationDel:
|
||||
batch.Delete(key)
|
||||
|
||||
case moss.OperationMerge:
|
||||
val, err = ssHigher.Get(key, readOptions)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if val != nil {
|
||||
batch.Set(key, val)
|
||||
} else {
|
||||
batch.Delete(key)
|
||||
}
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("moss store, update,"+
|
||||
" unexpected operation, ex: %v", ex)
|
||||
}
|
||||
|
||||
i++
|
||||
|
||||
err = iter.Next()
|
||||
if err == moss.ErrIteratorDone {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if maxBatchSize > 0 && i%maxBatchSize == 0 {
|
||||
err = kvWriter.ExecuteBatch(batch)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = batch.Close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
batch = kvWriter.NewBatch()
|
||||
}
|
||||
}
|
||||
|
||||
if i > 0 {
|
||||
s.logf("llStore.update, ExecuteBatch,"+
|
||||
" path: %s, total: %d, start", s.llConfig["path"], i)
|
||||
|
||||
err = kvWriter.ExecuteBatch(batch)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
s.logf("llStore.update, ExecuteBatch,"+
|
||||
" path: %s: total: %d, done", s.llConfig["path"], i)
|
||||
}
|
||||
}
|
||||
|
||||
kvReader, err := s.kvStore.Reader()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
s.logf("llStore.update, new reader")
|
||||
|
||||
return &llSnapshot{
|
||||
llStore: s.addRef(),
|
||||
kvReader: kvReader,
|
||||
refs: 1,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// ------------------------------------------------
|
||||
|
||||
func (llss *llSnapshot) addRef() *llSnapshot {
|
||||
llss.m.Lock()
|
||||
llss.refs += 1
|
||||
llss.m.Unlock()
|
||||
|
||||
return llss
|
||||
}
|
||||
|
||||
func (llss *llSnapshot) decRef() {
|
||||
llss.m.Lock()
|
||||
llss.refs -= 1
|
||||
if llss.refs <= 0 {
|
||||
if llss.kvReader != nil {
|
||||
err := llss.kvReader.Close()
|
||||
if err != nil {
|
||||
llss.llStore.logf("llSnapshot kvReader.Close err: %v", err)
|
||||
}
|
||||
|
||||
llss.kvReader = nil
|
||||
}
|
||||
|
||||
if llss.llStore != nil {
|
||||
llss.llStore.decRef()
|
||||
llss.llStore = nil
|
||||
}
|
||||
}
|
||||
llss.m.Unlock()
|
||||
}
|
||||
|
||||
// ChildCollectionNames returns an array of child collection name strings.
|
||||
func (llss *llSnapshot) ChildCollectionNames() ([]string, error) {
|
||||
childCollections := make([]string, len(llss.childSnapshots))
|
||||
idx := 0
|
||||
for name := range llss.childSnapshots {
|
||||
childCollections[idx] = name
|
||||
idx++
|
||||
}
|
||||
return childCollections, nil
|
||||
}
|
||||
|
||||
// ChildCollectionSnapshot returns a Snapshot on a given child
|
||||
// collection by its name.
|
||||
func (llss *llSnapshot) ChildCollectionSnapshot(childCollectionName string) (
|
||||
moss.Snapshot, error,
|
||||
) {
|
||||
childSnapshot, exists := llss.childSnapshots[childCollectionName]
|
||||
if !exists {
|
||||
return nil, nil
|
||||
}
|
||||
childSnapshot.addRef()
|
||||
return childSnapshot, nil
|
||||
}
|
||||
|
||||
func (llss *llSnapshot) Close() error {
|
||||
llss.decRef()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llss *llSnapshot) Get(key []byte,
|
||||
readOptions moss.ReadOptions,
|
||||
) ([]byte, error) {
|
||||
rs, ok := llss.kvReader.(readerSource)
|
||||
if ok {
|
||||
r2, err := rs.Reader()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
val, err := r2.Get(key)
|
||||
|
||||
_ = r2.Close()
|
||||
|
||||
return val, err
|
||||
}
|
||||
|
||||
return llss.kvReader.Get(key)
|
||||
}
|
||||
|
||||
func (llss *llSnapshot) StartIterator(
|
||||
startKeyInclusive, endKeyExclusive []byte,
|
||||
iteratorOptions moss.IteratorOptions,
|
||||
) (moss.Iterator, error) {
|
||||
rs, ok := llss.kvReader.(readerSource)
|
||||
if ok {
|
||||
r2, err := rs.Reader()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
i2 := r2.RangeIterator(startKeyInclusive, endKeyExclusive)
|
||||
|
||||
return &llIterator{llSnapshot: llss.addRef(), kvReader: r2, kvIterator: i2}, nil
|
||||
}
|
||||
|
||||
i := llss.kvReader.RangeIterator(startKeyInclusive, endKeyExclusive)
|
||||
|
||||
return &llIterator{llSnapshot: llss.addRef(), kvReader: nil, kvIterator: i}, nil
|
||||
}
|
||||
|
||||
// ------------------------------------------------
|
||||
|
||||
func (lli *llIterator) Close() error {
|
||||
var err0 error
|
||||
if lli.kvIterator != nil {
|
||||
err0 = lli.kvIterator.Close()
|
||||
lli.kvIterator = nil
|
||||
}
|
||||
|
||||
var err1 error
|
||||
if lli.kvReader != nil {
|
||||
err1 = lli.kvReader.Close()
|
||||
lli.kvReader = nil
|
||||
}
|
||||
|
||||
lli.llSnapshot.decRef()
|
||||
lli.llSnapshot = nil
|
||||
|
||||
if err0 != nil {
|
||||
return err0
|
||||
}
|
||||
|
||||
if err1 != nil {
|
||||
return err1
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (lli *llIterator) Next() error {
|
||||
lli.kvIterator.Next()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (lli *llIterator) SeekTo(k []byte) error {
|
||||
lli.kvIterator.Seek(k)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (lli *llIterator) Current() (key, val []byte, err error) {
|
||||
key, val, ok := lli.kvIterator.Current()
|
||||
if !ok {
|
||||
return nil, nil, moss.ErrIteratorDone
|
||||
}
|
||||
|
||||
return key, val, nil
|
||||
}
|
||||
|
||||
func (lli *llIterator) CurrentEx() (
|
||||
entryEx moss.EntryEx, key, val []byte, err error,
|
||||
) {
|
||||
return moss.EntryEx{}, nil, nil, moss.ErrUnimplemented
|
||||
}
|
||||
|
||||
// ------------------------------------------------
|
||||
|
||||
func InitMossStore(config map[string]interface{}, options moss.CollectionOptions) (
|
||||
moss.Snapshot, moss.LowerLevelUpdate, store.KVStore, statsFunc, error,
|
||||
) {
|
||||
path, ok := config["path"].(string)
|
||||
if !ok {
|
||||
return nil, nil, nil, nil, fmt.Errorf("lower: missing path for InitMossStore config")
|
||||
}
|
||||
if path == "" {
|
||||
return nil, nil, nil, nil, os.ErrInvalid
|
||||
}
|
||||
|
||||
err := os.MkdirAll(path, 0o700)
|
||||
if err != nil {
|
||||
return nil, nil, nil, nil, fmt.Errorf("lower: InitMossStore mkdir,"+
|
||||
" path: %s, err: %v", path, err)
|
||||
}
|
||||
|
||||
storeOptions := moss.StoreOptions{
|
||||
CollectionOptions: options,
|
||||
}
|
||||
v, ok := config["mossStoreOptions"]
|
||||
if ok {
|
||||
b, err := util.MarshalJSON(v) // Convert from map[string]interface{}.
|
||||
if err != nil {
|
||||
return nil, nil, nil, nil, err
|
||||
}
|
||||
|
||||
err = util.UnmarshalJSON(b, &storeOptions)
|
||||
if err != nil {
|
||||
return nil, nil, nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
s, err := moss.OpenStore(path, storeOptions)
|
||||
if err != nil {
|
||||
return nil, nil, nil, nil, fmt.Errorf("lower: moss.OpenStore,"+
|
||||
" path: %s, err: %v", path, err)
|
||||
}
|
||||
|
||||
sw := &mossStoreWrapper{s: s}
|
||||
|
||||
llUpdate := func(ssHigher moss.Snapshot) (moss.Snapshot, error) {
|
||||
ss, err := sw.s.Persist(ssHigher, moss.StorePersistOptions{
|
||||
CompactionConcern: moss.CompactionAllow,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
sw.AddRef() // Ref-count to be owned by snapshot wrapper.
|
||||
|
||||
return moss.NewSnapshotWrapper(ss, sw), nil
|
||||
}
|
||||
|
||||
llSnapshot, err := llUpdate(nil)
|
||||
if err != nil {
|
||||
_ = s.Close()
|
||||
return nil, nil, nil, nil, err
|
||||
}
|
||||
|
||||
llStats := func() map[string]interface{} {
|
||||
stats, err := s.Stats()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return stats
|
||||
}
|
||||
|
||||
return llSnapshot, llUpdate, sw, llStats, nil
|
||||
}
|
||||
|
||||
// mossStoreWrapper implements the bleve.index.store.KVStore
|
||||
// interface, but only barely enough to allow it to be passed around
|
||||
// as a lower-level store. Advanced apps will likely cast the
|
||||
// mossStoreWrapper to access the Actual() method.
|
||||
type mossStoreWrapper struct {
|
||||
m sync.Mutex
|
||||
refs int
|
||||
s *moss.Store
|
||||
}
|
||||
|
||||
func (w *mossStoreWrapper) AddRef() {
|
||||
w.m.Lock()
|
||||
w.refs++
|
||||
w.m.Unlock()
|
||||
}
|
||||
|
||||
func (w *mossStoreWrapper) Close() (err error) {
|
||||
w.m.Lock()
|
||||
w.refs--
|
||||
if w.refs <= 0 {
|
||||
err = w.s.Close()
|
||||
w.s = nil
|
||||
}
|
||||
w.m.Unlock()
|
||||
return err
|
||||
}
|
||||
|
||||
func (w *mossStoreWrapper) Reader() (store.KVReader, error) {
|
||||
return nil, fmt.Errorf("unexpected")
|
||||
}
|
||||
|
||||
func (w *mossStoreWrapper) Writer() (store.KVWriter, error) {
|
||||
return nil, fmt.Errorf("unexpected")
|
||||
}
|
||||
|
||||
func (w *mossStoreWrapper) Actual() *moss.Store {
|
||||
w.m.Lock()
|
||||
rv := w.s
|
||||
w.m.Unlock()
|
||||
return rv
|
||||
}
|
||||
|
||||
func (w *mossStoreWrapper) histograms() string {
|
||||
var rv string
|
||||
w.m.Lock()
|
||||
if w.s != nil {
|
||||
rv = w.s.Histograms().String()
|
||||
}
|
||||
w.m.Unlock()
|
||||
return rv
|
||||
}
|
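initLowerLevelStore either looks the lower store up in the registry or, for the reserved name "mossStore", calls InitMossStore, and update() then replays higher-level mutations into it in batches capped by the configured maximum. A config sketch in the spirit of lower_test.go (the path and batch size are placeholders):

```
package example

import (
	bmoss "github.com/blevesearch/bleve/v2/index/upsidedown/store/moss"
)

// openWithMossStoreLower opens a moss collection persisted through the
// built-in "mossStore" lower level, replaying updates to it in batches of
// at most 100000 mutations.
func openWithMossStoreLower(path string) error {
	kv, err := bmoss.New(nil, map[string]interface{}{
		"path":                       path,
		"mossLowerLevelStoreName":    "mossStore",
		"mossLowerLevelMaxBatchSize": 100000.0, // store.go parses this value as a float64
	})
	if err != nil {
		return err
	}
	return kv.Close()
}
```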
103
index/upsidedown/store/moss/lower_test.go
Normal file
@@ -0,0 +1,103 @@
|
|||
// Copyright (c) 2016 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package moss
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
"github.com/blevesearch/upsidedown_store_api/test"
|
||||
)
|
||||
|
||||
func openWithLower(t *testing.T, mo store.MergeOperator) (string, store.KVStore) {
|
||||
tmpDir, _ := os.MkdirTemp("", "mossStore")
|
||||
|
||||
config := map[string]interface{}{
|
||||
"path": tmpDir,
|
||||
"mossLowerLevelStoreName": "mossStore",
|
||||
}
|
||||
|
||||
rv, err := New(mo, config)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
return tmpDir, rv
|
||||
}
|
||||
|
||||
func cleanupWithLower(t *testing.T, s store.KVStore, tmpDir string) {
|
||||
err := s.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = os.RemoveAll(tmpDir)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMossWithLowerKVCrud(t *testing.T) {
|
||||
tmpDir, s := openWithLower(t, nil)
|
||||
defer cleanupWithLower(t, s, tmpDir)
|
||||
test.CommonTestKVCrud(t, s)
|
||||
}
|
||||
|
||||
func TestMossWithLowerReaderIsolation(t *testing.T) {
|
||||
tmpDir, s := openWithLower(t, nil)
|
||||
defer cleanupWithLower(t, s, tmpDir)
|
||||
test.CommonTestReaderIsolation(t, s)
|
||||
}
|
||||
|
||||
func TestMossWithLowerReaderOwnsGetBytes(t *testing.T) {
|
||||
tmpDir, s := openWithLower(t, nil)
|
||||
defer cleanupWithLower(t, s, tmpDir)
|
||||
test.CommonTestReaderOwnsGetBytes(t, s)
|
||||
}
|
||||
|
||||
func TestMossWithLowerWriterOwnsBytes(t *testing.T) {
|
||||
tmpDir, s := openWithLower(t, nil)
|
||||
defer cleanupWithLower(t, s, tmpDir)
|
||||
test.CommonTestWriterOwnsBytes(t, s)
|
||||
}
|
||||
|
||||
func TestMossWithLowerPrefixIterator(t *testing.T) {
|
||||
tmpDir, s := openWithLower(t, nil)
|
||||
defer cleanupWithLower(t, s, tmpDir)
|
||||
test.CommonTestPrefixIterator(t, s)
|
||||
}
|
||||
|
||||
func TestMossWithLowerPrefixIteratorSeek(t *testing.T) {
|
||||
tmpDir, s := openWithLower(t, nil)
|
||||
defer cleanupWithLower(t, s, tmpDir)
|
||||
test.CommonTestPrefixIteratorSeek(t, s)
|
||||
}
|
||||
|
||||
func TestMossWithLowerRangeIterator(t *testing.T) {
|
||||
tmpDir, s := openWithLower(t, nil)
|
||||
defer cleanupWithLower(t, s, tmpDir)
|
||||
test.CommonTestRangeIterator(t, s)
|
||||
}
|
||||
|
||||
func TestMossWithLowerRangeIteratorSeek(t *testing.T) {
|
||||
tmpDir, s := openWithLower(t, nil)
|
||||
defer cleanupWithLower(t, s, tmpDir)
|
||||
test.CommonTestRangeIteratorSeek(t, s)
|
||||
}
|
||||
|
||||
func TestMossWithLowerMerge(t *testing.T) {
|
||||
tmpDir, s := openWithLower(t, &test.TestMergeCounter{})
|
||||
defer cleanupWithLower(t, s, tmpDir)
|
||||
test.CommonTestMerge(t, s)
|
||||
}
|
97
index/upsidedown/store/moss/reader.go
Normal file
@@ -0,0 +1,97 @@
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package moss

import (
	"github.com/couchbase/moss"

	store "github.com/blevesearch/upsidedown_store_api"
)

type Reader struct {
	store *Store
	ss    moss.Snapshot
}

func (r *Reader) Get(k []byte) (v []byte, err error) {
	v, err = r.ss.Get(k, moss.ReadOptions{})
	if err != nil {
		return nil, err
	}
	if v != nil {
		return append(make([]byte, 0, len(v)), v...), nil
	}
	return nil, nil
}

func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) {
	return store.MultiGet(r, keys)
}

func (r *Reader) PrefixIterator(k []byte) store.KVIterator {
	kEnd := incrementBytes(k)

	iter, err := r.ss.StartIterator(k, kEnd, moss.IteratorOptions{})
	if err != nil {
		return nil
	}

	rv := &Iterator{
		store: r.store,
		ss:    r.ss,
		iter:  iter,
		start: k,
		end:   kEnd,
	}

	rv.current()

	return rv
}

func (r *Reader) RangeIterator(start, end []byte) store.KVIterator {
	iter, err := r.ss.StartIterator(start, end, moss.IteratorOptions{})
	if err != nil {
		return nil
	}

	rv := &Iterator{
		store: r.store,
		ss:    r.ss,
		iter:  iter,
		start: start,
		end:   end,
	}

	rv.current()

	return rv
}

func (r *Reader) Close() error {
	return r.ss.Close()
}

func incrementBytes(in []byte) []byte {
	rv := make([]byte, len(in))
	copy(rv, in)
	for i := len(rv) - 1; i >= 0; i-- {
		rv[i] = rv[i] + 1
		if rv[i] != 0 {
			return rv // didn't overflow, so stop
		}
	}
	return nil // overflowed
}
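PrefixIterator derives its exclusive end key with incrementBytes, which rolls trailing 0xff bytes over and returns nil when every byte overflows, meaning there is no upper bound. A tiny standalone illustration of that behaviour (the helper is restated here because the original is unexported):

```
package example

import "fmt"

// incrementBytes restates the unexported helper above: it returns the
// smallest byte slice greater than every key carrying the given prefix,
// or nil when every byte overflows.
func incrementBytes(in []byte) []byte {
	rv := make([]byte, len(in))
	copy(rv, in)
	for i := len(rv) - 1; i >= 0; i-- {
		rv[i] = rv[i] + 1
		if rv[i] != 0 {
			return rv
		}
	}
	return nil
}

func demo() {
	fmt.Printf("%x\n", incrementBytes([]byte{0x01, 0xff})) // 0200
	fmt.Printf("%x\n", incrementBytes([]byte("doc:")))     // 646f633b, i.e. "doc;"
	fmt.Printf("%v\n", incrementBytes([]byte{0xff, 0xff})) // [] (nil): no upper bound, scan to the end
}
```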
58
index/upsidedown/store/moss/stats.go
Normal file
@@ -0,0 +1,58 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package moss

import (
	"github.com/blevesearch/bleve/v2/util"
	store "github.com/blevesearch/upsidedown_store_api"
)

type stats struct {
	s *Store
}

func (s *stats) statsMap() map[string]interface{} {
	ms := map[string]interface{}{}

	var err error
	ms["moss"], err = s.s.ms.Stats()
	if err != nil {
		return ms
	}

	if s.s.llstore != nil {
		if o, ok := s.s.llstore.(store.KVStoreStats); ok {
			ms["kv"] = o.StatsMap()
		}
	}

	_, exists := ms["kv"]
	if !exists && s.s.llstats != nil {
		ms["kv"] = s.s.llstats()
	}

	if msw, ok := s.s.llstore.(*mossStoreWrapper); ok {
		ms["store_histograms"] = msw.histograms()
	}

	ms["coll_histograms"] = s.s.ms.Histograms().String()

	return ms
}

func (s *stats) MarshalJSON() ([]byte, error) {
	m := s.statsMap()
	return util.MarshalJSON(m)
}
231
index/upsidedown/store/moss/store.go
Normal file
@@ -0,0 +1,231 @@
|
|||
// Copyright (c) 2016 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package moss provides a KVStore implementation based on the
|
||||
// github.com/couchbase/moss library.
|
||||
|
||||
package moss
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"github.com/couchbase/moss"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/bleve/v2/util"
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
// RegistryCollectionOptions should be treated as read-only after
|
||||
// process init()'ialization.
|
||||
var RegistryCollectionOptions = map[string]moss.CollectionOptions{}
|
||||
|
||||
const Name = "moss"
|
||||
|
||||
type Store struct {
|
||||
m sync.Mutex
|
||||
ms moss.Collection
|
||||
mo store.MergeOperator
|
||||
llstore store.KVStore // May be nil.
|
||||
llstats statsFunc // May be nil.
|
||||
|
||||
s *stats
|
||||
config map[string]interface{}
|
||||
}
|
||||
|
||||
type statsFunc func() map[string]interface{}
|
||||
|
||||
// New initializes a moss storage with values from the optional
|
||||
// config["mossCollectionOptions"] (a JSON moss.CollectionOptions).
|
||||
// Next, values from the RegistryCollectionOptions, named by the
|
||||
// optional config["mossCollectionOptionsName"], take precedence.
|
||||
// Finally, base case defaults are taken from
|
||||
// moss.DefaultCollectionOptions.
|
||||
func New(mo store.MergeOperator, config map[string]interface{}) (
|
||||
store.KVStore, error) {
|
||||
options := moss.DefaultCollectionOptions // Copy.
|
||||
|
||||
v, ok := config["mossCollectionOptionsName"]
|
||||
if ok {
|
||||
name, ok := v.(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("moss store,"+
|
||||
" could not parse config[mossCollectionOptionsName]: %v", v)
|
||||
}
|
||||
|
||||
options, ok = RegistryCollectionOptions[name] // Copy.
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("moss store,"+
|
||||
" could not find RegistryCollectionOptions, name: %s", name)
|
||||
}
|
||||
}
|
||||
|
||||
options.MergeOperator = mo
|
||||
options.DeferredSort = true
|
||||
|
||||
v, ok = config["mossCollectionOptions"]
|
||||
if ok {
|
||||
b, err := util.MarshalJSON(v) // Convert from map[string]interface{}.
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("moss store,"+
|
||||
" could not marshal config[mossCollectionOptions]: %v, err: %v", v, err)
|
||||
}
|
||||
|
||||
err = util.UnmarshalJSON(b, &options)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("moss store,"+
|
||||
" could not unmarshal config[mossCollectionOptions]: %v, err: %v", v, err)
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------
|
||||
|
||||
if options.Log == nil || options.Debug <= 0 {
|
||||
options.Log = func(format string, a ...interface{}) {}
|
||||
}
|
||||
|
||||
// --------------------------------------------------
|
||||
|
||||
mossLowerLevelStoreName := ""
|
||||
v, ok = config["mossLowerLevelStoreName"]
|
||||
if ok {
|
||||
mossLowerLevelStoreName, ok = v.(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("moss store,"+
|
||||
" could not parse config[mossLowerLevelStoreName]: %v", v)
|
||||
}
|
||||
}
|
||||
|
||||
var llStore store.KVStore
|
||||
var llStats statsFunc
|
||||
|
||||
if options.LowerLevelInit == nil &&
|
||||
options.LowerLevelUpdate == nil &&
|
||||
mossLowerLevelStoreName != "" {
|
||||
mossLowerLevelStoreConfig := map[string]interface{}{}
|
||||
v, ok := config["mossLowerLevelStoreConfig"]
|
||||
if ok {
|
||||
mossLowerLevelStoreConfig, ok = v.(map[string]interface{})
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("moss store, initLowerLevelStore,"+
|
||||
" could parse mossLowerLevelStoreConfig: %v", v)
|
||||
}
|
||||
}
|
||||
|
||||
mossLowerLevelMaxBatchSize := uint64(0)
|
||||
v, ok = config["mossLowerLevelMaxBatchSize"]
|
||||
if ok {
|
||||
mossLowerLevelMaxBatchSizeF, ok := v.(float64)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("moss store,"+
|
||||
" could not parse config[mossLowerLevelMaxBatchSize]: %v", v)
|
||||
}
|
||||
|
||||
mossLowerLevelMaxBatchSize = uint64(mossLowerLevelMaxBatchSizeF)
|
||||
}
|
||||
|
||||
lowerLevelInit, lowerLevelUpdate, lowerLevelStore, lowerLevelStats, err :=
|
||||
initLowerLevelStore(config,
|
||||
mossLowerLevelStoreName,
|
||||
mossLowerLevelStoreConfig,
|
||||
mossLowerLevelMaxBatchSize,
|
||||
options)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
options.LowerLevelInit = lowerLevelInit
|
||||
options.LowerLevelUpdate = lowerLevelUpdate
|
||||
|
||||
llStore = lowerLevelStore
|
||||
llStats = lowerLevelStats
|
||||
}
|
||||
|
||||
// --------------------------------------------------
|
||||
|
||||
ms, err := moss.NewCollection(options)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = ms.Start()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := Store{
|
||||
ms: ms,
|
||||
mo: mo,
|
||||
llstore: llStore,
|
||||
llstats: llStats,
|
||||
config: config,
|
||||
}
|
||||
rv.s = &stats{s: &rv}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func (s *Store) Close() error {
|
||||
if val, ok := s.config["mossAbortCloseEnabled"]; ok {
|
||||
if v, ok := val.(bool); ok && v {
|
||||
if msw, ok := s.llstore.(*mossStoreWrapper); ok {
|
||||
if s := msw.Actual(); s != nil {
|
||||
_ = s.CloseEx(moss.StoreCloseExOptions{Abort: true})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return s.ms.Close()
|
||||
}
|
||||
|
||||
func (s *Store) Reader() (store.KVReader, error) {
|
||||
ss, err := s.ms.Snapshot()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &Reader{ss: ss}, nil
|
||||
}
|
||||
|
||||
func (s *Store) Writer() (store.KVWriter, error) {
|
||||
return &Writer{s: s}, nil
|
||||
}
|
||||
|
||||
func (s *Store) Logf(fmt string, args ...interface{}) {
|
||||
options := s.ms.Options()
|
||||
if options.Log != nil {
|
||||
options.Log(fmt, args...)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Store) Stats() json.Marshaler {
|
||||
return s.s
|
||||
}
|
||||
|
||||
func (s *Store) StatsMap() map[string]interface{} {
|
||||
return s.s.statsMap()
|
||||
}
|
||||
|
||||
func (s *Store) LowerLevelStore() store.KVStore {
|
||||
return s.llstore
|
||||
}
|
||||
|
||||
func (s *Store) Collection() moss.Collection {
|
||||
return s.ms
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterKVStore(Name, New)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
91
index/upsidedown/store/moss/store_test.go
Normal file
91
index/upsidedown/store/moss/store_test.go
Normal file
|
@ -0,0 +1,91 @@
|
|||
// Copyright (c) 2016 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package moss
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
"github.com/blevesearch/upsidedown_store_api/test"
|
||||
)
|
||||
|
||||
func open(t *testing.T, mo store.MergeOperator) store.KVStore {
|
||||
rv, err := New(mo, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func cleanup(t *testing.T, s store.KVStore) {
|
||||
err := s.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMossKVCrud(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestKVCrud(t, s)
|
||||
}
|
||||
|
||||
func TestMossReaderIsolation(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestReaderIsolation(t, s)
|
||||
}
|
||||
|
||||
func TestMossReaderOwnsGetBytes(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestReaderOwnsGetBytes(t, s)
|
||||
}
|
||||
|
||||
func TestMossWriterOwnsBytes(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestWriterOwnsBytes(t, s)
|
||||
}
|
||||
|
||||
func TestMossPrefixIterator(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestPrefixIterator(t, s)
|
||||
}
|
||||
|
||||
func TestMossPrefixIteratorSeek(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestPrefixIteratorSeek(t, s)
|
||||
}
|
||||
|
||||
func TestMossRangeIterator(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestRangeIterator(t, s)
|
||||
}
|
||||
|
||||
func TestMossRangeIteratorSeek(t *testing.T) {
|
||||
s := open(t, nil)
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestRangeIteratorSeek(t, s)
|
||||
}
|
||||
|
||||
func TestMossMerge(t *testing.T) {
|
||||
s := open(t, &test.TestMergeCounter{})
|
||||
defer cleanup(t, s)
|
||||
test.CommonTestMerge(t, s)
|
||||
}
|
97
index/upsidedown/store/moss/writer.go
Normal file
97
index/upsidedown/store/moss/writer.go
Normal file
|
@ -0,0 +1,97 @@
|
|||
// Copyright (c) 2016 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package moss
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
|
||||
"github.com/couchbase/moss"
|
||||
)
|
||||
|
||||
type Writer struct {
|
||||
s *Store
|
||||
}
|
||||
|
||||
func (w *Writer) NewBatch() store.KVBatch {
|
||||
b, err := w.s.ms.NewBatch(0, 0)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return &Batch{
|
||||
store: w.s,
|
||||
merge: store.NewEmulatedMerge(w.s.mo),
|
||||
batch: b,
|
||||
}
|
||||
}
|
||||
|
||||
func (w *Writer) NewBatchEx(options store.KVBatchOptions) (
|
||||
[]byte, store.KVBatch, error) {
|
||||
numOps := options.NumSets + options.NumDeletes + options.NumMerges
|
||||
|
||||
b, err := w.s.ms.NewBatch(numOps, options.TotalBytes)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
buf, err := b.Alloc(options.TotalBytes)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
return buf, &Batch{
|
||||
store: w.s,
|
||||
merge: store.NewEmulatedMerge(w.s.mo),
|
||||
batch: b,
|
||||
buf: buf,
|
||||
bufUsed: 0,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (w *Writer) ExecuteBatch(b store.KVBatch) (err error) {
|
||||
batch, ok := b.(*Batch)
|
||||
if !ok {
|
||||
return fmt.Errorf("wrong type of batch")
|
||||
}
|
||||
|
||||
for kStr, mergeOps := range batch.merge.Merges {
|
||||
for _, v := range mergeOps {
|
||||
if batch.buf != nil {
|
||||
kLen := len(kStr)
|
||||
vLen := len(v)
|
||||
kBuf := batch.buf[batch.bufUsed : batch.bufUsed+kLen]
|
||||
vBuf := batch.buf[batch.bufUsed+kLen : batch.bufUsed+kLen+vLen]
|
||||
copy(kBuf, kStr)
|
||||
copy(vBuf, v)
|
||||
batch.bufUsed += kLen + vLen
|
||||
err = batch.batch.AllocMerge(kBuf, vBuf)
|
||||
} else {
|
||||
err = batch.batch.Merge([]byte(kStr), v)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return w.s.ms.ExecuteBatch(batch.batch, moss.WriteOptions{})
|
||||
}
|
||||
|
||||
func (w *Writer) Close() error {
|
||||
w.s = nil
|
||||
return nil
|
||||
}
|
121
index/upsidedown/store/null/null.go
Normal file
121
index/upsidedown/store/null/null.go
Normal file
|
@ -0,0 +1,121 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package null
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
const Name = "null"
|
||||
|
||||
type Store struct{}
|
||||
|
||||
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) {
|
||||
return &Store{}, nil
|
||||
}
|
||||
|
||||
func (i *Store) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *Store) Reader() (store.KVReader, error) {
|
||||
return &reader{}, nil
|
||||
}
|
||||
|
||||
func (i *Store) Writer() (store.KVWriter, error) {
|
||||
return &writer{}, nil
|
||||
}
|
||||
|
||||
type reader struct{}
|
||||
|
||||
func (r *reader) Get(key []byte) ([]byte, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (r *reader) MultiGet(keys [][]byte) ([][]byte, error) {
|
||||
return make([][]byte, len(keys)), nil
|
||||
}
|
||||
|
||||
func (r *reader) PrefixIterator(prefix []byte) store.KVIterator {
|
||||
return &iterator{}
|
||||
}
|
||||
|
||||
func (r *reader) RangeIterator(start, end []byte) store.KVIterator {
|
||||
return &iterator{}
|
||||
}
|
||||
|
||||
func (r *reader) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
type iterator struct{}
|
||||
|
||||
func (i *iterator) SeekFirst() {}
|
||||
func (i *iterator) Seek(k []byte) {}
|
||||
func (i *iterator) Next() {}
|
||||
|
||||
func (i *iterator) Current() ([]byte, []byte, bool) {
|
||||
return nil, nil, false
|
||||
}
|
||||
|
||||
func (i *iterator) Key() []byte {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *iterator) Value() []byte {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *iterator) Valid() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (i *iterator) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
type batch struct{}
|
||||
|
||||
func (i *batch) Set(key, val []byte) {}
|
||||
func (i *batch) Delete(key []byte) {}
|
||||
func (i *batch) Merge(key, val []byte) {}
|
||||
func (i *batch) Reset() {}
|
||||
func (i *batch) Close() error { return nil }
|
||||
|
||||
type writer struct{}
|
||||
|
||||
func (w *writer) NewBatch() store.KVBatch {
|
||||
return &batch{}
|
||||
}
|
||||
|
||||
func (w *writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) {
|
||||
return make([]byte, options.TotalBytes), w.NewBatch(), nil
|
||||
}
|
||||
|
||||
func (w *writer) ExecuteBatch(store.KVBatch) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (w *writer) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterKVStore(Name, New)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
92
index/upsidedown/store/null/null_test.go
Normal file
92
index/upsidedown/store/null/null_test.go
Normal file
|
@ -0,0 +1,92 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package null
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
store "github.com/blevesearch/upsidedown_store_api"
|
||||
)
|
||||
|
||||
func TestStore(t *testing.T) {
|
||||
s, err := New(nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
NullTestKVStore(t, s)
|
||||
}
|
||||
|
||||
// NullTestKVStore has very different expectations
|
||||
// compared to CommonTestKVStore
|
||||
func NullTestKVStore(t *testing.T, s store.KVStore) {
|
||||
|
||||
writer, err := s.Writer()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
batch := writer.NewBatch()
|
||||
batch.Set([]byte("b"), []byte("val-b"))
|
||||
batch.Set([]byte("c"), []byte("val-c"))
|
||||
batch.Set([]byte("d"), []byte("val-d"))
|
||||
batch.Set([]byte("e"), []byte("val-e"))
|
||||
batch.Set([]byte("f"), []byte("val-f"))
|
||||
batch.Set([]byte("g"), []byte("val-g"))
|
||||
batch.Set([]byte("h"), []byte("val-h"))
|
||||
batch.Set([]byte("i"), []byte("val-i"))
|
||||
batch.Set([]byte("j"), []byte("val-j"))
|
||||
|
||||
err = writer.ExecuteBatch(batch)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = writer.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
reader, err := s.Reader()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
defer func() {
|
||||
err := reader.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
it := reader.RangeIterator([]byte("b"), nil)
|
||||
key, val, valid := it.Current()
|
||||
if valid {
|
||||
t.Fatalf("valid true, expected false")
|
||||
}
|
||||
if key != nil {
|
||||
t.Fatalf("expected key nil, got %s", key)
|
||||
}
|
||||
if val != nil {
|
||||
t.Fatalf("expected value nil, got %s", val)
|
||||
}
|
||||
|
||||
err = it.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
err = s.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
1079
index/upsidedown/upsidedown.go
Normal file
1079
index/upsidedown/upsidedown.go
Normal file
File diff suppressed because it is too large
Load diff
690
index/upsidedown/upsidedown.pb.go
Normal file
690
index/upsidedown/upsidedown.pb.go
Normal file
|
@ -0,0 +1,690 @@
|
|||
// Code generated by protoc-gen-gogo.
|
||||
// source: upsidedown.proto
|
||||
// DO NOT EDIT!
|
||||
|
||||
/*
|
||||
Package upsidedown is a generated protocol buffer package.
|
||||
|
||||
It is generated from these files:
|
||||
|
||||
upsidedown.proto
|
||||
|
||||
It has these top-level messages:
|
||||
|
||||
BackIndexTermsEntry
|
||||
BackIndexStoreEntry
|
||||
BackIndexRowValue
|
||||
*/
|
||||
package upsidedown
|
||||
|
||||
import proto "github.com/golang/protobuf/proto"
|
||||
import math "math"
|
||||
|
||||
import io "io"
|
||||
import fmt "fmt"
|
||||
import github_com_golang_protobuf_proto "github.com/golang/protobuf/proto"
|
||||
|
||||
// Reference imports to suppress errors if they are not otherwise used.
|
||||
var _ = proto.Marshal
|
||||
var _ = math.Inf
|
||||
|
||||
type BackIndexTermsEntry struct {
|
||||
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"`
|
||||
Terms []string `protobuf:"bytes,2,rep,name=terms" json:"terms,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *BackIndexTermsEntry) Reset() { *m = BackIndexTermsEntry{} }
|
||||
func (m *BackIndexTermsEntry) String() string { return proto.CompactTextString(m) }
|
||||
func (*BackIndexTermsEntry) ProtoMessage() {}
|
||||
|
||||
func (m *BackIndexTermsEntry) GetField() uint32 {
|
||||
if m != nil && m.Field != nil {
|
||||
return *m.Field
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (m *BackIndexTermsEntry) GetTerms() []string {
|
||||
if m != nil {
|
||||
return m.Terms
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type BackIndexStoreEntry struct {
|
||||
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"`
|
||||
ArrayPositions []uint64 `protobuf:"varint,2,rep,name=arrayPositions" json:"arrayPositions,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *BackIndexStoreEntry) Reset() { *m = BackIndexStoreEntry{} }
|
||||
func (m *BackIndexStoreEntry) String() string { return proto.CompactTextString(m) }
|
||||
func (*BackIndexStoreEntry) ProtoMessage() {}
|
||||
|
||||
func (m *BackIndexStoreEntry) GetField() uint32 {
|
||||
if m != nil && m.Field != nil {
|
||||
return *m.Field
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (m *BackIndexStoreEntry) GetArrayPositions() []uint64 {
|
||||
if m != nil {
|
||||
return m.ArrayPositions
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type BackIndexRowValue struct {
|
||||
TermsEntries []*BackIndexTermsEntry `protobuf:"bytes,1,rep,name=termsEntries" json:"termsEntries,omitempty"`
|
||||
StoredEntries []*BackIndexStoreEntry `protobuf:"bytes,2,rep,name=storedEntries" json:"storedEntries,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *BackIndexRowValue) Reset() { *m = BackIndexRowValue{} }
|
||||
func (m *BackIndexRowValue) String() string { return proto.CompactTextString(m) }
|
||||
func (*BackIndexRowValue) ProtoMessage() {}
|
||||
|
||||
func (m *BackIndexRowValue) GetTermsEntries() []*BackIndexTermsEntry {
|
||||
if m != nil {
|
||||
return m.TermsEntries
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *BackIndexRowValue) GetStoredEntries() []*BackIndexStoreEntry {
|
||||
if m != nil {
|
||||
return m.StoredEntries
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *BackIndexTermsEntry) Unmarshal(data []byte) error {
|
||||
var hasFields [1]uint64
|
||||
l := len(data)
|
||||
iNdEx := 0
|
||||
for iNdEx < l {
|
||||
var wire uint64
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
wire |= (uint64(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
fieldNum := int32(wire >> 3)
|
||||
wireType := int(wire & 0x7)
|
||||
switch fieldNum {
|
||||
case 1:
|
||||
if wireType != 0 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
|
||||
}
|
||||
var v uint32
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
v |= (uint32(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
m.Field = &v
|
||||
hasFields[0] |= uint64(0x00000001)
|
||||
case 2:
|
||||
if wireType != 2 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field Terms", wireType)
|
||||
}
|
||||
var stringLen uint64
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
stringLen |= (uint64(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
postIndex := iNdEx + int(stringLen)
|
||||
if postIndex > l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
m.Terms = append(m.Terms, string(data[iNdEx:postIndex]))
|
||||
iNdEx = postIndex
|
||||
default:
|
||||
var sizeOfWire int
|
||||
for {
|
||||
sizeOfWire++
|
||||
wire >>= 7
|
||||
if wire == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
iNdEx -= sizeOfWire
|
||||
skippy, err := skipUpsidedown(data[iNdEx:])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skippy < 0 {
|
||||
return ErrInvalidLengthUpsidedown
|
||||
}
|
||||
if (iNdEx + skippy) > l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
|
||||
iNdEx += skippy
|
||||
}
|
||||
}
|
||||
if hasFields[0]&uint64(0x00000001) == 0 {
|
||||
return new(github_com_golang_protobuf_proto.RequiredNotSetError)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
func (m *BackIndexStoreEntry) Unmarshal(data []byte) error {
|
||||
var hasFields [1]uint64
|
||||
l := len(data)
|
||||
iNdEx := 0
|
||||
for iNdEx < l {
|
||||
var wire uint64
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
wire |= (uint64(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
fieldNum := int32(wire >> 3)
|
||||
wireType := int(wire & 0x7)
|
||||
switch fieldNum {
|
||||
case 1:
|
||||
if wireType != 0 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
|
||||
}
|
||||
var v uint32
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
v |= (uint32(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
m.Field = &v
|
||||
hasFields[0] |= uint64(0x00000001)
|
||||
case 2:
|
||||
if wireType != 0 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field ArrayPositions", wireType)
|
||||
}
|
||||
var v uint64
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
v |= (uint64(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
m.ArrayPositions = append(m.ArrayPositions, v)
|
||||
default:
|
||||
var sizeOfWire int
|
||||
for {
|
||||
sizeOfWire++
|
||||
wire >>= 7
|
||||
if wire == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
iNdEx -= sizeOfWire
|
||||
skippy, err := skipUpsidedown(data[iNdEx:])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skippy < 0 {
|
||||
return ErrInvalidLengthUpsidedown
|
||||
}
|
||||
if (iNdEx + skippy) > l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
|
||||
iNdEx += skippy
|
||||
}
|
||||
}
|
||||
if hasFields[0]&uint64(0x00000001) == 0 {
|
||||
return new(github_com_golang_protobuf_proto.RequiredNotSetError)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
func (m *BackIndexRowValue) Unmarshal(data []byte) error {
|
||||
l := len(data)
|
||||
iNdEx := 0
|
||||
for iNdEx < l {
|
||||
var wire uint64
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
wire |= (uint64(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
fieldNum := int32(wire >> 3)
|
||||
wireType := int(wire & 0x7)
|
||||
switch fieldNum {
|
||||
case 1:
|
||||
if wireType != 2 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field TermsEntries", wireType)
|
||||
}
|
||||
var msglen int
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
msglen |= (int(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
postIndex := iNdEx + msglen
|
||||
if msglen < 0 {
|
||||
return ErrInvalidLengthUpsidedown
|
||||
}
|
||||
if postIndex > l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
m.TermsEntries = append(m.TermsEntries, &BackIndexTermsEntry{})
|
||||
if err := m.TermsEntries[len(m.TermsEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
|
||||
return err
|
||||
}
|
||||
iNdEx = postIndex
|
||||
case 2:
|
||||
if wireType != 2 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field StoredEntries", wireType)
|
||||
}
|
||||
var msglen int
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
msglen |= (int(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
postIndex := iNdEx + msglen
|
||||
if msglen < 0 {
|
||||
return ErrInvalidLengthUpsidedown
|
||||
}
|
||||
if postIndex > l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
m.StoredEntries = append(m.StoredEntries, &BackIndexStoreEntry{})
|
||||
if err := m.StoredEntries[len(m.StoredEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
|
||||
return err
|
||||
}
|
||||
iNdEx = postIndex
|
||||
default:
|
||||
var sizeOfWire int
|
||||
for {
|
||||
sizeOfWire++
|
||||
wire >>= 7
|
||||
if wire == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
iNdEx -= sizeOfWire
|
||||
skippy, err := skipUpsidedown(data[iNdEx:])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skippy < 0 {
|
||||
return ErrInvalidLengthUpsidedown
|
||||
}
|
||||
if (iNdEx + skippy) > l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
|
||||
iNdEx += skippy
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
func skipUpsidedown(data []byte) (n int, err error) {
|
||||
l := len(data)
|
||||
iNdEx := 0
|
||||
for iNdEx < l {
|
||||
var wire uint64
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return 0, io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
wire |= (uint64(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
wireType := int(wire & 0x7)
|
||||
switch wireType {
|
||||
case 0:
|
||||
for {
|
||||
if iNdEx >= l {
|
||||
return 0, io.ErrUnexpectedEOF
|
||||
}
|
||||
iNdEx++
|
||||
if data[iNdEx-1] < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
return iNdEx, nil
|
||||
case 1:
|
||||
iNdEx += 8
|
||||
return iNdEx, nil
|
||||
case 2:
|
||||
var length int
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return 0, io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
length |= (int(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
iNdEx += length
|
||||
if length < 0 {
|
||||
return 0, ErrInvalidLengthUpsidedown
|
||||
}
|
||||
return iNdEx, nil
|
||||
case 3:
|
||||
for {
|
||||
var innerWire uint64
|
||||
var start int = iNdEx
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if iNdEx >= l {
|
||||
return 0, io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
innerWire |= (uint64(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
innerWireType := int(innerWire & 0x7)
|
||||
if innerWireType == 4 {
|
||||
break
|
||||
}
|
||||
next, err := skipUpsidedown(data[start:])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
iNdEx = start + next
|
||||
}
|
||||
return iNdEx, nil
|
||||
case 4:
|
||||
return iNdEx, nil
|
||||
case 5:
|
||||
iNdEx += 4
|
||||
return iNdEx, nil
|
||||
default:
|
||||
return 0, fmt.Errorf("proto: illegal wireType %d", wireType)
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
var (
|
||||
ErrInvalidLengthUpsidedown = fmt.Errorf("proto: negative length found during unmarshaling")
|
||||
)
|
||||
|
||||
func (m *BackIndexTermsEntry) Size() (n int) {
|
||||
var l int
|
||||
_ = l
|
||||
if m.Field != nil {
|
||||
n += 1 + sovUpsidedown(uint64(*m.Field))
|
||||
}
|
||||
if len(m.Terms) > 0 {
|
||||
for _, s := range m.Terms {
|
||||
l = len(s)
|
||||
n += 1 + l + sovUpsidedown(uint64(l))
|
||||
}
|
||||
}
|
||||
if m.XXX_unrecognized != nil {
|
||||
n += len(m.XXX_unrecognized)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func (m *BackIndexStoreEntry) Size() (n int) {
|
||||
var l int
|
||||
_ = l
|
||||
if m.Field != nil {
|
||||
n += 1 + sovUpsidedown(uint64(*m.Field))
|
||||
}
|
||||
if len(m.ArrayPositions) > 0 {
|
||||
for _, e := range m.ArrayPositions {
|
||||
n += 1 + sovUpsidedown(uint64(e))
|
||||
}
|
||||
}
|
||||
if m.XXX_unrecognized != nil {
|
||||
n += len(m.XXX_unrecognized)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func (m *BackIndexRowValue) Size() (n int) {
|
||||
var l int
|
||||
_ = l
|
||||
if len(m.TermsEntries) > 0 {
|
||||
for _, e := range m.TermsEntries {
|
||||
l = e.Size()
|
||||
n += 1 + l + sovUpsidedown(uint64(l))
|
||||
}
|
||||
}
|
||||
if len(m.StoredEntries) > 0 {
|
||||
for _, e := range m.StoredEntries {
|
||||
l = e.Size()
|
||||
n += 1 + l + sovUpsidedown(uint64(l))
|
||||
}
|
||||
}
|
||||
if m.XXX_unrecognized != nil {
|
||||
n += len(m.XXX_unrecognized)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func sovUpsidedown(x uint64) (n int) {
|
||||
for {
|
||||
n++
|
||||
x >>= 7
|
||||
if x == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
func sozUpsidedown(x uint64) (n int) {
|
||||
return sovUpsidedown(uint64((x << 1) ^ uint64((int64(x) >> 63))))
|
||||
}
|
||||
func (m *BackIndexTermsEntry) Marshal() (data []byte, err error) {
|
||||
size := m.Size()
|
||||
data = make([]byte, size)
|
||||
n, err := m.MarshalTo(data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return data[:n], nil
|
||||
}
|
||||
|
||||
func (m *BackIndexTermsEntry) MarshalTo(data []byte) (n int, err error) {
|
||||
var i int
|
||||
_ = i
|
||||
var l int
|
||||
_ = l
|
||||
if m.Field == nil {
|
||||
return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
|
||||
} else {
|
||||
data[i] = 0x8
|
||||
i++
|
||||
i = encodeVarintUpsidedown(data, i, uint64(*m.Field))
|
||||
}
|
||||
if len(m.Terms) > 0 {
|
||||
for _, s := range m.Terms {
|
||||
data[i] = 0x12
|
||||
i++
|
||||
l = len(s)
|
||||
for l >= 1<<7 {
|
||||
data[i] = uint8(uint64(l)&0x7f | 0x80)
|
||||
l >>= 7
|
||||
i++
|
||||
}
|
||||
data[i] = uint8(l)
|
||||
i++
|
||||
i += copy(data[i:], s)
|
||||
}
|
||||
}
|
||||
if m.XXX_unrecognized != nil {
|
||||
i += copy(data[i:], m.XXX_unrecognized)
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
func (m *BackIndexStoreEntry) Marshal() (data []byte, err error) {
|
||||
size := m.Size()
|
||||
data = make([]byte, size)
|
||||
n, err := m.MarshalTo(data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return data[:n], nil
|
||||
}
|
||||
|
||||
func (m *BackIndexStoreEntry) MarshalTo(data []byte) (n int, err error) {
|
||||
var i int
|
||||
_ = i
|
||||
var l int
|
||||
_ = l
|
||||
if m.Field == nil {
|
||||
return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
|
||||
} else {
|
||||
data[i] = 0x8
|
||||
i++
|
||||
i = encodeVarintUpsidedown(data, i, uint64(*m.Field))
|
||||
}
|
||||
if len(m.ArrayPositions) > 0 {
|
||||
for _, num := range m.ArrayPositions {
|
||||
data[i] = 0x10
|
||||
i++
|
||||
i = encodeVarintUpsidedown(data, i, uint64(num))
|
||||
}
|
||||
}
|
||||
if m.XXX_unrecognized != nil {
|
||||
i += copy(data[i:], m.XXX_unrecognized)
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
func (m *BackIndexRowValue) Marshal() (data []byte, err error) {
|
||||
size := m.Size()
|
||||
data = make([]byte, size)
|
||||
n, err := m.MarshalTo(data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return data[:n], nil
|
||||
}
|
||||
|
||||
func (m *BackIndexRowValue) MarshalTo(data []byte) (n int, err error) {
|
||||
var i int
|
||||
_ = i
|
||||
var l int
|
||||
_ = l
|
||||
if len(m.TermsEntries) > 0 {
|
||||
for _, msg := range m.TermsEntries {
|
||||
data[i] = 0xa
|
||||
i++
|
||||
i = encodeVarintUpsidedown(data, i, uint64(msg.Size()))
|
||||
n, err := msg.MarshalTo(data[i:])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
i += n
|
||||
}
|
||||
}
|
||||
if len(m.StoredEntries) > 0 {
|
||||
for _, msg := range m.StoredEntries {
|
||||
data[i] = 0x12
|
||||
i++
|
||||
i = encodeVarintUpsidedown(data, i, uint64(msg.Size()))
|
||||
n, err := msg.MarshalTo(data[i:])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
i += n
|
||||
}
|
||||
}
|
||||
if m.XXX_unrecognized != nil {
|
||||
i += copy(data[i:], m.XXX_unrecognized)
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
func encodeFixed64Upsidedown(data []byte, offset int, v uint64) int {
|
||||
data[offset] = uint8(v)
|
||||
data[offset+1] = uint8(v >> 8)
|
||||
data[offset+2] = uint8(v >> 16)
|
||||
data[offset+3] = uint8(v >> 24)
|
||||
data[offset+4] = uint8(v >> 32)
|
||||
data[offset+5] = uint8(v >> 40)
|
||||
data[offset+6] = uint8(v >> 48)
|
||||
data[offset+7] = uint8(v >> 56)
|
||||
return offset + 8
|
||||
}
|
||||
func encodeFixed32Upsidedown(data []byte, offset int, v uint32) int {
|
||||
data[offset] = uint8(v)
|
||||
data[offset+1] = uint8(v >> 8)
|
||||
data[offset+2] = uint8(v >> 16)
|
||||
data[offset+3] = uint8(v >> 24)
|
||||
return offset + 4
|
||||
}
|
||||
func encodeVarintUpsidedown(data []byte, offset int, v uint64) int {
|
||||
for v >= 1<<7 {
|
||||
data[offset] = uint8(v&0x7f | 0x80)
|
||||
v >>= 7
|
||||
offset++
|
||||
}
|
||||
data[offset] = uint8(v)
|
||||
return offset + 1
|
||||
}
|
14
index/upsidedown/upsidedown.proto
Normal file
14
index/upsidedown/upsidedown.proto
Normal file
|
@ -0,0 +1,14 @@
|
|||
message BackIndexTermsEntry {
|
||||
required uint32 field = 1;
|
||||
repeated string terms = 2;
|
||||
}
|
||||
|
||||
message BackIndexStoreEntry {
|
||||
required uint32 field = 1;
|
||||
repeated uint64 arrayPositions = 2;
|
||||
}
|
||||
|
||||
message BackIndexRowValue {
|
||||
repeated BackIndexTermsEntry termsEntries = 1;
|
||||
repeated BackIndexStoreEntry storedEntries = 2;
|
||||
}
|
1529
index/upsidedown/upsidedown_test.go
Normal file
1529
index/upsidedown/upsidedown_test.go
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue