WL#2223: NdbRecord
Affects: Server-6.0
—
Status: Complete
Introduce NdbRecord
NdbRecord is linear memory per operation
NdbRecord can be mapped (almost) directly to mysql record
Lots of code can be removed, and
lots of optimizations can be added, if using records.
Furthermore, it is quite simple to make this backward compatible.
---
An NdbRecord object provides a mapping to a full or a partial record stored in NDB.
An NdbRecord can also be used together with a bitmap to access part of the record.
An NdbRecord is prepared by the API program.
An NdbRecord is validated and finalized by the NDB API.
An NdbRecord object can be used simultaneously
in several operations
in several transactions
in several threads
(i.e. the actual NdbRecord object should not contain any data references)
Usage of NdbRecord can be phased in (as deliverables or internally) as follows:
* uk read
* pk read
* table scan
* index scan
* pk dml
* uk dml
* index bounds
* WL#1496 is considered dependent on this WL, as it will be so much simpler
to implement once this is done
---
Logically, readTuple(NdbRecord) <=> readTuple() + getValue() on each column in the record,
insertTuple(NdbRecord) <=> insertTuple() + setValue() on each column in the record,
etc...
// Usage sketch (pseudo-code: table, dict, pTrans, TUPLE, columnmask and
// record_specification are assumed to be set up by the caller).
//
// Create one NdbRecord describing the columns to read, and one describing
// only the key columns. The last two arguments are the number of entries in
// the specification array and the size of one entry.
NdbRecord *record= dict->createRecord(table, record_specification,
                                      number_of_columns,
                                      sizeof(record_specification[0]));
NdbRecord *key_record= dict->createRecord(table, record_specification,
                                          number_of_key_columns,
                                          sizeof(record_specification[0]));
// Row buffers: buf0 and buf1 hold keys, buf2 receives the result row.
char* buf0 = malloc(sizeof(TUPLE));
char* buf1 = malloc(sizeof(TUPLE));
char* buf2 = malloc(sizeof(TUPLE));
// set key in buf0
// set key in buf1
// NOTE(review): buf1 is filled in but never passed to an operation below, and
// op0 is assigned twice; the two calls are presumably alternatives — confirm.
// Read all columns in record into buf2
op0 = pTrans->readTuple(key_record, buf0, record, buf2,LM_Read, 0);
// Read all columns in record & columnmask
op0 = pTrans->readTuple(key_record, buf0, record, buf2,LM_Read, columnmask);
API:
/*
Describes where one column lives inside an application-defined row buffer.
createRecord() takes a pointer to an array of these entries (recSpec)
together with the number of entries and the size of one entry.
*/
struct RecordSpecification {
/*
Column described by this entry (the column maximum size defines field
size in row).
Note that even when creating an NdbRecord for an index, the column
pointers must be to columns obtained from the underlying table, not
from the index itself.
*/
const Column *column;
/* Offset of the column data from the start of a row. */
Uint32 offset;
/* Offset, from the start of the row, of the byte containing the NULL bit. */
Uint32 nullbit_byte_offset;
/* Position (0-7) of the NULL bit within the byte at nullbit_byte_offset. */
Uint32 nullbit_bit_in_byte;
};
/* Excerpt: only the NdbRecord-related additions to NdbDictionary are shown. */
class NdbDictionary {
...
/*
Create an NdbRecord for use in table operations.
recSpec points to an array of RecordSpecification entries. In the usage
example of this document, length is passed the number of columns and
elemSize is passed sizeof(record_specification[0]), i.e. the size of one
array entry.
*/
NdbRecord *createRecord(const Table *table,
const RecordSpecification *recSpec,
Uint32 length,
Uint32 elemSize);
/*
Create an NdbRecord for use in index operations.
Takes both the index and its underlying table; per the RecordSpecification
description, the column pointers in recSpec must come from the table, not
from the index. recSpec, length and elemSize are as for the table variant.
*/
NdbRecord *createRecord(const Index *index,
const Table *table,
const RecordSpecification *recSpec,
Uint32 length,
Uint32 elemSize);
/*
Release an NdbRecord.
NOTE(review): presumably rec must have been obtained from createRecord() and
must no longer be in use by any operation — confirm.
*/
void releaseRecord(NdbRecord *rec);
};
/* Excerpt: only the NdbRecord-related additions to NdbTransaction are shown. */
class NdbTransaction {
...
/* Primary key NdbRecord operations. */
/*
Read the row identified by key_rec/key_row into result_row, laid out as
described by result_rec. With result_mask == 0 all columns in result_rec
are read; with a mask, only the columns in result_rec that are also
selected by the mask are read.
*/
NdbOperation *readTuple(const NdbRecord *key_rec, const char *key_row,
const NdbRecord *result_rec, char *result_row,
NdbOperation::LockMode lock_mode= NdbOperation::LM_Read,
const unsigned char *result_mask= 0);
/*
Insert the row stored in 'row', laid out as described by rec.
NOTE(review): mask presumably restricts which columns of rec are used,
as for readTuple — confirm.
*/
NdbOperation *insertTuple(const NdbRecord *rec, const char *row,
const unsigned char *mask= 0);
/*
Update the row identified by key_rec/key_row with the values in attr_row,
laid out as described by attr_rec.
*/
NdbOperation *updateTuple(const NdbRecord *key_rec, const char *key_row,
const NdbRecord *attr_rec, const char *attr_row,
const unsigned char *mask= 0);
/*
Write the row identified by key_rec/key_row using the values in attr_row.
NOTE(review): unlike the sibling operations, mask has no default value
here — confirm whether that is intentional.
*/
NdbOperation *writeTuple(const NdbRecord *key_rec, const char *key_row,
const NdbRecord *attr_rec, const char *attr_row,
const unsigned char *mask);
/* Delete the row identified by key_rec/key_row. */
NdbOperation *deleteTuple(const NdbRecord *key_rec, const char *key_row);
/*
Scan a table, using NdbRecord to read out column data.
The result_record pointer must remain valid until after the call to
execute().
The result_mask pointer is optional; if present, only columns for which
the corresponding bit in result_mask is set will be retrieved in the
scan. The result_mask is copied internally, so in contrast to
result_record it need not be valid at execute().
The parallel argument is the desired parallelism, or 0 for maximum
parallelism (receiving rows from all fragments in parallel).
*/
NdbScanOperation *
scanTable(const NdbRecord *result_record,
NdbOperation::LockMode lock_mode= NdbOperation::LM_Read,
const unsigned char *result_mask= 0,
Uint32 scan_flags= 0,
Uint32 parallel= 0,
Uint32 batch= 0);
/*
Do an index range scan (optionally ordered) of a table.
The key_record describes the index to be scanned. It must be a
primary key record for the index, i.e. it must specify exactly the
key columns of the index.
The result_record describes the rows to be returned from the scan. For an
ordered index scan, result_record must be a key record for the index to
be scanned, that is, it must include at least all of the columns in the
index.
Both the key_record and the result_record must be created from the Index
to be scanned, not from the underlying table.
The call uses a callback function as a flexible way of specifying multiple
range bounds. The callback will be called once for each bound to define
lower and upper key value etc.
The callback receives a private callback_data void *, and the index of the
bound (0 .. num_key_bounds; NOTE(review): presumably the last index is
num_key_bounds - 1 — confirm). However, it is guaranteed that it will be
called in ordered sequence, so it is permissible to ignore the passed
bound_index and just return the values for the next bound (for example
if data is kept in a linked list).
The callback can return 0 to denote success, and -1 to denote error (the
latter causing the creation of the NdbIndexScanOperation to fail).
This multi-range method is only for use in mysqld code (hence it is
declared private).
*/
private:
NdbIndexScanOperation *
scanIndex(const NdbRecord *key_record,
int (*get_bound_callback)(void *callback_data,
Uint32 bound_index,
NdbIndexScanOperation::IndexBound & bound),
void *callback_data,
Uint32 num_key_bounds,
const NdbRecord *result_record,
NdbOperation::LockMode lock_mode= NdbOperation::LM_Read,
const unsigned char *result_mask= 0,
Uint32 scan_flags= 0,
Uint32 parallel= 0,
Uint32 batch= 0);
public:
/*
A convenience wrapper for simpler specification of a single bound.
The low_* and high_* parameters have the same names as the corresponding
members of NdbIndexScanOperation::IndexBound; see that structure for their
meaning. The remaining parameters are as for the multi-range variant.
*/
NdbIndexScanOperation *
scanIndex(const NdbRecord *key_record,
const char *low_key,
Uint32 low_key_count,
bool low_inclusive,
const char * high_key,
Uint32 high_key_count,
bool high_inclusive,
const NdbRecord *result_record,
NdbOperation::LockMode lock_mode= NdbOperation::LM_Read,
const unsigned char *result_mask= 0,
Uint32 scan_flags= 0,
Uint32 parallel= 0,
Uint32 batch= 0);
};
/* Excerpt: only the NdbRecord-related additions to NdbScanOperation are shown. */
class NdbScanOperation {
...
/*
NdbRecord version of nextResult.
On a return value of 0, out_row is set to point to the next row. This
pointer is valid (only) until the next call to nextResult() with
fetchAllowed==true.
The NdbRecord object defining the row format was specified in the
NdbTransaction::scanTable (or scanIndex) call.
*/
int nextResult(const char * & out_row,
bool fetchAllowed = true, bool forceSend = false);
/*
NdbRecord versions of scan lock take-over operations.
Note that calling NdbRecord scan lock take-over on an NdbRecAttr-style
scan is not valid, nor is calling NdbRecAttr-style scan lock take-over
on an NdbRecord-style scan.
*/
/*
Take over the lock without changing the row.
Optionally also read from the row (leave row at its default value NULL
to not read any attributes).
The NdbRecord * is required even when not reading any attributes.
*/
NdbOperation *lockCurrentTuple(NdbTransaction *takeOverTrans,
const NdbRecord *record,
char *row= 0,
const unsigned char *mask= 0);
/*
Update the current tuple, NdbRecord version.
Values to update with are contained in the passed-in row, laid out as
described by record.
*/
NdbOperation *updateCurrentTuple(NdbTransaction *takeOverTrans,
const NdbRecord *record,
const char *row,
const unsigned char *mask= 0);
/* Delete the current tuple, NdbRecord version. */
NdbOperation *deleteCurrentTuple(NdbTransaction *takeOverTrans,
const NdbRecord *record);
};
/* Excerpt: only the NdbRecord-related additions to NdbIndexScanOperation are shown. */
class NdbIndexScanOperation {
...
/*
Structure used to describe index scan bounds, for NdbRecord scans.
One IndexBound describes a single range; the multi-range
NdbTransaction::scanIndex() fills one in per bound via its callback.
*/
struct IndexBound {
/* Row containing lower bound, or NULL for scan from the start. */
const char *low_key;
/* Number of columns in lower bound, for bounding by partial prefix. */
Uint32 low_key_count;
/*
True if the lower bound is inclusive (low_key <= value), false if it is
strict (low_key < value).
*/
bool low_inclusive;
/* Row containing upper bound, or NULL for scan to the end. */
const char * high_key;
/* Number of columns in upper bound, for bounding by partial prefix. */
Uint32 high_key_count;
/*
True if the upper bound is inclusive (high_key >= value), false if it is
strict (high_key > value).
*/
bool high_inclusive;
/*
Value to identify this bound, may be read with get_range_no().
Must be < 8192 (set to zero if not using range_no).
Note that for ordered scans, the range_no must be strictly increasing
for each range, or the result set will not be sorted correctly.
*/
Uint32 range_no;
};
};
- index scan bounds
- index scan merge estimate two weeks from 25/1-07
- scan take-over
- review comment pk operations jo: 3 days
- bitfields jo: 3 days
- unique index jo: 3 days
- pseudo attributes jo: 1 day sum: 2 + 2 weeks
- blobs 1 week read, (preferably done during above) 1 week impl. (very uncertain) sum: 2 + 2 + 2 weeks
Copyright (c) 2000, 2025, Oracle Corporation and/or its affiliates. All rights reserved.