Phoenix创建索引源码过程

org.apache.phoenix.index.IndexMaintainer

/**
 * Builds the row key of the index row that corresponds to a data table row.
 * This excerpt covers only the set-up: deciding which prefixes apply and
 * creating the output stream the key parts are written to.
 *
 * @param valueGetter    wrapped in a ValueGetterTuple when indexed expressions are evaluated
 * @param rowKeyPtr      data table row key; PK column values are sliced out of its bytes
 * @param regionStartKey region start key; a prefix is prepended only for a local index
 *                       and only when this is non-null
 * @param regionEndKey   region end key; only its length is read, and only when the
 *                       start key is empty
 * @param ts             passed to the ValueGetterTuple together with valueGetter
 * @return the index row key bytes
 */
public byte[] buildRowKey(ValueGetter valueGetter, ImmutableBytesWritable rowKeyPtr, byte[] regionStartKey, byte[] regionEndKey, long ts)  {
    ImmutableBytesWritable ptr = new ImmutableBytesWritable();

    // A region-key prefix is prepended only when both hold:
    // 1. this is a local index, and 2. a region start key was passed in.
    boolean prependRegionStartKey = isLocalIndex && regionStartKey != null;
    // Salting applies only to non-local indexes that have salt buckets configured.
    boolean isIndexSalted = !isLocalIndex && nIndexSaltBuckets > 0;

    // Prefix length: the start key's length, or the end key's length when the
    // start key is empty; 0 when no prefix is prepended.
    int prefixKeyLength =
            prependRegionStartKey ? (regionStartKey.length != 0 ? regionStartKey.length
                    : regionEndKey.length) : 0;
    // prefixKeyLength is already 0 when no prefix is prepended, so the ternary
    // in the size estimate below is redundant but harmless.
    TrustedByteArrayOutputStream stream = new TrustedByteArrayOutputStream(estimatedIndexRowKeyBytes + (prependRegionStartKey ? prefixKeyLength : 0));

    // Wrap the byte stream so the key parts can be written through DataOutput.
    DataOutput output = new DataOutputStream(stream);

如果是本地索引，则在索引 rowkey 的最前面加上所在 region 的 start key 作为前缀

// For local indexes, we must prepend the row key with the start region key
if (prependRegionStartKey) {
    if (regionStartKey.length == 0) {
        // The start key is empty (not null), so there is nothing to copy.
        // Write prefixKeyLength zero bytes instead; in this case
        // prefixKeyLength was taken from the region end key's length.
        output.write(new byte[prefixKeyLength]);
    } else {
        output.write(regionStartKey);
    }
}

判断索引是否加盐：如果加盐，先在 rowkey 中写入一个占位字节，等整个 rowkey 构建完成后再把它回填为真正的盐值字节

// Reserve one byte for the salt. Only a placeholder (0) is written here;
// the real salt byte is stored into position 0 once the whole key is built.
if (isIndexSalted) {
    output.write(0); // will be set at end to index salt byte
}

如果视图索引 id（viewIndexId）不为 null，则把它写入索引 rowkey

// When a view index id is present, its bytes are written into the index row key.
if (viewIndexId != null) {
    output.write(viewIndexId);
}

dataRowKeySchema 描述数据表 rowkey 的结构。这里遍历数据表 rowkey 的各个字段，跳过视图常量列（view constant），并记录其余每个主键字段在数据表 rowkey 中的 offset 和 length，方便后面从数据表 rowkey 中定位并取出对应的字节。

// Walk the data row key fields starting at dataPosOffset and record where
// each one sits inside the data row key: dataRowKeyLocator[0] holds the
// offset and dataRowKeyLocator[1] the length, both indexed by the field's
// position in the index PK.
for (int i = dataPosOffset; i < dataRowKeySchema.getFieldCount(); i++) {
    // Advances ptr to field i; the result is compared against Boolean.TRUE
    // below, so any other result is treated as "no value".
    Boolean hasValue=dataRowKeySchema.next(ptr, i, maxRowKeyOffset);
    // Ignore view constants from the data table, as these
    // don't need to appear in the index (as they're the
    // same for all rows in this index)
    if (!viewConstantColumnBitSet.get(i)) {
        int pos = rowKeyMetaData.getIndexPkPosition(i-dataPosOffset);
        if (Boolean.TRUE.equals(hasValue)) {
            dataRowKeyLocator[0][pos] = ptr.getOffset();
            dataRowKeyLocator[1][pos] = ptr.getLength();
        } else {
            // No value for this field: record an empty (offset 0, length 0) slice.
            dataRowKeyLocator[0][pos] = 0;
            dataRowKeyLocator[1][pos] = 0;
        }
    }
}

按索引列的顺序依次写入每一列的值，并处理各列的排序方向（升序/降序）

// Iterator over the indexed expressions; one element is consumed for each
// indexed column that is computed from an expression.
Iterator<Expression> expressionIterator = indexedExpressions.iterator();

// Write every indexed column in order. NOTE(review): per the article author,
// nIndexedColumns counts the indexed columns plus the data table PK columns
// - confirm against IndexMaintainer.
for (int i = 0; i < nIndexedColumns; i++) {
    PDataType dataColumnType;
    boolean isNullable;
    SortOrder dataSortOrder;

    // EXPRESSION_NOT_PRESENT marks a column whose value comes from evaluating
    // an expression rather than from the data row key.
    if (dataPkPosition[i] == EXPRESSION_NOT_PRESENT) {
       Expression expression = expressionIterator.next();
       dataColumnType = expression.getDataType();
       dataSortOrder = expression.getSortOrder();
        isNullable = expression.isNullable();
       // Evaluate against the row's column values; the result is left in ptr.
       expression.evaluate(new ValueGetterTuple(valueGetter, ts), ptr);
    }

    // Otherwise the column is a data table PK column: point ptr at its bytes
    // inside the data row key using the offset/length recorded earlier.
    else {
        Field field = dataRowKeySchema.getField(dataPkPosition[i]);
        dataColumnType = field.getDataType();
        ptr.set(rowKeyPtr.get(), dataRowKeyLocator[0][i], dataRowKeyLocator[1][i]);
        dataSortOrder = field.getSortOrder();
        isNullable = field.isNullable();
    }

    // How the value is written depends on three things: the data column's
    // sort order, whether the bytes can be reused as-is for the index type,
    // and the index column's sort order.

    // True when the data column itself is stored in a non-ASC sort order
    // (this is the column's sort order, not a query's ORDER BY).
    boolean isDataColumnInverted = dataSortOrder != SortOrder.ASC;

    // Type the value takes in the index row key (see getIndexColumnDataType).
    PDataType indexColumnType = IndexUtil.getIndexColumnDataType(isNullable, dataColumnType);

    // Whether the data type's bytes can be written unchanged for the index
    // type; if not, they are coerced below.
    boolean isBytesComparable = dataColumnType.isBytesComparableWith(indexColumnType);

    // Whether this index column is declared descending.
    boolean isIndexColumnDesc = descIndexColumnBitSet.get(i);
    if (isBytesComparable && isDataColumnInverted == isIndexColumnDesc) {
        // Same byte format and same direction: copy the bytes as they are.
        output.write(ptr.get(), ptr.getOffset(), ptr.getLength());
    } else {
        if (!isBytesComparable)  {

            // Convert the bytes from the data column type to the index
            // column type. The arguments pass the data sort order as the
            // source and the default sort order as the target.
            indexColumnType.coerceBytes(ptr, dataColumnType, dataSortOrder, SortOrder.getDefault());
        }

        // Directions differ: write the bytes through writeInverted.
        // NOTE(review): presumably this flips the bits of each byte so that
        // byte-wise lexicographic order is reversed - confirm in writeInverted.
        if (isDataColumnInverted != isIndexColumnDesc) {
            writeInverted(ptr.get(), ptr.getOffset(), ptr.getLength(), output);
        } else {
            output.write(ptr.get(), ptr.getOffset(), ptr.getLength());
        }
    }

    // Variable-width values are followed by a separator byte, chosen from
    // rowKeyOrderOptimizable, whether the value is empty, and the index
    // column's sort direction. Fixed-width values get no separator.
    if (!indexColumnType.isFixedWidth()) {
        output.writeByte(SchemaUtil.getSeparatorByte(rowKeyOrderOptimizable, ptr.getLength() == 0, isIndexColumnDesc ? SortOrder.DESC : SortOrder.ASC));
    }
}

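回填最开始预留的盐值字节：对盐值字节之后的 rowkey 内容做 hash，再对 nIndexSaltBuckets 取余，得到盐值并写入 rowkey 的第 0 个字节。（这里的 indexRowKey 和 length 是源码中从 stream 取出的底层字节数组及其有效长度，相关代码在本文摘录中省略了。）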

// Fill in the salt placeholder written at the start of the key.
if (isIndexSalted) {
    // Set salt byte
    // The salt is computed over the key bytes that follow the salt position
    // (offset NUM_SALTING_BYTES, length minus NUM_SALTING_BYTES) and the
    // bucket count, then stored into byte 0 of the key.
    byte saltByte = SaltingUtil.getSaltingByte(indexRowKey, SaltingUtil.NUM_SALTING_BYTES, length-SaltingUtil.NUM_SALTING_BYTES, nIndexSaltBuckets);
    indexRowKey[0] = saltByte;
}

返回生成的索引 rowkey：如果字节数组的长度正好等于有效长度就直接返回，否则截取到有效长度后再返回

    return indexRowKey.length == length ? indexRowKey : Arrays.copyOf(indexRowKey, length);

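getIndexColumnDataType：根据数据列的类型返回索引列使用的类型。rowkey 中不能出现可为空的定长字段，所以可为空的定长类型会被转换成对应的变长类型（可转换为 DECIMAL 的定长类型转为 DECIMAL，CHAR 转为 VARCHAR，BINARY 转为 VARBINARY）；类型为 null、列不可为空或本身就是变长类型时原样返回。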

/**
 * Returns the type a data column takes when it is written into an index row key.
 *
 * <p>A row key cannot hold a nullable fixed-length field, so a nullable
 * fixed-width type is translated to a variable-length built-in type. The
 * index row key has already been validated by the time this is called, so
 * this is purely a type-to-type conversion.
 *
 * @param isNullable whether the data column (or expression) may be null
 * @param dataType   type of the data column; may be null
 * @return {@code dataType} unchanged when it is null, not nullable, or not
 *         fixed width; otherwise the matching variable-length type
 * @throws IllegalArgumentException if no variable-length counterpart is known
 */
public static PDataType getIndexColumnDataType(boolean isNullable, PDataType dataType) {
    final boolean needsVariableWidth =
            dataType != null && isNullable && dataType.isFixedWidth();
    if (!needsVariableWidth) {
        return dataType;
    }

    final PDataType indexType;
    if (dataType.isCastableTo(PDecimal.INSTANCE)) {
        // for fixed length numeric types and boolean
        indexType = PDecimal.INSTANCE;
    } else if (dataType.isCoercibleTo(PVarchar.INSTANCE)) {
        // for CHAR
        indexType = PVarchar.INSTANCE;
    } else if (PBinary.INSTANCE.equals(dataType)) {
        indexType = PVarbinary.INSTANCE;
    } else {
        throw new IllegalArgumentException("Unsupported non nullable type " + dataType);
    }
    return indexType;
}

让数据有可比性

protected static int toBytes(BigDecimal v, byte[] result, final int offset, int length) {
    // From scale to exponent byte (if BigDecimal is positive): (-(scale+(scale % 2 == 0 : 0 : 1)) / 2 + 65) | 0x80
    // If scale % 2 is 1 (i.e. it's odd), then multiple last base-100 digit by 10
    // For example: new BigDecimal(BigInteger.valueOf(1), -4);
    // (byte)((-(-4+0) / 2 + 65) | 0x80) = -61
    // From scale to exponent byte (if BigDecimal is negative): ~(-(scale+1)/2 + 65 + 128) & 0x7F
    // For example: new BigDecimal(BigInteger.valueOf(1), 2);
    // ~(-2/2 + 65 + 128) & 0x7F = 63