186 lines
5.4 KiB
Java
186 lines
5.4 KiB
Java
/* Copyright 2017 Google Inc. All Rights Reserved.
|
|
|
|
Distributed under MIT license.
|
|
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
|
*/
|
|
|
|
package org.brotli.enc;
|
|
|
|
import java.nio.Buffer;
|
|
import java.nio.ByteBuffer;
|
|
import java.nio.ByteOrder;
|
|
import java.nio.IntBuffer;
|
|
import java.nio.ShortBuffer;
|
|
|
|
/**
|
|
* Java prepared (raw) dictionary producer.
|
|
*/
|
|
public class PreparedDictionaryGenerator {
|
|
|
|
private static final int MAGIC = 0xDEBCEDE0;
|
|
private static final long HASH_MULTIPLIER = 0x1fe35a7bd3579bd3L;
|
|
|
|
private static class PreparedDictionaryImpl implements PreparedDictionary {
|
|
private final ByteBuffer data;
|
|
|
|
private PreparedDictionaryImpl(ByteBuffer data) {
|
|
this.data = data;
|
|
}
|
|
|
|
@Override
|
|
public ByteBuffer getData() {
|
|
return data;
|
|
}
|
|
}
|
|
|
|
// Disallow instantiation.
|
|
private PreparedDictionaryGenerator() { }
|
|
|
|
public static PreparedDictionary generate(ByteBuffer src) {
|
|
return generate(src, 17, 3, 40, 5);
|
|
}
|
|
|
|
public static PreparedDictionary generate(ByteBuffer src,
|
|
int bucketBits, int slotBits, int hashBits, int blockBits) {
|
|
((Buffer) src).clear(); // Just in case...
|
|
if (blockBits > 12) {
|
|
throw new IllegalArgumentException("blockBits is too big");
|
|
}
|
|
if (bucketBits >= 24) {
|
|
throw new IllegalArgumentException("bucketBits is too big");
|
|
}
|
|
if (bucketBits - slotBits >= 16) {
|
|
throw new IllegalArgumentException("slotBits is too small");
|
|
}
|
|
int bucketLimit = 1 << blockBits;
|
|
int numBuckets = 1 << bucketBits;
|
|
int numSlots = 1 << slotBits;
|
|
int slotMask = numSlots - 1;
|
|
int hashShift = 64 - bucketBits;
|
|
long hashMask = (~0L) >>> (64 - hashBits);
|
|
int sourceSize = src.capacity();
|
|
if (sourceSize < 8) {
|
|
throw new IllegalArgumentException("src is too short");
|
|
}
|
|
|
|
/* Step 1: create "bloated" hasher. */
|
|
short[] num = new short[numBuckets];
|
|
int[] bucketHeads = new int[numBuckets];
|
|
int[] nextBucket = new int[sourceSize];
|
|
|
|
long accumulator = 0;
|
|
for (int i = 0; i < 7; ++i) {
|
|
accumulator |= (src.get(i) & 0xFFL) << (8 * i);
|
|
}
|
|
accumulator <<= 8;
|
|
/* TODO(eustas): apply custom "store" order. */
|
|
for (int i = 0; i + 7 < sourceSize; ++i) {
|
|
accumulator = (accumulator >>> 8) | ((src.get(i + 7) & 0xFFL) << 56);
|
|
long h = (accumulator & hashMask) * HASH_MULTIPLIER;
|
|
int key = (int) (h >>> hashShift);
|
|
int count = num[key];
|
|
nextBucket[i] = (count == 0) ? -1 : bucketHeads[key];
|
|
bucketHeads[key] = i;
|
|
count++;
|
|
if (count > bucketLimit) {
|
|
count = bucketLimit;
|
|
}
|
|
num[key] = (short) count;
|
|
}
|
|
|
|
/* Step 2: find slot limits. */
|
|
int[] slotLimit = new int[numSlots];
|
|
int[] slotSize = new int[numSlots];
|
|
int totalItems = 0;
|
|
for (int i = 0; i < numSlots; ++i) {
|
|
boolean overflow = false;
|
|
slotLimit[i] = bucketLimit;
|
|
while (true) {
|
|
overflow = false;
|
|
int limit = slotLimit[i];
|
|
int count = 0;
|
|
for (int j = i; j < numBuckets; j += numSlots) {
|
|
int size = num[j];
|
|
/* Last chain may span behind 64K limit; overflow happens only if
|
|
we are about to use 0xFFFF+ as item offset. */
|
|
if (count >= 0xFFFF) {
|
|
overflow = true;
|
|
break;
|
|
}
|
|
if (size > limit) {
|
|
size = limit;
|
|
}
|
|
count += size;
|
|
}
|
|
if (!overflow) {
|
|
slotSize[i] = count;
|
|
totalItems += count;
|
|
break;
|
|
}
|
|
slotLimit[i]--;
|
|
}
|
|
}
|
|
|
|
/* Step 3: transfer data to "slim" hasher. */
|
|
int part0 = 6 * 4;
|
|
int part1 = numSlots * 4;
|
|
int part2 = numBuckets * 2;
|
|
int part3 = totalItems * 4;
|
|
int allocSize = part0 + part1 + part2 + part3 + sourceSize;
|
|
ByteBuffer flat = ByteBuffer.allocateDirect(allocSize);
|
|
ByteBuffer pointer = flat.slice();
|
|
pointer.order(ByteOrder.nativeOrder());
|
|
|
|
IntBuffer struct = pointer.asIntBuffer();
|
|
pointer.position(pointer.position() + part0);
|
|
IntBuffer slotOffsets = pointer.asIntBuffer();
|
|
pointer.position(pointer.position() + part1);
|
|
ShortBuffer heads = pointer.asShortBuffer();
|
|
pointer.position(pointer.position() + part2);
|
|
IntBuffer items = pointer.asIntBuffer();
|
|
pointer.position(pointer.position() + part3);
|
|
ByteBuffer sourceCopy = pointer.slice();
|
|
|
|
/* magic */ struct.put(0, MAGIC);
|
|
/* source_offset */ struct.put(1, totalItems);
|
|
/* source_size */ struct.put(2, sourceSize);
|
|
/* hash_bits */ struct.put(3, hashBits);
|
|
/* bucket_bits */ struct.put(4, bucketBits);
|
|
/* slot_bits */ struct.put(5, slotBits);
|
|
|
|
totalItems = 0;
|
|
for (int i = 0; i < numSlots; ++i) {
|
|
slotOffsets.put(i, totalItems);
|
|
totalItems += slotSize[i];
|
|
slotSize[i] = 0;
|
|
}
|
|
|
|
for (int i = 0; i < numBuckets; ++i) {
|
|
int slot = i & slotMask;
|
|
int count = num[i];
|
|
if (count > slotLimit[slot]) {
|
|
count = slotLimit[slot];
|
|
}
|
|
if (count == 0) {
|
|
heads.put(i, (short) 0xFFFF);
|
|
continue;
|
|
}
|
|
int cursor = slotSize[slot];
|
|
heads.put(i, (short) cursor);
|
|
cursor += slotOffsets.get(slot);
|
|
slotSize[slot] += count;
|
|
int pos = bucketHeads[i];
|
|
for (int j = 0; j < count; j++) {
|
|
items.put(cursor++, pos);
|
|
pos = nextBucket[pos];
|
|
}
|
|
cursor--;
|
|
items.put(cursor, items.get(cursor) | 0x80000000);
|
|
}
|
|
|
|
sourceCopy.put(src);
|
|
|
|
return new PreparedDictionaryImpl(flat);
|
|
}
|
|
}
|