/* Copyright 2017 Google Inc. All Rights Reserved. Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ package org.brotli.enc; import java.nio.Buffer; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.IntBuffer; import java.nio.ShortBuffer; /** * Java prepared (raw) dictionary producer. */ public class PreparedDictionaryGenerator { private static final int MAGIC = 0xDEBCEDE0; private static final long HASH_MULTIPLIER = 0x1fe35a7bd3579bd3L; private static class PreparedDictionaryImpl implements PreparedDictionary { private final ByteBuffer data; private PreparedDictionaryImpl(ByteBuffer data) { this.data = data; } @Override public ByteBuffer getData() { return data; } } // Disallow instantiation. private PreparedDictionaryGenerator() { } public static PreparedDictionary generate(ByteBuffer src) { return generate(src, 17, 3, 40, 5); } public static PreparedDictionary generate(ByteBuffer src, int bucketBits, int slotBits, int hashBits, int blockBits) { ((Buffer) src).clear(); // Just in case... if (blockBits > 12) { throw new IllegalArgumentException("blockBits is too big"); } if (bucketBits >= 24) { throw new IllegalArgumentException("bucketBits is too big"); } if (bucketBits - slotBits >= 16) { throw new IllegalArgumentException("slotBits is too small"); } int bucketLimit = 1 << blockBits; int numBuckets = 1 << bucketBits; int numSlots = 1 << slotBits; int slotMask = numSlots - 1; int hashShift = 64 - bucketBits; long hashMask = (~0L) >>> (64 - hashBits); int sourceSize = src.capacity(); if (sourceSize < 8) { throw new IllegalArgumentException("src is too short"); } /* Step 1: create "bloated" hasher. */ short[] num = new short[numBuckets]; int[] bucketHeads = new int[numBuckets]; int[] nextBucket = new int[sourceSize]; long accumulator = 0; for (int i = 0; i < 7; ++i) { accumulator |= (src.get(i) & 0xFFL) << (8 * i); } accumulator <<= 8; /* TODO(eustas): apply custom "store" order. */ for (int i = 0; i + 7 < sourceSize; ++i) { accumulator = (accumulator >>> 8) | ((src.get(i + 7) & 0xFFL) << 56); long h = (accumulator & hashMask) * HASH_MULTIPLIER; int key = (int) (h >>> hashShift); int count = num[key]; nextBucket[i] = (count == 0) ? -1 : bucketHeads[key]; bucketHeads[key] = i; count++; if (count > bucketLimit) { count = bucketLimit; } num[key] = (short) count; } /* Step 2: find slot limits. */ int[] slotLimit = new int[numSlots]; int[] slotSize = new int[numSlots]; int totalItems = 0; for (int i = 0; i < numSlots; ++i) { boolean overflow = false; slotLimit[i] = bucketLimit; while (true) { overflow = false; int limit = slotLimit[i]; int count = 0; for (int j = i; j < numBuckets; j += numSlots) { int size = num[j]; /* Last chain may span behind 64K limit; overflow happens only if we are about to use 0xFFFF+ as item offset. */ if (count >= 0xFFFF) { overflow = true; break; } if (size > limit) { size = limit; } count += size; } if (!overflow) { slotSize[i] = count; totalItems += count; break; } slotLimit[i]--; } } /* Step 3: transfer data to "slim" hasher. */ int part0 = 6 * 4; int part1 = numSlots * 4; int part2 = numBuckets * 2; int part3 = totalItems * 4; int allocSize = part0 + part1 + part2 + part3 + sourceSize; ByteBuffer flat = ByteBuffer.allocateDirect(allocSize); ByteBuffer pointer = flat.slice(); pointer.order(ByteOrder.nativeOrder()); IntBuffer struct = pointer.asIntBuffer(); pointer.position(pointer.position() + part0); IntBuffer slotOffsets = pointer.asIntBuffer(); pointer.position(pointer.position() + part1); ShortBuffer heads = pointer.asShortBuffer(); pointer.position(pointer.position() + part2); IntBuffer items = pointer.asIntBuffer(); pointer.position(pointer.position() + part3); ByteBuffer sourceCopy = pointer.slice(); /* magic */ struct.put(0, MAGIC); /* source_offset */ struct.put(1, totalItems); /* source_size */ struct.put(2, sourceSize); /* hash_bits */ struct.put(3, hashBits); /* bucket_bits */ struct.put(4, bucketBits); /* slot_bits */ struct.put(5, slotBits); totalItems = 0; for (int i = 0; i < numSlots; ++i) { slotOffsets.put(i, totalItems); totalItems += slotSize[i]; slotSize[i] = 0; } for (int i = 0; i < numBuckets; ++i) { int slot = i & slotMask; int count = num[i]; if (count > slotLimit[slot]) { count = slotLimit[slot]; } if (count == 0) { heads.put(i, (short) 0xFFFF); continue; } int cursor = slotSize[slot]; heads.put(i, (short) cursor); cursor += slotOffsets.get(slot); slotSize[slot] += count; int pos = bucketHeads[i]; for (int j = 0; j < count; j++) { items.put(cursor++, pos); pos = nextBucket[pos]; } cursor--; items.put(cursor, items.get(cursor) | 0x80000000); } sourceCopy.put(src); return new PreparedDictionaryImpl(flat); } }