#pragma once
#include <cstdint>
namespace caffe2 {

namespace math {

// Quantizes floating-point inputs and packs them into the "fused"
// representation described below.
//
// The fused representation stores [bitwidth][tail][min][max] together with
// the quantized data in one array. Since 8/bitwidth quantized values are
// stored in one byte, the last buckets of some bytes may have unused bits.
// In total, `tail` buckets are unused.
// *bitwidth* and *tail* are encoded at the beginning, followed by two
// 32-bit floating-point values representing min and max:
//
//   | bitwidth | tail | min | max | ... int8 data ... |
//   |    1B    |  1B  | 4B  | 4B  | ...output_data....|
//
// In output_data: the b-th bucket of the i-th byte stores
// the i-th data of the b-th segment of input row.
//
// Parameters:
//   input_data    - floating-point values to quantize.
//   output_data   - byte buffer that receives the result.
//                   NOTE(review): presumably must be large enough for the
//                   10-byte header (1 + 1 + 4 + 4) plus the packed data;
//                   confirm against the implementation.
//   input_size    - NOTE(review): presumably the number of floats in
//                   input_data; confirm against the implementation.
//   bitwidth      - number of bits used per quantized value; 8/bitwidth
//                   values share one byte.
//   random        - NOTE(review): presumably selects randomized rounding
//                   during quantization; not established by this header.
//   random_buffer - NOTE(review): presumably supplies the random values used
//                   when `random` is true; confirm whether it may be null
//                   otherwise.
void quantize_and_compress(
    const float* input_data,
    std::uint8_t* output_data,
    std::uint64_t input_size,
    std::uint64_t bitwidth,
    bool random,
    const float* random_buffer);

// Counterpart of quantize_and_compress: reads bytes from input_data and
// writes floating-point values to output_data.
//
// Parameters:
//   input_data  - NOTE(review): presumably a buffer in the fused
//                 representation documented above (header followed by packed
//                 data); confirm against the implementation.
//   output_data - buffer that receives the floating-point results.
//   input_size  - NOTE(review): unit not established by this header
//                 (bytes of input_data vs. number of values); confirm against
//                 the implementation.
void decompress_and_dequantize(
    const std::uint8_t* input_data,
    float* output_data,
    std::uint64_t input_size);

} // namespace math

} // namespace caffe2