Getting Started

Installation

You can install QuantileFlow with pip:

pip install QuantileFlow

Basic Usage

DDSketch

from QuantileFlow import DDSketch

# Create a DDSketch with 1% relative accuracy
sketch = DDSketch(relative_accuracy=0.01)

# Add data points
for value in [1.0, 2.5, 3.0, 4.2, 5.0, 6.8, 7.5, 8.1, 9.2, 10.0]:
    sketch.insert(value)

# Query quantiles
median = sketch.quantile(0.5)  # Get the median
p99 = sketch.quantile(0.99)    # Get the 99th percentile

print(f"Median: {median}")
print(f"99th percentile: {p99}")

Different Mapping Types

# Using logarithmic mapping (default)
log_sketch = DDSketch(relative_accuracy=0.01, mapping_type='logarithmic')

# Using linear interpolation mapping
lin_sketch = DDSketch(relative_accuracy=0.01, mapping_type='lin_interpol')

# Using cubic interpolation mapping
cub_sketch = DDSketch(relative_accuracy=0.01, mapping_type='cub_interpol')

MomentSketch

from QuantileFlow import MomentSketch

# Create a MomentSketch with 20 moments
sketch = MomentSketch(num_moments=20)

# Add data points
for value in range(1000):
    sketch.insert(value)

# Query quantiles
median = sketch.quantile(0.5)
p95 = sketch.quantile(0.95)

# Get summary statistics
stats = sketch.summary_statistics()

print(f"Median: {median}")
print(f"95th percentile: {p95}")
print(f"Summary statistics: {stats}")

HDRHistogram

from QuantileFlow import HDRHistogram

# Create an HDRHistogram with 8 buckets
histogram = HDRHistogram(num_buckets=8)

# Add data points
for value in [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0]:
    histogram.insert(value)

# Query quantiles
median = histogram.quantile(0.5)
p95 = histogram.quantile(0.95)

# Get summary statistics
stats = histogram.summary_statistics()

print(f"Median: {median}")
print(f"95th percentile: {p95}")
print(f"Summary statistics: {stats}")

# Visualize the distribution
fig = histogram.plot_distribution()

Merging Sketches

All three algorithms support merging for distributed processing:

from QuantileFlow import DDSketch

# Create two histograms
hist1 = DDSketch(0.01)
hist2 = DDSketch(0.01)

# Add different data to each
for i in range(100):
    hist1.insert(i)
    
for i in range(100, 200):
    hist2.insert(i)
    
# Merge hist2 into hist1
hist1.merge(hist2)

# Now hist1 contains the combined data
print(f"Median of combined data: {hist1.quantile(0.5)}")

Advanced Configuration

DDSketch Storage Options

# Using contiguous storage (default)
contiguous_sketch = DDSketch(
    relative_accuracy=0.01,
    bucket_strategy=BucketManagementStrategy.FIXED
)

# Using sparse storage for widely distributed data
sparse_sketch = DDSketch(
    relative_accuracy=0.01,
    bucket_strategy=BucketManagementStrategy.UNLIMITED
)

# Using dynamic bucket management
dynamic_sketch = DDSketch(
    relative_accuracy=0.01,
    bucket_strategy=BucketManagementStrategy.DYNAMIC
)

MomentSketch with Compression

from QuantileFlow import MomentSketch

# Create a MomentSketch with compression
sketch = MomentSketch(num_moments=20, compress_values=True)

# Add data
for value in range(1000):
    sketch.insert(value)

# Query results as normal
median = sketch.quantile(0.5)
print(f"Compressed sketch median: {median}")

# Visualize the distribution
fig = sketch.plot_distribution()

HDRHistogram with Value Range Control

from QuantileFlow import HDRHistogram

# Create an HDRHistogram with value range control
histogram = HDRHistogram(
    num_buckets=8,
    min_value=1.0,    # Minimum trackable value
    max_value=1000.0  # Maximum trackable value
)

# Add data
for value in range(1, 1001):
    histogram.insert(value)

# Query results as normal
median = histogram.quantile(0.5)
print(f"Histogram median: {median}")

# Visualize the distribution
fig = histogram.plot_distribution()