# Getting Started

Install and use forgetless in your project.

## Installation

```bash
cargo add forgetless
```

Or add to your Cargo.toml:

```toml
[dependencies]
forgetless = "0.1"
```

## Optional Features

```bash
# With HTTP server
cargo add forgetless --features server

# With GPU acceleration (macOS Apple Silicon)
cargo add forgetless --features metal

# With GPU acceleration (NVIDIA)
cargo add forgetless --features cuda
```

## Server Binary

```bash
cargo install forgetless --features server
forgetless-server # Runs on http://localhost:8080
```

## Basic Usage

```rust
use forgetless::{Forgetless, Config};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let result = Forgetless::new()
        .config(Config::default().context_limit(10_000))
        .add(&large_content)
        .run()
        .await?;

    println!("{}", result.content);
    Ok(())
}
```

Output:

```text
Input:  247,000 tokens
Output: 9,842 tokens
```

## Config Options

```rust
use forgetless::{Forgetless, Config};

let result = Forgetless::new()
    .config(Config::default()
        .context_limit(128_000) // Max output tokens
        .vision_llm(true)       // LLM for image descriptions
        .context_llm(true)      // LLM for smart scoring
        .chunk_size(256)        // Target chunk size
        .parallel(true)         // Parallel processing
        .cache(true))           // Embedding cache
    .add_file("diagram.png")
    .add_file("research.pdf")
    .query("Explain the architecture")
    .run()
    .await?;
```

## Priority

```rust
use forgetless::{Forgetless, Config, WithPriority};

let result = Forgetless::new()
    .config(Config::default().context_limit(50_000))
    .add(WithPriority::critical("System prompt")) // Always kept
    .add(WithPriority::high(&conversation))       // High priority
    .add(&documents)                              // Medium (default)
    .add(WithPriority::low(&logs))                // Low priority
    .run()
    .await?;
```

## Files

```rust
use forgetless::{Forgetless, Config, FileWithPriority};

let result = Forgetless::new()
    .config(Config::default().context_limit(100_000))
    .add_file("README.md")
    .add_file(FileWithPriority::high("main.rs"))
    .add_files(&["lib.rs", "config.rs"])
    .run()
    .await?;
```

## Result

```rust
result.content                 // Optimized content
result.total_tokens            // Output token count
result.stats.input_tokens      // Input token count
result.stats.compression_ratio // e.g., 14.5
result.stats.chunks_processed  // Total chunks
result.stats.chunks_selected   // Selected chunks
```