I recently had the issue that atuin got quite slow to search.
Using this statistics script, I found that I had a few entries that were quite large:
import sqlite3
from collections import Counter
import os
def create_histogram(data, bins=30, width=60):
    """Print an ASCII histogram of *data* to stdout.

    Args:
        data: List of numeric values.
        bins: Number of bins to split the value range into.
        width: Maximum bar width in characters.
    """
    if not data:
        # Nothing to plot; avoid min()/max() raising on an empty sequence.
        print("(no data)")
        return
    # Calculate min and max for bin ranges
    min_val, max_val = min(data), max(data)
    bin_size = (max_val - min_val) / bins
    if bin_size == 0:
        # All values are identical; use a unit-width bin so the index
        # computation below cannot divide by zero.
        bin_size = 1
    # Create bins
    counts = Counter()
    for value in data:
        bin_index = int((value - min_val) / bin_size)
        if bin_index == bins:  # Handle edge case: max value lands on the last bin's upper edge
            bin_index -= 1
        counts[bin_index] += 1
    # Find maximum count for scaling (largest bin fills the full width)
    max_count = max(counts.values()) if counts else 0
    # Print histogram
    for i in range(bins):
        bin_start = min_val + i * bin_size
        bin_end = bin_start + bin_size
        count = counts[i]
        # Calculate bar length
        bar_length = int((count / max_count) * width) if max_count > 0 else 0
        # Print bar
        print(f"{bin_start:6.1f} - {bin_end:6.1f} | {'#' * bar_length} ({count})")
def format_command(command, max_length=80):
    """Return *command* capped at *max_length* characters, truncating with '...'."""
    if len(command) <= max_length:
        return command
    # Reserve three characters for the ellipsis so the result fits the limit.
    return f"{command[:max_length - 3]}..."
def main():
    """Analyze the Atuin history database and print command-length statistics."""
    # Use Atuin's default database path
    db_path = os.path.expanduser("~/.local/share/atuin/history.db")
    # Connect to database
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        # Query command lengths only: fetching the command text as well
        # would needlessly pull the entire history into memory.
        cursor.execute("SELECT LENGTH(command) FROM history WHERE deleted_at IS NULL")
        results = cursor.fetchall()
        if not results:
            print("No commands found in database")
            return
        command_lengths = [row[0] for row in results]
        # Print statistics
        print("\nAtuin Command History Analysis")
        print("-" * 70)
        print(f"Total commands: {len(command_lengths):,}")
        print(f"Min length: {min(command_lengths)} characters")
        print(f"Max length: {max(command_lengths)} characters")
        print(f"Average length: {sum(command_lengths)/len(command_lengths):.1f} characters")
        print("\nLength Distribution Histogram:")
        print("-" * 70)
        # Create and display histogram
        create_histogram(command_lengths)
        # Print longest commands
        print("\nTop 10 Longest Commands:")
        print("-" * 70)
        # Query and sort longest commands
        cursor.execute("""
            SELECT command, LENGTH(command) as len
            FROM history
            WHERE deleted_at IS NULL
            ORDER BY len DESC
            LIMIT 10
        """)
        longest_commands = cursor.fetchall()
        for i, (command, length) in enumerate(longest_commands, 1):
            formatted_command = format_command(command)
            print(f"{i:2d}. [{length:4d}] {formatted_command}")
    except sqlite3.Error as e:
        print(f"Database error: {e}")
        print(f"Make sure Atuin is installed and the database exists at: {db_path}")
    except Exception as e:
        print(f"Error: {e}")
    finally:
        conn.close()
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Afterwards I wrote a script that removes duplicate commands from the history, as well as overly large commands:
import sqlite3
import os
def get_file_size(path):
    """Return the size of the file at *path* in megabytes."""
    size_in_bytes = os.path.getsize(path)
    return size_in_bytes / (1024 * 1024)  # Convert to MB
def deduplicate_history() -> None:
    """Shrink the Atuin history database by deleting duplicate and oversized commands.

    NOTE(review): rows are removed permanently (this does not set
    ``deleted_at``), so entries cannot be recovered afterwards — back up
    the database file before running.
    """
    path = os.path.expanduser("~/.local/share/atuin/history.db")
    # Get initial size so the savings can be reported at the end
    initial_size = get_file_size(path)
    print(f"Initial database size: {initial_size:.2f} MB")
    # Connect to the database
    conn = sqlite3.connect(path)
    cursor = conn.cursor()
    try:
        # Get initial record count
        cursor.execute("SELECT COUNT(*) FROM history")
        initial_records = cursor.fetchone()[0]
        # Begin transaction: both DELETEs commit (or roll back) together
        cursor.execute("BEGIN TRANSACTION")
        # Delete duplicates (same command run in the same cwd), keeping the
        # most recent entry. Rows tied on the maximum timestamp are all
        # kept, since only strictly older rows match the < comparison.
        cursor.execute("""
            DELETE FROM history
            WHERE id IN (
                SELECT h1.id
                FROM history h1
                JOIN (
                    SELECT command, cwd, MAX(timestamp) as max_timestamp
                    FROM history
                    GROUP BY command, cwd
                    HAVING COUNT(*) > 1
                ) h2
                ON h1.command = h2.command
                AND h1.cwd = h2.cwd
                WHERE h1.timestamp < h2.max_timestamp
            )
        """)
        duplicates_removed = cursor.rowcount
        # Delete commands longer than 1000 bytes — these dominate the file
        # size and slow down searching
        cursor.execute("""
            DELETE FROM history
            WHERE LENGTH(command) > 1000
        """)
        long_commands_removed = cursor.rowcount
        # Commit changes before VACUUM (VACUUM cannot run inside a transaction)
        conn.commit()
        # Get size after the deletes but before VACUUM reclaims free pages
        pre_vacuum_size = get_file_size(path)
        # Vacuum the database: rewrites the file so freed space is returned
        # to the filesystem
        conn.execute("VACUUM")
        # Get final statistics
        cursor.execute("SELECT COUNT(*) FROM history")
        final_records = cursor.fetchone()[0]
        final_size = get_file_size(path)
        print("\nOperation Summary:")
        print(f"- Duplicate commands removed: {duplicates_removed}")
        print(f"- Long commands removed: {long_commands_removed}")
        print(f"- Records: {initial_records} → {final_records}")
        print("\nSize Changes:")
        print(f"- Initial size: {initial_size:.2f} MB")
        print(f"- Pre-vacuum size: {pre_vacuum_size:.2f} MB")
        print(f"- Final size: {final_size:.2f} MB")
        print(
            f"- Total space saved: {(initial_size - final_size):.2f} MB ({((initial_size - final_size) / initial_size * 100):.1f}%)"
        )
    except sqlite3.Error as e:
        print(f"An error occurred: {e}")
        conn.rollback()
    finally:
        conn.close()
# Run the cleanup only when executed as a script, not when imported.
if __name__ == "__main__":
    deduplicate_history()
This brought the size down from 50M to 25M, and my atuin search is significantly more snappy. You may not need to drop duplicates — in my case, most of the gain came from dropping the large commands themselves.