Connection
cqlsh # local connect
cqlsh <host> <port> # remote
cqlsh -u <user> -p <pass> # with auth
Keyspace Ops
-- create
CREATE KEYSPACE ks_name
WITH replication = {'class':'SimpleStrategy', 'replication_factor':3};
-- use
USE ks_name;
-- list all
DESCRIBE keyspaces;
-- drop
DROP KEYSPACE ks_name;
Table Ops
-- create
CREATE TABLE users (
user_id UUID PRIMARY KEY,
name TEXT,
email TEXT,
created_at TIMESTAMP
);
-- with clustering
CREATE TABLE events (
user_id UUID,
event_time TIMESTAMP,
event_type TEXT,
PRIMARY KEY (user_id, event_time)
) WITH CLUSTERING ORDER BY (event_time DESC);
-- describe
DESCRIBE TABLE table_name;
-- alter
ALTER TABLE users ADD phone TEXT;
-- drop
DROP TABLE users;
CRUD
-- insert
INSERT INTO users (user_id, name, email)
VALUES (uuid(), 'John', 'john@example.com');
-- with TTL
INSERT INTO users (...) VALUES (...) USING TTL 86400;
-- select
SELECT * FROM users;
SELECT * FROM users WHERE user_id = <uuid>;
SELECT * FROM users LIMIT 10;
-- update
UPDATE users SET name='Jane' WHERE user_id=<uuid>;
-- delete
DELETE FROM users WHERE user_id=<uuid>;
DELETE name FROM users WHERE user_id=<uuid>; -- delete a single column
Indexes
-- create secondary index
CREATE INDEX ON users (email);
-- drop
DROP INDEX users_email_idx;
Batch Operations
BEGIN BATCH
INSERT INTO users (...) VALUES (...);
UPDATE users SET ... WHERE ...;
DELETE FROM users WHERE ...;
APPLY BATCH;
Useful Queries
-- count (expensive!)
SELECT COUNT(*) FROM users;
-- token range
SELECT * FROM users WHERE token(user_id) > token(<uuid>);
-- allow filtering (use carefully)
SELECT * FROM users WHERE email='john@example.com' ALLOW FILTERING;
Admin/Utility
DESCRIBE CLUSTER;
DESCRIBE SCHEMA;
CONSISTENCY QUORUM; -- set consistency level
TRACING ON; -- enable query tracing
SOURCE '/path/to/file.cql'; -- execute file
Data Types (common)
- TEXT, VARCHAR
- INT, BIGINT, SMALLINT
- FLOAT, DOUBLE, DECIMAL
- BOOLEAN
- UUID, TIMEUUID
- TIMESTAMP, DATE, TIME
- BLOB
- SET<type>, LIST<type>, MAP<type,type>
# cluster status
nodetool status # cluster ring status
nodetool info # node info
nodetool describecluster # cluster details
nodetool ring # token ring
# maintenance
nodetool repair # repair all keyspaces
nodetool repair -full # full repair
nodetool repair ks_name table_name # specific table
nodetool compact # force compaction
nodetool cleanup # cleanup after topology change
nodetool flush # flush memtables to disk
# performance
nodetool tpstats # thread pool stats
nodetool tablestats # table statistics
nodetool cfstats # columnfamily stats (old)
nodetool tablehistograms ks table # latency histograms
nodetool proxyhistograms # coordinator stats
# monitoring
nodetool netstats # network info
nodetool gcstats # GC stats
nodetool statusbinary # native protocol status
nodetool statusthrift # thrift status (pre-4.0 only; Thrift was removed in 4.0)
nodetool compactionstats # compaction progress
# node ops
nodetool drain # stop writes, flush
nodetool stopdaemon # stop cassandra
nodetool assassinate <ip> # force remove dead node
nodetool removenode <host_id> # remove node properly
nodetool decommission # leave cluster gracefully
# snapshots
nodetool snapshot -t snap_name # create snapshot
nodetool listsnapshots # list all snapshots
nodetool clearsnapshot -t name # clear snapshot
# cache
nodetool invalidatekeycache
nodetool invalidaterowcache
# misc
nodetool settraceprobability 0.1 # set trace sampling
nodetool getlogginglevels # check log levels
nodetool setlogginglevel class LEVEL
sstableloader -d <host> <sstable_dir> # bulk load
sstablelevelreset --really-reset <ks> <table> # reset levels (node must be stopped)
sstablemetadata <sstable_file> # view metadata
sstableutil <ks> <table> # list sstables
sstabledump <sstable_file> # dump as JSON
sstablescrub <ks> <table> # fix corrupted sstables
Config Files
# important files
/etc/cassandra/cassandra.yaml # main config
/etc/cassandra/cassandra-env.sh # JVM settings
/var/log/cassandra/system.log # main log
/var/lib/cassandra/data/ # data dir
/var/lib/cassandra/commitlog/ # commit logs
Key Yaml Settings
cluster_name
seeds: "ip1,ip2,ip3"
listen_address
rpc_address
data_file_directories
commitlog_directory
concurrent_reads: 32
concurrent_writes: 32
memtable_flush_writers: 4
compaction_throughput_mb_per_sec: 64
JVM Tuning (cassandra-env.sh)
# heap size
MAX_HEAP_SIZE="8G"
HEAP_NEWSIZE="800M"
# GC (example G1GC)
JVM_OPTS="$JVM_OPTS -XX:+UseG1GC"
Backup/Restore
# backup
nodetool snapshot -t backup_name
# find snapshots
find /var/lib/cassandra/data -name snapshots
# restore (stop node first)
# copy snapshot files to table dir
# then restart and run:
nodetool refresh ks_name table_name
# check disk IO
iostat -x 5
# check network
iftop
nodetool netstats
# check compaction backlog
nodetool compactionstats
# check pending tasks
nodetool tpstats | grep -i pending
Remember
- Primary key = partition key + clustering columns
- Can only query by partition key, optionally narrowed by clustering columns in their declared order (anything else needs an index or ALLOW FILTERING)
- ORDER BY only on clustering columns
- No JOINs - denormalize!
- ALLOW FILTERING is slow - avoid in prod
- Use BATCH for same partition only (performance)
- TTL in seconds
- Always run nodetool repair regularly
- Use nodetool cleanup after adding/removing nodes
- Monitor pending compactions
- GC pauses > 1s are bad