Utility procedures are defined in readfx/sequtils.nim and
readfx/oligoutils.nim and re-exported by the top-level module.
No additional imports are needed.
DNA Sequence Operations
revCompl
proc revCompl*(sequence: string): string
proc revCompl*(record: var FQRecord) # in-place; also reverses quality
proc revCompl*(record: FQRecord): FQRecord # returns a new record
Reverse complements a DNA sequence. The in-place variant modifies both
sequence and quality (the quality string is
reversed to stay in sync).
let rc = revCompl("ATGCCC") # → "GGGCAT"
var rec: FQRecord = ...
revCompl(rec) # modify in place
let copy = revCompl(rec) # new reversed record
gcContent
proc gcContent*(sequence: string): float
proc gcContent*(record: FQRecord): float
Returns the GC fraction as a float in the range 0.0–1.0.
let gc = gcContent("ATGCATGC") # 0.5
echo (gc * 100).int, "%" # 50%
composition
proc composition*(record: FQRecord): SeqComp
Returns a SeqComp
object with per-base counts (A, C, G, T, N, Other) and a precomputed
GC fraction.
let comp = composition(record)
echo "A=", comp.A, " C=", comp.C, " G=", comp.G, " T=", comp.T
echo "N=", comp.N, " GC=", comp.GC
Quality Operations
ReadFX uses Sanger/Illumina 1.8+ encoding by default: offset 33.
All quality procedures accept an optional offset parameter if you
need to handle legacy encodings.
qualCharToInt / qualIntToChar
proc qualCharToInt*(c: char, offset: int = 33): int
proc qualIntToChar*(q: int, offset: int = 33): char
Convert between quality characters and Phred integer scores.
let q = qualCharToInt('I') # 73 - 33 = 40
let c = qualIntToChar(30) # '?' (ASCII 30 + 33 = 63)
avgQuality
proc avgQuality*(record: FQRecord, offset: int = 33): float
proc avgQuality*(quality: string, offset: int = 33): float
Returns the mean Phred quality score as a float.
let avg = avgQuality(record)
echo "Mean Q: ", avg # e.g. 34.7
trimQuality
proc trimQuality*(quality: string, minQual: int, offset: int = 33): string
Trims trailing bases below minQual from a quality string
and returns the trimmed string. Does not modify the sequence — use
qualityTrim to trim both together.
qualityTrim
proc qualityTrim*(record: var FQRecord, minQual: int, offset: int = 33)
Trims both sequence and quality in place,
removing 3′ bases with Phred score below minQual.
var r = record
qualityTrim(r, 20) # remove 3′ bases with Q < 20
echo r.sequence.len # length may be shorter
maskLowQuality
proc maskLowQuality*(record: var FQRecord, minQual: int,
offset: int = 33, maskChar: char = 'N')
Replaces bases with Phred score below minQual with
maskChar (default 'N') without changing the
length of the record. Useful when you want to preserve alignment
positions but flag unreliable bases.
var r = record
maskLowQuality(r, 20) # replace Q<20 with 'N'
maskLowQuality(r, 15, maskChar='X') # or any character
Record Manipulation
subSequence
proc subSequence*(record: FQRecord, start: int, length: int = -1): FQRecord
Returns a new record containing a slice of the sequence (and the
corresponding quality bases). length = -1 means "to the end".
let first50 = subSequence(record, 0, 50) # bases 0..49
let fromPos = subSequence(record, 10) # bases 10..end
trimStart / trimEnd
proc trimStart*(record: FQRecord, bases: int): FQRecord
proc trimEnd*(record: FQRecord, bases: int): FQRecord
Remove a fixed number of bases from the 5′ or 3′ end. Return a new
FQRecord (non-destructive).
let clipped = trimStart(record, 5) # remove first 5 bases
let cropped = trimEnd(record, 3) # remove last 3 bases
IUPAC Primer Matching
These functions are in readfx/oligoutils.nim and re-exported
by the main module. They support IUPAC ambiguity codes in primers:
R (A/G), Y (C/T), S (G/C),
W (A/T), K (G/T), M (A/C),
B, D, H, V,
and N (any).
matchIUPAC
proc matchIUPAC*(primerBase, seqBase: char): bool
Returns true if seqBase is compatible with
primerBase under IUPAC rules. The code is asymmetric —
ambiguity codes are only recognised in the primer.
echo matchIUPAC('R', 'A') # true — R matches A or G
echo matchIUPAC('R', 'C') # false
echo matchIUPAC('N', 'T') # true — N matches anything
findPrimerMatches
proc findPrimerMatches*(sequence, primer: string,
maxMismatches: int = 0): seq[int]
Returns the 0-based positions where primer binds in
sequence, allowing up to maxMismatches
non-IUPAC-matching positions.
let positions = findPrimerMatches("AAACGTGGGCGT", "RCGT", maxMismatches = 0)
for pos in positions:
echo "primer binds at position ", pos
Formatting
$ operator
Both FQRecord and FQRecordPtr have a
$ operator that produces FASTQ-formatted output (or
FASTA if the quality field is empty).
for record in readFQ("sample.fastq.gz"):
echo $record # four-line FASTQ block
fafmt
proc fafmt*(record: FQRecord, lineWidth: int = 60): string
Formats a record as FASTA with the sequence wrapped at
lineWidth characters per line.
echo fafmt(record, 80) # wrapped FASTA output
Complete Example
A practical pipeline: read a FASTQ file, quality-trim, mask remaining low-quality bases, filter short reads, and report composition.
import readfx, strutils
const minLen = 50
const minQual = 20
var passed = 0
var total = 0
for record in readFQ("sample.fastq.gz"):
inc total
var r = record
# 1. Trim 3' low-quality tail
qualityTrim(r, minQual)
# 2. Skip reads that became too short
if r.sequence.len < minLen:
continue
# 3. Mask any remaining Q<15 bases with 'N'
maskLowQuality(r, 15)
# 4. Report stats
let comp = composition(r)
let avg = avgQuality(r)
echo r.name,
"\tlen=", r.sequence.len,
"\tGC=", (comp.GC * 100).formatFloat(ffDecimal, 1), "%",
"\tmeanQ=", avg.formatFloat(ffDecimal, 1)
inc passed
echo passed, "/", total, " reads passed filters"