readfx

ReadFX Methods

This page is an incomplete placeholder for the methods reference. More details and examples will be added.

Parsing Methods

readFQ

iterator readFQ*(path: string): FQRecord

High-level iterator for FASTA/FASTQ parsing.

readFQPtr

iterator readFQPtr*(path: string): FQRecordPtr

High-performance iterator with pointer-based records.

readFastx

proc readFastx*[T](f: var Bufio[T], r: var FQRecord): bool

Low-level procedure for custom parsing workflows.

File Handling Utilities

xopen

proc xopen*[T](fn: string, mode: FileMode = fmRead, sz: int = 0x10000): Bufio[T]

Opens a file and returns a new buffered I/O handle (`Bufio[T]`).

open

proc open*[T](f: var Bufio[T], fn: string, mode: FileMode = fmRead, sz: int = 0x10000): int

Opens a file for buffered I/O using an existing handle (`f`), returning an integer result.

close

proc close*[T](f: var Bufio[T]): int

Closes a buffered I/O handle.

Sequence Manipulation Functions

reverseComplement

proc reverseComplement*(sequence: string): string

Reverse complements a DNA sequence.

reverseComplementRecord (in-place)

proc reverseComplementRecord*(record: var FQRecord)

Reverse complements a sequence record in place.

reverseComplementRecord (copy)

proc reverseComplementRecord*(record: FQRecord): FQRecord

Creates a new record with the reverse-complemented sequence.

gcContent

proc gcContent*(sequence: string): float

Calculates GC content of a DNA sequence.

Quality-Based Operations

trimQuality

proc trimQuality*(quality: string, minQual: int, offset: int = 33): string

Trims a quality string based on a minimum quality threshold.

qualityTrim

proc qualityTrim*(record: var FQRecord, minQual: int, offset: int = 33)

Trims a record based on quality scores.

maskLowQuality

proc maskLowQuality*(record: var FQRecord, minQual: int, offset: int = 33, maskChar: char = 'N')

Masks sequence positions with low quality scores.

Record Manipulation

subSequence

proc subSequence*(record: FQRecord, start: int, length: int = -1): FQRecord

Extracts a subsequence from a record.

$ (string representation)

proc `$`*(rec: FQRecord): string
proc `$`*(rec: FQRecordPtr): string 

Converts a record to its string representation in FASTA/FASTQ format.

Other Utilities

Interval Operations

type Interval*[S,T] = tuple[st, en: S, data: T, max: S]
proc sort*[S,T](a: var seq[Interval[S,T]])
proc index*[S,T](a: var seq[Interval[S,T]]): int
iterator overlap*[S,T](a: seq[Interval[S,T]], st: S, en: S): Interval[S,T]

Interval tree implementation for genomic intervals.

Usage Examples

Basic Parsing

import readfx

# Using readFQ (string-based)
for record in readFQ("sample.fastq.gz"):
  echo record.name, " has length ", record.sequence.len
  
# Using readFQPtr (pointer-based)
for record in readFQPtr("sample.fastq.gz"):
  echo $record.name, " has length ", len($record.sequence)

# Using readFastx (low-level)
var record: FQRecord
var f = xopen[GzFile]("sample.fastq.gz")
defer: f.close()
while f.readFastx(record):
  echo record.name, " has length ", record.sequence.len

Record Manipulation

# GC content calculation
let gc = gcContent(record.sequence)

# Reverse complement
let rcSeq = reverseComplement(record.sequence)
let rcRecord = reverseComplementRecord(record)

# Quality trimming
qualityTrim(record, 20)  # Trim bases with quality < 20

# Masking low quality bases
maskLowQuality(record, 20)  # Mask bases with quality < 20 as 'N'

# Extract subsequence
let firstTenBases = subSequence(record, 0, 10)