Skip to content

add encoding options #33

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 89 additions & 10 deletions src/helpers/codec.test.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import { strict as assert } from "assert"
import * as _ from "lodash"
import { describe, it } from "mocha"
import { Tuple } from "../storage/types"
import { sortedValues } from "../test/fixtures"
import { decodeTuple, decodeValue, encodeTuple, encodeValue } from "./codec"
import { compare } from "./compare"
import { TupleToString, ValueToString } from "./compareTuple"
import { randomInt } from "./randomInt"

describe("codec", () => {
describe("encodeValue", () => {
Expand Down Expand Up @@ -89,9 +89,11 @@ describe("codec", () => {
const test = (aTuple: Tuple, bTuple: Tuple, result: number) => {
const a = encodeTuple(aTuple)
const b = encodeTuple(bTuple)
const actual = compare(a, b)
const expected = result
assert.deepStrictEqual(
compare(a, b),
result,
actual,
expected,
`compareTuple(${[TupleToString(aTuple), TupleToString(bTuple)].join(
", "
)}) === compare(${[JSON.stringify(a), JSON.stringify(b)].join(", ")})`
Expand Down Expand Up @@ -120,9 +122,9 @@ describe("codec", () => {

// Draws a random 3-tuple of entries from sortedValues and computes its rank,
// i.e. the position the tuple would have if every such 3-tuple were listed in
// lexicographic order of the chosen indices.
const sample = () => {
  const x = sortedValues.length
  // randomInt's upper bound is exclusive (it floors Math.random() * ceil), so
  // pass x rather than x - 1; otherwise the last entry of sortedValues could
  // never be picked, unlike the inclusive `_.random(x - 1)` this replaced.
  const i = randomInt(x)
  const j = randomInt(x)
  const k = randomInt(x)
  const tuple: Tuple = [sortedValues[i], sortedValues[j], sortedValues[k]]
  const rank = i * x * x + j * x + k
  return { tuple, rank }
Expand All @@ -136,8 +138,85 @@ describe("codec", () => {
}
})
})
})

/**
 * Negates a number, except that zero is handed back untouched so a plain 0
 * input never turns into -0.
 */
function not(x: number) {
  if (x === 0) {
    return x
  }
  return -x
}
describe("codec options", () => {
  type Options = NonNullable<Parameters<typeof encodeValue>[1]>

  // Encodes then decodes `value` with `options` and asserts the result equals
  // the input. The failure message shows the options and each stage of the trip.
  //
  // NOTE(review): these round-trips call encodeValue/decodeValue directly on
  // strings. encodeValue's string branch only consults `disallow`, so the
  // `delimiter` and `escape` options are presumably not exercised here; going
  // through encodeTuple/decodeTuple would be needed to cover them.
  const assertRoundTrip = (value: string, options: Options) => {
    const encoded = encodeValue(value, options)
    const decoded = decodeValue(encoded, options)
    const stages = [value, encoded, decoded].map((stage) =>
      ValueToString(stage)
    )

    assert.deepStrictEqual(
      decoded,
      value,
      `Failed with custom options ${JSON.stringify(options, undefined, 2)}\n` +
        stages.join(" -> ")
    )
  }

  it("Throws error if a value cannot be encoded", () => {
    assert.throws(() => encodeValue("a\x00b", { disallow: ["\x00"] }))
  })

  it("Encodes and decodes with custom delimiter and escape characters", () => {
    const options = { delimiter: ":", escape: "\\", disallow: [] }
    const testCases = [
      "simple",
      "with:delimiter",
      "with\\escape",
      "with\\:both",
      "multiple::delimiters",
      "multiple\\\\escapes",
      "mixed\\::\\\\:cases",
    ]

    testCases.forEach((value) => assertRoundTrip(value, options))
  })

  it("Handles all encoding options configurations", () => {
    const options = { delimiter: "\x01", escape: "\x02", disallow: ["\x00"] }

    const testCases = [
      "normal string",
      "with spaces",
      "with,punctuation!",
      "with\nnewline",
      "with\ttab",
    ]

    testCases.forEach((value) => assertRoundTrip(value, options))
  })

  it("Maintains proper escaping with nested delimiters", () => {
    const options = { delimiter: ":", escape: "\\", disallow: [] }
    const complexValue = "a:b\\:c\\\\:d"

    assertRoundTrip(complexValue, options)
  })
})
})
87 changes: 64 additions & 23 deletions src/helpers/codec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,29 @@ import { Tuple, Value } from "../storage/types"
import { compare } from "./compare"
import { UnreachableError } from "./Unreachable"

// Fallback values used by the encode/decode functions for any CodecOptions
// field the caller leaves out.
const defaultOptions: Required<CodecOptions> = {
delimiter: "\x00",
escape: "\x01",
disallow: [],
}

// Optional settings accepted by the encode/decode functions in this module.
// NOTE(review): decodeTuple inspects only the first character of a match and
// skips exactly one character past each delimiter, so `delimiter` and `escape`
// are presumably expected to be single, distinct characters — confirm.
export type CodecOptions = {
/**
* The delimiter appended after each encoded value in a tuple.
* @default "\x00"
*/
delimiter?: string
/**
* The escape character. It is prefixed to every escape or delimiter
* character that occurs inside an encoded tuple element.
* @default "\x01"
*/
escape?: string
/**
* Disallowed substrings in string values. Throws an error if any are found while encoding.
* @default []
*/
disallow?: string[]
}

// null < object < array < number < string < boolean
export const encodingByte = {
null: "b",
Expand All @@ -20,27 +43,32 @@ export type EncodingType = keyof typeof encodingByte

// Encoding type names ordered by ascending encoding byte, which is the order
// in which values of different types sort once encoded.
export const encodingRank = sortBy(
  Object.entries(encodingByte),
  // Sort on the byte only; the type name is not needed for ordering.
  ([, byte]) => byte
).map(([key]) => key as EncodingType)

/**
 * Encodes a single value as a string: one type byte from `encodingByte`
 * followed by a type-specific payload.
 *
 * @param value The value to encode.
 * @param options Codec options. `disallow` is checked against string values
 *   here; the full options object is forwarded to nested tuples and objects.
 * @returns The encoded string.
 * @throws Error if a string value contains any entry of `disallow`.
 * @throws UnreachableError if the value is of an unsupported type.
 */
export function encodeValue(value: Value, options?: CodecOptions): string {
  if (value === null) {
    return encodingByte.null
  }
  if (value === true || value === false) {
    return encodingByte.boolean + value
  }
  if (typeof value === "string") {
    const disallowedList = options?.disallow ?? defaultOptions.disallow
    for (const disallowed of disallowedList) {
      if (value.includes(disallowed)) {
        throw new Error(`Disallowed character found: ${disallowed}.`)
      }
    }
    return encodingByte.string + value
  }
  if (typeof value === "number") {
    return encodingByte.number + elen.encode(value)
  }
  if (Array.isArray(value)) {
    // Forward options so nested tuples use the same delimiter/escape/disallow.
    return encodingByte.array + encodeTuple(value, options)
  }
  if (typeof value === "object") {
    return encodingByte.object + encodeObjectValue(value, options)
  }
  throw new UnreachableError(value, "Unknown value type")
}
Expand Down Expand Up @@ -71,7 +99,7 @@ const decodeType = invert(encodingByte) as {
[key: string]: keyof typeof encodingByte
}

export function decodeValue(str: string): Value {
export function decodeValue(str: string, options?: CodecOptions): Value {
const encoding: EncodingType = decodeType[str[0]]
const rest = str.slice(1)

Expand All @@ -88,62 +116,75 @@ export function decodeValue(str: string): Value {
return elen.decode(rest)
}
if (encoding === "array") {
return decodeTuple(rest)
return decodeTuple(rest, options)
}
if (encoding === "object") {
return decodeObjectValue(rest)
return decodeObjectValue(rest, options)
}
throw new UnreachableError(encoding, "Invalid encoding byte")
}

/**
 * Encodes a tuple as the concatenation of its encoded elements, each followed
 * by the delimiter. Inside every element, occurrences of the escape character
 * are doubled and occurrences of the delimiter are prefixed with the escape
 * character, so an unescaped delimiter always marks an element boundary.
 *
 * @param tuple The values to encode, in order.
 * @param options Codec options; `delimiter` and `escape` fall back to the
 *   module defaults, and the whole object is forwarded to encodeValue.
 * @returns The encoded tuple string ("" for an empty tuple).
 */
export function encodeTuple(tuple: Tuple, options?: CodecOptions) {
  const delimiter = options?.delimiter ?? defaultOptions.delimiter
  const escape = options?.escape ?? defaultOptions.escape
  return tuple
    .map((value) => {
      const encoded = encodeValue(value, options)
      // split/join performs a literal substitution. Building a RegExp from the
      // raw option would throw for "\\" and misbehave for characters such as
      // ".", "|" or "$", and a replacement string like "$$" or "$&" would be
      // interpreted as a replacement pattern rather than inserted verbatim.
      return (
        encoded
          // B -> BB or \ -> \\
          .split(escape)
          .join(escape + escape)
          // A -> BA or x -> \x
          .split(delimiter)
          .join(escape + delimiter) + delimiter
      )
    })
    .join("")
}

/** Escapes regular-expression metacharacters so `text` is matched literally. */
function escapeRegExpLiteral(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
}

/**
 * Decodes a string produced by encodeTuple back into a tuple.
 *
 * Scans for escape+escape and escape+delimiter pairs, skipping over them, and
 * treats every remaining (unescaped) delimiter as the end of an element. Each
 * element is unescaped and passed to decodeValue. Any trailing text that is
 * not terminated by a delimiter is ignored.
 *
 * @param str The encoded tuple.
 * @param options Codec options; must match the ones used for encoding.
 * @returns The decoded tuple ([] for the empty string).
 */
export function decodeTuple(str: string, options?: CodecOptions) {
  if (str === "") {
    return []
  }

  const delimiter = options?.delimiter ?? defaultOptions.delimiter
  const escape = options?.escape ?? defaultOptions.escape

  // Escape the tokens before embedding them in patterns: an option such as
  // "\\" would otherwise be an invalid regular expression, and ".", "|", "$"
  // etc. would match the wrong text.
  const escapePattern = escapeRegExpLiteral(escape)
  const delimiterPattern = escapeRegExpLiteral(delimiter)

  // Capture all of the escaped BB and BA pairs and wait
  // til we find an exposed A.
  const matcher = new RegExp(
    `(${escapePattern}(${escapePattern}|${delimiterPattern})|${delimiterPattern})`,
    "g"
  )
  const reEncodedEscape = new RegExp(escapePattern + escapePattern, "g")
  const reEncodedDelimiter = new RegExp(escapePattern + delimiterPattern, "g")
  const tuple: Tuple = []
  let start = 0
  while (true) {
    const match = matcher.exec(str)
    if (match === null) {
      return tuple
    }
    // Group 2 only participates when the escape+escape / escape+delimiter
    // branch matched; in that case keep scanning.
    if (match[2] !== undefined) {
      continue
    }
    const end = match.index
    const escaped = str.slice(start, end)
    // Function replacers insert the text verbatim, so a "$" in the options is
    // never treated as a replacement pattern.
    const unescaped = escaped
      // BB -> B
      .replace(reEncodedEscape, () => escape)
      // BA -> A
      .replace(reEncodedDelimiter, () => delimiter)
    const decoded = decodeValue(unescaped, options)
    tuple.push(decoded)
    // Skip over the delimiter.
    start = end + delimiter.length
  }
}

function encodeObjectValue(obj: object) {
function encodeObjectValue(obj: object, options?: CodecOptions) {
if (!isPlainObject(obj)) {
throw new Error("Cannot serialize this object.")
}
Expand All @@ -152,11 +193,11 @@ function encodeObjectValue(obj: object) {
// We allow undefined values in objects, but we want to strip them out before
// serializing.
.filter(([key, value]) => value !== undefined)
return encodeTuple(entries as Tuple)
return encodeTuple(entries as Tuple, options)
}

function decodeObjectValue(str: string) {
const entries = decodeTuple(str) as Array<[string, Value]>
function decodeObjectValue(str: string, options?: CodecOptions) {
const entries = decodeTuple(str, options) as Array<[string, Value]>
const obj = {}
for (const [key, value] of entries) {
obj[key] = value
Expand Down
3 changes: 3 additions & 0 deletions src/helpers/randomInt.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/**
 * Returns a pseudo-random integer obtained by flooring `Math.random() * ceil`.
 *
 * For a positive `ceil` the result lies in the half-open range [0, ceil): the
 * upper bound itself is never returned, so pass a length rather than
 * length - 1 when picking a random array index.
 *
 * @param ceil Exclusive upper bound of the result.
 * @returns An integer in [0, ceil) when `ceil` is positive.
 */
export function randomInt(ceil: number): number {
  const scaled = ceil * Math.random()
  return Math.floor(scaled)
}
Loading