feat: search parser overhaul

Overhaul of the search parser to include:
* Numeric range queries with support for inclusive, exclusive, and
  half-open ranges
* Better parsing of dates
* Virtual fields (search on "minutes" and have it translated to a
  "duration" query)
* More tests
* First-class support for date fields
This commit is contained in:
Christian Benincasa
2025-11-18 16:06:53 -05:00
parent 4ac97a5f8c
commit fa6db00f27
9 changed files with 1779 additions and 111 deletions

3
pnpm-lock.yaml generated
View File

@@ -461,6 +461,9 @@ importers:
tsup:
specifier: ^8.0.2
version: 8.0.2(@microsoft/api-extractor@7.43.0(@types/node@22.10.7))(@swc/core@1.13.5)(postcss@8.5.6)(ts-node@10.9.2(@swc/core@1.13.5)(@types/node@22.10.7)(typescript@5.7.3))(typescript@5.7.3)
tsx:
specifier: ^4.20.5
version: 4.20.6
typescript:
specifier: 5.7.3
version: 5.7.3

View File

@@ -1,5 +1,6 @@
import { dayjsMod as mod } from '@tunarr/shared/util';
import dayjs from 'dayjs';
import customParseFormat from 'dayjs/plugin/customParseFormat.js';
import duration from 'dayjs/plugin/duration.js';
import timezone from 'dayjs/plugin/timezone.js';
import utc from 'dayjs/plugin/utc.js';
@@ -8,5 +9,6 @@ dayjs.extend(duration);
dayjs.extend(timezone);
dayjs.extend(utc);
dayjs.extend(mod);
dayjs.extend(customParseFormat);
export default dayjs;

View File

@@ -13,6 +13,7 @@
"build-dev": "tsup --dts --watch",
"clean": "rimraf ./dist/",
"dev": "tsc --declaration --watch",
"generate-search-diagram": "tsx scripts/generate_search_diagram.ts",
"test": "vitest --run"
},
"exports": {
@@ -50,6 +51,7 @@
"rimraf": "^5.0.5",
"ts-essentials": "^9.4.2",
"tsup": "^8.0.2",
"tsx": "^4.20.5",
"typescript": "catalog:",
"vitest": "^3.2.4"
},

View File

@@ -0,0 +1,13 @@
import { createSyntaxDiagramsCode } from 'chevrotain';
import fs from 'node:fs';
import path, { dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { SearchParser } from '../src/util/searchUtil.js';
// ES modules have no built-in __dirname, so derive this script's directory
// from its own module URL.
const scriptDir = dirname(fileURLToPath(import.meta.url));

// Serialize the search grammar and render it as syntax-diagram HTML.
const diagramHtml = createSyntaxDiagramsCode(
  new SearchParser().getSerializedGastProductions(),
  {},
);

// Write the HTML file to disk, next to this script.
fs.writeFileSync(path.join(scriptDir, 'generated_diagrams.html'), diagramHtml);

View File

@@ -0,0 +1,940 @@
<!-- This is a generated file -->
<!DOCTYPE html>
<meta charset="utf-8">
<style>
body {
background-color: hsl(30, 20%, 95%)
}
</style>
<link rel='stylesheet' href='https://unpkg.com/chevrotain@11.0.3/diagrams/diagrams.css'>
<script src='https://unpkg.com/chevrotain@11.0.3/diagrams/vendor/railroad-diagrams.js'></script>
<script src='https://unpkg.com/chevrotain@11.0.3/diagrams/src/diagrams_builder.js'></script>
<script src='https://unpkg.com/chevrotain@11.0.3/diagrams/src/diagrams_behavior.js'></script>
<script src='https://unpkg.com/chevrotain@11.0.3/diagrams/src/main.js'></script>
<div id="diagrams" align="center"></div>
<script>
window.serializedGrammar = [
{
"type": "Rule",
"name": "searchValue",
"orgText": "",
"definition": [
{
"type": "Alternation",
"idx": 0,
"definition": [
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "Quote",
"label": "Quote",
"idx": 0,
"terminalLabel": "str_open",
"pattern": "\""
},
{
"type": "RepetitionMandatory",
"idx": 0,
"definition": [
{
"type": "Repetition",
"idx": 0,
"definition": [
{
"type": "Alternation",
"idx": 2,
"definition": [
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "Identifier",
"label": "Identifier",
"idx": 2,
"terminalLabel": "query",
"pattern": "[a-zA-Z0-9\\-]+"
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "Integer",
"label": "Integer",
"idx": 2,
"terminalLabel": "query",
"pattern": "\\d+"
}
]
}
]
}
]
}
]
},
{
"type": "Terminal",
"name": "Quote",
"label": "Quote",
"idx": 3,
"terminalLabel": "str_close",
"pattern": "\""
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Repetition",
"idx": 2,
"definition": [
{
"type": "Alternation",
"idx": 3,
"definition": [
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "Identifier",
"label": "Identifier",
"idx": 4,
"terminalLabel": "query",
"pattern": "[a-zA-Z0-9\\-]+"
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "Integer",
"label": "Integer",
"idx": 4,
"terminalLabel": "query",
"pattern": "\\d+"
}
]
}
]
}
]
}
]
}
]
}
]
},
{
"type": "Rule",
"name": "parenGroup",
"orgText": "",
"definition": [
{
"type": "Terminal",
"name": "OpenParenGroup",
"label": "OpenParenGroup",
"idx": 0,
"pattern": "\\("
},
{
"type": "RepetitionMandatory",
"idx": 0,
"definition": [
{
"type": "NonTerminal",
"name": "searchClause",
"idx": 0
}
]
},
{
"type": "Terminal",
"name": "CloseParenGroup",
"label": "CloseParenGroup",
"idx": 0,
"pattern": "\\)"
}
]
},
{
"type": "Rule",
"name": "string_operator",
"orgText": "",
"definition": [
{
"type": "Alternation",
"idx": 0,
"definition": [
{
"type": "Alternative",
"definition": [
{
"type": "Alternation",
"idx": 2,
"definition": [
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "EqOperator",
"label": "EqOperator",
"idx": 0,
"pattern": ":|="
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "NeqOperator",
"label": "NeqOperator",
"idx": 0,
"pattern": "!="
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "LTEOperator",
"label": "LTEOperator",
"idx": 0,
"pattern": "<="
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "LTOperator",
"label": "LTOperator",
"idx": 0,
"pattern": "<"
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "ContainsOperator",
"label": "ContainsOperator",
"idx": 0,
"pattern": "~"
}
]
}
]
},
{
"type": "NonTerminal",
"name": "searchValue",
"idx": 0
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "InOperator",
"label": "InOperator",
"idx": 0,
"pattern": "in"
},
{
"type": "Terminal",
"name": "OpenArray",
"label": "OpenArray",
"idx": 2,
"pattern": "\\["
},
{
"type": "RepetitionMandatoryWithSeparator",
"idx": 0,
"separator": {
"type": "Terminal",
"name": "Comma",
"label": "Comma",
"idx": 1,
"pattern": ","
},
"definition": [
{
"type": "NonTerminal",
"name": "searchValue",
"idx": 2
}
]
},
{
"type": "Terminal",
"name": "CloseArray",
"label": "CloseArray",
"idx": 2,
"pattern": "]"
}
]
}
]
}
]
},
{
"type": "Rule",
"name": "numeric_operator",
"orgText": "",
"definition": [
{
"type": "Alternation",
"idx": 0,
"definition": [
{
"type": "Alternative",
"definition": [
{
"type": "Alternation",
"idx": 2,
"definition": [
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "EqOperator",
"label": "EqOperator",
"idx": 0,
"pattern": ":|="
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "NeqOperator",
"label": "NeqOperator",
"idx": 0,
"pattern": "!="
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "LTEOperator",
"label": "LTEOperator",
"idx": 0,
"pattern": "<="
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "GTEOperator",
"label": "GTEOperator",
"idx": 0,
"pattern": ">="
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "LTOperator",
"label": "LTOperator",
"idx": 0,
"pattern": "<"
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "GTOperator",
"label": "GTOperator",
"idx": 0,
"pattern": ">"
}
]
}
]
},
{
"type": "NonTerminal",
"name": "numeric_value",
"idx": 0
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "BetweenOperator",
"label": "BetweenOperator",
"idx": 0,
"pattern": "between"
},
{
"type": "Alternation",
"idx": 3,
"definition": [
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "OpenParenGroup",
"label": "OpenParenGroup",
"idx": 2,
"pattern": "\\("
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "OpenArray",
"label": "OpenArray",
"idx": 2,
"pattern": "\\["
}
]
}
]
},
{
"type": "NonTerminal",
"name": "numeric_value",
"idx": 2
},
{
"type": "Option",
"idx": 0,
"definition": [
{
"type": "Terminal",
"name": "Comma",
"label": "Comma",
"idx": 2,
"pattern": ","
}
]
},
{
"type": "NonTerminal",
"name": "numeric_value",
"idx": 3
},
{
"type": "Alternation",
"idx": 4,
"definition": [
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "CloseParenGroup",
"label": "CloseParenGroup",
"idx": 3,
"pattern": "\\)"
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "CloseArray",
"label": "CloseArray",
"idx": 3,
"pattern": "]"
}
]
}
]
}
]
}
]
}
]
},
{
"type": "Rule",
"name": "numeric_value",
"orgText": "",
"definition": [
{
"type": "Alternation",
"idx": 0,
"definition": [
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "FloatingPoint",
"label": "FloatingPoint",
"idx": 0,
"pattern": "\\d+\\.\\d+"
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "Integer",
"label": "Integer",
"idx": 0,
"pattern": "\\d+"
}
]
}
]
}
]
},
{
"type": "Rule",
"name": "date_operator",
"orgText": "",
"definition": [
{
"type": "Alternation",
"idx": 0,
"definition": [
{
"type": "Alternative",
"definition": [
{
"type": "Alternation",
"idx": 2,
"definition": [
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "EqOperator",
"label": "EqOperator",
"idx": 0,
"pattern": ":|="
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "LTEOperator",
"label": "LTEOperator",
"idx": 0,
"pattern": "<="
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "LTOperator",
"label": "LTOperator",
"idx": 0,
"pattern": "<"
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "GTEOperator",
"label": "GTEOperator",
"idx": 0,
"pattern": ">="
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "GTOperator",
"label": "GTOperator",
"idx": 0,
"pattern": ">"
}
]
}
]
},
{
"type": "NonTerminal",
"name": "searchValue",
"idx": 0
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "BetweenOperator",
"label": "BetweenOperator",
"idx": 0,
"pattern": "between"
},
{
"type": "Alternation",
"idx": 3,
"definition": [
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "OpenParenGroup",
"label": "OpenParenGroup",
"idx": 2,
"pattern": "\\("
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "OpenArray",
"label": "OpenArray",
"idx": 2,
"pattern": "\\["
}
]
}
]
},
{
"type": "NonTerminal",
"name": "searchValue",
"idx": 2
},
{
"type": "Option",
"idx": 0,
"definition": [
{
"type": "Terminal",
"name": "Comma",
"label": "Comma",
"idx": 2,
"pattern": ","
}
]
},
{
"type": "NonTerminal",
"name": "searchValue",
"idx": 3
},
{
"type": "Alternation",
"idx": 4,
"definition": [
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "CloseParenGroup",
"label": "CloseParenGroup",
"idx": 3,
"pattern": "\\)"
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "CloseArray",
"label": "CloseArray",
"idx": 3,
"pattern": "]"
}
]
}
]
}
]
}
]
}
]
},
{
"type": "Rule",
"name": "singleStringSearch",
"orgText": "",
"definition": [
{
"type": "Terminal",
"name": "StringField",
"label": "StringField",
"idx": 0,
"terminalLabel": "field",
"pattern": "actor|genre|director|writer|library_id|title|video_codec|video_dynamic_range|audio_codec|tags|rating|type"
},
{
"type": "NonTerminal",
"name": "string_operator",
"idx": 0,
"label": "op"
}
]
},
{
"type": "Rule",
"name": "singleNumericSearch",
"orgText": "",
"definition": [
{
"type": "Terminal",
"name": "NumericField",
"label": "NumericField",
"idx": 0,
"pattern": "duration|minutes|seconds|video_bit_depth|video_height|video_width|audio_channels|release_year"
},
{
"type": "NonTerminal",
"name": "numeric_operator",
"idx": 0
}
]
},
{
"type": "Rule",
"name": "singleDateSearch",
"orgText": "",
"definition": [
{
"type": "Terminal",
"name": "DateField",
"label": "DateField",
"idx": 0,
"terminalLabel": "field",
"pattern": "release_date"
},
{
"type": "NonTerminal",
"name": "date_operator",
"idx": 0,
"label": "op"
}
]
},
{
"type": "Rule",
"name": "singleSearch",
"orgText": "",
"definition": [
{
"type": "Alternation",
"idx": 0,
"definition": [
{
"type": "Alternative",
"definition": [
{
"type": "NonTerminal",
"name": "singleStringSearch",
"idx": 0
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "NonTerminal",
"name": "singleNumericSearch",
"idx": 0
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "NonTerminal",
"name": "singleDateSearch",
"idx": 0
}
]
}
]
}
]
},
{
"type": "Rule",
"name": "searchClause",
"orgText": "",
"definition": [
{
"type": "Alternation",
"idx": 0,
"definition": [
{
"type": "Alternative",
"definition": [
{
"type": "NonTerminal",
"name": "parenGroup",
"idx": 0
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "NonTerminal",
"name": "singleSearch",
"idx": 0
},
{
"type": "Option",
"idx": 0,
"definition": [
{
"type": "Alternation",
"idx": 2,
"definition": [
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "CombineOr",
"label": "CombineOr",
"idx": 0,
"pattern": "OR"
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "CombineAnd",
"label": "CombineAnd",
"idx": 0,
"pattern": "AND"
}
]
},
{
"type": "Alternative",
"definition": [
{
"type": "Terminal",
"name": "WhiteSpace",
"label": "WhiteSpace",
"idx": 0,
"pattern": "\\s+"
}
]
}
]
},
{
"type": "NonTerminal",
"name": "searchClause",
"idx": 0
}
]
}
]
}
]
}
]
},
{
"type": "Rule",
"name": "searchExpression",
"orgText": "",
"definition": [
{
"type": "RepetitionMandatory",
"idx": 0,
"definition": [
{
"type": "NonTerminal",
"name": "searchClause",
"idx": 0
}
]
}
]
}
];
</script>
<script>
var diagramsDiv = document.getElementById("diagrams");
main.drawDiagramsFromSerializedGrammar(serializedGrammar, diagramsDiv);
</script>

View File

@@ -1,41 +1,335 @@
import { createSyntaxDiagramsCode } from 'chevrotain';
import fs from 'node:fs';
import { inspect } from 'node:util';
import path, { dirname } from 'path';
import { fileURLToPath } from 'url';
import { SearchParser, tokenizeSearchQuery } from './searchUtil.js';
import { SearchFilter } from '@tunarr/types/api';
import dayjs from 'dayjs';
import customParseFormat from 'dayjs/plugin/customParseFormat.js';
import {
parsedSearchToRequest,
SearchClause,
SearchParser,
tokenizeSearchQuery,
} from './searchUtil.js';
dayjs.extend(customParseFormat);
/**
 * Test helper: tokenizes `input`, feeds the tokens to a fresh SearchParser and
 * runs the `searchExpression` rule. Asserts that neither the lexer nor the
 * parser reported errors; the errors are serialized into the assertion
 * message so a failure shows what went wrong.
 *
 * @param input raw search query text
 * @returns whatever `searchExpression()` produced for the input
 */
function parseAndCheckExpression(input: string) {
  const { errors: lexErrors, tokens } = tokenizeSearchQuery(input);
  expect(lexErrors, JSON.stringify(lexErrors)).toHaveLength(0);

  const searchParser = new SearchParser();
  searchParser.input = tokens;
  const parsed = searchParser.searchExpression();

  const parseErrors = searchParser.errors;
  expect(parseErrors, JSON.stringify(parseErrors, null, 2)).toHaveLength(0);
  return parsed;
}
describe('search parser', () => {
test('simple parse', () => {
const __dirname = dirname(fileURLToPath(import.meta.url));
const input = 'genre IN [comedy, horror] OR title ~ "XYZ"';
const query = parseAndCheckExpression(input);
expect(query).toMatchObject({
type: 'binary_clause',
lhs: {
type: 'single_query',
field: 'genre',
op: 'in',
value: ['comedy', 'horror'],
},
op: 'or',
rhs: {
type: 'single_query',
field: 'title',
op: 'contains',
value: 'XYZ',
},
} satisfies SearchClause);
});
test('parse string fields', () => {
const input =
'library_id = ddd327c3-aea2-4b27-a2c0-a8ce190d25d0 AND title <= A';
const query = parseAndCheckExpression(input);
expect(query).toMatchObject({
type: 'binary_clause',
lhs: {
type: 'single_query',
field: 'library_id',
op: '=',
value: 'ddd327c3-aea2-4b27-a2c0-a8ce190d25d0',
},
op: 'and',
rhs: {
type: 'single_query',
field: 'title',
op: '<=',
value: 'A',
},
} satisfies SearchClause);
});
test('parse date fields', () => {
const input = 'release_date = 2025-01-01';
const query = parseAndCheckExpression(input);
expect(query).toMatchObject({
type: 'single_date_query',
field: 'release_date',
op: '=',
value: '2025-01-01',
} satisfies SearchClause);
});
test('parse numeric fields', () => {
const input = 'duration >= 10';
const lexerResult = tokenizeSearchQuery(input);
const parser = new SearchParser();
parser.input = lexerResult.tokens;
const serializedGrammar = parser.getSerializedGastProductions();
// console.log(inspect(lexerResult, false, null));
const result = parser.searchExpression();
console.log(inspect(result, false, null));
// const visitor = new SearchExpressionVisitor();
// visitor.visit(result);
const htmlText = createSyntaxDiagramsCode(serializedGrammar, {});
// Write the HTML file to disk
const outPath = path.resolve(__dirname, './');
fs.writeFileSync(outPath + '/generated_diagrams.html', htmlText);
console.log(lexerResult.tokens, parser.errors);
});
test('can parse uuids', () => {
const input =
'libraryId = ddd327c3-aea2-4b27-a2c0-a8ce190d25d0 AND title <= A';
'library_id = ddd327c3-aea2-4b27-a2c0-a8ce190d25d0 AND title <= A';
const lexerResult = tokenizeSearchQuery(input);
const parser = new SearchParser();
parser.input = lexerResult.tokens;
const query = parser.searchExpression();
console.log(query);
});
test('supports numeric between inclusive', () => {
const input = 'duration between [100, 200]';
const query = parseAndCheckExpression(input);
expect(query).toMatchObject({
type: 'single_numeric_query',
field: 'duration',
includeHigher: true,
includeLow: true,
op: 'between',
value: [100, 200],
} satisfies SearchClause);
});
test('supports numeric between half open', () => {
const input = 'duration between [100, 200)';
const query = parseAndCheckExpression(input);
expect(query).toMatchObject({
type: 'single_numeric_query',
field: 'duration',
includeHigher: false,
includeLow: true,
op: 'between',
value: [100, 200],
} satisfies SearchClause);
const input2 = 'duration between (100, 200]';
const query2 = parseAndCheckExpression(input2);
expect(query2).toMatchObject({
type: 'single_numeric_query',
field: 'duration',
includeHigher: true,
includeLow: false,
op: 'between',
value: [100, 200],
} satisfies SearchClause);
});
test('compound queries', () => {
const input = 'duration between [100, 200] AND title <= A';
const query = parseAndCheckExpression(input);
expect(query).toMatchObject({
type: 'binary_clause',
op: 'and',
lhs: {
type: 'single_numeric_query',
field: 'duration',
includeHigher: true,
includeLow: true,
op: 'between',
value: [100, 200],
},
rhs: {
type: 'single_query',
field: 'title',
op: '<=',
value: 'A',
},
} satisfies SearchClause);
});
test('compound query with date', () => {
const input = 'release_date < 2020-01-01 AND title <= A';
const query = parseAndCheckExpression(input);
expect(query).toMatchObject({
type: 'binary_clause',
op: 'and',
lhs: {
type: 'single_date_query',
field: 'release_date',
op: '<',
value: '2020-01-01',
},
rhs: {
type: 'single_query',
field: 'title',
op: '<=',
value: 'A',
},
} satisfies SearchClause);
});
});
// Tests for parsedSearchToRequest: each case hand-builds a parsed SearchClause
// and checks the SearchFilter request produced from it.
describe('parsedSearchToRequest', () => {
// Both bounds inclusive: expected as a single 'to' range filter.
test('handles inclusive numeric between', () => {
const clause = {
type: 'single_numeric_query',
field: 'duration',
includeHigher: true,
includeLow: true,
op: 'between',
value: [100, 200],
} satisfies SearchClause;
const request = parsedSearchToRequest(clause);
expect(request).toEqual({
type: 'value',
fieldSpec: {
key: 'duration',
name: '',
op: 'to',
type: 'numeric',
value: [100, 200],
},
} satisfies SearchFilter);
});
// Both bounds exclusive: expected as an 'and' of a '>' filter on the low
// bound and a '<' filter on the high bound.
test('handles exclusive numeric between', () => {
const clause = {
type: 'single_numeric_query',
field: 'duration',
includeHigher: false,
includeLow: false,
op: 'between',
value: [100, 200],
} satisfies SearchClause;
const request = parsedSearchToRequest(clause);
const lhs = {
type: 'value',
fieldSpec: {
key: 'duration',
name: '',
op: '>',
type: 'numeric',
value: 100,
},
} satisfies SearchFilter;
const rhs = {
type: 'value',
fieldSpec: {
key: 'duration',
name: '',
op: '<',
type: 'numeric',
value: 200,
},
} satisfies SearchFilter;
expect(request).toEqual({
type: 'op',
op: 'and',
children: [lhs, rhs],
} satisfies SearchFilter);
});
// Dashed date string: release_date is expected under the key
// 'originalReleaseDate' with the numeric value of the parsed dayjs date.
test('handles date parsing with YYYY-MM-DD', () => {
const clause = {
type: 'single_date_query',
field: 'release_date',
op: '=',
value: '2023-01-01',
} satisfies SearchClause;
const request = parsedSearchToRequest(clause);
expect(request).toMatchObject({
type: 'value',
fieldSpec: {
key: 'originalReleaseDate',
op: '=',
type: 'date',
value: +dayjs('2023-01-01', 'YYYY-MM-DD'),
},
});
});
// Compact date string (no dashes) is expected to be accepted as well.
test('handles date parsing with YYYYMMDD', () => {
const clause = {
type: 'single_date_query',
field: 'release_date',
op: '=',
value: '20230101',
} satisfies SearchClause;
const request = parsedSearchToRequest(clause);
expect(request).toMatchObject({
type: 'value',
fieldSpec: {
key: 'originalReleaseDate',
op: '=',
type: 'date',
value: +dayjs('20230101', 'YYYYMMDD'),
},
});
});
// Inclusive date range: expected as a 'to' filter whose two bounds are the
// numeric values of the parsed dates.
test('handles date between query', () => {
const clause = {
type: 'single_date_query',
field: 'release_date',
op: 'between',
value: ['2023-01-01', '2023-12-31'],
includeLow: true,
includeHigher: true,
} satisfies SearchClause;
const request = parsedSearchToRequest(clause);
expect(request).toMatchObject({
type: 'value',
fieldSpec: {
key: 'originalReleaseDate',
op: 'to',
type: 'date',
value: [
+dayjs('2023-01-01', 'YYYY-MM-DD'),
+dayjs('2023-12-31', 'YYYY-MM-DD'),
],
},
});
});
// Virtual field: 'minutes' is expected to be rewritten to the 'duration' key
// with the value scaled by 60 * 1000.
test('converts virtual field key and value', () => {
const clause = {
type: 'single_numeric_query',
field: 'minutes',
op: '<=',
value: 30,
} satisfies SearchClause;
const request = parsedSearchToRequest(clause);
expect(request).toMatchObject({
type: 'value',
fieldSpec: {
key: 'duration',
name: '',
op: '<=',
type: 'numeric',
value: 30 * 60 * 1000,
},
} satisfies SearchFilter);
});
});

View File

@@ -4,15 +4,20 @@ import type {
TupleToUnion,
} from '@tunarr/types';
import type {
NumericOperators,
SearchFilter,
SearchFilterOperatorNode,
SearchFilterValueNode,
StringOperators,
} from '@tunarr/types/api';
import { FreeSearchQueryKeyMappings } from '@tunarr/types/api';
import { createToken, EmbeddedActionsParser, Lexer } from 'chevrotain';
import { isArray, isNumber } from 'lodash-es';
import type { NonEmptyArray } from 'ts-essentials';
import dayjs from 'dayjs';
import customParseFormat from 'dayjs/plugin/customParseFormat.js';
import { identity, isArray, isNumber } from 'lodash-es';
import type { NonEmptyArray, StrictExclude, StrictOmit } from 'ts-essentials';
import { match } from 'ts-pattern';
dayjs.extend(customParseFormat);
const Integer = createToken({ name: 'Integer', pattern: /\d+/ });
@@ -26,6 +31,52 @@ const Identifier = createToken({
pattern: /[a-zA-Z0-9-]+/,
});
// Field names that are lexed as StringField tokens (consumed by the
// singleStringSearch rule).
const StringFields = [
'actor',
'genre',
'director',
'writer',
'library_id',
'title',
'video_codec',
'video_dynamic_range',
'audio_codec',
'tags',
'rating',
'type',
] as const;
// Token matching any of the names above. The pattern is a plain alternation
// of the names in list order. `longer_alt` lets the lexer fall back to
// Identifier when Identifier would match a longer run of characters at the
// same position (e.g. a word that merely starts with a field name).
const StringField = createToken({
name: 'StringField',
pattern: new RegExp(StringFields.join('|')),
longer_alt: Identifier,
});
// Field names that are lexed as DateField tokens (consumed by the
// singleDateSearch rule).
const DateFields = ['release_date'] as const;
// Token matching any date field name; falls back to Identifier when that
// would match a longer run of characters at the same position.
const DateField = createToken({
name: 'DateField',
pattern: new RegExp(DateFields.join('|')),
longer_alt: Identifier,
});
// Field names that are lexed as NumericField tokens (consumed by the
// singleNumericSearch rule). 'minutes' and 'seconds' are virtual fields:
// virtualFieldToIndexField maps both to 'duration'.
const NumericFields = [
'duration',
'minutes',
'seconds',
'video_bit_depth',
'video_height',
'video_width',
'audio_channels',
'release_year',
] as const;
// Token matching any numeric field name; falls back to Identifier when that
// would match a longer run of characters at the same position.
const NumericField = createToken({
name: 'NumericField',
pattern: new RegExp(NumericFields.join('|')),
longer_alt: Identifier,
});
const WhiteSpace = createToken({
name: 'WhiteSpace',
pattern: /\s+/,
@@ -97,6 +148,11 @@ const GreaterThanOperator = createToken({ name: 'GTOperator', pattern: />/ });
// Case-insensitive `in` keyword. InOperator is listed before Identifier in
// allTokens, so without `longer_alt` any word that merely starts with "in"
// (e.g. "Inception") would be split into InOperator + remainder. With
// `longer_alt`, the lexer prefers Identifier whenever it matches a longer
// run of characters at the same position; a bare "in" is still InOperator.
const InOperator = createToken({
  name: 'InOperator',
  pattern: /in/i,
  longer_alt: Identifier,
});
// Case-insensitive `between` keyword used for range queries. BetweenOperator
// is listed before Identifier in allTokens, so `longer_alt` is needed to keep
// a longer word that starts with "between" lexing as a single Identifier
// rather than BetweenOperator + remainder. A bare "between" is unaffected.
const BetweenOperator = createToken({
  name: 'BetweenOperator',
  pattern: /between/i,
  longer_alt: Identifier,
});
const allTokens = [
WhiteSpace,
Comma,
@@ -114,11 +170,16 @@ const allTokens = [
LessThanOperator,
GreaterThanOperator,
InOperator,
BetweenOperator,
ContainsOperator,
// Order matters here. float is more specific
// than int.
FloatingPoint,
Integer,
// Fields
StringField,
DateField,
NumericField,
// Catch all
Identifier,
];
@@ -127,7 +188,10 @@ const SearchExpressionLexer = new Lexer(allTokens);
const StringOps = ['=', '!=', '<', '<=', 'in', 'contains'] as const;
type StringOps = TupleToUnion<typeof StringOps>;
type Ops = '=' | '!=' | '<' | '<=' | '>' | '>=';
// Operators accepted on numeric fields; the same-named type is the union of
// these literals.
const NumericOps = ['=', '!=', '<', '<=', '>', '>=', 'between'] as const;
type NumericOps = TupleToUnion<typeof NumericOps>;
// Operators accepted on date fields. Unlike NumericOps there is no '!='.
const DateOps = ['=', '<', '<=', '>', '>=', 'between'] as const;
type DateOps = TupleToUnion<typeof DateOps>;
const StringOpToApiType = {
'<': 'starts with',
@@ -138,6 +202,17 @@ const StringOpToApiType = {
in: 'in',
} satisfies Record<StringOps, StringOperators>;
// Maps each parsed numeric operator to the operator name used by the API's
// NumericOperators type. `satisfies` checks that every NumericOps member has
// an entry.
const NumericOpToApiType = {
'!=': '!=',
'<': '<',
'<=': '<=',
'=': '=',
'>': '>',
'>=': '>=',
// This depends on inclusivity: the parsedSearchToRequest tests expect 'to'
// for a fully inclusive range, while a fully exclusive range is expressed
// as '>' AND '<' instead.
between: 'to',
} satisfies Record<NumericOps, NumericOperators>;
export type SearchGroup = {
type: 'search_group';
clauses: SearchClause[];
@@ -150,15 +225,42 @@ export type SingleStringSearchQuery = {
value: string | NonEmptyArray<string>;
};
export type SingleNumericQuery = {
type: 'single_numeric_query';
field: string;
// TODO: Use real types
op: Ops;
value: number;
};
/**
 * A parsed query against a single numeric field. Discriminated on `op`:
 * every operator except 'between' carries one number, while 'between'
 * carries a [low, high] pair plus a flag per bound saying whether that bound
 * is included in the range.
 */
export type SingleNumericQuery =
| {
type: 'single_numeric_query';
field: string;
op: StrictExclude<NumericOps, 'between'>;
value: number;
}
| {
type: 'single_numeric_query';
op: 'between';
field: string;
value: [number, number];
// true when the range was opened with '[' rather than '('
includeLow: boolean;
// true when the range was closed with ']' rather than ')'
includeHigher: boolean;
};
export type SingleSearch = SingleNumericQuery | SingleStringSearchQuery;
/**
 * A parsed query against a single date field. Values are kept as the raw
 * strings from the query text. Discriminated on `op`: every operator except
 * 'between' carries one string, while 'between' carries a [low, high] pair
 * plus a flag per bound saying whether that bound is included in the range.
 */
export type SingleDateSearchQuery =
| {
type: 'single_date_query';
field: string;
op: StrictExclude<DateOps, 'between'>;
value: string;
}
| {
type: 'single_date_query';
op: 'between';
field: string;
value: [string, string];
// true when the range was opened with '[' rather than '('
includeLow: boolean;
// true when the range was closed with ']' rather than ')'
includeHigher: boolean;
};
/** Any single-field query: numeric, string, or date. */
export type SingleSearch =
| SingleNumericQuery
| SingleStringSearchQuery
| SingleDateSearchQuery;
export type SearchClause =
| SearchGroup
@@ -173,6 +275,48 @@ export type BinarySearchClause = {
rhs: SearchClause;
};
/**
 * Maps virtual (query-facing) field names to the underlying field key they
 * are translated to, e.g. `release_date` -> `originalReleaseDate`.
 */
export const virtualFieldToIndexField: Record<string, string> = {
genre: 'genres.name',
actor: 'actors.name',
// NOTE(review): 'writer'/'director'/'studio' map to singular paths while
// genre/actor map to plural ones — confirm against the index schema.
writer: 'writer.name',
director: 'director.name',
studio: 'studio.name',
year: 'originalReleaseYear',
release_date: 'originalReleaseDate',
release_year: 'originalReleaseYear',
// these get mapped to the duration field and their
// values get converted to the appropriate units
minutes: 'duration',
seconds: 'duration',
// This isn't really true, since this could map to multiple fields
// TODO: Make grouping-type specific subdocs
show_genre: 'grandparent.genre',
show_title: 'grandparent.title',
show_tag: 'grandparent.tag',
grandparent_genre: 'grandparent.genre',
};
/**
 * Converts a release-date string to the numeric value of the parsed dayjs
 * date. `YYYY-MM-DD` and `YYYYMMDD` are accepted; they are tried in that order
 * with dayjs strict parsing, and the first valid parse wins.
 *
 * @param value date text taken from the search query
 * @returns numeric value of the parsed date
 * @throws Error when `value` matches neither accepted format
 */
function normalizeReleaseDate(value: string) {
  const acceptedFormats = ['YYYY-MM-DD', 'YYYYMMDD'];
  const parsed = acceptedFormats
    .map((format) => dayjs(value, format, true))
    .find((candidate) => candidate.isValid());

  if (parsed === undefined) {
    throw new Error(`Could not parse inputted date string: ${value}`);
  }
  return parsed.valueOf();
}
/** A function turning a raw query value into its normalized form. */
type Converter<In, Out = In> = (input: In) => Out;

// Per-field value converters for the virtual numeric fields. Both scale the
// user-supplied amount up to milliseconds.
const numericFieldNormalizersByField = {
  minutes(minutes: number) {
    return minutes * 60 * 1000;
  },
  seconds(seconds: number) {
    return seconds * 1000;
  },
} satisfies Record<string, Converter<number>>;
// Per-field converters for date fields: each turns the raw date string from
// the query into a number. release_date uses normalizeReleaseDate.
const dateFieldNormalizersByField = {
release_date: normalizeReleaseDate,
} satisfies Record<string, Converter<string, number>>;
export class SearchParser extends EmbeddedActionsParser {
constructor() {
super(allTokens, { recoveryEnabled: false });
@@ -181,28 +325,61 @@ export class SearchParser extends EmbeddedActionsParser {
private searchValue = this.RULE('searchValue', () => {
const valueParts: string[] = [];
this.OR([
return this.OR([
{
// Attempt to consume a quoted string.
ALT: () => {
this.CONSUME(Quote, { LABEL: 'str_open' });
this.AT_LEAST_ONE({
DEF: () => {
valueParts.push(
this.CONSUME(Identifier, { LABEL: 'query' }).image,
);
this.MANY(() => {
this.OR2([
{
ALT: () =>
valueParts.push(
this.CONSUME2(Identifier, { LABEL: 'query' }).image,
),
},
{
ALT: () =>
valueParts.push(
this.CONSUME2(Integer, { LABEL: 'query' }).image,
),
},
]);
});
},
});
this.CONSUME2(Quote, { LABEL: 'str_close' });
this.CONSUME3(Quote, { LABEL: 'str_close' });
return valueParts.join(' ');
},
},
{
// Attempt to consume an unquoted string. Consumes both "integers" and "identifiers"
// and joins them with empty string to complete a singular string. This handles things
// like dates, e.g. 2025-03-02
ALT: () => {
valueParts.push(this.CONSUME2(Identifier, { LABEL: 'query' }).image);
this.MANY2(() => {
this.OR3([
{
ALT: () =>
valueParts.push(
this.CONSUME4(Identifier, { LABEL: 'query' }).image,
),
},
{
ALT: () =>
valueParts.push(
this.CONSUME4(Integer, { LABEL: 'query' }).image,
),
},
]);
});
return valueParts.join('');
},
},
]);
return valueParts.join(' ');
});
private parenGroup = this.RULE('parenGroup', () => {
@@ -219,7 +396,7 @@ export class SearchParser extends EmbeddedActionsParser {
} satisfies SearchGroup;
});
private operator = this.RULE('operator', () => {
private stringOperator = this.RULE('string_operator', () => {
return this.OR<{ op: StringOps; value: string | NonEmptyArray<string> }>([
{
ALT: () => {
@@ -228,7 +405,6 @@ export class SearchParser extends EmbeddedActionsParser {
ALT: () => {
const tok = this.CONSUME(EqOperator);
return tok.image === ':' ? '=' : (tok.image as StringOps);
// const value = this
},
},
{
@@ -238,9 +414,6 @@ export class SearchParser extends EmbeddedActionsParser {
ALT: () =>
this.CONSUME(LessThanOrEqualOperator).image as StringOps,
},
// {
// ALT: () => this.CONSUME(GreaterThanOrEqualOperator).image as StringOps,
// },
{
ALT: () => this.CONSUME(LessThanOperator).image as StringOps,
},
@@ -279,27 +452,82 @@ export class SearchParser extends EmbeddedActionsParser {
});
private numericOperator = this.RULE('numeric_operator', () => {
return this.OR<Ops>([
return this.OR<StrictOmit<SingleNumericQuery, 'field'>>([
{
ALT: () => {
const tok = this.CONSUME(EqOperator);
return tok.image === ':' ? '=' : (tok.image as Ops);
const op = this.OR2<StrictExclude<NumericOps, 'between'>>([
{
ALT: () => {
const tok = this.CONSUME(EqOperator);
return tok.image === ':' ? '=' : (tok.image as '=');
},
},
{
ALT: () => this.CONSUME(NeqOperator).image as '!=',
},
{
ALT: () => this.CONSUME(LessThanOrEqualOperator).image as '<=',
},
{
ALT: () => this.CONSUME(GreaterThanOrEqualOperator).image as '>=',
},
{
ALT: () => this.CONSUME(LessThanOperator).image as '<',
},
{
ALT: () => this.CONSUME(GreaterThanOperator).image as '>',
},
]);
const value = this.SUBRULE(this.numericValue);
return {
op,
value,
type: 'single_numeric_query',
} satisfies StrictOmit<SingleNumericQuery, 'field'>;
},
},
{
ALT: () => this.CONSUME(NeqOperator).image as Ops,
},
{
ALT: () => this.CONSUME(LessThanOrEqualOperator).image as Ops,
},
{
ALT: () => this.CONSUME(GreaterThanOrEqualOperator).image as Ops,
},
{
ALT: () => this.CONSUME(LessThanOperator).image as Ops,
},
{
ALT: () => this.CONSUME(GreaterThanOperator).image as Ops,
ALT: () => {
const op = this.CONSUME(
BetweenOperator,
).image.toLowerCase() as 'between';
let inclLow = false,
inclHi = false;
this.OR3([
{
ALT: () => this.CONSUME2(OpenParenGroup),
},
{
ALT: () => {
this.CONSUME2(OpenArray);
inclLow = true;
},
},
]);
const values: number[] = [];
values.push(this.SUBRULE2(this.numericValue));
this.OPTION(() => this.CONSUME2(Comma));
values.push(this.SUBRULE3(this.numericValue));
this.OR4([
{
ALT: () => this.CONSUME3(CloseParenGroup),
},
{
ALT: () => {
this.CONSUME3(CloseArray);
inclHi = true;
},
},
]);
return {
op,
value: values as [number, number],
includeHigher: inclHi,
includeLow: inclLow,
type: 'single_numeric_query',
} satisfies StrictOmit<SingleNumericQuery, 'field'>;
},
},
]);
});
@@ -315,9 +543,87 @@ export class SearchParser extends EmbeddedActionsParser {
]),
);
/**
 * Grammar rule `date_operator`: parses the operator and operand(s) that
 * follow a date field and returns a date query that is still missing its
 * `field`.
 *
 * Two forms are accepted:
 * - a comparison operator (EqOperator, LessThanOrEqualOperator,
 *   LessThanOperator, GreaterThanOrEqualOperator or GreaterThanOperator)
 *   followed by one `searchValue`;
 * - a BetweenOperator followed by an opening delimiter, two `searchValue`s
 *   with an optional Comma between them, and a closing delimiter.
 *   An OpenArray / CloseArray delimiter marks that end of the range as
 *   inclusive; an OpenParenGroup / CloseParenGroup delimiter leaves it
 *   exclusive.
 *
 * @returns the parsed operator, value(s) and, for ranges, the inclusivity
 *   flags, tagged with `type: 'single_date_query'`.
 */
private dateOperatorAndValue = this.RULE('date_operator', () => {
  type DateOperatorResult = StrictOmit<SingleDateSearchQuery, 'field'>;

  return this.OR<DateOperatorResult>([
    {
      // Form 1: comparison operator followed by a single value.
      ALT: () => {
        const comparison = this.OR2<StrictExclude<DateOps, 'between'>>([
          {
            ALT: () => {
              const eqToken = this.CONSUME(EqOperator);
              // A ':' image is reported as plain equality.
              if (eqToken.image === ':') {
                return '=';
              }
              return eqToken.image as '=';
            },
          },
          { ALT: () => this.CONSUME(LessThanOrEqualOperator).image as '<=' },
          { ALT: () => this.CONSUME(LessThanOperator).image as '<' },
          { ALT: () => this.CONSUME(GreaterThanOrEqualOperator).image as '>=' },
          { ALT: () => this.CONSUME(GreaterThanOperator).image as '>' },
        ]);
        const operand = this.SUBRULE(this.searchValue);

        return {
          type: 'single_date_query',
          op: comparison,
          value: operand,
        } satisfies DateOperatorResult;
      },
    },
    {
      // Form 2: between operator followed by a delimited pair of values.
      ALT: () => {
        const betweenToken = this.CONSUME(BetweenOperator);
        // The operator image is lower-cased before being used as the op.
        const op = betweenToken.image.toLowerCase() as 'between';

        // Opening delimiter decides whether the lower bound is inclusive.
        let lowInclusive = false;
        this.OR3([
          { ALT: () => this.CONSUME2(OpenParenGroup) },
          {
            ALT: () => {
              this.CONSUME2(OpenArray);
              lowInclusive = true;
            },
          },
        ]);

        const lower = this.SUBRULE2(this.searchValue);
        // The comma separating the two bounds may be omitted.
        this.OPTION(() => this.CONSUME2(Comma));
        const upper = this.SUBRULE3(this.searchValue);

        // Closing delimiter decides whether the upper bound is inclusive.
        let highInclusive = false;
        this.OR4([
          { ALT: () => this.CONSUME3(CloseParenGroup) },
          {
            ALT: () => {
              this.CONSUME3(CloseArray);
              highInclusive = true;
            },
          },
        ]);

        const range: [string, string] = [lower, upper];
        return {
          op,
          value: range,
          includeHigher: highInclusive,
          includeLow: lowInclusive,
          type: 'single_date_query',
        } satisfies DateOperatorResult;
      },
    },
  ]);
});
private singleStringSearch = this.RULE('singleStringSearch', () => {
const field = this.CONSUME(Identifier, { LABEL: 'field' }).image;
const { op, value } = this.SUBRULE(this.operator, { LABEL: 'op' });
const field = this.CONSUME(StringField, { LABEL: 'field' }).image;
const { op, value } = this.SUBRULE(this.stringOperator, { LABEL: 'op' });
return {
type: 'single_query' as const,
field,
@@ -327,14 +633,50 @@ export class SearchParser extends EmbeddedActionsParser {
});
/**
 * Grammar rule `singleNumericSearch`: a NumericField token followed by the
 * `numericOperator` sub-rule, combined into a complete numeric query.
 *
 * The sub-rule already yields the operator together with its value(s), so
 * the field token and that single sub-rule invocation are the only input
 * consumed here.
 *
 * @returns a `SingleNumericQuery`; for the `between` operator it also
 *   carries the two-value range and the `includeLow` / `includeHigher`
 *   flags reported by the sub-rule.
 */
private singleNumericSearch = this.RULE('singleNumericSearch', () => {
  const field = this.CONSUME(NumericField).image;
  const opRet = this.SUBRULE(this.numericOperator);

  // Range form: forward both bounds and the inclusivity flags.
  if (opRet.op === 'between') {
    return {
      type: 'single_numeric_query' as const,
      field,
      op: 'between',
      value: opRet.value,
      includeHigher: opRet.includeHigher,
      includeLow: opRet.includeLow,
    } satisfies SingleNumericQuery;
  }

  // Comparison form: built purely from what was parsed above. Each key
  // appears exactly once and no further tokens are consumed.
  return {
    type: 'single_numeric_query' as const,
    field,
    op: opRet.op,
    value: opRet.value,
  } satisfies SingleNumericQuery;
});
/**
 * Grammar rule `singleDateSearch`: a DateField token followed by the
 * `dateOperatorAndValue` sub-rule, combined into a complete date query.
 *
 * @returns a `SingleDateSearchQuery`; for the `between` operator it also
 *   carries the `includeLow` / `includeHigher` flags reported by the
 *   sub-rule.
 */
private singleDateSearch = this.RULE('singleDateSearch', () => {
  const fieldToken = this.CONSUME(DateField, { LABEL: 'field' });
  const parsed = this.SUBRULE(this.dateOperatorAndValue, { LABEL: 'op' });
  const field = fieldToken.image;

  // Comparison form: just the operator and its single value.
  if (parsed.op !== 'between') {
    return {
      type: 'single_date_query',
      field,
      op: parsed.op,
      value: parsed.value,
    } satisfies SingleDateSearchQuery;
  }

  // Range form: forward both bounds and the inclusivity flags.
  return {
    type: 'single_date_query',
    field,
    op: 'between',
    value: parsed.value,
    includeHigher: parsed.includeHigher,
    includeLow: parsed.includeLow,
  } satisfies SingleDateSearchQuery;
});
private singleSearch = this.RULE('singleSearch', () => {
return this.OR<SingleSearch>([
{
@@ -343,6 +685,9 @@ export class SearchParser extends EmbeddedActionsParser {
{
ALT: () => this.SUBRULE(this.singleNumericSearch),
},
{
ALT: () => this.SUBRULE(this.singleDateSearch),
},
]);
});
@@ -427,31 +772,120 @@ export function parsedSearchToRequest(input: SearchClause): SearchFilter {
} satisfies SearchFilterOperatorNode;
}
case 'single_numeric_query': {
return {
type: 'value',
fieldSpec: {
key: input.field,
name: '',
// TODO: Fix
op: input.op,
// TODO: derive better type based on field
type: 'numeric' as const,
value: input.value,
},
} satisfies SearchFilterValueNode;
}
case 'single_query': {
const key: string =
FreeSearchQueryKeyMappings[input.field] ?? input.field;
const key: string = virtualFieldToIndexField[input.field] ?? input.field;
const valueConverter: Converter<number> =
input.field in numericFieldNormalizersByField
? numericFieldNormalizersByField[
input.field as keyof typeof numericFieldNormalizersByField
]
: identity;
// Anything other than full inclusive needs to be translated
// to a binary query.
if (input.op === 'between') {
return match([input.includeLow, input.includeHigher])
.returnType<SearchFilter>()
.with(
[true, true],
() =>
({
type: 'value',
fieldSpec: {
key,
name: '',
op: NumericOpToApiType[input.op],
type: 'numeric' as const,
value: [
valueConverter(input.value[0]),
valueConverter(input.value[1]),
],
},
}) satisfies SearchFilterValueNode,
)
.otherwise(([inclLow, inclHi]) => {
const lhs = {
type: 'value',
fieldSpec: {
key,
name: '',
op: inclLow ? '>=' : '>',
type: 'numeric',
value: valueConverter(input.value[0]),
},
} satisfies SearchFilterValueNode;
const rhs = {
type: 'value',
fieldSpec: {
key,
name: '',
op: inclHi ? '<=' : '<',
type: 'numeric',
value: valueConverter(input.value[1]),
},
} satisfies SearchFilterValueNode;
return {
type: 'op',
op: 'and',
children: [lhs, rhs],
};
});
}
return {
type: 'value',
fieldSpec: {
key,
name: '',
op: NumericOpToApiType[input.op],
type: 'numeric' as const,
value: valueConverter(input.value),
},
} satisfies SearchFilterValueNode;
}
case 'single_date_query': {
const key: string = virtualFieldToIndexField[input.field] ?? input.field;
const converter =
input.field in dateFieldNormalizersByField
? dateFieldNormalizersByField[
input.field as keyof typeof dateFieldNormalizersByField
]
: (input: string) => parseInt(input);
if (input.op === 'between') {
return {
type: 'value',
fieldSpec: {
key,
name: '',
op: NumericOpToApiType[input.op],
type: 'date' as const,
value: [converter(input.value[0]), converter(input.value[1])],
},
} satisfies SearchFilterValueNode;
} else {
return {
type: 'value',
fieldSpec: {
key,
name: '',
op: NumericOpToApiType[input.op],
type: 'date' as const,
value: converter(input.value),
},
} satisfies SearchFilterValueNode;
}
}
case 'single_query': {
const key: string = virtualFieldToIndexField[input.field] ?? input.field;
return {
type: 'value',
fieldSpec: {
// HACK for now
key,
name: '',
op: StringOpToApiType[input.op],
// TODO: derive better type based on field
type: 'string' as const,
value: isArray(input.value) ? input.value : [input.value],
},

View File

@@ -31,6 +31,7 @@
},
"include": [
"./src/**/*.ts",
"./scripts/**/*.ts"
],
"exclude": [
"./dist/**/*",

View File

@@ -41,7 +41,7 @@ const DateSearchFieldSchema = z.object({
export type DateSearchField = z.infer<typeof DateSearchFieldSchema>;
export const SearchFieldSchema = z.union([
export const SearchFieldSchema = z.discriminatedUnion('type', [
StringSearchFieldSchema,
FactedStringSearchFieldSchema,
NumericSearchFieldSchema,
@@ -78,7 +78,7 @@ export type SearchFilterOperatorNode = {
// Hack to get recursive types working in zod
export const SearchFilterOperatorNodeSchema = z.object({
type: z.literal('op'),
op: z.union([z.literal('or'), z.literal('and')]),
op: z.enum(['or', 'and']),
get children(): z.ZodArray<
z.ZodDiscriminatedUnion<
[
@@ -102,7 +102,7 @@ export type SearchFilter = z.infer<typeof SearchFilterQuerySchema>;
export const SearchSortSchema = z.object({
field: z.string(),
direction: z.union([z.literal('asc'), z.literal('desc')]),
direction: z.enum(['asc', 'desc']),
});
export type SearchSort = z.infer<typeof SearchSortSchema>;
@@ -132,25 +132,4 @@ export type SearchFieldSpec<Key extends string = string> = {
| ReadonlyArray<MediaSourceLibrary['mediaType']>;
};
export const SearchFieldToType = {
'genres.name': 'facted_string',
'actors.name': 'string',
'director.name': 'string',
'writer.name': 'string',
duration: 'numeric',
type: 'facted_string',
originalReleaseDate: 'numeric',
originalReleaseYear: 'numeric',
libraryId: 'string',
mediaSourceId: 'string',
tags: 'string',
rating: 'string',
title: 'string',
} satisfies Record<string, SearchFieldType>;
export const FreeSearchQueryKeyMappings: Record<string, string> = {
genre: 'genres.name',
actor: 'actors.name',
};
z.globalRegistry.add(SearchFilterQuerySchema, { id: 'SearchFilter' });