test.fileTests: also return text, expected, config

milahu commented

2022-09-11 14:55:24 +02:00

(Migrated from github.com)

make the fileTests function useful for parsing test files

expected has leading + trailing whitespace (\n)
maybe we should expected = expected.trim()?

make the `fileTests` function useful for parsing test files `expected` has leading + trailing whitespace (`\n`) maybe we should `expected = expected.trim()`?

marijnh commented

2022-09-11 15:40:42 +02:00

(Migrated from github.com)

What are you planning to use this for?

milahu commented

2022-09-11 15:50:28 +02:00

(Migrated from github.com)

updating the expected strings in test files
for example after renaming tokens

similar to updating jest snapshot tests

jest --updateSnapshot

tree-sitter has this feature too

# Update all syntax trees in corpus files with current parser output
tree-sitter test --update

sample code:
update test files for lezer-parser

used in
https://github.com/replit/codemirror-lang-nix/pull/3
https://github.com/milahu/lezer-parser-nix

test/update-expressions.js

// test/update-expressions.js

// based on test-parser.js
// based on manual-test.js

import {parser} from "../dist/index.js"
import {stringifyTree} from "./stringify-tree.js"

// use a patched version of fileTests to parse test files
// https://github.com/lezer-parser/generator/pull/7
// https://github.com/lezer-parser/generator/blob/main/src/test.ts
//import {fileTests} from "@lezer/generator/dist/test"
/**
 * Build a short, quoted excerpt of `file` starting at `index`, for use in
 * error messages.
 *
 * The excerpt runs from `index` to the first newline found at or after
 * `index + 80` (or to the end of the file when there is none). Every line of
 * the excerpt is prefixed with `  | `.
 *
 * @param {string} file - Full file content.
 * @param {number} index - Offset at which the excerpt starts.
 * @returns {string} The prefixed excerpt, lines joined with `\n`.
 */
function toLineContext(file, index) {
  const nextBreak = file.indexOf('\n', index + 80);
  const stop = nextBreak === -1 ? file.length : nextBreak;
  const excerptLines = file.substring(index, stop).split(/\n/);
  return excerptLines.map((line) => `  | ${line}`).join('\n');
}
const defaultIgnore = false

/**
 * Split the content of a test file into its individual test cases.
 *
 * Each case has the shape
 *
 *     # name {optional JSON config}
 *     input text
 *     ==>
 *     expected tree
 *
 * and cases follow each other, each starting on a line beginning with `#`.
 *
 * @param {string} file - Content of the test file.
 * @param {string} fileName - Name used in the error message only.
 * @param {*} [mayIgnore] - Not used by this patched version (the `run`
 *   method that consumed it has been dropped); kept for signature
 *   compatibility.
 * @returns {Array<{name: string, text: string, expected: string,
 *   configStr: (string|undefined), config: (object|null), strict: boolean}>}
 *   One entry per case. `text` and `expected` are trimmed; `strict` is false
 *   when `expected` contains `⚠` or `...`.
 * @throws {Error} When the content at the current position does not match
 *   the case format.
 */
function fileTests(file, fileName, mayIgnore = defaultIgnore) {
  // Sticky + global: every exec() continues exactly where the previous case ended.
  const casePattern = /\s*#\s*(.*)(?:\r\n|\r|\n)([^]*?)==+>([^]*?)(?:$|(?:\r\n|\r|\n)+(?=#))/gy
  // Splits the header line into the name and an optional trailing {...} config.
  const headerPattern = /(.*?)(\{.*?\})?$/
  const nonStrictMarker = /⚠|\.\.\./

  const cases = []
  let consumed = 0
  while (true) {
    const match = casePattern.exec(file)
    if (match === null) {
      throw new Error(`Unexpected file format in ${fileName} around\n\n${toLineContext(file, consumed)}`)
    }
    const [, header, rawText, rawExpected] = match

    const headerParts = headerPattern.exec(header)
    if (headerParts === null) {
      throw Error('execResult is null')
    }
    const name = headerParts[1]
    const configStr = headerParts[2]

    const text = rawText.trim()
    const expected = rawExpected.trim()

    cases.push({
      name,
      text,
      expected,
      configStr,
      config: configStr ? JSON.parse(configStr) : null,
      strict: !nonStrictMarker.test(expected),
    })

    consumed = match.index + match[0].length
    if (consumed == file.length) {
      return cases
    }
  }
}

import * as fs from "fs"
import * as path from "path"
import { fileURLToPath } from 'url';
// Directory that contains this script; the *.txt test files are looked up here.
let caseDir = path.dirname(fileURLToPath(import.meta.url))

// When true, the regenerated trees are written in the indented multi-line format.
const writePrettyTree = true

// Regenerate the expected tree of every test case in every *.txt file next to
// this script, then overwrite the file in place.
for (let file of fs.readdirSync(caseDir)) {
  if (!/\.txt$/.test(file)) continue
  let filePath = path.join(caseDir, file)
  let fileContent = fs.readFileSync(filePath, "utf8")
  const result = []
  for (let testData of fileTests(fileContent, file)) {
    const { name, text, configStr, config, strict } = testData;
    const strictStr = strict ? '' : '... ' // prefer ascii ... over unicode ⚠
    // Parse with the per-test config from the header, so the regenerated tree
    // is produced by the same parser setup that the test names. `strict` is
    // forced off — presumably so that syntax errors still yield a tree with ⚠
    // nodes (handled below) rather than an exception; confirm against the
    // parser's configure() documentation.
    const testParser = (config && parser.configure)
      ? parser.configure({ ...config, strict: false })
      : parser;
    const tree = testParser.parse(text);
    const stringifyOptions = writePrettyTree && { pretty: true, text };
    const actual = stringifyTree(tree, stringifyOptions);
    // parse error -> make tests fail
    // FIXME make this optional, to allow testing for parser errors
    const actualWithFixme = actual.replace(/⚠/g, '⚠ FIXME');
    result.push(`# ${name}${(configStr || '')}\n${text}\n==>\n${strictStr}${actualWithFixme}`)
  }
  const newFileContent = result.join("\n\n") + "\n";
  // TODO backup?
  console.log(`writing ${filePath}`);
  fs.writeFileSync(filePath, newFileContent, "utf8");
}

test/manual-test.js

// test/manual-test.js

import {parser as parserImported} from "../dist/index.js"
import {stringifyTree} from "./stringify-tree.js"

// Bail out with a usage hint when no input text was given on the command line.
if (process.argv.length < 3) {
  const scriptName = process.argv[1].split('/').pop();
  console.log(`usage: node ${scriptName} "input text"`);
  process.exit(1);
}

const text = process.argv[2];

// based on https://github.com/lezer-parser/generator/blob/main/src/test.ts#L161
const config = null;
const strict = true;
const parser = (parserImported.configure && (strict || config))
  ? parserImported.configure({strict, ...config})
  : parserImported;

/**
 * Parse `source` with the configured parser. When the parser reports
 * "No parse at <pos>", the error message is extended with the input text and
 * a caret under the reported position before the error is rethrown.
 */
function parseOrExplain(source) {
  try {
    return parser.parse(source);
  }
  catch (e) {
    // https://github.com/lezer-parser/lr/blob/main/src/parse.ts#L300
    const marker = "No parse at ";
    if (e.message.startsWith(marker)) {
      const pos = parseInt(e.message.slice(marker.length));
      e.message += `\n      ${source}\n      ${" ".repeat(pos)}^`;
    }
    throw e;
  }
}

const actual = parseOrExplain(text);

// Print the compact form, a blank line, then the pretty-printed form.
console.log(stringifyTree(actual));
console.log();
console.log(stringifyTree(actual, {pretty: true, text }));

test/stringify-tree.js

// test/stringify-tree.js

// FIXME: don't print empty lines in the pretty tree. This happens in rare cases,
// for example: an extra newline before ")" tokens

// dirty: this will patch the tree's toString methods
/**
 * Render a syntax tree as a string.
 *
 * Note: this works by monkeypatching. It overwrites `toString` on `tree`
 * itself and on `tree.children[0]` (plus `childString` when that child is a
 * buffer node, i.e. has a `set` property). Other children keep whatever
 * `toString` they already have.
 *
 * @param {object} tree - Root node; must expose `type` and `children`.
 * @param {object} [options]
 * @param {boolean} [options.pretty=false] - Multi-line output with
 *   indentation and parentheses.
 * @param {boolean} [options.human=false] - Indentation-only output (no
 *   parentheses); for buffer nodes the matching source slice is appended when
 *   `text` is given.
 * @param {string} [options.text=''] - Source text the tree was parsed from.
 * @param {string} [options.indent='  '] - String used for one indent level.
 * @returns {string} The rendered tree. A root without children renders as
 *   just its (possibly quoted) type name.
 */
export function stringifyTree(tree, options) {

  if (!options) options = {};
  const pretty = options.pretty || false;
  const human = options.human || false; // human readable, like python or yaml
  const text = options.text || '';
  const indentStep = options.indent || '  ';

  // Tree https://github.com/lezer-parser/common/blob/main/src/tree.ts#L314
  tree.toString = function toString(depth = 0) {
    let children = ""
    for (let ch of this.children) {
      let str = ch.toString(depth + 1)
      if (str) {
        children += str
      }
    }
    // Names containing non-word characters are quoted, except for error nodes.
    return !this.type.name ? children :
      (/\W/.test(this.type.name) && !this.type.isError ? JSON.stringify(this.type.name) : this.type.name) + (
        human
        ? (children.length ? "\n" + children : "")
        : (children.length ? (" (" + (pretty ? "\n" : "") + children + (pretty ? "\n" : "") + ")") : "")
      )
  }

  // A root without children has no first child to inspect or patch;
  // without this guard the `.set` lookup below would throw a TypeError.
  if (tree.children.length === 0) {
    return tree.toString(0);
  }

  if (!tree.children[0].set) {
    // Tree
    // TODO print type + source tree

    tree.children[0].toString = function toString(depth = -1) {
      let children = ""
      for (let ch of this.children) {
        let str = ch.toString(depth + 1)
        if (str) {
          children += str
        }
      }
      // NOTE(review): this is the whole source text, not the slice covered by
      // this node — looks unfinished (see the TODO above).
      let nodeText = text
      let indent = indentStep.repeat(depth)
      if (human) {
        return indent + (!this.type.name ? children :
          (/\W/.test(this.type.name) && !this.type.isError ? JSON.stringify(this.type.name) : this.type.name) +
          (text ? ` ${nodeText}` : '') +
          (children.length ? "\n" + children : ""))
      }
      return indent + (!this.type.name ? children :
        (/\W/.test(this.type.name) && !this.type.isError ? JSON.stringify(this.type.name) : this.type.name) +
        (children.length ? ((pretty ? "\n" : "") + "(" + children + ")") : ""))
    }

    return tree.toString(0);
  }

  // TreeBuffer https://github.com/lezer-parser/common/blob/main/src/tree.ts#L530
  // monkeypatch: print type + source tree
  tree.children[0].toString = function toString(depth = 0) {
    let result = []
    // Walk the top-level nodes of the buffer: each node occupies 4 slots and
    // slot 3 holds the buffer index just past the node and its descendants.
    for (let index = 0; index < this.buffer.length;) {
      result.push(this.childString(index, depth + 1))
      index = this.buffer[index + 3]
    }
    return result.join('')
  }
  tree.children[0].childString = function childString(index, depth = 0) {
    let id = this.buffer[index], endIndex = this.buffer[index + 3]
    let type = this.set.types[id], result = type.name // TODO add source to result
    if (/\W/.test(result) && !type.isError) result = JSON.stringify(result)
    let nodeText
    if (human) {
      if (text) {
        // Slots 1 and 2 are used as the start/end offsets into `text`.
        nodeText = text.slice(
          this.buffer[index + 1],
          this.buffer[index + 2],
        )
        if (/[\r\n]/.test(nodeText)) nodeText = JSON.stringify(nodeText)
        result += ` ${nodeText}`
      }
      result = indentStep.repeat(depth) + result
    }
    if (pretty) {
      result = indentStep.repeat(depth) + result
    }
    index += 4
    // Leaf node: nothing between this node's header and its end index.
    if (endIndex == index) return result
    let children = []
    while (index < endIndex) {
      children.push(this.childString(index, depth + 1))
      index = this.buffer[index + 3]
    }
    if (human) {
      return result + '\n' + children.map(str => str + '\n').join('')
    }
    if (pretty) {
      const indent = indentStep.repeat(depth);
      // TODO? test children.length
      return result + " (" + '\n' + children.map(str => str + '\n').join('') + indent + ")"
    }
    return result + "(" + children.join(",") + ")"
  }

  return tree.toString(-1);
}

updating the `expected` strings in test files for example after renaming tokens similar to [updating jest snapshot tests](https://jestjs.io/docs/snapshot-testing#updating-snapshots) ```sh jest --updateSnapshot ``` tree-sitter has this feature too ```sh # Update all syntax trees in corpus files with current parser output tree-sitter test --update ``` sample code: update test files for lezer-parser used in https://github.com/replit/codemirror-lang-nix/pull/3 https://github.com/milahu/lezer-parser-nix <details> <summary> test/update-expressions.js </summary> ```js // test/update-expressions.js // based on test-parser.js // based on manual-test.js import {parser} from "../dist/index.js" import {stringifyTree} from "./stringify-tree.js" // use a patched version of fileTests to parse test files // https://github.com/lezer-parser/generator/pull/7 // https://github.com/lezer-parser/generator/blob/main/src/test.ts //import {fileTests} from "@lezer/generator/dist/test" function toLineContext(file, index) { const endEol = file.indexOf('\n', index + 80); const endIndex = endEol === -1 ? file.length : endEol; return file.substring(index, endIndex).split(/\n/).map(str => ' | ' + str).join('\n'); } const defaultIgnore = false function fileTests(file, fileName, mayIgnore = defaultIgnore) { let caseExpr = /\s*#\s*(.*)(?:\r\n|\r|\n)([^]*?)==+>([^]*?)(?:$|(?:\r\n|\r|\n)+(?=#))/gy let tests = [] let lastIndex = 0; for (;;) { let m = caseExpr.exec(file) if (!m) throw new Error(`Unexpected file format in ${fileName} around\n\n${toLineContext(file, lastIndex)}`) //let [, name, configStr] = /(.*?)(\{.*?\})?$/.exec(m[1])! // typescript let execResult = /(.*?)(\{.*?\})?$/.exec(m[1]) if (execResult === null) throw Error('execResult is null') let [, name, configStr] = execResult let text = m[2].trim(), expected = m[3].trim() let config = configStr ? 
JSON.parse(configStr) : null let strict = !/⚠|\.\.\./.test(expected) tests.push({ name, text, expected, configStr, config, strict, /* run(parser) { if (parser.configure && (strict || config)) parser = parser.configure({strict, ...config}) testTree(parser.parse(text), expected, mayIgnore) }, */ }) lastIndex = m.index + m[0].length if (lastIndex == file.length) break } return tests } import * as fs from "fs" import * as path from "path" import { fileURLToPath } from 'url'; let caseDir = path.dirname(fileURLToPath(import.meta.url)) const writePrettyTree = true for (let file of fs.readdirSync(caseDir)) { if (!/\.txt$/.test(file)) continue //let fileName = /^[^\.]*/.exec(file)[0] let filePath = path.join(caseDir, file) let fileContent = fs.readFileSync(filePath, "utf8") const result = [] for (let testData of fileTests(fileContent, file)) { const { name, text, configStr, strict } = testData; const strictStr = strict ? '' : '... ' // prefer ascii ... over unicode ⚠ const tree = parser.parse(testData.text); const stringifyOptions = writePrettyTree && { pretty: true, text }; const actual = stringifyTree(tree, stringifyOptions); // parse error -> make tests fail // FIXME make this optional, to allow testing for parser errors const actualWithFixme = actual.replace(/⚠/g, '⚠ FIXME'); result.push(`# ${name}${(configStr || '')}\n${text}\n==>\n${strictStr}${actualWithFixme}`) } const newFileContent = result.join("\n\n") + "\n"; // TODO backup? 
console.log(`writing ${filePath}`); fs.writeFileSync(filePath, newFileContent, "utf8"); } ``` </details> <details> <summary> test/manual-test.js </summary> ```js // test/manual-test.js import {parser as parserImported} from "../dist/index.js" import {stringifyTree} from "./stringify-tree.js" if (process.argv.length < 3) { console.log(`usage: node ${process.argv[1].split('/').pop()} "input text"`); process.exit(1); } var text = process.argv[2]; var parser = parserImported; // allow reassign // based on https://github.com/lezer-parser/generator/blob/main/src/test.ts#L161 var config = null; var strict = true; if (parser.configure && (strict || config)) parser = parser.configure({strict, ...config}); let actual; try { actual = parser.parse(text); } catch (e) { // https://github.com/lezer-parser/lr/blob/main/src/parse.ts#L300 if (e.message.startsWith("No parse at ")) { const pos = parseInt(e.message.slice("No parse at ".length)); e.message += `\n ${text}\n ${" ".repeat(pos)}^`; } throw e; } console.log(stringifyTree(actual)); console.log(); console.log(stringifyTree(actual, {pretty: true, text })); ``` </details> <details> <summary> test/stringify-tree.js </summary> ```js // test/stringify-tree.js // FIXME dont print empty lines in pretty tree. 
this happens in rare cases // example: extra newline before ")" tokens // dirty: this will patch the tree's toString methods export function stringifyTree(tree, options) { if (!options) options = {}; const pretty = options.pretty || false; const human = options.human || false; // human readable, like python or yaml const text = options.text || ''; const indentStep = options.indent || ' '; // Tree https://github.com/lezer-parser/common/blob/main/src/tree.ts#L314 tree.toString = function toString(depth = 0) { //let mounted = this.prop(NodeProp.mounted) //if (mounted && !mounted.overlay) return mounted.tree.toString() let children = "" for (let ch of this.children) { let str = ch.toString(depth + 1) if (str) { //if (children) children += "," children += str } } return !this.type.name ? children : (/\W/.test(this.type.name) && !this.type.isError ? JSON.stringify(this.type.name) : this.type.name) + ( human ? (children.length ? "\n" + children : "") : (children.length ? (" (" + (pretty ? "\n" : "") + children + (pretty ? "\n" : "") + ")") : "") ) } if (!tree.children[0].set) { // Tree // TODO print type + source tree //console.dir(tree, { depth: 5 }); tree.children[0].toString = function toString(depth = -1) { //let mounted = this.prop(NodeProp.mounted) //if (mounted && !mounted.overlay) return mounted.tree.toString() let children = "" for (let ch of this.children) { let str = ch.toString(depth + 1) if (str) { //if (children) children += "," children += str } } let nodeText = text let indent = indentStep.repeat(depth) if (human) { return indent + (!this.type.name ? children : (/\W/.test(this.type.name) && !this.type.isError ? JSON.stringify(this.type.name) : this.type.name) + //(children.length ? "(" + children + ")" : "") (text ? ` ${nodeText}` : '') + (children.length ? "\n" + children : "")) } return indent + (!this.type.name ? children : (/\W/.test(this.type.name) && !this.type.isError ? JSON.stringify(this.type.name) : this.type.name) + //(children.length ? 
"(" + children + ")" : "") (children.length ? ((pretty ? "\n" : "") + "(" + children + ")") : "")) } return tree.toString(0); } else if (tree.children[0].set) { // TreeBuffer https://github.com/lezer-parser/common/blob/main/src/tree.ts#L530 // monkeypatch: print type + source tree tree.children[0].toString = function toString(depth = 0) { let result = [] for (let index = 0; index < this.buffer.length;) { result.push(this.childString(index, depth + 1)) index = this.buffer[index + 3] } //return result.join(",") return result.join('') } tree.children[0].childString = function childString(index, depth = 0) { let id = this.buffer[index], endIndex = this.buffer[index + 3] let type = this.set.types[id], result = type.name // TODO add source to result if (/\W/.test(result) && !type.isError) result = JSON.stringify(result) let nodeText if (human) { if (text) { nodeText = text.slice( this.buffer[index + 1], this.buffer[index + 2], ) if (/[\r\n]/.test(nodeText)) nodeText = JSON.stringify(nodeText) result += ` ${nodeText}` } result = indentStep.repeat(depth) + result } if (pretty) { result = indentStep.repeat(depth) + result } index += 4 if (endIndex == index) return result let children = [] while (index < endIndex) { children.push(this.childString(index, depth + 1)) index = this.buffer[index + 3] } if (human) { const indent = indentStep.repeat(depth); return result + '\n' + children.map(str => str + '\n').join('') } if (pretty) { const indent = indentStep.repeat(depth); // TODO? test children.length return result + " (" + '\n' + children.map(str => str + '\n').join('') + indent + ")" } return result + "(" + children.join(",") + ")" } return tree.toString(-1); } } ``` </details>

marijnh commented

2022-09-11 15:59:53 +02:00

(Migrated from github.com)

All right, seems harmless enough.

Rows
Columns

test.fileTests: also return text, expected, config #7