Compare commits

...

5 Commits

Author SHA1 Message Date
Jaehwang Jung c79e43bb4d
Merge 1dc295cdb7 into c18d7941ef 2024-05-10 01:08:44 +08:00
dundargoc c18d7941ef build: allow sccache as compiler cache
Also enable caching for dependencies.

Closes https://github.com/neovim/neovim/issues/28670
2024-05-09 16:39:45 +02:00
Jaehwang Jung 1dc295cdb7 perf(treesitter): allow parsing multiple ranges
Problem:
Partial injection invalidates regions and children parsers outside the
visible range (passed to `parse`). Invalidating non-matching regions for
each `parse()` is not efficient if multiple windows display different
ranges of the same buffer.

Solution:
Let `parse()` take a set of ranges, and invoke `parse()` for all visible
ranges at `on_start`.
2024-04-25 23:33:40 +09:00
Jaehwang Jung 14a7dbbf8a perf(treesitter): run injection query only for visible lines
Problem:
Executing injection query on the full source is slow.

Solution:
Execute injection query only on the given range.

Notes
* This is not applicable to languages with combined injection.
* `is_valid(false)` should run full injection to determine if the
  current set of children parsers and their regions are complete. Since
  this can be slow, `parse()` no longer checks this at the beginning.
* Children parsers and regions outside the given range are discarded.
2024-04-25 23:33:38 +09:00
Jaehwang Jung 3513c62a63 perf(treesitter)!: incremental invalidation
Problem:
After an edit that changes the number of injection regions, the
LanguageTree drops all the existing trees. This is inefficient because the
injections should be parsed from scratch.

Solution:
When setting included regions, match them with the existing regions so
that they can be reparsed incrementally. This uses a table that maps
region values to their indices. Regions are matched by "similarity",
because some changes of regions cannot be precisely tracked by
`_edit()`.

Breaking change:
The indices of `parser:trees()` now behave differently because existing
regions are reused. So `parser:parse(true)` does not ensure that the
tree table is list-like. Also, when new regions are added manually, they
are first added and then the stale regions are discarded. So the
existing uses of `trees[1]` may break. Use `next(trees())` instead.
2024-04-25 02:25:16 +09:00
7 changed files with 568 additions and 161 deletions

View File

@ -50,11 +50,6 @@ file(GLOB DOCFILES CONFIGURE_DEPENDS ${PROJECT_SOURCE_DIR}/runtime/doc/*.txt)
set_directory_properties(PROPERTIES
EP_PREFIX "${DEPS_BUILD_DIR}")
find_program(CCACHE_PRG ccache)
if(CCACHE_PRG)
set(CMAKE_C_COMPILER_LAUNCHER ${CMAKE_COMMAND} -E env CCACHE_SLOPPINESS=pch_defines,time_macros ${CCACHE_PRG})
endif()
if(NOT CI_BUILD)
set(CMAKE_INSTALL_MESSAGE NEVER)
endif()

View File

@ -23,6 +23,12 @@ if(POLICY CMP0092)
list(APPEND DEPS_CMAKE_ARGS -D CMAKE_POLICY_DEFAULT_CMP0092=NEW)
endif()
find_program(CACHE_PRG NAMES ccache sccache)
if(CACHE_PRG)
set(CMAKE_C_COMPILER_LAUNCHER ${CMAKE_COMMAND} -E env CCACHE_SLOPPINESS=pch_defines,time_macros ${CACHE_PRG})
list(APPEND DEPS_CMAKE_CACHE_ARGS -DCMAKE_C_COMPILER_LAUNCHER:STRING=${CMAKE_C_COMPILER_LAUNCHER})
endif()
# MAKE_PRG
if(UNIX)
find_program(MAKE_PRG NAMES gmake make)
@ -58,7 +64,8 @@ function(get_externalproject_options name DEPS_IGNORE_SHA)
set(EXTERNALPROJECT_OPTIONS
DOWNLOAD_NO_PROGRESS TRUE
EXTERNALPROJECT_OPTIONS URL ${${name_allcaps}_URL})
EXTERNALPROJECT_OPTIONS URL ${${name_allcaps}_URL}
CMAKE_CACHE_ARGS ${DEPS_CMAKE_CACHE_ARGS})
if(NOT ${DEPS_IGNORE_SHA})
list(APPEND EXTERNALPROJECT_OPTIONS URL_HASH SHA256=${${name_allcaps}_SHA256})

View File

@ -1329,7 +1329,8 @@ LanguageTree:invalidate({reload}) *LanguageTree:invalidate()*
LanguageTree:is_valid({exclude_children}) *LanguageTree:is_valid()*
Returns whether this LanguageTree is valid, i.e., |LanguageTree:trees()|
reflects the latest state of the source. If invalid, user should call
|LanguageTree:parse()|.
|LanguageTree:parse()|. `is_valid(false)` can be slow because it runs
injection on the full source.
Parameters: ~
• {exclude_children} (`boolean?`) whether to ignore the validity of
@ -1364,7 +1365,7 @@ LanguageTree:named_node_for_range({range}, {opts})
Return: ~
(`TSNode?`)
LanguageTree:parse({range}) *LanguageTree:parse()*
LanguageTree:parse({ranges}) *LanguageTree:parse()*
Recursively parse all regions in the language tree using
|treesitter-parsers| for the corresponding languages and run injection
queries on the parsed trees to determine whether child trees should be
@ -1375,11 +1376,11 @@ LanguageTree:parse({range}) *LanguageTree:parse()*
if {range} is `true`).
Parameters: ~
• {range} (`boolean|Range?`) Parse this range in the parser's source.
Set to `true` to run a complete parse of the source (Note:
Can be slow!) Set to `false|nil` to only parse regions with
empty ranges (typically only the root tree without
injections).
• {ranges} (`boolean|Range|(Range)[]?`) Parse this range(s) in the
parser's source. Set to `true` to run a complete parse of
the source (Note: Can be slow!) Set to `false|nil` to only
parse regions with empty ranges (typically only the root
tree without injections).
Return: ~
(`table<integer, TSTree>`)
@ -1427,10 +1428,7 @@ LanguageTree:tree_for_range({range}, {opts})
LanguageTree:trees() *LanguageTree:trees()*
Returns all trees of the regions parsed by this parser. Does not include
child languages. The result is list-like if
• this LanguageTree is the root, in which case the result is empty or a
singleton list; or
• the root LanguageTree is fully parsed.
child languages.
Return: ~
(`table<integer, TSTree>`)

View File

@ -384,6 +384,23 @@ function TSHighlighter._on_spell_nav(_, _, buf, srow, _, erow, _)
end
end
--- Decoration-provider `on_start` handler.
---
--- Walks every window of the current tabpage, and for each window whose buffer has an active
--- highlighter records that window's visible line span (`line('w0') - 1` .. `line('w$')`).
--- Each such buffer's tree is then parsed once with the full list of spans collected for it, so
--- several windows onto the same buffer lead to a single `parse()` call.
function TSHighlighter._on_start()
  ---@type table<integer, (Range)[]>
  local visible = {}

  for _, winid in ipairs(api.nvim_tabpage_list_wins(0)) do
    local bufnr = api.nvim_win_get_buf(winid)
    if TSHighlighter.active[bufnr] then
      local spans = visible[bufnr]
      if not spans then
        spans = {}
        visible[bufnr] = spans
      end
      local first = vim.fn.line('w0', winid) - 1
      local last = vim.fn.line('w$', winid)
      spans[#spans + 1] = { first, last }
    end
  end

  for bufnr, spans in pairs(visible) do
    TSHighlighter.active[bufnr].tree:parse(spans)
  end
end
---@private
---@param _win integer
---@param buf integer
@ -394,13 +411,13 @@ function TSHighlighter._on_win(_, _win, buf, topline, botline)
if not self then
return false
end
self.tree:parse({ topline, botline + 1 })
self:prepare_highlight_states(topline, botline + 1)
self.redraw_count = self.redraw_count + 1
return true
end
api.nvim_set_decoration_provider(ns, {
on_start = TSHighlighter._on_start,
on_win = TSHighlighter._on_win,
on_line = TSHighlighter._on_line,
_on_spell_nav = TSHighlighter._on_spell_nav,

View File

@ -72,19 +72,33 @@ local TSCallbackNames = {
---@field private _callbacks table<TSCallbackName,function[]> Callback handlers
---@field package _callbacks_rec table<TSCallbackName,function[]> Callback handlers (recursive)
---@field private _children table<string,vim.treesitter.LanguageTree> Injected languages
---@field private _injection_query vim.treesitter.Query Queries defining injected languages
---@field private _injection_query vim.treesitter.Query? Queries defining injected languages
---
---If `is_valid(true) and _injections_processed`, the set of children parsers and their sets of
---regions are complete wrt. the full source, so that it's not necessary to execute injections.
---@field private _injections_processed boolean
---
---@field private _opts table Options
---@field private _parser TSParser Parser for language
---@field private _has_regions boolean
---@field private _regions table<integer, Range6[]>?
---
---List of regions this tree should manage and parse. If nil then regions are
---taken from _trees. This is mostly a short-lived cache for included_regions()
---@field private _regions table<integer, Range6[]>?
---
---Inverse of `_regions`. start byte of range 1 ↦ { region1, index1, region2, index2, .. }.
---Used for checking if a new region is already managed by this parser, so that it can be parsed
---incrementally.
---@field private _regions_inv table<integer, (Range6[]|integer)[]>?
---
---@field private _lang string Language name
---@field private _parent? vim.treesitter.LanguageTree Parent LanguageTree
---@field private _source (integer|string) Buffer or string to parse
---@field private _trees table<integer, TSTree> Reference to parsed tree (one for each language).
---
---Reference to parsed tree (one for each language).
---Each key is the index of region, which is synced with _regions and _valid.
---@field private _trees table<integer, TSTree>
---
---@field private _valid boolean|table<integer,boolean> If the parsed tree is valid
---@field private _logger? fun(logtype: string, msg: string)
---@field private _logfile? file*
@ -245,9 +259,6 @@ end
--- Returns all trees of the regions parsed by this parser.
--- Does not include child languages.
--- The result is list-like if
--- * this LanguageTree is the root, in which case the result is empty or a singleton list; or
--- * the root LanguageTree is fully parsed.
---
---@return table<integer, TSTree>
function LanguageTree:trees()
@ -261,6 +272,7 @@ end
--- Returns whether this LanguageTree is valid, i.e., |LanguageTree:trees()| reflects the latest
--- state of the source. If invalid, user should call |LanguageTree:parse()|.
--- `is_valid(false)` can be slow because it runs injection on the full source.
---@param exclude_children boolean|nil whether to ignore the validity of children (default `false`)
---@return boolean
function LanguageTree:is_valid(exclude_children)
@ -275,8 +287,11 @@ function LanguageTree:is_valid(exclude_children)
end
if not exclude_children then
-- Run full injection to check if the current set of children and their regions are complete.
-- Note that `set_included_regions` marks new regions invalid.
if not self._injections_processed then
return false
self:_add_injections(true)
self._injections_processed = true
end
for _, child in pairs(self._children) do
@ -305,24 +320,26 @@ function LanguageTree:source()
end
--- @param region Range6[]
--- @param range? boolean|Range
--- @param ranges? boolean|(Range)[]
--- @return boolean
local function intercepts_region(region, range)
local function intercepts_region(region, ranges)
if #region == 0 then
return true
end
if range == nil then
if ranges == nil then
return false
end
if type(range) == 'boolean' then
return range
if type(ranges) == 'boolean' then
return ranges
end
for _, r in ipairs(region) do
if Range.intercepts(r, range) then
return true
for _, r1 in ipairs(region) do
for _, r2 in ipairs(ranges) do
if Range.intercepts(r1, r2) then
return true
end
end
end
@ -330,11 +347,11 @@ local function intercepts_region(region, range)
end
--- @private
--- @param range boolean|Range?
--- @param ranges boolean|(Range)[]?
--- @return Range6[] changes
--- @return integer no_regions_parsed
--- @return number total_parse_time
function LanguageTree:_parse_regions(range)
function LanguageTree:_parse_regions(ranges)
local changes = {}
local no_regions_parsed = 0
local total_parse_time = 0
@ -343,17 +360,17 @@ function LanguageTree:_parse_regions(range)
self._valid = {}
end
-- If there are no ranges, set to an empty list
-- If there is no region, set to an empty list
-- so the included ranges in the parser are cleared.
for i, ranges in pairs(self:included_regions()) do
for i, region in pairs(self:included_regions()) do
if
not self._valid[i]
and (
intercepts_region(ranges, range)
or (self._trees[i] and intercepts_region(self._trees[i]:included_ranges(false), range))
intercepts_region(region, ranges)
or (self._trees[i] and intercepts_region(self._trees[i]:included_ranges(false), ranges))
)
then
self._parser:set_included_ranges(ranges)
self._parser:set_included_ranges(region)
local parse_time, tree, tree_changes =
tcall(self._parser.parse, self._parser, self._trees[i], self._source, true)
@ -374,11 +391,12 @@ function LanguageTree:_parse_regions(range)
end
--- @private
--- @param ranges boolean|(Range)[]|nil
--- @return number
function LanguageTree:_add_injections()
function LanguageTree:_add_injections(ranges)
local seen_langs = {} ---@type table<string,boolean>
local query_time, injections_by_lang = tcall(self._get_injections, self)
local query_time, injections_by_lang = tcall(self._get_injections, self, ranges)
for lang, injection_regions in pairs(injections_by_lang) do
local has_lang = pcall(language.add, lang)
@ -406,6 +424,14 @@ function LanguageTree:_add_injections()
return query_time
end
--- Computes the span covered by a region: from the start of its first range to the end of its
--- last range.
---@param region (Range)[]
---@return Range4
local function region_range(region)
  local head = region[1]
  local start_row, start_col = Range.unpack4(head)
  local tail = region[#region]
  local _, _, end_row, end_col = Range.unpack4(tail)
  return { start_row, start_col, end_row, end_col }
end
--- Recursively parse all regions in the language tree using |treesitter-parsers|
--- for the corresponding languages and run injection queries on the parsed trees
--- to determine whether child trees should be created and parsed.
@ -413,16 +439,16 @@ end
--- Any region with empty range (`{}`, typically only the root tree) is always parsed;
--- otherwise (typically injections) only if it intersects {ranges} (or if {ranges} is `true`).
---
--- @param range boolean|Range|nil: Parse this range in the parser's source.
--- @param ranges boolean|Range|(Range)[]|nil: Parse this range(s) in the parser's source.
--- Set to `true` to run a complete parse of the source (Note: Can be slow!)
--- Set to `false|nil` to only parse regions with empty ranges (typically
--- only the root tree without injections).
--- @return table<integer, TSTree>
function LanguageTree:parse(range)
if self:is_valid() then
self:_log('valid')
return self._trees
function LanguageTree:parse(ranges)
if type(ranges) == 'table' and #ranges > 0 and type(ranges[1]) == 'number' then
ranges = { ranges }
end
---@cast ranges boolean|(Range)[]|nil
local changes --- @type Range6[]?
@ -433,16 +459,26 @@ function LanguageTree:parse(range)
-- At least 1 region is invalid
if not self:is_valid(true) then
changes, no_regions_parsed, total_parse_time = self:_parse_regions(range)
changes, no_regions_parsed, total_parse_time = self:_parse_regions(ranges)
-- Need to run injections when we parsed something
if no_regions_parsed > 0 then
self._injections_processed = false
end
end
if not self._injections_processed and range ~= false and range ~= nil then
query_time = self:_add_injections()
self._injections_processed = true
-- NOTE: Trade-off in partial injection query execution
-- * The good: Each `parse()` is faster.
-- * The bad: `is_valid(false)` is more expensive, requiring a full injection query execution. To
-- avoid this cost, each `parse()` always runs partial injection. However, this is not a big
-- problem as partial injection is very cheap even on huge files.
-- * A potential optimization: Track the ranges where the set of injected regions are known to be
-- complete and valid, and run the injection query only on the intersection of requested ranges
-- and the invalid ranges. This would be even more beneficial for combined injection.
if self._injection_query and not self._injections_processed and ranges then
query_time = self:_add_injections(ranges)
if ranges == true or self._injection_query.has_combined_injection then
self._injections_processed = true
end
end
self:_log({
@ -450,11 +486,11 @@ function LanguageTree:parse(range)
regions_parsed = no_regions_parsed,
parse_time = total_parse_time,
query_time = query_time,
range = range,
ranges = ranges,
})
for _, child in pairs(self._children) do
child:parse(range)
child:parse(ranges)
end
return self._trees
@ -600,6 +636,120 @@ function LanguageTree:_iter_regions(fn)
end
end
---Approximate equality of two regions. Two equal regions are always similar.
---See the comment in `set_included_regions` on why similarity is used.
---For now the check is simple: the regions are similar when the end bytes of their first ranges
---match, or when the end bytes of their last ranges match.
---@param region1 Range6[]
---@param region2 Range6[]
---@return boolean
local function region_similar(region1, region2)
  local head1, head2 = region1[1], region2[1]
  if head1[6] == head2[6] then
    return true
  end
  local tail1, tail2 = region1[#region1], region2[#region2]
  return tail1[6] == tail2[6]
end
---Looks up `region` in the inverse region table, keyed by the start byte of its first range.
---Returns nothing when no bucket exists for that start byte. Otherwise returns the index of an
---exactly equal region together with `true`, or the index of the last similar region in the
---bucket (nil if none is similar) together with `false`.
---@param regions_inv table<integer, (Range6[]|integer)[]>
---@param region Range6[]
---@return integer?
---@return boolean? exact
local function regions_inv_lookup(regions_inv, region)
  local bucket = regions_inv[region[1][3]]
  if not bucket then
    return
  end

  local match ---@type integer?
  local pos = 1
  -- The bucket is a flat list of (region, index) pairs, hence the stride of 2.
  while pos <= #bucket do
    local candidate = bucket[pos] --[[@as Range6[] ]]
    if region_similar(candidate, region) then
      match = bucket[pos + 1] --[[@as integer]]
      if vim.deep_equal(candidate, region) then
        return match, true
      end
    end
    pos = pos + 2
  end
  return match, false
end
---Records `region` and its index `i` in the inverse region table. The entry is appended as a
---(region, index) pair to the bucket keyed by the start byte of the region's first range; the
---bucket is created when absent.
---@param regions_inv table<integer, (Range6[]|integer)[]>
---@param i integer
---@param region Range6[]
local function regions_inv_insert(regions_inv, i, region)
  local key = region[1][3]
  local entries = regions_inv[key]
  if entries then
    entries[#entries + 1] = region
    entries[#entries + 1] = i
    return
  end
  regions_inv[key] = { region, i }
end
---Removes the (region, index) pair whose region deep-equals `region` from the inverse region
---table, dropping the bucket once it becomes empty. Raises an error if the bucket does not exist
---or holds no such region.
---@param regions_inv table<integer, (Range6[]|integer)[]>
---@param region Range6[]
local function regions_inv_remove(regions_inv, region)
  local key = region[1][3]
  local entries = assert(regions_inv[key])

  local found ---@type integer?
  for pos = 1, #entries, 2 do
    if vim.deep_equal(entries[pos], region) then
      found = pos
      break
    end
  end
  if not found then
    error('region not found')
  end

  -- Remove the index slot first so that the region slot's position is still valid.
  table.remove(entries, found + 1)
  table.remove(entries, found)
  if #entries == 0 then
    regions_inv[key] = nil
  end
end
---Marks region `i` as invalid. When `_valid` is the blanket value `true`, it is first expanded
---into a per-region table with every index in `_regions` marked valid. When `_valid` is `false`,
---nothing is changed.
---@param i integer
function LanguageTree:_invalidate_region(i)
  if self._valid == true then
    local per_region = {}
    for idx in pairs(self._regions) do
      per_region[idx] = true
    end
    self._valid = per_region
  end

  if type(self._valid) == 'table' then
    self._valid[i] = false
  end
end
---Drops region `i` from this parser: its entry in the `_regions`/`_regions_inv` cache (when the
---cache exists), its tree, and its slot in the per-region `_valid` table. Does nothing when this
---parser has no regions set (`_has_regions` is false).
---
---When a tree exists for the region, the `changedtree` callback is invoked with the tree's
---included ranges and the tree itself before the tree is dropped, and children's regions lying
---within the discarded span are discarded recursively.
---@param i integer index of the region to discard
function LanguageTree:_discard_region(i)
if not self._has_regions then
return
end
-- Keep the region cache and its inverse table in sync.
if self._regions then
regions_inv_remove(self._regions_inv, self._regions[i])
self._regions[i] = nil
end
if self._trees[i] then
local region = self._trees[i]:included_ranges(true)
self:_log(function()
return 'discarding region', i, region_tostr(region)
end)
-- Notify listeners while the tree is still stored at index i.
self:_do_callback('changedtree', region, self._trees[i])
local discarded_range = region_range(region)
self._trees[i] = nil
-- Discard children's regions that are included in the discarded region. This is necessary
-- because changes that only remove trees in this parser keep the children parsers untouched.
for _, child in pairs(self._children) do
for child_i, child_region in pairs(child:included_regions()) do
if Range.contains(discarded_range, region_range(child_region)) then
child:_discard_region(child_i)
end
end
end
end
-- Only a per-region validity table has a slot to clear; a boolean `_valid` is left as is.
if type(self._valid) == 'table' then
self._valid[i] = nil
end
end
--- Sets the included regions that should be parsed by this |LanguageTree|.
--- A region is a set of nodes and/or ranges that will be parsed in the same context.
---
@ -619,7 +769,23 @@ end
function LanguageTree:set_included_regions(new_regions)
self._has_regions = true
-- Transform the tables from 4 element long to 6 element long (with byte offset)
-- Refresh self._regions and self._regions_inv
self:included_regions()
local touched = {} ---@type table<integer, true>
-- Check if the parser already has each region so that they can be parsed incrementally from an
-- existing tree. We find the existing regions by "similarity" instead of the exact equality,
-- because the values of an existing region and the matching region in `new_regions` may not be
-- equal, in which case the existing tree can't be reused.
--
-- Inequality of matching regions happens because `_edit` does not accurately track changes in the
-- existing regions. One (probably the only?) case is when a multi-range region created from a
-- non-`include-children` injection or a combined injection is edited in a way that adds a range
-- to the region, e.g., when adding a line in markdown fenced code block (with language).
--
-- Matching the regions doesn't need to be precise: the consequence of false match and false
-- non-match is just a minor loss in efficiency due to reparsing a region from scratch.
for _, region in ipairs(new_regions) do
for i, range in ipairs(region) do
if type(range) == 'table' and #range == 4 then
@ -628,26 +794,50 @@ function LanguageTree:set_included_regions(new_regions)
region[i] = { range:range(true) }
end
end
end
---@cast region Range6[]
-- included_regions is not guaranteed to be list-like, but this is still sound, i.e. if
-- new_regions is different from included_regions, then outdated regions in included_regions are
-- invalidated. For example, if included_regions = new_regions ++ hole ++ outdated_regions, then
-- outdated_regions is invalidated by _iter_regions in else branch.
if #self:included_regions() ~= #new_regions then
-- TODO(lewis6991): inefficient; invalidate trees incrementally
for _, t in pairs(self._trees) do
self:_do_callback('changedtree', t:included_ranges(true), t)
local i, exact = regions_inv_lookup(self._regions_inv, region)
if not exact then
if i then
self:_log(function()
return 'invalidating inexactly matched region', i, region_tostr(self._regions[i])
end)
regions_inv_remove(self._regions_inv, self._regions[i])
else
i = #self._regions + 1 -- this always gives an unoccupied index even if there are holes
end
self._regions[i] = region
regions_inv_insert(self._regions_inv, i, region)
self:_invalidate_region(i)
end
self._trees = {}
self:invalidate()
else
self:_iter_regions(function(i, region)
return vim.deep_equal(new_regions[i], region)
end)
---@cast i integer
touched[i] = true
end
self._regions = new_regions
-- Discard stale regions.
for i, _ in pairs(self._regions) do
if not touched[i] then
self:_discard_region(i)
end
end
end
--- Removes, in place, every range whose start byte equals its end byte. The remaining ranges keep
--- their relative order and are compacted to the front of the list.
--- @param region Range6[]
local function prune_empty_ranges(region)
  local total = #region
  local kept = 0
  for idx = 1, total do
    local r = region[idx]
    if r[3] ~= r[6] then
      kept = kept + 1
      region[kept] = r
    end
  end
  -- Clear the now-unused tail, from the end backwards.
  for idx = total, kept + 1, -1 do
    region[idx] = nil
  end
end
---Gets the set of included regions managed by this LanguageTree. This can be different from the
@ -664,12 +854,24 @@ function LanguageTree:included_regions()
return { {} }
end
local regions = {} ---@type Range6[][]
local regions = {} ---@type table<integer, Range6[]>
local regions_inv = {} ---@type table<integer, (Range6[]|integer)[]>
for i, _ in pairs(self._trees) do
regions[i] = self._trees[i]:included_ranges(true)
local region = self._trees[i]:included_ranges(true)
prune_empty_ranges(region)
if #region > 0 then
regions[i] = region
regions_inv_insert(regions_inv, i, region)
else
self._trees[i] = nil
if type(self._valid) == 'table' then
self._valid[i] = nil
end
end
end
self._regions = regions
self._regions_inv = regions_inv
return regions
end
@ -829,36 +1031,60 @@ end
---
--- This is where most of the injection processing occurs.
---
--- TODO: Allow for an offset predicate to tailor the injection range
--- instead of using the entire nodes range.
--- @param ranges boolean|(Range)[]|nil
--- @private
--- @return table<string, Range6[][]>
function LanguageTree:_get_injections()
if not self._injection_query then
function LanguageTree:_get_injections(ranges)
if not self._injection_query or not ranges then
return {}
end
---@type table<integer,vim.treesitter.languagetree.Injection>
local injections = {}
-- Combined injection must be run on the full source, and currently there is no simple way to
-- selectively match each pattern separately.
if ranges == true or self._injection_query.has_combined_injection then
ranges = { true } ---@diagnostic disable-line: assign-type-mismatch
else
for i, range in ipairs(ranges) do
local sline, _, eline, _ = Range.unpack4(range)
ranges[i] = { sline, eline }
end
end
---@cast ranges (true|Range2)[]
for index, tree in pairs(self._trees) do
local root_node = tree:root()
local start_line, _, end_line, _ = root_node:range()
local start_line, _, end_line, end_col = root_node:range()
if end_col > 0 then
end_line = end_line + 1
end
for pattern, match, metadata in
self._injection_query:iter_matches(
root_node,
self._source,
start_line,
end_line + 1,
{ all = true }
)
do
local lang, combined, ranges = self:_get_injection(match, metadata)
if lang then
add_injection(injections, index, pattern, lang, combined, ranges)
else
self:_log('match from injection query failed for pattern', pattern)
for _, range in ipairs(ranges) do
local start_line_in_range, end_line_in_range = start_line, end_line
if range ~= true then
start_line_in_range = math.max(start_line, range[1])
end_line_in_range = math.min(end_line, range[2])
end
-- Duplicates from overlapping ranges are handled by `set_included_ranges`.
if start_line_in_range < end_line_in_range then
for pattern, match, metadata in
self._injection_query:iter_matches(
root_node,
self._source,
start_line_in_range,
end_line_in_range,
{ all = true }
)
do
local lang, combined, inj_ranges = self:_get_injection(match, metadata)
if lang then
add_injection(injections, index, pattern, lang, combined, inj_ranges)
else
self:_log('match from injection query failed for pattern', pattern)
end
end
end
end
end
@ -878,8 +1104,8 @@ function LanguageTree:_get_injections()
if entry.combined then
table.insert(result[lang], combine_regions(entry.regions))
else
for _, ranges in pairs(entry.regions) do
table.insert(result[lang], ranges)
for _, inj_ranges in pairs(entry.regions) do
table.insert(result[lang], inj_ranges)
end
end
end
@ -927,6 +1153,7 @@ function LanguageTree:_edit(
end
self._regions = nil
self._regions_inv = nil
local changed_range = {
start_row,
@ -1090,14 +1317,7 @@ end
---@param range Range
---@return boolean
local function tree_contains(tree, range)
local tree_ranges = tree:included_ranges(false)
return Range.contains({
tree_ranges[1][1],
tree_ranges[1][2],
tree_ranges[#tree_ranges][3],
tree_ranges[#tree_ranges][4],
}, range)
return Range.contains(region_range(tree:included_ranges(false)), range)
end
--- Determines whether {range} is contained in the |LanguageTree|.

View File

@ -11,6 +11,7 @@ local M = {}
---@field lang string name of the language for this parser
---@field captures string[] list of (unique) capture names defined in query
---@field info vim.treesitter.QueryInfo contains information used in the query (e.g. captures, predicates, directives)
---@field has_combined_injection true? whether this query has a combined injection pattern
---@field query TSQuery userdata query object
local Query = {}
Query.__index = Query
@ -30,6 +31,18 @@ function Query.new(lang, ts_query)
patterns = query_info.patterns,
}
self.captures = self.info.captures
for _, preds in pairs(self.info.patterns) do
if
vim.tbl_contains(preds, function(pred)
return vim.deep_equal(pred, { 'set!', 'injection.combined' })
end, { predicate = true })
then
self.has_combined_injection = true
break
end
end
return self
end

View File

@ -253,20 +253,25 @@ end]]
local root = parser:parse()[1]:root()
parser:set_included_regions({{root:child(0)}})
parser:invalidate()
return { parser:parse(true)[1]:root():range() }
local _, tree = next(parser:parse(true))
return { tree:root():range() }
]]
eq({ 0, 0, 18, 1 }, res2)
eq({ { { 0, 0, 0, 18, 1, 512 } } }, exec_lua [[ return parser:included_regions() ]])
eq(
{ { { 0, 0, 0, 18, 1, 512 } } },
exec_lua [[return vim.tbl_values(parser:included_regions())]]
)
local range_tbl = exec_lua [[
eq(
{ { { 0, 0, 0, 17, 1, 508 } } },
exec_lua [[
parser:set_included_regions { { { 0, 0, 17, 1 } } }
parser:parse()
return parser:included_regions()
return vim.tbl_values(parser:included_regions())
]]
eq({ { { 0, 0, 0, 17, 1, 508 } } }, range_tbl)
)
end)
it('allows to set complex ranges', function()
@ -283,7 +288,8 @@ end]]
parser:set_included_regions({nodes})
local root = parser:parse(true)[1]:root()
local _, tree = next(parser:parse(true))
local root = tree:root()
local res = {}
for i=0,(root:named_child_count() - 1) do
@ -813,15 +819,33 @@ int x = INT_MAX;
1,
exec_lua [[
parser:parse({0, 2})
return #parser:children().lua:trees()
return vim.tbl_count(parser:children().lua:trees())
]]
)
-- Regions outside the given range are discarded.
eq(
1,
exec_lua [[
parser:parse({2, 6})
return vim.tbl_count(parser:children().lua:trees())
]]
)
eq(
2,
exec_lua [[
parser:parse({2, 6})
return #parser:children().lua:trees()
parser:invalidate()
parser:parse({{0, 2}, {2,6}})
return vim.tbl_count(parser:children().lua:trees())
]]
)
eq(
2,
exec_lua [[
parser:parse({{0, 5}, {2, 6}})
return vim.tbl_count(parser:children().lua:trees())
]]
)
@ -829,11 +853,106 @@ int x = INT_MAX;
7,
exec_lua [[
parser:parse(true)
return #parser:children().lua:trees()
return vim.tbl_count(parser:children().lua:trees())
]]
)
end)
it('reuses similar existing regions', function()
insert(dedent [[
* line1
line2]])
exec_lua [[
parser = vim.treesitter.get_parser(0, "markdown", {
injections = {
markdown = '((inline) @injection.content (#set! injection.language "markdown_inline"))'
}
})
]]
local function get_regions()
return exec_lua [[
parser:parse(true)
local result = {}
for i, tree in pairs(parser:children().markdown_inline:trees()) do
result[i] = tree:included_ranges()
end
return result
]]
end
eq({
[1] = { { 0, 2, 1, 0 }, { 1, 2, 1, 7 } },
}, get_regions())
feed('2ggyyp')
-- region index does not change
eq({
[1] = { { 0, 2, 1, 0 }, { 1, 2, 2, 0 }, { 2, 2, 2, 7 } },
}, get_regions())
feed('2ggdd')
eq({
[1] = { { 0, 2, 1, 0 }, { 1, 2, 1, 7 } },
}, get_regions())
feed('ggyGP')
-- the old region moves while maintaining its index
eq({
[1] = { { 2, 2, 3, 0 }, { 3, 2, 3, 7 } },
[2] = { { 0, 2, 1, 0 }, { 1, 2, 1, 7 } },
}, get_regions())
end)
it("recursively discards children's regions contained in a parent's discarded region", function()
insert(dedent [[
`return`
```
line 4
```
line 6 `return`
```]])
exec_lua [[
parser = vim.treesitter.get_parser(0, "markdown", {
injections = {
-- inject code span to lua
markdown_inline = '((code_span) @injection.content (#offset! @injection.content 0 1 0 -1) (#set! injection.language "lua"))'
}
})
]]
local function get_regions()
return exec_lua [[
parser:parse(true)
local result = {}
for i, tree in pairs(parser:children().markdown_inline:children().lua:trees()) do
result[i] = tree:included_ranges()
end
return result
]]
end
-- Initially, "line 4" is in the fenced code block, and "line 6 `return`" is a normal paragraph
-- with an inline code span.
eq({
[1] = { { 0, 1, 0, 7 } },
[2] = { { 5, 8, 5, 14 } },
}, get_regions())
-- Extend the code block to "line 6 `return`". Note that the only effect to markdown_inline
-- parser is removing a region, so it does not parse anything in markdown_inline parser.
feed('5ggD')
-- Despite not parsing at the parent (markdown_inline) parser, the regions in children (lua)
-- parser that are included in the parent's removed region should be removed as well.
-- The "`return`" at the first line is just for preventing the lua parser from being removed.
eq({
[1] = { { 0, 1, 0, 7 } },
}, get_regions())
end)
describe('languagetree is_valid()', function()
before_each(function()
insert(dedent [[
@ -844,10 +963,8 @@ int x = INT_MAX;
]])
feed(':set ft=help<cr>')
exec_lua [[
vim.treesitter.get_parser(0, "vimdoc", {
parser = vim.treesitter.get_parser(0, "vimdoc", {
injections = {
vimdoc = "((codeblock (language) @injection.language (code) @injection.content) (#set! injection.include-children))"
}
@ -855,21 +972,22 @@ int x = INT_MAX;
]]
end)
it('is valid excluding, invalid including children initially', function()
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid(true)'))
eq(false, exec_lua('return vim.treesitter.get_parser():is_valid()'))
end)
local function get_regions()
return exec_lua [[
if not parser:children().lua then
return nil
end
local result = {}
for i, tree in pairs(parser:children().lua:trees()) do
result[i] = tree:included_ranges()
end
return result
]]
end
it('is fully valid after a full parse', function()
exec_lua('vim.treesitter.get_parser():parse(true)')
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid(true)'))
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid()'))
end)
it('is fully valid after a parsing a range on parsed tree', function()
exec_lua('vim.treesitter.get_parser():parse({5, 7})')
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid(true)'))
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid()'))
it('is valid including children since it does not have one', function()
eq(true, exec_lua('return parser:is_valid(true)'))
eq(true, exec_lua('return parser:is_valid()'))
end)
describe('when adding content with injections', function()
@ -884,36 +1002,36 @@ int x = INT_MAX;
end)
-- With no re-parse performed in this test, both `is_valid(true)` (children
-- excluded) and `is_valid()` (children included) are expected to be false.
-- NOTE(review): the same pair of assertions appears twice, once through
-- `vim.treesitter.get_parser()` and once through the `parser` global. This
-- looks like both sides of a diff hunk kept together -- confirm which form
-- is intended and drop the other.
it('is fully invalid after changes', function()
eq(false, exec_lua('return vim.treesitter.get_parser():is_valid(true)'))
eq(false, exec_lua('return vim.treesitter.get_parser():is_valid()'))
eq(false, exec_lua('return parser:is_valid(true)'))
eq(false, exec_lua('return parser:is_valid()'))
end)
-- A `parse()` call without a range is expected to make `is_valid(true)`
-- (children excluded) true while `is_valid()` (children included) stays false.
-- NOTE(review): the parse-then-assert sequence is present twice, once through
-- `vim.treesitter.get_parser()` and once through the `parser` global. This
-- looks like both sides of a diff hunk kept together -- confirm which form
-- is intended and drop the other.
it('is valid excluding, invalid including children after a rangeless parse', function()
exec_lua('vim.treesitter.get_parser():parse()')
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid(true)'))
eq(false, exec_lua('return vim.treesitter.get_parser():is_valid()'))
exec_lua('parser:parse()')
eq(true, exec_lua('return parser:is_valid(true)'))
eq(false, exec_lua('return parser:is_valid()'))
end)
-- Parsing the range `{5, 7}` is expected to leave the parser valid both with
-- and without children; per the test name, that range leads to parsing the
-- injections that were not parsed yet.
-- NOTE(review): the parse-then-assert sequence is present twice, once through
-- `vim.treesitter.get_parser()` and once through the `parser` global. This
-- looks like both sides of a diff hunk kept together -- confirm which form
-- is intended and drop the other.
it(
'is fully valid after a range parse that leads to parsing not parsed injections',
function()
exec_lua('vim.treesitter.get_parser():parse({5, 7})')
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid(true)'))
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid()'))
exec_lua('parser:parse({5, 7})')
eq(true, exec_lua('return parser:is_valid(true)'))
eq(true, exec_lua('return parser:is_valid()'))
end
)
-- Parsing the range `{2, 4}` is expected to make `is_valid(true)` (children
-- excluded) true while `is_valid()` (children included) stays false; per the
-- test name, that range does not lead to parsing the unparsed injections.
-- NOTE(review): the parse-then-assert sequence is present twice, once through
-- `vim.treesitter.get_parser()` and once through the `parser` global. This
-- looks like both sides of a diff hunk kept together -- confirm which form
-- is intended and drop the other.
it(
'is valid excluding, invalid including children after a range parse that does not lead to parsing not parsed injections',
function()
exec_lua('vim.treesitter.get_parser():parse({2, 4})')
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid(true)'))
eq(false, exec_lua('return vim.treesitter.get_parser():is_valid()'))
exec_lua('parser:parse({2, 4})')
eq(true, exec_lua('return parser:is_valid(true)'))
eq(false, exec_lua('return parser:is_valid()'))
end
)
end)
describe('when removing content with injections', function()
describe('when removing an injection region', function()
before_each(function()
feed('G')
insert(dedent [[
@ -922,41 +1040,80 @@ int x = INT_MAX;
<
>lua
local a = {}
local b = {}
<
]])
exec_lua('vim.treesitter.get_parser():parse(true)')
exec_lua('parser:parse(true)')
eq({ [1] = { { 6, 0, 7, 0 } }, [2] = { { 10, 0, 11, 0 } } }, get_regions())
feed('Gd3k')
-- the empty region is pruned
eq({ [1] = { { 6, 0, 7, 0 } } }, get_regions())
end)
it('is fully invalid after changes', function()
eq(false, exec_lua('return vim.treesitter.get_parser():is_valid(true)'))
eq(false, exec_lua('return vim.treesitter.get_parser():is_valid()'))
end)
it('is valid excluding, invalid including children after a rangeless parse', function()
exec_lua('vim.treesitter.get_parser():parse()')
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid(true)'))
eq(false, exec_lua('return vim.treesitter.get_parser():is_valid()'))
end)
it('is fully valid after a range parse that leads to parsing modified child tree', function()
exec_lua('vim.treesitter.get_parser():parse({5, 7})')
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid(true)'))
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid()'))
eq(false, exec_lua('return parser:is_valid(true)'))
eq(false, exec_lua('return parser:is_valid()'))
end)
it(
'is valid excluding, invalid including children after a range parse that does not lead to parsing modified child tree',
'is fully valid after a rangeless parse, since the only change to the children was removing a region',
function()
exec_lua('vim.treesitter.get_parser():parse({2, 4})')
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid(true)'))
eq(false, exec_lua('return vim.treesitter.get_parser():is_valid()'))
exec_lua('parser:parse()')
eq(true, exec_lua('return parser:is_valid(true)'))
eq(true, exec_lua('return parser:is_valid()'))
end
)
-- Parsing the range `{5, 7}` (which, per the test name, includes the injection
-- region) is expected to leave the parser valid with and without children.
it('is fully valid after a range parse that includes injection region', function()
  local parse_chunk = 'parser:parse({5, 7})'
  exec_lua(parse_chunk)

  for _, check in ipairs({ 'return parser:is_valid(true)', 'return parser:is_valid()' }) do
    eq(true, exec_lua(check))
  end
end)
-- Parsing the range `{2, 4}` (which, per the test name, does not include the
-- injection region) is expected to leave no `lua` regions, a valid parser when
-- children are excluded, and an invalid one when they are included.
it(
  'is valid excluding, invalid including children after a range parse that does not include injection region',
  function()
    exec_lua('parser:parse({2, 4})')

    -- No `lua` child regions are reported after this parse.
    eq(vim.NIL, get_regions())

    -- Ordered list of { expected, chunk } so the checks run in the same order.
    local expectations = {
      { true, 'return parser:is_valid(true)' },
      { false, 'return parser:is_valid()' },
    }
    for _, case in ipairs(expectations) do
      eq(case[1], exec_lua(case[2]))
    end
  end
)
end)
describe('when editing an injection region', function()
-- Setup for the cases below: adds a `>lua` code block to the buffer, runs
-- `parser:parse(true)`, and then edits the single line inside that block.
before_each(function()
-- Move the cursor to the last line before inserting the block.
feed('G')
insert(dedent [[
>lua
local a = 1
<
]])
exec_lua('parser:parse(true)')
-- Go two lines up from the last line, append at the end of that line, delete
-- one character and type `2`. The line count stays the same; only the content
-- of the code block changes.
feed('G2kA<BS>2<ESC>') -- 1 → 2
end)
-- With no re-parse performed in this test, the parser is expected to be
-- invalid both when children are excluded and when they are included.
it('is fully invalid after changes', function()
  local excluding_children = 'return parser:is_valid(true)'
  local including_children = 'return parser:is_valid()'

  eq(false, exec_lua(excluding_children))
  eq(false, exec_lua(including_children))
end)
-- A `parse()` call without a range is expected to make the parser valid when
-- children are excluded, while it stays invalid when they are included.
it('is valid excluding, invalid including children after a rangeless parse', function()
  exec_lua('parser:parse()')

  -- Ordered list of { expected, chunk } so the checks run in the same order.
  local expectations = {
    { true, 'return parser:is_valid(true)' },
    { false, 'return parser:is_valid()' },
  }
  for _, case in ipairs(expectations) do
    eq(case[1], exec_lua(case[2]))
  end
end)
-- Parsing the range `{5, 7}` (which, per the test name, includes the modified
-- region) is expected to leave the parser valid with and without children.
it('is fully valid after a range parse that includes modified region', function()
  local parse_chunk = 'parser:parse({5, 7})'
  exec_lua(parse_chunk)

  local valid_excluding_children = 'return parser:is_valid(true)'
  local valid_including_children = 'return parser:is_valid()'
  eq(true, exec_lua(valid_excluding_children))
  eq(true, exec_lua(valid_including_children))
end)
end)
end)
end)