mirror of https://github.com/neovim/neovim.git
perf(treesitter): run injection query only for visible lines
Problem: Executing the injection query on the full source is slow. Solution: Execute the injection query only on the given range. Notes: * This is not applicable to languages with combined injection. * `is_valid(false)` should run full injection to determine if the current set of children parsers and their regions are complete. Since this can be slow, `parse()` no longer checks this at the beginning. * Children parsers and regions outside the given range are discarded.
This commit is contained in:
parent
0365f5a82c
commit
e24ca40217
|
@ -1338,7 +1338,8 @@ LanguageTree:invalidate({reload}) *LanguageTree:invalidate()*
|
|||
LanguageTree:is_valid({exclude_children}) *LanguageTree:is_valid()*
|
||||
Returns whether this LanguageTree is valid, i.e., |LanguageTree:trees()|
|
||||
reflects the latest state of the source. If invalid, user should call
|
||||
|LanguageTree:parse()|.
|
||||
|LanguageTree:parse()|. `is_valid(false)` can be slow because it runs
|
||||
injection on the full source.
|
||||
|
||||
Parameters: ~
|
||||
• {exclude_children} (`boolean?`) whether to ignore the validity of
|
||||
|
|
|
@ -72,8 +72,12 @@ local TSCallbackNames = {
|
|||
---@field private _callbacks table<TSCallbackName,function[]> Callback handlers
|
||||
---@field package _callbacks_rec table<TSCallbackName,function[]> Callback handlers (recursive)
|
||||
---@field private _children table<string,vim.treesitter.LanguageTree> Injected languages
|
||||
---@field private _injection_query vim.treesitter.Query Queries defining injected languages
|
||||
---@field private _injection_query vim.treesitter.Query? Queries defining injected languages
|
||||
---
|
||||
---If `is_valid(true) and _injections_processed`, the set of children parsers and their sets of
|
||||
---regions are complete wrt. the full source, so that it's not necessary to execute injections.
|
||||
---@field private _injections_processed boolean
|
||||
---
|
||||
---@field private _opts table Options
|
||||
---@field private _parser TSParser Parser for language
|
||||
---@field private _has_regions boolean
|
||||
|
@ -271,6 +275,7 @@ end
|
|||
|
||||
--- Returns whether this LanguageTree is valid, i.e., |LanguageTree:trees()| reflects the latest
|
||||
--- state of the source. If invalid, user should call |LanguageTree:parse()|.
|
||||
--- `is_valid(false)` can be slow because it runs injection on the full source.
|
||||
---@param exclude_children boolean|nil whether to ignore the validity of children (default `false`)
|
||||
---@return boolean
|
||||
function LanguageTree:is_valid(exclude_children)
|
||||
|
@ -285,8 +290,11 @@ function LanguageTree:is_valid(exclude_children)
|
|||
end
|
||||
|
||||
if not exclude_children then
|
||||
-- Run full injection to check if the current set of children and their regions are complete.
|
||||
-- Note that `set_included_regions` marks new regions invalid.
|
||||
if not self._injections_processed then
|
||||
return false
|
||||
self:_add_injections(true)
|
||||
self._injections_processed = true
|
||||
end
|
||||
|
||||
for _, child in pairs(self._children) do
|
||||
|
@ -384,11 +392,12 @@ function LanguageTree:_parse_regions(range)
|
|||
end
|
||||
|
||||
--- @private
|
||||
--- @param range boolean|Range|nil
|
||||
--- @return number
|
||||
function LanguageTree:_add_injections()
|
||||
function LanguageTree:_add_injections(range)
|
||||
local seen_langs = {} ---@type table<string,boolean>
|
||||
|
||||
local query_time, injections_by_lang = tcall(self._get_injections, self)
|
||||
local query_time, injections_by_lang = tcall(self._get_injections, self, range)
|
||||
for lang, injection_regions in pairs(injections_by_lang) do
|
||||
local has_lang = pcall(language.add, lang)
|
||||
|
||||
|
@ -437,11 +446,6 @@ end
|
|||
--- only the root tree without injections).
|
||||
--- @return table<integer, TSTree>
|
||||
function LanguageTree:parse(range)
|
||||
if self:is_valid() then
|
||||
self:_log('valid')
|
||||
return self._trees
|
||||
end
|
||||
|
||||
local changes --- @type Range6[]?
|
||||
|
||||
-- Collect some stats
|
||||
|
@ -458,9 +462,19 @@ function LanguageTree:parse(range)
|
|||
end
|
||||
end
|
||||
|
||||
if not self._injections_processed and range ~= false and range ~= nil then
|
||||
query_time = self:_add_injections()
|
||||
self._injections_processed = true
|
||||
-- NOTE: Trade-off in partial injection query execution
|
||||
-- * The good: Each `parse()` is faster.
|
||||
-- * The bad: `is_valid(false)` is more expensive, requiring a full injection query execution. To
|
||||
-- avoid this cost, each `parse()` always runs partial injection. However, this is not a big
|
||||
-- problem as partial injection is very cheap even on huge files.
|
||||
-- * A potential optimization: Track the ranges where the set of injected regions is known to be
|
||||
-- complete and valid, and run the injection query only on the intersection of requested ranges
|
||||
-- and the invalid ranges. This would be even more beneficial for combined injection.
|
||||
if self._injection_query and not self._injections_processed and range then
|
||||
query_time = self:_add_injections(range)
|
||||
if range == true or self._injection_query.has_combined_injection then
|
||||
self._injections_processed = true
|
||||
end
|
||||
end
|
||||
|
||||
self:_log({
|
||||
|
@ -995,36 +1009,54 @@ end
|
|||
---
|
||||
--- This is where most of the injection processing occurs.
|
||||
---
|
||||
--- TODO: Allow for an offset predicate to tailor the injection range
|
||||
--- instead of using the entire nodes range.
|
||||
--- @param range boolean|Range|nil
|
||||
--- @private
|
||||
--- @return table<string, Range6[][]>
|
||||
function LanguageTree:_get_injections()
|
||||
if not self._injection_query then
|
||||
function LanguageTree:_get_injections(range)
|
||||
if not self._injection_query or not range then
|
||||
return {}
|
||||
end
|
||||
|
||||
---@type table<integer,vim.treesitter.languagetree.Injection>
|
||||
local injections = {}
|
||||
|
||||
local range_start_line, range_end_line ---@type integer, integer
|
||||
if range ~= true then
|
||||
local sline, _, eline, _ = Range.unpack4(range)
|
||||
range_start_line, range_end_line = sline, eline
|
||||
end
|
||||
|
||||
for index, tree in pairs(self._trees) do
|
||||
local root_node = tree:root()
|
||||
local start_line, _, end_line, _ = root_node:range()
|
||||
local start_line, _, end_line, end_col = root_node:range()
|
||||
if end_col > 0 then
|
||||
end_line = end_line + 1
|
||||
end
|
||||
|
||||
for pattern, match, metadata in
|
||||
self._injection_query:iter_matches(
|
||||
root_node,
|
||||
self._source,
|
||||
start_line,
|
||||
end_line + 1,
|
||||
{ all = true }
|
||||
)
|
||||
do
|
||||
local lang, combined, ranges = self:_get_injection(match, metadata)
|
||||
if lang then
|
||||
add_injection(injections, index, pattern, lang, combined, ranges)
|
||||
else
|
||||
self:_log('match from injection query failed for pattern', pattern)
|
||||
-- If the query doesn't have combined injection, run the query on the given range. Combined
|
||||
-- injection must be run on the full range. Currently there is no simple way to selectively
|
||||
-- match each pattern separately.
|
||||
if range ~= true and not self._injection_query.has_combined_injection then
|
||||
start_line = math.max(start_line, range_start_line)
|
||||
end_line = math.min(end_line, range_end_line)
|
||||
end
|
||||
|
||||
if start_line < end_line then
|
||||
for pattern, match, metadata in
|
||||
self._injection_query:iter_matches(
|
||||
root_node,
|
||||
self._source,
|
||||
start_line,
|
||||
end_line,
|
||||
{ all = true }
|
||||
)
|
||||
do
|
||||
local lang, combined, ranges = self:_get_injection(match, metadata)
|
||||
if lang then
|
||||
add_injection(injections, index, pattern, lang, combined, ranges)
|
||||
else
|
||||
self:_log('match from injection query failed for pattern', pattern)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -11,6 +11,7 @@ local M = {}
|
|||
---@field lang string name of the language for this parser
|
||||
---@field captures string[] list of (unique) capture names defined in query
|
||||
---@field info vim.treesitter.QueryInfo contains information used in the query (e.g. captures, predicates, directives)
|
||||
---@field has_combined_injection true? whether this query has a combined injection pattern
|
||||
---@field query TSQuery userdata query object
|
||||
local Query = {}
|
||||
Query.__index = Query
|
||||
|
@ -30,6 +31,18 @@ function Query.new(lang, ts_query)
|
|||
patterns = query_info.patterns,
|
||||
}
|
||||
self.captures = self.info.captures
|
||||
|
||||
for _, preds in pairs(self.info.patterns) do
|
||||
if
|
||||
vim.tbl_contains(preds, function(pred)
|
||||
return vim.deep_equal(pred, { 'set!', 'injection.combined' })
|
||||
end, { predicate = true })
|
||||
then
|
||||
self.has_combined_injection = true
|
||||
break
|
||||
end
|
||||
end
|
||||
|
||||
return self
|
||||
end
|
||||
|
||||
|
|
|
@ -853,8 +853,9 @@ print()
|
|||
]]
|
||||
)
|
||||
|
||||
-- Regions outside the given range are discarded.
|
||||
eq(
|
||||
2,
|
||||
1,
|
||||
exec_lua [[
|
||||
parser:parse({2, 6})
|
||||
return vim.tbl_count(parser:children().lua:trees())
|
||||
|
@ -997,19 +998,7 @@ print()
|
|||
]]
|
||||
end
|
||||
|
||||
it('is valid excluding, invalid including children initially', function()
|
||||
eq(true, exec_lua('return parser:is_valid(true)'))
|
||||
eq(false, exec_lua('return parser:is_valid()'))
|
||||
end)
|
||||
|
||||
it('is fully valid after a full parse', function()
|
||||
exec_lua('parser:parse(true)')
|
||||
eq(true, exec_lua('return parser:is_valid(true)'))
|
||||
eq(true, exec_lua('return parser:is_valid()'))
|
||||
end)
|
||||
|
||||
it('is fully valid after a parsing a range on parsed tree', function()
|
||||
exec_lua('vim.treesitter.get_parser():parse({5, 7})')
|
||||
it('is valid including children since it does not have one', function()
|
||||
eq(true, exec_lua('return parser:is_valid(true)'))
|
||||
eq(true, exec_lua('return parser:is_valid()'))
|
||||
end)
|
||||
|
@ -1082,17 +1071,30 @@ print()
|
|||
eq(false, exec_lua('return parser:is_valid()'))
|
||||
end)
|
||||
|
||||
it('is valid excluding, invalid including children after a rangeless parse', function()
|
||||
exec_lua('parser:parse()')
|
||||
eq(true, exec_lua('return parser:is_valid(true)'))
|
||||
eq(false, exec_lua('return parser:is_valid()'))
|
||||
end)
|
||||
it(
|
||||
'is fully valid after a rangeless parse, since the only change to the children was removing a region',
|
||||
function()
|
||||
exec_lua('parser:parse()')
|
||||
eq(true, exec_lua('return parser:is_valid(true)'))
|
||||
eq(true, exec_lua('return parser:is_valid()'))
|
||||
end
|
||||
)
|
||||
|
||||
it('is fully valid after a range parse that includes injection region', function()
|
||||
exec_lua('parser:parse({5, 7})')
|
||||
eq(true, exec_lua('return parser:is_valid(true)'))
|
||||
eq(true, exec_lua('return parser:is_valid()'))
|
||||
end)
|
||||
|
||||
it(
|
||||
'is valid excluding, invalid including children after a range parse that does not include injection region',
|
||||
function()
|
||||
exec_lua('parser:parse({2, 4})')
|
||||
eq(vim.NIL, get_regions())
|
||||
eq(true, exec_lua('return parser:is_valid(true)'))
|
||||
eq(false, exec_lua('return parser:is_valid()'))
|
||||
end
|
||||
)
|
||||
end)
|
||||
|
||||
describe('when editing an injection region', function()
|
||||
|
|
Loading…
Reference in New Issue