Skip to content

Commit

Permalink
Support LSP semantic tokens
Browse files Browse the repository at this point in the history
This patch implements `textDocument/semanticTokens/{full,range}`. If the
client supports semantic tokens, $ccls/publishSemanticHighlight (now
deprecated) is disabled.

These token modifiers are mostly useful to emphasize certain symbols:
`static, classScope, functionScope, namespaceScope`.

To enable a colorful syntax highlighting scheme, set the
highlight.rainbow initialization option to 10.
https://maskray.me/blog/2024-10-20-ccls-and-lsp-semantic-tokens

Note that the older $ccls/publishSemanticHighlight protocol with
highlight.lsRanges==true (used by vscode-ccls) is no longer supported.
  • Loading branch information
MaskRay committed Nov 6, 2024
1 parent 50fd8d0 commit cc13ced
Show file tree
Hide file tree
Showing 8 changed files with 272 additions and 43 deletions.
9 changes: 6 additions & 3 deletions src/config.hh
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ struct Config {
bool hierarchicalDocumentSymbolSupport = true;
// TextDocumentClientCapabilities.definition.linkSupport
bool linkSupport = true;
// ClientCapabilities.workspace.semanticTokens.refreshSupport
bool semanticTokensRefresh = true;

// If false, disable snippets and complete just the identifier part.
// TextDocumentClientCapabilities.completion.completionItem.snippetSupport
Expand Down Expand Up @@ -226,8 +228,9 @@ struct Config {
// Disable semantic highlighting for files larger than the size.
int64_t largeFileSize = 2 * 1024 * 1024;

// true: LSP line/character; false: position
bool lsRanges = false;
// If non-zero, enable rainbow semantic tokens by assigning an extra modifier
// indicating the rainbow ID to each symbol.
int rainbow = 0;

// Like index.{whitelist,blacklist}, don't publish semantic highlighting to
// blacklisted files.
Expand Down Expand Up @@ -342,7 +345,7 @@ REFLECT_STRUCT(Config::Completion, caseSensitivity, detailedLabel,
maxNum, placeholder);
REFLECT_STRUCT(Config::Diagnostics, blacklist, onChange, onOpen, onSave,
spellChecking, whitelist)
REFLECT_STRUCT(Config::Highlight, largeFileSize, lsRanges, blacklist, whitelist)
REFLECT_STRUCT(Config::Highlight, largeFileSize, rainbow, blacklist, whitelist)
REFLECT_STRUCT(Config::Index::Name, suppressUnwrittenScope);
REFLECT_STRUCT(Config::Index, blacklist, comments, initialNoLinkage,
initialBlacklist, initialWhitelist, maxInitializerLines,
Expand Down
36 changes: 36 additions & 0 deletions src/enum.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// X-macro list of semantic token modifiers.
//
// Usage: define TOKEN_MODIFIER(name, str) before including this file and
// #undef it afterwards. Every entry below is then expanded through that macro;
// `name` is the identifier form of the modifier and `str` its string spelling.
//
// The order of the entries matters when the list is expanded into an enum:
// the position of an entry becomes its enumerator value, so new entries
// should be added with care.

// Default to a no-op so that including this file without defining the macro
// expands to nothing.
#ifndef TOKEN_MODIFIER
#define TOKEN_MODIFIER(name, str)
#endif
// vscode (presumably modifiers that vscode already knows about)
TOKEN_MODIFIER(Declaration, "declaration")
TOKEN_MODIFIER(Definition, "definition")
TOKEN_MODIFIER(Static, "static")

// ccls extensions
TOKEN_MODIFIER(Read, "read")
TOKEN_MODIFIER(Write, "write")
TOKEN_MODIFIER(ClassScope, "classScope")
TOKEN_MODIFIER(FunctionScope, "functionScope")
TOKEN_MODIFIER(NamespaceScope, "namespaceScope")

// Rainbow semantic tokens: twenty consecutive modifiers id0..id19. They must
// stay contiguous and in ascending order, because a rainbow modifier is
// selected by adding an offset to Id0.
TOKEN_MODIFIER(Id0, "id0")
TOKEN_MODIFIER(Id1, "id1")
TOKEN_MODIFIER(Id2, "id2")
TOKEN_MODIFIER(Id3, "id3")
TOKEN_MODIFIER(Id4, "id4")
TOKEN_MODIFIER(Id5, "id5")
TOKEN_MODIFIER(Id6, "id6")
TOKEN_MODIFIER(Id7, "id7")
TOKEN_MODIFIER(Id8, "id8")
TOKEN_MODIFIER(Id9, "id9")
TOKEN_MODIFIER(Id10, "id10")
TOKEN_MODIFIER(Id11, "id11")
TOKEN_MODIFIER(Id12, "id12")
TOKEN_MODIFIER(Id13, "id13")
TOKEN_MODIFIER(Id14, "id14")
TOKEN_MODIFIER(Id15, "id15")
TOKEN_MODIFIER(Id16, "id16")
TOKEN_MODIFIER(Id17, "id17")
TOKEN_MODIFIER(Id18, "id18")
TOKEN_MODIFIER(Id19, "id19")
6 changes: 6 additions & 0 deletions src/indexer.hh
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,12 @@ void reflect(BinaryWriter &visitor, SymbolRef &value);
void reflect(BinaryWriter &visitor, Use &value);
void reflect(BinaryWriter &visitor, DeclRef &value);

// Semantic token modifiers. The enumerators are generated from the
// TOKEN_MODIFIER entries of enum.inc, in the order they are listed there, so
// the first entry has value 0. The values are used as bit positions when a
// modifier bitset is built (`1 << (int)TokenModifier::X`).
enum class TokenModifier {
// Expand each entry to just its identifier followed by a comma.
#define TOKEN_MODIFIER(name, str) name,
#include "enum.inc"
#undef TOKEN_MODIFIER
};

template <typename T> using VectorAdapter = std::vector<T, std::allocator<T>>;

template <typename D> struct NameMixin {
Expand Down
3 changes: 3 additions & 0 deletions src/lsp.hh
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ enum class SymbolKind : uint8_t {
// For C++, this is interpreted as "template parameter" (including
// non-type template parameters).
TypeParameter = 26,
FirstNonStandard,

// ccls extensions
// See also https://github.com/Microsoft/language-server-protocol/issues/344
Expand All @@ -174,6 +175,8 @@ enum class SymbolKind : uint8_t {
Parameter = 253,
StaticMethod = 254,
Macro = 255,
FirstExtension = TypeAlias,
LastExtension = Macro,
};

struct SymbolInformation {
Expand Down
162 changes: 135 additions & 27 deletions src/message_handler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,25 @@
#include <rapidjson/document.h>
#include <rapidjson/reader.h>

#include <llvm/ADT/STLExtras.h>

#include <algorithm>
#include <stdexcept>

using namespace clang;

#if LLVM_VERSION_MAJOR < 15 // llvmorg-15-init-6118-gb39f43775796
namespace llvm {
/// Fallback for LLVM releases older than 15, which presumably lack this
/// overload (see the upstream revision named on the #if line).
///
/// Returns true if some element of the braced list `set` compares equal to
/// `e` using operator==, false otherwise (including for an empty list).
template <typename T, typename E>
constexpr bool is_contained(std::initializer_list<T> set, const E &e) {
  for (const T *it = set.begin(), *last = set.end(); it != last; ++it) {
    if (*it == e)
      return true;
  }
  return false;
}
} // namespace llvm
#endif

MAKE_HASHABLE(ccls::SymbolIdx, t.usr, t.kind);

namespace ccls {
Expand Down Expand Up @@ -51,23 +65,26 @@ REFLECT_STRUCT(DidChangeWorkspaceFoldersParam, event);
REFLECT_STRUCT(WorkspaceSymbolParam, query, folders);

namespace {
// One occurrence of a symbol in a document: where it is and in which role it
// appears there. Instances are created with brace initialization as
// `{range, role}`, so the field order must not change.
struct Occur {
// Location of the occurrence as an LSP range.
lsRange range;
// Role flags of this occurrence (tested against Role::Declaration,
// Role::Definition, Role::Read and Role::Write when modifiers are computed).
Role role;
};
// Highlighting data for one symbol: its kind information plus every place the
// symbol occurs in the document. Used as an element of
// CclsSemanticHighlight::symbols.
struct CclsSemanticHighlightSymbol {
// Identifier of the symbol; also used to pick a rainbow modifier.
int id = 0;
// Kind of the enclosing (parent) symbol.
SymbolKind parentKind;
// Kind of the symbol itself.
SymbolKind kind;
// Storage class of the symbol (compared against SC_Static).
uint8_t storage;
// Occurrences as pairs of integers (offset pairs).
std::vector<std::pair<int, int>> ranges;

// `lsRanges` is used to compute `ranges`.
std::vector<lsRange> lsRanges;
// `lsOccurs` is used to compute `ranges`.
std::vector<Occur> lsOccurs;
};

// Parameters of the `$ccls/publishSemanticHighlight` notification: the
// document the highlighting applies to and the highlighted symbols in it.
struct CclsSemanticHighlight {
// Document the symbols belong to.
DocumentUri uri;
// One entry per highlighted symbol.
std::vector<CclsSemanticHighlightSymbol> symbols;
};
REFLECT_STRUCT(CclsSemanticHighlightSymbol, id, parentKind, kind, storage,
ranges, lsRanges);
REFLECT_STRUCT(CclsSemanticHighlightSymbol, id, parentKind, kind, storage, ranges);
REFLECT_STRUCT(CclsSemanticHighlight, uri, symbols);

struct CclsSetSkippedRanges {
Expand All @@ -76,10 +93,16 @@ struct CclsSetSkippedRanges {
};
REFLECT_STRUCT(CclsSetSkippedRanges, uri, skippedRanges);

// Reply payload of the semantic token requests.
struct SemanticTokensPartialResult {
// Flat token encoding: five integers per token, in this order: line delta,
// start-character delta, length, token type, modifier bitset.
std::vector<int> data;
};
// Make the struct (de)serializable through the reflection machinery.
REFLECT_STRUCT(SemanticTokensPartialResult, data);

struct ScanLineEvent {
Position pos;
Position end_pos; // Second key when there is a tie for insertion events.
int id;
Role role;
CclsSemanticHighlightSymbol *symbol;
bool operator<(const ScanLineEvent &o) const {
// See the comments below when insertion/deletion events are inserted.
Expand Down Expand Up @@ -190,6 +213,8 @@ MessageHandler::MessageHandler() {
bind("textDocument/rangeFormatting", &MessageHandler::textDocument_rangeFormatting);
bind("textDocument/references", &MessageHandler::textDocument_references);
bind("textDocument/rename", &MessageHandler::textDocument_rename);
bind("textDocument/semanticTokens/full", &MessageHandler::textDocument_semanticTokensFull);
bind("textDocument/semanticTokens/range", &MessageHandler::textDocument_semanticTokensRange);
bind("textDocument/signatureHelp", &MessageHandler::textDocument_signatureHelp);
bind("textDocument/typeDefinition", &MessageHandler::textDocument_typeDefinition);
bind("workspace/didChangeConfiguration", &MessageHandler::workspace_didChangeConfiguration);
Expand Down Expand Up @@ -281,16 +306,16 @@ void emitSkippedRanges(WorkingFile *wfile, QueryFile &file) {
pipeline::notify("$ccls/publishSkippedRanges", params);
}

void emitSemanticHighlight(DB *db, WorkingFile *wfile, QueryFile &file) {
static std::unordered_map<SymbolIdx, CclsSemanticHighlightSymbol> computeSemanticTokens(DB *db, WorkingFile *wfile,
QueryFile &file) {
static GroupMatch match(g_config->highlight.whitelist,
g_config->highlight.blacklist);
assert(file.def);
if (wfile->buffer_content.size() > g_config->highlight.largeFileSize ||
!match.matches(file.def->path))
return;

// Group symbols together.
std::unordered_map<SymbolIdx, CclsSemanticHighlightSymbol> grouped_symbols;
if (!match.matches(file.def->path))
return grouped_symbols;

for (auto [sym, refcnt] : file.symbol2refcnt) {
if (refcnt <= 0)
continue;
Expand Down Expand Up @@ -369,14 +394,14 @@ void emitSemanticHighlight(DB *db, WorkingFile *wfile, QueryFile &file) {
if (std::optional<lsRange> loc = getLsRange(wfile, sym.range)) {
auto it = grouped_symbols.find(sym);
if (it != grouped_symbols.end()) {
it->second.lsRanges.push_back(*loc);
it->second.lsOccurs.push_back({*loc, sym.role});
} else {
CclsSemanticHighlightSymbol symbol;
symbol.id = idx;
symbol.parentKind = parent_kind;
symbol.kind = kind;
symbol.storage = storage;
symbol.lsRanges.push_back(*loc);
symbol.lsOccurs.push_back({*loc, sym.role});
grouped_symbols[sym] = symbol;
}
}
Expand All @@ -387,17 +412,17 @@ void emitSemanticHighlight(DB *db, WorkingFile *wfile, QueryFile &file) {
int id = 0;
for (auto &entry : grouped_symbols) {
CclsSemanticHighlightSymbol &symbol = entry.second;
for (auto &loc : symbol.lsRanges) {
for (auto &occur : symbol.lsOccurs) {
// For ranges sharing the same start point, the one with leftmost end
// point comes first.
events.push_back({loc.start, loc.end, id, &symbol});
events.push_back({occur.range.start, occur.range.end, id, occur.role, &symbol});
// For ranges sharing the same end point, their relative order does not
// matter, therefore we arbitrarily assign loc.end to them. We use
// matter, therefore we arbitrarily assign occur.range.end to them. We use
// negative id to indicate a deletion event.
events.push_back({loc.end, loc.end, ~id, &symbol});
events.push_back({occur.range.end, occur.range.end, ~id, occur.role, &symbol});
id++;
}
symbol.lsRanges.clear();
symbol.lsOccurs.clear();
}
std::sort(events.begin(), events.end());

Expand All @@ -413,26 +438,33 @@ void emitSemanticHighlight(DB *db, WorkingFile *wfile, QueryFile &file) {
// Attribute range [events[i-1].pos, events[i].pos) to events[top-1].symbol
// .
if (top && !(events[i - 1].pos == events[i].pos))
events[top - 1].symbol->lsRanges.push_back(
{events[i - 1].pos, events[i].pos});
events[top - 1].symbol->lsOccurs.push_back({{events[i - 1].pos, events[i].pos}, events[i].role});
if (events[i].id >= 0)
events[top++] = events[i];
else
deleted[~events[i].id] = 1;
}
return grouped_symbols;
}

void emitSemanticHighlight(DB *db, WorkingFile *wfile, QueryFile &file) {
// Disable $ccls/publishSemanticHighlight if semantic tokens support is
// enabled or the file is too large.
if (g_config->client.semanticTokensRefresh || wfile->buffer_content.size() > g_config->highlight.largeFileSize)
return;
auto grouped_symbols = computeSemanticTokens(db, wfile, file);

CclsSemanticHighlight params;
params.uri = DocumentUri::fromPath(wfile->filename);
// Transform lsRange into pair<int, int> (offset pairs)
if (!g_config->highlight.lsRanges) {
std::vector<std::pair<lsRange, CclsSemanticHighlightSymbol *>> scratch;
{
std::vector<std::pair<Occur, CclsSemanticHighlightSymbol *>> scratch;
for (auto &entry : grouped_symbols) {
for (auto &range : entry.second.lsRanges)
scratch.emplace_back(range, &entry.second);
entry.second.lsRanges.clear();
for (auto &occur : entry.second.lsOccurs)
scratch.push_back({occur, &entry.second});
entry.second.lsOccurs.clear();
}
std::sort(scratch.begin(), scratch.end(),
[](auto &l, auto &r) { return l.first.start < r.first.start; });
std::sort(scratch.begin(), scratch.end(), [](auto &l, auto &r) { return l.first.range < r.first.range; });
const auto &buf = wfile->buffer_content;
int l = 0, c = 0, i = 0, p = 0;
auto mov = [&](int line, int col) {
Expand All @@ -455,7 +487,7 @@ void emitSemanticHighlight(DB *db, WorkingFile *wfile, QueryFile &file) {
return c < col;
};
for (auto &entry : scratch) {
lsRange &r = entry.first;
lsRange &r = entry.first.range;
if (mov(r.start.line, r.start.character))
continue;
int beg = p;
Expand All @@ -466,8 +498,84 @@ void emitSemanticHighlight(DB *db, WorkingFile *wfile, QueryFile &file) {
}

for (auto &entry : grouped_symbols)
if (entry.second.ranges.size() || entry.second.lsRanges.size())
if (entry.second.ranges.size() || entry.second.lsOccurs.size())
params.symbols.push_back(std::move(entry.second));
pipeline::notify("$ccls/publishSemanticHighlight", params);
}

/// Handles `textDocument/semanticTokens/full` by forwarding to the range
/// handler with a range that starts at the beginning of the document and
/// ends at line UINT16_MAX, character INT16_MAX.
void MessageHandler::textDocument_semanticTokensFull(TextDocumentParam &param, ReplyOnce &reply) {
  lsRange whole_document{{0, 0}, {UINT16_MAX, INT16_MAX}};
  SemanticTokensRangeParams range_request{param.textDocument, whole_document};
  textDocument_semanticTokensRange(range_request, reply);
}

/// Handles `textDocument/semanticTokens/range`.
///
/// Looks up the file named by `param.textDocument`, computes its semantic
/// tokens and replies with the flat integer encoding: five integers per token
/// (line delta, start-character delta, length, token type, modifier bitset),
/// where the deltas are relative to the previously emitted token.
///
/// Occurrences that are empty or that end on a different line than they start
/// are skipped: their length cannot be expressed as
/// `end.character - start.character` and would otherwise be emitted as a
/// zero, negative or meaningless value.
///
/// NOTE(review): the requested range is not consulted here; tokens for the
/// whole file are returned - confirm whether that is intended.
void MessageHandler::textDocument_semanticTokensRange(SemanticTokensRangeParams &param, ReplyOnce &reply) {
  int file_id;
  auto [file, wf] = findOrFail(param.textDocument.uri.getPath(), reply, &file_id);
  if (!wf)
    return;

  // Flatten all occurrences of all symbols into one list ordered by position,
  // because the encoding below is relative to the previous token.
  auto grouped_symbols = computeSemanticTokens(db, wf, *file);
  std::vector<std::pair<Occur, CclsSemanticHighlightSymbol *>> scratch;
  for (auto &entry : grouped_symbols) {
    for (auto &occur : entry.second.lsOccurs)
      scratch.emplace_back(occur, &entry.second);
    entry.second.lsOccurs.clear();
  }
  std::sort(scratch.begin(), scratch.end(), [](auto &l, auto &r) { return l.first.range < r.first.range; });

  SemanticTokensPartialResult result;
  result.data.reserve(scratch.size() * 5);
  int line = 0, column = 0;
  for (auto &entry : scratch) {
    lsRange &r = entry.first.range;
    CclsSemanticHighlightSymbol &symbol = *entry.second;
    // A token must have a positive length on a single line.
    if (r.start.line != r.end.line || r.end.character <= r.start.character)
      continue;

    // The start character is relative to the previous token only when both
    // are on the same line; otherwise it is relative to column 0.
    if (r.start.line != line)
      column = 0;
    result.data.push_back(r.start.line - line);
    line = r.start.line;
    result.data.push_back(r.start.character - column);
    column = r.start.character;
    result.data.push_back(r.end.character - r.start.character);

    // Token type: StaticMethod is reported as Method with the `static`
    // modifier; the remaining extension kinds are renumbered so that they
    // follow the standard kinds contiguously, starting at FirstNonStandard.
    int tokenType = (int)symbol.kind, modifier = 0;
    if (tokenType == (int)SymbolKind::StaticMethod) {
      tokenType = (int)SymbolKind::Method;
      modifier |= 1 << (int)TokenModifier::Static;
    } else if (tokenType >= (int)SymbolKind::FirstExtension) {
      tokenType += (int)SymbolKind::FirstNonStandard - (int)SymbolKind::FirstExtension;
    }

    // Set modifiers derived from the role of this occurrence and from the
    // storage class of the symbol.
    if (entry.first.role & Role::Declaration)
      modifier |= 1 << (int)TokenModifier::Declaration;
    if (entry.first.role & Role::Definition)
      modifier |= 1 << (int)TokenModifier::Definition;
    if (entry.first.role & Role::Read)
      modifier |= 1 << (int)TokenModifier::Read;
    if (entry.first.role & Role::Write)
      modifier |= 1 << (int)TokenModifier::Write;
    if (symbol.storage == SC_Static)
      modifier |= 1 << (int)TokenModifier::Static;

    // Scope modifier: at most one of class/namespace/function scope is set,
    // chosen from the symbol's own kind first and its parent's kind second.
    if (llvm::is_contained({SymbolKind::Constructor, SymbolKind::Field, SymbolKind::Method, SymbolKind::StaticMethod},
                           symbol.kind))
      modifier |= 1 << (int)TokenModifier::ClassScope;
    else if (llvm::is_contained({SymbolKind::File, SymbolKind::Namespace}, symbol.parentKind))
      modifier |= 1 << (int)TokenModifier::NamespaceScope;
    else if (llvm::is_contained(
                 {SymbolKind::Constructor, SymbolKind::Function, SymbolKind::Method, SymbolKind::StaticMethod},
                 symbol.parentKind))
      modifier |= 1 << (int)TokenModifier::FunctionScope;

    // Rainbow semantic tokens: add one of the Id0.. modifiers selected by the
    // symbol id. At most 20 rainbow modifiers exist, and the highest one must
    // still fit in the int bitset.
    static_assert((int)TokenModifier::Id0 + 20 < 31);
    if (int rainbow = g_config->highlight.rainbow)
      modifier |= 1 << ((int)TokenModifier::Id0 + symbol.id % std::min(rainbow, 20));

    result.data.push_back(tokenType);
    result.data.push_back(modifier);
  }

  reply(result);
}

} // namespace ccls
Loading

0 comments on commit cc13ced

Please sign in to comment.