You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
We currently use Position which is a (line, column) pair.
This means we spend a lot of time on Source.offsetToPosition when parsing.
Instead, we should just use byte offsets to mark positions, computing the line & column on demand (and caching it).
I tried this a long time ago (almost 6 months), but I'm attaching the code that somewhat worked back then, it just needs a clean up (refactoring :)) and some benchmarking (used to be something like 20-30% faster parsing!). It also needs some proper LSP testing: it worked on my machine for at least a few minutes, but that's probably not good enough -- I'm a bit scared that there could be bugs that make the LSP much slower to use.
Note that this change is across Effekt andour fork of Kiama.
Here's the code I used for this change in the Effekt codebase:
diff --git i/effekt/js/src/main/scala/effekt/LanguageServer.scala w/effekt/js/src/main/scala/effekt/LanguageServer.scala
index ecbcbd73..9cc157fc 100644
--- i/effekt/js/src/main/scala/effekt/LanguageServer.scala+++ w/effekt/js/src/main/scala/effekt/LanguageServer.scala@@ -175,7 +175,7 @@ class LanguageServer extends Intelligence {
new lsp.Position(p.line - 1, p.column - 1)
private def fromLSPPosition(p: lsp.Position, source: Source): Position =
- Position(p.line + 1, p.character + 1, source)+ source.lineAndColumnToPosition(p.line, p.character)
private def toLSPLocation(from: Position, to: Position): Option[lsp.Location] = from.source match {
case VirtualFileSource(path) => Some(new lsp.Location(from.source.name, new lsp.Range(toLSPPosition(from), toLSPPosition(to))))
diff --git i/effekt/shared/src/main/scala/effekt/RecursiveDescent.scala w/effekt/shared/src/main/scala/effekt/RecursiveDescent.scala
index 4c117084..46b052d0 100644
--- i/effekt/shared/src/main/scala/effekt/RecursiveDescent.scala+++ w/effekt/shared/src/main/scala/effekt/RecursiveDescent.scala@@ -39,7 +39,7 @@ class RecursiveDescent(positions: Positions, tokens: Seq[Token], source: Source)
val to = source.offsetToPosition(value.end + 1)
Some(Range(from, to))
case None =>
- val pos = Position(0, 0, source)+ val pos = Position(0, source)
Some(Range(pos, pos))
}
and in Kiama:
diff --git i/jvm/src/main/scala/kiama/util/Server.scala w/jvm/src/main/scala/kiama/util/Server.scala
index 23e39b0..32f7574 100644
--- i/jvm/src/main/scala/kiama/util/Server.scala+++ w/jvm/src/main/scala/kiama/util/Server.scala@@ -322,12 +322,7 @@ trait Server[N, C <: Config, M <: Message] extends Compiler[C, M] with LanguageS
def positionOfStartFinish(optStart: Option[Position], optFinish: Option[Position]): Option[(Int, Int)] =
(optStart, optFinish) match {
case (Some(start), Some(finish)) =>
- (start.optOffset, finish.optOffset) match {- case (Some(s), Some(f)) =>- Some((s, f))- case _ =>- None- }+ Some((start.offset, finish.offset))
case _ =>
None
}
@@ -512,7 +507,7 @@ class Services[N, C <: Config, M <: Message](
def positionOfNotification(document: TextDocumentIdentifier, position: LSPPosition): Option[Position] =
server.sources.get(document.getUri).map(source => {
- Position(position.getLine + 1, position.getCharacter + 1, source)+ source.lineAndColumnToPosition(position.getLine, position.getCharacter)
})
@JsonNotification("textDocument/codeAction")
diff --git i/shared/src/main/scala/kiama/util/Positions.scala w/shared/src/main/scala/kiama/util/Positions.scala
index 64e580d..ba9bd83 100644
--- i/shared/src/main/scala/kiama/util/Positions.scala+++ w/shared/src/main/scala/kiama/util/Positions.scala@@ -15,7 +15,13 @@ package util
* Record of a source position at a particular line and column relative to
* a given source. The line and column numbers are one-indexed.
*/
-case class Position(line: Int, column: Int, source: Source) {+case class Position(offset: Int, source: Source) {+ lazy val (line, column) = source.lineStarts.lastIndexWhere(offset >= _) match {+ case -1 =>+ (0, 0)+ case line =>+ (line + 1, offset - source.lineStarts(line) + 1)+ }
/**
* Format this position. The result is of the form `/foo/bar.txt:2:10:` if
@@ -37,24 +43,12 @@ case class Position(line: Int, column: Int, source: Source) {
lazy val optContext: Option[String] =
source.optLineContents(line).map(s => s"$s\n${" " * (column - 1)}^")
- /**- * Return the offset that this position refers to in its source. `None`- * is returned if the position is not valid for its source.- */- lazy val optOffset: Option[Int] =- source.positionToOffset(this)-
/**
* Apply a binary Boolean operation to this position and another one,
* as offsets, return the result.
*/
def op2(op: (Int, Int) => Boolean, p: Position): Boolean =
- (optOffset, p.optOffset) match {- case (Some(l), Some(r)) =>- op(l, r)- case (l, r) =>- false- }+ op(this.offset, p.offset)
/**
* Does this position occur no later than `p`? The two positions
@@ -84,7 +78,7 @@ case class Position(line: Int, column: Int, source: Source) {
case class Range(from: Position, to: Position)
object Range {
- def empty(source: Source): Range = Range(Position(1, 1, source), Position(1, 1, source))+ def empty(source: Source): Range = Range(Position(0, source), Position(0, source))
}
/**
@@ -205,13 +199,8 @@ class Positions {
* positions doesn't refer to a valid offset in the source then
* `None` is returned.
*/
- def substring(s: Position, f: Position): Option[String] =- (s.optOffset, f.optOffset) match {- case (Some(soffset), Some(foffset)) =>- Some(s.source.content.substring(soffset, foffset))- case _ =>- None- }+ def substring(s: Position, f: Position): String =+ s.source.content.substring(s.offset, f.offset)
/**
* If `t` has valid start and finish positions, return the source text
@@ -222,7 +211,7 @@ class Positions {
def textOf[T](t: T): Option[String] =
(getStart(t), getFinish(t)) match {
case (Some(start), Some(finish)) =>
- substring(start, finish)+ Some(substring(start, finish))
case _ =>
None
}
diff --git i/shared/src/main/scala/kiama/util/Source.scala w/shared/src/main/scala/kiama/util/Source.scala
index 58d300d..814157b 100644
--- i/shared/src/main/scala/kiama/util/Source.scala+++ w/shared/src/main/scala/kiama/util/Source.scala@@ -65,29 +65,23 @@ trait Source {
* Convert an offset into the content into a position.
*/
def offsetToPosition(offset: Int): Position =
- lineStarts.lastIndexWhere(offset >= _) match {+ Position(offset, source = this)+ /*lineStarts.lastIndexWhere(offset >= _) match {
case -1 =>
- Position(0, 0, this)+ Position(0, this)
case line =>
Position(line + 1, offset - lineStarts(line) + 1, this)
- }+ }*/
/**
* If the position is valid for this source, return the corresponding
* offset into the content, otherwise return `None`.
*/
- def positionToOffset(position: Position): Option[Int] = {- val line = position.line- if ((line >= 1) && (line <= lineCount)) {- val lineStart = lineStarts(line - 1)- val column = position.column- if ((column >= 1) && (column <= lineFinish(line) - lineStart + 1))- Some(lineStart + column - 1)- else- None- } else- None- }+ def positionToOffset(position: Position): Int =+ position.offset++ def lineAndColumnToPosition(line: Int, column: Int): Position =+ Position(lineFinish(line) + column, this)
}
/**
The text was updated successfully, but these errors were encountered:
jiribenes
changed the title
Source positions should be cached byte offsets, not always recomputed line & character pair
Change Position to a cached byte offset, not an always-recomputed line & column pair
Dec 12, 2024
We currently use
Position
which is a(line, column)
pair.This means we spend a lot of time on
Source.offsetToPosition
when parsing.Instead, we should just use byte
offset
s to mark positions, computing the line & column on demand (and caching it).I tried this a long time ago (almost 6 months), but I'm attaching the code that somewhat worked back then, it just needs a clean up (refactoring :)) and some benchmarking (used to be something like 20-30% faster parsing!). It also needs some proper LSP testing: it worked on my machine for at least a few minutes, but that's probably not good enough -- I'm a bit scared that there could be bugs that make the LSP much slower to use.
Note that this change is across Effekt and our fork of Kiama.
Here's the code I used for this change in the Effekt codebase:
and in Kiama:
The text was updated successfully, but these errors were encountered: