-
-
Notifications
You must be signed in to change notification settings - Fork 31
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement basic one-off language injections #82
base: master
Are you sure you want to change the base?
Changes from 14 commits
4448751
02610cb
1848120
6b6df0e
a442904
d709114
88c8dce
9bc2749
82d8b4c
6d88fd6
d8086c8
f97fee3
43664e2
d675347
ade4a64
7d2f79b
f47a403
328d644
ce8ee54
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
package org.nixos.idea.psi | ||
|
||
import com.intellij.openapi.util.TextRange | ||
import com.intellij.psi.LiteralTextEscaper | ||
import com.intellij.psi.PsiLanguageInjectionHost | ||
import org.intellij.lang.annotations.Language | ||
import org.nixos.idea.psi.impl.AbstractNixString | ||
|
||
class NixStringLiteralEscaper(host: AbstractNixString) : LiteralTextEscaper<PsiLanguageInjectionHost>(host) { | ||
|
||
override fun isOneLine(): Boolean = false | ||
|
||
private var outSourceOffsets: IntArray? = null | ||
|
||
/** | ||
 * Range of the host text that is exposed for injection: the host text without its first two | ||
 * and last two characters (presumably the `''` delimiters of an indented string). | ||
 * Hosts of four characters or fewer yield [TextRange.EMPTY_RANGE]. | ||
 */ | ||
override fun getRelevantTextRange(): TextRange { | ||
    val hostLength = myHost.textLength | ||
    return if (hostLength <= 4) TextRange.EMPTY_RANGE else TextRange.create(2, hostLength - 2) | ||
} | ||
|
||
override fun decode(rangeInsideHost: TextRange, outChars: StringBuilder): Boolean { | ||
// TODO issue #81 only indented strings supported for now | ||
// single line strings require a new decode function because | ||
// it uses different escaping mechanisms | ||
if (myHost !is NixIndString) return false | ||
|
||
val subText: String = rangeInsideHost.substring(myHost.text) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. issue (non-blocking): I think this is technically wrong.
According to my understanding of the documentation, if the range starts between the first and last character of |
||
val array = IntArray(subText.length + 1) | ||
val success = unescapeAndDecode(subText, outChars, array) | ||
outSourceOffsets = array | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. praise: I like this solution of creating a lookup table. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wish I could take credit, I borrowed the idea from terraform |
||
return success | ||
} | ||
|
||
/** | ||
 * Maps an offset in the decoded text back to an offset in the host element, using the lookup | ||
 * table recorded by the most recent call to [decode]. | ||
 * | ||
 * @param offsetInDecoded offset into the decoded (guest language) text. | ||
 * @param rangeInsideHost range inside the host that the recorded offsets are relative to. | ||
 * @return the offset in the host element, or -1 if [offsetInDecoded] lies outside the lookup table. | ||
 * @throws IllegalStateException if [decode] has not stored a lookup table yet. | ||
 */ | ||
override fun getOffsetInHost(offsetInDecoded: Int, rangeInsideHost: TextRange): Int { | ||
    val offsets = outSourceOffsets ?: throw IllegalStateException("#decode was not called") | ||
    // Report out-of-range offsets as -1. Feeding the sentinel into coerceIn would clamp it to 0 | ||
    // and silently map it onto the start of the range; getOrNull also guards negative indices. | ||
    val result = offsets.getOrNull(offsetInDecoded) ?: return -1 | ||
    return result.coerceIn(0..rangeInsideHost.length) + rangeInsideHost.startOffset | ||
} | ||
|
||
companion object { | ||
/** | ||
* Does not consider interpolations so that | ||
* they do appear in the guest language and remain when we end up converting back to Nix. | ||
* | ||
* @return `true` if the whole input was decoded, `false` if it ends in an incomplete escape sequence or contains an unsupported one. | ||
JojOatXGME marked this conversation as resolved.
Show resolved
Hide resolved
|
||
*/ | ||
fun unescapeAndDecode(chars: String, outChars: StringBuilder, sourceOffsets: IntArray?): Boolean { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. note: I am not entirely happy with having two implementations to parse/unescape strings now. Anyway, that is not important right now. I may merge them at some point in the future. |
||
assert(sourceOffsets == null || sourceOffsets.size == chars.length + 1) | ||
|
||
var index = 0 | ||
val outOffset = outChars.length | ||
var braces = 0 | ||
var indentSoFar = 0 | ||
val minIndent = chars.lines() | ||
.filterNot { it.isEmpty() } | ||
.minOfOrNull { it.takeWhile(Char::isWhitespace).count() } ?: 0 | ||
|
||
|
||
while (index < chars.length) { | ||
fun updateOffsets(index: Int) { | ||
if (sourceOffsets != null) { | ||
sourceOffsets[outChars.length - outOffset] = index - 1 | ||
sourceOffsets[outChars.length - outOffset + 1] = index | ||
} | ||
} | ||
|
||
var c = chars[index++] | ||
updateOffsets(index) | ||
|
||
|
||
if (braces > 0) { | ||
if (c == '{') braces++ | ||
else if (c == '}') braces-- | ||
outChars.append(c) | ||
continue | ||
} | ||
|
||
if (c == '\n' && index < chars.length - 1) { | ||
// we know that the next n chars are going to be whitespace indent | ||
index += minIndent | ||
outChars.append(c) | ||
if (sourceOffsets != null) { | ||
sourceOffsets[outChars.length - outOffset] = index | ||
} | ||
continue | ||
} | ||
|
||
if (c == '\'') { | ||
if (index == chars.length) return false | ||
c = chars[index++] | ||
|
||
if (c != '\'') { | ||
// if what follows isn't another ' then we are not escaping anything, | ||
// so we can backtrace and continue | ||
outChars.append("\'") | ||
index-- | ||
continue | ||
} | ||
|
||
if (index == chars.length) return false | ||
c = chars[index++] | ||
|
||
when (c) { | ||
// '' can be escaped by prefixing it with ', i.e., '''. | ||
'\'' -> { | ||
outChars.append("\'") | ||
updateOffsets(index - 1) | ||
outChars.append(c) | ||
} | ||
// $ can be escaped by prefixing it with '' (that is, two single quotes), i.e., ''$. | ||
'$' -> outChars.append(c) | ||
'\\' -> { | ||
if (index == chars.length) return false | ||
c = chars[index++] | ||
when (c) { | ||
// Linefeed, carriage-return and tab characters can | ||
// be written as ''\n, ''\r, ''\t, and ''\ escapes any other character. | ||
'a' -> outChars.append(0x07.toChar()) | ||
'b' -> outChars.append('\b') | ||
'f' -> outChars.append(0x0c.toChar()) | ||
'n' -> outChars.append('\n') | ||
't' -> outChars.append('\t') | ||
'r' -> outChars.append('\r') | ||
'v' -> outChars.append(0x0b.toChar()) | ||
else -> return false | ||
} | ||
} | ||
|
||
else -> return false | ||
} | ||
if (sourceOffsets != null) { | ||
sourceOffsets[outChars.length - outOffset] = index | ||
} | ||
continue | ||
} | ||
|
||
outChars.append(c) | ||
} | ||
return true | ||
} | ||
} | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
package org.nixos.idea.psi | ||
|
||
import com.intellij.openapi.util.TextRange | ||
import com.intellij.psi.AbstractElementManipulator | ||
import com.intellij.refactoring.suggested.startOffset | ||
import org.nixos.idea.psi.impl.AbstractNixString | ||
import org.nixos.idea.util.NixIndStringUtil | ||
import org.nixos.idea.util.NixStringUtil | ||
|
||
class NixStringManipulator : AbstractElementManipulator<NixString>() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. question (non-blocking): Just curious, do you know why we need this? I mean, we already override There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I do not know, no; again, I followed the example. Your guess is as good as mine! The name of |
||
|
||
/** | ||
 * Writes a change made to the guest-language fragment back into the host-language text. | ||
 * | ||
 * The given [range] of the element's text is replaced with [newContent] exactly as received | ||
 * (no escaping is applied here), and the resulting text is handed to `updateText`. | ||
 * | ||
 * @return the result of `updateText` if it is a [NixString], otherwise `null`. | ||
 */ | ||
override fun handleContentChange(element: NixString, range: TextRange, newContent: String): NixString? = | ||
    element.updateText(range.replace(element.text, newContent)) as? NixString | ||
|
||
override fun getRangeInElement(element: NixString): TextRange = when { | ||
element.textLength == 0 -> TextRange.EMPTY_RANGE | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. note: This first case seems unreachable to me. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To me too (and it should be unreachable in the HCL plugin too, but they also do it there). My guess is they just wanted to cover all the codepaths? Or maybe it's possible to have non-compiling code that returns zero-length tokens. |
||
element is NixIndString && element.textLength < 4 -> TextRange(0, element.textLength) | ||
element is NixIndString -> TextRange(2, element.textLength - 2) | ||
// element is not IndString, so it must be StdString | ||
element.textLength == 1 -> TextRange(0, 1) | ||
else -> TextRange(1, element.textLength - 1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. question (non-blocking): Is it important that the returned range only contains the content of the string? Just wondering because this looks as you may return the quotes as part of the range in some cases. (Specifically when the string is not closed.) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. correct, I also don't know what the consequences of this are. My reference has the same. I can always cut out quotes if you want, I have no opinion here |
||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
package org.nixos.idea.psi.impl | ||
|
||
import com.intellij.lang.ASTNode | ||
import com.intellij.openapi.diagnostic.Logger | ||
import com.intellij.psi.PsiLanguageInjectionHost | ||
import com.intellij.psi.impl.source.tree.LeafPsiElement | ||
import org.nixos.idea.psi.NixIndString | ||
import org.nixos.idea.psi.NixString | ||
import org.nixos.idea.psi.NixStringLiteralEscaper | ||
|
||
|
||
abstract class AbstractNixString(private val astNode: ASTNode) : PsiLanguageInjectionHost, | ||
AbstractNixPsiElement(astNode), NixString { | ||
|
||
override fun isValidHost() = true | ||
JojOatXGME marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
override fun updateText(s: String): NixString { | ||
// TODO issue #81 also support single-line strings | ||
if (this !is NixIndString) { | ||
LOG.info("not a nix ind string") | ||
return this | ||
} | ||
val originalNode = astNode.firstChildNode.treeNext.firstChildNode as? LeafPsiElement | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. issue: This method doesn't seem to escape the input, and may therefore also remove existing escape sequences. For some reason, some lines were also duplicated for me after trying it with the following code: pkgs.writeShellScript "my-script.sh" ''
first_of_array=''${ARRAY[0]}
from_nix=${lib.escapeShellArg someVar}
'' There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is a good catch. I was aware this method did not escape the input and would remove existing escape sequences. This is because at the stage where we get the bash text, I have no way to know if the coming I think this is where the method of not injecting in the text fragment may have reached its limits :/ |
||
val minIndentInOriginal = originalNode?.text?.lines() | ||
?.filterNot { it.isEmpty() } | ||
?.minOfOrNull { it.takeWhile(Char::isWhitespace).count() } ?: 0 | ||
|
||
val leadingSpace = buildString { repeat(minIndentInOriginal) { append(' ') } } | ||
|
||
val lines = s.substring(2..(s.lastIndex - 2)) // remove quotes | ||
.lines() | ||
|
||
// restore indent | ||
val withIndent = lines | ||
.withIndex() | ||
.map { (index, line) -> if (index != 0) leadingSpace + line else line } | ||
|
||
// if the first line was removed in the fragment, add it back to preserve a multiline string | ||
val withLeadingBlankLine = if (lines.first().isNotEmpty()) listOf("") + withIndent else withIndent | ||
|
||
originalNode?.replaceWithText(withLeadingBlankLine.joinToString(separator = System.lineSeparator())) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. issue: This line throws an exception when using line feeds which don't match the default on the system. We probably have to use the line feeds configured in IntelliJ for the current file, instead of using See exception I got on Windows
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmmm I am not sure how to get a line separator from the IDEA API. I cannot use I will replace this with |
||
return this | ||
} | ||
|
||
override fun createLiteralTextEscaper() = NixStringLiteralEscaper(this) | ||
|
||
companion object { | ||
val LOG = Logger.getInstance(AbstractNixString::class.java) | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
package org.nixos.idea.util | ||
|
||
object NixIndStringUtil { | ||
/** | ||
* Unescapes the content of an indented string (`''…''`) in the Nix Expression Language. | ||
* | ||
* See [Nix docs](https://nix.dev/manual/nix/2.22/language/values.html#type-string) for the logic, which | ||
* is non-trivial. | ||
* | ||
* For example, `'` can be used to escape `''`, which means `'''` does not contain | ||
* a string terminator | ||
* ``` | ||
* $ nix eval --expr " '' ''' '' " | ||
* "'' " | ||
* ``` | ||
* | ||
* This function does not erase string interpolations, because | ||
* they are hard to parse in a loop without a proper grammar. For example: | ||
JojOatXGME marked this conversation as resolved.
Show resolved
Hide resolved
|
||
* ```nix | ||
* '' ${someNixFunc "${foo "}}" }" } '' | ||
* ``` | ||
*/ | ||
@JvmStatic | ||
fun unescape(chars: CharSequence): String = buildString { | ||
for ((index, c) in chars.withIndex()) { | ||
fun prevChar() = chars.getOrNull(index - 1) | ||
fun prev2Chars(): String? { | ||
val prev = prevChar() ?: return null | ||
val prevPrev = chars.getOrNull(index - 2) ?: return null | ||
return "${prevPrev}${prev}" | ||
} | ||
|
||
fun prev3Chars(): String? { | ||
val prev2 = prev2Chars() ?: return null | ||
val prevPrev2 = chars.getOrNull(index - 3) ?: return null | ||
return "${prevPrev2}${prev2}" | ||
} | ||
|
||
when (c) { | ||
JojOatXGME marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// ''\ escapes any character, but we can only cover known ones in advance: | ||
'\'' -> when { | ||
// ''' is escaped to '' | ||
prev2Chars() == "''" -> append("''") | ||
// '' is the string delimiter | ||
else -> continue | ||
} | ||
|
||
'\\' -> when { | ||
prev2Chars() == "''" -> continue | ||
prevChar() == '\'' -> continue | ||
else -> append(c) | ||
} | ||
|
||
'$' -> if (prevChar() == '$') append(c) else continue | ||
'{' -> if (prevChar() == '$') append("\${") else append(c) | ||
|
||
else -> if (prev3Chars() == "''\\") when (c) { | ||
'r' -> if (prev3Chars() == "''\\") append('\r') else append(c) | ||
'n' -> if (prev3Chars() == "''\\") append('\n') else append(c) | ||
't' -> if (prev3Chars() == "''\\") append('\t') else append(c) | ||
else -> append("''\\").append(c) | ||
} else { | ||
append(c) | ||
} | ||
} | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package org.nixos.idea.util; | ||
|
||
import org.junit.jupiter.params.ParameterizedTest; | ||
import org.junit.jupiter.params.provider.CsvSource; | ||
import org.nixos.idea.psi.NixStringLiteralEscaper; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
|
||
/** | ||
 * Parameterized tests for {@code NixStringLiteralEscaper.unescapeAndDecode}. | ||
 * Each CSV row holds an escaped input followed by the expected decoded output. | ||
 */ | ||
final class NixIndStringUtilTest { | ||
@ParameterizedTest(name = "[{index}] {0} -> {1}") | ||
@CsvSource(quoteCharacter = '|', textBlock = """ | ||
|| , || | ||
abc , abc | ||
" , " | ||
\\ , \\ | ||
\\x , \\x | ||
a${b}c , a${b}c | ||
|\n| , |\n| | ||
|\r| , |\r| | ||
|\t| , |\t| | ||
|''\\t| , |\t| | ||
|''\\r| , |\r| | ||
|''\\n| , |\n| | ||
|'''| , |''| | ||
$$ , $$ | ||
''$ , $ | ||
# supplementary character, i.e. character form a supplementary plane, | ||
# which needs a surrogate pair to be represented in UTF-16 | ||
\uD83C\uDF09 , \uD83C\uDF09 | ||
""") | ||
void unescape(String escaped, String expectedResult) { | ||
var sb = new StringBuilder(); | ||
// No offset table is requested (null); the boolean result of the call is not checked, | ||
// only the text appended to the builder. | ||
NixStringLiteralEscaper.Companion.unescapeAndDecode(escaped, sb, null); | ||
var str = sb.toString(); | ||
assertEquals(expectedResult, str); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
question (non-blocking): What is this used for? Some strings may only be one line, right? Maybe we should return `true` for `NixStdString`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I took the example from HCL which also always returns false. I will try to implement proper smart logic, see if that makes any difference, but I suspected it might be best to copy my reference for now 🤷