diff --git a/language-nix/CHANGELOG.md b/language-nix/CHANGELOG.md index e26aa937..3ea3bba7 100644 --- a/language-nix/CHANGELOG.md +++ b/language-nix/CHANGELOG.md @@ -10,4 +10,13 @@ `quote`, `needsQuoting` and `Pretty` will take this list into account and quote such identifiers. However, `HasParser` will _not_ reject them even if they are unquoted. -* Add an hspec/QuickCheck based test suite. +* Resolved discrepancies between `Language.Nix.Identifier` and Nix w.r.t. + quoting and escaping: + + - Fixed missing escaping of some Nix syntax elements, e.g. in the case of + `ident # "${foo}"`. + - Pretty printing `Identifier`s will no longer produce escape sequences + Haskell supports, but Nix doesn't. + - Parsing `Identifier`s won't interpret escape sequences that Nix wouldn't + understand. +* Added an hspec/QuickCheck based test suite. diff --git a/language-nix/language-nix.cabal b/language-nix/language-nix.cabal index 49abdfd9..00818672 100644 --- a/language-nix/language-nix.cabal +++ b/language-nix/language-nix.cabal @@ -45,4 +45,5 @@ test-suite hspec , lens , parsec-class , pretty + , process default-language: Haskell2010 diff --git a/language-nix/src/Language/Nix/Identifier.hs b/language-nix/src/Language/Nix/Identifier.hs index 4c69d11f..056adf0e 100644 --- a/language-nix/src/Language/Nix/Identifier.hs +++ b/language-nix/src/Language/Nix/Identifier.hs @@ -87,6 +87,11 @@ instance Pretty Identifier where -- | Note that this parser is more lenient than Nix w.r.t. simple identifiers, -- since it will accept 'nixKeywords'. +-- +-- Naturally, it does not support string interpolation, but does not reject +-- strings that contain them. E.g. the string literal @"hello ${world}"@ +-- will contain @${world}@ verbatim after parsing. Do not rely on this +-- behavior, as it may be changed in the future. instance HasParser Identifier where parser = parseQuotedIdentifier <|> parseSimpleIdentifier @@ -108,17 +113,24 @@ parseQuotedIdentifier :: CharParser st tok m Identifier parseQuotedIdentifier = Identifier <$> qstring where qstring :: CharParser st tok m String - qstring = do txt <- between (P.char '"') (P.char '"') (many qtext) - return (read ('"' : concat txt ++ ['"'])) + qstring = between (P.char '"') (P.char '"') (many qtext) - qtext :: CharParser st tok m String - qtext = quotedPair <|> many1 (P.noneOf "\\\"") + qtext :: CharParser st tok m Char + qtext = quotedPair <|> P.noneOf "\\\"" - quotedPair :: CharParser st tok m String + quotedPair :: CharParser st tok m Char quotedPair = do - c1 <- P.char '\\' - c2 <- anyChar - return [c1,c2] + _ <- P.char '\\' + c <- anyChar + -- See https://github.com/NixOS/nix/blob/2d83bc6b83763290e9bbf556209927ba469956aa/src/libexpr/lexer.l#L54-L60 + return $ case c of + 'n' -> '\n' + 't' -> '\t' + 'r' -> '\r' + -- Note that this handles actual escapes like \" and \\ and + -- bogus cases like \f which Nix doesn't fail on (despite not + -- supporting it), but simply maps to plain f + _ -> c -- | Checks whether a given string needs quoting when interpreted as an -- 'Identifier'. @@ -142,5 +154,28 @@ nixKeywords = -- abc -- >>> putStrLn (quote "abc.def") -- "abc.def" +-- >>> putStrLn (quote "$foo") +-- "$foo" +-- >>> putStrLn (quote "${foo}") +-- "\${foo}" quote :: String -> String -quote s = if needsQuoting s then show s else s +quote s = if needsQuoting s then '"' : quote' s else s + where + quote' (c1:c2:cs) = escapeChar c1 (Just c2) ++ quote' (c2:cs) + quote' (c:cs) = escapeChar c Nothing ++ quote' cs + quote' "" = "\"" + +escapeChar :: Char -> Maybe Char -> String +escapeChar c1 c2 = + case c1 of + -- supported escape sequences, see quotedPair above + -- N.B. technically, we only need to escape \r (since Nix converts raw \r to \n), + -- but it's nicer to escape what we can. + '\n' -> "\\n" + '\t' -> "\\t" + '\r' -> "\\r" + -- syntactically significant in doubly quoted strings + '\\' -> "\\\\" + '"' -> "\\\"" + '$' | c2 == Just '{' -> "\\$" + _ -> [c1] diff --git a/language-nix/test/hspec.hs b/language-nix/test/hspec.hs index 3104877c..be5068a0 100644 --- a/language-nix/test/hspec.hs +++ b/language-nix/test/hspec.hs @@ -1,10 +1,14 @@ +{-# LANGUAGE ScopedTypeVariables #-} module Main (main) where +import Control.Exception import Control.Lens import Control.Monad (forM_) import Data.Char (isAscii, isSpace) +import Data.List (dropWhileEnd) import Data.String (fromString) import Language.Nix.Identifier +import System.Process (callProcess, readCreateProcess, proc) import Test.Hspec import Test.QuickCheck import Text.Parsec.Class (parseM) @@ -26,6 +30,18 @@ main = hspec $ do identProperty $ \i -> parseM "Identifier" (prettyShow i) == Just (i :: Identifier) it "can parse the result of quote" $ stringIdentProperty $ \str -> parseM "Identifier" (quote str) == Just (ident # str) + it "parses redundant escape sequences" $ + forM_ + [ ("\"\\f\"", "f") + , ("\"echo \\$var\"", "echo $var") + , ("\"\\h\\e\\l\\l\\o\\ \\w\\or\\l\\d\"", "hello world") + -- \t and \n don't need to be escaped, though it's advisable + , ("\"only\\ttechnically\nredundant\"", "only\ttechnically\nredundant") + ] + $ \(i, e) -> do + let e' = Just (ident # e) + parseM "Identifier" i `shouldBe` e' + parseM "Identifier" ("\"" ++ e ++ "\"") `shouldBe` e' describe "nixKeywords" $ do it "are quoted" $ forM_ nixKeywords $ \str -> do @@ -39,6 +55,32 @@ main = hspec $ do any isSpace s ==> needsQuoting s it "if length is zero" $ shouldSatisfy "" needsQuoting + describe "nix-instantiate" $ do + nixInstantiate <- runIO $ do + (callProcess nixInstantiateBin [ "--version" ] >> pure (Just nixInstantiateBin)) + `catch` (\(_ :: SomeException) -> pure Nothing) + let nix :: Example a => String -> (String -> a) -> SpecWith (Arg a) + nix str spec = + case nixInstantiate of + Nothing -> it str $ \_ -> + pendingWith (nixInstantiateBin ++ " could not be found or executed") + Just exec -> it str $ spec exec + + nix "parses and produces result of quote" $ \exec -> stringIdentProperty $ \str -> ioProperty $ do + let expAttr = quote str + expr = "{" ++ expAttr ++ "=null;}" + + out <- readCreateProcess (proc exec ["--eval", "--strict", "-E", expr]) "" + pure $ extractIdentSyntax out === expAttr + + nix "produces parseM-able identifiers" $ \exec -> identProperty $ \i -> ioProperty $ do + let expr = "{" ++ prettyShow i ++ "=null;}" + out <- readCreateProcess (proc exec ["--eval", "--strict", "-E", expr]) "" + pure $ parseM "Identifier" (extractIdentSyntax out) == Just i + +nixInstantiateBin :: String +nixInstantiateBin = "nix-instantiate" + stringIdentProperty :: Testable prop => (String -> prop) -> Property stringIdentProperty p = property $ \s -> '\0' `notElem` s ==> classify (needsQuoting s) "need quoting" $ p s @@ -46,3 +88,16 @@ stringIdentProperty p = property $ \s -> identProperty :: Testable prop => (Identifier -> prop) -> Property identProperty p = property $ \i -> classify (needsQuoting (from ident # i)) "need quoting" $ p i + +-- | Given the (pretty) printed representation of the Nix value produced by the +-- expression @{ ${ident} = null; }@, for any value of @ident@, extract the +-- part that represents the identifier. +-- +-- Note that pretty printing is buggy in some versions of Nix and the result +-- may not actually be valid Nix syntax. +extractIdentSyntax :: String -> String +extractIdentSyntax = + dropWhileEnd (`elem` "= \n\t") -- remove "… = " + . dropWhileEnd (`elem` "null") -- remove "null" + . dropWhileEnd (`elem` ";} \n\t") -- remove "…; }" + . dropWhile (`elem` "{ \n\t") -- remove "{ …"