Skip to content

Commit 36b3152

Browse files
committed
Add copy-paste scanner and tests
1 parent 72ae99c commit 36b3152

File tree

5 files changed

+91
-6
lines changed

5 files changed

+91
-6
lines changed

src/Xrefcheck/Core.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ data FileInfo = FileInfo
130130
}
131131
deriving stock (Show, Generic)
132132
deriving anyclass NFData
133+
133134
makeLenses ''FileInfo
134135

135136
instance Default FileInfo where

src/Xrefcheck/Scanners/Markdown.hs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,13 @@ module Xrefcheck.Scanners.Markdown
1818
import Universum
1919

2020
import CMarkGFM (Node (..), NodeType (..), PosInfo (..), commonmarkToNode)
21+
import Control.Lens hiding ((^?))
2122
import Control.Monad.Except (MonadError, throwError)
2223
import Data.Aeson.TH (deriveFromJSON)
2324
import Data.ByteString.Lazy qualified as BSL
2425
import Data.DList qualified as DList
2526
import Data.Default (def)
27+
import Data.List (isSubsequenceOf)
2628
import Data.Text qualified as T
2729
import Data.Text.Lazy qualified as LT
2830
import Fmt (Buildable (..), blockListF, nameF, (+|), (|+))
@@ -37,6 +39,8 @@ data MarkdownConfig = MarkdownConfig
3739

3840
deriveFromJSON aesonConfigOption ''MarkdownConfig
3941

42+
makePrisms ''NodeType
43+
4044
defGithubMdConfig :: MarkdownConfig
4145
defGithubMdConfig = MarkdownConfig
4246
{ mcFlavor = GitHub
@@ -176,8 +180,34 @@ nodeExtractInfo input@(Node _ _ nSubs) = do
176180
_ -> return mempty
177181

178182
copyPaste :: Node -> m FileInfoDiff
183+
copyPaste (Node _ (LIST _) nodes) = do
184+
case items of
185+
top : rest | urlIsASubsequence top -> do
186+
let bad = filter (not . urlIsASubsequence) rest
187+
pure mempty { _fidCopyPastes = DList.fromList bad }
188+
_ -> do
189+
pure mempty
190+
where
191+
items = do
192+
(_, nodes', _) <- takeOnly _ITEM nodes
193+
(_, nodes'', _) <- takeOnly _PARAGRAPH nodes'
194+
take 1 $ do
195+
(_, texts, (url, _)) <- takeOnly _LINK nodes''
196+
(pos, _, txt) <- take 1 $ takeOnly _TEXT texts
197+
return (CopyPaste url txt (toPosition pos))
198+
179199
copyPaste _ = pure mempty
180200

201+
takeOnly prizm list = do
202+
Node pos hdr nodes <- list
203+
case hdr^?prizm of
204+
Just res -> return (pos, nodes, res)
205+
Nothing -> []
206+
207+
urlIsASubsequence :: CopyPaste -> Bool
208+
urlIsASubsequence paste =
209+
T.unpack (cpAnchorText paste) `isSubsequenceOf` T.unpack (cpPlainText paste)
210+
181211
merge :: (Monad m, Monoid b) => [a -> m b] -> a -> m b
182212
merge fs a = mconcat <$> traverse ($ a) fs
183213

src/Xrefcheck/Verify.hs

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import Universum
2727
import Control.Concurrent.Async (wait, withAsync)
2828
import Control.Exception (throwIO)
2929
import Control.Monad.Except (MonadError (..))
30+
import Data.Bits (toIntegralSized)
3031
import Data.ByteString qualified as BS
3132
import Data.Map qualified as M
3233
import Data.Text qualified as T
@@ -49,7 +50,6 @@ import Text.Regex.TDFA.Text (Regex, regexec)
4950
import Text.URI (Authority (..), URI (..), mkURI)
5051
import Time (RatioNat, Second, Time (..), ms, threadDelay, timeout)
5152

52-
import Data.Bits (toIntegralSized)
5353
import Xrefcheck.Config
5454
import Xrefcheck.Core
5555
import Xrefcheck.Orphans ()
@@ -70,9 +70,7 @@ deriving newtype instance Semigroup (VerifyResult e)
7070
deriving newtype instance Monoid (VerifyResult e)
7171

7272
instance Buildable e => Buildable (VerifyResult e) where
73-
build vr = case verifyErrors vr of
74-
Nothing -> "ok"
75-
Just errs -> listF errs
73+
build vr = maybe "ok" listF (verifyErrors vr)
7674

7775
verifyOk :: VerifyResult e -> Bool
7876
verifyOk (VerifyResult errors) = null errors
@@ -114,6 +112,7 @@ data VerifyError
114112
| ExternalFtpException FTPException
115113
| FtpEntryDoesNotExist FilePath
116114
| ExternalResourceSomeError Text
115+
| PossiblyIncorrectCopyPaste Text Text
117116
deriving stock (Show, Eq)
118117

119118
instance Buildable VerifyError where
@@ -156,10 +155,15 @@ instance Buildable VerifyError where
156155
"⛂ FTP exception (" +| err |+ ")\n"
157156

158157
FtpEntryDoesNotExist entry ->
159-
"⛂ File or directory does not exist:\n" +| entry |+ "\n"
158+
" File or directory does not exist:\n" +| entry |+ "\n"
160159

161160
ExternalResourceSomeError err ->
162161
"" +| build err |+ "\n\n"
162+
163+
PossiblyIncorrectCopyPaste url text ->
164+
"⛂ Possibly incorrect copy-paste in list with references\n" +|
165+
" the url is " +| build url |+ "\n " +|
166+
" but the text is " +| build text |+ "\n\n"
163167
where
164168
anchorHints = \case
165169
[] -> "\n"
@@ -219,10 +223,19 @@ verifyRepo
219223

220224
progressRef <- newIORef $ initVerifyProgress (map snd toScan)
221225

226+
errorss <- for (M.toList repoInfo) $ \(file, info) -> do
227+
let pasta = _fiCopyPastes info
228+
return
229+
$ VerifyResult
230+
$ fmap (\(CopyPaste url txt pos) ->
231+
WithReferenceLoc file (Reference "" "" Nothing pos)
232+
$ PossiblyIncorrectCopyPaste url txt)
233+
pasta
234+
222235
accumulated <- withAsync (printer progressRef) $ \_ ->
223236
forConcurrentlyCaching toScan ifExternalThenCache $ \(file, ref) ->
224237
verifyReference config mode progressRef repoInfo' root file ref
225-
return $ fold accumulated
238+
return $ fold errorss <> fold accumulated
226239
where
227240
printer progressRef = forever $ do
228241
readIORef progressRef >>= reprintAnalyseProgress rw mode
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{- SPDX-FileCopyrightText: 2019 Serokell <https://serokell.io>
2+
-
3+
- SPDX-License-Identifier: MPL-2.0
4+
-}
5+
6+
module Test.Xrefcheck.CopyPasteInListsSpec where
7+
8+
import Universum
9+
10+
import Test.Hspec (Spec, describe, it, shouldBe)
11+
12+
import Test.Xrefcheck.Util
13+
import Xrefcheck.Core
14+
15+
spec :: Spec
16+
spec = do
17+
describe "Possibly incorrect copy-paste" $ do
18+
for_ allFlavors $ \fl -> do
19+
it ("is detected (" <> show fl <> ")") $ do
20+
fi <- getFI fl "tests/markdowns/without-annotations/copy-paste_in_lists.md"
21+
getPasta fi `shouldBe`[("a", "c")]
22+
where
23+
getPasta :: FileInfo -> [(Text, Text)]
24+
getPasta fi = map (cpAnchorText &&& cpPlainText) $ fi ^. fiCopyPastes
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<!--
2+
- SPDX-FileCopyrightText: 2018-2019 Serokell <https://serokell.io>
3+
-
4+
- SPDX-License-Identifier: MPL-2.0
5+
-->
6+
7+
A list with bad copy-paste:
8+
9+
- [a](a) e
10+
- [b](b) e
11+
- [c](a) e
12+
13+
A list that is completely fine:
14+
15+
- [a](a) d
16+
- [b](b) d
17+
- [c](c) d

0 commit comments

Comments
 (0)