diff --git a/.github/workflows/haskell-ci.yml b/.github/workflows/haskell-ci.yml index 029ea35..153c6f2 100644 --- a/.github/workflows/haskell-ci.yml +++ b/.github/workflows/haskell-ci.yml @@ -22,7 +22,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: haskell-actions/run-ormolu@v16 + - uses: haskell-actions/run-ormolu@v17 build: name: GHC ${{ matrix.ghc-version }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} @@ -30,13 +30,13 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - ghc-version: ['9.10', '9.8', '9.6'] + ghc-version: ['9.12', '9.10', '9.8', '9.6'] include: - os: windows-latest - ghc-version: '9.8' + ghc-version: '9.12' - os: macos-latest - ghc-version: '9.8' + ghc-version: '9.12' steps: - uses: actions/checkout@v4 @@ -80,7 +80,7 @@ jobs: - name: Build run: cabal build all - - if: ${{ matrix.os == 'ubuntu-latest' && matrix.ghc-version == '9.8'}} + - if: ${{ matrix.os == 'ubuntu-latest' && matrix.ghc-version == '9.12'}} name: doctests run: | cabal run doctests diff --git a/ChangeLog.md b/ChangeLog.md index 2612fd4..c70efa3 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,3 +1,12 @@ +0.2.0 +=== + +- switched to Diff for testing round-trips +- removed MarkupParse.Patch +- renamed MarkupParse.FlatParse to MarkupParse.Internal.FlatParse and removed unused exports. Added non-stable note. +- moved ParserWarn, runParser_ & runParserWarn to MarkupParse. +- changed diff executable to markupparse-diff. + 0.1.1 === diff --git a/app/diff.hs b/app/diff.hs deleted file mode 100644 index 0f25556..0000000 --- a/app/diff.hs +++ /dev/null @@ -1,127 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE QuasiQuotes #-} -{-# OPTIONS_GHC -Wno-unused-top-binds #-} - -module Main (main) where - -import Control.Monad -import Data.ByteString (ByteString) -import Data.ByteString qualified as B -import Data.Foldable -import Data.Function -import Data.Maybe -import Data.String.Interpolate -import Data.TreeDiff -import MarkupParse -import MarkupParse.Patch -import Test.Tasty (TestTree, defaultMain, testGroup) -import Test.Tasty.Golden.Advanced (goldenTest) -import Prelude - -main :: IO () -main = - defaultMain $ - testGroup - "tests" - [ goldenTests - ] - -goldenTests :: TestTree -goldenTests = - testGroup - "examples" - ( testExample - <$> [ (Compact, Xml, "other/line.svg"), - (Compact, Html, "other/ex1.html") - ] - ) - -testExample :: (RenderStyle, Standard, FilePath) -> TestTree -testExample (r, s, fp) = - goldenTest - fp - (getMarkupFile s fp) - (isoMarkdownMarkup r s <$> getMarkupFile s fp) - (\expected actual -> pure (show . ansiWlEditExpr <$> patch expected actual)) - (\_ -> pure ()) - -getMarkupFile :: Standard -> FilePath -> IO Markup -getMarkupFile s fp = do - bs <- B.readFile fp - pure $ warnError $ markup s bs - --- round trip markdown >>> markup -isoMarkdownMarkup :: RenderStyle -> Standard -> Markup -> Markup -isoMarkdownMarkup r s m = m & (markdown r s >=> markup s) & warnError - --- patch testing -printPatchExamples :: IO () -printPatchExamples = traverse_ (printPatchExample m0) patchExamples - -printPatchExample :: ByteString -> (String, ByteString) -> IO () -printPatchExample m (s, m') = do - print s - case show . ansiWlEditExpr <$> patch (warnError $ markup Html m) (warnError $ markup Html m') of - Nothing -> putStrLn ("no changes" :: String) - Just x -> putStrLn x - -patchExamples :: [(String, ByteString)] -patchExamples = - [ ("change an attribute name", m1'), - ("change an attribute value", m1), - ("delete an attribute", m2), - ("insert an attribute", m3), - ("change a tag", m4), - ("change a markup leaf", m5), - ("delete a leaf", m6), - ("insert a leaf", m7), - ("insert attribute", m8), - ("modify content", m9), - ("deep leaf insertion", m10) - ] - -m0 :: ByteString -m0 = [i|text|] - --- Changing class -m1 :: ByteString -m1 = [i|text|] - -m1' :: ByteString -m1' = [i|text|] - --- deleting an attribute -m2 :: ByteString -m2 = [i|text|] - --- inserting an attribute -m3 :: ByteString -m3 = [i|text|] - --- changing a tag -m4 :: ByteString -m4 = [i|text|] - --- changing a leaf -m5 :: ByteString -m5 = [i|text|] - --- deleting a leaf -m6 :: ByteString -m6 = [i|text|] - --- inserting a leaf -m7 :: ByteString -m7 = [i|text|] - --- inserting Attributes -m8 :: ByteString -m8 = [i|text|] - --- modifying content -m9 :: ByteString -m9 = [i|textual content|] - --- inserting a leaf deeper down -m10 :: ByteString -m10 = [i|text|] diff --git a/app/markup-parse-diff.hs b/app/markup-parse-diff.hs new file mode 100644 index 0000000..5ffa631 --- /dev/null +++ b/app/markup-parse-diff.hs @@ -0,0 +1,51 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# OPTIONS_GHC -Wno-unused-top-binds #-} + +module Main (main) where + +import Control.Category ((>>>)) +import Control.Monad +import Data.Algorithm.Diff +import Data.Algorithm.DiffOutput +import Data.Bifunctor +import Data.Bool +import Data.ByteString (ByteString) +import Data.ByteString qualified as B +import Data.ByteString.Char8 qualified as C +import Data.Function +import Data.Maybe +import MarkupParse +import Test.Tasty (TestTree, defaultMain, testGroup) +import Test.Tasty.Golden.Advanced (goldenTest) +import Prelude + +main :: IO () +main = + defaultMain $ + testGroup + "tests" + [ goldenTests + ] + +goldenTests :: TestTree +goldenTests = + testGroup + "examples" + ( testExample + <$> [ (Compact, Xml, "other/line.svg"), + (Compact, Html, "other/ex1.html") + ] + ) + +testExample :: (RenderStyle, Standard, FilePath) -> TestTree +testExample (r, s, fp) = + goldenTest + fp + (B.readFile fp) + (isoMarkupMarkdown r s <$> B.readFile fp) + (\expected actual -> getDiff (C.lines expected) (C.lines actual) & fmap (bimap (C.unpack >>> pure) (C.unpack >>> pure)) & diffToLineRanges & prettyDiffs & (\xs -> bool (pure $ Just (show xs)) (pure Nothing) (xs == mempty))) + (\_ -> pure ()) + +-- round trip markdown >>> markup +isoMarkupMarkdown :: RenderStyle -> Standard -> ByteString -> ByteString +isoMarkupMarkdown r s m = m & (markup s >=> markdown r s) & warnError diff --git a/markup-parse.cabal b/markup-parse.cabal index bb1c866..7120dbb 100644 --- a/markup-parse.cabal +++ b/markup-parse.cabal @@ -1,6 +1,6 @@ cabal-version: 3.0 name: markup-parse -version: 0.1.1.1 +version: 0.2.0.0 license: BSD-3-Clause license-file: LICENSE copyright: Copyright, Tony Day, 2023- @@ -14,9 +14,10 @@ description: A markup parser and printer, from and to strict bytestrings, optimised for speed. build-type: Simple tested-with: - , GHC == 9.10.1 - , GHC == 9.6.5 - , GHC == 9.8.2 + , GHC == 9.6.7 + , GHC == 9.8.4 + , GHC == 9.10.2 + , GHC == 9.12.2 extra-doc-files: ChangeLog.md other/*.html @@ -52,18 +53,14 @@ library build-depends: , base >=4.14 && <5 , bytestring >=0.11.3 && <0.13 - , containers >=0.6 && <0.8 - , deepseq >=1.4.4 && <1.6 + , containers >=0.6 && <0.9 + , deepseq >=1.4 && <1.6 , flatparse >=0.3.5 && <0.6 , string-interpolate >=0.3 && <0.4 - , tasty >=1.2 && <1.6 - , tasty-golden >=2.3.1.1 && <2.4 , these >=1.1 && <1.3 - , tree-diff >=0.3 && <0.4 exposed-modules: MarkupParse - MarkupParse.FlatParse - MarkupParse.Patch + MarkupParse.Internal.FlatParse test-suite doctests import: ghc2021-stanza @@ -71,7 +68,7 @@ test-suite doctests hs-source-dirs: test build-depends: , base >=4.14 && <5 - , doctest-parallel >=0.3 && <0.4 + , doctest-parallel >=0.3 && <0.5 ghc-options: -threaded type: exitcode-stdio-1.0 @@ -79,14 +76,13 @@ test-suite markup-parse-diff import: ghc-options-exe-stanza import: ghc-options-stanza import: ghc2021-stanza - main-is: diff.hs + main-is: markup-parse-diff.hs hs-source-dirs: app build-depends: , base >=4.14 && <5 , bytestring >=0.11.3 && <0.13 , markup-parse - , string-interpolate >=0.3 && <0.4 , tasty >=1.2 && <1.6 , tasty-golden >=2.3.1.1 && <2.4 - , tree-diff >=0.3 && <0.4 + , Diff >=1.0 && <1.1 type: exitcode-stdio-1.0 diff --git a/src/MarkupParse.hs b/src/MarkupParse.hs index cad9c8a..20162d3 100644 --- a/src/MarkupParse.hs +++ b/src/MarkupParse.hs @@ -82,6 +82,16 @@ module MarkupParse -- * Tree support Tree (..), + + -- * token parsing support + ParserWarning (..), + runParserWarn, + runParser_, + + -- * Flatparse re-exports + runParser, + Parser, + Result (..), ) where @@ -101,10 +111,9 @@ import Data.Maybe import Data.String.Interpolate import Data.These import Data.Tree -import Data.TreeDiff -import FlatParse.Basic hiding (Result, cut, take) +import FlatParse.Basic hiding (cut, take) import GHC.Generics -import MarkupParse.FlatParse +import MarkupParse.Internal.FlatParse import Prelude hiding (replicate) -- $setup @@ -112,8 +121,7 @@ import Prelude hiding (replicate) -- >>> :set -XQuasiQuotes -- >>> :set -XOverloadedStrings -- >>> import MarkupParse --- >>> import MarkupParse.Patch --- >>> import MarkupParse.FlatParse +-- >>> import MarkupParse.Internal.FlatParse -- >>> import FlatParse.Basic -- >>> import Data.String.Interpolate -- >>> import Data.ByteString.Char8 qualified as B @@ -164,7 +172,7 @@ import Prelude hiding (replicate) -- The xml parsing logic is based on the XML productions found in https://www.w3.org/TR/xml/ -- -- The html parsing was based on a reading of , but ignores the various '\x00' to '\xfffd' & eof directives that form part of the html standards. -data Standard = Html | Xml deriving (Eq, Show, Ord, Generic, NFData, ToExpr) +data Standard = Html | Xml deriving (Eq, Show, Ord, Generic, NFData) -- | A list of 'Element's or 'Tree' 'Token's -- @@ -172,7 +180,7 @@ data Standard = Html | Xml deriving (Eq, Show, Ord, Generic, NFData, ToExpr) -- That (Markup {elements = [Node {rootLabel = OpenTag StartTag "foo" [Attr {attrName = "class", attrValue = "bar"}], subForest = [Node {rootLabel = Content "baz", subForest = []}]}]}) newtype Markup = Markup {elements :: [Element]} deriving stock (Show, Eq, Ord, Generic) - deriving anyclass (NFData, ToExpr) + deriving anyclass (NFData) deriving newtype (Semigroup, Monoid) -- | markup-parse generally tries to continue on parse errors, and return what has/can still be parsed, together with any warnings. @@ -276,7 +284,7 @@ wellFormed s (Markup trees) = List.nub $ mconcat (foldTree checkNode <$> trees) type NameTag = ByteString -- | Whether an opening tag is a start tag or an empty element tag. -data OpenTagType = StartTag | EmptyElemTag deriving (Show, Ord, Eq, Generic, NFData, ToExpr) +data OpenTagType = StartTag | EmptyElemTag deriving (Show, Ord, Eq, Generic, NFData) -- | A Markup token. The term is borrowed from standards but is used across 'Html' and 'Xml' in this library. -- @@ -324,7 +332,7 @@ data Token Decl !ByteString ![Attr] | -- | Contents of a doctype declaration. Doctype !ByteString - deriving (Show, Ord, Eq, Generic, NFData, ToExpr) + deriving (Show, Ord, Eq, Generic, NFData) -- | Escape a single character. escapeChar :: Char -> ByteString @@ -480,8 +488,6 @@ data Attr = Attr {attrName :: !AttrName, attrValue :: !AttrValue} instance NFData Attr -instance ToExpr Attr - normTokenAttrs :: Token -> Token normTokenAttrs (OpenTag t n as) = OpenTag t n (normAttrs as) normTokenAttrs x = x @@ -943,3 +949,49 @@ isBooleanAttrName x = isWhitespace x || (x == '/') || (x == '>') + +-- | Warnings covering leftovers, 'Err's and 'Fail' +-- +-- >>> runParserWarn ws " x" +-- These (ParserLeftover "x") ' ' +-- +-- >>> runParserWarn ws "x" +-- This ParserUncaught +-- +-- >>> runParserWarn (ws `cut` "no whitespace") "x" +-- This (ParserError "no whitespace") +data ParserWarning = ParserLeftover ByteString | ParserError ByteString | ParserUncaught deriving (Eq, Show, Ord, Generic, NFData) + +-- | Run parser, returning leftovers and errors as 'ParserWarning's. +-- +-- >>> runParserWarn ws " " +-- That ' ' +-- +-- >>> runParserWarn ws "x" +-- This ParserUncaught +-- +-- >>> runParserWarn ws " x" +-- These (ParserLeftover "x") ' ' +runParserWarn :: Parser ByteString a -> ByteString -> These ParserWarning a +runParserWarn p bs = case runParser p bs of + Err e -> This (ParserError e) + OK a "" -> That a + OK a x -> These (ParserLeftover $ B.take 200 x) a + Fail -> This ParserUncaught + +-- | Run parser, ignore leftovers, and error on Fail. +-- +-- >>> runParser_ ws " " +-- ' ' +-- +-- >>> runParser_ ws " x" +-- ' ' +-- +-- >>> runParser_ ws "x" +-- *** Exception: Uncaught parse failure +-- ... +runParser_ :: Parser ByteString a -> ByteString -> a +runParser_ p bs = case runParser p bs of + Err e -> error (B.unpack e) + OK a _ -> a + Fail -> error "Uncaught parse failure" diff --git a/src/MarkupParse/FlatParse.hs b/src/MarkupParse/FlatParse.hs deleted file mode 100644 index a332859..0000000 --- a/src/MarkupParse/FlatParse.hs +++ /dev/null @@ -1,361 +0,0 @@ -{-# LANGUAGE DeriveAnyClass #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE TemplateHaskell #-} - --- | Various helpers and combinators. -module MarkupParse.FlatParse - ( -- * Parsing - ParserWarning (..), - runParserMaybe, - runParserEither, - runParserWarn, - runParser_, - - -- * Flatparse re-exports - runParser, - Parser, - Result (..), - - -- * Parsers - isWhitespace, - ws_, - ws, - wss, - nota, - isa, - sq, - dq, - wrappedDq, - wrappedSq, - wrappedQ, - wrappedQNoGuard, - eq, - sep, - bracketed, - bracketedSB, - wrapped, - digit, - int, - double, - signed, - byteStringOf', - comma, - ) -where - -import Control.DeepSeq -import Data.Bool -import Data.ByteString (ByteString) -import Data.ByteString.Char8 qualified as B -import Data.Char hiding (isDigit) -import Data.These -import FlatParse.Basic hiding (cut, take) -import GHC.Exts -import GHC.Generics (Generic) -import Prelude hiding (replicate) - --- $setup --- >>> :set -XTemplateHaskell --- >>> :set -XOverloadedStrings --- >>> import MarkupParse.FlatParse --- >>> import FlatParse.Basic - --- | Run a Parser, throwing away leftovers. Nothing on 'Fail' or 'Err'. --- --- >>> runParserMaybe ws "x" --- Nothing --- --- >>> runParserMaybe ws " x" --- Just ' ' -runParserMaybe :: Parser e a -> ByteString -> Maybe a -runParserMaybe p b = case runParser p b of - OK r _ -> Just r - Fail -> Nothing - Err _ -> Nothing - --- | Run a Parser, throwing away leftovers. Returns Left on 'Fail' or 'Err'. --- --- >>> runParserEither ws " x" --- Right ' ' -runParserEither :: (IsString e) => Parser e a -> ByteString -> Either e a -runParserEither p bs = case runParser p bs of - Err e -> Left e - OK a _ -> Right a - Fail -> Left "uncaught parse error" - --- | Warnings covering leftovers, 'Err's and 'Fail' --- --- >>> runParserWarn ws " x" --- These (ParserLeftover "x") ' ' --- --- >>> runParserWarn ws "x" --- This ParserUncaught --- --- >>> runParserWarn (ws `cut` "no whitespace") "x" --- This (ParserError "no whitespace") -data ParserWarning = ParserLeftover ByteString | ParserError ByteString | ParserUncaught deriving (Eq, Show, Ord, Generic, NFData) - --- | Run parser, returning leftovers and errors as 'ParserWarning's. --- --- >>> runParserWarn ws " " --- That ' ' --- --- >>> runParserWarn ws "x" --- This ParserUncaught --- --- >>> runParserWarn ws " x" --- These (ParserLeftover "x") ' ' -runParserWarn :: Parser ByteString a -> ByteString -> These ParserWarning a -runParserWarn p bs = case runParser p bs of - Err e -> This (ParserError e) - OK a "" -> That a - OK a x -> These (ParserLeftover $ B.take 200 x) a - Fail -> This ParserUncaught - --- | Run parser, discards leftovers & throws an error on failure. --- --- >>> runParser_ ws " " --- ' ' --- --- >>> runParser_ ws "x" --- *** Exception: uncaught parse error --- ... -runParser_ :: Parser String a -> ByteString -> a -runParser_ p bs = case runParser p bs of - Err e -> error e - OK a _ -> a - Fail -> error "uncaught parse error" - --- | Consume whitespace. --- --- >>> runParser ws_ " \nx" --- OK () "x" --- --- >>> runParser ws_ "x" --- OK () "x" -ws_ :: Parser e () -ws_ = - $( switch - [| - case _ of - " " -> ws_ - "\n" -> ws_ - "\t" -> ws_ - "\r" -> ws_ - "\f" -> ws_ - _ -> pure () - |] - ) -{-# INLINE ws_ #-} - --- | \\n \\t \\f \\r and space -isWhitespace :: Char -> Bool -isWhitespace ' ' = True -- \x20 space -isWhitespace '\x0a' = True -- \n linefeed -isWhitespace '\x09' = True -- \t tab -isWhitespace '\x0c' = True -- \f formfeed -isWhitespace '\x0d' = True -- \r carriage return -isWhitespace _ = False -{-# INLINE isWhitespace #-} - --- | single whitespace --- --- >>> runParser ws " \nx" --- OK ' ' "\nx" -ws :: Parser e Char -ws = satisfy isWhitespace - --- | multiple whitespace --- --- >>> runParser wss " \nx" --- OK " \n" "x" --- --- >>> runParser wss "x" --- Fail -wss :: Parser e ByteString -wss = byteStringOf $ some ws - --- | Single quote --- --- >>> runParserMaybe sq "'" --- Just () -sq :: ParserT st e () -sq = $(char '\'') - --- | Double quote --- --- >>> runParserMaybe dq "\"" --- Just () -dq :: ParserT st e () -dq = $(char '"') - --- | Parse whilst not a specific character --- --- >>> runParser (nota 'x') "abcxyz" --- OK "abc" "xyz" -nota :: Char -> Parser e ByteString -nota c = withSpan (skipMany (satisfy (/= c))) (\() s -> unsafeSpanToByteString s) -{-# INLINE nota #-} - --- | Parse whilst satisfying a predicate. --- --- >>> runParser (isa (=='x')) "xxxabc" --- OK "xxx" "abc" -isa :: (Char -> Bool) -> Parser e ByteString -isa p = withSpan (skipMany (satisfy p)) (\() s -> unsafeSpanToByteString s) -{-# INLINE isa #-} - --- | 'byteStringOf' but using withSpan internally. Doesn't seems faster... -byteStringOf' :: Parser e a -> Parser e ByteString -byteStringOf' p = withSpan p (\_ s -> unsafeSpanToByteString s) -{-# INLINE byteStringOf' #-} - --- | A single-quoted string. -wrappedSq :: Parser b ByteString -wrappedSq = $(char '\'') *> nota '\'' <* $(char '\'') -{-# INLINE wrappedSq #-} - --- | A double-quoted string. -wrappedDq :: Parser b ByteString -wrappedDq = $(char '"') *> nota '"' <* $(char '"') -{-# INLINE wrappedDq #-} - --- | A single-quoted or double-quoted string. --- --- >>> runParserMaybe wrappedQ "\"quoted\"" --- Just "quoted" --- --- >>> runParserMaybe wrappedQ "'quoted'" --- Just "quoted" -wrappedQ :: Parser e ByteString -wrappedQ = - wrappedDq - <|> wrappedSq -{-# INLINE wrappedQ #-} - --- | A single-quoted or double-quoted wrapped parser. --- --- >>> runParser (wrappedQNoGuard (many $ satisfy (/= '"'))) "\"name\"" --- OK "name" "" --- --- Will consume quotes if the underlying parser does. --- --- >>> runParser (wrappedQNoGuard (many anyChar)) "\"name\"" --- Fail -wrappedQNoGuard :: Parser e a -> Parser e a -wrappedQNoGuard p = wrapped dq p <|> wrapped sq p - --- | xml production [25] --- --- >>> runParserMaybe eq " = " --- Just () --- --- >>> runParserMaybe eq "=" --- Just () -eq :: Parser e () -eq = ws_ *> $(char '=') <* ws_ -{-# INLINE eq #-} - --- | Some with a separator. --- --- >>> runParser (sep ws (many (satisfy (/= ' ')))) "a b c" --- OK ["a","b","c"] "" -sep :: Parser e s -> Parser e a -> Parser e [a] -sep s p = (:) <$> p <*> many (s *> p) - --- | Parser bracketed by two other parsers. --- --- >>> runParser (bracketed ($(char '[')) ($(char ']')) (many (satisfy (/= ']')))) "[bracketed]" --- OK "bracketed" "" -bracketed :: Parser e b -> Parser e b -> Parser e a -> Parser e a -bracketed o c p = o *> p <* c -{-# INLINE bracketed #-} - --- | Parser bracketed by square brackets. --- --- >>> runParser bracketedSB "[bracketed]" --- OK "bracketed" "" -bracketedSB :: Parser e [Char] -bracketedSB = bracketed $(char '[') $(char ']') (many (satisfy (/= ']'))) - --- | Parser wrapped by another parser. --- --- >>> runParser (wrapped ($(char '"')) (many (satisfy (/= '"')))) "\"wrapped\"" --- OK "wrapped" "" -wrapped :: Parser e () -> Parser e a -> Parser e a -wrapped x p = bracketed x x p -{-# INLINE wrapped #-} - --- | A single digit --- --- >>> runParserMaybe digit "5" --- Just 5 -digit :: Parser e Int -digit = (\c -> ord c - ord '0') <$> satisfyAscii isDigit - --- | An (unsigned) 'Int' parser --- --- >>> runParserMaybe int "567" --- Just 567 -int :: Parser e Int -int = do - (place, n) <- chainr (\n (!place, !acc) -> (place * 10, acc + place * n)) digit (pure (1, 0)) - case place of - 1 -> empty - _ -> pure n - -digits :: Parser e (Int, Int) -digits = chainr (\n (!place, !acc) -> (place * 10, acc + place * n)) digit (pure (1, 0)) - --- | A 'Double' parser. --- --- >>> runParser double "1.234x" --- OK 1.234 "x" --- --- >>> runParser double "." --- Fail --- --- >>> runParser double "123" --- OK 123.0 "" --- --- >>> runParser double ".123" --- OK 0.123 "" --- --- >>> runParser double "123." --- OK 123.0 "" -double :: Parser e Double -double = do - (placel, nl) <- digits - withOption - ($(char '.') *> digits) - ( \(placer, nr) -> - case (placel, placer) of - (1, 1) -> empty - _ -> pure $ fromIntegral nl + fromIntegral nr / fromIntegral placer - ) - ( case placel of - 1 -> empty - _ -> pure $ fromIntegral nl - ) - -minus :: Parser e () -minus = $(char '-') - --- | Parser for a signed prefix to a number. --- --- >>> runParser (signed double) "-1.234x" --- OK (-1.234) "x" -signed :: (Num b) => Parser e b -> Parser e b -signed p = do - m <- optional minus - case m of - Nothing -> p - Just () -> negate <$> p - --- | Comma parser --- --- >>> runParserMaybe comma "," --- Just () -comma :: Parser e () -comma = $(char ',') diff --git a/src/MarkupParse/Internal/FlatParse.hs b/src/MarkupParse/Internal/FlatParse.hs new file mode 100644 index 0000000..6320cf6 --- /dev/null +++ b/src/MarkupParse/Internal/FlatParse.hs @@ -0,0 +1,178 @@ +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TemplateHaskell #-} + +-- | Various helpers and combinators. +-- +-- This module is exposed only for testing via doctest-parallel and is not intended to form part of the stable API. +module MarkupParse.Internal.FlatParse + ( -- * Parsers + isWhitespace, + ws_, + ws, + nota, + isa, + sq, + dq, + wrappedDq, + wrappedSq, + wrappedQ, + wrappedQNoGuard, + eq, + bracketed, + bracketedSB, + wrapped, + ) +where + +import Data.Bool +import Data.ByteString (ByteString) +import Data.Char hiding (isDigit) +import FlatParse.Basic hiding (cut, take) +import Prelude hiding (replicate) + +-- $setup +-- >>> :set -XTemplateHaskell +-- >>> :set -XQuasiQuotes +-- >>> :set -XOverloadedStrings +-- >>> import MarkupParse.Internal.FlatParse +-- >>> import FlatParse.Basic + +-- | Consume whitespace. +-- +-- >>> runParser ws_ " \nx" +-- OK () "x" +-- +-- >>> runParser ws_ "x" +-- OK () "x" +ws_ :: Parser e () +ws_ = + $( switch + [| + case _ of + " " -> ws_ + "\n" -> ws_ + "\t" -> ws_ + "\r" -> ws_ + "\f" -> ws_ + _ -> pure () + |] + ) +{-# INLINE ws_ #-} + +-- | \\n \\t \\f \\r and space +isWhitespace :: Char -> Bool +isWhitespace ' ' = True -- \x20 space +isWhitespace '\x0a' = True -- \n linefeed +isWhitespace '\x09' = True -- \t tab +isWhitespace '\x0c' = True -- \f formfeed +isWhitespace '\x0d' = True -- \r carriage return +isWhitespace _ = False +{-# INLINE isWhitespace #-} + +-- | single whitespace +-- +-- >>> runParser ws " \nx" +-- OK ' ' "\nx" +ws :: Parser e Char +ws = satisfy isWhitespace + +-- | Single quote +-- +-- >>> runParser sq "'" +-- OK () "" +sq :: ParserT st e () +sq = $(char '\'') + +-- | Double quote +-- +-- >>> runParser dq "\"" +-- OK () "" +dq :: ParserT st e () +dq = $(char '"') + +-- | Parse whilst not a specific character +-- +-- >>> runParser (nota 'x') "abcxyz" +-- OK "abc" "xyz" +nota :: Char -> Parser e ByteString +nota c = withSpan (skipMany (satisfy (/= c))) (\() s -> unsafeSpanToByteString s) +{-# INLINE nota #-} + +-- | Parse whilst satisfying a predicate. +-- +-- >>> runParser (isa (=='x')) "xxxabc" +-- OK "xxx" "abc" +isa :: (Char -> Bool) -> Parser e ByteString +isa p = withSpan (skipMany (satisfy p)) (\() s -> unsafeSpanToByteString s) +{-# INLINE isa #-} + +-- | A single-quoted string. +wrappedSq :: Parser b ByteString +wrappedSq = $(char '\'') *> nota '\'' <* $(char '\'') +{-# INLINE wrappedSq #-} + +-- | A double-quoted string. +wrappedDq :: Parser b ByteString +wrappedDq = $(char '"') *> nota '"' <* $(char '"') +{-# INLINE wrappedDq #-} + +-- | A single-quoted or double-quoted string. +-- +-- >>> runParser wrappedQ "\"quoted\"" +-- OK "quoted" "" +-- +-- >>> runParser wrappedQ "'quoted'" +-- OK "quoted" "" +wrappedQ :: Parser e ByteString +wrappedQ = + wrappedDq + <|> wrappedSq +{-# INLINE wrappedQ #-} + +-- | A single-quoted or double-quoted wrapped parser. +-- +-- >>> runParser (wrappedQNoGuard (many $ satisfy (/= '"'))) "\"name\"" +-- OK "name" "" +-- +-- Will consume quotes if the underlying parser does. +-- +-- >>> runParser (wrappedQNoGuard (many anyChar)) "\"name\"" +-- Fail +wrappedQNoGuard :: Parser e a -> Parser e a +wrappedQNoGuard p = wrapped dq p <|> wrapped sq p + +-- | xml production [25] +-- +-- >>> runParser eq " = " +-- OK () "" +-- +-- >>> runParser eq "=" +-- OK () "" +eq :: Parser e () +eq = ws_ *> $(char '=') <* ws_ +{-# INLINE eq #-} + +-- | Parser bracketed by two other parsers. +-- +-- >>> runParser (bracketed ($(char '[')) ($(char ']')) (many (satisfy (/= ']')))) "[bracketed]" +-- OK "bracketed" "" +bracketed :: Parser e b -> Parser e b -> Parser e a -> Parser e a +bracketed o c p = o *> p <* c +{-# INLINE bracketed #-} + +-- | Parser bracketed by square brackets. +-- +-- >>> runParser bracketedSB "[bracketed]" +-- OK "bracketed" "" +bracketedSB :: Parser e [Char] +bracketedSB = bracketed $(char '[') $(char ']') (many (satisfy (/= ']'))) + +-- | Parser wrapped by another parser. +-- +-- >>> runParser (wrapped ($(char '"')) (many (satisfy (/= '"')))) "\"wrapped\"" +-- OK "wrapped" "" +wrapped :: Parser e () -> Parser e a -> Parser e a +wrapped x p = bracketed x x p +{-# INLINE wrapped #-} diff --git a/src/MarkupParse/Patch.hs b/src/MarkupParse/Patch.hs deleted file mode 100644 index a240ec9..0000000 --- a/src/MarkupParse/Patch.hs +++ /dev/null @@ -1,100 +0,0 @@ --- | A patch function for . -module MarkupParse.Patch - ( patch, - goldenPatch, - showPatch, - ) -where - -import Control.Category ((>>>)) -import Data.Foldable -import Data.Function -import Data.Maybe -import Data.TreeDiff -import Data.TreeDiff.OMap qualified as O -import GHC.Exts -import Test.Tasty (TestTree) -import Test.Tasty.Golden.Advanced (goldenTest) -import Prelude - --- $setup --- >>> :set -XOverloadedStrings --- >>> import MarkupParse.Patch - --- | Compare a file with a round-trip transformation. -goldenPatch :: (ToExpr a) => (FilePath -> IO a) -> (a -> a) -> FilePath -> TestTree -goldenPatch f testf fp = - goldenTest - fp - (f fp) - (testf <$> f fp) - (\expected actual -> pure (show . ansiWlEditExpr <$> patch expected actual)) - (\_ -> pure ()) - -isUnchangedList :: [Edit EditExpr] -> Bool -isUnchangedList xs = all isCpy xs && all isUnchangedExpr (mapMaybe cpy xs) - -isCpy :: Edit a -> Bool -isCpy (Cpy _) = True -isCpy _ = False - -cpy :: Edit a -> Maybe a -cpy (Cpy a) = Just a -cpy _ = Nothing - -isUnchangedEdit :: Edit EditExpr -> Bool -isUnchangedEdit (Cpy e) = isUnchangedExpr e -isUnchangedEdit _ = False - -isUnchangedExpr :: EditExpr -> Bool -isUnchangedExpr e = isUnchangedList $ getList e - -getList :: EditExpr -> [Edit EditExpr] -getList (EditApp _ xs) = xs -getList (EditRec _ m) = snd <$> O.toList m -getList (EditLst xs) = xs -getList (EditExp _) = [] - -filterChangedExprs :: EditExpr -> Maybe EditExpr -filterChangedExprs (EditApp n xs) = - case filter (not . isUnchangedEdit) (filterChangedEdits xs) of - [] -> Nothing - xs' -> Just $ EditApp n xs' -filterChangedExprs (EditRec n m) = - case filterChangedEditMap (O.fromList $ filter (not . isUnchangedEdit . snd) (O.toList m)) of - Nothing -> Nothing - Just m' -> Just (EditRec n m') -filterChangedExprs (EditLst xs) = - case filter (not . isUnchangedEdit) (filterChangedEdits xs) of - [] -> Nothing - xs' -> Just (EditLst xs') -filterChangedExprs (EditExp _) = Nothing - -filterChangedEdit :: Edit EditExpr -> Maybe (Edit EditExpr) -filterChangedEdit (Cpy a) = Cpy <$> filterChangedExprs a -filterChangedEdit x = Just x - -filterChangedEdit' :: (f, Edit EditExpr) -> Maybe (f, Edit EditExpr) -filterChangedEdit' (f, e) = (f,) <$> filterChangedEdit e - -filterChangedEdits :: [Edit EditExpr] -> [Edit EditExpr] -filterChangedEdits xs = mapMaybe filterChangedEdit xs - -filterChangedEditMap :: O.OMap FieldName (Edit EditExpr) -> Maybe (O.OMap FieldName (Edit EditExpr)) -filterChangedEditMap m = case xs' of - [] -> Nothing - xs'' -> Just $ O.fromList xs'' - where - xs = O.toList m - xs' = mapMaybe filterChangedEdit' xs - --- | 'ediff' with unchanged sections filtered out --- --- >>> showPatch $ patch [1, 2, 3, 5] [0, 1, 2, 4, 6] --- "[+0, -3, +4, -5, +6]" -patch :: (ToExpr a) => a -> a -> Maybe (Edit EditExpr) -patch m m' = filterChangedEdit $ ediff m m' - --- | Create a String representation of a patch. -showPatch :: Maybe (Edit EditExpr) -> String -showPatch p = p & maybe mempty (ansiWlEditExpr >>> show)