• balsoft@lemmy.ml
    link
    fedilink
    arrow-up
    16
    ·
    edit-2
    2 days ago

    I decided to write it myself for fun. I decided that “From Scratch” means:

    • No parser libraries (parsec/happy/etc)
    • No using read from Prelude
    • No hacky meta-parsing

    Here is what I came up with (using my favourite parsing method: parser combinators):

    import Control.Monad ((>=>), replicateM)
    import Control.Applicative (Alternative (..), asum, optional)
    import Data.Maybe (fromMaybe)
    import Data.Functor (($>))
    import Data.List (singleton)
    import Data.Map (Map, fromList)
    import Data.Bifunctor (first, second)
    import Data.Char (toLower, chr)
    
    newtype Parser i o = Parser { parse :: i -> Maybe (i, o) } deriving (Functor)
    
    instance Applicative (Parser i) where
      pure a = Parser $ \i -> Just (i, a)
      a <*> b = Parser $ parse a >=> \(i, f) -> second f <$> parse b i
    instance Alternative (Parser i) where
      empty = Parser $ const Nothing
      a <|> b = Parser $ \i -> parse a i <|> parse b i
    instance Monad (Parser i) where
      a >>= f = Parser $ parse a >=> \(i, b) -> parse (f b) i
    instance Semigroup o => Semigroup (Parser i o) where
      a <> b = (<>) <$> a <*> b
    instance Monoid o => Monoid (Parser i o) where
      mempty = pure mempty
    
    type SParser = Parser String
    
    charIf :: (a -> Bool) -> Parser [a] a
    charIf cond = Parser $ \i -> case i of
      (x:xs) | cond x -> Just (xs, x)
      _ -> Nothing
    
    char :: Eq a => a -> Parser [a] a
    char c = charIf (== c)
    
    one :: Parser i a -> Parser i [a]
    one = fmap singleton
    
    str :: Eq a => [a] -> Parser [a] [a]
    str = mapM char
    
    sepBy :: Parser i a -> Parser i b -> Parser i [a]
    sepBy a b = (one a <> many (b *> a)) <|> mempty
    
    data Decimal = Decimal { mantissa :: Integer, exponent :: Int } deriving Show
    
    data JSON = Object (Map String JSON) | Array [JSON] | Bool Bool | Number Decimal | String String | Null deriving Show
    
    whitespace :: SParser String
    whitespace = many $ asum $ map char [' ', '\t', '\r', '\n']
    
    digit :: Int -> SParser Int
    digit base = asum $ take base [asum [char c, char (toLower c)] $> n | (c, n) <- zip (['0'..'9'] <> ['A'..'Z']) [0..]]
    
    collectDigits :: Int -> [Int] -> Integer
    collectDigits base = foldl (\acc x -> acc * fromIntegral base + fromIntegral x) 0
    
    unsignedInteger :: SParser Integer
    unsignedInteger = collectDigits 10 <$> some (digit 10)
    
    integer :: SParser Integer
    integer = asum [char '-' $> (-1), char '+' $> 1, str "" $> 1] >>= \sign -> (sign *) <$> unsignedInteger
    
    -- This is the ceil of the log10 and also very inefficient
    log10 :: Integer -> Int
    log10 n
      | n < 1 = 0
      | otherwise = 1 + log10 (n `div` 10)
    
    jsonNumber :: SParser Decimal
    jsonNumber = do
      whole <- integer
      fraction <- fromMaybe 0 <$> optional (str "." *> unsignedInteger)
      e <- fromIntegral . fromMaybe 0 <$> optional ((str "E" <|> str "e") *> integer)
      pure $ Decimal (whole * 10^log10 fraction + signum whole * fraction) (e - log10 fraction)
    
    escapeChar :: SParser Char
    escapeChar = char '\\'
      *> asum [
        str "'" $> '\'',
        str "\"" $> '"',
        str "\\" $> '\\',
        str "n" $> '\n',
        str "r" $> '\r',
        str "t" $> '\t',
        str "b" $> '\b',
        str "f" $> '\f',
        str "u" *> (chr . fromIntegral . collectDigits 16 <$> replicateM 4 (digit 16))
      ]
    
    jsonString :: SParser String
    jsonString =
      char '"'
      *> many (asum [charIf (\c -> c /= '"' && c /= '\\'), escapeChar])
      <* char '"'
    
    jsonObjectPair :: SParser (String, JSON)
    jsonObjectPair = (,) <$> (whitespace *> jsonString <* whitespace <* char ':') <*> json
    
    json :: SParser JSON
    json =
      whitespace *>
        asum [
          Object <$> fromList <$> (char '{' *> jsonObjectPair `sepBy` char ',' <* char '}'),
          Array <$> (char '[' *> json `sepBy` char ',' <* char ']'),
          Bool <$> asum [str "true" $> True, str "false" $> False],
          Number <$> jsonNumber,
          String <$> jsonString,
          Null <$ str "null"
        ]
        <* whitespace
    
    main :: IO ()
    main = interact $ show . parse json
    
    

    This parses numbers as my own weird Decimal type, in order to preserve all information (converting to Double is lossy). I didn’t bother implementing any methods on the Decimal, because there are other libraries that do that and we’re just writing a parser.

    It’s also slow as hell but hey, that’s naive implementations for you!

    It ended up being 113 lines. I think I could reduce it a bit more if I was willing to sacrifice readability and/or just inline things instead of implementing stdlib typeclasses.