{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
module Data.BCP47
( BCP47
, inits
, mkLanguage
, mkLocalized
, fromText
, toText
, toSubtags
, ISO639_1
, language
, languageToText
, languageFromText
, LanguageExtension
, extendedLanguageSubtags
, languageExtensionToText
, languageExtensionFromText
, Script
, script
, scriptToText
, scriptFromText
, Country
, region
, regionToText
, regionFromText
, Variant
, variants
, variantToText
, variantFromText
, Extension
, extensions
, extensionToText
, extensionFromText
, PrivateUse
, privateUse
, privateUseToText
, privateUseFromText
, en
, es
, sw
, enGB
, enUS
, enTJP
, enGBTJP
)
where
import Control.Applicative ((<|>))
import Control.Monad (MonadPlus)
import Country (Country)
import Country.Identifier
(unitedKingdomOfGreatBritainAndNorthernIreland, unitedStatesOfAmerica)
import Data.Aeson
import Data.BCP47.Internal.Arbitrary
(Arbitrary, arbitrary, choose, elements, listOf, vectorOf)
import Data.BCP47.Internal.Extension
import Data.BCP47.Internal.Language
import Data.BCP47.Internal.LanguageExtension
import Data.BCP47.Internal.PrivateUse
import Data.BCP47.Internal.Region
import Data.BCP47.Internal.Script
import Data.BCP47.Internal.Subtags
import Data.BCP47.Internal.Variant
import Data.Bifunctor (first)
import Data.Foldable (toList)
import Data.LanguageCodes (ISO639_1(EN, ES, SW))
import qualified Data.List as List
import Data.Maybe (mapMaybe)
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Text (Text, pack, unpack)
import qualified Data.Text as T
import Data.Void (Void)
import Text.Megaparsec (Parsec, eof, hidden, many, optional, parse, try)
import Text.Megaparsec.Char (char)
import Text.Megaparsec.Error (errorBundlePretty)
data BCP47
= BCP47
{ language :: ISO639_1
, subtags :: Set Subtags
}
deriving (Eq, Ord)
instance Arbitrary BCP47 where
arbitrary = BCP47 <$> elements [EN, ES] <*> specs
where
oneOrNone f = choose (0, 1) >>= (`vectorOf` (f <$> arbitrary))
manyOf f = listOf (f <$> arbitrary)
regions = [minBound .. maxBound]
specs = Set.fromList . mconcat <$> sequenceA
[ manyOf SpecifyLanguageExtension
, oneOrNone SpecifyScript
, choose (0, 1) >>= (`vectorOf` (elements $ SpecifyRegion <$> regions))
, manyOf SpecifyVariant
, manyOf SpecifyExtension
, oneOrNone SpecifyPrivateUse
]
instance Show BCP47 where
show = T.unpack . toText
instance Read BCP47 where
readsPrec _ s = case fromText $ T.pack s of
Left _ -> []
Right b -> [(b, "")]
instance ToJSON BCP47 where
toEncoding = toEncoding . toText
toJSON = toJSON . toText
instance FromJSON BCP47 where
parseJSON = withText "BCP47" $ either (fail . unpack) pure . fromText
toText :: BCP47 -> Text
toText b = T.intercalate "-" $ mconcat
[ [languageToText $ language b]
, mapMaybe fromSubtags . Set.toList $ subtags b
, if Set.null (privateUse b) then [] else ["x"]
, map privateUseToText . Set.toList $ privateUse b
]
where
fromSubtags = \case
SpecifyLanguageExtension x -> Just $ languageExtensionToText x
SpecifyScript x -> Just $ scriptToText x
SpecifyRegion x -> Just $ regionToText x
SpecifyVariant x -> Just $ variantToText x
SpecifyExtension x -> Just $ extensionToText x
SpecifyPrivateUse _ -> Nothing
extendedLanguageSubtags :: BCP47 -> Set LanguageExtension
extendedLanguageSubtags = asSet $ \case
SpecifyLanguageExtension x -> Just x
_otherwise -> Nothing
script :: BCP47 -> Maybe Script
script = headMay . mapMaybe f . Set.toList . subtags
where
f = \case
SpecifyScript x -> Just x
_otherwise -> Nothing
region :: BCP47 -> Maybe Country
region = headMay . mapMaybe f . Set.toList . subtags
where
f = \case
SpecifyRegion x -> Just x
_otherwise -> Nothing
variants :: BCP47 -> Set Variant
variants = asSet $ \case
SpecifyVariant x -> Just x
_otherwise -> Nothing
extensions :: BCP47 -> Set Extension
extensions = asSet $ \case
SpecifyExtension x -> Just x
_otherwise -> Nothing
privateUse :: BCP47 -> Set PrivateUse
privateUse = asSet $ \case
SpecifyPrivateUse x -> Just x
_otherwise -> Nothing
asSet :: Ord a => (Subtags -> Maybe a) -> BCP47 -> Set a
asSet f = Set.fromList . mapMaybe f . Set.toList . subtags
headMay :: [x] -> Maybe x
headMay [] = Nothing
headMay (x : _) = Just x
toSubtags :: BCP47 -> [Subtags]
toSubtags tag = toList $ subtags tag
inits :: BCP47 -> [BCP47]
inits tag =
map (BCP47 (language tag) . Set.fromList) . List.inits $ toSubtags tag
mkLanguage :: ISO639_1 -> BCP47
mkLanguage lang = BCP47 lang mempty
mkLocalized :: ISO639_1 -> Country -> BCP47
mkLocalized lang locale = BCP47 lang . Set.singleton $ SpecifyRegion locale
fromText :: Text -> Either Text BCP47
fromText = first (pack . errorBundlePretty) . parse parser "fromText"
parser :: Parsec Void Text BCP47
parser = BCP47 <$> languageP <*> subtagsP <* hidden eof
where
subtagsP = mconcat <$> sequenceA
[ manyAsSet SpecifyLanguageExtension (try (char '-' *> languageExtensionP))
, maybe mempty (Set.singleton . SpecifyScript)
<$> (try (optional $ char '-' *> scriptP) <|> pure Nothing)
, maybe mempty (Set.singleton . SpecifyRegion)
<$> (try (optional $ char '-' *> regionP) <|> pure Nothing)
, manyAsSet SpecifyVariant (try (char '-' *> variantP))
, manyAsSet SpecifyExtension (try (char '-' *> extensionP))
, Set.map SpecifyPrivateUse <$> (try (char '-' *> privateUseP) <|> mempty)
]
manyAsSet :: (Ord b, MonadPlus m) => (a -> b) -> m a -> m (Set b)
manyAsSet f p = Set.fromList . map f <$> many p
es :: BCP47
es = mkLanguage ES
en :: BCP47
en = mkLanguage EN
sw :: BCP47
sw = mkLanguage SW
enGB :: BCP47
enGB = mkLocalized EN unitedKingdomOfGreatBritainAndNorthernIreland
enUS :: BCP47
enUS = mkLocalized EN unitedStatesOfAmerica
enTJP :: BCP47
enTJP = en
{ subtags = Set.insert (SpecifyExtension (Extension (pack "t-jp")))
$ subtags en
}
enGBTJP :: BCP47
enGBTJP = enGB
{ subtags = Set.insert (SpecifyExtension (Extension (pack "t-jp")))
$ subtags enGB
}