forked from jgm/pandoc-citeproc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpandoc-citeproc.hs
171 lines (154 loc) · 6.18 KB
/
pandoc-citeproc.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
{-# LANGUAGE CPP #-}
module Main where
import Text.CSL.Input.Bibutils (readBiblioString, BibFormat(..))
import Text.CSL.Reference (Reference(refId), Literal(..))
import Data.List (group, sort)
import Data.Char (chr, toLower)
import Data.Yaml.Builder (toByteString)
import Control.Applicative ((<|>), many)
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Char8 as B8
import Data.Attoparsec.ByteString.Char8 as Attoparsec
import qualified Data.Text as T
import Data.Text.Encoding (encodeUtf8)
import Data.Aeson.Encode.Pretty (encodePretty', Config(..), Indent(Spaces), NumberFormat(Generic))
import System.Console.GetOpt
import Control.Monad
import System.IO
import System.FilePath (takeExtension)
import System.Environment (getArgs)
import System.Exit
import Data.Version (showVersion)
import Paths_pandoc_citeproc (version)
import Text.CSL.Pandoc (processCites')
import Text.Pandoc.JSON hiding (Format)
import Text.Pandoc.Walk
import qualified Text.Pandoc.UTF8 as UTF8
main :: IO ()
main = do
argv <- getArgs
let (flags, args, errs) = getOpt Permute options argv
let header = "Usage: pandoc-citeproc [options] [file..]"
unless (null errs) $ do
UTF8.hPutStrLn stderr $ usageInfo (unlines $ errs ++ [header]) options
exitWith $ ExitFailure 1
when (Version `elem` flags) $ do
UTF8.putStrLn $ "pandoc-citeproc " ++ showVersion version
exitWith ExitSuccess
when (Help `elem` flags) $ do
UTF8.putStrLn $ usageInfo header options
exitWith ExitSuccess
if Bib2YAML `elem` flags || Bib2JSON `elem` flags
then do
let mbformat = case [f | Format f <- flags] of
[x] -> readFormat x
_ -> Nothing
bibformat <- case mbformat <|>
msum (map formatFromExtension args) of
Just f -> return f
Nothing -> do
UTF8.hPutStrLn stderr $ usageInfo
("Unknown format\n" ++ header) options
exitWith $ ExitFailure 3
bibstring <- case args of
[] -> UTF8.getContents
xs -> mconcat <$> mapM UTF8.readFile xs
readBiblioString bibformat bibstring >>=
warnDuplicateKeys >>=
if Bib2YAML `elem` flags
then outputYamlBlock .
B8.intercalate (B.singleton 10) .
map (unescapeTags . toByteString . (:[]))
else B8.putStrLn . unescapeUnicode . B.concat . BL.toChunks .
encodePretty' Config{ confIndent = Spaces 2
, confCompare = compare
, confNumFormat = Generic }
else toJSONFilter doCites
formatFromExtension :: FilePath -> Maybe BibFormat
formatFromExtension = readFormat . dropWhile (=='.') . takeExtension
readFormat :: String -> Maybe BibFormat
readFormat = go . map toLower
where go "biblatex" = Just BibLatex
go "bib" = Just BibLatex
go "bibtex" = Just Bibtex
go "json" = Just Json
go "yaml" = Just Yaml
#ifdef USE_BIBUTILS
go "ris" = Just Ris
go "endnote" = Just Endnote
go "enl" = Just Endnote
go "endnotexml" = Just EndnotXml
go "xml" = Just EndnotXml
go "wos" = Just Isi
go "isi" = Just Isi
go "medline" = Just Medline
go "copac" = Just Copac
go "mods" = Just Mods
#endif
go _ = Nothing
doCites :: Pandoc -> IO Pandoc
doCites doc = do
doc' <- processCites' doc
let warnings = query findWarnings doc'
mapM_ (UTF8.hPutStrLn stderr) warnings
return doc'
findWarnings :: Inline -> [String]
findWarnings (Span (_,["citeproc-not-found"],[("data-reference-id",ref)]) _) =
["pandoc-citeproc: reference " ++ ref ++ " not found" | ref /= "*"]
findWarnings (Span (_,["citeproc-no-output"],_) _) =
["pandoc-citeproc: reference with no printed form"]
findWarnings _ = []
data Option =
Help | Version | Convert | Format String | Bib2YAML | Bib2JSON
deriving (Ord, Eq, Show)
options :: [OptDescr Option]
options =
[ Option ['h'] ["help"] (NoArg Help) "show usage information"
, Option ['V'] ["version"] (NoArg Version) "show program version"
, Option ['y'] ["bib2yaml"] (NoArg Bib2YAML) "convert bibliography to YAML"
, Option ['j'] ["bib2json"] (NoArg Bib2JSON) "convert bibliography to JSON"
, Option ['f'] ["format"] (ReqArg Format "FORMAT") "bibliography format"
]
warnDuplicateKeys :: [Reference] -> IO [Reference]
warnDuplicateKeys refs = mapM_ warnDup dupKeys >> return refs
where warnDup k = UTF8.hPutStrLn stderr $ "biblio2yaml: duplicate key " ++ k
allKeys = map (unLiteral . refId) refs
dupKeys = [x | (x:_:_) <- group (sort allKeys)]
outputYamlBlock :: B.ByteString -> IO ()
outputYamlBlock contents = do
UTF8.putStrLn "---\nreferences:"
B.putStr contents
UTF8.putStrLn "..."
-- turn
-- id: ! "\u043F\u0443\u043D\u043A\u04423"
-- into
-- id: пункт3
unescapeTags :: B.ByteString -> B.ByteString
unescapeTags bs = case parseOnly (many $ tag <|> other) bs of
Left e -> error e
Right r -> B.concat r
unescapeUnicode :: B.ByteString -> B.ByteString
unescapeUnicode bs = case parseOnly (many other) bs of
Left e -> error e
Right r -> B.concat r
tag :: Attoparsec.Parser B.ByteString
tag = do
_ <- string $ B8.pack ": ! "
c <- char '\'' <|> char '"'
cs <- manyTill (escaped c <|> other) (char c)
return $ B8.pack ": " <> B8.singleton c <> B.concat cs <> B8.singleton c
escaped :: Char -> Attoparsec.Parser B.ByteString
escaped c = string $ B8.pack ['\\',c]
other :: Attoparsec.Parser B.ByteString
other = uchar <|> Attoparsec.takeWhile1 notspecial <|> regchar
where notspecial = not . inClass ":!\\\"'"
uchar :: Attoparsec.Parser B.ByteString
uchar = do
_ <- char '\\'
num <- (2 <$ char 'x') <|> (4 <$ char 'u') <|> (8 <$ char 'U')
cs <- count num $ satisfy $ inClass "0-9a-fA-F"
let n = read ('0':'x':cs)
return $ encodeUtf8 $ T.pack [chr n]
regchar :: Attoparsec.Parser B.ByteString
regchar = B8.singleton <$> anyChar