In my experience, LPeg does a good job of staying comprehensible for most reasonably sized use cases. It can even parse the grammar of Lua itself. I personally haven't had much trouble translating BNF grammars to PEG, though the result is typically longer. It still satisfies the goal of separating parsing logic from data logic, which is a big step.
For more info on the issues with PEGs -- and a paper showing how to correctly translate general LL(1p) grammars to PEG -- see the link at the end of this comment.
LPeg also has what it calls match-time captures (http://www.inf.puc-rio.br/~roberto/lpeg/#matchtime), which can be used to parse non-context-free grammars such as the common TLV (tag, length, value) formats. For example, I've written a pure LPeg parser for PKIX objects like X.509 certificates. Here are some edited code snippets with some high-level and low-level bits:
--- Returns a DER object pattern that captures the object's inner value.
--
-- `identifier` is matched first. Then, at match time, the length is decoded
-- with `unpacklength`, the content octets are sliced out of the subject, and
-- `patt` decides what is captured:
--
--   * LPeg pattern -- must match the whole content; the values returned by
--     lpeg.match (its captures, or the end position if it has none) are
--     produced.
--   * function     -- called with the content string; its return values are
--     produced, and a falsy first value fails the match.
--   * nil          -- the raw content string is produced.
--
-- @param identifier LPeg pattern for the identifier octet(s)
-- @param patt LPeg pattern, function, or nil (see above)
-- @return LPeg match-time capture pattern
--
-- Raises an error (reported at the caller's level) when `patt` is of any
-- other type, and at match time when `unpacklength` reports failure.
local function Cobject(identifier, patt)
  local match
  if lpeg.type(patt) then
    -- Anchor the pattern to the end of the content once, up front, rather
    -- than rebuilding `patt * -P(1)` on every match.
    local whole = patt * -P(1)
    match = function (s)
      return lpeg.match(whole, s)
    end
  elseif type(patt) == "function" then
    match = patt
  elseif patt == nil then
    match = function (s)
      return s
    end
  else
    error(sformat("expected function, pattern or nil, got %s", type(patt)), 2)
  end

  -- Converts the inner matcher's results into what Cmt expects: the new
  -- position followed by the captured values, or false to fail the match.
  local function finish(next_pos, v, ...)
    if v then
      return next_pos, v, ...
    end
    return false
  end

  return Cmt(identifier, function (s, pos)
    -- unpacklength is defined elsewhere; it is used here as yielding the
    -- content length and the position of the first content octet.
    local n, first = assert(unpacklength(s, pos))
    local last = first + n - 1
    -- A declared length that runs past the end of the subject is a truncated
    -- object. Fail the match instead of passing a silently shortened value to
    -- the matcher and returning a position beyond #s + 1, which LPeg rejects.
    if last > #s then
      return false
    end
    return finish(last + 1, match(s:sub(first, last)))
  end)
end
-- BIT STRING (identifier octet 0x03). The leading content octet holds the
-- number of padding bits; any value other than 0 raises an error. The
-- remaining octets are captured.
local BIT_STRING = Cobject(P("\x03"), function (content)
  local padding_bits = content:byte(1)
  assert(padding_bits == 0, "BIT STRING not octet aligned")
  return content:sub(2)
end)
-- IA5String (identifier octet 0x16). No inner matcher is supplied, so the
-- raw content string is captured.
local IA5String = Cobject(P("\x16"))
--- Returns a pattern for a DER OBJECT IDENTIFIER (identifier octet 0x06).
--
-- With `oid` given, the pattern matches only the exact encoding built from
-- packoid(pkix.txt2oid(oid)) and captures `oid` itself. Without it, the
-- pattern matches any object with identifier 0x06 and captures whatever
-- `unpackoid` returns for the content (raising an error if it fails).
--
-- @param oid optional OID in the form accepted by pkix.txt2oid
-- @return LPeg pattern
local OID = function (oid)
  if oid then
    local s = packoid(pkix.txt2oid(oid))
    -- Build the literal by concatenation rather than string.format: the
    -- encoded OID is binary data and may contain zero bytes, which "%s" does
    -- not preserve on every Lua version.
    return P("\x06" .. packlength(#s) .. s) * Cc(oid)
  end
  return Cobject(P"\x06", function (s)
    return assert(unpackoid(s))
  end)
end
-- SEQUENCE (identifier octet 0x30). `patt` is handed to Cobject unchanged
-- and is applied to the sequence's content.
local function SEQUENCE(patt)
  return Cobject(P("\x30"), patt)
end
-- TBSCertificate: a SEQUENCE whose content is gathered into one table, with
-- each component stored under its field name.
local TBSCertificate
do
  -- Components matched unconditionally, in this order.
  local required =
    Cg(Version, "version") *
    Cg(CertificateSerialNumber, "serialNumber") *
    Cg(AlgorithmIdentifier, "signature") *
    Cg(Name, "issuer") *
    Cg(Validity, "validity") *
    Cg(Name, "subject") *
    Cg(SubjectPublicKeyInfo, "subjectPublicKeyInfo")

  -- Components that may be absent (each is tried at most once).
  local optional =
    Cg(UniqueIdentifier(1), "issuerUniqueID")^-1 *
    Cg(UniqueIdentifier(2), "subjectUniqueID")^-1 *
    Cg(Extensions, "extensions")^-1 *
    Cg(P(1)^1, "trash")^-1 -- any leftover octets are kept under "trash"

  TBSCertificate = SEQUENCE(Ct(required * optional))
end
-- Signature is an alias for the BIT_STRING pattern.
local Signature = BIT_STRING
-- Certificate: the outermost SEQUENCE. Its three components are collected
-- into a table under the field names given below.
local Certificate
do
  local fields =
    Cg(TBSCertificate, "tbsCertificate") *
    Cg(AlgorithmIdentifier, "signatureAlgorithm") *
    Cg(Signature, "signature")

  Certificate = SEQUENCE(Ct(fields))
end
For more info on the issues with PEGs -- and a paper showing how to correctly translate general LL(1p) grammars to PEG -- see:
https://jeffreykegler.github.io/Ocean-of-Awareness-blog/indi...