From 87e0af97ccdf182bd5c14ca851189d77b37241be Mon Sep 17 00:00:00 2001 From: Yuri Tatishchev Date: Sun, 3 May 2026 01:06:37 -0700 Subject: [PATCH] add proposed grammar doc --- doc/fwl_grammar.md | 697 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 697 insertions(+) create mode 100644 doc/fwl_grammar.md diff --git a/doc/fwl_grammar.md b/doc/fwl_grammar.md new file mode 100644 index 0000000..893434d --- /dev/null +++ b/doc/fwl_grammar.md @@ -0,0 +1,697 @@ +# FWL Grammar Specification (MVP) + +## Overview + +FWL is a typed, functional DSL that compiles to nftables JSON. Programs are +sequences of top-level declarations. The grammar uses explicit braces and +semicolons throughout — no indentation sensitivity. Types are mandatory on all +top-level declarations for MVP; inference is deferred to a later version. + +The target nftables table is a single table named `fwl` by default +(configurable via a top-level `config` declaration). Both filter and NAT +policies compile into this one table. + +--- + +## Notation + +``` +::= production +| alternative +{ x } zero or more repetitions of x +[ x ] optional x +``` + +String terminals are written in `"double quotes"`. Regex-like character classes +use `[a-z]`, etc. + +--- + +## Top-Level Structure + +```ebnf +program ::= { config } { decl } + +config ::= "config" "{" { configProp ";" } "}" + +configProp ::= "table" "=" stringLit +``` + +Every non-`config` declaration is terminated by `";"`. + +--- + +## Declarations + +```ebnf +decl ::= interfaceDecl + | zoneDecl + | importDecl + | letDecl + | patternDecl + | flowDecl + | ruleDecl + | policyDecl +``` + +### Interface + +```ebnf +interfaceDecl ::= "interface" ident ":" ifaceKind "{" { ifaceProp ";" } "}" ";" + +ifaceKind ::= "WAN" | "LAN" | "WireGuard" | ident + +ifaceProp ::= "dynamic" + | "cidr4" "=" cidrSet + | "cidr6" "=" cidrSet + +cidrSet ::= "{" cidrLit { "," cidrLit } "}" +``` + +### Zone + +```ebnf +zoneDecl ::= "zone" ident "=" "{" ident { "," ident } "}" ";" +``` + +### Import + +```ebnf +importDecl ::= "import" ident ":" type "from" stringLit ";" +``` + +### Let + +```ebnf +letDecl ::= "let" ident ":" type "=" expr ";" +``` + +### Pattern + +Named patterns are first-class; they may appear anywhere a structural pattern +appears, including nested inside constructor patterns, `Frame(...)`, and other +named patterns. + +```ebnf +patternDecl ::= "pattern" ident ":" type "=" packetPat ";" +``` + +Named patterns are resolved during type-checking, not by macro-expanding at +parse time. Recursive named patterns are a type error. + +### Flow + +```ebnf +flowDecl ::= "flow" ident ":" "FlowPattern" "=" flowExpr ";" + +flowExpr ::= seqExpr + +seqExpr ::= flowAtom + | flowAtom "." seqExpr + | flowAtom "." seqExpr "within" duration +``` + +A `within` clause applies to the entire sequence to its left and binds most +tightly to the innermost `.`. For MVP, `within` is only valid at the top level +of a `flowExpr`. + +```ebnf +flowAtom ::= ident +``` + +### Rule + +Rules are reusable, named packet-processing functions. They receive a `Frame` +and return an `Action` (possibly via effects). + +```ebnf +ruleDecl ::= "rule" ident ":" type "=" lambdaExpr ";" + +lambdaExpr ::= "\" ident "->" expr +``` + +A `rule` body must be a lambda at the top level for MVP. + +### Policy + +Policies are the entry points tied to nftables hooks. A policy body is a +bare arm-block (no `case ... of` wrapper; the matched value is always the +bound `Frame`-like parameter of the policy). + +```ebnf +policyDecl ::= "policy" ident ":" type + "on" "{" hookSpec "}" + "=" armBlock ";" + +hookSpec ::= hookProp "," hookProp "," hookProp + | hookProp "," hookProp "," hookProp "," -- trailing comma OK + +hookProp ::= "hook" "=" hook + | "table" "=" tableName + | "priority" "=" priority + +hook ::= "Input" | "Forward" | "Output" | "Prerouting" | "Postrouting" +tableName ::= "Filter" | "NAT" +priority ::= "Filter" | "DstNat" | "SrcNat" | "Mangle" | intLit +``` + +--- + +## Arm Blocks + +Used uniformly inside `rule` bodies (via `case`) and `policy` bodies. + +```ebnf +armBlock ::= "{" { arm } "}" + +arm ::= "|" pat guardOpt "->" expr ";" + +guardOpt ::= ε + | "if" expr +``` + +--- + +## Patterns + +Patterns describe packet structure and bind names. All membership/comparison +predicates are guards (see § Expressions), not patterns — except for field +constraints inside record patterns, which are written as field predicates. + +```ebnf +pat ::= "_" -- wildcard + | ident -- variable binding + | namedPat -- first-class named pattern + | ctorPat -- e.g., IPv4(ip, ...) + | recordPat -- e.g., tcp { dport = :22 } + | tuplePat -- e.g., (udp, payload) + | framePat -- Frame(path, inner) + | bytePat -- e.g., [0x01 _*] + +-- A named pattern reference; resolved at type-check time. +-- Binds NO additional names itself (names are bound in the pattern's definition). +namedPat ::= ident -- must refer to a declared pattern + +ctorPat ::= ident "(" patList ")" +patList ::= pat { "," pat } + +recordPat ::= ident "{" [ fieldPatList ] "}" +fieldPatList::= fieldPat { "," fieldPat } +fieldPat ::= ident "=" literal -- equality constraint + | ident -- bind field to same-named variable + | ident "as" ident -- bind field to fresh variable + +tuplePat ::= "(" pat { "," pat } ")" + +-- Frame pattern: optional interface-path specifier, then inner packet pattern. +framePat ::= "Frame" "(" [ pathPat "," ] pat ")" + +-- Interface-path: source, destination, or both. +pathPat ::= endpointPat + | endpointPat "->" endpointPat + +endpointPat ::= "_" + | ident -- exact interface name + | ident "in" ident -- interface is member of zone +``` + +**Note on `∈`:** the parser accepts both the Unicode `∈` and the ASCII keyword +`in` as synonyms in all positions. The AST stores a single `MemberOf` +constructor. + +### Byte Patterns + +Used in `pattern` declarations for payload matching. + +```ebnf +bytePat ::= "[" { byteElem } "]" +byteElem ::= hexByte -- e.g., 0x01 + | "_" -- any single byte + | "_*" -- zero or more bytes +``` + +--- + +## Expressions + +```ebnf +expr ::= letExpr + | ifExpr + | doExpr + | caseExpr + | infixExpr + +letExpr ::= "let" ident "=" expr "in" expr + +ifExpr ::= "if" expr "then" expr "else" expr + +doExpr ::= "do" "{" doStmt { ";" doStmt } "}" +doStmt ::= ident "<-" expr -- effectful bind + | expr -- effectful sequence + +caseExpr ::= "case" expr "of" armBlock + +infixExpr ::= prefixExpr { infixOp prefixExpr } + +prefixExpr ::= appExpr + | "!" prefixExpr + +appExpr ::= atom { atom } -- function application + +atom ::= ident + | qualName + | literal + | tupleLit + | setLit + | mapLit + | performExpr + | "(" expr ")" + +performExpr ::= "perform" qualName "(" [ argList ] ")" + +tupleLit ::= "(" expr "," expr { "," expr } ")" +setLit ::= "{" [ expr { "," expr } ] "}" +mapLit ::= "{" mapEntry { "," mapEntry } "}" +mapEntry ::= expr "->" expr + +argList ::= expr { "," expr } + +qualName ::= ident { "." ident } + +infixOp ::= "&&" | "||" + | "==" | "!=" | "<" | "<=" | ">" | ">=" + | "in" | "∈" -- set/zone membership + | "++" -- string/list concat + | ">>" -- effect sequencing + | ">>=" -- monadic bind +``` + +**Operator precedence** (high to low): + +| Level | Operators | Assoc | +|-------|-----------------------|-------| +| 7 | application | left | +| 6 | `==` `!=` `<` `<=` `>` `>=` `in` `∈` | none | +| 5 | `&&` | right | +| 4 | `\|\|` | right | +| 3 | `++` | right | +| 2 | `>>=` | left | +| 1 | `>>` | left | + +--- + +## Types + +```ebnf +type ::= funType + +funType ::= effectType + | effectType "->" funType + +effectType ::= "<" [ ident { "," ident } ] ">" simpleType + | simpleType + +simpleType ::= ident [ "<" typeList ">" ] -- parameterised type + | "(" type { "," type } ")" -- tuple type + | "(" type ")" -- grouped + +typeList ::= type { "," type } +``` + +Effect rows use angle brackets: ` Action`. + +For MVP, effect annotations are required on `rule` declarations that contain +`perform` expressions and are optional on `policy` declarations. + +--- + +## Actions + +`Action` is a built-in type. Its constructors are: + +```ebnf +action ::= "Allow" + | "Drop" + | "Continue" + | "Masquerade" + | "DNAT" "(" expr ")" + | "DNATMap" "(" expr ")" + | "Log" "(" logLevel "," expr ")" + +logLevel ::= "Info" | "Warn" | "Error" +``` + +`Continue` is a legal action value and compiles to nothing (a no-op pass- +through). It is used to make exhaustive arm blocks typecheck when earlier arms +handle all interesting cases. A policy arm that returns `Continue` as the last +arm is a type error (unreachable or missing terminator); a `rule` arm may +return `Continue` to signal "pass control back to the caller." + +--- + +## Effects + +The built-in effects available for MVP are: + +| Effect | Operations | +|------------|---------------------------------------------------| +| `FlowMatch`| `FlowMatch.check(flowId, pattern) : MatchResult` | +| `Log` | `Log.emit(level, msg) : ()` | +| `FIB` | `FIB.daddrLocal(ip) : Bool` | + +`MatchResult` constructors: `Matched`, `Unmatched`. + +Additional effects may be declared by the user in a future version. + +--- + +## Literals + +```ebnf +literal ::= intLit + | stringLit + | boolLit + | ipv4Lit + | ipv6Lit + | cidrLit + | portLit + | durationLit + | hexByte + +intLit ::= ["-"] digit+ +stringLit ::= '"' { strChar } '"' +boolLit ::= "true" | "false" +ipv4Lit ::= octet "." octet "." octet "." octet +ipv6Lit ::= -- standard IPv6 notation including "::" compression +cidrLit ::= (ipv4Lit | ipv6Lit) "/" digit+ +portLit ::= ":" digit+ -- e.g., :22, :8080 +durationLit ::= digit+ timeUnit +timeUnit ::= "s" | "ms" | "m" | "h" +hexByte ::= "0x" hexDigit hexDigit +octet ::= digit+ -- 0-255 +``` + +--- + +## Lexical Rules + +```ebnf +ident ::= letter { letter | digit | "_" } + -- must not be a reserved word + +reserved ::= "config" | "interface" | "zone" | "import" | "let" | "in" + | "pattern" | "flow" | "rule" | "policy" | "on" + | "case" | "of" | "if" | "then" | "else" | "do" + | "perform" | "within" | "as" + | "WAN" | "LAN" | "WireGuard" + | "Input" | "Forward" | "Output" | "Prerouting" | "Postrouting" + | "Filter" | "NAT" | "Mangle" | "DstNat" | "SrcNat" + | "Allow" | "Drop" | "Continue" | "Masquerade" | "DNAT" + | "DNATMap" | "Log" | "Info" | "Warn" | "Error" + | "Matched" | "Unmatched" + | "dynamic" | "cidr4" | "cidr6" | "table" | "hook" | "priority" + | "true" | "false" + | "FlowPattern" | "Frame" + +comment ::= "--" { any char except newline } + | "{-" { any char } "-}" + +whitespace ::= space | tab | newline | comment +``` + +Identifiers beginning with an uppercase letter are treated as constructor +names by convention; those beginning with lowercase are variables. The lexer +does not enforce this — it is a naming convention only, checked during +type-checking. + +--- + +## Resolved Inconsistencies from Proposal + +The following decisions were made to normalize the proposal's syntax: + +| Topic | Proposal state | MVP decision | +|-------|---------------|--------------| +| Interface body | Multiline, no delimiters | Braced block with `;` separators | +| Policy body | `where` with indented arms | `=` followed by arm-block | +| Rule body | `\frame -> case frame of \| ...` | `\ident -> expr`; `case` is a normal expression | +| Policy vs rule | Distinct surface syntax | Policies use a bare arm-block; rules use `case` explicitly | +| `Frame<{}>` | Unclear `<{}>` parameter | Parsed but ignored for MVP; written as `Frame` in practice | +| Named patterns in sub-positions | Unclear | First-class everywhere; resolved at type-check time | +| `∈` operator | Unicode only | Both `∈` and `in` accepted everywhere | +| `Continue` | Unclear semantics | Legal `Action` constructor; compiles to nothing; type error if last arm of a policy block | +| Single nftables table | Not specified | Default table name `fwl`; configurable via `config { table = "name"; }` | +| `handle` syntax | Mentioned but unspecified | Deferred; MVP only has `perform` | +| Effect annotations | Inconsistent (`<>` vs `{}`) | Angle brackets `` everywhere | +| Guard vs pattern membership | Mixed | Structural matching in patterns only; `in`/`∈` in guards only (except `fieldPat`) | + +--- + +## Canonical Examples + +The following examples must all parse under the grammar above. + +### 1. Interface and Zone + +```fwl +interface wan : WAN { dynamic; }; +interface lan : LAN { + cidr4 = { 10.17.1.0/24 }; + cidr6 = { fe80::/10, fd12:3456::/48 }; +}; +interface wg0 : WireGuard {}; + +zone lan_zone = { lan, wg0 }; +``` + +### 2. Import and Let + +```fwl +import rfc1918 : CIDRSet from "builtin:rfc1918"; + +let forwards : Map<(Protocol, Port), (IP, Port)> = { + (tcp, :8080) -> (10.17.1.10, :80), + (tcp, :2222) -> (10.17.1.11, :22) +}; +``` + +### 3. Pattern and Flow + +```fwl +pattern WGInitiation : (UDPHeader, Bytes) = + (udp { length = 156 }, [0x01 _*]); + +pattern WGResponse : (UDPHeader, Bytes) = + (udp { length = 100 }, [0x02 _*]); + +flow WireGuardHandshake : FlowPattern = + WGInitiation . WGResponse within 5s; +``` + +### 4. Rule with Effects + +```fwl +rule blockOutboundWG : Frame -> Action = + \frame -> + case frame of { + | Frame(iif in lan_zone -> wan, IPv4(ip, UDP(udp, payload))) + if matches(WGInitiation, (udp, payload)) -> + case perform FlowMatch.check(flowOf(ip, wg), WireGuardHandshake) of { + | Matched -> + do { + perform Log.emit(Warn, "WG blocked: " ++ show(ip.src)); + Drop + }; + | _ -> Continue; + }; + | _ -> Continue; + }; +``` + +### 5. Filter Policy + +```fwl +policy input : Frame + on { hook = Input, table = Filter, priority = Filter } + = { + | _ if ct.state in { Established, Related } -> Allow; + | Frame(lo, _) -> Allow; + | Frame(_, IPv6(ip6, ICMPv6(_, _))) + if ip6.src in fe80::/10 -> Allow; + | Frame(_, IPv4(_, TCP(tcp, _))) + if tcp.dport == :22 -> Allow; + | Frame(_, IPv4(_, UDP(udp, _))) + if udp.dport == :51944 -> Allow; + | _ -> Drop; + }; +``` + +### 6. NAT Policy + +```fwl +policy nat_prerouting : Frame + on { hook = Prerouting, table = NAT, priority = DstNat } + = { + | Frame(_, IPv4(ip, _)) -> + if perform FIB.daddrLocal(ip.dst) + then DNATMap(forwards) + else Allow; + | _ -> Allow; + }; + +policy nat_postrouting : Frame + on { hook = Postrouting, table = NAT, priority = SrcNat } + = { + | Frame(_ -> wan, IPv4(ip, _)) if ip.src in rfc1918 -> Masquerade; + | _ -> Allow; + }; +``` + +### 7. Forward Policy calling a Rule + +```fwl +policy forward : Frame + on { hook = Forward, table = Filter, priority = Filter } + = { + | _ if ct.state in { Established, Related } -> Allow; + | frame if iif in lan_zone && oif == wan -> blockOutboundWG(frame); + | _ if ct.status == DNAT -> Allow; + | Frame(iif in lan_zone -> wan, _) -> Allow; + | Frame(iif in lan_zone -> lan_zone, _) -> Allow; + | Frame(wan -> lan_zone, IPv4(ip, TCP(tcp, _))) + if (ip.dst, tcp.dport) in forwards -> Allow; + | _ -> Drop; + }; +``` + +--- + +## Haskell AST Sketch + +The following gives the direct mapping from grammar to Haskell types. + +```haskell +-- src/FWL/AST.hs + +data Program = Program [Config] [Decl] + +data Config = Config { configTable :: Maybe String } + +data Decl + = DInterface Name IfaceKind [IfaceProp] + | DZone Name [Name] + | DImport Name Type String + | DLet Name Type Expr + | DPattern Name Type PacketPat + | DFlow Name FlowExpr + | DRule Name Type Expr -- expr must be LamExpr + | DPolicy Name Type PolicyMeta ArmBlock + +data PolicyMeta = PolicyMeta + { pmHook :: Hook + , pmTable :: TableName + , pmPriority :: Priority } + +data Hook = Input | Forward | Output | Prerouting | Postrouting +data TableName= Filter | NAT +data Priority = PFilter | PDstNat | PSrcNat | PMangle | PInt Int + +data IfaceKind = WAN | LAN | WireGuard | IKUser Name +data IfaceProp = IPDynamic | IPCidr4 [CIDR] | IPCidr6 [CIDR] + +-- Patterns + +data Pat + = PWild + | PVar Name + | PNamed Name -- named pattern reference (first-class) + | PCtor Name [Pat] + | PRecord Name [FieldPat] + | PTuple [Pat] + | PFrame (Maybe PathPat) Pat + | PBytes [ByteElem] + +data FieldPat + = FPEq Name Literal + | FPBind Name -- bind field to same-named var + | FPAs Name Name -- bind field to fresh var + +data PathPat = PathPat (Maybe EndpointPat) (Maybe EndpointPat) +data EndpointPat + = EPWild + | EPName Name + | EPMember Name Name -- iif `in` zoneName + +data ByteElem = BEHex Word8 | BEWild | BEWildStar + +-- Flow + +data FlowExpr + = FAtom Name + | FSeq FlowExpr FlowExpr (Maybe Duration) + +type Duration = (Int, TimeUnit) +data TimeUnit = Seconds | Millis | Minutes | Hours + +-- Types + +data Type + = TName Name [Type] + | TTuple [Type] + | TFun Type Type + | TEffect [Name] Type + +-- Expressions + +data Expr + = EVar Name + | EQual [Name] + | ELit Literal + | ELam Name Expr + | EApp Expr Expr + | ECase Expr ArmBlock + | EIf Expr Expr Expr + | EDo [DoStmt] + | ELet Name Expr Expr + | ETuple [Expr] + | ESet [Expr] + | EMap [(Expr, Expr)] + | EPerform [Name] [Expr] -- perform QualName(args) + | EInfix InfixOp Expr Expr + | EPrefix PrefixOp Expr + +data InfixOp + = OpAnd | OpOr | OpEq | OpNeq | OpLt | OpLte | OpGt | OpGte + | OpIn | OpConcat | OpThen | OpBind + +data PrefixOp = OpNot + +data DoStmt + = DSBind Name Expr + | DSExpr Expr + +type ArmBlock = [Arm] +data Arm = Arm Pat (Maybe Expr) Expr -- pattern, guard, body + +-- Actions (constructors of the Action type; parsed as Expr constructors) +-- Allow | Drop | Continue | Masquerade +-- | DNAT Expr | DNATMap Expr | Log LogLevel Expr + +data LogLevel = LInfo | LWarn | LError + +-- Literals + +data Literal + = LInt Int + | LString String + | LBool Bool + | LIPv4 (Word8,Word8,Word8,Word8) + | LIPv6 [Word16] + | LCIDR Literal Int + | LPort Int + | LDuration Int TimeUnit + | LHex Word8 + +type Name = String +type CIDR = (Literal, Int) +```