Files
fwl/doc/fwl_grammar.md
2026-05-03 17:45:40 -07:00

399 lines
12 KiB
Markdown

# FWL Grammar Specification
> **Version:** MVP
> **Last updated:** May 2026
> This document is the authoritative grammar reference for the Firewall Language (FWL).
> It supersedes the syntax examples in `proposal.md` and reflects the current parser implementation.
---
## Design Principles
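- **Explicit delimiters everywhere** — all blocks are delimited by `{` `}`. Items in declaration bodies and case arms end with `;`; elements of sets, maps, zones, and policy metadata are separated by `,`. No layout/indentation sensitivity.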
- **Syntactic keywords are reserved** — only words that structurally delimit declarations or expressions are in `reservedNames`. Semantic values (action names, effect labels, constructors) are plain identifiers.
- **Types are explicit** — top-level declarations carry full type annotations in the MVP.
- **Patterns vs. guards are strictly separated** — structural decomposition happens in patterns; boolean predicates over bound names happen in guards.
- **IP addresses and priorities are integers** — IPv4 is a 32-bit value; IPv6 is a 128-bit value. Named priority constants (`Filter`, `SrcNat`, etc.) lower to their canonical integer values at parse time.
---
## Top-Level Program
```ebnf
program ::= { decl }
decl ::= interfaceDecl
| zoneDecl
| importDecl
| letDecl
| patternDecl
| flowDecl
| ruleDecl
| policyDecl
```
---
## Declarations
```ebnf
interfaceDecl ::= "interface" ident ":" ifaceKind "{" { ifaceProp ";" } "}" ";"
ifaceKind ::= "WAN" | "LAN" | "WireGuard" | ident
ifaceProp ::= "dynamic"
| "cidr4" "=" cidrSet
| "cidr6" "=" cidrSet
cidrSet ::= "{" cidrLit { "," cidrLit } "}"
zoneDecl ::= "zone" ident "=" "{" ident { "," ident } "}" ";"
importDecl ::= "import" ident ":" type "from" stringLit ";"
letDecl ::= "let" ident ":" type "=" expr ";"
patternDecl ::= "pattern" ident ":" type "=" pat ";"
flowDecl ::= "flow" ident ":" "FlowPattern" "=" flowExpr ";"
flowExpr ::= ident
| ident "." ident "within" duration
ruleDecl ::= "rule" ident ":" type "=" lambdaExpr ";"
policyDecl ::= "policy" ident ":" type
"on" "{"
"hook" "=" hook ","
"table" "=" tableName ","
"priority" "=" priority
"}"
"=" armBlock ";"
```
### Policy Metadata
```ebnf
hook ::= "Input" | "Forward" | "Output" | "Prerouting" | "Postrouting"
tableName ::= "Filter" | "NAT" | ident
-- Priority is always an integer in nftables JSON.
-- Named constants are resolved at parse time:
-- Raw = -300, ConnTrack = -200, Mangle = -150,
-- DstNat = -100, Filter = 0, SrcNat = 100
priority ::= "Filter" | "DstNat" | "SrcNat" | "Mangle"
| "Raw" | "ConnTrack"
| "-"? nat
```
---
## Types
```ebnf
type ::= simpleType
| simpleType "->" type -- function type
| "<" effectList ">" type -- effectful function type
simpleType ::= ident -- type name (Frame, Action, IP, etc.)
| ident "<" typeList ">" -- generic: Map<K,V>, Bytes<{}>
| "(" type { "," type } ")" -- tuple type
typeList ::= type { "," type }
effectList ::= ident { "," ident }
```
> **Note:** `Frame`, `FlowPattern`, and all other type and effect names (`Action`, `CIDRSet`, `Log`, etc.)
> are plain identifiers in the type parser — they are **not** reserved keywords.
---
## Expressions
```ebnf
lambdaExpr ::= "\" ident "->" expr
| expr
expr ::= ifExpr
| caseExpr
| doExpr
| infixExpr
ifExpr ::= "if" expr "then" expr "else" expr
caseExpr ::= "case" expr "of" armBlock -- arms use the armBlock / arm productions
doExpr ::= "do" "{" stmt { ";" stmt } "}"
stmt ::= "let" ident "=" expr
| ident "<-" expr
| expr
infixExpr ::= prefixExpr { infixOp prefixExpr }
infixOp ::= "&&" | "||" | "==" | "!=" | "<" | "<=" | ">" | ">="
| "++" | ">>" | ">>=" | "∈" | "in"
prefixExpr ::= "!" prefixExpr | appExpr
appExpr ::= atom { atom }
atom ::= performExpr
| mapLit -- { expr -> expr, ... } tried before setLit
| setLit -- { expr, ... }
| tupleLit -- ( expr, expr, ... ) requires at least 2 elements
| "(" expr ")"
| literal
| portLit -- :22 :8080
| qualName -- foo foo.bar foo.bar.baz
performExpr ::= "perform" qualName "(" argList? ")"
argList ::= expr { "," expr }
mapLit ::= "{" mapEntry { "," mapEntry } "}"
mapEntry ::= expr "->" expr
setLit ::= "{" expr { "," expr } "}"
tupleLit ::= "(" expr "," expr { "," expr } ")"
qualName ::= ident { "." ident }
```
---
## Patterns
```ebnf
pat ::= wildcardPat -- _
| framePat -- Frame(...)
| tuplePat -- (p, p, ...) requires at least 2 elements
| bytesPat -- [ byteElem* ]
| recordPat -- Ctor { field = lit, ... }
| namedOrCtorPat -- Ctor(p,...) or bare identifier
wildcardPat ::= "_"
framePat ::= "Frame" "(" frameArgs ")"
frameArgs ::= pathPat "," pat -- with explicit path
| pat -- path inferred
pathPat ::= endpointPat? ( "->" endpointPat? )?
endpointPat ::= "_"
| ident "in" ident -- iif in lan_zone
| ident "∈" ident
| ident
tuplePat ::= "(" pat "," pat { "," pat } ")"
bytesPat ::= "[" byteElem* "]"
byteElem ::= hexByte -- 0xff
| "_" -- any byte
| "_" "*" -- zero or more bytes
recordPat ::= ident "{" fieldPat { "," fieldPat } "}"
fieldPat ::= ident "=" fieldLit -- exact match
| ident "in" expr -- membership
| ident "∈" expr
| ident "as" ident -- bind with alias
| ident -- bind to same name
-- fieldLit extends literal with port syntax
fieldLit ::= ":" nat | literal
namedOrCtorPat ::= ident "(" pat { "," pat } ")" -- constructor with args
| ident -- variable or nullary ctor
```
---
## Case Arms
```ebnf
armBlock ::= "{" { arm } "}"
arm ::= "|" pat ( "if" expr )? "->" expr ";"
```
---
## Literals
```ebnf
literal ::= ipOrCidrLit
| hexByte -- 0xff
| "true" | "false"
| stringLit -- "..."
| nat -- decimal integer
portLit ::= ":" nat -- :22, :8080, :51944
ipOrCidrLit ::= ipLit ( "/" nat )? -- optional prefix → CIDR
ipLit ::= ipv6Lit | ipv4Lit
-- IPv4: four decimal octets 0-255
ipv4Lit ::= octet "." octet "." octet "." octet
octet ::= nat -- 0..255
-- IPv6: full or compressed notation, optional embedded IPv4
-- All standard forms are supported:
-- full: 2001:0db8:85a3:0000:0000:8a2e:0370:7334
-- compressed: 2001:db8::8a2e:370:7334
-- loopback: ::1
-- any: ::
-- link-local: fe80::1
-- IPv4-mapped: ::ffff:192.168.1.1
ipv6Lit ::= ipv6Groups
ipv6Groups ::= "::" ipv6RightGroups? -- starts with ::
| ipv6LeftGroups ( "::" ipv6RightGroups? )?
ipv6LeftGroups ::= hex16 { ":" hex16 } -- stops before ::
ipv6RightGroups ::= ipv4EmbeddedGroups | ipv6LeftGroups
ipv4EmbeddedGroups ::= { hex16 ":" } octet "." octet "." octet "." octet
hex16 ::= hexDigit+ -- 1-4 hex digits, value 0x0000..0xffff
cidrLit ::= ipLit "/" nat -- must be a CIDR (prefix required)
hexByte ::= "0x" hexDigit hexDigit
duration ::= nat timeUnit
timeUnit ::= "s" | "ms" | "m" | "h"
```
### Internal IP Representation
IP addresses are stored as plain `Integer` values, not tuples or byte arrays:
| Type | Storage | Range |
|-------|----------|------------------|
| IPv4 | 32-bit `Integer` | `0x00000000`..`0xFFFFFFFF` |
| IPv6 | 128-bit `Integer` | `0x0`..`0xFFFF…FFFF` (32 hex digits, i.e. 2^128 − 1) |
CIDR host-bit validation: `(addr .&. hostMask) == 0` where `hostMask = (1 << (bits - prefix)) - 1` and `bits` is the address width (32 for IPv4, 128 for IPv6).

---
## Reserved Keywords
Only these words are reserved (i.e. the `ident` parser will reject them):
```
config table interface zone import from
let in pattern flow rule policy on
case of if then else do perform
within as dynamic cidr4 cidr6
hook priority
WAN LAN WireGuard
Input Forward Output Prerouting Postrouting
Filter NAT Mangle DstNat SrcNat
Raw ConnTrack
true false
```
The following are **not** reserved and parse as plain identifiers in all positions
(type names, constructors, action values, effect labels):
```
Frame FlowPattern
Allow Drop Continue Masquerade DNAT DNATMap
Log Info Warn Error
Matched Unmatched
Action Packet IP Port Protocol
CIDRSet Map Bytes
```
---
## Priority Constants
Named priorities resolve to integers at parse time:
| Name | Integer value |
|-------------|---------------|
| `Raw` | -300 |
| `ConnTrack` | -200 |
| `Mangle` | -150 |
| `DstNat` | -100 |
| `Filter` | 0 |
| `SrcNat` | 100 |
Arbitrary integer priorities, including negative values such as `-150`, are also accepted.

---
## Operator Precedence
From lowest to highest binding:
| Level | Operators | Associativity |
|-------|------------------------|---------------|
| 1 | `if … then … else` | — |
| 2 | `\|\|` | left |
| 3 | `&&` | left |
| 4 | `==` `!=` | none |
| 5 | `<` `<=` `>` `>=` | none |
| 6 | `∈` `in` | none |
| 7 | `++` `>>` `>>=` | left |
| 8 | `!` (prefix) | — |
| 9 | function application | left |
---
## Canonical Examples
### Interface and zone declarations
```fwl
interface wan : WAN { dynamic; };
interface lan : LAN { cidr4 = { 10.17.1.0/24 }; };
interface wg0 : WireGuard {};
zone lan_zone = { lan, wg0 };
```
### Map literal
```fwl
let forwards : Map<(Protocol, Port), (IP, Port)> = {
(tcp, :8080) -> (10.17.1.10, :80),
(tcp, :2222) -> (10.17.1.11, :22)
};
```
### Named patterns and flows
```fwl
pattern WGInitiation : (UDPHeader, Bytes<{}>) =
(udp { length = 156 }, [0x01 _*]);
flow WireGuardHandshake : FlowPattern =
WGInitiation . WGResponse within 5s;
```
### Rule with effects
```fwl
rule blockOutboundWG : Frame -> <FlowMatch, Log> Action =
\frame ->
case frame of {
| Frame(iif in lan_zone -> wan, IPv4(ip, UDP(udp, payload)))
if matches(WGInitiation, (udp, payload)) ->
case perform FlowMatch.check(flowOf(ip, wg), WireGuardHandshake) of {
| Matched -> do {
perform Log.emit(Warn, "WG blocked");
Drop
};
| _ -> Continue;
};
| _ -> Continue;
};
```
### Policy
```fwl
policy input : Frame
on { hook = Input, table = Filter, priority = Filter } =
{
| _ if ct.state in { Established, Related } -> Allow;
| Frame(lo, _) -> Allow;
| Frame(_, Ether(_, IPv4(_, TCP(tcp, _))))
if tcp.dport == :22 -> Allow;
| Frame(_, Ether(_, IPv4(_, UDP(udp, _))))
if udp.dport == :51944 -> Allow;
| _ -> Drop;
};
```