darkfi/zkas/
parser.rs

1/* This file is part of DarkFi (https://dark.fi)
2 *
3 * Copyright (C) 2020-2026 Dyne.org foundation
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Affero General Public License as
7 * published by the Free Software Foundation, either version 3 of the
8 * License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU Affero General Public License for more details.
14 *
15 * You should have received a copy of the GNU Affero General Public License
16 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
17 */
18
19use std::{
20    borrow::Borrow, collections::HashMap, hash::Hash, io::Result, iter::Peekable, str::Chars,
21};
22
23use super::{
24    ast::{Arg, Constant, Literal, Statement, StatementType, Variable, Witness},
25    constants::{ALLOWED_FIELDS, MAX_K, MAX_NS_LEN},
26    error::ErrorEmitter,
27    lexer::{Token, TokenType},
28    LitType, Opcode, VarType,
29};
30
/// zkas language builtin keywords.
/// These can not be used anywhere except where they are expected.
/// A `Symbol` token with one of these names inside a section body is
/// rejected by the parser.
const KEYWORDS: [&str; 5] = ["k", "field", "constant", "witness", "circuit"];

/// Forbidden namespaces.
/// A section's namespace string must not be equal to any of these.
const NOPE_NS: [&str; 4] = [".constant", ".literal", ".witness", ".circuit"];

/// Valid constant types and their allowed names.
/// Each entry is `(type name as written in the source, matching VarType,
/// constant names accepted for that type)`.
const CONSTANT_TYPES: &[(&str, VarType, &[&str])] = &[
    ("EcFixedPoint", VarType::EcFixedPoint, &["VALUE_COMMIT_RANDOM"]),
    ("EcFixedPointShort", VarType::EcFixedPointShort, &["VALUE_COMMIT_VALUE"]),
    ("EcFixedPointBase", VarType::EcFixedPointBase, &["VALUE_COMMIT_RANDOM_BASE", "NULLIFIER_K"]),
];
44
/// Minimal insertion-ordered map.
///
/// Lookups go through a `HashMap`, while a separate `Vec` remembers the
/// order in which keys were first inserted so the entries can be replayed
/// in that order.
#[derive(Clone)]
struct IndexMap<K, V> {
    /// Keys in first-insertion order. Each key appears exactly once.
    pub order: Vec<K>,
    /// Key/value storage used for lookups.
    pub map: HashMap<K, V>,
}

impl<K, V> IndexMap<K, V> {
    /// Create an empty map.
    fn new() -> Self {
        Self { order: Vec::new(), map: HashMap::new() }
    }
}

impl<K, V> IndexMap<K, V>
where
    K: Eq + Hash + Send + Sync + Clone + 'static,
    V: Send + Sync + Clone + 'static,
{
    /// Returns `true` if `k` is present in the map.
    fn contains_key<Q: Hash + Eq + ?Sized>(&self, k: &Q) -> bool
    where
        K: Borrow<Q>,
    {
        self.map.contains_key(k)
    }

    /// Returns a reference to the value stored under `k`, if any.
    fn get<Q: Hash + Eq + ?Sized>(&self, k: &Q) -> Option<&V>
    where
        K: Borrow<Q>,
    {
        self.map.get(k)
    }

    /// Insert `v` under `k` and return the value previously stored there.
    ///
    /// Overwriting an existing key keeps the key's original position in
    /// the ordering; only a brand new key is appended to `order`.
    fn insert(&mut self, k: K, v: V) -> Option<V> {
        let previous = self.map.insert(k.clone(), v);
        if previous.is_none() {
            // First time we see this key, so record its position.
            self.order.push(k);
        }
        previous
    }

    /// Returns cloned `(key, value)` pairs in first-insertion order.
    ///
    /// This is not a real iterator: the whole content is cloned into a
    /// freshly allocated `Vec`.
    fn scam_iter(&self) -> Vec<(K, V)> {
        self.order
            .iter()
            .map(|k| {
                let v = self.map.get(k).expect("every key in `order` is present in `map`");
                (k.clone(), v.clone())
            })
            .collect()
    }
}
85
86// Valid witness types
87impl TryFrom<&Token> for VarType {
88    type Error = String;
89
90    fn try_from(token: &Token) -> std::result::Result<Self, String> {
91        match token.token.as_str() {
92            "EcPoint" => Ok(Self::EcPoint),
93            "EcNiPoint" => Ok(Self::EcNiPoint),
94            "Base" => Ok(Self::Base),
95            "Scalar" => Ok(Self::Scalar),
96            "MerklePath" => Ok(Self::MerklePath),
97            "SparseMerklePath" => Ok(Self::SparseMerklePath),
98            "Uint32" => Ok(Self::Uint32),
99            "Uint64" => Ok(Self::Uint64),
100            x => Err(format!("{x} is an unsupported witness type")),
101        }
102    }
103}
104
/// Parser for zkas source code.
/// Holds the lexed tokens together with the emitter used to report
/// errors and warnings.
pub struct Parser {
    /// Tokens to be parsed.
    tokens: Vec<Token>,
    /// Emitter used for every diagnostic raised while parsing.
    error: ErrorEmitter,
}

/// Output of `Parser::parse`: namespace, declared `k`, constants,
/// witnesses, and circuit statements, in that order.
type Parsed = (String, u32, Vec<Constant>, Vec<Witness>, Vec<Statement>);
111
112/// Intermediate structure to hold parsed section tokens.
113/// The tokens gathered from each of the sections are stored here
114/// before being converted into AST nodes.
115struct SectionTokens {
116    constant: Vec<Token>,
117    witness: Vec<Token>,
118    circuit: Vec<Token>,
119}
120
121impl SectionTokens {
122    fn new() -> Self {
123        Self { constant: vec![], witness: vec![], circuit: vec![] }
124    }
125}
126
127impl Parser {
128    pub fn new(filename: &str, source: Chars, tokens: Vec<Token>) -> Self {
129        // For nice error reporting, we'll load everything into a string
130        // vector so we have references to lines.
131        let lines: Vec<String> = source.as_str().lines().map(|x| x.to_string()).collect();
132        let error = ErrorEmitter::new("Parser", filename, lines);
133
134        Self { tokens, error }
135    }
136
137    pub fn parse(&self) -> Result<Parsed> {
138        if self.tokens.is_empty() {
139            return Err(self.error.abort("Source file does not contain any valid tokens.", 0, 0))
140        }
141
142        if self.tokens[0].token_type != TokenType::Symbol {
143            return Err(self.error.abort(
144                "Source file does not start with a section. Expected `constant/witness/circuit`.",
145                0,
146                0,
147            ))
148        }
149
150        let mut iter = self.tokens.iter();
151
152        // Parse header (k and field declarations)
153        let declared_k = self.parse_header(&mut iter)?;
154
155        // Parse all sections and collect their tokens
156        let (namespace, section_tokens) = self.parse_sections(&mut iter)?;
157
158        // Build AST from section tokens
159        let constants = self.build_constants(&section_tokens.constant)?;
160        let witnesses = self.build_witnesses(&section_tokens.witness)?;
161        let statements = self.parse_ast_circuit(&section_tokens.circuit)?;
162
163        if statements.is_empty() {
164            return Err(self.error.abort("Circuit section is empty.", 0, 0))
165        }
166
167        Ok((namespace, declared_k, constants, witnesses, statements))
168    }
169
170    /// Parse the file header: k=N; field="...";
171    ///
172    /// The first thing that has to be declared in the source code is the
173    /// constant "k" which defines 2^k rows that the circuit needs to
174    /// successfully execute.
175    ///
176    /// Then we declare the field we're working in.
177    fn parse_header<'a>(&self, iter: &mut impl Iterator<Item = &'a Token>) -> Result<u32> {
178        let Some((k, equal, number, semicolon)) = Self::next_tuple4(iter) else {
179            return Err(self.error.abort("Source file does not start with k=n;", 0, 0))
180        };
181
182        self.expect_token_type(k, TokenType::Symbol)?;
183        self.expect_token_type(equal, TokenType::Assign)?;
184        self.expect_token_type(number, TokenType::Number)?;
185        self.expect_token_type(semicolon, TokenType::Semicolon)?;
186
187        if k.token != "k" {
188            return Err(self.error.abort("Source file does not start with k=n;", k.line, k.column))
189        }
190
191        // Ensure that the value for k can be parsed correctly into the token type.
192        let declared_k: u32 = number.token.parse().map_err(|e| {
193            self.error.abort(
194                &format!("k param is invalid, max allowed is {MAX_K}. Error: {e}"),
195                number.line,
196                number.column,
197            )
198        })?;
199
200        if declared_k > MAX_K {
201            return Err(self.error.abort(
202                &format!("k param is too high, max allowed is {MAX_K}"),
203                number.line,
204                number.column,
205            ))
206        }
207
208        // Parse field declaration
209        let Some((field, equal, field_name, semicolon)) = Self::next_tuple4(iter) else {
210            return Err(self.error.abort("Source file does not declare field after k", 0, 0))
211        };
212
213        self.expect_token_type(field, TokenType::Symbol)?;
214        self.expect_token_type(equal, TokenType::Assign)?;
215        self.expect_token_type(field_name, TokenType::String)?;
216        self.expect_token_type(semicolon, TokenType::Semicolon)?;
217
218        if field.token != "field" {
219            return Err(self.error.abort(
220                "Source file does not declare field after k",
221                field.line,
222                field.column,
223            ))
224        }
225
226        if !ALLOWED_FIELDS.contains(&field_name.token.as_str()) {
227            return Err(self.error.abort(
228                &format!(
229                    "Declared field \"{}\" is not supported. Use any of: {ALLOWED_FIELDS:?}",
230                    field_name.token
231                ),
232                field_name.line,
233                field_name.column,
234            ))
235        }
236
237        Ok(declared_k)
238    }
239
240    /// Parse all sections (constant, witness, circuit) and return their tokens.
241    ///
242    /// Sections "constant", "witness", and "circuit" are the sections we must
243    /// be declaring in our source code. When we find one, we'll take all the
244    /// tokens found in the section and place them in their respective vec.
245    ///
246    /// NOTE: Currently this logic depends on the fact that the sections are
247    /// closed off with braces. This should be revisited later when we decide
248    /// to add other lang functionality that also depends on using braces.
249    fn parse_sections<'a>(
250        &self,
251        iter: &mut impl Iterator<Item = &'a Token>,
252    ) -> Result<(String, SectionTokens)> {
253        let mut sections = SectionTokens::new();
254        let mut namespace: Option<String> = None;
255        let mut declared = (false, false, false); // constant, witness, circuit
256
257        while let Some(t) = iter.next() {
258            let section_tokens = match t.token.as_str() {
259                "constant" => {
260                    if declared.0 {
261                        return Err(self.error.abort(
262                            "Duplicate `constant` section found.",
263                            t.line,
264                            t.column,
265                        ))
266                    }
267                    declared.0 = true;
268                    &mut sections.constant
269                }
270                "witness" => {
271                    if declared.1 {
272                        return Err(self.error.abort(
273                            "Duplicate `witness` section found.",
274                            t.line,
275                            t.column,
276                        ))
277                    }
278                    declared.1 = true;
279                    &mut sections.witness
280                }
281                "circuit" => {
282                    if declared.2 {
283                        return Err(self.error.abort(
284                            "Duplicate `circuit` section found.",
285                            t.line,
286                            t.column,
287                        ))
288                    }
289                    declared.2 = true;
290                    &mut sections.circuit
291                }
292                x => {
293                    return Err(self.error.abort(
294                        &format!("Section `{x}` is not a valid section"),
295                        t.line,
296                        t.column,
297                    ))
298                }
299            };
300
301            // Absorb all tokens until closing brace
302            self.absorb_section_tokens(iter, section_tokens)?;
303
304            // Validate and extract namespace
305            namespace =
306                Some(self.validate_section_namespace(&t.token, section_tokens, namespace)?);
307        }
308
309        let ns =
310            namespace.ok_or_else(|| self.error.abort("Missing namespace in .zk source.", 0, 0))?;
311
312        if !declared.0 {
313            return Err(self.error.abort("Missing `constant` section in .zk source.", 0, 0))
314        }
315        if !declared.1 {
316            return Err(self.error.abort("Missing `witness` section in .zk source.", 0, 0))
317        }
318        if !declared.2 {
319            return Err(self.error.abort("Missing `circuit` section in .zk source.", 0, 0))
320        }
321
322        Ok((ns, sections))
323    }
324
325    /// Absorb tokens from iterator until a closing brace is found.
326    /// Validates that no keywords are used in improper places.
327    fn absorb_section_tokens<'a>(
328        &self,
329        iter: &mut impl Iterator<Item = &'a Token>,
330        dest: &mut Vec<Token>,
331    ) -> Result<()> {
332        for inner in iter {
333            if KEYWORDS.contains(&inner.token.as_str()) && inner.token_type == TokenType::Symbol {
334                return Err(self.error.abort(
335                    &format!("Keyword '{}' used in improper place.", inner.token),
336                    inner.line,
337                    inner.column,
338                ))
339            }
340
341            dest.push(inner.clone());
342            if inner.token_type == TokenType::RightBrace {
343                break
344            }
345        }
346        Ok(())
347    }
348
349    /// Validate namespace consistency across sections.
350    /// All sections must use the same namespace, and it must not be a reserved name.
351    fn validate_section_namespace(
352        &self,
353        section_name: &str,
354        tokens: &[Token],
355        existing_ns: Option<String>,
356    ) -> Result<String> {
357        if tokens.is_empty() {
358            return Err(self.error.abort(&format!("Section `{section_name}` has no tokens"), 0, 0))
359        }
360
361        let ns_token = &tokens[0];
362
363        if let Some(ns) = existing_ns {
364            if ns != ns_token.token {
365                return Err(self.error.abort(
366                    &format!("Found '{}' namespace, expected '{ns}'.", ns_token.token),
367                    ns_token.line,
368                    ns_token.column,
369                ))
370            }
371            return Ok(ns)
372        }
373
374        if NOPE_NS.contains(&ns_token.token.as_str()) {
375            return Err(self.error.abort(
376                &format!("'{}' cannot be a namespace.", ns_token.token),
377                ns_token.line,
378                ns_token.column,
379            ))
380        }
381
382        if ns_token.token.len() > MAX_NS_LEN {
383            return Err(self.error.abort(
384                &format!("Namespace too long, max {MAX_NS_LEN} bytes"),
385                ns_token.line,
386                ns_token.column,
387            ))
388        }
389
390        Ok(ns_token.token.clone())
391    }
392
393    /// Build constants from section tokens.
394    /// Validates constant types against the CONSTANT_TYPES table.
395    fn build_constants(&self, tokens: &[Token]) -> Result<Vec<Constant>> {
396        self.check_section_structure("constant", tokens)?;
397
398        let parsed = self.parse_typed_section("constant", tokens)?;
399        let mut ret = vec![];
400
401        // name = constant name
402        for (name, (name_token, type_token)) in parsed.scam_iter() {
403            self.validate_section_entry("Constant", &name, &name_token, &type_token)?;
404
405            // Look up the constant type in our table
406            let type_name = type_token.token.as_str();
407            let constant_def = CONSTANT_TYPES.iter().find(|(t, _, _)| *t == type_name);
408
409            match constant_def {
410                Some((_, var_type, valid_names)) => {
411                    if !valid_names.contains(&name_token.token.as_str()) {
412                        return Err(self.error.abort(
413                            &format!(
414                                "`{}` is not a valid {type_name} constant. Supported: {valid_names:?}",
415                                name_token.token
416                            ),
417                            name_token.line,
418                            name_token.column,
419                        ))
420                    }
421
422                    ret.push(Constant {
423                        name: name.to_string(),
424                        typ: *var_type,
425                        line: type_token.line,
426                        column: type_token.column,
427                    });
428                }
429                None => {
430                    return Err(self.error.abort(
431                        &format!("`{type_name}` is an unsupported constant type."),
432                        type_token.line,
433                        type_token.column,
434                    ))
435                }
436            }
437        }
438
439        Ok(ret)
440    }
441
442    /// Build witnesses from section tokens.
443    fn build_witnesses(&self, tokens: &[Token]) -> Result<Vec<Witness>> {
444        self.check_section_structure("witness", tokens)?;
445
446        let parsed = self.parse_typed_section("witness", tokens)?;
447        let mut ret = vec![];
448
449        // name = witness name
450        for (name, (name_token, type_token)) in parsed.scam_iter() {
451            self.validate_section_entry("Witness", &name, &name_token, &type_token)?;
452
453            match VarType::try_from(&type_token) {
454                Ok(typ) => {
455                    ret.push(Witness {
456                        name: name.to_string(),
457                        typ,
458                        line: name_token.line,
459                        column: name_token.column,
460                    });
461                }
462                Err(e) => return Err(self.error.abort(&e, type_token.line, type_token.column)),
463            }
464        }
465
466        Ok(ret)
467    }
468
469    /// Parse a typed section (constant or witness) into an IndexMap.
470    /// Both sections have the same structure: pairs of '<Type> <n>' separated by commas.
471    fn parse_typed_section(
472        &self,
473        section_name: &str,
474        tokens: &[Token],
475    ) -> Result<IndexMap<String, (Token, Token)>> {
476        let mut result = IndexMap::new();
477
478        // Skip namespace and braces: tokens[0] is namespace, tokens[1] is {, last is }
479        // This is everything between the braces: { ... }
480        let inner_tokens = &tokens[2..tokens.len() - 1];
481        let mut iter = inner_tokens.iter();
482
483        while let Some((typ, name, comma)) = Self::next_tuple3(&mut iter) {
484            if comma.token_type != TokenType::Comma {
485                return Err(self.error.abort("Separator is not a comma.", comma.line, comma.column))
486            }
487
488            // No variable shadowing
489            if result.contains_key(name.token.as_str()) {
490                return Err(self.error.abort(
491                    &format!(
492                        "Section `{section_name}` already contains the token `{}`.",
493                        &name.token
494                    ),
495                    name.line,
496                    name.column,
497                ))
498            }
499
500            result.insert(name.token.clone(), (name.clone(), typ.clone()));
501        }
502
503        if iter.next().is_some() {
504            return Err(self.error.abort(
505                &format!("Internal error, leftovers in '{section_name}' iterator"),
506                0,
507                0,
508            ))
509        }
510
511        Ok(result)
512    }
513
514    /// Common validation for constant/witness entries.
515    /// Ensures name and type tokens are symbols and match expected values.
516    fn validate_section_entry(
517        &self,
518        section_type: &str,
519        name: &str,
520        name_token: &Token,
521        type_token: &Token,
522    ) -> Result<()> {
523        if name_token.token != name {
524            return Err(self.error.abort(
525                &format!(
526                    "{section_type} name `{}` doesn't match token `{name}`.",
527                    name_token.token
528                ),
529                name_token.line,
530                name_token.column,
531            ))
532        }
533
534        if name_token.token_type != TokenType::Symbol {
535            return Err(self.error.abort(
536                &format!("{section_type} name `{}` is not a symbol.", name_token.token),
537                name_token.line,
538                name_token.column,
539            ))
540        }
541
542        if type_token.token_type != TokenType::Symbol {
543            return Err(self.error.abort(
544                &format!("{section_type} type `{}` is not a symbol.", type_token.token),
545                type_token.line,
546                type_token.column,
547            ))
548        }
549
550        Ok(())
551    }
552
553    /// Routine checks on section structure.
554    /// Validates that sections have proper opening/closing braces and correct element counts.
555    fn check_section_structure(&self, section: &str, tokens: &[Token]) -> Result<()> {
556        // Offsets 0 and 1 are accessed directly below, so we need a length of at
557        // least 2 in order to avoid an index-out-of-bounds panic.
558        if tokens.len() < 2 {
559            return Err(self.error.abort("Insufficient number of tokens in section.", 0, 0))
560        }
561        if tokens[0].token_type != TokenType::String {
562            return Err(self.error.abort(
563                "Section declaration must start with a naming string.",
564                tokens[0].line,
565                tokens[0].column,
566            ))
567        }
568
569        if tokens[1].token_type != TokenType::LeftBrace {
570            return Err(self.error.abort(
571                "Section must be opened with a left brace '{'",
572                tokens[0].line,
573                tokens[0].column,
574            ))
575        }
576
577        if tokens.last().unwrap().token_type != TokenType::RightBrace {
578            return Err(self.error.abort(
579                "Section must be closed with a right brace '}'",
580                tokens[0].line,
581                tokens[0].column,
582            ))
583        }
584
585        match section {
586            "constant" | "witness" => {
587                if tokens.len() == 3 {
588                    self.error.warn(&format!("{section} section is empty."), 0, 0);
589                }
590
591                if !tokens[2..tokens.len() - 1].len().is_multiple_of(3) {
592                    return Err(self.error.abort(
593                        &format!("Invalid number of elements in '{section}' section. Must be pairs of '<Type> <n>' separated with a comma ','."),
594                        tokens[0].line,
595                        tokens[0].column
596                    ))
597                }
598            }
599            "circuit" => {
600                if tokens.len() == 3 {
601                    return Err(self.error.abort("circuit section is empty.", 0, 0))
602                }
603
604                if tokens[tokens.len() - 2].token_type != TokenType::Semicolon {
605                    return Err(self.error.abort(
606                        "Circuit section does not end with a semicolon. Would never finish parsing.",
607                        tokens[tokens.len()-2].line,
608                        tokens[tokens.len()-2].column,
609                    ))
610                }
611            }
612            _ => unreachable!(),
613        };
614
615        Ok(())
616    }
617
618    /// Parse the circuit section into statements.
619    ///
620    /// The statement layouts/syntax in the language are as follows:
621    ///
622    /// ```text
623    /// C = poseidon_hash(pub_x, pub_y, value, token, serial);
624    /// | |          |                   |       |
625    /// V V          V                   V       V
626    /// variable    opcode              arg     arg
627    /// assign
628    ///
629    ///                    constrain_instance(C);
630    ///                       |               |
631    ///                       V               V
632    ///                     opcode           arg
633    ///
634    ///                                              inner opcode arg
635    ///                                               |
636    ///                  constrain_instance(ec_get_x(foo));
637    ///                        |                 |
638    ///                        V                 V
639    ///                     opcode          arg as opcode
640    /// ```
641    ///
642    /// In the latter, we want to support nested function calls, e.g.:
643    ///
644    /// ```text
645    /// constrain_instance(ec_get_x(token_commit));
646    /// ```
647    ///
648    /// The inner call's result would still get pushed on the heap,
649    /// but it will not be accessible in any other scope.
650    ///
651    /// In certain opcodes, we also support literal types, and the
652    /// opcodes can return a variable type after running the operation.
653    /// e.g.
654    /// ```text
655    /// one = witness_base(1);
656    /// zero = witness_base(0);
657    /// ```
658    ///
659    /// The literal type is used only in the function call's scope, but
660    /// the result is then accessible on the heap to be used by further
661    /// computation.
662    ///
663    /// Regarding multiple return values from opcodes, this is perhaps
664    /// not necessary for the current language scope, as this is a low
665    /// level representation. Note that it could be relatively easy to
666    /// modify the parsing logic to support that here. For now we'll
667    /// defer it, and if at some point we decide that the language is
668    /// too expressive and noisy, we'll consider having multiple return
669    /// types. It also very much depends on the type of functions/opcodes
670    /// that we want to support.
671    fn parse_ast_circuit(&self, tokens: &[Token]) -> Result<Vec<Statement>> {
672        self.check_section_structure("circuit", tokens)?;
673
674        // Split circuit tokens into statements (delimited by semicolons).
675        // Here, our statements tokens have been parsed and delimited by
676        // semicolons (;) in the source file. This iterator contains each
677        // of those statements as an array of tokens we then consume and
678        // build the AST further.
679        let mut circuit_stmts: Vec<Vec<Token>> = vec![];
680        let mut current_stmt: Vec<Token> = vec![];
681
682        for token in tokens[2..tokens.len() - 1].iter() {
683            if token.token_type == TokenType::Semicolon {
684                // Push completed statement to the heap
685                circuit_stmts.push(current_stmt);
686                current_stmt = vec![];
687                continue
688            }
689            current_stmt.push(token.clone());
690        }
691
692        // Vec of statements to return from this entire parsing operation.
693        let mut ret = vec![];
694
695        for statement in circuit_stmts {
696            if statement.is_empty() {
697                continue
698            }
699
700            self.validate_statement_brackets(&statement)?;
701
702            // Peekable iterator so we can see tokens in advance
703            // without consuming the iterator.
704            let mut iter = statement.iter().peekable();
705            let stmt = self.parse_statement(&mut iter)?;
706            ret.push(stmt);
707        }
708
709        Ok(ret)
710    }
711
712    /// Validate matching brackets in a statement.
713    /// Ensures parentheses and brackets are balanced and properly nested.
714    fn validate_statement_brackets(&self, statement: &[Token]) -> Result<()> {
715        let (mut left_paren, mut right_paren, mut left_bracket, mut right_bracket) = (0, 0, 0, 0);
716
717        for token in statement {
718            match token.token.as_str() {
719                "(" => left_paren += 1,
720                ")" => right_paren += 1,
721                "[" => left_bracket += 1,
722                "]" => right_bracket += 1,
723                _ => {}
724            }
725        }
726
727        if (left_paren == 0 && right_paren == 0) && (left_bracket == 0 && right_bracket == 0) {
728            return Err(self.error.abort(
729                "Statement must include a function call or array initialization. No parentheses or square brackets present.",
730                statement[0].line,
731                statement[0].column,
732            ))
733        }
734
735        if (left_bracket != right_bracket) || (left_paren != right_paren) {
736            return Err(self.error.abort(
737                "Parentheses or brackets are not matched.",
738                statement[0].line,
739                statement[0].column,
740            ))
741        }
742
743        // Is there a valid use-case for defining nested arrays? For now,
744        // if square brackets are present, raise an error unless there is
745        // exactly one pair.
746        if left_bracket > 1 {
747            return Err(self.error.abort(
748                "Only one pair of brackets allowed for array declaration",
749                statement[0].line,
750                statement[0].column,
751            ))
752        }
753
754        Ok(())
755    }
756
757    /// Parse a single statement from tokens.
758    /// Determines if this is an assignment (var = ...) or a direct call (opcode(...)).
759    fn parse_statement(
760        &self,
761        iter: &mut Peekable<std::slice::Iter<'_, Token>>,
762    ) -> Result<Statement> {
763        // Dummy statement that we'll hopefully fill now.
764        let mut stmt = Statement::default();
765
766        let Some(token) = iter.next() else {
767            return Err(self.error.abort("Empty statement", 0, 0))
768        };
769
770        // TODO: MAKE SURE IT'S A SYMBOL
771
772        // Check if this is an assignment (var = ...) or a direct call (opcode(...))
773        // This logic must be changed if we want to support multiple return values.
774        if let Some(next_token) = iter.peek() {
775            if next_token.token_type == TokenType::Assign {
776                // Assignment statement
777                stmt.line = token.line;
778                stmt.typ = StatementType::Assign;
779                stmt.lhs = Some(Variable {
780                    name: token.token.clone(),
781                    typ: VarType::Dummy,
782                    line: token.line,
783                    column: token.column,
784                });
785
786                // Skip over the `=` token.
787                iter.next();
788
789                // Get the opcode token
790                let Some(opcode_token) = iter.next() else {
791                    return Err(self.error.abort(
792                        "Expected opcode after assignment",
793                        token.line,
794                        token.column,
795                    ))
796                };
797
798                self.parse_opcode_call(opcode_token, iter, &mut stmt)?;
799            } else if next_token.token_type == TokenType::LeftParen {
800                // Direct call statement
801                stmt.line = token.line;
802                stmt.typ = StatementType::Call;
803                stmt.lhs = None;
804
805                self.parse_opcode_call(token, iter, &mut stmt)?;
806            } else if next_token.token_type == TokenType::LeftBracket {
807                // Array declaration.
808                // TODO: Support function calls in array declarations.
809                // Currently only literals can be used to construct an array.
810                return Err(self.error.abort(
811                    "Arrays are not implemented yet.",
812                    token.line,
813                    token.column,
814                ))
815            } else {
816                return Err(self.error.abort(
817                    &format!("Illegal token `{}`.", next_token.token),
818                    next_token.line,
819                    next_token.column,
820                ))
821            }
822        }
823
824        // At this stage of parsing, we should have assigned `stmt` a
825        // StatementType that is not a Noop. If we have failed to do so, we
826        // cannot proceed because Noops must never be passed to the compiler.
827        // This can occur when multiple independent statements are passed on
828        // one line, or if a statement is not terminated by a semicolon.
829        if stmt.typ == StatementType::Noop {
830            return Err(self.error.abort(
831                "Statement is a NOOP; not allowed. (Did you miss a semicolon?)",
832                token.line,
833                token.column,
834            ))
835        }
836
837        Ok(stmt)
838    }
839
840    /// Parse an opcode call and fill in the statement.
841    /// The assumption here is that the current token is a function call,
842    /// so we check if it's legit and start digging.
843    fn parse_opcode_call(
844        &self,
845        token: &Token,
846        iter: &mut Peekable<std::slice::Iter<'_, Token>>,
847        stmt: &mut Statement,
848    ) -> Result<()> {
849        let func_name = token.token.as_str();
850
851        // Ensure the current function is a symbol
852        if token.token_type != TokenType::Symbol {
853            return Err(self.error.abort("This token is not a symbol.", token.line, token.column))
854        }
855
856        if let Some(op) = Opcode::from_name(func_name) {
857            let rhs = self.parse_function_call(token, iter)?;
858            stmt.opcode = op;
859            stmt.rhs = rhs;
860            Ok(())
861        } else {
862            Err(self.error.abort(
863                &format!("Unimplemented opcode `{func_name}`."),
864                token.line,
865                token.column,
866            ))
867        }
868    }
869
870    /// Parse a function call and its arguments.
871    /// Handles nested function calls recursively, creating intermediate
872    /// variables for inner call results.
873    fn parse_function_call(
874        &self,
875        token: &Token,
876        iter: &mut Peekable<std::slice::Iter<'_, Token>>,
877    ) -> Result<Vec<Arg>> {
878        if let Some(next_token) = iter.peek() {
879            if next_token.token_type != TokenType::LeftParen {
880                return Err(self.error.abort(
881                    "Invalid function call opening. Must start with a '('.",
882                    next_token.line,
883                    next_token.column,
884                ))
885            }
886            // Skip the opening parenthesis
887            iter.next();
888        } else {
889            return Err(self.error.abort("Premature ending of statement.", token.line, token.column))
890        }
891
892        let mut ret = vec![];
893
894        // The next element in the iter now hopefully contains an opcode
895        // argument. If it's another opcode, we'll recurse into this
896        // function's logic.
897        // Otherwise, we look for variable and literal types.
898        while let Some(arg) = iter.next() {
899            // ============================
900            // Parse a nested function call
901            // ============================
902            if let Some(op_inner) = Opcode::from_name(&arg.token) {
903                if let Some(paren) = iter.peek() {
904                    if paren.token_type != TokenType::LeftParen {
905                        return Err(self.error.abort(
906                            "Invalid function call opening. Must start with a '('.",
907                            paren.line,
908                            paren.column,
909                        ))
910                    }
911
912                    // Recurse this function to get the params of the nested one.
913                    let args = self.parse_function_call(arg, iter)?;
914
915                    // Then we assign a "fake" variable that serves as a heap
916                    // reference.
917                    let var = Variable {
918                        name: format!("_op_inner_{}_{}", arg.line, arg.column),
919                        typ: VarType::Dummy,
920                        line: arg.line,
921                        column: arg.column,
922                    };
923
924                    let arg = Arg::Func(Statement {
925                        typ: StatementType::Assign,
926                        opcode: op_inner,
927                        lhs: Some(var),
928                        rhs: args,
929                        line: arg.line,
930                    });
931
932                    ret.push(arg);
933                    continue
934                }
935
936                return Err(self.error.abort(
937                    "Missing tokens in statement, there's a syntax error here.",
938                    arg.line,
939                    arg.column,
940                ))
941            }
942
943            // ==========================================
944            // Parse normal argument, not a function call
945            // ==========================================
946            if let Some(sep) = iter.next() {
947                // See if we have a variable or a literal type.
948                match arg.token_type {
949                    TokenType::Symbol => ret.push(Arg::Var(Variable {
950                        name: arg.token.clone(),
951                        typ: VarType::Dummy,
952                        line: arg.line,
953                        column: arg.column,
954                    })),
955
956                    TokenType::Number => {
957                        // Check if we can actually convert this into a number.
958                        arg.token.parse::<u64>().map_err(|e| {
959                            self.error.abort(
960                                &format!("Failed to convert literal into u64: {e}"),
961                                arg.line,
962                                arg.column,
963                            )
964                        })?;
965
966                        ret.push(Arg::Lit(Literal {
967                            name: arg.token.clone(),
968                            typ: LitType::Uint64,
969                            line: arg.line,
970                            column: arg.column,
971                        }))
972                    }
973
974                    TokenType::RightParen => {
975                        if let Some(comma) = iter.peek() {
976                            if comma.token_type == TokenType::Comma {
977                                iter.next();
978                            }
979                        }
980                        break
981                    }
982
983                    // Note: Unimplemented symbols throw an error now instead of a panic.
984                    // This assists with fuzz testing as existing features can still be tested
985                    // without causing the fuzzer to choke due to the panic created
986                    // by unimplmented!().
987                    _ => {
988                        return Err(self.error.abort(
989                            "Character is illegal/unimplemented in this context",
990                            arg.line,
991                            arg.column,
992                        ))
993                    }
994                };
995
996                if sep.token_type == TokenType::RightParen {
997                    if let Some(comma) = iter.peek() {
998                        if comma.token_type == TokenType::Comma {
999                            iter.next();
1000                        }
1001                    }
1002                    // Reached end of args
1003                    break
1004                }
1005
1006                if sep.token_type != TokenType::Comma {
1007                    return Err(self.error.abort(
1008                        "Argument separator is not a comma (`,`)",
1009                        sep.line,
1010                        sep.column,
1011                    ))
1012                }
1013            }
1014        }
1015
1016        Ok(ret)
1017    }
1018
1019    /// Check that a token has the expected type.
1020    fn expect_token_type(&self, token: &Token, expected: TokenType) -> Result<()> {
1021        if token.token_type != expected {
1022            return Err(self.error.abort(
1023                &format!("Expected {:?}, got {:?}", expected, token.token_type),
1024                token.line,
1025                token.column,
1026            ))
1027        }
1028        Ok(())
1029    }
1030
1031    /// Get next 3 items from an iterator as a tuple.
1032    fn next_tuple3<I, T>(iter: &mut I) -> Option<(T, T, T)>
1033    where
1034        I: Iterator<Item = T>,
1035    {
1036        let a = iter.next()?;
1037        let b = iter.next()?;
1038        let c = iter.next()?;
1039        Some((a, b, c))
1040    }
1041
1042    /// Get next 4 items from an iterator as a tuple.
1043    fn next_tuple4<I, T>(iter: &mut I) -> Option<(T, T, T, T)>
1044    where
1045        I: Iterator<Item = T>,
1046    {
1047        let a = iter.next()?;
1048        let b = iter.next()?;
1049        let c = iter.next()?;
1050        let d = iter.next()?;
1051        Some((a, b, c, d))
1052    }
1053}