1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396

use std::fmt::Display;

use crate::WrapSpannable;
use crate::firstpass::{FirstPassResult, FirstPassState, GetReqAttributes, ReqAttributes};
use wagon_lexer::productions::{Productions, EbnfType};
use super::CallingArgs;
use super::{Parse, LexerBridge, ParseResult, Tokens, Spannable, WagParseError, Ident, Rewrite, rule::Rule, rhs::Rhs, symbol::Symbol, SpannableNode, Span, ResultPeek, ResultNext};

use wagon_macros::new_unspanned;

#[derive(PartialEq, Debug, Eq, Hash, Clone)]
#[new_unspanned]
/// A chunk of an [`Rhs`].
///
/// Chunks are symbols in () with optionally an EBNF token following it.
/// If there are no (), there is only 1 symbol, which may still optionally have an EBNF token.
///
/// # Grammar
/// <code>[Chunk] -> [ChunkP] [EbnfType]?;</code>
pub struct Chunk {
    /// The actual chunk part.
	pub chunk: ChunkP,
    /// The possible EBNF operator.
	pub ebnf: Option<EbnfType>,
}

#[derive(PartialEq, Debug, Eq, Hash, Clone)]
#[new_unspanned]
/// The actual chunk part.
///
/// # Grammar
/// <span><pre>
/// [ChunkP] -> [Symbol]
///        |  `"("` [Chunk]* `")"`
///        ;
/// </pre></span>
pub enum ChunkP {
    /// Just a [`Symbol`].
	Unit(SpannableNode<Symbol>),
    /// A group of [`Chunk`]s. Enclosed with `()`.
	Group(Vec<SpannableNode<Chunk>>)
}

/// A sort-of way to handle Rust enums being not fully first-class. This should be one of the [`Rule`] variants as a constructor.
type RuleConstructor = fn(String, Vec<SpannableNode<Ident>>, Vec<SpannableNode<Rhs>>) -> Rule;

impl Chunk {

    /// Given a chunk and ebnf operator, rewrite the chunk such that it expresses the same language without the operator.
    ///
    /// Adds new rules to the inserted `rules` vector which are helper rules to express the language.
	fn rewrite_ebnf(
            ebnf: &EbnfType, // The exact Ebnf operator
            ident: String, // The identifier for the rule we are rewriting
            args: Vec<SpannableNode<Ident>>, // The calling args for this rule (must be propagated)
            symbol: SpannableNode<Symbol>,  // The exact symbol in the rule we are rewriting (I.E. The `B` in S -> A B?)
            span: &Span, // The span information for this rule
            rule_func: RuleConstructor, // The constructor function for the rule we are rewriting (analytic or generative)
            rules: &mut Vec<SpannableNode<Rule>> // The vector to add helper rules to.
        ) {
        let chunks: Vec<SpannableNode<Rhs>> = match ebnf { // Assuming the rule is S -> A{op}
            EbnfType::Some => { // +
                let helper_ident = format!("{ident}·p"); // Requires a second helper rule to do the plus step 
                rules.push(SpannableNode::new(rule_func(helper_ident.clone(), args.clone(),
                    vec![ // A·x·y·p -> A A·x·y·p | ;
                        SpannableNode::new(Rhs {
                            weight: None,
                            chunks: vec![
                                SpannableNode::new(Self {
                                    ebnf: None,
                                    chunk: ChunkP::Unit(symbol.clone())
                                }, span.clone()),
                                Self::simple_ident_spanned_with_args(&helper_ident, span.clone(), args.clone())
                            ]
                        }, span.clone()),
                        Rhs::empty_spanned(span.clone())
                    ]
                ), span.to_owned()));
                vec![ // A·x·y -> A A·x·y·p;
                    SpannableNode::new(Rhs {
                        weight: None,
                        chunks: vec![
                            SpannableNode::new(Self {
                                ebnf: None,
                                chunk: ChunkP::Unit(symbol)
                            }, span.clone()),
                            Self::simple_ident_spanned_with_args(&helper_ident, span.clone(), args.clone())
                        ]
                    }, span.clone())
                ]
            },
            EbnfType::Many => { // *
                vec![ // A·x·y -> A A·x·y | ;
                    SpannableNode::new(Rhs {
                        weight: None,
                        chunks: vec![
                            SpannableNode::new(Self {
                                ebnf: None,
                                chunk: ChunkP::Unit(symbol)
                            }, span.clone()),
                            Self::simple_ident_spanned_with_args(&ident, span.clone(), args.clone())
                        ]
                    }, span.clone()),
                    Rhs::empty_spanned(span.clone())
                ]
            },
            EbnfType::Maybe => { // ?
                vec![ // A·x·y -> A | ;
                    SpannableNode::new(Rhs {
                        weight: None,
                        chunks: vec![
                            SpannableNode::new(Self {
                                ebnf: None,
                                chunk: ChunkP::Unit(symbol)
                            }, span.clone())
                        ]
                    }, span.clone()),
                    Rhs::empty_spanned(span.clone())
                ]
            },
        };
        rules.push(SpannableNode::new(rule_func(ident, args, chunks), span.to_owned())); // S -> A·x·y;
	}

    /// Rewrite according to the EBNF operator.
    ///
    /// If there is no EBNF operator, we do nothing.
    /// If there is, we extract the chunk that it operates on and rewrite it as a new, separate rule. We do this recursively.
    /// At the end, all EBNF operators are replaced by references to the new rules and we return a list of new rules to add to the grammar.
	pub(crate) fn rewrite(
            &mut self, 
            ident: String, // The identifier for the new rule.
            span: &Span,  // The span information for this rule
            rule_func: RuleConstructor, // The constructor for the type of rule
            depth: usize, // Recursive depth
            state: &mut FirstPassState
        ) -> FirstPassResult<(Vec<SpannableNode<Rule>>, ReqAttributes)> {
		let mut rules = Vec::new();
        let required_args = if let Some(e) = std::mem::take(&mut self.ebnf) { // There is an ebnf operator
            match self {
                Self { chunk: ChunkP::Unit(u), ..} => { // This is a singular chunk
                    Self::rewrite_unit_ebnf(u, &e, rule_func, depth, ident, span, &mut rules)
                },
                Self { chunk: ChunkP::Group(g), ..} => { // This is a group of chunks in ()
                    let real_g = std::mem::take(g);
                    self.rewrite_group(real_g, Some(&e), rule_func, depth, ident, span, state, &mut rules)?
                }
            }
        } else { // There is no ebnf operator
            match self {
                Self { chunk: ChunkP::Unit(u), ..} => {
                    Self::rewrite_unit_no_ebnf(u, depth)
                },
                Self { chunk: ChunkP::Group(g), ..} => {
                    let real_g = std::mem::take(g);
                    self.rewrite_group(real_g, None, rule_func, depth, ident, span, state, &mut rules)?
                }
            } 
        };
        Ok((rules, required_args))
	}

    /// Rewrite a singular chunk with an EBNF operator (I.E. `S -> A?`)
    fn rewrite_unit_ebnf(u: &mut SpannableNode<Symbol>, e: &EbnfType, rule_func: RuleConstructor, depth: usize, ident: String, span: &Span, rules: &mut Vec<SpannableNode<Rule>>) -> ReqAttributes {
        let calling_args = u.to_inner().calling_args(); // Get the calling args for this symbol
        let req_args = u.get_req_attributes(); // Get all the required attributes for this symbol
        // We modify the rule in place by taking out the symbol (in this case `A`) and replacing
        // it with that of the new helper rule that does the EBNF step.
        let mut yanked = std::mem::replace(u,
            SpannableNode::new(
                Symbol::NonTerminal(
                    SpannableNode::new(Ident::Unknown(ident.clone()), span.clone()), 
                    CallingArgs::new(),
                ), 
                span.clone()
            )
        );
        let symbol_args = yanked.to_inner_mut().rewrite().into_iter().collect();
        if let Symbol::NonTerminal(_, v) = u.to_inner_mut() { // We get a reference to the vector of calling arguments for the new symbol (which is always a NT).
            let main_args = if depth > 0 { // Unless this is our first pass
                let mut as_synth = CallingArgs::with_capacity(calling_args.len()); // Convert all the calling arguments to be synthesized attributes.
                for i in &calling_args {
                    let s = i.to_inner().extract_string();
                    as_synth.push(SpannableNode::new(Ident::Synth(s.to_string()), i.span()));
                }
                as_synth
            } else {
                calling_args // Otherwise, just use our old calling args
            };
            let _ = std::mem::replace(v, main_args); // And insert them into the vector.
        }
        Self::rewrite_ebnf(e, ident, symbol_args, yanked, span, rule_func, rules); // Construct the helper rules
        req_args // Return all the attributes that are required for the original symbol.
    }

    fn rewrite_unit_no_ebnf(u: &mut SpannableNode<Symbol>, depth: usize) -> ReqAttributes {
        let req = u.get_req_attributes(); // Simply get all the require attributes for this symbol
        if depth > 0 {
            u.to_inner_mut().rewrite(); // If this is a recursive call, rewrite calling attributes to synthesized.
        }
        req
    }

    #[allow(clippy::too_many_arguments)]
    fn rewrite_group(
            &mut self, 
            g: Vec<SpannableNode<Self>>, 
            e: Option<&EbnfType>, // Optionally, the type of Ebnf operator associated with this group
            rule_func: RuleConstructor, 
            depth: usize, 
            ident: String, 
            span: &Span, 
            state: &mut FirstPassState, 
            rules: &mut Vec<SpannableNode<Rule>>
        ) -> FirstPassResult<ReqAttributes> {
        let new_ident = if e.is_some() { // We have an ebnf operator, construct a new ident 
            format!("{ident}··{depth}")
        } else {
            ident.clone()
        };
        let mut new_rule = SpannableNode::new( // Create a new rule which is like this grouped chunk, but has instead the group as it's chunks.
            rule_func(
                new_ident.clone(), 
                CallingArgs::new(), // Should be as synthesized
                vec![
                    Rhs { weight: None, chunks: g }.into_spanned(span.clone())
                ]
            ), 
            span.clone()
        );
        let (new_rules, req_args) = new_rule.rewrite(depth+1, state)?; // Do a recursive rewrite of this new rule.
        let mut as_synth = CallingArgs::with_capacity(req_args.len()); // Create a list of all the required attributes for this new rule, but synthesized.
        for i in &req_args {
            let s = i.to_inner().extract_string();
            as_synth.push(SpannableNode::new(Ident::Synth(s.to_string()), i.span())); 
        }
        match new_rule.to_inner_mut() {
            Rule::Analytic(_, v, _) | Rule::Generate(_, v, _) => {
                *v = as_synth.clone();
            },
            _ => {}
        }
        let symbol_args = if depth > 0 { // If this is a recusrive call, the args for the symbol should be synthesized
            as_synth.clone()
        } else { 
            req_args.iter().cloned().collect() // Otherwise they should be as the original.
        };
        self.chunk = ChunkP::Unit(Symbol::simple_ident_spanned_with_args(&ident, span.clone(), symbol_args)); // Should be as expected
        if let Some(eb) = e {
            let symbol = Symbol::simple_ident_spanned_with_args(&new_ident, span.clone(), as_synth.clone()); // Should be as synthesized
            Self::rewrite_ebnf(eb, ident, as_synth, symbol, span, rule_func, rules); // Should be as synthesized
        }
        rules.push(new_rule);
        rules.extend(new_rules);
        Ok(req_args)
    }

    /// Check if this chunk is a terminal
    // pub(crate) fn is_terminal(&self) -> bool {
    //     match &self.chunk {
    //         ChunkP::Unit(s) => s.node.is_terminal(),
    //         ChunkP::Group(_) => false,
    //     }
    // }

    /// Automatically create a `Chunk` that is just a terminal. See [`Symbol::simple_terminal`].
    #[must_use] 
    pub fn simple_terminal(term: &str) -> Self {
        Self { 
            ebnf: None, 
            chunk: ChunkP::Unit(Symbol::simple_terminal(term).into()) 
        }
    }

    /// Automatically create a `Chunk` that is just an ident. See [`Symbol::simple_ident`].
    #[must_use] 
    pub fn simple_ident(ident: &str) -> Self {
        Self {
            ebnf: None,
            chunk: ChunkP::Unit(Symbol::simple_ident(ident).into())
        }
    }

    /// Automatically create a spanned `Chunk` that is just an ident.
    // pub(crate) fn simple_ident_spanned(ident: &str, span: Span) -> SpannableNode<Self> {
    //     Self::simple_ident_spanned_with_args(ident, span, Vec::new())
    // }

    pub(crate) fn simple_ident_spanned_with_args(ident: &str, span: Span, args: Vec<SpannableNode<Ident>>) -> SpannableNode<Self> {
        SpannableNode::new(Self {
            ebnf: None,
            chunk: ChunkP::Unit(Symbol::simple_ident_spanned_with_args(ident, span.clone(), args))
        }, span)
    }

    /// Automatically create an empty chunk.
    #[must_use] 
    pub fn empty() -> Self {
        Self {
            ebnf: None,
            chunk: ChunkP::Unit(Symbol::Epsilon.into())
        }
    }

    /// Automatically create a spanned empty chunk.
    pub(crate) fn empty_spanned(span: Span) -> SpannableNode<Self> {
        SpannableNode::new(
            Self {
                ebnf: None,
                chunk: ChunkP::Unit(SpannableNode::new(Symbol::Epsilon, span.clone()))
            }, span
        )
    }

    /// Extract all the symbols that are in this chunk.
    #[must_use] 
    pub fn extract_symbols(self) -> Vec<SpannableNode<Symbol>> {
        match self {
            Self {chunk: ChunkP::Unit(s), ..} => vec![s],
            Self {chunk: ChunkP::Group(g), ..} => {
                let mut ret = Vec::with_capacity(g.len());
                for chunk in g {
                    ret.extend(chunk.into_inner().extract_symbols());
                }
                ret
            }
        }
    }
}

impl Parse for Chunk {
	fn parse(lexer: &mut LexerBridge) -> ParseResult<Self> { 
		let chunk = match lexer.peek_result()? {
			Tokens::ProductionToken(Productions::LPar) => {
				let mut ret = Vec::new();
				lexer.next();
				while lexer.peek_result()? != &Tokens::ProductionToken(Productions::RPar) {
					ret.push(SpannableNode::parse(lexer)?);
				}
				lexer.next();
				ChunkP::Group(ret)
			},
			Tokens::ProductionToken(Productions::Semi) => { // Empty rule
				return Ok(Self::empty())
			}
			_ => {
				ChunkP::Unit(SpannableNode::parse(lexer)?)
			}
		};
		if let Tokens::ProductionToken(Productions::Ebnf(_)) = lexer.peek_result()? {
			if let Tokens::ProductionToken(Productions::Ebnf(x)) = lexer.next_result()? {
				Ok(Self {chunk, ebnf: Some(x)})
			} else { 
    			Err(WagParseError::Fatal((lexer.span(), "Something went terribly wrong. Unwrapped non-ebnf when should have unwrapped ebnf".to_string())))  
    		}
		} else {
			Ok(Self {chunk, ebnf: None})
		}
	}
}

impl GetReqAttributes for Chunk {
    fn get_req_attributes(&self) -> ReqAttributes {
        match &self.chunk {
            ChunkP::Unit(s) => s.get_req_attributes(),
            ChunkP::Group(g) => {
                let mut req = ReqAttributes::new();
                for c in g {
                    req.extend(c.get_req_attributes());
                }
                req
            },
        }
    }
}

use itertools::Itertools;
impl Display for Chunk {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if let Some(ebnf) = &self.ebnf {
            write!(f, "{}{ebnf}", self.chunk)
        } else {
            write!(f, "{}", self.chunk)
        }
    }
}

impl Display for ChunkP {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Unit(s) => write!(f, "{s}"),
            Self::Group(g) => write!(f, "({})", g.iter().join(" ")),
        }
    }
}