;; Provenance: alexpander.scm (2286 lines, 2065 loc, 85.4 KB), taken from a
;; repository forked from melvinzhang/bit-scheme.
;; alexpander.scm: a macro expander for scheme.
;; $Id: alexpander.scm,v 1.65 2007/11/05 02:50:34 al Exp $
;; Copyright 2002-2004,2006,2007 Al Petrofsky <[email protected]>
;; LICENSING (3-clause BSD or GNU GPL 2 and up)
;; Redistribution and use in source and binary forms, with or without
;; modification, are permitted provided that the following conditions
;; are met:
;;
;; Redistributions of source code must retain the above copyright
;; notice, this list of conditions and the following disclaimer.
;;
;; Redistributions in binary form must reproduce the above copyright
;; notice, this list of conditions and the following disclaimer in
;; the documentation and/or other materials provided with the
;; distribution.
;;
;; Neither the name of the author nor the names of its contributors
;; may be used to endorse or promote products derived from this
;; software without specific prior written permission.
;;
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;; HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;; BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
;; OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
;; AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;; LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
;; WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;; POSSIBILITY OF SUCH DAMAGE.
;; Alternatively, you may redistribute, use, or modify this software
;; according to the terms of the GNU General Public License as
;; published by the Free Software Foundation (fsf.org); either version
;; 2, or (at your option) any later version.
;; INTRODUCTION:
;; This file implements a macro-expander for r5rs scheme (plus some
;; interesting extensions). There is no magic here to hook this into
;; your native eval system: this is a simple data-in, data-out program
;; that takes a macro-using program represented as scheme data and
;; produces an equivalent macro-free program represented as scheme
;; data.
;; This is mostly intended as a demonstration. Although it certainly
;; could be useful for adding macros to a simple scheme system that
;; lacks any macros, it may not be feasible to get it to interact
;; properly with a low-level macro system or a module system.
;; The expander is written in portable r5rs scheme, except for one use
;; of the pretty-print procedure which you can easily comment out.
;; To try it out, just load the file and execute (alexpander-repl).
;; Skip to the "BASIC USAGE" section for more information.
;; To find the latest version of this program, try here:
;; http://petrofsky.org/src/alexpander.scm
;;
;; To find older versions or the log messages between versions, try here:
;; http://petrofsky.org/src/RCS/alexpander.scm,v
;; If you are wondering what "r5rs scheme" is, see:
;; Richard Kelsey, William Clinger, and Jonathan Rees, "Revised^5
;; report on the algorithmic language Scheme", Higher-Order and
;; Symbolic Computation, 11(1):7-105, 1998. Available at:
;; PDF: http://www-swiss.ai.mit.edu/~jaffer/r5rs.pdf
;; LaTeX source: ftp://swiss.csail.mit.edu/pub/scheme-reports/r5rs.tar.gz
;; EXTENSIONS:
;; The expander supports all the features of the r5rs macro system,
;; plus several extensions in the way syntaxes can be specified and
;; used, which are best summarized in BNF:
;; Modified r5rs productions:
;; <expression> ---> <variable> | <literal> | <procedure call>
;; | <lambda expression> | <conditional> | <assignment>
;; | <derived expression> | <macro use> | <macro block>
;; | <keyword>
;;
;; <syntax definition> ---> (define-syntax <keyword> <syntax or expression>)
;; | (begin <syntax definition>*)
;; | <macro use>
;;
;; <syntax spec> ---> (<keyword> <syntax or expression>)
;;
;; <macro use> ---> (<syntax> <datum>*)
;;
;; <definition> ---> (define <variable> <expression>)
;; | (define (<variable> <def formals>) <body>)
;; | (define <expression>)
;; | (begin <definition>*)
;; | <macro use>
;; | <syntax definition>
;;
;; <command or definition> ---> <command> | <definition>
;; | (begin <command or definition>*)
;; | <top-level macro block>
;; | <macro use>
;; New productions:
;; <syntax or expression> ---> <syntax> | <expression>
;;
;; <syntax> ---> <transformer spec>
;; | <keyword>
;; | <macro use>
;; | <syntax macro block>
;;
;; <syntax macro block> ---> (<syntax-only block stuff> <syntax>)
;;
;; <top-level macro block>
;; ---> (<syntax-only block stuff> <command or definition>)
;;
;; <syntax-only block stuff>
;; ---> <let-or-letrec-syntax> (<syntax spec>*) <syntax definition>*
;;
;; <let-or-letrec-syntax> ---> let-syntax | letrec-syntax
;; These extensions all have the obvious meaning.
;; Okay, I'll elaborate on that a little bit. Consider the initializer
;; position of a syntax definition and the head position of a
;; list-format expression:
;; (define-syntax <keyword> <xxx>)
;; (<yyy> <foo>*)
;; In r5rs, <xxx> must be a transformer. <Yyy> may be an expression,
;; in which case the enclosing expression is taken to be a procedure
;; call and the <foo>s are the expressions for the operands, or <yyy>
;; may be a keyword bound to a syntax (a builtin or transformer), in
;; which case the <foo>s are processed according to that syntax.
;; The core generalization in our system is that both <xxx> and <yyy>
;; may be any type of expression or syntax. The four forms of syntax
;; allowed are: a transformer (as allowed in the <xxx> position in
;; r5rs), a keyword (as allowed in the <yyy> position in r5rs), a
;; macro use that expands into a syntax, and a macro block (let-syntax
;; or letrec-syntax) whose body is a syntax.
;; Some examples:
;;
;; ;; a macro with a local macro
;; (let-syntax ((foo (let-syntax ((bar (syntax-rules () ((bar x) (- x)))))
;; (syntax-rules () ((foo) (bar 2))))))
;; (foo))
;; => -2
;;
;; ;; an anonymous let transformer, used directly in a macro call.
;; ((syntax-rules ()
;; ((let ((var init) ...) . body)
;; ((lambda (var ...) . body)
;; init ...)))
;; ((x 1) (y 2))
;; (+ x y))
;; => 3
;;
;; ;; a keyword used to initialize a keyword
;; (let-syntax ((q quote)) (q x)) => x
;;
;; ;; Binding a keyword to an expression (which could also be thought
;; ;; of as creating a macro that is called without arguments).
;; (let ((n 0))
;; (let-syntax ((x (set! n (+ n 1))))
;; (begin x x x n)))
;; => 3
;;
;; (let-syntax ((x append)) ((x x))) => ()
;; Internal syntax definitions.
;; Internal syntax definitions are supported wherever they would make
;; sense (see the BNF), and they have the letrec-syntax semantics you
;; would expect. It is legal for the initializer of an internal
;; variable definition to use one of the internal syntax definitions
;; in the same body:
;; (let ()
;; (define x (y))
;; (define-syntax y (syntax-rules () ((y) 1)))
;; x)
;; => 1
;; It's also legal for internal syntax definitions to be mutually
;; recursive transformers, but it is an error for the expansion of a
;; syntax definition's initializer to require the result of another
;; initializer:
;; (let ()
;; (define-syntax m1 (syntax-rules () ((m1) #f) ((m1 . args) (m2 . args))))
;; (define-syntax m2 (syntax-rules () ((m2 arg . args) (m1 . args))))
;; (m1 foo bar baz))
;; => #f
;; (let ()
;; (define-syntax simple-transformer
;; (syntax-rules ()
;; ((simple-transformer pattern template)
;; (syntax-rules () (pattern template)))))
;; (define-syntax m (simple-transformer (m x) (- x)))
;; (m 1))
;; => error ("Premature use of keyword bound by an internal define-syntax")
;; (let ()
;; (define-syntax simple-transformer
;; (syntax-rules ()
;; ((simple-transformer pattern template)
;; (syntax-rules () (pattern template)))))
;; (let ()
;; (define-syntax m (simple-transformer (m x) (- x)))
;; (m 1)))
;; => -1
;; Top-level macro blocks.
;; At the top level, if a macro block (i.e., a let-syntax or
;; letrec-syntax form) has only one body element, or if all of the
;; body elements before the last one are internal syntax definitions,
;; then the last body element need not be an expression (as would be
;; required in r5rs). Instead, it may be anything allowed at top
;; level: an expression, a definition, a begin sequence of top-level
;; forms, or another macro block containing a top-level form.
;; (let-syntax ((- quote))
;; (define x (- 1)))
;;
;; (list x (- 1))
;; => (1 -1)
;; Note that, unlike the similar extension in Chez scheme 6.0, this is
;; still r5rs-compatible, because we only treat definitions within the
;; last body element as top-level definitions (and r5rs does not allow
;; internal definitions within a body's last element, even if it is a
;; begin form):
;; (define x 1)
;; (define (f) x)
;; (let-syntax ()
;; (define x 2)
;; (f))
;; => 1, in r5rs and alexpander, but 2 in Chez scheme
;; (define x 1)
;; (define (f) x)
;; (let-syntax ()
;; (begin
;; (define x 2)
;; (f)))
;; => 2, in alexpander and in Chez scheme, but an error in r5rs.
;; Syntax-rules ellipsis
;; Per SRFI-46, syntax-rules transformers can specify the
;; identifier to be used as the ellipsis (such a specification is
;; treated as a hygienic binding), and a list pattern may contain
;; subpatterns after an ellipsis as well as before it:
;; <transformer spec> ---> (syntax-rules (<identifier>*) <syntax rule>*)
;; | (syntax-rules <ellipsis> (<identifier>*) <syntax rule>*)
;;
;; <syntax rule> ---> (<pattern> <template>)
;;
;; <pattern> ---> <pattern identifier>
;; | (<pattern>*)
;; | (<pattern>+ . <pattern>)
;; | (<pattern>* <pattern> <ellipsis> <pattern>*)
;; | #(<pattern>*)
;; | #(<pattern>* <pattern> <ellipsis> <pattern>*)
;; | <pattern datum>
;;
;; <pattern identifier> ---> <identifier>
;;
;; <ellipsis> ---> <identifier>
;; Expressions among internal definitions.
;; A definition of the form (define <expression>) causes the
;; expression to be evaluated at the conclusion of any enclosing set
;; of internal definitions. That is, at top level, (define
;; <expression>) is equivalent to just plain <expression>. As for
;; internal definitions, the following are equivalent:
;; (let ()
;; (define v1 <init1>)
;; (define <expr1>)
;; (define <expr2>)
;; (define v2 <init2>)
;; (define <expr3>)
;; (begin
;; <expr4>
;; <expr5>))
;;
;; (let ()
;; (define v1 <init1>)
;; (define v2 <init2>)
;; (begin
;; <expr1>
;; <expr2>
;; <expr3>
;; <expr4>
;; <expr5>))
;; (Yes, it would probably be better to have a separate builtin for
;; this rather than to overload define.)
;; This feature makes it possible to implement a define-values that
;; works properly both at top-level and among internal definitions:
;; (define define-values-temp #f)
;;
;; (define-syntax define-values
;; (syntax-rules ()
;; ((define-values (var ...) init)
;; (begin
;; (define define-values-temp (call-with-values (lambda () init) list))
;; (define var #f) ...
;; (define
;; (set!-values (var ...) (apply values define-values-temp)))))))
;; (Set!-values is implementable using just r5rs features and is left
;; as an exercise.)
;; When used among internal definitions, the definition of
;; define-values-temp in define-values's output creates a local
;; binding, and thus the top-level binding of define-values-temp is
;; irrelevant. When used at top-level, the definition of
;; define-values-temp in the output does not create a binding.
;; Instead, it mutates the top-level binding of define-values-temp.
;; Thus, all top-level uses of define-values share a single temp
;; variable. For internal-definition-level uses of define-values, a
;; single shared temp would not be sufficient, but things work out
;; okay because hygienic renaming causes each such use to create a
;; distinct temp variable.
;; The version below works the same way, but hides from the top-level
;; environment the temp that is shared by top-level uses of
;; define-values. For a bit of tutorial and rationale about this
;; technique, see usenet article
;; <[email protected]>:
;; (define-syntax define-values
;; (let-syntax ((temp (syntax-rules ())))
;; (syntax-rules ()
;; ((define-values (var ...) init)
;; (begin
;; (define temp (call-with-values (lambda () init) list))
;; (define var #f) ...
;; (define (set!-values (var ...) (apply values temp))))))))
;; Improved nested unquote-splicing.
;; Quasiquote is extended to make commas and comma-ats distributive
;; over a nested comma-at, as in Common Lisp's backquote. See my
;; 2004-09-03 usenet article <[email protected]>,
;; Bawden's 1999 quasiquotation paper, and Appendix C of Steele's
;; "Common Lisp the Language 2nd edition".
;; <splicing unquotation 1> ---> ,@<qq template 0>
;; | (unquote-splicing <qq template 0>)
;;
;; <splicing unquotation D> ---> ,@<qq template D-1>
;; | ,<splicing unquotation D-1>
;; | ,@<splicing unquotation D-1>
;; | (unquote-splicing <qq template D-1>)
;; | (unquote <splicing unquotation D-1>)
;; | (unquote-splicing <splicing unquotation D-1>)
;; When a comma at-sign and the expression that follows it are being
;; replaced by the elements of the list that resulted from the
;; expression's evaluation, any sequence of commas and comma at-signs
;; that immediately preceded the comma at-sign is also removed and is
;; added to the front of each of the replacements.
;; (let ((x '(a b c))) ``(,,x ,@,x ,,@x ,@,@x))
;; => `(,(a b c) ,@(a b c) ,a ,b ,c ,@a ,@b ,@c)
;;
;; ``(,,@'() ,@,@(list))
;; => `()
;;
;; `````(a ,(b c ,@,,@,@(list 'a 'b 'c)))
;; => ````(a ,(b c ,@,,@a ,@,,@b ,@,,@c))
;;
;; (let ((vars '(x y)))
;; (eval `(let ((x '(1 2)) (y '(3 4)))
;; `(foo ,@,@vars))
;; (null-environment 5)))
;; => (foo 1 2 3 4)
;; BASIC USAGE:
;; There are four supported ways to use this:
;; 1. (alexpander-repl)
;; This starts a read-expand-print-loop. Type in a program and
;; see its expansion as you go.
;;
;; 2. (expand-program list-of-the-top-level-forms-of-a-program)
;; Returns a list of the top-level forms of an equivalent
;; macro-free program.
;;
;; 3. (expand-top-level-forms! forms mstore)
;; Returns some macro-expanded forms and mutates mstore.
;; To use this, first create an initial mutable store with
;; (null-mstore). Then you can pass a program in piecemeal, with
;; the effects of top-level define-syntaxes saved in mstore
;; between calls to expand-top-level-forms!.
;;
;; 4. (expand-top-level-forms forms store loc-n k)
;; The purely-functional interface.
;; This returns by making a tail call to k:
;; (k expanded-forms new-store new-loc-n)
;; Use null-store and null-loc-n for store and loc-n arguments
;; when calling expand-top-level-forms with the first forms in a
;; program.
;;
;; For options 3 and 4, you need to prepend null-output to the
;; resulting program. Null-output contains some definitions like
;; (define _eqv?_7 eqv?), which create alternate names for some of the
;; builtin procedures. These names are used by the standard case and
;; quasiquote macros so that they can keep working even if you
;; redefine one of the standard procedures.
;; The output programs use a small subset of the r5rs syntax, namely:
;; BEGIN, DEFINE, DELAY, IF, LAMBDA, LETREC, QUOTE, AND SET!.
;; Furthermore, begin is only used for expressions; lambdas and
;; letrecs always have a single body expression and no internal
;; definitions; and defines are always of the simple (define
;; <variable> <expression>) form. If you want even simpler output,
;; with no letrecs, see expand-program-to-simple.
;; Any uses or definitions in the original program of a top-level
;; variable whose name begins with "_", or whose name is one of the
;; eight primitives in the output language, will be renamed. This
;; will only cause a problem if the program is trying to use some
;; nonstandard library variable that starts with "_". That is, even
;; though some of a program's top-level variable names may get
;; changed, any r5rs-conformant program will still be translated to an
;; equivalent macro-free r5rs program.
;; INTERNALS
;; [NOTE: this documentation is certainly not complete, and it kind of
;; dissolves after a few pages from verbose paragraphs into cryptic
;; sentence fragments. Nonetheless, it might be enough to help
;; someone figure out the code.]
;; ENVIRONMENTS AND STORES
;; The two principal data structures are the environment and the
;; store.
;; These work similarly to the runtime environment and store described
;; in r5rs: in both that system and in ours, to determine the meaning
;; of an identifier, we lookup which location the environment
;; associates with the identifier, and then check what value the store
;; associates with that location.
;; In the runtime system, the identifiers mapped by the environment
;; are all variables, and the values in the store are the scheme
;; values the variables currently hold. Environments may be locally
;; extended by LAMBDA to map some identifiers to new locations that
;; initially hold the values passed to the procedure. Environments
;; may also be locally extended by internal DEFINE (a.k.a LETREC) to
;; map some identifiers to new locations that are empty and illegal to
;; access or SET! until the evaluation of all the initializers has
;; completed (at which time the results are stored into the
;; locations). The store is modified when a SET! or top-level DEFINE
;; is evaluated, or when a set of internal DEFINE initializers'
;; evaluations completes, but environments are immutable. The static
;; top-level environment maps every variable name to some location,
;; although most of these locations are illegal to access until the
;; evaluation of the initializer of the first top-level DEFINE of the
;; variable has completed. (The exceptions are the locations to which
;; the standard procedure names are bound: these locations may be
;; accessed at any time, but they may not be SET! until after the
;; first top-level DEFINE of the procedure name.)
;; (R5rs actually does not completely specify how the top-level
;; environment works, and allows one to consider the top-level
;; environment to be dynamically extended, but the model I just
;; described fits within the r5rs parameters and plays well with our
;; macro system. To recap: the difference between SET! and top-level
;; DEFINE is not that top-level DEFINE is able to create a new
;; binding, rather, the difference is that top-level DEFINE is allowed
;; to store into any location and SET! is not always allowed to store
;; into some locations.)
;; In our syntactic system, a value in the store may be either a
;; syntax (a builtin or a macro transformer), a variable name, or the
;; expanded code for an expression. When we encounter a use of an
;; identifier, we go through the environment and the store to fetch
;; its value. If the value is a variable name, we emit that variable
;; name. If the value is some code, we emit that code. If the value
;; is a syntax, we proceed according to the rules of that syntax. As
;; in the runtime system, environments are immutable and the static
;; top-level environment is infinite. Environments may be locally
;; extended by LAMBDA or internal DEFINE to map some identifiers to
;; new locations that hold variable names. Environments may also be
;; extended by LET-SYNTAX to map some identifiers to new locations
;; that initially hold the syntaxes and/or code resulting from the
;; expansion of the initializers. Lastly, environments may be
;; extended by internal DEFINE-SYNTAX (a.k.a LETREC-SYNTAX) to map
;; some identifiers to new locations that are empty and illegal to
;; access until the expansion of their initializers has completed (at
;; which time the resulting syntaxes and/or code are stored into the
;; locations). The store is modified by top-level DEFINE and
;; DEFINE-SYNTAX, and when a set of internal DEFINE-SYNTAX
;; initializers' expansions completes. The store is not altered by a
;; SET!, because a SET! does not change the fact that the identifier
;; is a variable: from our perspective a SET! of a variable is simply
;; a use of the variable. A top-level DEFINE only alters the store if
;; an identifier whose location previously held a syntax is now being
;; defined as a variable.
;; The static top-level environment maps every name to some location.
;; Initially, the locations to which the environment maps the names of
;; the ten builtins (BEGIN DEFINE DEFINE-SYNTAX IF LAMBDA QUOTE SET!
;; DELAY LET-SYNTAX SYNTAX-RULES) hold as their values those builtin
;; syntaxes. All other names are bound to locations that hold the
;; corresponding top-level variable name.
;; I said the top-level environment contains a binding for "every
;; name" rather than for "every identifier", because the new
;; identifiers created by a syntax-rules macro expansion are given
;; numbers rather than names, and the top-level environment has no
;; bindings for these. If such an identifier is used in an
;; environment that does not bind it to any location, then the
;; location to which the template literal was bound in the environment
;; of the macro is used instead. (To be prepared for such a
;; contingency, this location is stored along with the numeric id in
;; the "renamed-sid" (see below) that a macro expansion inserts into
;; the code.)
;; REPRESENTATION OF ENVIRONMENTS AND STORES
;; An environment is represented by an alist mapping ids to local
;; (non-top-level) locations. All environments are derived from the
;; top-level environment, so any symbolic id not in the alist is
;; implicitly mapped to the corresponding top-level location.
;; An id (identifier) is what we bind to a location in an environment.
;; Original ids are the symbols directly occurring in the source code.
;; Renamed ids are created by macro expansions and are represented by
;; integers.
;; id: original-id | renamed-id
;; original-id: symbol
;; renamed-id: integer
;; The static top-level environment maps every symbol to a location.
;; For simplicity, each of those locations is represented by the
;; symbol that is bound to it. All other locations (those created by
;; lambda, let-syntax, and internal definitions) are represented by
;; integers.
;; env: ((id . local-location) ...)
;; store: ((location . val) ...)
;; location: toplevel-location | local-location ;; a.k.a. symloc and intloc.
;; toplevel-location: symbol
;; local-location: integer
;; val: variable | syntax | code
;; variable: #(toplevel-location) | #(symbol local-location)
;; code: (output) ; output is the expanded code for an expression.
;; syntax: builtin | transformer
;; builtin: symbol
;; transformer: (synrules env)
;; synrules: the unaltered sexp of the syntax-rules form.
;; REPRESENTATION OF THE CODE UNDERGOING EXPANSION (SEXPS).
;; Any variable named SEXP in the expander code holds a representation
;; of some code undergoing expansion. It mostly looks like the
;; ordinary representation of scheme code, but it may contain some
;; identifiers that are encoded as two- or three-element vectors
;; called renamed-sids. Any actual vector in the code will be
;; represented as a one-element vector whose element is a list of the
;; actual elements, i.e., each vector #(elt ...) is mapped to #((elt
;; ...)), so that we can distinguish these vectors from renamed-sids.
;; In contrast, a variable named OUTPUT is a bit of almost-finished
;; code. In this format, symbols and vectors within a quote
;; expression are represented normally. All variable names are
;; represented as vectors of the form #(symbol) or #(symbol integer).
;; These vectors are converted to suitable, non-clashing symbols by
;; the symbolize function, which is the final step of expansion.
;; A sid is the representation of an id within a sexp.
;; sid: original-id | renamed-sid
;; A renamed-sid includes the id's original name, which we will need
;; if the id gets used in a QUOTE expression. The renamed-sid also
;; includes the location of the local binding (if any) of the template
;; literal that created the id: this is the location to use if the id
;; gets used freely (i.e., in an environment with no binding for it).
;; renamed-sid: #(original-id renamed-id)
;; | #(original-id renamed-id local-location)
;; Procedures that take a SEXP argument usually also take an ID-N
;; argument, which is the next higher number after the largest
;; renamed-id that occurs in the SEXP argument. (This is to enable
;; adding new ids without conflict.)
;;
;; Similarly, a STORE argument is usually accompanied by a LOC-N
;; argument, which is the next higher number after the largest
;; local-location in the STORE argument.
;; SUMMARY OF MAJOR FUNCTIONS:
;; (lookup-sid sid env) => location
;; (lookup-location location store) => val | #f ;; #f means letrec violation.
;; (lookup2 sid env store) => val ;; lookup-sid + lookup-location + fail if #f.
;; (extend-env env id location) => env
;; (extend-store store intloc val) => store
;; (substitute-in-store store loc val) => store
;; (compile-syntax-rules synrules env) => transformer
;; (apply-transformer trans sexp id-n env k) => (k sexp id-n)
;; (expand-any sexp id-n env store loc-n lsd? ek sk dk bk)
;; => (ek output)
;; | (sk syntax sexp store loc-n)
;; | (dk builtin sexp id-n env store loc-n)
;; | (bk sexp id-n env store loc-n)
;; (expand-expr sexp id-n env store loc-n) => output
;; (expand-val sexp id-n env store loc-n k) => (k val store loc-n)
;; (expand-top-level-sexps sexps store loc-n k)
;; => (k outputs store loc-n)
;; (expand-body sexps id-n env store loc-n lsd? ek sk dk bk)
;; => same as expand-any
;; (expand-syntax-bindings bindings id-n syntax-env ienv store loc-n k)
;; => (k store loc-n)
;; True when SEXP is the representation of an identifier inside a sexp:
;; either a plain symbol (an original id) or a renamed-sid vector.
(define (sid? sexp)
  (if (symbol? sexp)
      #t
      (renamed-sid? sexp)))
;; True when SEXP is a vector holding more than one element, i.e. the
;; #(original-id renamed-id) or #(original-id renamed-id local-location)
;; encoding of a renamed identifier.  One-element vectors are svectors.
(define (renamed-sid? sexp)
  (if (vector? sexp)
      (> (vector-length sexp) 1)
      #f))
;; True when SEXP is a one-element vector, which is how an actual
;; vector from the source program is wrapped inside a sexp.
(define (svector? sexp)
  (if (vector? sexp)
      (= (vector-length sexp) 1)
      #f))
;; Returns the single element stored in the svector SVEC (the list of
;; the wrapped vector's elements).
(define (svector->list svec)
  (vector-ref svec 0))
;; Wraps the list ELTS in a fresh one-element vector (an svector).
(define (list->svector elts)
  (vector elts))
;; Builds a renamed-sid for NAME with the numeric RENAMED-ID.
;; LOCATION is only stored (as a third element) when it is not eq? to
;; NAME; otherwise the shorter two-element form is produced.
(define (make-sid name renamed-id location)
  (cond ((eq? name location) (vector name renamed-id))
        (else (vector name renamed-id location))))
;; Returns the original name of SID: the symbol itself, or element 0
;; of a renamed-sid vector.
(define (sid-name sid)
  (cond ((symbol? sid) sid)
        (else (vector-ref sid 0))))
;; Returns the id of SID: the symbol itself for an original id, or
;; element 1 (the renamed-id) of a renamed-sid vector.
(define (sid-id sid)
  (cond ((symbol? sid) sid)
        (else (vector-ref sid 1))))
;; Returns the location to use for SID when no environment binds it:
;;   - a symbol stands for its own like-named location;
;;   - a two-element renamed-sid falls back to its name (element 0);
;;   - any other renamed-sid carries the location in element 2.
(define (sid-location sid)
  (cond ((symbol? sid) sid)
        ((= 2 (vector-length sid)) (vector-ref sid 0))
        (else (vector-ref sid 2))))
;; True when X is a proper list of exactly one element.
(define (list1? x)
  (if (pair? x)
      (null? (cdr x))
      #f))
;; True when X is a proper list of exactly two elements.
(define (list2? x)
  (and (pair? x)
       (pair? (cdr x))
       (null? (cddr x))))
;; (map-vecs f x) walks the pair structure of X and returns a copy in
;; which every vector V has been replaced by (f V).
;; F must never return #f, because #f is used internally to signal
;; "nothing in this subtree changed".
;; Subtrees that contain no vectors are returned as-is (shared with the
;; input) rather than copied.
(define (map-vecs f x)
  ;; Returns the mapped version of NODE, or #f when NODE contains no
  ;; vectors at all.
  (define (map-or-false node)
    (cond ((vector? node) (f node))
          ((pair? node)
           (let ((head (map-or-false (car node))))
             (if head
                 ;; The car changed, so a new pair is needed regardless;
                 ;; the cdr may or may not have changed.
                 (cons head (map-always (cdr node)))
                 ;; The car is unchanged; only rebuild if the cdr changed.
                 (let ((tail (map-or-false (cdr node))))
                   (and tail (cons (car node) tail))))))
          (else #f)))
  ;; Returns the mapped version of NODE, or NODE itself if unchanged.
  (define (map-always node)
    (or (map-or-false node) node))
  (map-always x))
;; Converts the ordinary vector V into its sexp form: an svector whose
;; single element is the list of V's elements, with any vectors nested
;; inside those elements wrapped as well.
(define (wrap-vec v)
  (let* ((elts (vector->list v))
         (wrapped-elts (wrap-vecs elts)))
    (list->svector wrapped-elts)))
;; Applies wrap-vec to every vector found anywhere in the pair
;; structure of DATUM (see map-vecs).
(define (wrap-vecs datum)
  (map-vecs wrap-vec datum))
;; Inverse of the sexp vector encodings, applied to one vector V-SEXP:
;;   - a one-element vector (svector) becomes an ordinary vector built
;;     from its unwrapped element list;
;;   - any other vector is treated as a renamed-sid and is replaced by
;;     its element 0 (the original name).
(define (unwrap-vec v-sexp)
  (case (vector-length v-sexp)
    ((1) (list->vector (unwrap-vecs (svector->list v-sexp))))
    (else (vector-ref v-sexp 0))))
;; Applies unwrap-vec to every vector found anywhere in the pair
;; structure of SEXP (see map-vecs).
(define (unwrap-vecs sexp)
  (let ((unwrap-one unwrap-vec))
    (map-vecs unwrap-one sexp)))
;; The store maps locations to vals.
;; vals are variables, syntaxes, or code.
;; A code val is a one-element list holding the expanded OUTPUT.
(define (make-code output)
  (cons output '()))
;; A builtin syntax is represented directly by its name, so this
;; constructor returns BUILTIN-SYMBOL unchanged.
(define (make-builtin builtin-symbol)
  builtin-symbol)
;; A transformer is the two-element list (SYNRULES ENV).
(define (make-transformer synrules env)
  (cons synrules (cons env '())))
;; A val is a variable exactly when it is a vector.
(define (var? v)
  (vector? v))
;; A val is code exactly when it is a one-element list.
(define (code? val)
  (and (pair? val)
       (null? (cdr val))))
;; Extracts the expanded output stored in the code val CODE-VAL.
(define (code-output code-val)
  (car code-val))
;; A val is a syntax when it is a symbol (a builtin) or a two-element
;; list (a transformer).
(define (syntax? val)
  (cond ((symbol? val) #t)
        (else (list2? val))))
;; Among syntaxes, the builtins are the ones represented by a symbol.
(define (builtin? stx)
  (symbol? stx))
;; A builtin is its own name, so this accessor returns B unchanged.
(define (builtin-name b)
  b)
;; Among syntaxes, anything that is not a builtin is a transformer.
(define (transformer? stx)
  (if (builtin? stx) #f #t))
;; Returns the first element of the transformer TRANSFORMER: its
;; syntax-rules form.
(define (transformer-synrules transformer)
  (car transformer))
;; Returns the second element of the transformer TRANSFORMER: the
;; environment paired with its syntax-rules form.
(define (transformer-env transformer)
  (car (cdr transformer)))
;; Returns ALIST extended at the front with the association (KEY . VAL).
;; ALIST itself is not modified.
(define (acons key val alist)
  (let ((entry (cons key val)))
    (cons entry alist)))
;; Environments and stores are association lists; both start out empty.
(define empty-env (quote ()))
(define empty-store (quote ()))
;; Lookup-sid looks up a sid in an environment.
;; If there is no binding in the environment, then:
;; 1. For an original-id, we return the like-named location, because
;; the static top-level environment maps every name to a location.
;; 2. For a renamed id, we return the location to which the template
;; literal that created it was bound.
(define (lookup-sid sid env)
  (let ((binding (assv (sid-id sid) env)))
    (if binding
        (cdr binding)
        ;; Unbound in env: fall back to the location carried by the sid
        ;; itself, which is the sid when it is a plain symbol.
        (sid-location sid))))
;; Lookup-location looks up a location in the store.
;; If there is no value explicitly listed in the store, then:
;; 1. For a top-level (named) location, return a top-level variable.
;; 2. For a local location, return #f. This can only happen for a
;; location allocated by letrec-syntax or internal define-syntax
;; and used before it is initialized,
;; e.g. (letrec-syntax ((x x)) 1).
(define (lookup-location location store)
  (let ((entry (assv location store)))
    (if entry
        (cdr entry)
        ;; No entry in the store: a symbol location defaults to its
        ;; top-level variable, anything else has no value yet.
        (if (symbol? location)
            (symloc->var location)
            #f))))
;; Resolves sid to its val by going through env and then store.
;; Signals an error when the location has no value in the store.
(define (lookup2 sid env store)
  (let ((val (lookup-location (lookup-sid sid env) store)))
    (if val
        val
        (error (string-append "Premature use of keyword bound by letrec-syntax"
                              " (or an internal define-syntax): ")
               sid))))
;; Returns env with a new binding of id to location added at the front.
(define extend-env
  (lambda (env id location)
    (acons id location env)))
;; Returns store with a new entry mapping loc to val added at the front.
(define extend-store
  (lambda (store loc val)
    (acons loc val store)))
;; Extend-store just adds to the front of the alist, whereas
;; substitute-in-store actually bothers to remove the old entry, and
;; to not add a new entry if it is just the default.
;; Substitute-in-store is only used by top-level define and
;; define-syntax. Because nothing is ever mutated, we could just use
;; extend-store all the time, but we are endeavoring to keep down the
;; size of the store to make it more easily printed and examined.
(define (substitute-in-store store loc val)
  ;; Returns a store in which loc is bound to val, with any previous
  ;; entry for loc removed rather than shadowed.  No entry is added when
  ;; loc is a symbol and val is just the default top-level variable that
  ;; lookup-location would produce for it anyway.
  (let ((store (if (assv loc store)
                   ;; assv succeeded, so the loop is guaranteed to reach
                   ;; a matching entry before running off the end.
                   (let loop ((store store))
                     (let ((p (car store)))
                       (if (eqv? loc (car p))
                           (cdr store)
                           (cons p (loop (cdr store))))))
                   store)))
    ;; symloc->var allocates a fresh vector on every call, so an eq?
    ;; comparison against its result could never be true and the
    ;; default would always be stored.  Compare by structure instead.
    (if (and (symbol? loc) (equal? val (symloc->var loc)))
        store
        (acons loc val store))))
;; One-slot variable: the single slot serves as both name and location
;; (var-name reads slot 0, var-loc reads the last slot).
(define (make-var1 name/loc)
  (make-vector 1 name/loc))
;; Two-slot variable: slot 0 is the name, slot 1 the location.
(define (make-var2 name loc)
  (list->vector (list name loc)))
;; The name of a variable is always its first slot.
(define var-name
  (lambda (var) (vector-ref var 0)))
;; The location of a variable is its last slot, which for a one-slot
;; variable is the same slot as the name.
(define (var-loc var)
  (let ((last-index (- (vector-length var) 1)))
    (vector-ref var last-index)))
;; Variable for a symbol location: a one-slot variable holding sym.
(define symloc->var
  (lambda (sym) (make-var1 sym)))
;; Variable for location intloc, named after sid.
(define (intloc->var intloc sid)
  (let ((name (sid-name sid)))
    (make-var2 name intloc)))
;; Builds the variable for loc: symbol locations ignore sid, all other
;; locations take their name from it.
(define (loc->var loc sid)
  (cond ((symbol? loc) (symloc->var loc))
        (else (intloc->var loc sid))))
;; Wraps outputs in a begin form, unless there is exactly one output,
;; which is returned as is.
(define (make-begin outputs)
  (cond ((list1? outputs) (car outputs))
        (else (cons 'begin outputs))))
;; Wraps expr in a letrec over bindings, unless there are no bindings,
;; in which case expr is returned as is.
(define (make-letrec bindings expr)
  (cond ((null? bindings) expr)
        (else (cons 'letrec (list bindings expr)))))
;; Expands the formals and body of a lambda.  Each formal gets a fresh
;; location (counting up from loc-n) and a variable for it; the body is
;; expanded in the extended env and store.  Returns a two-element list
;; of the variables (dotted if formals was) and the expanded body.
(define (expand-lambda formals expr id-n env store loc-n)
  ;; (a b . c) => (a b c)
  (define (improper->proper x)
    (cond ((pair? x) (cons (car x) (improper->proper (cdr x))))
          (else (list x))))
  ;; (a b c) => (a b . c)
  (define (proper->improper x)
    (cond ((null? (cdr x)) (car x))
          (else (cons (car x) (proper->improper (cdr x))))))
  (let* ((rest-arg? (not (list? formals)))
         (params (if rest-arg? (improper->proper formals) formals)))
    ;; Every formal must be an identifier and must not occur again
    ;; later in the parameter list.
    (define (validate param)
      (if (not (sid? param))
          (error "Non-identifier: " param " in lambda formals: " formals))
      (if (member param (cdr (member param params)))
          (error "Duplicate variable: " param " in lambda formals: " formals)))
    (begin
      (for-each validate params)
      (let bind ((pending params) (bound '())
                 (env env) (store store) (loc-n loc-n))
        (cond ((null? pending)
               ;; bound was accumulated in reverse order.
               (let ((vars (reverse bound)))
                 (list (if rest-arg? (proper->improper vars) vars)
                       (expand-expr expr id-n env store loc-n))))
              (else
               (let* ((param (car pending))
                      (var (intloc->var loc-n param)))
                 (bind (cdr pending)
                       (cons var bound)
                       (extend-env env (sid-id param) loc-n)
                       (extend-store store loc-n var)
                       (+ 1 loc-n)))))))))
;; Validates a list of syntax bindings.  Signals an error if bindings
;; is not a list, if any binding is not a two-element list headed by an
;; identifier, or if two bindings have the same keyword.
(define (check-syntax-bindings bindings)
  (define (well-formed? binding)
    (and (list2? binding) (sid? (car binding))))
  (if (not (list? bindings))
      (error "Non-list syntax bindings list: " bindings))
  (for-each (lambda (binding)
              (if (not (well-formed? binding))
                  (error "Malformed syntax binding: " binding)))
            bindings)
  ;; Compare each binding's keyword against the bindings that follow it.
  (let scan ((remaining bindings))
    (if (not (null? remaining))
        (let ((clash (assoc (caar remaining) (cdr remaining))))
          (if clash
              (error "Duplicate bindings for a keyword: "
                     (car remaining) " and: " clash))
          (scan (cdr remaining))))))
;; returns (k store loc-n)
(define (expand-syntax-bindings bindings id-n syntax-env ienv store loc-n k)
  ;; Second phase: once every value is known, store each one at the
  ;; location its keyword has in ienv, then continue with k.
  (define (install-all vals store loc-n)
    (let install ((pending-vals vals) (pending-bs bindings) (store store))
      (cond ((null? pending-vals) (k store loc-n))
            (else
             (let ((loc (lookup-sid (caar pending-bs) ienv)))
               (install (cdr pending-vals)
                        (cdr pending-bs)
                        (extend-store store loc (car pending-vals))))))))
  ;; First phase: expand each binding's value in syntax-env, left to
  ;; right, threading store and loc-n through expand-val.
  (let evaluate ((pending bindings) (rvals '()) (store store) (loc-n loc-n))
    (if (null? pending)
        (install-all (reverse rvals) store loc-n)
        (expand-val (cadar pending) id-n syntax-env store loc-n
                    (lambda (val store loc-n)
                      (evaluate (cdr pending) (cons val rvals) store loc-n))))))
;; (expand-any sexp id-n env store loc-n lsd? ek sk dk bk)
;;
;; Ek, sk, dk, and bk are continuations for expressions, syntaxes,
;; definitions and begins:
;;
;; If sexp is an expression, returns (ek output).
;;
;; If sexp is a syntax, returns (sk syntax error-sexp store loc-n).
;; The error-sexp is just for use in error messages if the syntax is
;; subsequently misused. It is the sid that was bound to the
;; syntax, unless the syntax is an anonymous transformer, as in
;; ((syntax-rules () ((_ x) 'x)) foo), in which case the error-sexp
;; will be the entire syntax-rules form.
;;
;; If sexp is a definition, returns (dk builtin sexp id-n env store
;; loc-n), where builtin is define or define-syntax.
;;
;; If sexp is a begin, returns (bk sexp id-n env store loc-n).
;;
;; The car of the sexp passed to dk or bk is just for error reporting:
;; it is the sid that was bound to begin, define, or define-syntax.
;;
;; Expand-any signals an error if a malformed e, s, d, or b is
;; encountered. It also signals an error if ek, sk, dk, or bk is #f
;; and the corresponding thing is encountered; however, if a begin is
;; encountered and bk is #f, the begin is expanded as an expression
;; and passed to ek.
;;
;; lsd? == Let-Syntax around Definitions is okay. If lsd? is #f and a
;; let-syntax is encountered, it is assumed to start an expression or
;; syntax, so if ek and sk are #f an error will be signalled. lsd? is
;; only true at top-level. (Let-syntax around internal definitions is
;; just too semantically bizarre.)
(define (expand-any sexp id-n env store loc-n lsd? ek sk dk bk)
;; get-k hands back continuation k if the caller supplied one (non-#f);
;; otherwise it signals that the kind of form described by name was
;; used in a bad context, reporting sexp.
(define (get-k k sexp name)
(or k (error (string-append name " used in bad context: ")
sexp)))
(define (get-ek sexp) (get-k ek sexp "Expression"))
(define (get-sk sexp) (get-k sk sexp "Syntax"))
(define (get-dk sexp) (get-k dk sexp "Definition"))
(define (get-bk sexp) (get-k bk sexp "Begin"))
;; again is re-entered with the result of applying a transformer (see
;; handle-syntax-use), so that result is classified afresh.  env is
;; not a loop variable: it stays the one passed to expand-any.
(let again ((sexp sexp) (id-n id-n) (store store) (loc-n loc-n))
;; Expands sexp as an expression with this iteration's id-n, store and
;; loc-n.
(define (expand-subexpr sexp) (expand-expr sexp id-n env store loc-n))
;; Passed as the sk continuation when the head of a list form is being
;; classified: syntax is what the head denotes and head is the
;; error-sexp reported for it.  The form is rebuilt as (head . tail),
;; and that rebuilt form shadows sexp for everything nested below.
(define (handle-syntax-use syntax head store loc-n)
(let* ((tail (cdr sexp)) (sexp (cons head tail)))
(if (transformer? syntax)
;; Transformer: apply it and classify whatever it produces.
(apply-transformer syntax sexp id-n env
(lambda (sexp id-n) (again sexp id-n store loc-n)))
;; Builtin: dispatch on its name; len is the number of operands.
(let ((builtin (builtin-name syntax)) (len (length tail)))
;; Handles let-syntax (the only builtin dispatched to it below).
(define (handle-macro-block)
(or ek sk lsd?
(error "Macro block used in bad context: " sexp))
(or (>= len 2) (error "Malformed macro block: " sexp))
(let ((bindings (car tail)) (body (cdr tail)))
(check-syntax-bindings bindings)
;; First give each keyword a fresh location in ienv; then the bound
;; values are expanded in the outer env and the body in ienv.
(let loop ((bs bindings) (loc-n loc-n) (ienv env))
(if (not (null? bs))
(loop (cdr bs) (+ loc-n 1)
(extend-env ienv (sid-id (caar bs)) loc-n))
(expand-syntax-bindings
bindings id-n env ienv store loc-n
(lambda (store loc-n)
;; dk and bk are passed on to the body only when lsd? is true.
(expand-body body id-n ienv store loc-n
lsd? ek sk
(and lsd? dk) (and lsd? bk))))))))
;; Builds the output for lambda, quote, set!, if and delay: the
;; builtin's name consed onto its processed operands.
(define (handle-expr-builtin)
(define (expr-assert test)
(or test (error "Malformed " builtin " expression: " sexp)))
(cons builtin
(case builtin
((lambda)
(expr-assert (= len 2))
(expand-lambda (car tail) (cadr tail)
id-n env store loc-n))
((quote)
(expr-assert (= len 1))
;; The datum is passed through unwrap-vecs rather than expanded.
(list (unwrap-vecs (car tail))))
((set!)
(expr-assert (and (= len 2) (sid? (car tail))))
;; The target must resolve to a variable, not to a syntax or code.
(let ((var (lookup2 (car tail) env store)))
(or (var? var)
(error "Attempt to set a keyword: " sexp))
(list var (expand-subexpr (cadr tail)))))
((if)
(expr-assert (<= 2 len 3))
(map expand-subexpr tail))
((delay)
(expr-assert (= len 1))
(list (expand-subexpr (car tail)))))))
(case builtin
((let-syntax) (handle-macro-block))
((syntax-rules)
(if (< len 1) (error "Empty syntax-rules form: " sexp))
(let ((syn (compile-syntax-rules sexp env)))
((get-sk sexp) syn sexp store loc-n)))
((begin)
;; A begin needs ek or bk; when bk exists it takes precedence.
(or ek (get-bk sexp))
(cond (bk (bk sexp id-n env store loc-n))
((null? tail) (error "Empty begin expression: " sexp))
(else (ek (make-begin (map expand-subexpr tail))))))
((define define-syntax)
;; Accepts two operands with an identifier first, or, for define
;; only, a single operand.
(or (and (= 2 len) (sid? (car tail)))
(and (= 1 len) (eq? builtin 'define))
(error "Malformed definition: " sexp))
((get-dk sexp) builtin sexp id-n env store loc-n))
;; Every other builtin is an expression form and so requires ek.
(else (get-ek sexp) (ek (handle-expr-builtin))))))))
;; Passed as the ek continuation when the head of a list form is an
;; expression: output is the expanded operator.
(define (handle-combination output)
(ek (if (and (pair? output) (eq? 'lambda (car output))
(null? (cadr output)) (null? (cdr sexp)))
;; simplifies ((lambda () <expr>)) to <expr>
(caddr output)
(cons output (map expand-subexpr (cdr sexp))))))
;;(pretty-print `(expand-any/again ,sexp))
;; Dispatch on the shape of sexp: identifier, non-empty proper list,
;; self-evaluating constant, or anything else (an error).
(cond ((sid? sexp)
(let ((val (lookup2 sexp env store)))
(if (syntax? val)
((get-sk sexp) val sexp store loc-n)
((get-ek sexp) (if (code? val) (code-output val) val)))))
((and (pair? sexp) (list? sexp))
;; Classify the head with lsd? #f and no dk or bk; an expression
;; continuation is offered only if the caller supplied ek.
(expand-any (car sexp) id-n env store loc-n #f
(and ek handle-combination) handle-syntax-use #f #f))
((or (number? sexp) (boolean? sexp) (string? sexp) (char? sexp))
((get-ek sexp) sexp))
(else (error (cond ((pair? sexp) "Improper list: ")
((null? sexp) "Empty list: ")
((vector? sexp) "Vector: ")
(else "Non-S-Expression: "))
sexp
" used as an expression, syntax, or definition.")))))
;; Expands an expression or syntax and returns (k val store loc-n).
(define (expand-val sexp id-n env store loc-n k)
  ;; An expression becomes a code val; store and loc-n are the ones
  ;; this call was given.
  (define (on-expression output)
    (k (make-code output) store loc-n))
  ;; A syntax is passed on as is, with the store and loc-n it arrived
  ;; with; the error-sexp is not needed here.
  (define (on-syntax syn error-sexp store loc-n)
    (k syn store loc-n))
  (expand-any sexp id-n env store loc-n #f
              on-expression on-syntax #f #f))
;; Expands sexp, which must be an expression, and returns its output.
;; Only an expression continuation is supplied to expand-any.
(define (expand-expr sexp id-n env store loc-n)
  (define (return-output output) output)
  (expand-any sexp id-n env store loc-n #f return-output #f #f #f))
;; args and return are as in expand-any.
(define (expand-body sexps id-n env store loc-n lsd? ek sk dk bk)
;; Expand-def expands a definition or begin sequence, adds entries
;; to the vds and sds lists of variable and syntax definitons, adds
;; entries to the exprs list of expressions from (define <expr>)
;; forms, extends env, and returns (k vds sds exprs id-n env store
;; loc-n).
;; If sexp is an expression, we just return (dek output) instead.
(define (expand-def sexp vds sds exprs id-n env store loc-n k dek)
(define (dk builtin sexp id-n env store loc-n)
(or ek (eq? builtin 'define-syntax)
(error "Non-syntax definition in a syntax body: " sexp))