diff --git a/.gitignore b/.gitignore index 832ad04bcb..a96178bf48 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,15 @@ local/firmware/ *.lock !local/recipes/system/cub/source/Cargo.lock +# Editor backup files (emacs ~, vim .swp, vim .swo) +# Autotools regen produces these in source/ dirs. They +# are not part of the upstream source and get regenerated +# on the next `repo cook`. Prevents future accidental +# commits of these ephemeral files. +*~ +.*.swp +.*.swo + # Internal tooling .sisyphus/ TASK_COMPLETION_SUMMARY.md diff --git a/local/docs/C7-STATUS.md b/local/docs/C7-STATUS.md index 48d2412a48..705d2fcf2d 100644 --- a/local/docs/C7-STATUS.md +++ b/local/docs/C7-STATUS.md @@ -148,6 +148,12 @@ All 24 KF6 patches: | `4243beb4a` | test-edit-kf6-recipes: 11 unit tests | | `e3e1faece` | test-cookbook-apply-patches-e2e: 4 integration tests | | `2357758ef` | postmortem: mark C-7 complete, C-8 ready | +| `d5def6a67d` | docs: C7-STATUS.md | +| `ffbbf4935c` | C-7 cleanup: lint-recipe 13 → 4 errors (R2 build-time carveout) | +| `d2c982dc2a` | fix: remove broken patches = [...] refs | +| `f1802f6f2b` | qtbase: remove NO-OP seds (lint-recipe 1 → 1) | +| `a123bf1c5d` | sddm: 19 sed chains migrated (lint-recipe 1 → 0) | +| `a399e7da08` | cleanup: remove stale tracked files (1.3M lines) | ## What this enables @@ -169,6 +175,48 @@ All 24 KF6 patches: successful cook) don't fail with "patch already applied" — the helper detects and skips. +## Final lint state (post-C-7) + +`make lint-recipe` is **0 errors / 173 recipes clean** as of +`a123bf1c5d` (sddm migration) — the last remaining 2 R2 +errors (sddm 19 seds, qtbase 2 seds) were both addressed +in the lint cleanup commits `f1802f6f2b` (qtbase NO-OP +seds removed) and `a123bf1c5d` (sddm fully migrated). + +The 2 remaining R1 errors (redbear-sessiond, libwayland +referencing missing patch files) were fixed in `d2c982dc2a` +by removing the broken `patches = [...]` lines. 
+ +The lint rule R2 was also refined in `ffbbf4935c` to +distinguish upstream-source seds (`${COOKBOOK_SOURCE}/`) +from build-time seds (`${COOKBOOK_STAGE}/`, +`${COOKBOOK_BUILD}/`, `${COOKBOOK_SYSROOT}/`). Build-time +seds are exempt because they're build-time adjustments to +staged artifacts, not upstream source edits. + +## Stale tracked files (commit `a399e7da08`) + +617 tracked files removed (1.3M lines), 0 lines added. +Categories of stale tracked files removed: + +- **5 broken self-referential symlinks** in + `local/recipes/drivers/{ehcid,ohcid,uhcid,usb-core}/` + and `local/recipes/tui/mc/mc` (created by the now-removed + apply-patches.sh symlink-overlay system). +- **2 broken absolute-path symlinks** in + `local/recipes/gpu/drivers/{linux-kpi,redox-driver-sys}/source` + (pointed to a different filesystem layout). +- **13 tracked `~` files** (emacs backups from autotools regen) + in autotools-generated source dirs. +- **12 tracked-but-missing upstream WIP recipes** + (596 files) in `recipes/wip/` that no longer exist on disk. +- **4 files in top-level `gparted-git/`** (orphan staging dir). +- **1 tracked blob conflict** at `recipes/gpu/drivers`. + +`.gitignore` was extended with `*~`, `.*.swp`, `.*.swo` +patterns to prevent future accidental commits of ephemeral +editor / autotools-regen files. + ## Next steps (not C-7 anymore) 1. **C-8**: Delete extracted `source/` trees (5.4 GB) and @@ -176,16 +224,15 @@ All 24 KF6 patches: being built. The `local/recipes/**/source/` and `local/recipes/**/source.tar` patterns are already in `.gitignore` so deleting them is safe; the cookbook re- - extracts on next fetch. + extracts on next fetch. **User note (2026-06-13): DO NOT + clean up unzipped sources — they may contain the user's + in-flight WIP build state.** This is deferred until the + user's WIP is committed or discarded. -2. 
**`make lint-recipe` cleanup**: 10 remaining errors for - non-KF6 recipes (bison, m4, rust-native, sddm, - qt6-wayland-smoke, libwayland, redbear-sessiond). Each - needs its own migration or in-tree fork decision. - -3. **Real cook verification**: cook one of the migrated +2. **Real cook verification**: cook one of the migrated recipes (e.g. `kf6-karchive`) end-to-end and verify `stage.pkgar` byte-identical to the inline-sed version. This proves the migration preserves the exact build artifact. Blocked on toolchain infrastructure issues - unrelated to C-7. + unrelated to C-7 (libtoolize path bug, missing libffi + source, libiconv autotools chain). diff --git a/local/recipes/dev/bison/source/doc/bison.info.bak b/local/recipes/dev/bison/source/doc/bison.info.bak deleted file mode 100644 index fea740eb9e..0000000000 --- a/local/recipes/dev/bison/source/doc/bison.info.bak +++ /dev/null @@ -1,16450 +0,0 @@ -This is bison.info, produced by makeinfo version 7.3 from bison.texi. - -This manual (15 May 2026) is for GNU Bison (version 3.8.2), the GNU -parser generator. - - Copyright © 1988-1993, 1995, 1998-2015, 2018-2021 Free Software -Foundation, Inc. - - Permission is granted to copy, distribute and/or modify this - document under the terms of the GNU Free Documentation License, - Version 1.3 or any later version published by the Free Software - Foundation; with no Invariant Sections, with the Front-Cover texts - being "A GNU Manual," and with the Back-Cover Texts as in (a) - below. A copy of the license is included in the section entitled - "GNU Free Documentation License." - - (a) The FSF's Back-Cover Text is: "You have the freedom to copy and - modify this GNU manual. Buying copies from the FSF supports it in - developing GNU and promoting software freedom." -INFO-DIR-SECTION Software development -START-INFO-DIR-ENTRY -* bison: (bison). GNU parser generator (Yacc replacement). 
-END-INFO-DIR-ENTRY - - -File: bison.info, Node: Top, Next: Introduction, Up: (dir) - -Bison -***** - -This manual (15 May 2026) is for GNU Bison (version 3.8.2), the GNU -parser generator. - - Copyright © 1988-1993, 1995, 1998-2015, 2018-2021 Free Software -Foundation, Inc. - - Permission is granted to copy, distribute and/or modify this - document under the terms of the GNU Free Documentation License, - Version 1.3 or any later version published by the Free Software - Foundation; with no Invariant Sections, with the Front-Cover texts - being "A GNU Manual," and with the Back-Cover Texts as in (a) - below. A copy of the license is included in the section entitled - "GNU Free Documentation License." - - (a) The FSF's Back-Cover Text is: "You have the freedom to copy and - modify this GNU manual. Buying copies from the FSF supports it in - developing GNU and promoting software freedom." - -* Menu: - -* Introduction:: What GNU Bison is. -* Conditions:: Conditions for using Bison and its output. -* Copying:: The GNU General Public License says - how you can copy and share Bison. - -Tutorial sections: -* Concepts:: Basic concepts for understanding Bison. -* Examples:: Three simple explained examples of using Bison. - -Reference sections: -* Grammar File:: Writing Bison declarations and rules. -* Interface:: C-language interface to the parser function ‘yyparse’. -* Algorithm:: How the Bison parser works at run-time. -* Error Recovery:: Writing rules for error recovery. -* Context Dependency:: What to do if your language syntax is too - messy for Bison to handle straightforwardly. -* Debugging:: Understanding or debugging Bison parsers. -* Invocation:: How to run Bison (to produce the parser implementation). -* Other Languages:: Creating C++, D and Java parsers. -* History:: How Bison came to be -* Versioning:: Dealing with Bison versioning -* FAQ:: Frequently Asked Questions -* Table of Symbols:: All the keywords of the Bison language are explained. 
-* Glossary:: Basic concepts are explained. -* GNU Free Documentation License:: Copying and sharing this manual -* Bibliography:: Publications cited in this manual. -* Index of Terms:: Cross-references to the text. - - -- The Detailed Node Listing -- - -The Concepts of Bison - -* Language and Grammar:: Languages and context-free grammars, - as mathematical ideas. -* Grammar in Bison:: How we represent grammars for Bison's sake. -* Semantic Values:: Each token or syntactic grouping can have - a semantic value (the value of an integer, - the name of an identifier, etc.). -* Semantic Actions:: Each rule can have an action containing C code. -* GLR Parsers:: Writing parsers for general context-free languages. -* Locations:: Overview of location tracking. -* Bison Parser:: What are Bison's input and output, - how is the output used? -* Stages:: Stages in writing and running Bison grammars. -* Grammar Layout:: Overall structure of a Bison grammar file. - -Writing GLR Parsers - -* Simple GLR Parsers:: Using GLR parsers on unambiguous grammars. -* Merging GLR Parses:: Using GLR parsers to resolve ambiguities. -* GLR Semantic Actions:: Considerations for semantic values and deferred actions. -* Semantic Predicates:: Controlling a parse with arbitrary computations. - -Examples - -* RPN Calc:: Reverse Polish Notation Calculator; - a first example with no operator precedence. -* Infix Calc:: Infix (algebraic) notation calculator. - Operator precedence is introduced. -* Simple Error Recovery:: Continuing after syntax errors. -* Location Tracking Calc:: Demonstrating the use of @N and @$. -* Multi-function Calc:: Calculator with memory and trig functions. - It uses multiple data-types for semantic values. -* Exercises:: Ideas for improving the multi-function calculator. - -Reverse Polish Notation Calculator - -* Rpcalc Declarations:: Prologue (declarations) for rpcalc. -* Rpcalc Rules:: Grammar Rules for rpcalc, with explanation. -* Rpcalc Lexer:: The lexical analyzer. 
-* Rpcalc Main:: The controlling function. -* Rpcalc Error:: The error reporting function. -* Rpcalc Generate:: Running Bison on the grammar file. -* Rpcalc Compile:: Run the C compiler on the output code. - -Grammar Rules for ‘rpcalc’ - -* Rpcalc Input:: Explanation of the ‘input’ nonterminal -* Rpcalc Line:: Explanation of the ‘line’ nonterminal -* Rpcalc Exp:: Explanation of the ‘exp’ nonterminal - -Location Tracking Calculator: ‘ltcalc’ - -* Ltcalc Declarations:: Bison and C declarations for ltcalc. -* Ltcalc Rules:: Grammar rules for ltcalc, with explanations. -* Ltcalc Lexer:: The lexical analyzer. - -Multi-Function Calculator: ‘mfcalc’ - -* Mfcalc Declarations:: Bison declarations for multi-function calculator. -* Mfcalc Rules:: Grammar rules for the calculator. -* Mfcalc Symbol Table:: Symbol table management subroutines. -* Mfcalc Lexer:: The lexical analyzer. -* Mfcalc Main:: The controlling function. - -Bison Grammar Files - -* Grammar Outline:: Overall layout of the grammar file. -* Symbols:: Terminal and nonterminal symbols. -* Rules:: How to write grammar rules. -* Semantics:: Semantic values and actions. -* Tracking Locations:: Locations and actions. -* Named References:: Using named references in actions. -* Declarations:: All kinds of Bison declarations are described here. -* Multiple Parsers:: Putting more than one Bison parser in one program. - -Outline of a Bison Grammar - -* Prologue:: Syntax and usage of the prologue. -* Prologue Alternatives:: Syntax and usage of alternatives to the prologue. -* Bison Declarations:: Syntax and usage of the Bison declarations section. -* Grammar Rules:: Syntax and usage of the grammar rules section. -* Epilogue:: Syntax and usage of the epilogue. - -Grammar Rules - -* Rules Syntax:: Syntax of the rules. -* Empty Rules:: Symbols that can match the empty string. -* Recursion:: Writing recursive rules. - - -Defining Language Semantics - -* Value Type:: Specifying one data type for all semantic values. 
-* Multiple Types:: Specifying several alternative data types. -* Type Generation:: Generating the semantic value type. -* Union Decl:: Declaring the set of all semantic value types. -* Structured Value Type:: Providing a structured semantic value type. -* Actions:: An action is the semantic definition of a grammar rule. -* Action Types:: Specifying data types for actions to operate on. -* Midrule Actions:: Most actions go at the end of a rule. - This says when, why and how to use the exceptional - action in the middle of a rule. - -Actions in Midrule - -* Using Midrule Actions:: Putting an action in the middle of a rule. -* Typed Midrule Actions:: Specifying the semantic type of their values. -* Midrule Action Translation:: How midrule actions are actually processed. -* Midrule Conflicts:: Midrule actions can cause conflicts. - -Tracking Locations - -* Location Type:: Specifying a data type for locations. -* Actions and Locations:: Using locations in actions. -* Printing Locations:: Defining how locations are printed. -* Location Default Action:: Defining a general way to compute locations. - -Bison Declarations - -* Require Decl:: Requiring a Bison version. -* Token Decl:: Declaring terminal symbols. -* Precedence Decl:: Declaring terminals with precedence and associativity. -* Type Decl:: Declaring the choice of type for a nonterminal symbol. -* Symbol Decls:: Summary of the Syntax of Symbol Declarations. -* Initial Action Decl:: Code run before parsing starts. -* Destructor Decl:: Declaring how symbols are freed. -* Printer Decl:: Declaring how symbol values are displayed. -* Expect Decl:: Suppressing warnings about parsing conflicts. -* Start Decl:: Specifying the start symbol. -* Pure Decl:: Requesting a reentrant parser. -* Push Decl:: Requesting a push parser. -* Decl Summary:: Table of all Bison declarations. -* %define Summary:: Defining variables to adjust Bison's behavior. -* %code Summary:: Inserting code into the parser source. 
- -Parser C-Language Interface - -* Parser Function:: How to call ‘yyparse’ and what it returns. -* Push Parser Interface:: How to create, use, and destroy push parsers. -* Lexical:: You must supply a function ‘yylex’ - which reads tokens. -* Error Reporting:: Passing error messages to the user. -* Action Features:: Special features for use in actions. -* Internationalization:: How to let the parser speak in the user's - native language. - -The Lexical Analyzer Function ‘yylex’ - -* Calling Convention:: How ‘yyparse’ calls ‘yylex’. -* Special Tokens:: Signaling end-of-file and errors to the parser. -* Tokens from Literals:: Finding token kinds from string aliases. -* Token Values:: How ‘yylex’ must return the semantic value - of the token it has read. -* Token Locations:: How ‘yylex’ must return the text location - (line number, etc.) of the token, if the - actions want that. -* Pure Calling:: How the calling convention differs in a pure parser - (*note Pure Decl::). - -Error Reporting - -* Error Reporting Function:: You must supply a ‘yyerror’ function. -* Syntax Error Reporting Function:: You can supply a ‘yyreport_syntax_error’ function. - -Parser Internationalization - -* Enabling I18n:: Preparing your project to support internationalization. -* Token I18n:: Preparing tokens for internationalization in error messages. - -The Bison Parser Algorithm - -* Lookahead:: Parser looks one token ahead when deciding what to do. -* Shift/Reduce:: Conflicts: when either shifting or reduction is valid. -* Precedence:: Operator precedence works by resolving conflicts. -* Contextual Precedence:: When an operator's precedence depends on context. -* Parser States:: The parser is a finite-state-machine with stack. -* Reduce/Reduce:: When two rules are applicable in the same situation. -* Mysterious Conflicts:: Conflicts that look unjustified. -* Tuning LR:: How to tune fundamental aspects of LR-based parsing. -* Generalized LR Parsing:: Parsing arbitrary context-free grammars. 
-* Memory Management:: What happens when memory is exhausted. How to avoid it. - -Operator Precedence - -* Why Precedence:: An example showing why precedence is needed. -* Using Precedence:: How to specify precedence and associativity. -* Precedence Only:: How to specify precedence only. -* Precedence Examples:: How these features are used in the previous example. -* How Precedence:: How they work. -* Non Operators:: Using precedence for general conflicts. - -Tuning LR - -* LR Table Construction:: Choose a different construction algorithm. -* Default Reductions:: Disable default reductions. -* LAC:: Correct lookahead sets in the parser states. -* Unreachable States:: Keep unreachable parser states for debugging. - -Handling Context Dependencies - -* Semantic Tokens:: Token parsing can depend on the semantic context. -* Lexical Tie-ins:: Token parsing can depend on the syntactic context. -* Tie-in Recovery:: Lexical tie-ins have implications for how - error recovery rules must be written. - -Debugging Your Parser - -* Counterexamples:: Understanding conflicts. -* Understanding:: Understanding the structure of your parser. -* Graphviz:: Getting a visual representation of the parser. -* Xml:: Getting a markup representation of the parser. -* Tracing:: Tracing the execution of your parser. - -Tracing Your Parser - -* Enabling Traces:: Activating run-time trace support -* Mfcalc Traces:: Extending ‘mfcalc’ to support traces - -Invoking Bison - -* Bison Options:: All the options described in detail, - in alphabetical order by short options. -* Option Cross Key:: Alphabetical list of long options. -* Yacc Library:: Yacc-compatible ‘yylex’ and ‘main’. 
- -Bison Options - -* Operation Modes:: Options controlling the global behavior of ‘bison’ -* Diagnostics:: Options controlling the diagnostics -* Tuning the Parser:: Options changing the generated parsers -* Output Files:: Options controlling the output - -Parsers Written In Other Languages - -* C++ Parsers:: The interface to generate C++ parser classes -* D Parsers:: The interface to generate D parser classes -* Java Parsers:: The interface to generate Java parser classes - -C++ Parsers - -* A Simple C++ Example:: A short introduction to C++ parsers -* C++ Bison Interface:: Asking for C++ parser generation -* C++ Parser Interface:: Instantiating and running the parser -* C++ Semantic Values:: %union vs. C++ -* C++ Location Values:: The position and location classes -* C++ Parser Context:: You can supply a ‘report_syntax_error’ function. -* C++ Scanner Interface:: Exchanges between yylex and parse -* A Complete C++ Example:: Demonstrating their use - -C++ Location Values - -* C++ position:: One point in the source file -* C++ location:: Two points in the source file -* Exposing the Location Classes:: Using the Bison location class in your - project -* User Defined Location Type:: Required interface for locations - -A Complete C++ Example - -* Calc++ --- C++ Calculator:: The specifications -* Calc++ Parsing Driver:: An active parsing context -* Calc++ Parser:: A parser class -* Calc++ Scanner:: A pure C++ Flex scanner -* Calc++ Top Level:: Conducting the band - -D Parsers - -* D Bison Interface:: Asking for D parser generation -* D Semantic Values:: %token and %nterm vs. 
D -* D Location Values:: The position and location classes -* D Parser Interface:: Instantiating and running the parser -* D Parser Context Interface:: Circumstances of a syntax error -* D Scanner Interface:: Specifying the scanner for the parser -* D Action Features:: Special features for use in actions -* D Push Parser Interface:: Instantiating and running the push parser -* D Complete Symbols:: Using token constructors - -Java Parsers - -* Java Bison Interface:: Asking for Java parser generation -* Java Semantic Values:: %token and %nterm vs. Java -* Java Location Values:: The position and location classes -* Java Parser Interface:: Instantiating and running the parser -* Java Parser Context Interface:: Circumstances of a syntax error -* Java Scanner Interface:: Specifying the scanner for the parser -* Java Action Features:: Special features for use in actions -* Java Push Parser Interface:: Instantiating and running the push parser -* Java Differences:: Differences between C/C++ and Java Grammars -* Java Declarations Summary:: List of Bison declarations used with Java - -A Brief History of the Greater Ungulates - -* Yacc:: The original Yacc -* yacchack:: An obscure early implementation of reentrancy -* Byacc:: Berkeley Yacc -* Bison:: This program -* Other Ungulates:: Similar programs - -Bison Version Compatibility - -* Versioning:: Dealing with Bison versioning - -Frequently Asked Questions - -* Memory Exhausted:: Breaking the Stack Limits -* How Can I Reset the Parser:: ‘yyparse’ Keeps some State -* Strings are Destroyed:: ‘yylval’ Loses Track of Strings -* Implementing Gotos/Loops:: Control Flow in the Calculator -* Multiple start-symbols:: Factoring closely related grammars -* Enabling Relocatability:: Moving Bison/using it through network shares -* Secure? Conform?:: Is Bison POSIX safe? 
-* I can't build Bison:: Troubleshooting -* Where can I find help?:: Troubleshouting -* Bug Reports:: Troublereporting -* More Languages:: Parsers in C++, Java, and so on -* Beta Testing:: Experimenting development versions -* Mailing Lists:: Meeting other Bison users - -Copying This Manual - -* GNU Free Documentation License:: Copying and sharing this manual - - -File: bison.info, Node: Introduction, Next: Conditions, Prev: Top, Up: Top - -Introduction -************ - -“Bison” is a general-purpose parser generator that converts an annotated -context-free grammar into a deterministic LR or generalized LR (GLR) -parser employing LALR(1), IELR(1) or canonical LR(1) parser tables. -Once you are proficient with Bison, you can use it to develop a wide -range of language parsers, from those used in simple desk calculators to -complex programming languages. - - Bison is upward compatible with Yacc: all properly-written Yacc -grammars ought to work with Bison with no change. Anyone familiar with -Yacc should be able to use Bison with little trouble. You need to be -fluent in C, C++, D or Java programming in order to use Bison or to -understand this manual. - - We begin with tutorial chapters that explain the basic concepts of -using Bison and show three explained examples, each building on the -last. If you don't know Bison or Yacc, start by reading these chapters. -Reference chapters follow, which describe specific aspects of Bison in -detail. - - Bison was written originally by Robert Corbett. Richard Stallman -made it Yacc-compatible. Wilfred Hansen of Carnegie Mellon University -added multi-character string literals and other features. Since then, -Bison has grown more robust and evolved many other new features thanks -to the hard work of a long list of volunteers. For details, see the -‘THANKS’ and ‘ChangeLog’ files included in the Bison distribution. - - This edition corresponds to version 3.8.2 of Bison. 
- - -File: bison.info, Node: Conditions, Next: Copying, Prev: Introduction, Up: Top - -Conditions for Using Bison -************************** - -The distribution terms for Bison-generated parsers permit using the -parsers in nonfree programs. Before Bison version 2.2, these extra -permissions applied only when Bison was generating LALR(1) parsers in C. -And before Bison version 1.24, Bison-generated parsers could be used -only in programs that were free software. - - The other GNU programming tools, such as the GNU C compiler, have -never had such a requirement. They could always be used for nonfree -software. The reason Bison was different was not due to a special -policy decision; it resulted from applying the usual General Public -License to all of the Bison source code. - - The main output of the Bison utility--the Bison parser implementation -file--contains a verbatim copy of a sizable piece of Bison, which is the -code for the parser's implementation. (The actions from your grammar -are inserted into this implementation at one point, but most of the rest -of the implementation is not changed.) When we applied the GPL terms to -the skeleton code for the parser's implementation, the effect was to -restrict the use of Bison output to free software. - - We didn't change the terms because of sympathy for people who want to -make software proprietary. *Software should be free.* But we concluded -that limiting Bison's use to free software was doing little to encourage -people to make other software free. So we decided to make the practical -conditions for using Bison match the practical conditions for using the -other GNU tools. - - This exception applies when Bison is generating code for a parser. -You can tell whether the exception applies to a Bison output file by -inspecting the file for text beginning with "As a special exception...". -The text spells out the exact terms of the exception. 
- - -File: bison.info, Node: Copying, Next: Concepts, Prev: Conditions, Up: Top - -GNU GENERAL PUBLIC LICENSE -************************** - - Version 3, 29 June 2007 - - Copyright © 2007 Free Software Foundation, Inc. - - Everyone is permitted to copy and distribute verbatim copies of this - license document, but changing it is not allowed. - -Preamble -======== - -The GNU General Public License is a free, copyleft license for software -and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. 
- - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - -TERMS AND CONDITIONS -==================== - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public - License. - - "Copyright" also means copyright-like laws that apply to other - kinds of works, such as semiconductor masks. 
- - "The Program" refers to any copyrightable work licensed under this - License. Each licensee is addressed as "you". "Licensees" and - "recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the - work in a fashion requiring copyright permission, other than the - making of an exact copy. The resulting work is called a "modified - version" of the earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work - based on the Program. - - To "propagate" a work means to do anything with it that, without - permission, would make you directly or secondarily liable for - infringement under applicable copyright law, except executing it on - a computer or modifying a private copy. Propagation includes - copying, distribution (with or without modification), making - available to the public, and in some countries other activities as - well. - - To "convey" a work means any kind of propagation that enables other - parties to make or receive copies. Mere interaction with a user - through a computer network, with no transfer of a copy, is not - conveying. - - An interactive user interface displays "Appropriate Legal Notices" - to the extent that it includes a convenient and prominently visible - feature that (1) displays an appropriate copyright notice, and (2) - tells the user that there is no warranty for the work (except to - the extent that warranties are provided), that licensees may convey - the work under this License, and how to view a copy of this - License. If the interface presents a list of user commands or - options, such as a menu, a prominent item in the list meets this - criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work - for making modifications to it. "Object code" means any non-source - form of a work. 
- - A "Standard Interface" means an interface that either is an - official standard defined by a recognized standards body, or, in - the case of interfaces specified for a particular programming - language, one that is widely used among developers working in that - language. - - The "System Libraries" of an executable work include anything, - other than the work as a whole, that (a) is included in the normal - form of packaging a Major Component, but which is not part of that - Major Component, and (b) serves only to enable use of the work with - that Major Component, or to implement a Standard Interface for - which an implementation is available to the public in source code - form. A "Major Component", in this context, means a major - essential component (kernel, window system, and so on) of the - specific operating system (if any) on which the executable work - runs, or a compiler used to produce the work, or an object code - interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all - the source code needed to generate, install, and (for an executable - work) run the object code and to modify the work, including scripts - to control those activities. However, it does not include the - work's System Libraries, or general-purpose tools or generally - available free programs which are used unmodified in performing - those activities but which are not part of the work. For example, - Corresponding Source includes interface definition files associated - with source files for the work, and the source code for shared - libraries and dynamically linked subprograms that the work is - specifically designed to require, such as by intimate data - communication or control flow between those subprograms and other - parts of the work. - - The Corresponding Source need not include anything that users can - regenerate automatically from other parts of the Corresponding - Source. 
- - The Corresponding Source for a work in source code form is that - same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of - copyright on the Program, and are irrevocable provided the stated - conditions are met. This License explicitly affirms your unlimited - permission to run the unmodified Program. The output from running - a covered work is covered by this License only if the output, given - its content, constitutes a covered work. This License acknowledges - your rights of fair use or other equivalent, as provided by - copyright law. - - You may make, run and propagate covered works that you do not - convey, without conditions so long as your license otherwise - remains in force. You may convey covered works to others for the - sole purpose of having them make modifications exclusively for you, - or provide you with facilities for running those works, provided - that you comply with the terms of this License in conveying all - material for which you do not control copyright. Those thus making - or running the covered works for you must do so exclusively on your - behalf, under your direction and control, on terms that prohibit - them from making any copies of your copyrighted material outside - their relationship with you. - - Conveying under any other circumstances is permitted solely under - the conditions stated below. Sublicensing is not allowed; section - 10 makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological - measure under any applicable law fulfilling obligations under - article 11 of the WIPO copyright treaty adopted on 20 December - 1996, or similar laws prohibiting or restricting circumvention of - such measures. 
- - When you convey a covered work, you waive any legal power to forbid - circumvention of technological measures to the extent such - circumvention is effected by exercising rights under this License - with respect to the covered work, and you disclaim any intention to - limit operation or modification of the work as a means of - enforcing, against the work's users, your or third parties' legal - rights to forbid circumvention of technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you - receive it, in any medium, provided that you conspicuously and - appropriately publish on each copy an appropriate copyright notice; - keep intact all notices stating that this License and any - non-permissive terms added in accord with section 7 apply to the - code; keep intact all notices of the absence of any warranty; and - give all recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, - and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to - produce it from the Program, in the form of source code under the - terms of section 4, provided that you also meet all of these - conditions: - - a. The work must carry prominent notices stating that you - modified it, and giving a relevant date. - - b. The work must carry prominent notices stating that it is - released under this License and any conditions added under - section 7. This requirement modifies the requirement in - section 4 to "keep intact all notices". - - c. You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable - section 7 additional terms, to the whole of the work, and all - its parts, regardless of how they are packaged. 
This License - gives no permission to license the work in any other way, but - it does not invalidate such permission if you have separately - received it. - - d. If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has - interactive interfaces that do not display Appropriate Legal - Notices, your work need not make them do so. - - A compilation of a covered work with other separate and independent - works, which are not by their nature extensions of the covered - work, and which are not combined with it such as to form a larger - program, in or on a volume of a storage or distribution medium, is - called an "aggregate" if the compilation and its resulting - copyright are not used to limit the access or legal rights of the - compilation's users beyond what the individual works permit. - Inclusion of a covered work in an aggregate does not cause this - License to apply to the other parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms - of sections 4 and 5, provided that you also convey the - machine-readable Corresponding Source under the terms of this - License, in one of these ways: - - a. Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b. 
Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that - product model, to give anyone who possesses the object code - either (1) a copy of the Corresponding Source for all the - software in the product that is covered by this License, on a - durable physical medium customarily used for software - interchange, for a price no more than your reasonable cost of - physically performing this conveying of source, or (2) access - to copy the Corresponding Source from a network server at no - charge. - - c. Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, - and only if you received the object code with such an offer, - in accord with subsection 6b. - - d. Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to - the Corresponding Source in the same way through the same - place at no further charge. You need not require recipients - to copy the Corresponding Source along with the object code. - If the place to copy the object code is a network server, the - Corresponding Source may be on a different server (operated by - you or a third party) that supports equivalent copying - facilities, provided you maintain clear directions next to the - object code saying where to find the Corresponding Source. - Regardless of what server hosts the Corresponding Source, you - remain obligated to ensure that it is available for as long as - needed to satisfy these requirements. - - e. 
Convey the object code using peer-to-peer transmission, - provided you inform other peers where the object code and - Corresponding Source of the work are being offered to the - general public at no charge under subsection 6d. - - A separable portion of the object code, whose source code is - excluded from the Corresponding Source as a System Library, need - not be included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means - any tangible personal property which is normally used for personal, - family, or household purposes, or (2) anything designed or sold for - incorporation into a dwelling. In determining whether a product is - a consumer product, doubtful cases shall be resolved in favor of - coverage. For a particular product received by a particular user, - "normally used" refers to a typical or common use of that class of - product, regardless of the status of the particular user or of the - way in which the particular user actually uses, or expects or is - expected to use, the product. A product is a consumer product - regardless of whether the product has substantial commercial, - industrial or non-consumer uses, unless such uses represent the - only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, - procedures, authorization keys, or other information required to - install and execute modified versions of a covered work in that - User Product from a modified version of its Corresponding Source. - The information must suffice to ensure that the continued - functioning of the modified object code is in no case prevented or - interfered with solely because modification has been made. 
- - If you convey an object code work under this section in, or with, - or specifically for use in, a User Product, and the conveying - occurs as part of a transaction in which the right of possession - and use of the User Product is transferred to the recipient in - perpetuity or for a fixed term (regardless of how the transaction - is characterized), the Corresponding Source conveyed under this - section must be accompanied by the Installation Information. But - this requirement does not apply if neither you nor any third party - retains the ability to install modified object code on the User - Product (for example, the work has been installed in ROM). - - The requirement to provide Installation Information does not - include a requirement to continue to provide support service, - warranty, or updates for a work that has been modified or installed - by the recipient, or for the User Product in which it has been - modified or installed. Access to a network may be denied when the - modification itself materially and adversely affects the operation - of the network or violates the rules and protocols for - communication across the network. - - Corresponding Source conveyed, and Installation Information - provided, in accord with this section must be in a format that is - publicly documented (and with an implementation available to the - public in source code form), and must require no special password - or key for unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of - this License by making exceptions from one or more of its - conditions. Additional permissions that are applicable to the - entire Program shall be treated as though they were included in - this License, to the extent that they are valid under applicable - law. 
If additional permissions apply only to part of the Program, - that part may be used separately under those permissions, but the - entire Program remains governed by this License without regard to - the additional permissions. - - When you convey a copy of a covered work, you may at your option - remove any additional permissions from that copy, or from any part - of it. (Additional permissions may be written to require their own - removal in certain cases when you modify the work.) You may place - additional permissions on material, added by you to a covered work, - for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material - you add to a covered work, you may (if authorized by the copyright - holders of that material) supplement the terms of this License with - terms: - - a. Disclaiming warranty or limiting liability differently from - the terms of sections 15 and 16 of this License; or - - b. Requiring preservation of specified reasonable legal notices - or author attributions in that material or in the Appropriate - Legal Notices displayed by works containing it; or - - c. Prohibiting misrepresentation of the origin of that material, - or requiring that modified versions of such material be marked - in reasonable ways as different from the original version; or - - d. Limiting the use for publicity purposes of names of licensors - or authors of the material; or - - e. Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f. Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified - versions of it) with contractual assumptions of liability to - the recipient, for any liability that these contractual - assumptions directly impose on those licensors and authors. 
- - All other non-permissive additional terms are considered "further - restrictions" within the meaning of section 10. If the Program as - you received it, or any part of it, contains a notice stating that - it is governed by this License along with a term that is a further - restriction, you may remove that term. If a license document - contains a further restriction but permits relicensing or conveying - under this License, you may add to a covered work material governed - by the terms of that license document, provided that the further - restriction does not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you - must place, in the relevant source files, a statement of the - additional terms that apply to those files, or a notice indicating - where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in - the form of a separately written license, or stated as exceptions; - the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly - provided under this License. Any attempt otherwise to propagate or - modify it is void, and will automatically terminate your rights - under this License (including any patent licenses granted under the - third paragraph of section 11). - - However, if you cease all violation of this License, then your - license from a particular copyright holder is reinstated (a) - provisionally, unless and until the copyright holder explicitly and - finally terminates your license, and (b) permanently, if the - copyright holder fails to notify you of the violation by some - reasonable means prior to 60 days after the cessation. 
- - Moreover, your license from a particular copyright holder is - reinstated permanently if the copyright holder notifies you of the - violation by some reasonable means, this is the first time you have - received notice of violation of this License (for any work) from - that copyright holder, and you cure the violation prior to 30 days - after your receipt of the notice. - - Termination of your rights under this section does not terminate - the licenses of parties who have received copies or rights from you - under this License. If your rights have been terminated and not - permanently reinstated, you do not qualify to receive new licenses - for the same material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or - run a copy of the Program. Ancillary propagation of a covered work - occurring solely as a consequence of using peer-to-peer - transmission to receive a copy likewise does not require - acceptance. However, nothing other than this License grants you - permission to propagate or modify any covered work. These actions - infringe copyright if you do not accept this License. Therefore, - by modifying or propagating a covered work, you indicate your - acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically - receives a license from the original licensors, to run, modify and - propagate that work, subject to this License. You are not - responsible for enforcing compliance by third parties with this - License. - - An "entity transaction" is a transaction transferring control of an - organization, or substantially all assets of one, or subdividing an - organization, or merging organizations. 
If propagation of a - covered work results from an entity transaction, each party to that - transaction who receives a copy of the work also receives whatever - licenses to the work the party's predecessor in interest had or - could give under the previous paragraph, plus a right to possession - of the Corresponding Source of the work from the predecessor in - interest, if the predecessor has it or can get it with reasonable - efforts. - - You may not impose any further restrictions on the exercise of the - rights granted or affirmed under this License. For example, you - may not impose a license fee, royalty, or other charge for exercise - of rights granted under this License, and you may not initiate - litigation (including a cross-claim or counterclaim in a lawsuit) - alleging that any patent claim is infringed by making, using, - selling, offering for sale, or importing the Program or any portion - of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this - License of the Program or a work on which the Program is based. - The work thus licensed is called the contributor's "contributor - version". - - A contributor's "essential patent claims" are all patent claims - owned or controlled by the contributor, whether already acquired or - hereafter acquired, that would be infringed by some manner, - permitted by this License, of making, using, or selling its - contributor version, but do not include claims that would be - infringed only as a consequence of further modification of the - contributor version. For purposes of this definition, "control" - includes the right to grant patent sublicenses in a manner - consistent with the requirements of this License. - - Each contributor grants you a non-exclusive, worldwide, - royalty-free patent license under the contributor's essential - patent claims, to make, use, sell, offer for sale, import and - otherwise run, modify and propagate the contents of its contributor - version. 
- - In the following three paragraphs, a "patent license" is any - express agreement or commitment, however denominated, not to - enforce a patent (such as an express permission to practice a - patent or covenant not to sue for patent infringement). To "grant" - such a patent license to a party means to make such an agreement or - commitment not to enforce a patent against the party. - - If you convey a covered work, knowingly relying on a patent - license, and the Corresponding Source of the work is not available - for anyone to copy, free of charge and under the terms of this - License, through a publicly available network server or other - readily accessible means, then you must either (1) cause the - Corresponding Source to be so available, or (2) arrange to deprive - yourself of the benefit of the patent license for this particular - work, or (3) arrange, in a manner consistent with the requirements - of this License, to extend the patent license to downstream - recipients. "Knowingly relying" means you have actual knowledge - that, but for the patent license, your conveying the covered work - in a country, or your recipient's use of the covered work in a - country, would infringe one or more identifiable patents in that - country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or - arrangement, you convey, or propagate by procuring conveyance of, a - covered work, and grant a patent license to some of the parties - receiving the covered work authorizing them to use, propagate, - modify or convey a specific copy of the covered work, then the - patent license you grant is automatically extended to all - recipients of the covered work and works based on it. - - A patent license is "discriminatory" if it does not include within - the scope of its coverage, prohibits the exercise of, or is - conditioned on the non-exercise of one or more of the rights that - are specifically granted under this License. 
You may not convey a - covered work if you are a party to an arrangement with a third - party that is in the business of distributing software, under which - you make payment to the third party based on the extent of your - activity of conveying the work, and under which the third party - grants, to any of the parties who would receive the covered work - from you, a discriminatory patent license (a) in connection with - copies of the covered work conveyed by you (or copies made from - those copies), or (b) primarily for and in connection with specific - products or compilations that contain the covered work, unless you - entered into that arrangement, or that patent license was granted, - prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting - any implied license or other defenses to infringement that may - otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement - or otherwise) that contradict the conditions of this License, they - do not excuse you from the conditions of this License. If you - cannot convey a covered work so as to satisfy simultaneously your - obligations under this License and any other pertinent obligations, - then as a consequence you may not convey it at all. For example, - if you agree to terms that obligate you to collect a royalty for - further conveying from those to whom you convey the Program, the - only way you could satisfy both those terms and this License would - be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have - permission to link or combine any covered work with a work licensed - under version 3 of the GNU Affero General Public License into a - single combined work, and to convey the resulting work. 
The terms - of this License will continue to apply to the part which is the - covered work, but the special requirements of the GNU Affero - General Public License, section 13, concerning interaction through - a network will apply to the combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new - versions of the GNU General Public License from time to time. Such - new versions will be similar in spirit to the present version, but - may differ in detail to address new problems or concerns. - - Each version is given a distinguishing version number. If the - Program specifies that a certain numbered version of the GNU - General Public License "or any later version" applies to it, you - have the option of following the terms and conditions either of - that numbered version or of any later version published by the Free - Software Foundation. If the Program does not specify a version - number of the GNU General Public License, you may choose any - version ever published by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future - versions of the GNU General Public License can be used, that - proxy's public statement of acceptance of a version permanently - authorizes you to choose that version for the Program. - - Later license versions may give you additional or different - permissions. However, no additional obligations are imposed on any - author or copyright holder as a result of your choosing to follow a - later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY - APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE - COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" - WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, - INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
THE ENTIRE - RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. - SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL - NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN - WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES - AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR - DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR - CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE - THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA - BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD - PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER - PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF - THE POSSIBILITY OF SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided - above cannot be given local legal effect according to their terms, - reviewing courts shall apply local law that most closely - approximates an absolute waiver of all civil liability in - connection with the Program, unless a warranty or assumption of - liability accompanies a copy of the Program in return for a fee. - -END OF TERMS AND CONDITIONS -=========================== - -How to Apply These Terms to Your New Programs -============================================= - -If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these -terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least the -"copyright" line and a pointer to where the full notice is found. 
- - ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES. - Copyright (C) YEAR NAME OF AUTHOR - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or (at - your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see https://www.gnu.org/licenses/. - - Also add information on how to contact you by electronic and paper -mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - PROGRAM Copyright (C) YEAR NAME OF AUTHOR - This program comes with ABSOLUTELY NO WARRANTY; for details type ‘show w’. - This is free software, and you are welcome to redistribute it - under certain conditions; type ‘show c’ for details. - - The hypothetical commands ‘show w’ and ‘show c’ should show the -appropriate parts of the General Public License. Of course, your -program's commands might be different; for a GUI interface, you would -use an "about box". - - You should also get your employer (if you work as a programmer) or -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. For more information on this, and how to apply and follow -the GNU GPL, see https://www.gnu.org/licenses/. - - The GNU General Public License does not permit incorporating your -program into proprietary programs. If your program is a subroutine -library, you may consider it more useful to permit linking proprietary -applications with the library. If this is what you want to do, use the -GNU Lesser General Public License instead of this License. But first, -please read https://www.gnu.org/licenses/why-not-lgpl.html.
- - -File: bison.info, Node: Concepts, Next: Examples, Prev: Copying, Up: Top - -1 The Concepts of Bison -*********************** - -This chapter introduces many of the basic concepts without which the -details of Bison will not make sense. If you do not already know how to -use Bison or Yacc, we suggest you start by reading this chapter -carefully. - -* Menu: - -* Language and Grammar:: Languages and context-free grammars, - as mathematical ideas. -* Grammar in Bison:: How we represent grammars for Bison's sake. -* Semantic Values:: Each token or syntactic grouping can have - a semantic value (the value of an integer, - the name of an identifier, etc.). -* Semantic Actions:: Each rule can have an action containing C code. -* GLR Parsers:: Writing parsers for general context-free languages. -* Locations:: Overview of location tracking. -* Bison Parser:: What are Bison's input and output, - how is the output used? -* Stages:: Stages in writing and running Bison grammars. -* Grammar Layout:: Overall structure of a Bison grammar file. - - -File: bison.info, Node: Language and Grammar, Next: Grammar in Bison, Up: Concepts - -1.1 Languages and Context-Free Grammars -======================================= - -In order for Bison to parse a language, it must be described by a -“context-free grammar”. This means that you specify one or more -“syntactic groupings” and give rules for constructing them from their -parts. For example, in the C language, one kind of grouping is called -an 'expression'. One rule for making an expression might be, "An -expression can be made of a minus sign and another expression". Another -would be, "An expression can be an integer". As you can see, rules are -often recursive, but there must be at least one rule which leads out of -the recursion. - - The most common formal system for presenting such rules for humans to -read is “Backus-Naur Form” or "BNF", which was developed in order to -specify the language Algol 60. 
Any grammar expressed in BNF is a -context-free grammar. The input to Bison is essentially -machine-readable BNF. - - There are various important subclasses of context-free grammars. -Although it can handle almost all context-free grammars, Bison is -optimized for what are called LR(1) grammars. In brief, in these -grammars, it must be possible to tell how to parse any portion of an -input string with just a single token of lookahead. For historical -reasons, Bison by default is limited by the additional restrictions of -LALR(1), which is hard to explain simply. *Note Mysterious Conflicts::, -for more information on this. You can escape these additional -restrictions by requesting IELR(1) or canonical LR(1) parser tables. -*Note LR Table Construction::, to learn how. - - Parsers for LR(1) grammars are “deterministic”, meaning roughly that -the next grammar rule to apply at any point in the input is uniquely -determined by the preceding input and a fixed, finite portion (called a -“lookahead”) of the remaining input. A context-free grammar can be -“ambiguous”, meaning that there are multiple ways to apply the grammar -rules to get the same inputs. Even unambiguous grammars can be -“nondeterministic”, meaning that no fixed lookahead always suffices to -determine the next grammar rule to apply. With the proper declarations, -Bison is also able to parse these more general context-free grammars, -using a technique known as GLR parsing (for Generalized LR). Bison's GLR -parsers are able to handle any context-free grammar for which the number -of possible parses of any given string is finite. - - In the formal grammatical rules for a language, each kind of -syntactic unit or grouping is named by a “symbol”. Those which are -built by grouping smaller constructs according to grammatical rules are -called “nonterminal symbols”; those which can't be subdivided are called -“terminal symbols” or “token kinds”. 
We call a piece of input -corresponding to a single terminal symbol a “token”, and a piece -corresponding to a single nonterminal symbol a “grouping”. - - We can use the C language as an example of what symbols, terminal and -nonterminal, mean. The tokens of C are identifiers, constants (numeric -and string), and the various keywords, arithmetic operators and -punctuation marks. So the terminal symbols of a grammar for C include -'identifier', 'number', 'string', plus one symbol for each keyword, -operator or punctuation mark: 'if', 'return', 'const', 'static', 'int', -'char', 'plus-sign', 'open-brace', 'close-brace', 'comma' and many more. -(These tokens can be subdivided into characters, but that is a matter of -lexicography, not grammar.) - - Here is a simple C function subdivided into tokens: - - int /* keyword 'int' */ - square (int x) /* identifier, open-paren, keyword 'int', - identifier, close-paren */ - { /* open-brace */ - return x * x; /* keyword 'return', identifier, asterisk, - identifier, semicolon */ - } /* close-brace */ - - The syntactic groupings of C include the expression, the statement, -the declaration, and the function definition. These are represented in -the grammar of C by nonterminal symbols 'expression', 'statement', -'declaration' and 'function definition'. The full grammar uses dozens -of additional language constructs, each with its own nonterminal symbol, -in order to express the meanings of these four. The example above is a -function definition; it contains one declaration, and one statement. In -the statement, each ‘x’ is an expression and so is ‘x * x’. - - Each nonterminal symbol must have grammatical rules showing how it is -made out of simpler constructs. For example, one kind of C statement is -the ‘return’ statement; this would be described with a grammar rule -which reads informally as follows: - - A 'statement' can be made of a 'return' keyword, an 'expression' - and a 'semicolon'. 
- -There would be many other rules for 'statement', one for each kind of -statement in C. - - One nonterminal symbol must be distinguished as the special one which -defines a complete utterance in the language. It is called the “start -symbol”. In a compiler, this means a complete input program. In the C -language, the nonterminal symbol 'sequence of definitions and -declarations' plays this role. - - For example, ‘1 + 2’ is a valid C expression--a valid part of a C -program--but it is not valid as an _entire_ C program. In the -context-free grammar of C, this follows from the fact that 'expression' -is not the start symbol. - - The Bison parser reads a sequence of tokens as its input, and groups -the tokens using the grammar rules. If the input is valid, the end -result is that the entire token sequence reduces to a single grouping -whose symbol is the grammar's start symbol. If we use a grammar for C, -the entire input must be a 'sequence of definitions and declarations'. -If not, the parser reports a syntax error. - - -File: bison.info, Node: Grammar in Bison, Next: Semantic Values, Prev: Language and Grammar, Up: Concepts - -1.2 From Formal Rules to Bison Input -==================================== - -A formal grammar is a mathematical construct. To define the language -for Bison, you must write a file expressing the grammar in Bison syntax: -a “Bison grammar” file. *Note Grammar File::. - - A nonterminal symbol in the formal grammar is represented in Bison -input as an identifier, like an identifier in C. By convention, it -should be in lower case, such as ‘expr’, ‘stmt’ or ‘declaration’. - - The Bison representation for a terminal symbol is also called a -“token kind”. Token kinds as well can be represented as C-like -identifiers. By convention, these identifiers should be upper case to -distinguish them from nonterminals: for example, ‘INTEGER’, -‘IDENTIFIER’, ‘IF’ or ‘RETURN’. 
A terminal symbol that stands for a -particular keyword in the language should be named after that keyword -converted to upper case. The terminal symbol ‘error’ is reserved for -error recovery. *Note Symbols::. - - A terminal symbol can also be represented as a character literal, -just like a C character constant. You should do this whenever a token -is just a single character (parenthesis, plus-sign, etc.): use that same -character in a literal as the terminal symbol for that token. - - A third way to represent a terminal symbol is with a C string -constant containing several characters. *Note Symbols::, for more -information. - - The grammar rules also have an expression in Bison syntax. For -example, here is the Bison rule for a C ‘return’ statement. The -semicolon in quotes is a literal character token, representing part of -the C syntax for the statement; the naked semicolon, and the colon, are -Bison punctuation used in every rule. - - stmt: RETURN expr ';' ; - -*Note Rules::. - - -File: bison.info, Node: Semantic Values, Next: Semantic Actions, Prev: Grammar in Bison, Up: Concepts - -1.3 Semantic Values -=================== - -A formal grammar selects tokens only by their classifications: for -example, if a rule mentions the terminal symbol 'integer constant', it -means that _any_ integer constant is grammatically valid in that -position. The precise value of the constant is irrelevant to how to -parse the input: if ‘x+4’ is grammatical then ‘x+1’ or ‘x+3989’ is -equally grammatical. - - But the precise value is very important for what the input means once -it is parsed. A compiler is useless if it fails to distinguish between -4, 1 and 3989 as constants in the program! Therefore, each token in a -Bison grammar has both a token kind and a “semantic value”. *Note -Semantics::, for details. - - The token kind is a terminal symbol defined in the grammar, such as -‘INTEGER’, ‘IDENTIFIER’ or ‘','’. 
It tells everything you need to know -to decide where the token may validly appear and how to group it with -other tokens. The grammar rules know nothing about tokens except their -kinds. - - The semantic value has all the rest of the information about the -meaning of the token, such as the value of an integer, or the name of an -identifier. (A token such as ‘','’ which is just punctuation doesn't -need to have any semantic value.) - - For example, an input token might be classified as token kind -‘INTEGER’ and have the semantic value 4. Another input token might have -the same token kind ‘INTEGER’ but value 3989. When a grammar rule says -that ‘INTEGER’ is allowed, either of these tokens is acceptable because -each is an ‘INTEGER’. When the parser accepts the token, it keeps track -of the token's semantic value. - - Each grouping can also have a semantic value as well as its -nonterminal symbol. For example, in a calculator, an expression -typically has a semantic value that is a number. In a compiler for a -programming language, an expression typically has a semantic value that -is a tree structure describing the meaning of the expression. - - -File: bison.info, Node: Semantic Actions, Next: GLR Parsers, Prev: Semantic Values, Up: Concepts - -1.4 Semantic Actions -==================== - -In order to be useful, a program must do more than parse input; it must -also produce some output based on the input. In a Bison grammar, a -grammar rule can have an “action” made up of C statements. Each time -the parser recognizes a match for that rule, the action is executed. -*Note Actions::. - - Most of the time, the purpose of an action is to compute the semantic -value of the whole construct from the semantic values of its parts. For -example, suppose we have a rule which says an expression can be the sum -of two expressions. When the parser recognizes such a sum, each of the -subexpressions has a semantic value which describes how it was built up. 
-The action for this rule should create a similar sort of value for the -newly recognized larger expression. - - For example, here is a rule that says an expression can be the sum of -two subexpressions: - - expr: expr '+' expr { $$ = $1 + $3; } ; - -The action says how to produce the semantic value of the sum expression -from the values of the two subexpressions. - - -File: bison.info, Node: GLR Parsers, Next: Locations, Prev: Semantic Actions, Up: Concepts - -1.5 Writing GLR Parsers -======================= - -In some grammars, Bison's deterministic LR(1) parsing algorithm cannot -decide whether to apply a certain grammar rule at a given point. That -is, it may not be able to decide (on the basis of the input read so far) -which of two possible reductions (applications of a grammar rule) -applies, or whether to apply a reduction or read more of the input and -apply a reduction later in the input. These are known respectively as -“reduce/reduce” conflicts (*note Reduce/Reduce::), and “shift/reduce” -conflicts (*note Shift/Reduce::). - - To use a grammar that is not easily modified to be LR(1), a more -general parsing algorithm is sometimes necessary. If you include -‘%glr-parser’ among the Bison declarations in your file (*note Grammar -Outline::), the result is a Generalized LR (GLR) parser. These parsers -handle Bison grammars that contain no unresolved conflicts (i.e., after -applying precedence declarations) identically to deterministic parsers. -However, when faced with unresolved shift/reduce and reduce/reduce -conflicts, GLR parsers use the simple expedient of doing both, -effectively cloning the parser to follow both possibilities. Each of -the resulting parsers can again split, so that at any given time, there -can be any number of possible parses being explored. The parsers -proceed in lockstep; that is, all of them consume (shift) a given input -symbol before any of them proceed to the next. 
Each of the cloned -parsers eventually meets one of two possible fates: either it runs into -a parsing error, in which case it simply vanishes, or it merges with -another parser, because the two of them have reduced the input to an -identical set of symbols. - - During the time that there are multiple parsers, semantic actions are -recorded, but not performed. When a parser disappears, its recorded -semantic actions disappear as well, and are never performed. When a -reduction makes two parsers identical, causing them to merge, Bison -records both sets of semantic actions. Whenever the last two parsers -merge, reverting to the single-parser case, Bison resolves all the -outstanding actions either by precedences given to the grammar rules -involved, or by performing both actions, and then calling a designated -user-defined function on the resulting values to produce an arbitrary -merged result. - -* Menu: - -* Simple GLR Parsers:: Using GLR parsers on unambiguous grammars. -* Merging GLR Parses:: Using GLR parsers to resolve ambiguities. -* GLR Semantic Actions:: Considerations for semantic values and deferred actions. -* Semantic Predicates:: Controlling a parse with arbitrary computations. - - -File: bison.info, Node: Simple GLR Parsers, Next: Merging GLR Parses, Up: GLR Parsers - -1.5.1 Using GLR on Unambiguous Grammars ---------------------------------------- - -In the simplest cases, you can use the GLR algorithm to parse grammars -that are unambiguous but fail to be LR(1). Such grammars typically -require more than one symbol of lookahead. - - Consider a problem that arises in the declaration of enumerated and -subrange types in the programming language Pascal. Here are some -examples: - - type subrange = lo .. 
hi; - type enum = (a, b, c); - -The original language standard allows only numeric literals and constant -identifiers for the subrange bounds (‘lo’ and ‘hi’), but Extended Pascal -(ISO/IEC 10206) and many other Pascal implementations allow arbitrary -expressions there. This gives rise to the following situation, -containing a superfluous pair of parentheses: - - type subrange = (a) .. b; - -Compare this to the following declaration of an enumerated type with -only one value: - - type enum = (a); - -(These declarations are contrived, but they are syntactically valid, and -more-complicated cases can come up in practical programs.) - - These two declarations look identical until the ‘..’ token. With -normal LR(1) one-token lookahead it is not possible to decide between -the two forms when the identifier ‘a’ is parsed. It is, however, -desirable for a parser to decide this, since in the latter case ‘a’ must -become a new identifier to represent the enumeration value, while in the -former case ‘a’ must be evaluated with its current meaning, which may be -a constant or even a function call. - - You could parse ‘(a)’ as an "unspecified identifier in parentheses", -to be resolved later, but this typically requires substantial -contortions in both semantic actions and large parts of the grammar, -where the parentheses are nested in the recursive rules for expressions. - - You might think of using the lexer to distinguish between the two -forms by returning different tokens for currently defined and undefined -identifiers. But if these declarations occur in a local scope, and ‘a’ -is defined in an outer scope, then both forms are possible--either -locally redefining ‘a’, or using the value of ‘a’ from the outer scope. -So this approach cannot work. - - A simple solution to this problem is to declare the parser to use the -GLR algorithm. When the GLR parser reaches the critical state, it -merely splits into two branches and pursues both syntax rules -simultaneously. 
Sooner or later, one of them runs into a parsing error. -If there is a ‘..’ token before the next ‘;’, the rule for enumerated -types fails since it cannot accept ‘..’ anywhere; otherwise, the -subrange type rule fails since it requires a ‘..’ token. So one of the -branches fails silently, and the other one continues normally, -performing all the intermediate actions that were postponed during the -split. - - If the input is syntactically incorrect, both branches fail and the -parser reports a syntax error as usual. - - The effect of all this is that the parser seems to "guess" the -correct branch to take, or in other words, it seems to use more -lookahead than the underlying LR(1) algorithm actually allows for. In -this example, LR(2) would suffice, but also some cases that are not -LR(k) for any k can be handled this way. - - In general, a GLR parser can take quadratic or cubic worst-case time, -and the current Bison parser even takes exponential time and space for -some grammars. In practice, this rarely happens, and for many grammars -it is possible to prove that it cannot happen. The present example -contains only one conflict between two rules, and the type-declaration -context containing the conflict cannot be nested. So the number of -branches that can exist at any time is limited by the constant 2, and -the parsing time is still linear. - - Here is a Bison grammar corresponding to the example above. It -parses a vastly simplified form of Pascal type declarations. - - %token TYPE DOTDOT ID - - %left '+' '-' - %left '*' '/' - - %% - type_decl: TYPE ID '=' type ';' ; - - type: - '(' id_list ')' - | expr DOTDOT expr - ; - - id_list: - ID - | id_list ',' ID - ; - - expr: - '(' expr ')' - | expr '+' expr - | expr '-' expr - | expr '*' expr - | expr '/' expr - | ID - ; - - When used as a normal LR(1) grammar, Bison correctly complains about -one reduce/reduce conflict. 
In the conflicting situation the parser -chooses one of the alternatives, arbitrarily the one declared first. -Therefore the following correct input is not recognized: - - type t = (a) .. b; - - The parser can be turned into a GLR parser, while also telling Bison -to be silent about the one known reduce/reduce conflict, by adding these -two declarations to the Bison grammar file (before the first ‘%%’): - - %glr-parser - %expect-rr 1 - -No change in the grammar itself is required. Now the parser recognizes -all valid declarations, according to the limited syntax above, -transparently. In fact, the user does not even notice when the parser -splits. - - So here we have a case where we can use the benefits of GLR, almost -without disadvantages. Even in simple cases like this, however, there -are at least two potential problems to beware. First, always analyze -the conflicts reported by Bison to make sure that GLR splitting is only -done where it is intended. A GLR parser splitting inadvertently may -cause problems less obvious than an LR parser statically choosing the -wrong alternative in a conflict. Second, consider interactions with the -lexer (*note Semantic Tokens::) with great care. Since a split parser -consumes tokens without performing any actions during the split, the -lexer cannot obtain information via parser actions. Some cases of lexer -interactions can be eliminated by using GLR to shift the complications -from the lexer to the parser. You must check the remaining cases for -correctness. - - In our example, it would be safe for the lexer to return tokens based -on their current meanings in some symbol table, because no new symbols -are defined in the middle of a type declaration. Though it is possible -for a parser to define the enumeration constants as they are parsed, -before the type declaration is completed, it actually makes no -difference since they cannot be used within the same enumerated type -declaration. 
- - -File: bison.info, Node: Merging GLR Parses, Next: GLR Semantic Actions, Prev: Simple GLR Parsers, Up: GLR Parsers - -1.5.2 Using GLR to Resolve Ambiguities --------------------------------------- - -Let's consider an example, vastly simplified from a C++ grammar.(1) - - %{ - #include <stdio.h> - int yylex (void); - void yyerror (char const *); - %} - - %define api.value.type {char const *} - - %token TYPENAME ID - - %right '=' - %left '+' - - %glr-parser - - %% - - prog: - %empty - | prog stmt { printf ("\n"); } - ; - - stmt: - expr ';' %dprec 1 - | decl %dprec 2 - ; - - expr: - ID { printf ("%s ", $$); } - | TYPENAME '(' expr ')' - { printf ("%s ", $1); } - | expr '+' expr { printf ("+ "); } - | expr '=' expr { printf ("= "); } - ; - - decl: - TYPENAME declarator ';' - { printf ("%s ", $1); } - | TYPENAME declarator '=' expr ';' - { printf ("%s ", $1); } - ; - - declarator: - ID { printf ("\"%s\" ", $1); } - | '(' declarator ')' - ; - -This models a problematic part of the C++ grammar--the ambiguity between -certain declarations and statements. For example, - - T (x) = y+z; - -parses as either an ‘expr’ or a ‘stmt’ (assuming that ‘T’ is recognized -as a ‘TYPENAME’ and ‘x’ as an ‘ID’). Bison detects this as a -reduce/reduce conflict between the rules ‘expr : ID’ and ‘declarator : -ID’, which it cannot resolve at the time it encounters ‘x’ in the -example above. Since this is a GLR parser, it therefore splits the -problem into two parses, one for each choice of resolving the -reduce/reduce conflict. Unlike the example from the previous section -(*note Simple GLR Parsers::), however, neither of these parses "dies," -because the grammar as it stands is ambiguous. One of the parsers -eventually reduces ‘stmt : expr ';'’ and the other reduces ‘stmt : -decl’, after which both parsers are in an identical state: they've seen -‘prog stmt’ and have the same unprocessed input remaining.
We say that -these parses have “merged.” - - At this point, the GLR parser requires a specification in the grammar -of how to choose between the competing parses. In the example above, -the two ‘%dprec’ declarations specify that Bison is to give precedence -to the parse that interprets the example as a ‘decl’, which implies that -‘x’ is a declarator. The parser therefore prints - - "x" y z + T - - The ‘%dprec’ declarations only come into play when more than one -parse survives. Consider a different input string for this parser: - - T (x) + y; - -This is another example of using GLR to parse an unambiguous construct, -as shown in the previous section (*note Simple GLR Parsers::). Here, -there is no ambiguity (this cannot be parsed as a declaration). -However, at the time the Bison parser encounters ‘x’, it does not have -enough information to resolve the reduce/reduce conflict (again, between -‘x’ as an ‘expr’ or a ‘declarator’). In this case, no precedence -declaration is used. Again, the parser splits into two, one assuming -that ‘x’ is an ‘expr’, and the other assuming ‘x’ is a ‘declarator’. -The second of these parsers then vanishes when it sees ‘+’, and the -parser prints - - x T y + - - Suppose that instead of resolving the ambiguity, you wanted to see -all the possibilities. For this purpose, you must merge the semantic -actions of the two possible parsers, rather than choosing one over the -other. 
To do so, you could change the declaration of ‘stmt’ as follows: - - stmt: - expr ';' %merge <stmt_merge> - | decl %merge <stmt_merge> - ; - -and define the ‘stmt_merge’ function as: - - static YYSTYPE - stmt_merge (YYSTYPE x0, YYSTYPE x1) - { - printf ("<OR> "); - return ""; - } - -with an accompanying forward declaration in the C declarations at the -beginning of the file: - - %{ - static YYSTYPE stmt_merge (YYSTYPE x0, YYSTYPE x1); - %} - -With these declarations, the resulting parser parses the first example -as both an ‘expr’ and a ‘decl’, and prints - - "x" y z + T <OR> x T y z + = - - Bison requires that all of the productions that participate in any -particular merge have identical ‘%merge’ clauses. Otherwise, the -ambiguity would be unresolvable, and the parser will report an error -during any parse that results in the offending merge. - - - The signature of the merger depends on the type of the symbol. In -the previous example, the merged-to symbol (‘stmt’) does not have a -specific type, and the merger is - - YYSTYPE stmt_merge (YYSTYPE x0, YYSTYPE x1); - -However, if ‘stmt’ had a declared type, e.g., - - %type <Node *> stmt; - -or - - %union { - Node *node; - ... - }; - %type <node> stmt; - -then the prototype of the merger must be: - - Node *stmt_merge (YYSTYPE x0, YYSTYPE x1); - -(This signature might be a mistake originally, and maybe it should have -been ‘Node *stmt_merge (Node *x0, Node *x1)’. If you have an opinion -about it, please let us know.) - - ---------- Footnotes ---------- - - (1) The sources of an extended version of this example are available -in C as ‘examples/c/glr’, and in C++ as ‘examples/c++/glr’. - - -File: bison.info, Node: GLR Semantic Actions, Next: Semantic Predicates, Prev: Merging GLR Parses, Up: GLR Parsers - -1.5.3 GLR Semantic Actions --------------------------- - -The nature of GLR parsing and the structure of the generated parsers -give rise to certain restrictions on semantic values and actions. - -1.5.3.1 Deferred semantic actions -.................................
- -By definition, a deferred semantic action is not performed at the same -time as the associated reduction. This raises caveats for several Bison -features you might use in a semantic action in a GLR parser. - - In any semantic action, you can examine ‘yychar’ to determine the -kind of the lookahead token present at the time of the associated -reduction. After checking that ‘yychar’ is not set to ‘YYEMPTY’ or -‘YYEOF’, you can then examine ‘yylval’ and ‘yylloc’ to determine the -lookahead token's semantic value and location, if any. In a nondeferred -semantic action, you can also modify any of these variables to influence -syntax analysis. *Note Lookahead::. - - In a deferred semantic action, it's too late to influence syntax -analysis. In this case, ‘yychar’, ‘yylval’, and ‘yylloc’ are set to -shallow copies of the values they had at the time of the associated -reduction. For this reason alone, modifying them is dangerous. -Moreover, the result of modifying them is undefined and subject to -change with future versions of Bison. For example, if a semantic action -might be deferred, you should never write it to invoke ‘yyclearin’ -(*note Action Features::) or to attempt to free memory referenced by -‘yylval’. - -1.5.3.2 YYERROR -............... - -Another Bison feature requiring special consideration is ‘YYERROR’ -(*note Action Features::), which you can invoke in a semantic action to -initiate error recovery. During deterministic GLR operation, the effect -of ‘YYERROR’ is the same as its effect in a deterministic parser. The -effect in a deferred action is similar, but the precise point of the -error is undefined; instead, the parser reverts to deterministic -operation, selecting an unspecified stack on which to continue with a -syntax error. In a semantic predicate (see *note Semantic Predicates::) -during nondeterministic parsing, ‘YYERROR’ silently prunes the parse -that invoked the test. 
- -1.5.3.3 Restrictions on semantic values and locations -..................................................... - -GLR parsers require that you use POD (Plain Old Data) types for semantic -values and location types when using the generated parsers as C++ code. - - -File: bison.info, Node: Semantic Predicates, Prev: GLR Semantic Actions, Up: GLR Parsers - -1.5.4 Controlling a Parse with Arbitrary Predicates ---------------------------------------------------- - -In addition to the ‘%dprec’ and ‘%merge’ directives, GLR parsers allow -you to reject parses on the basis of arbitrary computations executed in -user code, without having Bison treat this rejection as an error if -there are alternative parses. For example, - - widget: - %?{ new_syntax } "widget" id new_args { $$ = f($3, $4); } - | %?{ !new_syntax } "widget" id old_args { $$ = f($3, $4); } - ; - -is one way to allow the same parser to handle two different syntaxes for -widgets. The clause preceded by ‘%?’ is treated like an ordinary -midrule action, except that its text is handled as an expression and is -always evaluated immediately (even when in nondeterministic mode). If -the expression yields 0 (false), the clause is treated as a syntax -error, which, in a nondeterministic parser, causes the stack in which it -is reduced to die. In a deterministic parser, it acts like ‘YYERROR’. - - As the example shows, predicates otherwise look like semantic -actions, and therefore you must take them into account when determining -the numbers to use for denoting the semantic values of right-hand side -symbols. Predicate actions, however, have no defined value, and may not -be given labels. - - There is a subtle difference between semantic predicates and ordinary -actions in nondeterministic mode, since the latter are deferred. 
For -example, we could try to rewrite the previous example as - - widget: - { if (!new_syntax) YYERROR; } - "widget" id new_args { $$ = f($3, $4); } - | { if (new_syntax) YYERROR; } - "widget" id old_args { $$ = f($3, $4); } - ; - -(reversing the sense of the predicate tests to cause an error when they -are false). However, this does _not_ have the same effect if ‘new_args’ -and ‘old_args’ have overlapping syntax. Since the midrule actions -testing ‘new_syntax’ are deferred, a GLR parser first encounters the -unresolved ambiguous reduction for cases where ‘new_args’ and ‘old_args’ -recognize the same string _before_ performing the tests of ‘new_syntax’. -It therefore reports an error. - - Finally, be careful in writing predicates: deferred actions have not -been evaluated, so that using them in a predicate will have undefined -effects. - - -File: bison.info, Node: Locations, Next: Bison Parser, Prev: GLR Parsers, Up: Concepts - -1.6 Locations -============= - -Many applications, like interpreters or compilers, have to produce -verbose and useful error messages. To achieve this, one must be able to -keep track of the “textual location”, or “location”, of each syntactic -construct. Bison provides a mechanism for handling these locations. - - Each token has a semantic value. In a similar fashion, each token -has an associated location, but the type of locations is the same for -all tokens and groupings. Moreover, the output parser is equipped with -a default data structure for storing locations (*note Tracking -Locations::, for more details). - - Like semantic values, locations can be reached in actions using a -dedicated set of constructs. In the example above, the location of the -whole grouping is ‘@$’, while the locations of the subexpressions are -‘@1’ and ‘@3’. - - When a rule is matched, a default action is used to compute the -semantic value of its left hand side (*note Actions::). In the same -way, another default action is used for locations. 
However, the action -for locations is general enough for most cases, meaning there is usually -no need to describe for each rule how ‘@$’ should be formed. When -building a new location for a given grouping, the default behavior of -the output parser is to take the beginning of the first symbol, and the -end of the last symbol. - - -File: bison.info, Node: Bison Parser, Next: Stages, Prev: Locations, Up: Concepts - -1.7 Bison Output: the Parser Implementation File -================================================ - -When you run Bison, you give it a Bison grammar file as input. The most -important output is a C source file that implements a parser for the -language described by the grammar. This parser is called a “Bison -parser”, and this file is called a “Bison parser implementation file”. -Keep in mind that the Bison utility and the Bison parser are two -distinct programs: the Bison utility is a program whose output is the -Bison parser implementation file that becomes part of your program. - - The job of the Bison parser is to group tokens into groupings -according to the grammar rules--for example, to build identifiers and -operators into expressions. As it does this, it runs the actions for -the grammar rules it uses. - - The tokens come from a function called the “lexical analyzer” that -you must supply in some fashion (such as by writing it in C). The Bison -parser calls the lexical analyzer each time it wants a new token. It -doesn't know what is "inside" the tokens (though their semantic values -may reflect this). Typically the lexical analyzer makes the tokens by -parsing characters of text, but Bison does not depend on this. *Note -Lexical::. - - The Bison parser implementation file is C code which defines a -function named ‘yyparse’ which implements that grammar. This function -does not make a complete C program: you must supply some additional -functions. One is the lexical analyzer. 
Another is an error-reporting -function which the parser calls to report an error. In addition, a -complete C program must start with a function called ‘main’; you have to -provide this, and arrange for it to call ‘yyparse’ or the parser will -never run. *Note Interface::. - - Aside from the token kind names and the symbols in the actions you -write, all symbols defined in the Bison parser implementation file -itself begin with ‘yy’ or ‘YY’. This includes interface functions such -as the lexical analyzer function ‘yylex’, the error reporting function -‘yyerror’ and the parser function ‘yyparse’ itself. This also includes -numerous identifiers used for internal purposes. Therefore, you should -avoid using C identifiers starting with ‘yy’ or ‘YY’ in the Bison -grammar file except for the ones defined in this manual. Also, you -should avoid using the C identifiers ‘malloc’ and ‘free’ for anything -other than their usual meanings. - - In some cases the Bison parser implementation file includes system -headers, and in those cases your code should respect the identifiers -reserved by those headers. On some non-GNU hosts, ‘<limits.h>’, -‘<stddef.h>’, ‘<stdint.h>’ (if available), and ‘<stdlib.h>’ are included -to declare memory allocators and integer types and constants. -‘<libintl.h>’ is included if message translation is in use (*note -Internationalization::). Other system headers may be included if you -define ‘YYDEBUG’ (*note Tracing::) or ‘YYSTACK_USE_ALLOCA’ (*note Table -of Symbols::) to a nonzero value. - - -File: bison.info, Node: Stages, Next: Grammar Layout, Prev: Bison Parser, Up: Concepts - -1.8 Stages in Using Bison -========================= - -The actual language-design process using Bison, from grammar -specification to a working compiler or interpreter, has these parts: - - 1. Formally specify the grammar in a form recognized by Bison (*note - Grammar File::). For each grammatical rule in the language, - describe the action that is to be taken when an instance of that - rule is recognized.
The action is described by a sequence of C - statements. - - 2. Write a lexical analyzer to process input and pass tokens to the - parser. The lexical analyzer may be written by hand in C (*note - Lexical::). It could also be produced using Lex, but the use of - Lex is not discussed in this manual. - - 3. Write a controlling function that calls the Bison-produced parser. - - 4. Write error-reporting routines. - - To turn this source code as written into a runnable program, you must -follow these steps: - - 1. Run Bison on the grammar to produce the parser. - - 2. Compile the code output by Bison, as well as any other source - files. - - 3. Link the object files to produce the finished product. - - -File: bison.info, Node: Grammar Layout, Prev: Stages, Up: Concepts - -1.9 The Overall Layout of a Bison Grammar -========================================= - -The input file for the Bison utility is a “Bison grammar file”. The -general form of a Bison grammar file is as follows: - - %{ - PROLOGUE - %} - - BISON DECLARATIONS - - %% - GRAMMAR RULES - %% - EPILOGUE - -The ‘%%’, ‘%{’ and ‘%}’ are punctuation that appears in every Bison -grammar file to separate the sections. - - The prologue may define types and variables used in the actions. You -can also use preprocessor commands to define macros used there, and use -‘#include’ to include header files that do any of these things. You -need to declare the lexical analyzer ‘yylex’ and the error printer -‘yyerror’ here, along with any other global identifiers used by the -actions in the grammar rules. - - The Bison declarations declare the names of the terminal and -nonterminal symbols, and may also describe operator precedence and the -data types of semantic values of various symbols. - - The grammar rules define how to construct each nonterminal symbol -from its parts. - - The epilogue can contain any code you want to use. Often the -definitions of functions declared in the prologue go here. 
In a simple -program, all the rest of the program can go here. - - -File: bison.info, Node: Examples, Next: Grammar File, Prev: Concepts, Up: Top - -2 Examples -********** - -Now we show and explain several sample programs written using Bison: a -Reverse Polish Notation calculator, an algebraic (infix) notation -calculator -- later extended to track "locations" -- and a -multi-function calculator. All produce usable, though limited, -interactive desk-top calculators. - - These examples are simple, but Bison grammars for real programming -languages are written the same way. You can copy these examples into a -source file to try them. - - - Bison comes with several examples (including for the different target -languages). If this package is properly installed, you shall find them -in ‘PREFIX/share/doc/bison/examples’, where PREFIX is the root of the -installation, probably something like ‘/usr/local’ or ‘/usr’. - -* Menu: - -* RPN Calc:: Reverse Polish Notation Calculator; - a first example with no operator precedence. -* Infix Calc:: Infix (algebraic) notation calculator. - Operator precedence is introduced. -* Simple Error Recovery:: Continuing after syntax errors. -* Location Tracking Calc:: Demonstrating the use of @N and @$. -* Multi-function Calc:: Calculator with memory and trig functions. - It uses multiple data-types for semantic values. -* Exercises:: Ideas for improving the multi-function calculator. - - -File: bison.info, Node: RPN Calc, Next: Infix Calc, Up: Examples - -2.1 Reverse Polish Notation Calculator -====================================== - -The first example(1) is that of a simple double-precision “Reverse -Polish Notation” calculator (a calculator using postfix operators). -This example provides a good starting point, since operator precedence -is not an issue. The second example will illustrate how operator -precedence is handled. - - The source code for this calculator is named ‘rpcalc.y’. 
The ‘.y’ -extension is a convention used for Bison grammar files. - -* Menu: - -* Rpcalc Declarations:: Prologue (declarations) for rpcalc. -* Rpcalc Rules:: Grammar Rules for rpcalc, with explanation. -* Rpcalc Lexer:: The lexical analyzer. -* Rpcalc Main:: The controlling function. -* Rpcalc Error:: The error reporting function. -* Rpcalc Generate:: Running Bison on the grammar file. -* Rpcalc Compile:: Run the C compiler on the output code. - - ---------- Footnotes ---------- - - (1) The sources of ‘rpcalc’ are available as ‘examples/c/rpcalc’. - - -File: bison.info, Node: Rpcalc Declarations, Next: Rpcalc Rules, Up: RPN Calc - -2.1.1 Declarations for ‘rpcalc’ -------------------------------- - -Here are the C and Bison declarations for the Reverse Polish Notation -calculator. As in C, comments are placed between ‘/*...*/’ or after -‘//’. - - /* Reverse Polish Notation calculator. */ - - %{ - #include <stdio.h> - #include <math.h> - int yylex (void); - void yyerror (char const *); - %} - - %define api.value.type {double} - %token NUM - - %% /* Grammar rules and actions follow. */ - - The declarations section (*note Prologue::) contains two preprocessor -directives and two forward declarations. - - The ‘#include’ directive is used to declare the exponentiation -function ‘pow’. - - The forward declarations for ‘yylex’ and ‘yyerror’ are needed because -the C language requires that functions be declared before they are used. -These functions will be defined in the epilogue, but the parser calls -them so they must be declared in the prologue. - - The second section, Bison declarations, provides information to Bison -about the tokens and their types (*note Bison Declarations::). - - The ‘%define’ directive defines the variable ‘api.value.type’, thus -specifying the C data type for semantic values of both tokens and -groupings (*note Value Type::). The Bison parser will use whatever type -‘api.value.type’ is defined as; if you don't define it, ‘int’ is the -default.
Because we specify ‘{double}’, each token and each expression -has an associated value, which is a floating point number. C code can -use ‘YYSTYPE’ to refer to the value ‘api.value.type’. - - Each terminal symbol that is not a single-character literal must be -declared. (Single-character literals normally don't need to be -declared.) In this example, all the arithmetic operators are designated -by single-character literals, so the only terminal symbol that needs to -be declared is ‘NUM’, the token kind for numeric constants. - - -File: bison.info, Node: Rpcalc Rules, Next: Rpcalc Lexer, Prev: Rpcalc Declarations, Up: RPN Calc - -2.1.2 Grammar Rules for ‘rpcalc’ --------------------------------- - -Here are the grammar rules for the Reverse Polish Notation calculator. - - input: - %empty - | input line - ; - - line: - '\n' - | exp '\n' { printf ("%.10g\n", $1); } - ; - - exp: - NUM - | exp exp '+' { $$ = $1 + $2; } - | exp exp '-' { $$ = $1 - $2; } - | exp exp '*' { $$ = $1 * $2; } - | exp exp '/' { $$ = $1 / $2; } - | exp exp '^' { $$ = pow ($1, $2); } /* Exponentiation */ - | exp 'n' { $$ = -$1; } /* Unary minus */ - ; - %% - - The groupings of the rpcalc "language" defined here are the -expression (given the name ‘exp’), the line of input (‘line’), and the -complete input transcript (‘input’). Each of these nonterminal symbols -has several alternate rules, joined by the vertical bar ‘|’ which is -read as "or". The following sections explain what these rules mean. - - The semantics of the language is determined by the actions taken when -a grouping is recognized. The actions are the C code that appears -inside braces. *Note Actions::. - - You must specify these actions in C, but Bison provides the means for -passing semantic values between the rules. In each action, the -pseudo-variable ‘$$’ stands for the semantic value for the grouping that -the rule is going to construct. Assigning a value to ‘$$’ is the main -job of most actions. 
The semantic values of the components of the rule -are referred to as ‘$1’, ‘$2’, and so on. - -* Menu: - -* Rpcalc Input:: Explanation of the ‘input’ nonterminal -* Rpcalc Line:: Explanation of the ‘line’ nonterminal -* Rpcalc Exp:: Explanation of the ‘exp’ nonterminal - - -File: bison.info, Node: Rpcalc Input, Next: Rpcalc Line, Up: Rpcalc Rules - -2.1.2.1 Explanation of ‘input’ -.............................. - -Consider the definition of ‘input’: - - input: - %empty - | input line - ; - - This definition reads as follows: "A complete input is either an -empty string, or a complete input followed by an input line". Notice -that "complete input" is defined in terms of itself. This definition is -said to be “left recursive” since ‘input’ appears always as the leftmost -symbol in the sequence. *Note Recursion::. - - The first alternative is empty because there are no symbols between -the colon and the first ‘|’; this means that ‘input’ can match an empty -string of input (no tokens). We write the rules this way because it is -legitimate to type ‘Ctrl-d’ right after you start the calculator. It's -conventional to put an empty alternative first and to use the (optional) -‘%empty’ directive, or to write the comment ‘/* empty */’ in it (*note -Empty Rules::). - - The second alternate rule (‘input line’) handles all nontrivial -input. It means, "After reading any number of lines, read one more line -if possible." The left recursion makes this rule into a loop. Since -the first alternative matches empty input, the loop can be executed zero -or more times. - - The parser function ‘yyparse’ continues to process input until a -grammatical error is seen or the lexical analyzer says there are no more -input tokens; we will arrange for the latter to happen at end-of-input. - - -File: bison.info, Node: Rpcalc Line, Next: Rpcalc Exp, Prev: Rpcalc Input, Up: Rpcalc Rules - -2.1.2.2 Explanation of ‘line’ -............................. 
- -Now consider the definition of ‘line’: - - line: - '\n' - | exp '\n' { printf ("%.10g\n", $1); } - ; - - The first alternative is a token which is a newline character; this -means that rpcalc accepts a blank line (and ignores it, since there is -no action). The second alternative is an expression followed by a -newline. This is the alternative that makes rpcalc useful. The -semantic value of the ‘exp’ grouping is the value of ‘$1’ because the -‘exp’ in question is the first symbol in the alternative. The action -prints this value, which is the result of the computation the user asked -for. - - This action is unusual because it does not assign a value to ‘$$’. -As a consequence, the semantic value associated with the ‘line’ is -uninitialized (its value will be unpredictable). This would be a bug if -that value were ever used, but we don't use it: once rpcalc has printed -the value of the user's input line, that value is no longer needed. - - -File: bison.info, Node: Rpcalc Exp, Prev: Rpcalc Line, Up: Rpcalc Rules - -2.1.2.3 Explanation of ‘exp’ -............................ - -The ‘exp’ grouping has several rules, one for each kind of expression. -The first rule handles the simplest expressions: those that are just -numbers. The second handles an addition-expression, which looks like -two expressions followed by a plus-sign. The third handles subtraction, -and so on. - - exp: - NUM - | exp exp '+' { $$ = $1 + $2; } - | exp exp '-' { $$ = $1 - $2; } - ... - ; - - We have used ‘|’ to join all the rules for ‘exp’, but we could -equally well have written them separately: - - exp: NUM; - exp: exp exp '+' { $$ = $1 + $2; }; - exp: exp exp '-' { $$ = $1 - $2; }; - ... - - Most of the rules have actions that compute the value of the -expression in terms of the value of its parts. For example, in the rule -for addition, ‘$1’ refers to the first component ‘exp’ and ‘$2’ refers -to the second one. 
The third component, ‘'+'’, has no meaningful -associated semantic value, but if it had one you could refer to it as -‘$3’. The first rule relies on the implicit default action: ‘{ $$ = $1; -}’. - - When ‘yyparse’ recognizes a sum expression using this rule, the sum -of the two subexpressions' values is produced as the value of the entire -expression. *Note Actions::. - - You don't have to give an action for every rule. When a rule has no -action, Bison by default copies the value of ‘$1’ into ‘$$’. This is -what happens in the first rule (the one that uses ‘NUM’). - - The formatting shown here is the recommended convention, but Bison -does not require it. You can add or change white space as much as you -wish. For example, this: - - exp: NUM | exp exp '+' {$$ = $1 + $2; } | ... ; - -means the same thing as this: - - exp: - NUM - | exp exp '+' { $$ = $1 + $2; } - | ... - ; - -The latter, however, is much more readable. - - -File: bison.info, Node: Rpcalc Lexer, Next: Rpcalc Main, Prev: Rpcalc Rules, Up: RPN Calc - -2.1.3 The ‘rpcalc’ Lexical Analyzer ------------------------------------ - -The lexical analyzer's job is low-level parsing: converting characters -or sequences of characters into tokens. The Bison parser gets its -tokens by calling the lexical analyzer. *Note Lexical::. - - Only a simple lexical analyzer is needed for the RPN calculator. -This lexical analyzer skips blanks and tabs, then reads in numbers as -‘double’ and returns them as ‘NUM’ tokens. Any other character that -isn't part of a number is a separate token. Note that the token-code -for such a single-character token is the character itself. - - The return value of the lexical analyzer function is a numeric code -which represents a token kind. The same text used in Bison rules to -stand for this token kind is also a C expression for the numeric code of -the kind. This works in two ways. 
If the token kind is a character -literal, then its numeric code is that of the character; you can use the -same character literal in the lexical analyzer to express the number. -If the token kind is an identifier, that identifier is defined by Bison -as a C enum whose definition is the appropriate code. In this example, -therefore, ‘NUM’ becomes an enum for ‘yylex’ to use. - - The semantic value of the token (if it has one) is stored into the -global variable ‘yylval’, which is where the Bison parser will look for -it. (The C data type of ‘yylval’ is ‘YYSTYPE’, whose value was defined -at the beginning of the grammar via ‘%define api.value.type {double}’; -*note Rpcalc Declarations::.) - - A token kind code of zero is returned if the end-of-input is -encountered. (Bison recognizes any nonpositive value as indicating -end-of-input.) - - Here is the code for the lexical analyzer: - - /* The lexical analyzer returns a double floating point - number on the stack and the token NUM, or the numeric code - of the character read if not a number. It skips all blanks - and tabs, and returns 0 for end-of-input. */ - - #include <ctype.h> - #include <stdlib.h> - - int - yylex (void) - { - int c = getchar (); - /* Skip white space. */ - while (c == ' ' || c == '\t') - c = getchar (); - /* Process numbers. */ - if (c == '.' || isdigit (c)) - { - ungetc (c, stdin); - if (scanf ("%lf", &yylval) != 1) - abort (); - return NUM; - } - /* Return end-of-input. */ - else if (c == EOF) - return YYEOF; - /* Return a single char. */ - else - return c; - } - - -File: bison.info, Node: Rpcalc Main, Next: Rpcalc Error, Prev: Rpcalc Lexer, Up: RPN Calc - -2.1.4 The Controlling Function ------------------------------- - -In keeping with the spirit of this example, the controlling function is -kept to the bare minimum. The only requirement is that it call -‘yyparse’ to start the process of parsing.
- - int - main (void) - { - return yyparse (); - } - - -File: bison.info, Node: Rpcalc Error, Next: Rpcalc Generate, Prev: Rpcalc Main, Up: RPN Calc - -2.1.5 The Error Reporting Routine ---------------------------------- - -When ‘yyparse’ detects a syntax error, it calls the error reporting -function ‘yyerror’ to print an error message (usually but not always -‘"syntax error"’). It is up to the programmer to supply ‘yyerror’ -(*note Interface::), so here is the definition we will use: - - #include <stdio.h> - - /* Called by yyparse on error. */ - void - yyerror (char const *s) - { - fprintf (stderr, "%s\n", s); - } - - After ‘yyerror’ returns, the Bison parser may recover from the error -and continue parsing if the grammar contains a suitable error rule -(*note Error Recovery::). Otherwise, ‘yyparse’ returns nonzero. We -have not written any error rules in this example, so any invalid input -will cause the calculator program to exit. This is not clean behavior -for a real calculator, but it is adequate for the first example. - - -File: bison.info, Node: Rpcalc Generate, Next: Rpcalc Compile, Prev: Rpcalc Error, Up: RPN Calc - -2.1.6 Running Bison to Make the Parser --------------------------------------- - -Before running Bison to produce a parser, we need to decide how to -arrange all the source code in one or more source files. For such a -simple example, the easiest thing is to put everything in one file, the -grammar file. The definitions of ‘yylex’, ‘yyerror’ and ‘main’ go at -the end, in the epilogue of the grammar file (*note Grammar Layout::). - - For a large project, you would probably have several source files, -and use ‘make’ to arrange to recompile them. - - With all the source in the grammar file, you use the following -command to convert it into a parser implementation file: - - $ bison FILE.y - -In this example, the grammar file is called ‘rpcalc.y’ (for "Reverse -Polish CALCulator").
Bison produces a parser implementation file named -‘FILE.tab.c’, removing the ‘.y’ from the grammar file name. The parser -implementation file contains the source code for ‘yyparse’. The -additional functions in the grammar file (‘yylex’, ‘yyerror’ and ‘main’) -are copied verbatim to the parser implementation file. - - -File: bison.info, Node: Rpcalc Compile, Prev: Rpcalc Generate, Up: RPN Calc - -2.1.7 Compiling the Parser Implementation File ----------------------------------------------- - -Here is how to compile and run the parser implementation file: - - # List files in current directory. - $ ls - rpcalc.tab.c rpcalc.y - - # Compile the Bison parser. - # ‘-lm’ tells compiler to search math library for ‘pow’. - $ cc -lm -o rpcalc rpcalc.tab.c - - # List files again. - $ ls - rpcalc rpcalc.tab.c rpcalc.y - - The file ‘rpcalc’ now contains the executable code. Here is an -example session using ‘rpcalc’. - - $ rpcalc - 4 9 + - ⇒ 13 - 3 7 + 3 4 5 *+- - ⇒ -13 - 3 7 + 3 4 5 * + - n Note the unary minus, ‘n’ - ⇒ 13 - 5 6 / 4 n + - ⇒ -3.166666667 - 3 4 ^ Exponentiation - ⇒ 81 - ^D End-of-file indicator - $ - - -File: bison.info, Node: Infix Calc, Next: Simple Error Recovery, Prev: RPN Calc, Up: Examples - -2.2 Infix Notation Calculator: ‘calc’ -===================================== - -We now modify rpcalc to handle infix operators instead of postfix.(1) -Infix notation involves the concept of operator precedence and the need -for parentheses nested to arbitrary depth. Here is the Bison code for -‘calc.y’, an infix desk-top calculator. - - /* Infix notation calculator. */ - - %{ - #include <math.h> - #include <stdio.h> - int yylex (void); - void yyerror (char const *); - %} - - /* Bison declarations. */ - %define api.value.type {double} - %token NUM - %left '-' '+' - %left '*' '/' - %precedence NEG /* negation--unary minus */ - %right '^' /* exponentiation */ - - %% /* The grammar follows.
*/ - input: - %empty - | input line - ; - - line: - '\n' - | exp '\n' { printf ("\t%.10g\n", $1); } - ; - - exp: - NUM - | exp '+' exp { $$ = $1 + $3; } - | exp '-' exp { $$ = $1 - $3; } - | exp '*' exp { $$ = $1 * $3; } - | exp '/' exp { $$ = $1 / $3; } - | '-' exp %prec NEG { $$ = -$2; } - | exp '^' exp { $$ = pow ($1, $3); } - | '(' exp ')' { $$ = $2; } - ; - %% - -The functions ‘yylex’, ‘yyerror’ and ‘main’ can be the same as before. - - There are two important new features shown in this code. - - In the second section (Bison declarations), ‘%left’ declares token -kinds and says they are left-associative operators. The declarations -‘%left’ and ‘%right’ (right associativity) take the place of ‘%token’ -which is used to declare a token kind name without -associativity/precedence. (These tokens are single-character literals, -which ordinarily don't need to be declared. We declare them here to -specify the associativity/precedence.) - - Operator precedence is determined by the line ordering of the -declarations; the higher the line number of the declaration (lower on -the page or screen), the higher the precedence. Hence, exponentiation -has the highest precedence, unary minus (‘NEG’) is next, followed by ‘*’ -and ‘/’, and so on. Unary minus is not associative, only precedence -matters (‘%precedence’). *Note Precedence::. - - The other important new feature is the ‘%prec’ in the grammar section -for the unary minus operator. The ‘%prec’ simply instructs Bison that -the rule ‘| '-' exp’ has the same precedence as ‘NEG’--in this case the -next-to-highest. *Note Contextual Precedence::. - - Here is a sample run of ‘calc.y’: - - $ calc - 4 + 4.5 - (34/(8*3+-3)) - 6.880952381 - -56 + 2 - -54 - 3 ^ 2 - 9 - - ---------- Footnotes ---------- - - (1) A similar example, but using an unambiguous grammar rather than -precedence and associativity annotations, is available as -‘examples/c/calc’.
- - -File: bison.info, Node: Simple Error Recovery, Next: Location Tracking Calc, Prev: Infix Calc, Up: Examples - -2.3 Simple Error Recovery -========================= - -Up to this point, this manual has not addressed the issue of “error -recovery”--how to continue parsing after the parser detects a syntax -error. All we have handled is error reporting with ‘yyerror’. Recall -that by default ‘yyparse’ returns after calling ‘yyerror’. This means -that an erroneous input line causes the calculator program to exit. Now -we show how to rectify this deficiency. - - The Bison language itself includes the reserved word ‘error’, which -may be included in the grammar rules. In the example below it has been -added to one of the alternatives for ‘line’: - - line: - '\n' - | exp '\n' { printf ("\t%.10g\n", $1); } - | error '\n' { yyerrok; } - ; - - This addition to the grammar allows for simple error recovery in the -event of a syntax error. If an expression that cannot be evaluated is -read, the error will be recognized by the third rule for ‘line’, and -parsing will continue. (The ‘yyerror’ function is still called upon to -print its message as well.) The action executes the statement -‘yyerrok’, a macro defined automatically by Bison; its meaning is that -error recovery is complete (*note Error Recovery::). Note the -difference between ‘yyerrok’ and ‘yyerror’; neither one is a misprint. - - This form of error recovery deals with syntax errors. There are -other kinds of errors; for example, division by zero, which raises an -exception signal that is normally fatal. A real calculator program must -handle this signal and use ‘longjmp’ to return to ‘main’ and resume -parsing input lines; it would also have to discard the rest of the -current line of input. We won't discuss this issue further because it -is not specific to Bison programs. 
- - -File: bison.info, Node: Location Tracking Calc, Next: Multi-function Calc, Prev: Simple Error Recovery, Up: Examples - -2.4 Location Tracking Calculator: ‘ltcalc’ -========================================== - -This example extends the infix notation calculator with location -tracking. This feature will be used to improve the error messages. For -the sake of clarity, this example is a simple integer calculator, since -most of the work needed to use locations will be done in the lexical -analyzer. - -* Menu: - -* Ltcalc Declarations:: Bison and C declarations for ltcalc. -* Ltcalc Rules:: Grammar rules for ltcalc, with explanations. -* Ltcalc Lexer:: The lexical analyzer. - - -File: bison.info, Node: Ltcalc Declarations, Next: Ltcalc Rules, Up: Location Tracking Calc - -2.4.1 Declarations for ‘ltcalc’ -------------------------------- - -The C and Bison declarations for the location tracking calculator are -the same as the declarations for the infix notation calculator. - - /* Location tracking calculator. */ - - %{ - #include <math.h> - int yylex (void); - void yyerror (char const *); - %} - - /* Bison declarations. */ - %define api.value.type {int} - %token NUM - - %left '-' '+' - %left '*' '/' - %precedence NEG - %right '^' - - %% /* The grammar follows. */ - -Note there are no declarations specific to locations. Defining a data -type for storing locations is not needed: we will use the type provided -by default (*note Location Type::), which is a four member structure -with the following integer fields: ‘first_line’, ‘first_column’, -‘last_line’ and ‘last_column’. By convention, and in accordance with -the GNU Coding Standards and common practice, the line and column count -both start at 1. - - -File: bison.info, Node: Ltcalc Rules, Next: Ltcalc Lexer, Prev: Ltcalc Declarations, Up: Location Tracking Calc - -2.4.2 Grammar Rules for ‘ltcalc’ --------------------------------- - -Whether handling locations or not has no effect on the syntax of your -language.
Therefore, grammar rules for this example will be very close -to those of the previous example: we will only modify them to benefit -from the new information. - - Here, we will use locations to report divisions by zero, and locate -the wrong expressions or subexpressions. - - input: - %empty - | input line - ; - - line: - '\n' - | exp '\n' { printf ("%d\n", $1); } - ; - - exp: - NUM - | exp '+' exp { $$ = $1 + $3; } - | exp '-' exp { $$ = $1 - $3; } - | exp '*' exp { $$ = $1 * $3; } - | exp '/' exp - { - if ($3) - $$ = $1 / $3; - else - { - $$ = 1; - fprintf (stderr, "%d.%d-%d.%d: division by zero", - @3.first_line, @3.first_column, - @3.last_line, @3.last_column); - } - } - | '-' exp %prec NEG { $$ = -$2; } - | exp '^' exp { $$ = pow ($1, $3); } - | '(' exp ')' { $$ = $2; } - - This code shows how to reach locations inside of semantic actions, by -using the pseudo-variables ‘@N’ for rule components, and the -pseudo-variable ‘@$’ for groupings. - - We don't need to assign a value to ‘@$’: the output parser does it -automatically. By default, before executing the C code of each action, -‘@$’ is set to range from the beginning of ‘@1’ to the end of ‘@N’, for -a rule with N components. This behavior can be redefined (*note -Location Default Action::), and for very specific rules, ‘@$’ can be -computed by hand. - - -File: bison.info, Node: Ltcalc Lexer, Prev: Ltcalc Rules, Up: Location Tracking Calc - -2.4.3 The ‘ltcalc’ Lexical Analyzer. ------------------------------------- - -Until now, we relied on Bison's defaults to enable location tracking. -The next step is to rewrite the lexical analyzer, and make it able to -feed the parser with the token locations, as it already does for -semantic values. - - To this end, we must take into account every single character of the -input text, to keep the computed locations from being fuzzy or wrong: - - int - yylex (void) - { - int c; - - /* Skip white space.
*/ - while ((c = getchar ()) == ' ' || c == '\t') - ++yylloc.last_column; - - /* Step. */ - yylloc.first_line = yylloc.last_line; - yylloc.first_column = yylloc.last_column; - - /* Process numbers. */ - if (isdigit (c)) - { - yylval = c - '0'; - ++yylloc.last_column; - while (isdigit (c = getchar ())) - { - ++yylloc.last_column; - yylval = yylval * 10 + c - '0'; - } - ungetc (c, stdin); - return NUM; - } - - /* Return end-of-input. */ - if (c == EOF) - return YYEOF; - - /* Return a single char, and update location. */ - if (c == '\n') - { - ++yylloc.last_line; - yylloc.last_column = 0; - } - else - ++yylloc.last_column; - return c; - } - - Basically, the lexical analyzer performs the same processing as -before: it skips blanks and tabs, and reads numbers or single-character -tokens. In addition, it updates ‘yylloc’, the global variable (of type -‘YYLTYPE’) containing the token's location. - - Now, each time this function returns a token, the parser has its kind -as well as its semantic value, and its location in the text. The last -needed change is to initialize ‘yylloc’, for example in the controlling -function: - - int - main (void) - { - yylloc.first_line = yylloc.last_line = 1; - yylloc.first_column = yylloc.last_column = 0; - return yyparse (); - } - - Remember that computing locations is not a matter of syntax. Every -character must be associated to a location update, whether it is in -valid input, in comments, in literal strings, and so on. - - -File: bison.info, Node: Multi-function Calc, Next: Exercises, Prev: Location Tracking Calc, Up: Examples - -2.5 Multi-Function Calculator: ‘mfcalc’ -======================================= - -Now that the basics of Bison have been discussed, it is time to move on -to a more advanced problem.(1) The above calculators provided only five -functions, ‘+’, ‘-’, ‘*’, ‘/’ and ‘^’. It would be nice to have a -calculator that provides other mathematical functions such as ‘sin’, -‘cos’, etc. 
- - It is easy to add new operators to the infix calculator as long as -they are only single-character literals. The lexical analyzer ‘yylex’ -passes back all nonnumeric characters as tokens, so new grammar rules -suffice for adding a new operator. But we want something more flexible: -built-in functions whose syntax has this form: - - FUNCTION_NAME (ARGUMENT) - -At the same time, we will add memory to the calculator, by allowing you -to create named variables, store values in them, and use them later. -Here is a sample session with the multi-function calculator: - - $ mfcalc - pi = 3.141592653589 - ⇒ 3.1415926536 - sin(pi) - ⇒ 0.0000000000 - alpha = beta1 = 2.3 - ⇒ 2.3000000000 - alpha - ⇒ 2.3000000000 - ln(alpha) - ⇒ 0.8329091229 - exp(ln(beta1)) - ⇒ 2.3000000000 - $ - - Note that multiple assignment and nested function calls are -permitted. - -* Menu: - -* Mfcalc Declarations:: Bison declarations for multi-function calculator. -* Mfcalc Rules:: Grammar rules for the calculator. -* Mfcalc Symbol Table:: Symbol table management subroutines. -* Mfcalc Lexer:: The lexical analyzer. -* Mfcalc Main:: The controlling function. - - ---------- Footnotes ---------- - - (1) The sources of ‘mfcalc’ are available as ‘examples/c/mfcalc’. - - -File: bison.info, Node: Mfcalc Declarations, Next: Mfcalc Rules, Up: Multi-function Calc - -2.5.1 Declarations for ‘mfcalc’ -------------------------------- - -Here are the C and Bison declarations for the multi-function calculator. - - %{ - #include <stdio.h> /* For printf, etc. */ - #include <math.h> /* For pow, used in the grammar. */ - #include "calc.h" /* Contains definition of 'symrec'. */ - int yylex (void); - void yyerror (char const *); - %} - - %define api.value.type union /* Generate YYSTYPE from these types: */ - %token <double> NUM /* Double precision number. */ - %token <symrec*> VAR FUN /* Symbol table pointer: variable/function.
*/ - %nterm <double> exp - - %precedence '=' - %left '-' '+' - %left '*' '/' - %precedence NEG /* negation--unary minus */ - %right '^' /* exponentiation */ - - The above grammar introduces only two new features of the Bison -language. These features allow semantic values to have various data -types (*note Multiple Types::). - - The special ‘union’ value assigned to the ‘%define’ variable -‘api.value.type’ specifies that the symbols are defined with their data -types. Bison will generate an appropriate definition of ‘YYSTYPE’ to -store these values. - - Since values can now have various types, it is necessary to associate -a type with each grammar symbol whose semantic value is used. These -symbols are ‘NUM’, ‘VAR’, ‘FUN’, and ‘exp’. Their declarations are -augmented with their data type (placed between angle brackets). For -instance, values of ‘NUM’ are stored in ‘double’. - - The Bison construct ‘%nterm’ is used for declaring nonterminal -symbols, just as ‘%token’ is used for declaring token kinds. We did -not use ‘%nterm’ before because nonterminal symbols are normally -declared implicitly by the rules that define them. But ‘exp’ must be -declared explicitly so we can specify its value type. *Note Type -Decl::. - - -File: bison.info, Node: Mfcalc Rules, Next: Mfcalc Symbol Table, Prev: Mfcalc Declarations, Up: Multi-function Calc - -2.5.2 Grammar Rules for ‘mfcalc’ --------------------------------- - -Here are the grammar rules for the multi-function calculator. Most of -them are copied directly from ‘calc’; three rules, those which mention -‘VAR’ or ‘FUN’, are new. - - %% /* The grammar follows.
*/ - input: - %empty - | input line - ; - - line: - '\n' - | exp '\n' { printf ("%.10g\n", $1); } - | error '\n' { yyerrok; } - ; - - exp: - NUM - | VAR { $$ = $1->value.var; } - | VAR '=' exp { $$ = $3; $1->value.var = $3; } - | FUN '(' exp ')' { $$ = $1->value.fun ($3); } - | exp '+' exp { $$ = $1 + $3; } - | exp '-' exp { $$ = $1 - $3; } - | exp '*' exp { $$ = $1 * $3; } - | exp '/' exp { $$ = $1 / $3; } - | '-' exp %prec NEG { $$ = -$2; } - | exp '^' exp { $$ = pow ($1, $3); } - | '(' exp ')' { $$ = $2; } - ; - /* End of grammar. */ - %% - - -File: bison.info, Node: Mfcalc Symbol Table, Next: Mfcalc Lexer, Prev: Mfcalc Rules, Up: Multi-function Calc - -2.5.3 The ‘mfcalc’ Symbol Table -------------------------------- - -The multi-function calculator requires a symbol table to keep track of -the names and meanings of variables and functions. This doesn't affect -the grammar rules (except for the actions) or the Bison declarations, -but it requires some additional C functions for support. - - The symbol table itself consists of a linked list of records. Its -definition, which is kept in the header ‘calc.h’, is as follows. It -provides for either functions or variables to be placed in the table. - - /* Function type. */ - typedef double (func_t) (double); - - /* Data type for links in the chain of symbols. */ - struct symrec - { - char *name; /* name of symbol */ - int type; /* type of symbol: either VAR or FUN */ - union - { - double var; /* value of a VAR */ - func_t *fun; /* value of a FUN */ - } value; - struct symrec *next; /* link field */ - }; - - typedef struct symrec symrec; - - /* The symbol table: a chain of 'struct symrec'. 
*/ - extern symrec *sym_table; - - symrec *putsym (char const *name, int sym_type); - symrec *getsym (char const *name); - - The new version of ‘main’ will call ‘init_table’ to initialize the -symbol table: - - struct init - { - char const *name; - func_t *fun; - }; - - struct init const funs[] = - { - { "atan", atan }, - { "cos", cos }, - { "exp", exp }, - { "ln", log }, - { "sin", sin }, - { "sqrt", sqrt }, - { 0, 0 }, - }; - - /* The symbol table: a chain of 'struct symrec'. */ - symrec *sym_table; - - /* Put functions in table. */ - static void - init_table (void) - { - for (int i = 0; funs[i].name; i++) - { - symrec *ptr = putsym (funs[i].name, FUN); - ptr->value.fun = funs[i].fun; - } - } - - By simply editing the initialization list and adding the necessary -include files, you can add additional functions to the calculator. - - Two important functions allow look-up and installation of symbols in -the symbol table. The function ‘putsym’ is passed a name and the kind -(‘VAR’ or ‘FUN’) of the object to be installed. The object is linked to -the front of the list, and a pointer to the object is returned. The -function ‘getsym’ is passed the name of the symbol to look up. If -found, a pointer to that symbol is returned; otherwise zero is returned. - - /* The mfcalc code assumes that malloc and realloc - always succeed, and that integer calculations - never overflow. Production-quality code should - not make these assumptions. */ - #include <assert.h> - #include <stdlib.h> /* malloc, realloc. */ - #include <string.h> /* strlen. */ - - symrec * - putsym (char const *name, int sym_type) - { - symrec *res = (symrec *) malloc (sizeof (symrec)); - res->name = strdup (name); - res->type = sym_type; - res->value.var = 0; /* Set value to 0 even if fun.
*/ - res->next = sym_table; - sym_table = res; - return res; - } - - symrec * - getsym (char const *name) - { - for (symrec *p = sym_table; p; p = p->next) - if (strcmp (p->name, name) == 0) - return p; - return NULL; - } - - -File: bison.info, Node: Mfcalc Lexer, Next: Mfcalc Main, Prev: Mfcalc Symbol Table, Up: Multi-function Calc - -2.5.4 The ‘mfcalc’ Lexer ------------------------- - -The function ‘yylex’ must now recognize variables, numeric values, and -the single-character arithmetic operators. Strings of alphanumeric -characters with a leading letter are recognized as either variables or -functions depending on what the symbol table says about them. - - The string is passed to ‘getsym’ for look up in the symbol table. If -the name appears in the table, a pointer to its location and its type -(‘VAR’ or ‘FUN’) is returned to ‘yyparse’. If it is not already in the -table, then it is installed as a ‘VAR’ using ‘putsym’. Again, a pointer -and its type (which must be ‘VAR’) is returned to ‘yyparse’. - - No change is needed in the handling of numeric values and arithmetic -operators in ‘yylex’. - - #include <ctype.h> - #include <stddef.h> - - int - yylex (void) - { - int c = getchar (); - - /* Ignore white space, get first nonwhite character. */ - while (c == ' ' || c == '\t') - c = getchar (); - - if (c == EOF) - return YYEOF; - - /* Char starts a number => parse the number. */ - if (c == '.' || isdigit (c)) - { - ungetc (c, stdin); - if (scanf ("%lf", &yylval.NUM) != 1) - abort (); - return NUM; - } - -Bison generated a definition of ‘YYSTYPE’ with a member named ‘NUM’ to -store the value of ‘NUM’ symbols. - - /* Char starts an identifier => read the name. */ - if (isalpha (c)) - { - static ptrdiff_t bufsize = 0; - static char *symbuf = 0; - ptrdiff_t i = 0; - do - { - /* If buffer is full, make it bigger. */ - if (bufsize <= i) - { - bufsize = 2 * bufsize + 40; - symbuf = realloc (symbuf, (size_t) bufsize); - } - /* Add this character to the buffer.
*/ - symbuf[i++] = (char) c; - /* Get another character. */ - c = getchar (); - } - while (isalnum (c)); - - ungetc (c, stdin); - symbuf[i] = '\0'; - - symrec *s = getsym (symbuf); - if (!s) - s = putsym (symbuf, VAR); - yylval.VAR = s; /* or yylval.FUN = s. */ - return s->type; - } - - /* Any other character is a token by itself. */ - return c; - } - - -File: bison.info, Node: Mfcalc Main, Prev: Mfcalc Lexer, Up: Multi-function Calc - -2.5.5 The ‘mfcalc’ Main ------------------------ - -The error reporting function is unchanged, and the new version of ‘main’ -includes a call to ‘init_table’ and sets the ‘yydebug’ on user demand -(*Note Tracing::, for details): - - /* Called by yyparse on error. */ - void yyerror (char const *s) - { - fprintf (stderr, "%s\n", s); - } - - int main (int argc, char const* argv[]) - { - /* Enable parse traces on option -p. */ - if (argc == 2 && strcmp(argv[1], "-p") == 0) - yydebug = 1; - init_table (); - return yyparse (); - } - - This program is both powerful and flexible. You may easily add new -functions, and it is a simple job to modify this code to install -predefined variables such as ‘pi’ or ‘e’ as well. - - -File: bison.info, Node: Exercises, Prev: Multi-function Calc, Up: Examples - -2.6 Exercises -============= - - 1. Add some new functions from ‘math.h’ to the initialization list. - - 2. Add another array that contains constants and their values. Then - modify ‘init_table’ to add these constants to the symbol table. It - will be easiest to give the constants type ‘VAR’. - - 3. Make the program report an error if the user refers to an - uninitialized variable in any way except to store a value in it. - - -File: bison.info, Node: Grammar File, Next: Interface, Prev: Examples, Up: Top - -3 Bison Grammar Files -********************* - -Bison takes as input a context-free grammar specification and produces a -C-language function that recognizes correct instances of the grammar. 
- - The Bison grammar file conventionally has a name ending in ‘.y’. -*Note Invocation::. - -* Menu: - -* Grammar Outline:: Overall layout of the grammar file. -* Symbols:: Terminal and nonterminal symbols. -* Rules:: How to write grammar rules. -* Semantics:: Semantic values and actions. -* Tracking Locations:: Locations and actions. -* Named References:: Using named references in actions. -* Declarations:: All kinds of Bison declarations are described here. -* Multiple Parsers:: Putting more than one Bison parser in one program. - - -File: bison.info, Node: Grammar Outline, Next: Symbols, Up: Grammar File - -3.1 Outline of a Bison Grammar -============================== - -A Bison grammar file has four main sections, shown here with the -appropriate delimiters: - - %{ - PROLOGUE - %} - - BISON DECLARATIONS - - %% - GRAMMAR RULES - %% - - EPILOGUE - - Comments enclosed in ‘/* ... */’ may appear in any of the sections. -As a GNU extension, ‘//’ introduces a comment that continues until end -of line. - -* Menu: - -* Prologue:: Syntax and usage of the prologue. -* Prologue Alternatives:: Syntax and usage of alternatives to the prologue. -* Bison Declarations:: Syntax and usage of the Bison declarations section. -* Grammar Rules:: Syntax and usage of the grammar rules section. -* Epilogue:: Syntax and usage of the epilogue. - - -File: bison.info, Node: Prologue, Next: Prologue Alternatives, Up: Grammar Outline - -3.1.1 The prologue ------------------- - -The PROLOGUE section contains macro definitions and declarations of -functions and variables that are used in the actions in the grammar -rules. These are copied to the beginning of the parser implementation -file so that they precede the definition of ‘yyparse’. You can use -‘#include’ to get the declarations from a header file. If you don't -need any C declarations, you may omit the ‘%{’ and ‘%}’ delimiters that -bracket this section. 
- - The PROLOGUE section is terminated by the first occurrence of ‘%}’ -that is outside a comment, a string literal, or a character constant. - - You may have more than one PROLOGUE section, intermixed with the -BISON DECLARATIONS. This allows you to have C and Bison declarations -that refer to each other. For example, the ‘%union’ declaration may use -types defined in a header file, and you may wish to prototype functions -that take arguments of type ‘YYSTYPE’. This can be done with two -PROLOGUE blocks, one before and one after the ‘%union’ declaration. - - %{ - #define _GNU_SOURCE - #include - #include "ptypes.h" - %} - - %union { - long n; - tree t; /* ‘tree’ is defined in ‘ptypes.h’. */ - } - - %{ - static void print_token (yytoken_kind_t token, YYSTYPE val); - %} - - ... - - When in doubt, it is usually safer to put prologue code before all -Bison declarations, rather than after. For example, any definitions of -feature test macros like ‘_GNU_SOURCE’ or ‘_POSIX_C_SOURCE’ should -appear before all Bison declarations, as feature test macros can affect -the behavior of Bison-generated ‘#include’ directives. - - -File: bison.info, Node: Prologue Alternatives, Next: Bison Declarations, Prev: Prologue, Up: Grammar Outline - -3.1.2 Prologue Alternatives ---------------------------- - -The functionality of PROLOGUE sections can often be subtle and -inflexible. As an alternative, Bison provides a ‘%code’ directive with -an explicit qualifier field, which identifies the purpose of the code -and thus the location(s) where Bison should generate it. For C/C++, the -qualifier can be omitted for the default location, or it can be one of -‘requires’, ‘provides’, ‘top’. *Note %code Summary::. - - Look again at the example of the previous section: - - %{ - #define _GNU_SOURCE - #include - #include "ptypes.h" - %} - - %union { - long n; - tree t; /* ‘tree’ is defined in ‘ptypes.h’. */ - } - - %{ - static void print_token (yytoken_kind_t token, YYSTYPE val); - %} - - ... 
- -Notice that there are two PROLOGUE sections here, but there's a subtle -distinction between their functionality. For example, if you decide to -override Bison's default definition for ‘YYLTYPE’, in which PROLOGUE -section should you write your new definition?(1) You should write it in -the first since Bison will insert that code into the parser -implementation file _before_ the default ‘YYLTYPE’ definition. In which -PROLOGUE section should you prototype an internal function, -‘trace_token’, that accepts ‘YYLTYPE’ and ‘yytoken_kind_t’ as arguments? -You should prototype it in the second since Bison will insert that code -_after_ the ‘YYLTYPE’ and ‘yytoken_kind_t’ definitions. - - This distinction in functionality between the two PROLOGUE sections -is established by the appearance of the ‘%union’ between them. This -behavior raises a few questions. First, why should the position of a -‘%union’ affect definitions related to ‘YYLTYPE’ and ‘yytoken_kind_t’? -Second, what if there is no ‘%union’? In that case, the second kind of -PROLOGUE section is not available. This behavior is not intuitive. - - To avoid this subtle ‘%union’ dependency, rewrite the example using a -‘%code top’ and an unqualified ‘%code’. Let's go ahead and add the new -‘YYLTYPE’ definition and the ‘trace_token’ prototype at the same time: - - %code top { - #define _GNU_SOURCE - #include - - /* WARNING: The following code really belongs - * in a '%code requires'; see below. */ - - #include "ptypes.h" - #define YYLTYPE YYLTYPE - typedef struct YYLTYPE - { - int first_line; - int first_column; - int last_line; - int last_column; - char *filename; - } YYLTYPE; - } - - %union { - long n; - tree t; /* ‘tree’ is defined in ‘ptypes.h’. */ - } - - %code { - static void print_token (yytoken_kind_t token, YYSTYPE val); - static void trace_token (yytoken_kind_t token, YYLTYPE loc); - } - - ... 
- -In this way, ‘%code top’ and the unqualified ‘%code’ achieve the same -functionality as the two kinds of PROLOGUE sections, but it's always -explicit which kind you intend. Moreover, both kinds are always -available even in the absence of ‘%union’. - - The ‘%code top’ block above logically contains two parts. The first -two lines before the warning need to appear near the top of the parser -implementation file. The first line after the warning is required by -‘YYSTYPE’ and thus also needs to appear in the parser implementation -file. However, if you've instructed Bison to generate a parser header -file (*note Decl Summary::), you probably want that line to appear -before the ‘YYSTYPE’ definition in that header file as well. The -‘YYLTYPE’ definition should also appear in the parser header file to -override the default ‘YYLTYPE’ definition there. - - In other words, in the ‘%code top’ block above, all but the first two -lines are dependency code required by the ‘YYSTYPE’ and ‘YYLTYPE’ -definitions. Thus, they belong in one or more ‘%code requires’: - - %code top { - #define _GNU_SOURCE - #include - } - - %code requires { - #include "ptypes.h" - } - %union { - long n; - tree t; /* ‘tree’ is defined in ‘ptypes.h’. */ - } - - %code requires { - #define YYLTYPE YYLTYPE - typedef struct YYLTYPE - { - int first_line; - int first_column; - int last_line; - int last_column; - char *filename; - } YYLTYPE; - } - - %code { - static void print_token (yytoken_kind_t token, YYSTYPE val); - static void trace_token (yytoken_kind_t token, YYLTYPE loc); - } - - ... - -Now Bison will insert ‘#include "ptypes.h"’ and the new ‘YYLTYPE’ -definition before the Bison-generated ‘YYSTYPE’ and ‘YYLTYPE’ -definitions in both the parser implementation file and the parser header -file. (By the same reasoning, ‘%code requires’ would also be the -appropriate place to write your own definition for ‘YYSTYPE’.) 
- - When you are writing dependency code for ‘YYSTYPE’ and ‘YYLTYPE’, you -should prefer ‘%code requires’ over ‘%code top’ regardless of whether -you instruct Bison to generate a parser header file. When you are -writing code that you need Bison to insert only into the parser -implementation file and that has no special need to appear at the top of -that file, you should prefer the unqualified ‘%code’ over ‘%code top’. -These practices will make the purpose of each block of your code -explicit to Bison and to other developers reading your grammar file. -Following these practices, we expect the unqualified ‘%code’ and ‘%code -requires’ to be the most important of the four PROLOGUE alternatives. - - At some point while developing your parser, you might decide to -provide ‘trace_token’ to modules that are external to your parser. -Thus, you might wish for Bison to insert the prototype into both the -parser header file and the parser implementation file. Since this -function is not a dependency required by ‘YYSTYPE’ or ‘YYLTYPE’, it -doesn't make sense to move its prototype to a ‘%code requires’. More -importantly, since it depends upon ‘YYLTYPE’ and ‘yytoken_kind_t’, -‘%code requires’ is not sufficient. Instead, move its prototype from -the unqualified ‘%code’ to a ‘%code provides’: - - %code top { - #define _GNU_SOURCE - #include - } - - %code requires { - #include "ptypes.h" - } - %union { - long n; - tree t; /* ‘tree’ is defined in ‘ptypes.h’. */ - } - - %code requires { - #define YYLTYPE YYLTYPE - typedef struct YYLTYPE - { - int first_line; - int first_column; - int last_line; - int last_column; - char *filename; - } YYLTYPE; - } - - %code provides { - void trace_token (yytoken_kind_t token, YYLTYPE loc); - } - - %code { - static void print_token (FILE *file, int token, YYSTYPE val); - } - - ... 
- -Bison will insert the ‘trace_token’ prototype into both the parser -header file and the parser implementation file after the definitions for -‘yytoken_kind_t’, ‘YYLTYPE’, and ‘YYSTYPE’. - - The above examples are careful to write directives in an order that -reflects the layout of the generated parser implementation and header -files: ‘%code top’, ‘%code requires’, ‘%code provides’, and then -‘%code’. While your grammar files may generally be easier to read if -you also follow this order, Bison does not require it. Instead, Bison -lets you choose an organization that makes sense to you. - - You may declare any of these directives multiple times in the grammar -file. In that case, Bison concatenates the contained code in -declaration order. This is the only way in which the position of one of -these directives within the grammar file affects its functionality. - - The result of the previous two properties is greater flexibility in -how you may organize your grammar file. For example, you may organize -semantic-type-related directives by semantic type: - - %code requires { #include "type1.h" } - %union { type1 field1; } - %destructor { type1_free ($$); } - %printer { type1_print (yyo, $$); } - - %code requires { #include "type2.h" } - %union { type2 field2; } - %destructor { type2_free ($$); } - %printer { type2_print (yyo, $$); } - -You could even place each of the above directive groups in the rules -section of the grammar file next to the set of rules that uses the -associated semantic type. (In the rules section, you must terminate -each of those directives with a semicolon.) And you don't have to worry -that some directive (like a ‘%union’) in the definitions section is -going to adversely affect their functionality in some counter-intuitive -manner just because it comes first. Such an organization is not -possible using PROLOGUE sections. 
- - This section has been concerned with explaining the advantages of the -four PROLOGUE alternatives over the original Yacc PROLOGUE. However, in -most cases when using these directives, you shouldn't need to think -about all the low-level ordering issues discussed here. Instead, you -should simply use these directives to label each block of your code -according to its purpose and let Bison handle the ordering. ‘%code’ is -the most generic label. Move code to ‘%code requires’, ‘%code -provides’, or ‘%code top’ as needed. - - ---------- Footnotes ---------- - - (1) However, defining ‘YYLTYPE’ via a C macro is not the recommended -way. *Note Location Type:: - - -File: bison.info, Node: Bison Declarations, Next: Grammar Rules, Prev: Prologue Alternatives, Up: Grammar Outline - -3.1.3 The Bison Declarations Section ------------------------------------- - -The BISON DECLARATIONS section contains declarations that define -terminal and nonterminal symbols, specify precedence, and so on. In -some simple grammars you may not need any declarations. *Note -Declarations::. - - -File: bison.info, Node: Grammar Rules, Next: Epilogue, Prev: Bison Declarations, Up: Grammar Outline - -3.1.4 The Grammar Rules Section -------------------------------- - -The “grammar rules” section contains one or more Bison grammar rules, -and nothing else. *Note Rules::. - - There must always be at least one grammar rule, and the first ‘%%’ -(which precedes the grammar rules) may never be omitted even if it is -the first thing in the file. - - -File: bison.info, Node: Epilogue, Prev: Grammar Rules, Up: Grammar Outline - -3.1.5 The epilogue ------------------- - -The EPILOGUE is copied verbatim to the end of the parser implementation -file, just as the PROLOGUE is copied to the beginning. This is the most -convenient place to put anything that you want to have in the parser -implementation file but which need not come before the definition of -‘yyparse’. 
For example, the definitions of ‘yylex’ and ‘yyerror’ often -go here. Because C requires functions to be declared before being used, -you often need to declare functions like ‘yylex’ and ‘yyerror’ in the -Prologue, even if you define them in the Epilogue. *Note Interface::. - - If the last section is empty, you may omit the ‘%%’ that separates it -from the grammar rules. - - The Bison parser itself contains many macros and identifiers whose -names start with ‘yy’ or ‘YY’, so it is a good idea to avoid using any -such names (except those documented in this manual) in the epilogue of -the grammar file. - - -File: bison.info, Node: Symbols, Next: Rules, Prev: Grammar Outline, Up: Grammar File - -3.2 Symbols, Terminal and Nonterminal -===================================== - -“Symbols” in Bison grammars represent the grammatical classifications of -the language. - - A “terminal symbol” (also known as a “token kind”) represents a class -of syntactically equivalent tokens. You use the symbol in grammar rules -to mean that a token in that class is allowed. The symbol is -represented in the Bison parser by a numeric code, and the ‘yylex’ -function returns a token kind code to indicate what kind of token has -been read. You don't need to know what the code value is; you can use -the symbol to stand for it. - - A “nonterminal symbol” stands for a class of syntactically equivalent -groupings. The symbol name is used in writing grammar rules. By -convention, it should be all lower case. - - Symbol names can contain letters, underscores, periods, and -non-initial digits and dashes. Dashes in symbol names are a GNU -extension, incompatible with POSIX Yacc. Periods and dashes make symbol -names less convenient to use with named references, which require -brackets around such names (*note Named References::). Terminal symbols -that contain periods or dashes make little sense: since they are not -valid symbols (in most programming languages) they are not exported as -token names. 
- - There are three ways of writing terminal symbols in the grammar: - - • A “named token kind” is written with an identifier, like an - identifier in C. By convention, it should be all upper case. Each - such name must be defined with a Bison declaration such as - ‘%token’. *Note Token Decl::. - - • A “character token kind” (or “literal character token”) is written - in the grammar using the same syntax used in C for character - constants; for example, ‘'+'’ is a character token kind. A - character token kind doesn't need to be declared unless you need to - specify its semantic value data type (*note Value Type::), - associativity, or precedence (*note Precedence::). - - By convention, a character token kind is used only to represent a - token that consists of that particular character. Thus, the token - kind ‘'+'’ is used to represent the character ‘+’ as a token. - Nothing enforces this convention, but if you depart from it, your - program will confuse other readers. - - All the usual escape sequences used in character literals in C can - be used in Bison as well, but you must not use the null character - as a character literal because its numeric code, zero, signifies - end-of-input (*note Calling Convention::). Also, unlike standard - C, trigraphs have no special meaning in Bison character literals, - nor is backslash-newline allowed. - - • A “literal string token” is written like a C string constant; for - example, ‘"<="’ is a literal string token. A literal string token - doesn't need to be declared unless you need to specify its semantic - value data type (*note Value Type::), associativity, or precedence - (*note Precedence::). - - You can associate the literal string token with a symbolic name as - an alias, using the ‘%token’ declaration (*note Token Decl::). If - you don't do that, the lexical analyzer has to retrieve the token - code for the literal string token from the ‘yytname’ table (*note - Calling Convention::). 
- - *Warning*: literal string tokens do not work in Yacc. - - By convention, a literal string token is used only to represent a - token that consists of that particular string. Thus, you should - use the token kind ‘"<="’ to represent the string ‘<=’ as a token. - Bison does not enforce this convention, but if you depart from it, - people who read your program will be confused. - - All the escape sequences used in string literals in C can be used - in Bison as well, except that you must not use a null character - within a string literal. Also, unlike Standard C, trigraphs have - no special meaning in Bison string literals, nor is - backslash-newline allowed. A literal string token must contain two - or more characters; for a token containing just one character, use - a character token (see above). - - How you choose to write a terminal symbol has no effect on its -grammatical meaning. That depends only on where it appears in rules and -on when the parser function returns that symbol. - - The value returned by ‘yylex’ is always one of the terminal symbols, -except that a zero or negative value signifies end-of-input. Whichever -way you write the token kind in the grammar rules, you write it the same -way in the definition of ‘yylex’. The numeric code for a character -token kind is simply the positive numeric code of the character, so -‘yylex’ can use the identical value to generate the requisite code, -though you may need to convert it to ‘unsigned char’ to avoid -sign-extension on hosts where ‘char’ is signed. Each named token kind -becomes a C macro in the parser implementation file, so ‘yylex’ can use -the name to stand for the code. (This is why periods don't make sense -in terminal symbols.) *Note Calling Convention::. - - If ‘yylex’ is defined in a separate file, you need to arrange for the -token-kind definitions to be available there. 
Use the ‘-d’ option when -you run Bison, so that it will write these definitions into a separate -header file ‘NAME.tab.h’ which you can include in the other source files -that need it. *Note Invocation::. - - If you want to write a grammar that is portable to any Standard C -host, you must use only nonnull character tokens taken from the basic -execution character set of Standard C. This set consists of the ten -digits, the 52 lower- and upper-case English letters, and the characters -in the following C-language string: - - "\a\b\t\n\v\f\r !\"#%&'()*+,-./:;<=>?[\\]^_{|}~" - - The ‘yylex’ function and Bison must use a consistent character set -and encoding for character tokens. For example, if you run Bison in an -ASCII environment, but then compile and run the resulting program in an -environment that uses an incompatible character set like EBCDIC, the -resulting program may not work because the tables generated by Bison -will assume ASCII numeric values for character tokens. It is standard -practice for software distributions to contain C source files that were -generated by Bison in an ASCII environment, so installers on platforms -that are incompatible with ASCII must rebuild those files before -compiling them. - - The symbol ‘error’ is a terminal symbol reserved for error recovery -(*note Error Recovery::); you shouldn't use it for any other purpose. -In particular, ‘yylex’ should never return this value. The default -value of the error token is 256, unless you explicitly assigned 256 to -one of your tokens with a ‘%token’ declaration. - - -File: bison.info, Node: Rules, Next: Semantics, Prev: Symbols, Up: Grammar File - -3.3 Grammar Rules -================= - -A Bison grammar is a list of rules. - -* Menu: - -* Rules Syntax:: Syntax of the rules. -* Empty Rules:: Symbols that can match the empty string. -* Recursion:: Writing recursive rules. 
- - -File: bison.info, Node: Rules Syntax, Next: Empty Rules, Up: Rules - -3.3.1 Syntax of Grammar Rules ------------------------------ - -A Bison grammar rule has the following general form: - - RESULT: COMPONENTS...; - -where RESULT is the nonterminal symbol that this rule describes, and -COMPONENTS are various terminal and nonterminal symbols that are put -together by this rule (*note Symbols::). - - For example, - - exp: exp '+' exp; - -says that two groupings of type ‘exp’, with a ‘+’ token in between, can -be combined into a larger grouping of type ‘exp’. - - White space in rules is significant only to separate symbols. You -can add extra white space as you wish. - - Scattered among the components can be ACTIONS that determine the -semantics of the rule. An action looks like this: - - {C STATEMENTS} - -This is an example of “braced code”, that is, C code surrounded by -braces, much like a compound statement in C. Braced code can contain -any sequence of C tokens, so long as its braces are balanced. Bison -does not check the braced code for correctness directly; it merely -copies the code to the parser implementation file, where the C compiler -can check it. - - Within braced code, the balanced-brace count is not affected by -braces within comments, string literals, or character constants, but it -is affected by the C digraphs ‘<%’ and ‘%>’ that represent braces. At -the top level braced code must be terminated by ‘}’ and not by a -digraph. Bison does not look for trigraphs, so if braced code uses -trigraphs you should ensure that they do not affect the nesting of -braces or the boundaries of comments, string literals, or character -constants. - - Usually there is only one action and it follows the components. -*Note Actions::. - - Multiple rules for the same RESULT can be written separately or can -be joined with the vertical-bar character ‘|’ as follows: - - RESULT: - RULE1-COMPONENTS... - | RULE2-COMPONENTS... - ... 
- ; - -They are still considered distinct rules even when joined in this way. - - -File: bison.info, Node: Empty Rules, Next: Recursion, Prev: Rules Syntax, Up: Rules - -3.3.2 Empty Rules ------------------ - -A rule is said to be “empty” if its right-hand side (COMPONENTS) is -empty. It means that RESULT in the previous example can match the empty -string. As another example, here is how to define an optional -semicolon: - - semicolon.opt: | ";"; - -It is easy not to see an empty rule, especially when ‘|’ is used. The -‘%empty’ directive makes it explicit that a rule is empty on -purpose: - - semicolon.opt: - %empty - | ";" - ; - - Flagging a non-empty rule with ‘%empty’ is an error. If run with -‘-Wempty-rule’, ‘bison’ will report empty rules without ‘%empty’. Using -‘%empty’ enables this warning, unless ‘-Wno-empty-rule’ was specified. - - The ‘%empty’ directive is a Bison extension; it does not work with -Yacc. To remain compatible with POSIX Yacc, it is customary to write a -comment ‘/* empty */’ in each rule with no components: - - semicolon.opt: - /* empty */ - | ";" - ; - - -File: bison.info, Node: Recursion, Prev: Empty Rules, Up: Rules - -3.3.3 Recursive Rules ---------------------- - -A rule is called “recursive” when its RESULT nonterminal appears also on -its right hand side. Nearly all Bison grammars need to use recursion, -because that is the only way to define a sequence of any number of a -particular thing. Consider this recursive definition of a -comma-separated sequence of one or more expressions: - - expseq1: - exp - | expseq1 ',' exp - ; - -Since the recursive use of ‘expseq1’ is the leftmost symbol in the right -hand side, we call this “left recursion”.
By contrast, here the same -construct is defined using “right recursion”: - - expseq1: - exp - | exp ',' expseq1 - ; - -Any kind of sequence can be defined using either left recursion or right -recursion, but you should always use left recursion, because it can -parse a sequence of any number of elements with bounded stack space. -Right recursion uses up space on the Bison stack in proportion to the -number of elements in the sequence, because all the elements must be -shifted onto the stack before the rule can be applied even once. *Note -Algorithm::, for further explanation of this. - - “Indirect” or “mutual” recursion occurs when the result of the rule -does not appear directly on its right hand side, but does appear in -rules for other nonterminals which do appear on its right hand side. - - For example: - - expr: - primary - | primary '+' primary - ; - - primary: - constant - | '(' expr ')' - ; - -defines two mutually-recursive nonterminals, since each refers to the -other. - - -File: bison.info, Node: Semantics, Next: Tracking Locations, Prev: Rules, Up: Grammar File - -3.4 Defining Language Semantics -=============================== - -The grammar rules for a language determine only the syntax. The -semantics are determined by the semantic values associated with various -tokens and groupings, and by the actions taken when various groupings -are recognized. - - For example, the calculator calculates properly because the value -associated with each expression is the proper number; it adds properly -because the action for the grouping ‘X + Y’ is to add the numbers -associated with X and Y. - -* Menu: - -* Value Type:: Specifying one data type for all semantic values. -* Multiple Types:: Specifying several alternative data types. -* Type Generation:: Generating the semantic value type. -* Union Decl:: Declaring the set of all semantic value types. -* Structured Value Type:: Providing a structured semantic value type. 
-* Actions:: An action is the semantic definition of a grammar rule. -* Action Types:: Specifying data types for actions to operate on. -* Midrule Actions:: Most actions go at the end of a rule. - This says when, why and how to use the exceptional - action in the middle of a rule. - - -File: bison.info, Node: Value Type, Next: Multiple Types, Up: Semantics - -3.4.1 Data Types of Semantic Values ------------------------------------ - -In a simple program it may be sufficient to use the same data type for -the semantic values of all language constructs. This was true in the -RPN and infix calculator examples (*note RPN Calc::). - - Bison normally uses the type ‘int’ for semantic values if your -program uses the same data type for all language constructs. To specify -some other type, define the ‘%define’ variable ‘api.value.type’ like -this: - - %define api.value.type {double} - -or - - %define api.value.type {struct semantic_value_type} - - The value of ‘api.value.type’ should be a type name that does not -contain parentheses or square brackets. - - Alternatively in C, instead of relying on Bison's ‘%define’ support, -you may rely on the C preprocessor and define ‘YYSTYPE’ as a macro: - - #define YYSTYPE double - -This macro definition must go in the prologue of the grammar file (*note -Grammar Outline::). If compatibility with POSIX Yacc matters to you, -use this. Note however that Bison cannot know ‘YYSTYPE’'s value, not -even whether it is defined, so there are services it cannot provide. -Besides, this works only for C. - - -File: bison.info, Node: Multiple Types, Next: Type Generation, Prev: Value Type, Up: Semantics - -3.4.2 More Than One Value Type ------------------------------- - -In most programs, you will need different data types for different kinds -of tokens and groupings. For example, a numeric constant may need type -‘int’ or ‘long’, while a string constant needs type ‘char *’, and an -identifier might need a pointer to an entry in the symbol table.
- - To use more than one data type for semantic values in one parser, -Bison requires you to do two things: - - • Specify the entire collection of possible data types. There are - several options: - • let Bison compute the union type from the tags you assign to - symbols; - - • use the ‘%union’ Bison declaration (*note Union Decl::); - - • define the ‘%define’ variable ‘api.value.type’ to be a union - type whose members are the type tags (*note Structured Value - Type::); - - • use a ‘typedef’ or a ‘#define’ to define ‘YYSTYPE’ to be a - union type whose member names are the type tags. - - • Choose one of those types for each symbol (terminal or nonterminal) - for which semantic values are used. This is done for tokens with - the ‘%token’ Bison declaration (*note Token Decl::) and for - groupings with the ‘%nterm’/‘%type’ Bison declarations (*note Type - Decl::). - - -File: bison.info, Node: Type Generation, Next: Union Decl, Prev: Multiple Types, Up: Semantics - -3.4.3 Generating the Semantic Value Type ----------------------------------------- - -The special value ‘union’ of the ‘%define’ variable ‘api.value.type’ -instructs Bison that the type tags (used with the ‘%token’, ‘%nterm’ and -‘%type’ directives) are genuine types, not names of members of -‘YYSTYPE’. - - For example: - - %define api.value.type union - %token <int> INT "integer" - %token <int> 'n' - %nterm <int> expr - %token <char const *> ID "identifier" - -generates an appropriate value of ‘YYSTYPE’ to support each symbol type. -The name of the member of ‘YYSTYPE’ for tokens that have a declared -identifier ID (such as ‘INT’ and ‘ID’ above, but not ‘'n'’) is ‘ID’. -The other symbols have unspecified names on which you should not depend; -instead, rely on C casts to access the semantic value with the -appropriate type: - - /* For an "integer". */ - yylval.INT = 42; - return INT; - - /* For an 'n', also declared as int. */ - *((int*)&yylval) = 42; - return 'n'; - - /* For an "identifier".
*/ - yylval.ID = "42"; - return ID; - - If the ‘%define’ variable ‘api.token.prefix’ is defined (*note -%define Summary::), then it is also used to prefix the union member -names. For instance, with ‘%define api.token.prefix {TOK_}’: - - /* For an "integer". */ - yylval.TOK_INT = 42; - return TOK_INT; - - This Bison extension cannot work if ‘%yacc’ (or ‘-y’/‘--yacc’) is -enabled, as POSIX mandates that Yacc generate tokens as macros (e.g., -‘#define INT 258’, or ‘#define TOK_INT 258’). - - A similar feature is provided for C++ that in addition overcomes C++ -limitations (that forbid non-trivial objects to be part of a ‘union’): -‘%define api.value.type variant’, see *note C++ Variants::. - - -File: bison.info, Node: Union Decl, Next: Structured Value Type, Prev: Type Generation, Up: Semantics - -3.4.4 The Union Declaration ---------------------------- - -The ‘%union’ declaration specifies the entire collection of possible -data types for semantic values. The keyword ‘%union’ is followed by -braced code containing the same thing that goes inside a ‘union’ in C. - - For example: - - %union { - double val; - symrec *tptr; - } - -This says that the two alternative types are ‘double’ and ‘symrec *’. -They are given names ‘val’ and ‘tptr’; these names are used in the -‘%token’, ‘%nterm’ and ‘%type’ declarations to pick one of the types for -a terminal or nonterminal symbol (*note Type Decl::). - - As an extension to POSIX, a tag is allowed after the ‘%union’. For -example: - - %union value { - double val; - symrec *tptr; - } - -specifies the union tag ‘value’, so the corresponding C type is ‘union -value’. If you do not specify a tag, it defaults to ‘YYSTYPE’ (*note -%define Summary::). - - As another extension to POSIX, you may specify multiple ‘%union’ -declarations; their contents are concatenated. However, only the first -‘%union’ declaration can specify a tag. 
- - Note that, unlike making a ‘union’ declaration in C, you need not -write a semicolon after the closing brace. - - -File: bison.info, Node: Structured Value Type, Next: Actions, Prev: Union Decl, Up: Semantics - -3.4.5 Providing a Structured Semantic Value Type ------------------------------------------------- - -Instead of ‘%union’, you can define and use your own union type -‘YYSTYPE’ if your grammar contains at least one ‘<TYPE>’ tag. For -example, you can put the following into a header file ‘parser.h’: - - union YYSTYPE { - double val; - symrec *tptr; - }; - -and then your grammar can use the following instead of ‘%union’: - - %{ - #include "parser.h" - %} - %define api.value.type {union YYSTYPE} - %nterm <val> expr - %token <tptr> ID - - Actually, you may also provide a ‘struct’ rather than a ‘union’, -which may be handy if you want to track information for every symbol -(such as preceding comments). - - The type you provide may even be structured and include pointers, in -which case the type tags you provide may be composite, with ‘.’ and ‘->’ -operators. - - -File: bison.info, Node: Actions, Next: Action Types, Prev: Structured Value Type, Up: Semantics - -3.4.6 Actions -------------- - -An action accompanies a syntactic rule and contains C code to be -executed each time an instance of that rule is recognized. The task of -most actions is to compute a semantic value for the grouping built by -the rule from the semantic values associated with tokens or smaller -groupings. - - An action consists of braced code containing C statements, and can be -placed at any position in the rule; it is executed at that position. -Most rules have just one action at the end of the rule, following all -the components. Actions in the middle of a rule are tricky and used -only for special purposes (*note Midrule Actions::).
- - The C code in an action can refer to the semantic values of the -components matched by the rule with the construct ‘$N’, which stands for -the value of the Nth component. The semantic value for the grouping -being constructed is ‘$$’. In addition, the semantic values of symbols -can be accessed with the named references construct ‘$NAME’ or -‘$[NAME]’. Bison translates both of these constructs into expressions -of the appropriate type when it copies the actions into the parser -implementation file. ‘$$’ (or ‘$NAME’, when it stands for the current -grouping) is translated to a modifiable lvalue, so it can be assigned -to. - - Here is a typical example: - - exp: - ... - | exp '+' exp { $$ = $1 + $3; } - - Or, in terms of named references: - - exp[result]: - ... - | exp[left] '+' exp[right] { $result = $left + $right; } - -This rule constructs an ‘exp’ from two smaller ‘exp’ groupings connected -by a plus-sign token. In the action, ‘$1’ and ‘$3’ (‘$left’ and -‘$right’) refer to the semantic values of the two component ‘exp’ -groupings, which are the first and third symbols on the right hand side -of the rule. The sum is stored into ‘$$’ (‘$result’) so that it becomes -the semantic value of the addition-expression just recognized by the -rule. If there were a useful semantic value associated with the ‘+’ -token, it could be referred to as ‘$2’. - - *Note Named References::, for more information about using the named -references construct. - - Note that the vertical-bar character ‘|’ is really a rule separator, -and actions are attached to a single rule. This is a difference with -tools like Flex, for which ‘|’ stands for either "or", or "the same -action as that of the next rule". In the following example, the action -is triggered only when ‘b’ is found: - - a-or-b: 'a'|'b' { a_or_b_found = 1; }; - - If you don't specify an action for a rule, Bison supplies a default: -‘$$ = $1’. Thus, the value of the first symbol in the rule becomes the -value of the whole rule. 
Of course, the default action is valid only if -the two data types match. There is no meaningful default action for an -empty rule; every empty rule must have an explicit action unless the -rule's value does not matter. - - ‘$N’ with N zero or negative is allowed for reference to tokens and -groupings on the stack _before_ those that match the current rule. This -is a very risky practice, and to use it reliably you must be certain of -the context in which the rule is applied. Here is a case in which you -can use this reliably: - - foo: - expr bar '+' expr { ... } - | expr bar '-' expr { ... } - ; - - bar: - %empty { previous_expr = $0; } - ; - - As long as ‘bar’ is used only in the fashion shown here, ‘$0’ always -refers to the ‘expr’ which precedes ‘bar’ in the definition of ‘foo’. - - It is also possible to access the semantic value of the lookahead -token, if any, from a semantic action. This semantic value is stored in -‘yylval’. *Note Action Features::. - - -File: bison.info, Node: Action Types, Next: Midrule Actions, Prev: Actions, Up: Semantics - -3.4.7 Data Types of Values in Actions -------------------------------------- - -If you have chosen a single data type for semantic values, the ‘$$’ and -‘$N’ constructs always have that data type. - - If you have used ‘%union’ to specify a variety of data types, then -you must declare a choice among these types for each terminal or -nonterminal symbol that can have a semantic value. Then each time you -use ‘$$’ or ‘$N’, its data type is determined by which symbol it refers -to in the rule. In this example, - - exp: - ... - | exp '+' exp { $$ = $1 + $3; } - -‘$1’ and ‘$3’ refer to instances of ‘exp’, so they all have the data -type declared for the nonterminal symbol ‘exp’. If ‘$2’ were used, it -would have the data type declared for the terminal symbol ‘'+'’, -whatever that might be. 
- - Alternatively, you can specify the data type when you refer to the -value, by inserting ‘<TYPE>’ after the ‘$’ at the beginning of the -reference. For example, if you have defined types as shown here: - - %union { - int itype; - double dtype; - } - -then you can write ‘$<itype>1’ to refer to the first subunit of the rule -as an integer, or ‘$<dtype>1’ to refer to it as a double. - - -File: bison.info, Node: Midrule Actions, Prev: Action Types, Up: Semantics - -3.4.8 Actions in Midrule ------------------------- - -Occasionally it is useful to put an action in the middle of a rule. -These actions are written just like usual end-of-rule actions, but they -are executed before the parser even recognizes the following components. - -* Menu: - -* Using Midrule Actions:: Putting an action in the middle of a rule. -* Typed Midrule Actions:: Specifying the semantic type of their values. -* Midrule Action Translation:: How midrule actions are actually processed. -* Midrule Conflicts:: Midrule actions can cause conflicts. - - -File: bison.info, Node: Using Midrule Actions, Next: Typed Midrule Actions, Up: Midrule Actions - -3.4.8.1 Using Midrule Actions -............................. - -A midrule action may refer to the components preceding it using ‘$N’, -but it may not refer to subsequent components because it is run before -they are parsed. - - The midrule action itself counts as one of the components of the -rule. This makes a difference when there is another action later in the -same rule (and usually there is another at the end): you have to count -the actions along with the symbols when working out which number N to -use in ‘$N’. - - The midrule action can also have a semantic value. The action can -set its value with an assignment to ‘$$’, and actions later in the rule -can refer to the value using ‘$N’.
Since there is no symbol to name the -action, there is no way to declare a data type for the value in advance, -so you must use the ‘$<...>N’ construct to specify a data type each time -you refer to this value. - - There is no way to set the value of the entire rule with a midrule -action, because assignments to ‘$$’ do not have that effect. The only -way to set the value for the entire rule is with an ordinary action at -the end of the rule. - - Here is an example from a hypothetical compiler, handling a ‘let’ -statement that looks like ‘let (VARIABLE) STATEMENT’ and serves to -create a variable named VARIABLE temporarily for the duration of -STATEMENT. To parse this construct, we must put VARIABLE into the -symbol table while STATEMENT is parsed, then remove it afterward. Here -is how it is done: - - stmt: - "let" '(' var ')' - { - $<context>$ = push_context (); - declare_variable ($3); - } - stmt - { - $$ = $6; - pop_context ($<context>5); - } - -As soon as ‘let (VARIABLE)’ has been recognized, the first action is -run. It saves a copy of the current semantic context (the list of -accessible variables) as its semantic value, using alternative ‘context’ -in the data-type union. Then it calls ‘declare_variable’ to add the new -variable to that list. Once the first action is finished, the embedded -statement ‘stmt’ can be parsed. - - Note that the midrule action is component number 5, so the ‘stmt’ is -component number 6. Named references can be used to improve the -readability and maintainability (*note Named References::): - - stmt: - "let" '(' var ')' - { - $<context>let = push_context (); - declare_variable ($3); - }[let] - stmt - { - $$ = $6; - pop_context ($<context>let); - } - - After the embedded statement is parsed, its semantic value becomes -the value of the entire ‘let’-statement. Then the semantic value from -the earlier action is used to restore the prior list of variables.
This -removes the temporary ‘let’-variable from the list so that it won't -appear to exist while the rest of the program is parsed. - - Because the types of the semantic values of midrule actions are -unknown to Bison, type-based features (e.g., ‘%printer’, ‘%destructor’) -do not work, which could result in memory leaks. They also forbid the -use of the ‘variant’ implementation of the ‘api.value.type’ in C++ -(*note C++ Variants::). - - *Note Typed Midrule Actions::, for one way to address this issue, and -*note Midrule Action Translation::, for another: turning midrule -actions into regular actions. - - -File: bison.info, Node: Typed Midrule Actions, Next: Midrule Action Translation, Prev: Using Midrule Actions, Up: Midrule Actions - -3.4.8.2 Typed Midrule Actions -............................. - -In the above example, if the parser initiates error recovery (*note -Error Recovery::) while parsing the tokens in the embedded statement -‘stmt’, it might discard the previous semantic context ‘$<context>5’ -without restoring it. Thus, ‘$<context>5’ needs a destructor (*note -Destructor Decl::), and Bison needs the type of the semantic value -(‘context’) to select the right destructor. - - As an extension to Yacc's midrule actions, Bison offers a means to -type their semantic value: specify its type tag (‘<...>’) before the -midrule action. - - Consider the previous example, with an untyped midrule action: - - stmt: - "let" '(' var ')' - { - $<context>$ = push_context (); // *** - declare_variable ($3); - } - stmt - { - $$ = $6; - pop_context ($<context>5); // *** - } - -If instead you write: - - stmt: - "let" '(' var ')' - <context>{ // *** - $$ = push_context (); // *** - declare_variable ($3); - } - stmt - { - $$ = $6; - pop_context ($5); // *** - } - -then ‘%printer’ and ‘%destructor’ work properly (no more leaks!), C++ -‘variant’s can be used, and redundancy is reduced (‘<context>’ is -specified once).
- - -File: bison.info, Node: Midrule Action Translation, Next: Midrule Conflicts, Prev: Typed Midrule Actions, Up: Midrule Actions - -3.4.8.3 Midrule Action Translation -.................................. - -Midrule actions are actually transformed into regular rules and actions. -The various reports generated by Bison (textual, graphical, etc., see -*note Understanding::) reveal this translation, best explained by means -of an example. The following rule: - - exp: { a(); } "b" { c(); } { d(); } "e" { f(); }; - -is translated into: - - $@1: %empty { a(); }; - $@2: %empty { c(); }; - $@3: %empty { d(); }; - exp: $@1 "b" $@2 $@3 "e" { f(); }; - -with new nonterminal symbols ‘$@N’, where N is a number. - - A midrule action is expected to generate a value if it uses ‘$$’, or -the (final) action uses ‘$N’ where N denote the midrule action. In that -case its nonterminal is rather named ‘@N’: - - exp: { a(); } "b" { $$ = c(); } { d(); } "e" { f = $1; }; - -is translated into - - @1: %empty { a(); }; - @2: %empty { $$ = c(); }; - $@3: %empty { d(); }; - exp: @1 "b" @2 $@3 "e" { f = $1; } - - There are probably two errors in the above example: the first midrule -action does not generate a value (it does not use ‘$$’ although the -final action uses it), and the value of the second one is not used (the -final action does not use ‘$3’). Bison reports these errors when the -‘midrule-value’ warnings are enabled (*note Invocation::): - - $ bison -Wmidrule-value mid.y - mid.y:2.6-13: warning: unset value: $$ - 2 | exp: { a(); } "b" { $$ = c(); } { d(); } "e" { f = $1; }; - | ^~~~~~~~ - mid.y:2.19-31: warning: unused value: $3 - 2 | exp: { a(); } "b" { $$ = c(); } { d(); } "e" { f = $1; }; - | ^~~~~~~~~~~~~ - - - It is sometimes useful to turn midrule actions into regular actions, -e.g., to factor them, or to escape from their limitations. 
For -instance, as an alternative to _typed_ midrule action, you may bury the -midrule action inside a nonterminal symbol and declare a printer and -a destructor for that symbol: - - %nterm <context> let - %destructor { pop_context ($$); } let - %printer { print_context (yyo, $$); } let - - %% - - stmt: - let stmt - { - $$ = $2; - pop_context ($let); - }; - - let: - "let" '(' var ')' - { - $let = push_context (); - declare_variable ($var); - }; - - -File: bison.info, Node: Midrule Conflicts, Prev: Midrule Action Translation, Up: Midrule Actions - -3.4.8.4 Conflicts due to Midrule Actions -........................................ - -Taking action before a rule is completely recognized often leads to -conflicts since the parser must commit to a parse in order to execute -the action. For example, the following two rules, without midrule -actions, can coexist in a working parser because the parser can shift -the open-brace token and look at what follows before deciding whether -there is a declaration or not: - - compound: - '{' declarations statements '}' - | '{' statements '}' - ; - -But when we add a midrule action as follows, the rules become -nonfunctional: - - compound: - { prepare_for_local_variables (); } - '{' declarations statements '}' - | '{' statements '}' - ; - -Now the parser is forced to decide whether to run the midrule action -when it has read no farther than the open-brace. In other words, it -must commit to using one rule or the other, without sufficient -information to do it correctly. (The open-brace token is what is called -the “lookahead” token at this time, since the parser is still deciding -what to do about it. *Note Lookahead::.)
- - You might think that you could correct the problem by putting -identical actions into the two rules, like this: - - compound: - { prepare_for_local_variables (); } - '{' declarations statements '}' - | { prepare_for_local_variables (); } - '{' statements '}' - ; - -But this does not help, because Bison does not realize that the two -actions are identical. (Bison never tries to understand the C code in -an action.) - - If the grammar is such that a declaration can be distinguished from a -statement by the first token (which is true in C), then one solution -which does work is to put the action after the open-brace, like this: - - compound: - '{' { prepare_for_local_variables (); } - declarations statements '}' - | '{' statements '}' - ; - -Now the first token of the following declaration or statement, which -would in any case tell Bison which rule to use, can still do so. - - Another solution is to bury the action inside a nonterminal symbol -which serves as a subroutine: - - subroutine: - %empty { prepare_for_local_variables (); } - ; - - compound: - subroutine '{' declarations statements '}' - | subroutine '{' statements '}' - ; - -Now Bison can execute the action in the rule for ‘subroutine’ without -deciding which rule for ‘compound’ it will eventually use. - - -File: bison.info, Node: Tracking Locations, Next: Named References, Prev: Semantics, Up: Grammar File - -3.5 Tracking Locations -====================== - -Though grammar rules and semantic actions are enough to write a fully -functional parser, it can be useful to process some additional -information, especially symbol locations. - - The way locations are handled is defined by providing a data type, -and actions to take when rules are matched. - -* Menu: - -* Location Type:: Specifying a data type for locations. -* Actions and Locations:: Using locations in actions. -* Printing Locations:: Defining how locations are printed. -* Location Default Action:: Defining a general way to compute locations. 
- - -File: bison.info, Node: Location Type, Next: Actions and Locations, Up: Tracking Locations - -3.5.1 Data Type of Locations ----------------------------- - -Defining a data type for locations is much simpler than for semantic -values, since all tokens and groupings always use the same type. The -location type is specified using ‘%define api.location.type’: - - %define api.location.type {location_t} - - This defines, in the C generated code, the ‘YYLTYPE’ type name. When -‘YYLTYPE’ is not defined, Bison uses a default structure type with four -members: - - typedef struct YYLTYPE - { - int first_line; - int first_column; - int last_line; - int last_column; - } YYLTYPE; - - In C, you may also specify the type of locations by defining a macro -called ‘YYLTYPE’, just as you can specify the semantic value type by -defining a ‘YYSTYPE’ macro (*note Value Type::). However, rather than -using macros, we recommend the ‘api.value.type’ and ‘api.location.type’ -‘%define’ variables. - - Default locations represent a range in the source file(s), but this -is not a requirement. It could be a single point or just a line number, -or even more complex structures. - - When the default location type is used, Bison initializes all these -fields to 1 for ‘yylloc’ at the beginning of the parsing. To initialize -‘yylloc’ with a custom location type (or to choose a different -initialization), use the ‘%initial-action’ directive. *Note Initial -Action Decl::. - - -File: bison.info, Node: Actions and Locations, Next: Printing Locations, Prev: Location Type, Up: Tracking Locations - -3.5.2 Actions and Locations ---------------------------- - -Actions are not only useful for defining language semantics, but also -for describing the behavior of the output parser with locations. - - The most obvious way for building locations of syntactic groupings is -very similar to the way semantic values are computed.
In a given rule, -several constructs can be used to access the locations of the elements -being matched. The location of the Nth component of the right hand side -is ‘@N’, while the location of the left hand side grouping is ‘@$’. - - In addition, the named references construct ‘@NAME’ and ‘@[NAME]’ may -also be used to address the symbol locations. *Note Named References::, -for more information about using the named references construct. - - Here is a basic example using the default data type for locations: - - exp: - ... - | exp '/' exp - { - @$.first_column = @1.first_column; - @$.first_line = @1.first_line; - @$.last_column = @3.last_column; - @$.last_line = @3.last_line; - if ($3) - $$ = $1 / $3; - else - { - $$ = 1; - fprintf (stderr, "%d.%d-%d.%d: division by zero", - @3.first_line, @3.first_column, - @3.last_line, @3.last_column); - } - } - - As for semantic values, there is a default action for locations that -is run each time a rule is matched. It sets the beginning of ‘@$’ to -the beginning of the first symbol, and the end of ‘@$’ to the end of the -last symbol. - - With this default action, the location tracking can be fully -automatic. The example above simply rewrites this way: - - exp: - ... - | exp '/' exp - { - if ($3) - $$ = $1 / $3; - else - { - $$ = 1; - fprintf (stderr, "%d.%d-%d.%d: division by zero", - @3.first_line, @3.first_column, - @3.last_line, @3.last_column); - } - } - - It is also possible to access the location of the lookahead token, if -any, from a semantic action. This location is stored in ‘yylloc’. -*Note Action Features::. - - -File: bison.info, Node: Printing Locations, Next: Location Default Action, Prev: Actions and Locations, Up: Tracking Locations - -3.5.3 Printing Locations ------------------------- - -When using the default location type, the debug traces report the -symbols' location. The generated parser does so using the -‘YYLOCATION_PRINT’ macro. 
- - -- Macro: YYLOCATION_PRINT (FILE, LOC); - When traces are enabled, print LOC (of type ‘YYLTYPE const *’) on - FILE (of type ‘FILE *’). Do nothing when traces are disabled, or - if the location type is user defined. - - To get locations in the debug traces with your user-defined location -types, define the ‘YYLOCATION_PRINT’ macro. For instance: - - #define YYLOCATION_PRINT location_print - - -File: bison.info, Node: Location Default Action, Prev: Printing Locations, Up: Tracking Locations - -3.5.4 Default Action for Locations ----------------------------------- - -Actually, actions are not the best place to compute locations. Since -locations are much more general than semantic values, there is room in -the output parser to redefine the default action to take for each rule. -The ‘YYLLOC_DEFAULT’ macro is invoked each time a rule is matched, -before the associated action is run. It is also invoked while -processing a syntax error, to compute the error's location. Before -reporting an unresolvable syntactic ambiguity, a GLR parser invokes -‘YYLLOC_DEFAULT’ recursively to compute the location of that ambiguity. - - Most of the time, this macro is general enough to suppress location -dedicated code from semantic actions. - - The ‘YYLLOC_DEFAULT’ macro takes three parameters. The first one is -the location of the grouping (the result of the computation). When a -rule is matched, the second parameter identifies locations of all right -hand side elements of the rule being matched, and the third parameter is -the size of the rule's right hand side. When a GLR parser reports an -ambiguity, which of multiple candidate right hand sides it passes to -‘YYLLOC_DEFAULT’ is undefined. When processing a syntax error, the -second parameter identifies locations of the symbols that were discarded -during error processing, and the third parameter is the number of -discarded symbols. 
- - By default, ‘YYLLOC_DEFAULT’ is defined this way: - - # define YYLLOC_DEFAULT(Cur, Rhs, N) \ - do \ - if (N) \ - { \ - (Cur).first_line = YYRHSLOC(Rhs, 1).first_line; \ - (Cur).first_column = YYRHSLOC(Rhs, 1).first_column; \ - (Cur).last_line = YYRHSLOC(Rhs, N).last_line; \ - (Cur).last_column = YYRHSLOC(Rhs, N).last_column; \ - } \ - else \ - { \ - (Cur).first_line = (Cur).last_line = \ - YYRHSLOC(Rhs, 0).last_line; \ - (Cur).first_column = (Cur).last_column = \ - YYRHSLOC(Rhs, 0).last_column; \ - } \ - while (0) - -where ‘YYRHSLOC (rhs, k)’ is the location of the Kth symbol in RHS when -K is positive, and the location of the symbol just before the reduction -when K and N are both zero. - - When defining ‘YYLLOC_DEFAULT’, you should consider that: - - • All arguments are free of side-effects. However, only the first - one (the result) should be modified by ‘YYLLOC_DEFAULT’. - - • For consistency with semantic actions, valid indexes within the - right hand side range from 1 to N. When N is zero, only 0 is a - valid index, and it refers to the symbol just before the reduction. - During error processing N is always positive. - - • Your macro should parenthesize its arguments, if need be, since the - actual arguments may not be surrounded by parentheses. Also, your - macro should expand to something that can be used as a single - statement when it is followed by a semicolon. - - -File: bison.info, Node: Named References, Next: Declarations, Prev: Tracking Locations, Up: Grammar File - -3.6 Named References -==================== - -As described in the preceding sections, the traditional way to refer to -any semantic value or location is a “positional reference”, which takes -the form ‘$N’, ‘$$’, ‘@N’, and ‘@$’. However, such a reference is not -very descriptive. Moreover, if you later decide to insert or remove -symbols in the right-hand side of a grammar rule, the need to renumber -such references can be tedious and error-prone. 
- - To avoid these issues, you can also refer to a semantic value or -location using a “named reference”. First of all, original symbol names -may be used as named references. For example: - - invocation: op '(' args ')' - { $invocation = new_invocation ($op, $args, @invocation); } - -Positional and named references can be mixed arbitrarily. For example: - - invocation: op '(' args ')' - { $$ = new_invocation ($op, $args, @$); } - -However, sometimes regular symbol names are not sufficient due to -ambiguities: - - exp: exp '/' exp - { $exp = $exp / $exp; } // $exp is ambiguous. - - exp: exp '/' exp - { $$ = $1 / $exp; } // One usage is ambiguous. - - exp: exp '/' exp - { $$ = $1 / $3; } // No error. - -When ambiguity occurs, explicitly declared names may be used for values -and locations. Explicit names are declared as a bracketed name after a -symbol appearance in rule definitions. For example: - exp[result]: exp[left] '/' exp[right] - { $result = $left / $right; } - -In order to access a semantic value generated by a midrule action, an -explicit name may also be declared by putting a bracketed name after the -closing brace of the midrule action code: - exp[res]: exp[x] '+' {$left = $x;}[left] exp[right] - { $res = $left + $right; } - - In references, in order to specify names containing dots and dashes, -an explicit bracketed syntax ‘$[name]’ and ‘@[name]’ must be used: - if-stmt: "if" '(' expr ')' "then" then.stmt ';' - { $[if-stmt] = new_if_stmt ($expr, $[then.stmt]); } - - It often happens that named references are followed by a dot, dash or -other C punctuation marks and operators. By default, Bison will read -‘$name.suffix’ as a reference to symbol value ‘$name’ followed by -‘.suffix’, i.e., an access to the ‘suffix’ field of the semantic value. -In order to force Bison to recognize ‘name.suffix’ in its entirety as -the name of a semantic value, the bracketed syntax ‘$[name.suffix]’ must -be used. 
- - -File: bison.info, Node: Declarations, Next: Multiple Parsers, Prev: Named References, Up: Grammar File - -3.7 Bison Declarations -====================== - -The “Bison declarations” section of a Bison grammar defines the symbols -used in formulating the grammar and the data types of semantic values. -*Note Symbols::. - - All token kind names (but not single-character literal tokens such as -‘'+'’ and ‘'*'’) must be declared. Nonterminal symbols must be declared -if you need to specify which data type to use for the semantic value -(*note Multiple Types::). - - The first rule in the grammar file also specifies the start symbol, -by default. If you want some other symbol to be the start symbol, you -must declare it explicitly (*note Language and Grammar::). - -* Menu: - -* Require Decl:: Requiring a Bison version. -* Token Decl:: Declaring terminal symbols. -* Precedence Decl:: Declaring terminals with precedence and associativity. -* Type Decl:: Declaring the choice of type for a nonterminal symbol. -* Symbol Decls:: Summary of the Syntax of Symbol Declarations. -* Initial Action Decl:: Code run before parsing starts. -* Destructor Decl:: Declaring how symbols are freed. -* Printer Decl:: Declaring how symbol values are displayed. -* Expect Decl:: Suppressing warnings about parsing conflicts. -* Start Decl:: Specifying the start symbol. -* Pure Decl:: Requesting a reentrant parser. -* Push Decl:: Requesting a push parser. -* Decl Summary:: Table of all Bison declarations. -* %define Summary:: Defining variables to adjust Bison's behavior. -* %code Summary:: Inserting code into the parser source. - - -File: bison.info, Node: Require Decl, Next: Token Decl, Up: Declarations - -3.7.1 Require a Version of Bison --------------------------------- - -You may require the minimum version of Bison to process the grammar. If -the requirement is not met, ‘bison’ exits with an error (exit status -63). 
- - %require "VERSION" - - Some deprecated behaviors are disabled for some required VERSION: -‘"3.2"’ (or better) - The C++ deprecated files ‘position.hh’ and ‘stack.hh’ are no longer - generated. - - -File: bison.info, Node: Token Decl, Next: Precedence Decl, Prev: Require Decl, Up: Declarations - -3.7.2 Token Kind Names ----------------------- - -The basic way to declare a token kind name (terminal symbol) is as -follows: - - %token NAME - - Bison will convert this into a definition in the parser, so that the -function ‘yylex’ (if it is in this file) can use the name NAME to stand -for this token kind's code. - - Alternatively, you can use ‘%left’, ‘%right’, ‘%precedence’, or -‘%nonassoc’ instead of ‘%token’, if you wish to specify associativity -and precedence. *Note Precedence Decl::. However, for clarity, we -recommend to use these directives only to declare associativity and -precedence, and not to add string aliases, semantic types, etc. - - You can explicitly specify the numeric code for a token kind by -appending a nonnegative decimal or hexadecimal integer value in the -field immediately following the token name: - - %token NUM 300 - %token XNUM 0x12d // a GNU extension - -It is generally best, however, to let Bison choose the numeric codes for -all token kinds. Bison will automatically select codes that don't -conflict with each other or with normal characters. - - In the event that the stack type is a union, you must augment the -‘%token’ or other token declaration to include the data type alternative -delimited by angle-brackets (*note Multiple Types::). - - For example: - - %union { /* define stack type */ - double val; - symrec *tptr; - } - %token NUM /* define token NUM and its type */ - - You can associate a literal string token with a token kind name by -writing the literal string at the end of a ‘%token’ declaration which -declares the name. 
For example: - - %token ARROW "=>" - -For example, a grammar for the C language might specify these names with -equivalent literal string tokens: - - %token OR "||" - %token LE 134 "<=" - %left OR "<=" - -Once you equate the literal string and the token kind name, you can use -them interchangeably in further declarations or the grammar rules. The -‘yylex’ function can use the token name or the literal string to obtain -the token kind code (*note Calling Convention::). - - String aliases allow for better error messages using the literal -strings instead of the token names, such as ‘syntax error, unexpected -||, expecting number or (’ rather than ‘syntax error, unexpected OR, -expecting NUM or LPAREN’. - - String aliases may also be marked for internationalization (*note -Token I18n::): - - %token - OR "||" - LPAREN "(" - RPAREN ")" - '\n' _("end of line") - - NUM _("number") - -would produce in French ‘erreur de syntaxe, || inattendu, attendait -nombre ou (’ rather than ‘erreur de syntaxe, || inattendu, attendait -number ou (’. - - -File: bison.info, Node: Precedence Decl, Next: Type Decl, Prev: Token Decl, Up: Declarations - -3.7.3 Operator Precedence -------------------------- - -Use the ‘%left’, ‘%right’, ‘%nonassoc’, or ‘%precedence’ declaration to -declare a token and specify its precedence and associativity, all at -once. These are called “precedence declarations”. *Note Precedence::, -for general information on operator precedence. - - The syntax of a precedence declaration is nearly the same as that of -‘%token’: either - - %left SYMBOLS... - -or - - %left SYMBOLS... - - And indeed any of these declarations serves the purposes of ‘%token’. -But in addition, they specify the associativity and relative precedence -for all the SYMBOLS: - - • The associativity of an operator OP determines how repeated uses of - the operator nest: whether ‘X OP Y OP Z’ is parsed by grouping X - with Y first or by grouping Y with Z first. 
‘%left’ specifies - left-associativity (grouping X with Y first) and ‘%right’ specifies - right-associativity (grouping Y with Z first). ‘%nonassoc’ - specifies no associativity, which means that ‘X OP Y OP Z’ is - considered a syntax error. - - ‘%precedence’ gives only precedence to the SYMBOLS, and defines no - associativity at all. Use this to define precedence only, and - leave any potential conflict due to associativity enabled. - - • The precedence of an operator determines how it nests with other - operators. All the tokens declared in a single precedence - declaration have equal precedence and nest together according to - their associativity. When two tokens declared in different - precedence declarations associate, the one declared later has the - higher precedence and is grouped first. - - For backward compatibility, there is a confusing difference between -the argument lists of ‘%token’ and precedence declarations. Only a -‘%token’ can associate a literal string with a token kind name. A -precedence declaration always interprets a literal string as a reference -to a separate token. For example: - - %left OR "<=" // Does not declare an alias. - %left OR 134 "<=" 135 // Declares 134 for OR and 135 for "<=". - - -File: bison.info, Node: Type Decl, Next: Symbol Decls, Prev: Precedence Decl, Up: Declarations - -3.7.4 Nonterminal Symbols -------------------------- - -When you use ‘%union’ to specify multiple value types, you must declare -the value type of each nonterminal symbol for which values are used. -This is done with a ‘%type’ declaration, like this: - - %type <TYPE> NONTERMINAL... - -Here NONTERMINAL is the name of a nonterminal symbol, and TYPE is the -name given in the ‘%union’ to the alternative that you want (*note Union -Decl::). You can give any number of nonterminal symbols in the same -‘%type’ declaration, if they have the same value type. Use spaces to -separate the symbol names.
- - While POSIX Yacc allows ‘%type’ only for nonterminals, Bison accepts -that this directive be also applied to terminal symbols. To declare -exclusively nonterminal symbols, use the safer ‘%nterm’: - - %nterm <TYPE> NONTERMINAL... - - -File: bison.info, Node: Symbol Decls, Next: Initial Action Decl, Prev: Type Decl, Up: Declarations - -3.7.5 Syntax of Symbol Declarations ------------------------------------ - -The syntax of the various directives to declare symbols is as follows. - - %token TAG? ( ID NUMBER? STRING? )+ ( TAG ( ID NUMBER? STRING? )+ )* - %left TAG? ( ID NUMBER?)+ ( TAG ( ID NUMBER? )+ )* - %type TAG? ( ID | CHAR | STRING )+ ( TAG ( ID | CHAR | STRING )+ )* - %nterm TAG? ID+ ( TAG ID+ )* - -where TAG denotes a type tag such as ‘<ival>’, ID denotes an identifier -such as ‘NUM’, NUMBER a decimal or hexadecimal integer such as ‘300’ or -‘0x12d’, CHAR a character literal such as ‘'+'’, and STRING a string -literal such as ‘"number"’. The postfix quantifiers are ‘?’ (zero or -one), ‘*’ (zero or more) and ‘+’ (one or more). - - The directives ‘%precedence’, ‘%right’ and ‘%nonassoc’ behave like -‘%left’. - - -File: bison.info, Node: Initial Action Decl, Next: Destructor Decl, Prev: Symbol Decls, Up: Declarations - -3.7.6 Performing Actions before Parsing ---------------------------------------- - -Sometimes your parser needs to perform some initializations before -parsing. The ‘%initial-action’ directive allows for such arbitrary -code. - - -- Directive: %initial-action { CODE } - Declare that the braced CODE must be invoked before parsing each - time ‘yyparse’ is called. The CODE may use ‘$$’ (or ‘$<TAG>$’) and - ‘@$’ -- initial value and location of the lookahead -- and the - ‘%parse-param’.
- - For instance, if your locations use a file name, you may use - - %parse-param { char const *file_name }; - %initial-action - { - @$.initialize (file_name); - }; - - -File: bison.info, Node: Destructor Decl, Next: Printer Decl, Prev: Initial Action Decl, Up: Declarations - -3.7.7 Freeing Discarded Symbols -------------------------------- - -During error recovery (*note Error Recovery::), symbols already pushed -on the stack and tokens coming from the rest of the file are discarded -until the parser falls on its feet. If the parser runs out of memory, -or if it returns via ‘YYABORT’, ‘YYACCEPT’ or ‘YYNOMEM’, all the symbols -on the stack must be discarded. Even if the parser succeeds, it must -discard the start symbol. - - When discarded symbols convey heap based information, this memory is -lost. While this behavior can be tolerable for batch parsers, such as -in traditional compilers, it is unacceptable for programs like shells or -protocol implementations that may parse and execute indefinitely. - - The ‘%destructor’ directive defines code that is called when a symbol -is automatically discarded. - - -- Directive: %destructor { CODE } SYMBOLS - Invoke the braced CODE whenever the parser discards one of the - SYMBOLS. Within CODE, ‘$$’ (or ‘$$’) designates the semantic - value associated with the discarded symbol, and ‘@$’ designates its - location. The additional parser parameters are also available - (*note Parser Function::). - - When a symbol is listed among SYMBOLS, its ‘%destructor’ is called - a per-symbol ‘%destructor’. You may also define a per-type - ‘%destructor’ by listing a semantic type tag among SYMBOLS. In - that case, the parser will invoke this CODE whenever it discards - any grammar symbol that has that semantic type tag unless that - symbol has its own per-symbol ‘%destructor’. - - Finally, you can define two different kinds of default - ‘%destructor’s. 
You can place each of ‘<*>’ and ‘<>’ in the - SYMBOLS list of exactly one ‘%destructor’ declaration in your - grammar file. The parser will invoke the CODE associated with one - of these whenever it discards any user-defined grammar symbol that - has no per-symbol and no per-type ‘%destructor’. The parser uses - the CODE for ‘<*>’ in the case of such a grammar symbol for which - you have formally declared a semantic type tag (‘%token’, ‘%nterm’, - and ‘%type’ count as such a declaration, but ‘$<TAG>$’ does not). - The parser uses the CODE for ‘<>’ in the case of such a grammar - symbol that has no declared semantic type tag. - -For example: - - %union { char *string; } - %token <string> STRING1 STRING2 - %nterm <string> string1 string2 - %union { char character; } - %token <character> CHR - %nterm <character> chr - %token TAGLESS - - %destructor { } <character> - %destructor { free ($$); } <*> - %destructor { free ($$); printf ("%d", @$.first_line); } STRING1 string1 - %destructor { printf ("Discarding tagless symbol.\n"); } <> - -guarantees that, when the parser discards any user-defined symbol that -has a semantic type tag other than ‘<character>’, it passes its semantic -value to ‘free’ by default. However, when the parser discards a -‘STRING1’ or a ‘string1’, it uses the third ‘%destructor’, which frees -it and prints its line number to ‘stdout’ (‘free’ is invoked only once). -Finally, the parser merely prints a message whenever it discards any -symbol, such as ‘TAGLESS’, that has no semantic type tag. - - A Bison-generated parser invokes the default ‘%destructor’s only for -user-defined as opposed to Bison-defined symbols. For example, the -parser will not invoke either kind of default ‘%destructor’ for the -special Bison-defined symbols ‘$accept’, ‘$undefined’, or ‘$end’ (*note -Table of Symbols::), none of which you can reference in your grammar. -It also will not invoke either for the ‘error’ token (*note Table of -Symbols::), which is always defined by Bison regardless of whether you -reference it in your grammar.
However, it may invoke one of them for -the end token (token 0) if you redefine it from ‘$end’ to, for example, -‘END’: - - %token END 0 - - Finally, Bison will never invoke a ‘%destructor’ for an unreferenced -midrule semantic value (*note Midrule Actions::). That is, Bison does -not consider a midrule to have a semantic value if you do not reference -‘$$’ in the midrule's action or ‘$N’ (where N is the right-hand side -symbol position of the midrule) in any later action in that rule. -However, if you do reference either, the Bison-generated parser will -invoke the ‘<>’ ‘%destructor’ whenever it discards the midrule symbol. - - - “Discarded symbols” are the following: - - • stacked symbols popped during the first phase of error recovery, - • incoming terminals during the second phase of error recovery, - • the current lookahead and the entire stack (except the current - right-hand side symbols) when the parser returns immediately, and - • the current lookahead and the entire stack (including the current - right-hand side symbols) when the C++ parser (‘lalr1.cc’) catches - an exception in ‘parse’, - • the start symbol, when the parser succeeds. - - The parser can “return immediately” because of an explicit call to -‘YYABORT’, ‘YYACCEPT’ or ‘YYNOMEM’, or failed error recovery, or memory -exhaustion. - - Right-hand side symbols of a rule that explicitly triggers a syntax -error via ‘YYERROR’ are not discarded automatically. As a rule of -thumb, destructors are invoked only when user actions cannot manage the -memory. - - -File: bison.info, Node: Printer Decl, Next: Expect Decl, Prev: Destructor Decl, Up: Declarations - -3.7.8 Printing Semantic Values ------------------------------- - -When run-time traces are enabled (*note Tracing::), the parser reports -its actions, such as reductions. When a symbol involved in an action is -reported, only its kind is displayed, as the parser cannot know how -semantic values should be formatted. 
- - The ‘%printer’ directive defines code that is called when a symbol is -reported. Its syntax is the same as ‘%destructor’ (*note Destructor -Decl::). - - -- Directive: %printer { CODE } SYMBOLS - Invoke the braced CODE whenever the parser displays one of the - SYMBOLS. Within CODE, ‘yyo’ denotes the output stream (a ‘FILE*’ - in C, an ‘std::ostream&’ in C++, and ‘stdout’ in D), ‘$$’ (or - ‘$<TAG>$’) designates the semantic value associated with the - symbol, and ‘@$’ its location. The additional parser parameters - are also available (*note Parser Function::). - - The SYMBOLS are defined as for ‘%destructor’ (*note Destructor - Decl::.): they can be per-type (e.g., ‘<ival>’), per-symbol (e.g., - ‘exp’, ‘NUM’, ‘"float"’), typed per-default (i.e., ‘<*>’), or - untyped per-default (i.e., ‘<>’). - -For example: - - %union { char *string; } - %token <string> STRING1 STRING2 - %nterm <string> string1 string2 - %union { char character; } - %token <character> CHR - %nterm <character> chr - %token TAGLESS - - %printer { fprintf (yyo, "'%c'", $$); } <character> - %printer { fprintf (yyo, "&%p", $$); } <*> - %printer { fprintf (yyo, "\"%s\"", $$); } STRING1 string1 - %printer { fprintf (yyo, "<>"); } <> - -guarantees that, when the parser prints any symbol that has a semantic -type tag other than ‘<character>’, it displays the address of the -semantic value by default. However, when the parser displays a -‘STRING1’ or a ‘string1’, it formats it as a string in double quotes. -It performs only the second ‘%printer’ in this case, so it prints only -once. Finally, the parser prints ‘<>’ for any symbol, such as ‘TAGLESS’, -that has no semantic type tag. *Note Mfcalc Traces::, for a complete -example.
- - -File: bison.info, Node: Expect Decl, Next: Start Decl, Prev: Printer Decl, Up: Declarations - -3.7.9 Suppressing Conflict Warnings ------------------------------------ - -Bison normally warns if there are any conflicts in the grammar (*note -Shift/Reduce::), but most real grammars have harmless shift/reduce -conflicts which are resolved in a predictable way and would be difficult -to eliminate. It is desirable to suppress the warning about these -conflicts unless the number of conflicts changes. You can do this with -the ‘%expect’ declaration. - - The declaration looks like this: - - %expect N - - Here N is a decimal integer. The declaration says there should be N -shift/reduce conflicts and no reduce/reduce conflicts. Bison reports an -error if the number of shift/reduce conflicts differs from N, or if -there are any reduce/reduce conflicts. - - For deterministic parsers, reduce/reduce conflicts are more serious, -and should be eliminated entirely. Bison will always report -reduce/reduce conflicts for these parsers. With GLR parsers, however, -both kinds of conflicts are routine; otherwise, there would be no need -to use GLR parsing. Therefore, it is also possible to specify an -expected number of reduce/reduce conflicts in GLR parsers, using the -declaration: - - %expect-rr N - - You may wish to be more specific in your specification of expected -conflicts. To this end, you can also attach ‘%expect’ and ‘%expect-rr’ -modifiers to individual rules. The interpretation of these modifiers -differs from their use as declarations. When attached to rules, they -indicate the number of states in which the rule is involved in a -conflict. You will need to consult the output resulting from ‘-v’ to -determine appropriate numbers to use. For example, for the following -grammar fragment, the first rule for ‘empty_dims’ appears in two states -in which the ‘[’ token is a lookahead. 
Having determined that, you can -document this fact with an ‘%expect’ modifier as follows: - - dims: - empty_dims - | '[' expr ']' dims - ; - - empty_dims: - %empty %expect 2 - | empty_dims '[' ']' - ; - - Mid-rule actions generate implicit rules that are also subject to -conflicts (*note Midrule Conflicts::). To attach an ‘%expect’ or -‘%expect-rr’ annotation to an implicit mid-rule action's rule, put it -before the action. For example, - - %glr-parser - %expect-rr 1 - - %% - - clause: - "condition" %expect-rr 1 { value_mode(); } '(' exprs ')' - | "condition" %expect-rr 1 { class_mode(); } '(' types ')' - ; - -Here, the appropriate mid-rule action will not be determined until after -the ‘(’ token is shifted. Thus, the two actions will clash with each -other, and we should expect one reduce/reduce conflict for each. - - In general, using ‘%expect’ involves these steps: - - • Compile your grammar without ‘%expect’. Use the ‘-v’ option to get - a verbose list of where the conflicts occur. Bison will also print - the number of conflicts. - - • Check each of the conflicts to make sure that Bison's default - resolution is what you really want. If not, rewrite the grammar - and go back to the beginning. - - • Add an ‘%expect’ declaration, copying the number N from the number - that Bison printed. With GLR parsers, add an ‘%expect-rr’ - declaration as well. - - • Optionally, count up the number of states in which one or more - conflicted reductions for particular rules appear and add these - numbers to the affected rules as ‘%expect-rr’ or ‘%expect’ - modifiers as appropriate. Rules that are in conflict appear in the - output listing surrounded by square brackets or, in the case of - reduce/reduce conflicts, as reductions having the same lookahead - symbol as a square-bracketed reduction in the same state. - - Now Bison will report an error if you introduce an unexpected -conflict, but will keep silent otherwise. 
- - -File: bison.info, Node: Start Decl, Next: Pure Decl, Prev: Expect Decl, Up: Declarations - -3.7.10 The Start-Symbol ------------------------ - -Bison assumes by default that the start symbol for the grammar is the -first nonterminal specified in the grammar specification section. The -programmer may override this restriction with the ‘%start’ declaration -as follows: - - %start SYMBOL - - -File: bison.info, Node: Pure Decl, Next: Push Decl, Prev: Start Decl, Up: Declarations - -3.7.11 A Pure (Reentrant) Parser --------------------------------- - -A “reentrant” program is one which does not alter in the course of -execution; in other words, it consists entirely of “pure” (read-only) -code. Reentrancy is important whenever asynchronous execution is -possible; for example, a nonreentrant program may not be safe to call -from a signal handler. In systems with multiple threads of control, a -nonreentrant program must be called only within interlocks. - - Normally, Bison generates a parser which is not reentrant. This is -suitable for most uses, and it permits compatibility with Yacc. (The -standard Yacc interfaces are inherently nonreentrant, because they use -statically allocated variables for communication with ‘yylex’, including -‘yylval’ and ‘yylloc’.) - - Alternatively, you can generate a pure, reentrant parser. The Bison -declaration ‘%define api.pure’ says that you want the parser to be -reentrant. It looks like this: - - %define api.pure full - - The result is that the communication variables ‘yylval’ and ‘yylloc’ -become local variables in ‘yyparse’, and a different calling convention -is used for the lexical analyzer function ‘yylex’. *Note Pure -Calling::, for the details of this. The variable ‘yynerrs’ becomes -local in ‘yyparse’ in pull mode but it becomes a member of ‘yypstate’ in -push mode. (*note Error Reporting Function::). The convention for -calling ‘yyparse’ itself is unchanged. 
- - Whether the parser is pure has nothing to do with the grammar rules. -You can generate either a pure parser or a nonreentrant parser from any -valid grammar. - - -File: bison.info, Node: Push Decl, Next: Decl Summary, Prev: Pure Decl, Up: Declarations - -3.7.12 A Push Parser --------------------- - -A pull parser is called once and it takes control until all its input is -completely parsed. A push parser, on the other hand, is called each -time a new token is made available. - - A push parser is typically useful when the parser is part of a main -event loop in the client's application. This is typically a requirement -of a GUI, when the main event loop needs to be triggered within a -certain time period. - - Normally, Bison generates a pull parser. The following Bison -declaration says that you want the parser to be a push parser (*note -%define Summary::): - - %define api.push-pull push - - In almost all cases, you want to ensure that your push parser is also -a pure parser (*note Pure Decl::). The only time you should create an -impure push parser is to have backwards compatibility with the impure -Yacc pull mode interface. Unless you know what you are doing, your -declarations should look like this: - - %define api.pure full - %define api.push-pull push - - There is a major notable functional difference between the pure push -parser and the impure push parser. It is acceptable for a pure push -parser to have many parser instances, of the same type of parser, in -memory at the same time. An impure push parser should only use one -parser at a time. - - When a push parser is selected, Bison will generate some new symbols -in the generated parser. ‘yypstate’ is a structure that the generated -parser uses to store the parser's state. ‘yypstate_new’ is the function -that will create a new parser instance. ‘yypstate_delete’ will free the -resources associated with the corresponding parser instance. 
Finally, -‘yypush_parse’ is the function that should be called whenever a token is -available to provide the parser. A trivial example of using a pure push -parser would look like this: - - int status; - yypstate *ps = yypstate_new (); - do { - status = yypush_parse (ps, yylex (), NULL); - } while (status == YYPUSH_MORE); - yypstate_delete (ps); - - If the user decided to use an impure push parser, a few things about -the generated parser will change. The ‘yychar’ variable becomes a -global variable instead of a local one in the ‘yypush_parse’ function. -For this reason, the signature of the ‘yypush_parse’ function is changed -to remove the token as a parameter. A nonreentrant push parser example -would thus look like this: - - extern int yychar; - int status; - yypstate *ps = yypstate_new (); - do { - yychar = yylex (); - status = yypush_parse (ps); - } while (status == YYPUSH_MORE); - yypstate_delete (ps); - - That's it. Notice the next token is put into the global variable -‘yychar’ for use by the next invocation of the ‘yypush_parse’ function. - - Bison also supports both the push parser interface along with the -pull parser interface in the same generated parser. In order to get -this functionality, you should replace the ‘%define api.push-pull push’ -declaration with the ‘%define api.push-pull both’ declaration. Doing -this will create all of the symbols mentioned earlier along with the two -extra symbols, ‘yyparse’ and ‘yypull_parse’. ‘yyparse’ can be used -exactly as it normally would be used. However, the user should note -that it is implemented in the generated parser by calling -‘yypull_parse’. This makes the ‘yyparse’ function that is generated -with the ‘%define api.push-pull both’ declaration slower than the normal -‘yyparse’ function. If the user calls the ‘yypull_parse’ function it -will parse the rest of the input stream. It is possible to -‘yypush_parse’ tokens to select a subgrammar and then ‘yypull_parse’ the -rest of the input stream. 
If you would like to switch back and forth -between parsing styles, you would have to write your own -‘yypull_parse’ function that knows when to quit looking for input. An -example of using the ‘yypull_parse’ function would look like this: - - yypstate *ps = yypstate_new (); - yypull_parse (ps); /* Will call the lexer */ - yypstate_delete (ps); - - Adding the ‘%define api.pure’ declaration does exactly the same thing -to the generated parser with ‘%define api.push-pull both’ as it did for -‘%define api.push-pull push’. - - -File: bison.info, Node: Decl Summary, Next: %define Summary, Prev: Push Decl, Up: Declarations - -3.7.13 Bison Declaration Summary --------------------------------- - -Here is a summary of the declarations used to define a grammar: - - -- Directive: %union - Declare the collection of data types that semantic values may have - (*note Union Decl::). - - -- Directive: %token - Declare a terminal symbol (token kind name) with no precedence or - associativity specified (*note Token Decl::). - - -- Directive: %right - Declare a terminal symbol (token kind name) that is - right-associative (*note Precedence Decl::). - - -- Directive: %left - Declare a terminal symbol (token kind name) that is - left-associative (*note Precedence Decl::). - - -- Directive: %nonassoc - Declare a terminal symbol (token kind name) that is nonassociative - (*note Precedence Decl::). Using it in a way that would be - associative is a syntax error. - - -- Directive: %nterm - Declare the type of semantic values for a nonterminal symbol (*note - Type Decl::). - - -- Directive: %type - Declare the type of semantic values for a symbol (*note Type - Decl::). - - -- Directive: %start - Specify the grammar's start symbol (*note Start Decl::). - - -- Directive: %expect - Declare the expected number of shift/reduce conflicts, either - overall or for a given rule (*note Expect Decl::).
- - -- Directive: %expect-rr - Declare the expected number of reduce/reduce conflicts, either - overall or for a given rule (*note Expect Decl::). - - -In order to change the behavior of ‘bison’, use the following -directives: - - -- Directive: %code {CODE} - -- Directive: %code QUALIFIER {CODE} - Insert CODE verbatim into the output parser source at the default - location or at the location specified by QUALIFIER. *Note %code - Summary::. - - -- Directive: %debug - Instrument the parser for traces. Obsoleted by ‘%define - parse.trace’. *Note Tracing::. - - -- Directive: %define VARIABLE - -- Directive: %define VARIABLE VALUE - -- Directive: %define VARIABLE {VALUE} - -- Directive: %define VARIABLE "VALUE" - Define a variable to adjust Bison's behavior. *Note %define - Summary::. - - -- Directive: %defines - -- Directive: %defines DEFINES-FILE - Historical name for ‘%header’. *Note ‘%header’: %header. - - -- Directive: %destructor - Specify how the parser should reclaim the memory associated to - discarded symbols. *Note Destructor Decl::. - - -- Directive: %file-prefix "PREFIX" - Specify a prefix to use for all Bison output file names. The names - are chosen as if the grammar file were named ‘PREFIX.y’. - - -- Directive: %header - Write a parser header file containing definitions for the token - kind names defined in the grammar as well as a few other - declarations. If the parser implementation file is named ‘NAME.c’ - then the parser header file is named ‘NAME.h’. - - For C parsers, the parser header file declares ‘YYSTYPE’ unless - ‘YYSTYPE’ is already defined as a macro or you have used a ‘<TYPE>’ - tag without using ‘%union’.
Therefore, if you are using a ‘%union’ - (*note Multiple Types::) with components that require other - definitions, or if you have defined a ‘YYSTYPE’ macro or type - definition (*note Value Type::), you need to arrange for these - definitions to be propagated to all modules, e.g., by putting them - in a prerequisite header that is included both by your parser and - by any other module that needs ‘YYSTYPE’. - - Unless your parser is pure, the parser header file declares - ‘yylval’ as an external variable. *Note Pure Decl::. - - If you have also used locations, the parser header file declares - ‘YYLTYPE’ and ‘yylloc’ using a protocol similar to that of the - ‘YYSTYPE’ macro and ‘yylval’. *Note Tracking Locations::. - - This parser header file is normally essential if you wish to put - the definition of ‘yylex’ in a separate source file, because - ‘yylex’ typically needs to be able to refer to the above-mentioned - declarations and to the token kind codes. *Note Token Values::. - - If you have declared ‘%code requires’ or ‘%code provides’, the - output header also contains their code. *Note %code Summary::. - - The generated header is protected against multiple inclusions with - a C preprocessor guard: ‘YY_PREFIX_FILE_INCLUDED’, where PREFIX and - FILE are the prefix (*note Multiple Parsers::) and generated file - name turned uppercase, with each series of non alphanumerical - characters converted to a single underscore. - - For instance with ‘%define api.prefix {calc}’ and ‘%header - "lib/parse.h"’, the header will be guarded as follows. - #ifndef YY_CALC_LIB_PARSE_H_INCLUDED - # define YY_CALC_LIB_PARSE_H_INCLUDED - ... - #endif /* ! YY_CALC_LIB_PARSE_H_INCLUDED */ - - Introduced in Bison 3.8. - - -- Directive: %header HEADER-FILE - Same as above, but save in the file ‘HEADER-FILE’. - - -- Directive: %language "LANGUAGE" - Specify the programming language for the generated parser. - Currently supported languages include C, C++, D and Java. 
LANGUAGE - is case-insensitive. - - -- Directive: %locations - Generate the code processing the locations (*note Action - Features::). This mode is enabled as soon as the grammar uses the - special ‘@N’ tokens, but if your grammar does not use it, using - ‘%locations’ allows for more accurate syntax error messages. - - -- Directive: %name-prefix "PREFIX" - Obsoleted by ‘%define api.prefix {PREFIX}’. *Note Multiple - Parsers::. For C++ parsers, see the ‘%define api.namespace’ - documentation in this section. - - Rename the external symbols used in the parser so that they start - with PREFIX instead of ‘yy’. The precise list of symbols renamed - in C parsers is ‘yyparse’, ‘yylex’, ‘yyerror’, ‘yynerrs’, ‘yylval’, - ‘yychar’, ‘yydebug’, and (if locations are used) ‘yylloc’. If you - use a push parser, ‘yypush_parse’, ‘yypull_parse’, ‘yypstate’, - ‘yypstate_new’ and ‘yypstate_delete’ will also be renamed. For - example, if you use ‘%name-prefix "c_"’, the names become - ‘c_parse’, ‘c_lex’, and so on. - - Contrary to defining ‘api.prefix’, some symbols are _not_ renamed - by ‘%name-prefix’, for instance ‘YYDEBUG’, ‘YYTOKENTYPE’, - ‘yytoken_kind_t’, ‘YYSTYPE’, ‘YYLTYPE’. - - -- Directive: %no-lines - Don't generate any ‘#line’ preprocessor commands in the parser - implementation file. Ordinarily Bison writes these commands in the - parser implementation file so that the C compiler and debuggers - will associate errors and object code with your source file (the - grammar file). This directive causes them to associate errors with - the parser implementation file, treating it as an independent - source file in its own right. - - -- Directive: %output "FILE" - Generate the parser implementation in ‘FILE’. - - -- Directive: %pure-parser - Deprecated version of ‘%define api.pure’ (*note %define Summary::), - for which Bison is more careful to warn about unreasonable usage. - - -- Directive: %require "VERSION" - Require version VERSION or higher of Bison. *Note Require Decl::. 
- - -- Directive: %skeleton "FILE" - Specify the skeleton to use. - - If FILE does not contain a ‘/’, FILE is the name of a skeleton file - in the Bison installation directory. If it does, FILE is an - absolute file name or a file name relative to the directory of the - grammar file. This is similar to how most shells resolve commands. - - -- Directive: %token-table - This feature is obsolescent, avoid it in new projects. - - Generate an array of token names in the parser implementation file. - The name of the array is ‘yytname’; ‘yytname[I]’ is the name of the - token whose internal Bison token code is I. The first three - elements of ‘yytname’ correspond to the predefined tokens ‘"$end"’, - ‘"error"’, and ‘"$undefined"’; after these come the symbols defined - in the grammar file. - - The name in the table includes all the characters needed to - represent the token in Bison. For single-character literals and - literal strings, this includes the surrounding quoting characters - and any escape sequences. For example, the Bison single-character - literal ‘'+'’ corresponds to a three-character name, represented in - C as ‘"'+'"’; and the Bison two-character literal string ‘"\\/"’ - corresponds to a five-character name, represented in C as - ‘"\"\\\\/\""’. - - When you specify ‘%token-table’, Bison also generates macro - definitions for macros ‘YYNTOKENS’, ‘YYNNTS’, ‘YYNRULES’, and - ‘YYNSTATES’: - - ‘YYNTOKENS’ - The number of terminal symbols, i.e., the highest token code, - plus one. - ‘YYNNTS’ - The number of nonterminal symbols. - ‘YYNRULES’ - The number of grammar rules. - ‘YYNSTATES’ - The number of parser states (*note Parser States::). - - Here's code for looking up a multicharacter token in ‘yytname’, - assuming that the characters of the token are stored in - ‘token_buffer’, and assuming that the token does not contain any - characters like ‘"’ that require escaping.
- - for (int i = 0; i < YYNTOKENS; i++) - if (yytname[i] - && yytname[i][0] == '"' - && ! strncmp (yytname[i] + 1, token_buffer, - strlen (token_buffer)) - && yytname[i][strlen (token_buffer) + 1] == '"' - && yytname[i][strlen (token_buffer) + 2] == 0) - break; - - This method is discouraged: the primary purpose of string aliases - is forging good error messages, not describing the spelling of - keywords. In addition, looking for the token kind at runtime - incurs a (small but noticeable) cost. - - Finally, ‘%token-table’ is incompatible with the ‘custom’ and - ‘detailed’ values of the ‘parse.error’ ‘%define’ variable. - - -- Directive: %verbose - Write an extra output file containing verbose descriptions of the - parser states and what is done for each type of lookahead token in - that state. *Note Understanding::, for more information. - - -- Directive: %yacc - Pretend the option ‘--yacc’ was given (*note ‘--yacc’: - option-yacc.), i.e., imitate Yacc, including its naming - conventions. Only makes sense with the ‘yacc.c’ skeleton. *Note - Tuning the Parser::, for more. - - Of course, being a Bison extension, ‘%yacc’ is somewhat - self-contradictory... - - -File: bison.info, Node: %define Summary, Next: %code Summary, Prev: Decl Summary, Up: Declarations - -3.7.14 %define Summary ----------------------- - -There are many features of Bison's behavior that can be controlled by -assigning the feature a single value. For historical reasons, some such -features are assigned values by dedicated directives, such as ‘%start’, -which assigns the start symbol. However, newer such features are -associated with variables, which are assigned by the ‘%define’ -directive: - - -- Directive: %define VARIABLE - -- Directive: %define VARIABLE VALUE - -- Directive: %define VARIABLE {VALUE} - -- Directive: %define VARIABLE "VALUE" - Define VARIABLE to VALUE. - - The type of the values depend on the syntax. Braces denote value - in the target language (e.g., a namespace, a type, etc.). 
Keyword - values (no delimiters) denote finite choice (e.g., a variation of a - feature). String values denote remaining cases (e.g., a file - name). - - It is an error if a VARIABLE is defined by ‘%define’ multiple - times, but see *note ‘-D NAME[=VALUE]’: Tuning the Parser. - - The rest of this section summarizes variables and values that -‘%define’ accepts. - - Some VARIABLEs take Boolean values. In this case, Bison will -complain if the variable definition does not meet one of the following -four conditions: - - 1. ‘VALUE’ is ‘true’ - - 2. ‘VALUE’ is omitted (or ‘""’ is specified). This is equivalent to - ‘true’. - - 3. ‘VALUE’ is ‘false’. - - 4. VARIABLE is never defined. In this case, Bison selects a default - value. - - What VARIABLEs are accepted, as well as their meanings and default -values, depend on the selected target language and/or the parser -skeleton (*note Decl Summary::, *note Decl Summary::). Unaccepted -VARIABLEs produce an error. Some of the accepted VARIABLEs are -described below. - - -- Directive: %define api.filename.type {TYPE} - - • Language(s): C++ - - • Purpose: Define the type of file names in Bison's default - location and position types. *Note Exposing the Location - Classes::. - - • Accepted Values: Any type that is printable (via streams) and - comparable (with ‘==’ and ‘!=’). - - • Default Value: ‘const std::string’. - - • History: Introduced in Bison 2.0 as ‘filename_type’ (with - ‘std::string’ as default), renamed as ‘api.filename.type’ in - Bison 3.7 (with ‘const std::string’ as default). - - -- Directive: %define api.header.include {"header.h"} - -- Directive: %define api.header.include {} - • Languages(s): C (‘yacc.c’) - - • Purpose: Specify how the generated parser should include the - generated header. - - Historically, when option ‘-d’ or ‘--header’ was used, ‘bison’ - generated a header and pasted an exact copy of it into the - generated parser implementation file. 
Since Bison 3.6, it is - ‘#include’d as ‘"BASENAME.h"’, instead of duplicated, unless - FILE is ‘y.tab’, see below. - - The ‘api.header.include’ variable allows to control how the - generated parser ‘#include’s the generated header. For - instance: - - %define api.header.include {"parse.h"} - - or - - %define api.header.include {} - - Using ‘api.header.include’ does not change the name of the - generated header, only how it is included. - - To work around limitations of Automake's ‘ylwrap’ (which runs - ‘bison’ with ‘--yacc’), ‘api.header.include’ is _not_ - predefined when the output file is ‘y.tab.c’. Define it to - avoid the duplication. - - • Accepted Values: An argument for ‘#include’. - - • Default Value: ‘"HEADER-BASENAME"’, unless the header file is - ‘y.tab.h’, where HEADER-BASENAME is the name of the generated - header, without directory part. For instance with ‘bison -d - calc/parse.y’, ‘api.header.include’ defaults to ‘"parse.h"’, - not ‘"calc/parse.h"’. - - • History: Introduced in Bison 3.4. Defaults to ‘"BASENAME.h"’ - since Bison 3.7, unless the header file is ‘y.tab.h’. - - -- Directive: %define api.location.file "FILE" - -- Directive: %define api.location.file none - - • Language(s): C++ - - • Purpose: Define the name of the file in which Bison's default - location and position types are generated. *Note Exposing the - Location Classes::. - - • Accepted Values: - ‘none’ - If locations are enabled, generate the definition of the - ‘position’ and ‘location’ classes in the header file if - ‘%header’, otherwise in the parser implementation. - - "FILE" - Generate the definition of the ‘position’ and ‘location’ - classes in FILE. This file name can be relative (to - where the parser file is output) or absolute. - - • Default Value: Not applicable if locations are not enabled, or - if a user location type is specified (see - ‘api.location.type’). Otherwise, Bison's ‘location’ is - generated in ‘location.hh’ (*note C++ location::). 
- - • History: Introduced in Bison 3.2. - - -- Directive: %define api.location.include {"FILE"} - -- Directive: %define api.location.include {} - - • Language(s): C++ - - • Purpose: Specify how the generated file that defines the - ‘position’ and ‘location’ classes is included. This makes - sense when the ‘location’ class is exposed to the rest of your - application/library in another directory. *Note Exposing the - Location Classes::. - - • Accepted Values: Argument for ‘#include’. - - • Default Value: ‘"DIR/location.hh"’ where DIR is the directory - part of the output. For instance ‘src/parse’ if - ‘--output=src/parse/parser.cc’ was given. - - • History: Introduced in Bison 3.2. - - -- Directive: %define api.location.type {TYPE} - - • Language(s): C, C++, Java - - • Purpose: Define the location type. *Note Location Type::, and - *note User Defined Location Type::. - - • Accepted Values: String - - • Default Value: none - - • History: Introduced in Bison 2.7 for C++ and Java, in Bison - 3.4 for C. Was originally named ‘location_type’ in Bison 2.5 - and 2.6. - - -- Directive: %define api.namespace {NAMESPACE} - • Languages(s): C++ - - • Purpose: Specify the namespace for the parser class. For - example, if you specify: - - %define api.namespace {foo::bar} - - Bison uses ‘foo::bar’ verbatim in references such as: - - foo::bar::parser::value_type - - However, to open a namespace, Bison removes any leading ‘::’ - and then splits on any remaining occurrences: - - namespace foo { namespace bar { - class position; - class location; - } } - - • Accepted Values: Any absolute or relative C++ namespace - reference without a trailing ‘"::"’. For example, ‘"foo"’ or - ‘"::foo::bar"’. - - • Default Value: ‘yy’, unless you used the obsolete - ‘%name-prefix "PREFIX"’ directive. - - -- Directive: %define api.parser.class {NAME} - • Language(s): C++, Java, D - - • Purpose: The name of the parser class. - - • Accepted Values: Any valid identifier. 
- - • Default Value: In C++, ‘parser’. In D and Java, ‘YYParser’ or - ‘API.PREFIXParser’ (*note Java Bison Interface::). - - • History: Introduced in Bison 3.3 to replace - ‘parser_class_name’. - - -- Directive: %define api.prefix {PREFIX} - - • Language(s): C, C++, Java - - • Purpose: Rename exported symbols. *Note Multiple Parsers::. - - • Accepted Values: String - - • Default Value: ‘YY’ for Java, ‘yy’ otherwise. - - • History: introduced in Bison 2.6, with its argument in double - quotes. Uses braces since Bison 3.0 (double quotes are still - supported for backward compatibility). - - -- Directive: %define api.pure PURITY - - • Language(s): C - - • Purpose: Request a pure (reentrant) parser program. *Note - Pure Decl::. - - • Accepted Values: ‘true’, ‘false’, ‘full’ - - The value may be omitted: this is equivalent to specifying - ‘true’, as is the case for Boolean values. - - When ‘%define api.pure full’ is used, the parser is made - reentrant. This changes the signature for ‘yylex’ (*note Pure - Calling::), and also that of ‘yyerror’ when the tracking of - locations has been activated, as shown below. - - The ‘true’ value is very similar to the ‘full’ value, the only - difference is in the signature of ‘yyerror’ on Yacc parsers - without ‘%parse-param’, for historical reasons. - - I.e., if ‘%locations %define api.pure’ is passed then the - prototypes for ‘yyerror’ are: - - void yyerror (char const *msg); // Yacc parsers. - void yyerror (YYLTYPE *locp, char const *msg); // GLR parsers. 
- - But if ‘%locations %define api.pure %parse-param {int - *nastiness}’ is used, then both parsers have the same - signature: - - void yyerror (YYLTYPE *llocp, int *nastiness, char const *msg); - - (*note Error Reporting Function::) - - • Default Value: ‘false’ - - • History: the ‘full’ value was introduced in Bison 2.7 - - -- Directive: %define api.push-pull KIND - - • Language(s): C (deterministic parsers only), D, Java - - • Purpose: Request a pull parser, a push parser, or both. *Note - Push Decl::. - - • Accepted Values: ‘pull’, ‘push’, ‘both’ - - • Default Value: ‘pull’ - - -- Directive: %define api.symbol.prefix {PREFIX} - - • Languages(s): all - - • Purpose: Add a prefix to the name of the symbol kinds. For - instance - - %define api.symbol.prefix {S_} - %token FILE for ERROR - %% - start: FILE for ERROR; - - generates this definition in C: - - /* Symbol kind. */ - enum yysymbol_kind_t - { - S_YYEMPTY = -2, /* No symbol. */ - S_YYEOF = 0, /* $end */ - S_YYERROR = 1, /* error */ - S_YYUNDEF = 2, /* $undefined */ - S_FILE = 3, /* FILE */ - S_for = 4, /* for */ - S_ERROR = 5, /* ERROR */ - S_YYACCEPT = 6, /* $accept */ - S_start = 7 /* start */ - }; - - • Accepted Values: Any non empty string. Must be a valid - identifier in the target language (typically a non empty - sequence of letters, underscores, and --not at the beginning-- - digits). - - The empty prefix is (generally) invalid: - • in C it would create collision with the ‘YYERROR’ macro, - and potentially token kind definitions and symbol kind - definitions would collide; - • unnamed symbols (such as ‘'+'’) have a name which starts - with a digit; - • even in languages with scoped enumerations such as Java, - an empty prefix is dangerous: symbol names may collide - with the target language keywords, or with other members - of the ‘SymbolKind’ class. - - • Default Value: ‘YYSYMBOL_’ in C, ‘S_’ in C++ and Java, empty - in D. - • History: introduced in Bison 3.6. 
- - -- Directive: %define api.token.constructor - - • Language(s): C++, D - - • Purpose: Request that symbols be handled as a whole (type, - value, and possibly location) in the scanner. In the case of - C++, it works only when variant-based semantic values are - enabled (*note C++ Variants::), see *note Complete Symbols::, - for details. In D, token constructors work with both ‘%union’ - and ‘%define api.value.type union’. - - • Accepted Values: Boolean. - - • Default Value: ‘false’ - • History: introduced in Bison 3.0. - - -- Directive: %define api.token.prefix {PREFIX} - • Languages(s): all - - • Purpose: Add a prefix to the token names when generating their - definition in the target language. For instance - - %define api.token.prefix {TOK_} - %token FILE for ERROR - %% - start: FILE for ERROR; - - generates the definition of the symbols ‘TOK_FILE’, ‘TOK_for’, - and ‘TOK_ERROR’ in the generated source files. In particular, - the scanner must use these prefixed token names, while the - grammar itself may still use the short names (as in the sample - rule given above). The generated informational files - (‘*.output’, ‘*.xml’, ‘*.gv’) are not modified by this prefix. - - Bison also prefixes the generated member names of the semantic - value union. *Note Type Generation::, for more details. - - See *note Calc++ Parser:: and *note Calc++ Scanner::, for a - complete example. - - • Accepted Values: Any string. Must be a valid identifier - prefix in the target language (typically, a possibly empty - sequence of letters, underscores, and --not at the beginning-- - digits). - - • Default Value: empty - • History: introduced in Bison 3.0. - - -- Directive: %define api.token.raw - - • Language(s): all - - • Purpose: The output files normally define the enumeration of - the _token kinds_ with Yacc-compatible token codes: sequential - numbers starting at 257 except for single character tokens - which stand for themselves (e.g., in ASCII, ‘'a'’ is numbered - 65). 
The parser however uses _symbol kinds_ which are - assigned numbers sequentially starting at 0. Therefore each - time the scanner returns an (external) token kind, it must be - mapped to the (internal) symbol kind. - - When ‘api.token.raw’ is set, the code of the token kinds are - forced to coincide with the symbol kind. This saves one table - lookup per token to map them from the token kind to the symbol - kind, and also saves the generation of the mapping table. The - gain is typically moderate, but in extreme cases (very simple - user actions), a 10% improvement can be observed. - - When ‘api.token.raw’ is set, the grammar cannot use character - literals (such as ‘'a'’). - - • Accepted Values: Boolean. - - • Default Value: ‘true’ in D, ‘false’ otherwise - • History: introduced in Bison 3.5. Was initially introduced in - Bison 1.25 as ‘%raw’, but never worked and was removed in - Bison 1.29. - - -- Directive: %define api.value.automove - - • Language(s): C++ - - • Purpose: Let occurrences of semantic values of the right-hand - sides of a rule be implicitly turned in rvalues. When - enabled, a grammar such as: - - exp: - "number" { $$ = make_number ($1); } - | exp "+" exp { $$ = make_binary (add, $1, $3); } - | "(" exp ")" { $$ = $2; } - - is actually compiled as if you had written: - - exp: - "number" { $$ = make_number (std::move ($1)); } - | exp "+" exp { $$ = make_binary (add, - std::move ($1), - std::move ($3)); } - | "(" exp ")" { $$ = std::move ($2); } - - Using a value several times with automove enabled is typically - an error. For instance, instead of: - - exp: "twice" exp { $$ = make_binary (add, $2, $2); } - - write: - - exp: "twice" exp { auto v = $2; $$ = make_binary (add, v, v); } - - It is tempting to use ‘std::move’ on one of the ‘v’, but the - argument evaluation order in C++ is unspecified. - - • Accepted Values: Boolean. 
- - • Default Value: ‘false’ - • History: introduced in Bison 3.2 - - -- Directive: %define api.value.type SUPPORT - -- Directive: %define api.value.type {TYPE} - • Language(s): all - - • Purpose: The type for semantic values. - - • Accepted Values: - ‘{}’ - This grammar has no semantic value at all. This is not - properly supported yet. - ‘union-directive’ (C, C++, D) - The type is defined thanks to the ‘%union’ directive. - You don't have to define ‘api.value.type’ in that case, - using ‘%union’ suffices. *Note Union Decl::. For - instance: - %define api.value.type union-directive - %union - { - int ival; - char *sval; - } - %token <ival> INT "integer" - %token <sval> STR "string" - - ‘union’ (C, C++) - The symbols are defined with type names, from which Bison - will generate a ‘union’. For instance: - %define api.value.type union - %token <int> INT "integer" - %token <char *> STR "string" - Most C++ objects cannot be stored in a ‘union’, use - ‘variant’ instead. - - ‘variant’ (C++) - This is similar to ‘union’, but special storage - techniques are used to allow any kind of C++ object to be - used. For instance: - %define api.value.type variant - %token <int> INT "integer" - %token <std::string> STR "string" - *Note C++ Variants::. - - ‘{TYPE}’ - Use this TYPE as semantic value. - %code requires - { - struct my_value - { - enum - { - is_int, is_str - } kind; - union - { - int ival; - char *sval; - } u; - }; - } - %define api.value.type {struct my_value} - %token <u.ival> INT "integer" - %token <u.sval> STR "string" - - • Default Value: - − ‘union-directive’ if ‘%union’ is used, otherwise ... - − ‘int’ if type tags are used (i.e., ‘%token <TYPE>...’ or - ‘%nterm <TYPE>...’ is used), otherwise ... - − undefined. - - • History: introduced in Bison 3.0. Was introduced for Java - only in 2.3b as ‘stype’. - - -- Directive: %define api.value.union.name NAME - • Language(s): C - - • Purpose: The tag of the generated ‘union’ (_not_ the name of - the ‘typedef’). This variable is set to ‘ID’ when ‘%union ID’ - is used.
There is no clear reason to give this union a name. - - • Accepted Values: Any valid identifier. - - • Default Value: ‘YYSTYPE’. - - • History: Introduced in Bison 3.0.3. - - -- Directive: %define lr.default-reduction WHEN - - • Language(s): all - - • Purpose: Specify the kind of states that are permitted to - contain default reductions. *Note Default Reductions::. - - • Accepted Values: ‘most’, ‘consistent’, ‘accepting’ - • Default Value: - • ‘accepting’ if ‘lr.type’ is ‘canonical-lr’. - • ‘most’ otherwise. - • History: introduced as ‘lr.default-reductions’ in 2.5, renamed - as ‘lr.default-reduction’ in 3.0. - - -- Directive: %define lr.keep-unreachable-state - - • Language(s): all - • Purpose: Request that Bison allow unreachable parser states to - remain in the parser tables. *Note Unreachable States::. - • Accepted Values: Boolean - • Default Value: ‘false’ - • History: introduced as ‘lr.keep_unreachable_states’ in 2.3b, - renamed as ‘lr.keep-unreachable-states’ in 2.5, and as - ‘lr.keep-unreachable-state’ in 3.0. - - -- Directive: %define lr.type TYPE - - • Language(s): all - - • Purpose: Specify the type of parser tables within the LR(1) - family. *Note LR Table Construction::. - - • Accepted Values: ‘lalr’, ‘ielr’, ‘canonical-lr’ - - • Default Value: ‘lalr’ - - -- Directive: %define namespace {NAMESPACE} - Obsoleted by ‘api.namespace’ - - -- Directive: %define parse.assert - - • Languages(s): C, C++ - - • Purpose: Issue runtime assertions to catch invalid uses. In - C, some important invariants in the implementation of the - parser are checked when this option is enabled. - - In C++, when variants are used (*note C++ Variants::), symbols - must be constructed and destroyed properly. This option - checks these constraints using runtime type information - (RTTI). Therefore the generated code cannot be compiled with - RTTI disabled (via compiler options such as ‘-fno-rtti’). 
- - • Accepted Values: Boolean - - • Default Value: ‘false’ - - -- Directive: %define parse.error VERBOSITY - • Languages(s): all - • Purpose: Control the generation of syntax error messages. - *Note Error Reporting::. - • Accepted Values: - • ‘simple’ Error messages passed to ‘yyerror’ are simply - ‘"syntax error"’. - - • ‘detailed’ Error messages report the unexpected token, - and possibly the expected ones. However, this report can - often be incorrect when LAC is not enabled (*note LAC::). - Token name internationalization is supported. - - • ‘verbose’ Similar (but inferior) to ‘detailed’. The D - parser does not support this value. - - Error messages report the unexpected token, and possibly - the expected ones. However, this report can often be - incorrect when LAC is not enabled (*note LAC::). - - Does not support token internationalization. Using - non-ASCII characters in token aliases is not portable. - - • ‘custom’ The user is in charge of generating the syntax - error message by defining the ‘yyreport_syntax_error’ - function. *Note Syntax Error Reporting Function::. - - • Default Value: ‘simple’ - - • History: introduced in 3.0 with support for ‘simple’ and - ‘verbose’. Values ‘custom’ and ‘detailed’ were introduced in - 3.6. - - -- Directive: %define parse.lac WHEN - - • Languages(s): C/C++ (deterministic parsers only), D and Java. - - • Purpose: Enable LAC (lookahead correction) to improve syntax - error handling. *Note LAC::. - • Accepted Values: ‘none’, ‘full’ - • Default Value: ‘none’ - - -- Directive: %define parse.trace - - • Languages(s): C, C++, D, Java - - • Purpose: Require parser instrumentation for tracing. *Note - Tracing::. - - In C/C++, define the macro ‘YYDEBUG’ (or ‘PREFIXDEBUG’ with - ‘%define api.prefix {PREFIX}’), see *note Multiple Parsers::) - to 1 (if it is not already defined) so that the debugging - facilities are compiled. 
- - • Accepted Values: Boolean - - • Default Value: ‘false’ - - -- Directive: %define parser_class_name {NAME} - Obsoleted by ‘api.parser.class’ - - -File: bison.info, Node: %code Summary, Prev: %define Summary, Up: Declarations - -3.7.15 %code Summary --------------------- - -The ‘%code’ directive inserts code verbatim into the output parser -source at any of a predefined set of locations. It thus serves as a -flexible and user-friendly alternative to the traditional Yacc prologue, -‘%{CODE%}’. This section summarizes the functionality of ‘%code’ for -the various target languages supported by Bison. For a detailed -discussion of how to use ‘%code’ in place of ‘%{CODE%}’ for C/C++ and -why it is advantageous to do so, *note Prologue Alternatives::. - - -- Directive: %code {CODE} - This is the unqualified form of the ‘%code’ directive. It inserts - CODE verbatim at a language-dependent default location in the - parser implementation. - - For C/C++, the default location is the parser implementation file - after the usual contents of the parser header file. Thus, the - unqualified form replaces ‘%{CODE%}’ for most purposes. - - For D and Java, the default location is inside the parser class. - - -- Directive: %code QUALIFIER {CODE} - This is the qualified form of the ‘%code’ directive. QUALIFIER - identifies the purpose of CODE and thus the location(s) where Bison - should insert it. That is, if you need to specify - location-sensitive CODE that does not belong at the default - location selected by the unqualified ‘%code’ form, use this form - instead. - - For any particular qualifier or for the unqualified form, if there -are multiple occurrences of the ‘%code’ directive, Bison concatenates -the specified code in the order in which it appears in the grammar file. - - Not all qualifiers are accepted for all target languages. Unaccepted -qualifiers produce an error. 
Some of the accepted qualifiers are: - -‘requires’ - - • Language(s): C, C++ - - • Purpose: This is the best place to write dependency code - required for the value and location types (‘YYSTYPE’ and - ‘YYLTYPE’ in C). In other words, it's the best place to define - types referenced in ‘%union’ directives. In C, if you use - ‘#define’ to override Bison's default ‘YYSTYPE’ and ‘YYLTYPE’ - definitions, then it is also the best place. However you - should rather ‘%define’ ‘api.value.type’ and - ‘api.location.type’. - - • Location(s): The parser header file and the parser - implementation file before the Bison-generated definitions of - the value and location types (‘YYSTYPE’ and ‘YYLTYPE’ in C). - -‘provides’ - - • Language(s): C, C++ - - • Purpose: This is the best place to write additional - definitions and declarations that should be provided to other - modules. - - • Location(s): The parser header file and the parser - implementation file after the Bison-generated value and - location types (‘YYSTYPE’ and ‘YYLTYPE’ in C), and token - definitions. - -‘top’ - - • Language(s): C, C++ - - • Purpose: The unqualified ‘%code’ or ‘%code requires’ should - usually be more appropriate than ‘%code top’. However, - occasionally it is necessary to insert code much nearer the - top of the parser implementation file. For example: - - %code top { - #define _GNU_SOURCE - #include <stdio.h> - } - - • Location(s): Near the top of the parser implementation file. - -‘imports’ - - • Language(s): D, Java - - • Purpose: This is the best place to write Java import - directives. D syntax allows for import statements all - throughout the code. - - • Location(s): The parser Java file after any Java package - directive and before any class definitions. The parser D file - before any class definitions. - - Though we say the insertion locations are language-dependent, they -are technically skeleton-dependent.
Writers of non-standard skeletons -however should choose their locations consistently with the behavior of -the standard Bison skeletons. - - -File: bison.info, Node: Multiple Parsers, Prev: Declarations, Up: Grammar File - -3.8 Multiple Parsers in the Same Program -======================================== - -Most programs that use Bison parse only one language and therefore -contain only one Bison parser. But what if you want to parse more than -one language with the same program? Then you need to avoid name -conflicts between different definitions of functions and variables such -as ‘yyparse’, ‘yylval’. To use different parsers from the same -compilation unit, you also need to avoid conflicts on types and macros -(e.g., ‘YYSTYPE’) exported in the generated header. - - The easy way to do this is to define the ‘%define’ variable -‘api.prefix’. With different ‘api.prefix’s it is guaranteed that -headers do not conflict when included together, and that compiled -objects can be linked together too. Specifying ‘%define api.prefix -{PREFIX}’ (or passing the option ‘-Dapi.prefix={PREFIX}’, see *note -Invocation::) renames the interface functions and variables of the Bison -parser to start with PREFIX instead of ‘yy’, and all the macros to start -by PREFIX (i.e., PREFIX upper-cased) instead of ‘YY’. - - The renamed symbols include ‘yyparse’, ‘yylex’, ‘yyerror’, ‘yynerrs’, -‘yylval’, ‘yylloc’, ‘yychar’ and ‘yydebug’. If you use a push parser, -‘yypush_parse’, ‘yypull_parse’, ‘yypstate’, ‘yypstate_new’ and -‘yypstate_delete’ will also be renamed. The renamed macros include -‘YYSTYPE’, ‘YYLTYPE’, and ‘YYDEBUG’, which is treated specifically -- -more about this below. - - For example, if you use ‘%define api.prefix {c}’, the names become -‘cparse’, ‘clex’, ..., ‘CSTYPE’, ‘CLTYPE’, and so on. - - Users of Flex must update the signature of the generated ‘yylex’ -function. 
Since the Flex scanner usually includes the generated header -of the parser (to get the definitions of the tokens, etc.), the most -convenient way is to insert the declaration of ‘yylex’ in the ‘provides’ -section: - - %define api.prefix {c} - // Emitted in the header file, after the definition of YYSTYPE. - %code provides - { - // Tell Flex the expected prototype of yylex. - #define YY_DECL \ - int clex (CSTYPE *yylval, CLTYPE *yylloc) - - // Declare the scanner. - YY_DECL; - } - - - The ‘%define’ variable ‘api.prefix’ works in two different ways. In -the implementation file, it works by adding macro definitions to the -beginning of the parser implementation file, defining ‘yyparse’ as -‘PREFIXparse’, and so on: - - #define YYSTYPE CSTYPE - #define yyparse cparse - #define yylval clval - ... - YYSTYPE yylval; - int yyparse (void); - - This effectively substitutes one name for the other in the entire -parser implementation file, thus the "original" names (‘yylex’, -‘YYSTYPE’, ...) are also usable in the parser implementation file. - - However, in the parser header file, the symbols are defined renamed, -for instance: - - extern CSTYPE clval; - int cparse (void); - - The macro ‘YYDEBUG’ is commonly used to enable the tracing support in -parsers. To comply with this tradition, when ‘api.prefix’ is used, -‘YYDEBUG’ (not renamed) is used as a default value: - - /* Debug traces. */ - #ifndef CDEBUG - # if defined YYDEBUG - # if YYDEBUG - # define CDEBUG 1 - # else - # define CDEBUG 0 - # endif - # else - # define CDEBUG 0 - # endif - #endif - #if CDEBUG - extern int cdebug; - #endif - - - - Prior to Bison 2.6, a feature similar to ‘api.prefix’ was provided by -the obsolete directive ‘%name-prefix’ (*note Table of Symbols::) and the -option ‘--name-prefix’ (*note Output Files::).
- - -File: bison.info, Node: Interface, Next: Algorithm, Prev: Grammar File, Up: Top - -4 Parser C-Language Interface -***************************** - -The Bison parser is actually a C function named ‘yyparse’. Here we -describe the interface conventions of ‘yyparse’ and the other functions -that it needs to use. - - Keep in mind that the parser uses many C identifiers starting with -‘yy’ and ‘YY’ for internal purposes. If you use such an identifier -(aside from those in this manual) in an action or in epilogue in the -grammar file, you are likely to run into trouble. - -* Menu: - -* Parser Function:: How to call ‘yyparse’ and what it returns. -* Push Parser Interface:: How to create, use, and destroy push parsers. -* Lexical:: You must supply a function ‘yylex’ - which reads tokens. -* Error Reporting:: Passing error messages to the user. -* Action Features:: Special features for use in actions. -* Internationalization:: How to let the parser speak in the user's - native language. - - -File: bison.info, Node: Parser Function, Next: Push Parser Interface, Up: Interface - -4.1 The Parser Function ‘yyparse’ -================================= - -You call the function ‘yyparse’ to cause parsing to occur. This -function reads tokens, executes actions, and ultimately returns when it -encounters end-of-input or an unrecoverable syntax error. You can also -write an action which directs ‘yyparse’ to return immediately without -reading further. - - -- Function: int yyparse (void) - The value returned by ‘yyparse’ is 0 if parsing was successful - (return is due to end-of-input). - - The value is 1 if parsing failed because of invalid input, i.e., - input that contains a syntax error or that causes ‘YYABORT’ to be - invoked. - - The value is 2 if parsing failed due to memory exhaustion. - - In an action, you can cause immediate return from ‘yyparse’ by using -these macros: - - -- Macro: YYACCEPT - Return immediately with value 0 (to report success). 
- - -- Macro: YYABORT - Return immediately with value 1 (to report failure). - - -- Macro: YYNOMEM - Return immediately with value 2 (to report memory exhaustion). - - If you use a reentrant parser, you can optionally pass additional -parameter information to it in a reentrant way. To do so, use the -declaration ‘%parse-param’: - - -- Directive: %parse-param {ARGUMENT-DECLARATION} ... - Declare that one or more ARGUMENT-DECLARATION are additional - ‘yyparse’ arguments. The ARGUMENT-DECLARATION is used when - declaring functions or prototypes. The last identifier in - ARGUMENT-DECLARATION must be the argument name. - - Here's an example. Write this in the parser: - - %parse-param {int *nastiness} {int *randomness} - -Then call the parser like this: - - { - int nastiness, randomness; - ... /* Store proper data in ‘nastiness’ and ‘randomness’. */ - value = yyparse (&nastiness, &randomness); - ... - } - -In the grammar actions, use expressions like this to refer to the data: - - exp: ... { ...; *randomness += 1; ... } - -Using the following: - %parse-param {int *randomness} - - Results in these signatures: - void yyerror (int *randomness, const char *msg); - int yyparse (int *randomness); - -Or, if both ‘%define api.pure full’ (or just ‘%define api.pure’) and -‘%locations’ are used: - - void yyerror (YYLTYPE *llocp, int *randomness, const char *msg); - int yyparse (int *randomness); - - -File: bison.info, Node: Push Parser Interface, Next: Lexical, Prev: Parser Function, Up: Interface - -4.2 Push Parser Interface -========================= - -You call the function ‘yypstate_new’ to create a new parser instance. -This function is available if either the ‘%define api.push-pull push’ or -‘%define api.push-pull both’ declaration is used. *Note Push Decl::. - - -- Function: yypstate* yypstate_new (void) - Return a valid parser instance if there is memory available, 0 - otherwise. In impure mode, it will also return 0 if a parser - instance is currently allocated. 
- - You call the function ‘yypstate_delete’ to delete a parser instance. This -function is available if either the ‘%define api.push-pull push’ or -‘%define api.push-pull both’ declaration is used. *Note Push Decl::. - - -- Function: void yypstate_delete (yypstate *YYPS) - Reclaim the memory associated with a parser instance. After this - call, you should no longer attempt to use the parser instance. - - You call the function ‘yypush_parse’ to parse a single token. This -function is available if either the ‘%define api.push-pull push’ or -‘%define api.push-pull both’ declaration is used. *Note Push Decl::. - - -- Function: int yypush_parse (yypstate *YYPS) - The value returned by ‘yypush_parse’ is the same as for ‘yyparse’ - with the following exception: it returns ‘YYPUSH_MORE’ if more - input is required to finish parsing the grammar. - - After ‘yypush_parse’ returned, the instance may be consulted. For - instance check ‘yynerrs’ to see whether there were (possibly - recovered) syntax errors. - - After ‘yypush_parse’ returns a status other than ‘YYPUSH_MORE’, the - parser instance ‘yyps’ may be reused for a new parse. - - The fact that the parser state is reusable even after an error -simplifies reuse. For example, a calculator application which parses -each input line as an expression can just keep reusing the same ‘yyps’ -even if an input was invalid. - - You call the function ‘yypull_parse’ to parse the rest of the input -stream. This function is available if the ‘%define api.push-pull both’ -declaration is used. *Note Push Decl::. - - -- Function: int yypull_parse (yypstate *YYPS) - The value returned by ‘yypull_parse’ is the same as for ‘yyparse’. - - The parser instance ‘yyps’ may be reused for new parses. - - -- Function: int yypstate_expected_tokens (const yypstate *yyps, - yysymbol_kind_t ARGV[], int ARGC) - Fill ARGV with the expected tokens, which never includes - ‘YYSYMBOL_YYEMPTY’, ‘YYSYMBOL_YYerror’, or ‘YYSYMBOL_YYUNDEF’.
- - Never put more than ARGC elements into ARGV, and on success return - the number of tokens stored in ARGV. If there are more expected - tokens than ARGC, fill ARGV up to ARGC and return 0. If there are - no expected tokens, also return 0, but set ‘argv[0]’ to - ‘YYSYMBOL_YYEMPTY’. - - When LAC is enabled, may return a negative number on errors, such - as ‘YYENOMEM’ on memory exhaustion. - - If ARGV is null, return the size needed to store all the possible - values, which is always less than ‘YYNTOKENS’. - - -File: bison.info, Node: Lexical, Next: Error Reporting, Prev: Push Parser Interface, Up: Interface - -4.3 The Lexical Analyzer Function ‘yylex’ -========================================= - -The “lexical analyzer” function, ‘yylex’, recognizes tokens from the -input stream and returns them to the parser. Bison does not create this -function automatically; you must write it so that ‘yyparse’ can call it. -The function is sometimes referred to as a lexical scanner. - - In simple programs, ‘yylex’ is often defined at the end of the Bison -grammar file. If ‘yylex’ is defined in a separate source file, you need -to arrange for the token-kind definitions to be available there. To do -this, use the ‘-d’ option when you run Bison, so that it will write -these definitions into the separate parser header file, ‘NAME.tab.h’, -which you can include in the other source files that need it. *Note -Invocation::. - -* Menu: - -* Calling Convention:: How ‘yyparse’ calls ‘yylex’. -* Special Tokens:: Signaling end-of-file and errors to the parser. -* Tokens from Literals:: Finding token kinds from string aliases. -* Token Values:: How ‘yylex’ must return the semantic value - of the token it has read. -* Token Locations:: How ‘yylex’ must return the text location - (line number, etc.) of the token, if the - actions want that. -* Pure Calling:: How the calling convention differs in a pure parser - (*note Pure Decl::). 
- - -File: bison.info, Node: Calling Convention, Next: Special Tokens, Up: Lexical - -4.3.1 Calling Convention for ‘yylex’ ------------------------------------- - -The value that ‘yylex’ returns must be the positive numeric code for the -kind of token it has just found; a zero or negative value signifies -end-of-input. - - When a token kind is referred to in the grammar rules by a name, that -name in the parser implementation file becomes an enumerator of the enum -‘yytoken_kind_t’ whose definition is the proper numeric code for that -token kind. So ‘yylex’ should use the name to indicate that type. -*Note Symbols::. - - When a token is referred to in the grammar rules by a character -literal, the numeric code for that character is also the code for the -token kind. So ‘yylex’ can simply return that character code, possibly -converted to ‘unsigned char’ to avoid sign-extension. The null -character must not be used this way, because its code is zero and that -signifies end-of-input. - - Here is an example showing these things: - - int - yylex (void) - { - ... - if (c == EOF) /* Detect end-of-input. */ - return YYEOF; - ... - else if (c == '+' || c == '-') - return c; /* Assume token kind for '+' is '+'. */ - ... - else - return INT; /* Return the kind of the token. */ - ... - } - -This interface has been designed so that the output from the ‘lex’ -utility can be used without change as the definition of ‘yylex’. - - -File: bison.info, Node: Special Tokens, Next: Tokens from Literals, Prev: Calling Convention, Up: Lexical - -4.3.2 Special Tokens --------------------- - -In addition to the user defined tokens, Bison generates a few special -tokens that ‘yylex’ may return. - - The ‘YYEOF’ token denotes the end of file, and signals to the parser -that there is nothing left afterwards. *Note Calling Convention::, for -an example. - - Returning ‘YYUNDEF’ tells the parser that some lexical error was -found. 
It will emit an error message about an "invalid token", and -enter error-recovery (*note Error Recovery::). Returning an unknown -token kind results in the exact same behavior. - - Returning ‘YYerror’ requires the parser to enter error-recovery -_without_ emitting an error message. This way the lexical analyzer can -produce an accurate error messages about the invalid input (something -the parser cannot do), and yet benefit from the error-recovery features -of the parser. - - int - yylex (void) - { - ... - switch (c) - { - ... - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - ... - return TOK_NUM; - ... - case EOF: - return YYEOF; - default: - yyerror ("syntax error: invalid character: %c", c); - return YYerror; - } - } - - -File: bison.info, Node: Tokens from Literals, Next: Token Values, Prev: Special Tokens, Up: Lexical - -4.3.3 Finding Tokens by String Literals ---------------------------------------- - -If the grammar uses literal string tokens, there are two ways that -‘yylex’ can determine the token kind codes for them: - - • If the grammar defines symbolic token names as aliases for the - literal string tokens, ‘yylex’ can use these symbolic names like - all others. In this case, the use of the literal string tokens in - the grammar file has no effect on ‘yylex’. - - This is the preferred approach. - - • ‘yylex’ can search for the multicharacter token in the ‘yytname’ - table. This method is discouraged: the primary purpose of string - aliases is forging good error messages, not describing the spelling - of keywords. In addition, looking for the token kind at runtime - incurs a (small but noticeable) cost. - - The ‘yytname’ table is generated only if you use the ‘%token-table’ - declaration. *Note Decl Summary::. 
- - -File: bison.info, Node: Token Values, Next: Token Locations, Prev: Tokens from Literals, Up: Lexical - -4.3.4 Semantic Values of Tokens -------------------------------- - -In an ordinary (nonreentrant) parser, the semantic value of the token -must be stored into the global variable ‘yylval’. When you are using -just one data type for semantic values, ‘yylval’ has that type. Thus, -if the type is ‘int’ (the default), you might write this in ‘yylex’: - - ... - yylval = value; /* Put value onto Bison stack. */ - return INT; /* Return the kind of the token. */ - ... - - When you are using multiple data types, ‘yylval’'s type is a union -made from the ‘%union’ declaration (*note Union Decl::). So when you -store a token's value, you must use the proper member of the union. If -the ‘%union’ declaration looks like this: - - %union { - int intval; - double val; - symrec *tptr; - } - -then the code in ‘yylex’ might look like this: - - ... - yylval.intval = value; /* Put value onto Bison stack. */ - return INT; /* Return the kind of the token. */ - ... - - -File: bison.info, Node: Token Locations, Next: Pure Calling, Prev: Token Values, Up: Lexical - -4.3.5 Textual Locations of Tokens ---------------------------------- - -If you are using the ‘@N’-feature (*note Tracking Locations::) in -actions to keep track of the textual locations of tokens and groupings, -then you must provide this information in ‘yylex’. The function -‘yyparse’ expects to find the textual location of a token just parsed in -the global variable ‘yylloc’. So ‘yylex’ must store the proper data in -that variable. - - By default, the value of ‘yylloc’ is a structure and you need only -initialize the members that are going to be used by the actions. The -four members are called ‘first_line’, ‘first_column’, ‘last_line’ and -‘last_column’. Note that the use of this feature makes the parser -noticeably slower. - - The data type of ‘yylloc’ has the name ‘YYLTYPE’. 
- - -File: bison.info, Node: Pure Calling, Prev: Token Locations, Up: Lexical - -4.3.6 Calling Conventions for Pure Parsers ------------------------------------------- - -When you use the Bison declaration ‘%define api.pure full’ to request a -pure, reentrant parser, the global communication variables ‘yylval’ and -‘yylloc’ cannot be used. (*Note Pure Decl::.) In such parsers the two -global variables are replaced by pointers passed as arguments to -‘yylex’. You must declare them as shown here, and pass the information -back by storing it through those pointers. - - int - yylex (YYSTYPE *lvalp, YYLTYPE *llocp) - { - ... - *lvalp = value; /* Put value onto Bison stack. */ - return INT; /* Return the kind of the token. */ - ... - } - - If the grammar file does not use the ‘@’ constructs to refer to -textual locations, then the type ‘YYLTYPE’ will not be defined. In this -case, omit the second argument; ‘yylex’ will be called with only one -argument. - - If you wish to pass additional arguments to ‘yylex’, use ‘%lex-param’ -just like ‘%parse-param’ (*note Parser Function::). To pass additional -arguments to both ‘yylex’ and ‘yyparse’, use ‘%param’. - - -- Directive: %lex-param {ARGUMENT-DECLARATION} ... - Specify that ARGUMENT-DECLARATION are additional ‘yylex’ argument - declarations. You may pass one or more such declarations, which is - equivalent to repeating ‘%lex-param’. - - -- Directive: %param {ARGUMENT-DECLARATION} ... - Specify that ARGUMENT-DECLARATION are additional ‘yylex’/‘yyparse’ - argument declaration. This is equivalent to ‘%lex-param - {ARGUMENT-DECLARATION} ... %parse-param {ARGUMENT-DECLARATION} - ...’. You may pass one or more declarations, which is equivalent - to repeating ‘%param’. 
- -For instance: - - %lex-param {scanner_mode *mode} - %parse-param {parser_mode *mode} - %param {environment_type *env} - -results in the following signatures: - - int yylex (scanner_mode *mode, environment_type *env); - int yyparse (parser_mode *mode, environment_type *env); - - If ‘%define api.pure full’ is added: - - int yylex (YYSTYPE *lvalp, scanner_mode *mode, environment_type *env); - int yyparse (parser_mode *mode, environment_type *env); - -and finally, if both ‘%define api.pure full’ and ‘%locations’ are used: - - int yylex (YYSTYPE *lvalp, YYLTYPE *llocp, - scanner_mode *mode, environment_type *env); - int yyparse (parser_mode *mode, environment_type *env); - - -File: bison.info, Node: Error Reporting, Next: Action Features, Prev: Lexical, Up: Interface - -4.4 Error Reporting -=================== - -During its execution the parser may have error messages to pass to the -user, such as syntax error, or memory exhaustion. How this message is -delivered to the user must be specified by the developer. - -* Menu: - -* Error Reporting Function:: You must supply a ‘yyerror’ function. -* Syntax Error Reporting Function:: You can supply a ‘yyreport_syntax_error’ function. - - -File: bison.info, Node: Error Reporting Function, Next: Syntax Error Reporting Function, Up: Error Reporting - -4.4.1 The Error Reporting Function ‘yyerror’ --------------------------------------------- - -The Bison parser detects a “syntax error” (or “parse error”) whenever it -reads a token which cannot satisfy any syntax rule. An action in the -grammar can also explicitly proclaim an error, using the macro ‘YYERROR’ -(*note Action Features::). - - The Bison parser expects to report the error by calling an error -reporting function named ‘yyerror’, which you must supply. It is called -by ‘yyparse’ whenever a syntax error is found, and it receives one -argument. For a syntax error, the string is normally ‘"syntax error"’. 
- - If you invoke ‘%define parse.error detailed’ (or ‘custom’) in the -Bison declarations section (*note Bison Declarations::), then Bison -provides a more verbose and specific error message string instead of -just plain ‘"syntax error"’. However, that message sometimes contains -incorrect information if LAC is not enabled (*note LAC::). - - The parser can detect one other kind of error: memory exhaustion. -This can happen when the input contains constructions that are very -deeply nested. It isn't likely you will encounter this, since the Bison -parser normally extends its stack automatically up to a very large -limit. But if memory is exhausted, ‘yyparse’ calls ‘yyerror’ in the -usual fashion, except that the argument string is ‘"memory exhausted"’. - - In some cases diagnostics like ‘"syntax error"’ are translated -automatically from English to some other language before they are passed -to ‘yyerror’. *Note Internationalization::. - - The following definition suffices in simple programs: - - void - yyerror (char const *s) - { - fprintf (stderr, "%s\n", s); - } - - After ‘yyerror’ returns to ‘yyparse’, the latter will attempt error -recovery if you have written suitable error recovery grammar rules -(*note Error Recovery::). If recovery is impossible, ‘yyparse’ will -immediately return 1. - - Obviously, in location tracking pure parsers, ‘yyerror’ should have -access to the current location. With ‘%define api.pure’, this is -indeed the case for the GLR parsers, but not for the Yacc parser, for -historical reasons, and this is why ‘%define api.pure full’ should -be preferred over ‘%define api.pure’. - - When ‘%locations %define api.pure full’ is used, ‘yyerror’ has the -following signature: - - void yyerror (YYLTYPE *locp, char const *msg); - -The prototypes are only indications of how the code produced by Bison -uses ‘yyerror’. Bison-generated code always ignores the returned value, -so ‘yyerror’ can return any type, including ‘void’.
Also, ‘yyerror’ can -be a variadic function; that is why the message is always passed last. - - Traditionally ‘yyerror’ returns an ‘int’ that is always ignored, but -this is purely for historical reasons, and ‘void’ is preferable since it -more accurately describes the return type for ‘yyerror’. - - The variable ‘yynerrs’ contains the number of syntax errors reported -so far. Normally this variable is global; but if you request a pure -parser (*note Pure Decl::) then it is a local variable which only the -actions can access. - - -File: bison.info, Node: Syntax Error Reporting Function, Prev: Error Reporting Function, Up: Error Reporting - -4.4.2 The Syntax Error Reporting Function ‘yyreport_syntax_error’ ------------------------------------------------------------------ - -If you invoke ‘%define parse.error custom’ (*note Bison Declarations::), -then the parser no longer passes syntax error messages to ‘yyerror’, -rather it delegates that task to the user by calling the -‘yyreport_syntax_error’ function. - - The following functions and types are "‘static’": they are defined in -the implementation file (‘*.c’) and available only from there. They are -meant to be used from the grammar's epilogue. - - -- Function: static int yyreport_syntax_error (const yypcontext_t *CTX) - Report a syntax error to the user. Return 0 on success, ‘YYENOMEM’ - on memory exhaustion. Whether it uses ‘yyerror’ is up to the user. - - Use the following types and functions to build the error message. - - -- Type: yypcontext_t - An opaque type that captures the circumstances of the syntax error. - - -- Type: yysymbol_kind_t - An enum of all the grammar symbols, tokens and nonterminals. Its - enumerators are forged from the symbol names: - - enum yysymbol_kind_t - { - YYSYMBOL_YYEMPTY = -2, /* No symbol. 
*/ - YYSYMBOL_YYEOF = 0, /* "end of file" */ - YYSYMBOL_YYerror = 1, /* error */ - YYSYMBOL_YYUNDEF = 2, /* "invalid token" */ - YYSYMBOL_PLUS = 3, /* "+" */ - YYSYMBOL_MINUS = 4, /* "-" */ - [...] - YYSYMBOL_VAR = 14, /* "variable" */ - YYSYMBOL_NEG = 15, /* NEG */ - YYSYMBOL_YYACCEPT = 16, /* $accept */ - YYSYMBOL_exp = 17, /* exp */ - YYSYMBOL_input = 18 /* input */ - }; - typedef enum yysymbol_kind_t yysymbol_kind_t; - - -- Function: static yysymbol_kind_t yypcontext_token (const - yypcontext_t *CTX) - The "unexpected" token: the symbol kind of the lookahead token that - caused the syntax error. Returns ‘YYSYMBOL_YYEMPTY’ if there is no - lookahead. - - -- Function: static YYLTYPE * yypcontext_location (const yypcontext_t - *CTX) - The location of the syntax error (that of the unexpected token). - - -- Function: static int yypcontext_expected_tokens (const yypcontext_t - *ctx, yysymbol_kind_t ARGV[], int ARGC) - Fill ARGV with the expected tokens, which never includes - ‘YYSYMBOL_YYEMPTY’, ‘YYSYMBOL_YYerror’, or ‘YYSYMBOL_YYUNDEF’. - - Never put more than ARGC elements into ARGV, and on success return - the number of tokens stored in ARGV. If there are more expected - tokens than ARGC, fill ARGV up to ARGC and return 0. If there are - no expected tokens, also return 0, but set ‘argv[0]’ to - ‘YYSYMBOL_YYEMPTY’. - - When LAC is enabled, may return a negative number on errors, such - as ‘YYENOMEM’ on memory exhaustion. - - If ARGV is null, return the size needed to store all the possible - values, which is always less than ‘YYNTOKENS’. - - -- Function: static const char * yysymbol_name (yysymbol_kind_t SYMBOL) - The name of the symbol whose kind is SYMBOL, possibly translated. - - A custom syntax error function looks as follows. This implementation -is inappropriate for internationalization, see the ‘c/bistromathic’ -example for a better alternative.
- - static int - yyreport_syntax_error (const yypcontext_t *ctx) - { - int res = 0; - YYLOCATION_PRINT (stderr, *yypcontext_location (ctx)); - fprintf (stderr, ": syntax error"); - // Report the tokens expected at this point. - { - enum { TOKENMAX = 5 }; - yysymbol_kind_t expected[TOKENMAX]; - int n = yypcontext_expected_tokens (ctx, expected, TOKENMAX); - if (n < 0) - // Forward errors to yyparse. - res = n; - else - for (int i = 0; i < n; ++i) - fprintf (stderr, "%s %s", - i == 0 ? ": expected" : " or", yysymbol_name (expected[i])); - } - // Report the unexpected token. - { - yysymbol_kind_t lookahead = yypcontext_token (ctx); - if (lookahead != YYSYMBOL_YYEMPTY) - fprintf (stderr, " before %s", yysymbol_name (lookahead)); - } - fprintf (stderr, "\n"); - return res; - } - - You still must provide a ‘yyerror’ function, used for instance to -report memory exhaustion. - - -File: bison.info, Node: Action Features, Next: Internationalization, Prev: Error Reporting, Up: Interface - -4.5 Special Features for Use in Actions -======================================= - -Here is a table of Bison constructs, variables and macros that are -useful in actions. - - -- Variable: $$ - Acts like a variable that contains the semantic value for the - grouping made by the current rule. *Note Actions::. - - -- Variable: $N - Acts like a variable that contains the semantic value for the Nth - component of the current rule. *Note Actions::. - - -- Variable: $$ - Like ‘$$’ but specifies alternative TYPEALT in the union specified - by the ‘%union’ declaration. *Note Action Types::. - - -- Variable: $N - Like ‘$N’ but specifies alternative TYPEALT in the union specified - by the ‘%union’ declaration. *Note Action Types::. - - -- Macro: YYABORT ; - Return immediately from ‘yyparse’, indicating failure. *Note - Parser Function::. - - -- Macro: YYACCEPT ; - Return immediately from ‘yyparse’, indicating success. *Note - Parser Function::. - - -- Macro: YYBACKUP (TOKEN, VALUE); - Unshift a token. 
This macro is allowed only for rules that reduce - a single value, and only when there is no lookahead token. It is - also disallowed in GLR parsers. It installs a lookahead token with - token kind TOKEN and semantic value VALUE; then it discards the - value that was going to be reduced by this rule. - - If the macro is used when it is not valid, such as when there is a - lookahead token already, then it reports a syntax error with a - message ‘cannot back up’ and performs ordinary error recovery. - - In either case, the rest of the action is not executed. - - -- Value: YYEMPTY - Value stored in ‘yychar’ when there is no lookahead token. - - -- Value: YYEOF - Value stored in ‘yychar’ when the lookahead is the end of the input - stream. - - -- Macro: YYERROR ; - Cause an immediate syntax error. This statement initiates error - recovery just as if the parser itself had detected an error; - however, it does not call ‘yyerror’, and does not print any - message. If you want to print an error message, call ‘yyerror’ - explicitly before the ‘YYERROR;’ statement. *Note Error - Recovery::. - - -- Macro: YYNOMEM ; - Return immediately from ‘yyparse’, indicating memory exhaustion. - *Note Parser Function::. - - -- Macro: YYRECOVERING - The expression ‘YYRECOVERING ()’ yields 1 when the parser is - recovering from a syntax error, and 0 otherwise. *Note Error - Recovery::. - - -- Variable: yychar - Variable containing either the lookahead token, or ‘YYEOF’ when the - lookahead is the end of the input stream, or ‘YYEMPTY’ when no - lookahead has been performed so the next token is not yet known. - Do not modify ‘yychar’ in a deferred semantic action (*note GLR - Semantic Actions::). *Note Lookahead::. - - -- Macro: yyclearin ; - Discard the current lookahead token. This is useful primarily in - error rules. Do not invoke ‘yyclearin’ in a deferred semantic - action (*note GLR Semantic Actions::). *Note Error Recovery::. 
- - -- Macro: yyerrok ; - Resume generating error messages immediately for subsequent syntax - errors. This is useful primarily in error rules. *Note Error - Recovery::. - - -- Variable: yylloc - Variable containing the lookahead token location when ‘yychar’ is - not set to ‘YYEMPTY’ or ‘YYEOF’. Do not modify ‘yylloc’ in a - deferred semantic action (*note GLR Semantic Actions::). *Note - Actions and Locations::. - - -- Variable: yylval - Variable containing the lookahead token semantic value when - ‘yychar’ is not set to ‘YYEMPTY’ or ‘YYEOF’. Do not modify - ‘yylval’ in a deferred semantic action (*note GLR Semantic - Actions::). *Note Actions::. - - -- Value: @$ - Acts like a structure variable containing information on the - textual location of the grouping made by the current rule. *Note - Tracking Locations::. - - -- Value: @N - Acts like a structure variable containing information on the - textual location of the Nth component of the current rule. *Note - Tracking Locations::. - - -File: bison.info, Node: Internationalization, Prev: Action Features, Up: Interface - -4.6 Parser Internationalization -=============================== - -A Bison-generated parser can print diagnostics, including error and -tracing messages. By default, they appear in English. However, Bison -also supports outputting diagnostics in the user's native language. To -make this work, the user should set the usual environment variables. -*Note The User's View: (gettext)Users. For example, the shell command -‘export LC_ALL=fr_CA.UTF-8’ might set the user's locale to French -Canadian using the UTF-8 encoding. The exact set of available locales -depends on the user's installation. - -* Menu: - -* Enabling I18n:: Preparing your project to support internationalization. -* Token I18n:: Preparing tokens for internationalization in error messages. 
- - -File: bison.info, Node: Enabling I18n, Next: Token I18n, Up: Internationalization - -4.6.1 Enabling Internationalization ------------------------------------ - -The maintainer of a package that uses a Bison-generated parser enables -the internationalization of the parser's output through the following -steps. Here we assume a package that uses GNU Autoconf and GNU -Automake. - - 1. Into the directory containing the GNU Autoconf macros used by the - package --often called ‘m4’-- copy the ‘bison-i18n.m4’ file - installed by Bison under ‘share/aclocal/bison-i18n.m4’ in Bison's - installation directory. For example: - - cp /usr/local/share/aclocal/bison-i18n.m4 m4/bison-i18n.m4 - - 2. In the top-level ‘configure.ac’, after the ‘AM_GNU_GETTEXT’ - invocation, add an invocation of ‘BISON_I18N’. This macro is - defined in the file ‘bison-i18n.m4’ that you copied earlier. It - causes ‘configure’ to find the value of the ‘BISON_LOCALEDIR’ - variable, and it defines the source-language symbol ‘YYENABLE_NLS’ - to enable translations in the Bison-generated parser. - - 3. In the ‘main’ function of your program, designate the directory - containing Bison's runtime message catalog, through a call to - ‘bindtextdomain’ with domain name ‘bison-runtime’. For example: - - bindtextdomain ("bison-runtime", BISON_LOCALEDIR); - - Typically this appears after any other call ‘bindtextdomain - (PACKAGE, LOCALEDIR)’ that your package already has. Here we rely - on ‘BISON_LOCALEDIR’ to be defined as a string through the - ‘Makefile’. - - 4. In the ‘Makefile.am’ that controls the compilation of the ‘main’ - function, make ‘BISON_LOCALEDIR’ available as a C preprocessor - macro, either in ‘DEFS’ or in ‘AM_CPPFLAGS’. For example: - - DEFS = @DEFS@ -DBISON_LOCALEDIR='"$(BISON_LOCALEDIR)"' - - or: - - AM_CPPFLAGS = -DBISON_LOCALEDIR='"$(BISON_LOCALEDIR)"' - - 5. Finally, invoke the command ‘autoreconf’ to generate the build - infrastructure. 
- - -File: bison.info, Node: Token I18n, Prev: Enabling I18n, Up: Internationalization - -4.6.2 Token Internationalization --------------------------------- - -When the ‘%define’ variable ‘parse.error’ is set to ‘custom’ or -‘detailed’, token aliases can be internationalized: - - %token - '\n' _("end of line") - - NUM _("number") - - FUN _("function") - VAR _("variable") - - The remainder of the grammar may freely use either the token symbol -(‘FUN’) or its alias (‘"function"’), but not with the -internationalization marker (‘_("function")’). - - If at least one token alias is internationalized, then the generated -parser will use both ‘N_’ and ‘_’, that must be defined (*note The -Programmer’s View: (gettext)Programmers.). They are used only on string -aliases marked for translation. In other words, even if your catalog -features a translation for "function", then with - - %token - - FUN "function" - VAR _("variable") - -"function" will appear untranslated in debug traces and error messages. - - Unless defined by the user, the end-of-file token, ‘YYEOF’, is -provided "end of file" as an alias. It is also internationalized if the -user internationalized tokens. To map it to another string, use: - - %token END 0 _("end of input") - - -File: bison.info, Node: Algorithm, Next: Error Recovery, Prev: Interface, Up: Top - -5 The Bison Parser Algorithm -**************************** - -As Bison reads tokens, it pushes them onto a stack along with their -semantic values. The stack is called the “parser stack”. Pushing a -token is traditionally called “shifting”. - - For example, suppose the infix calculator has read ‘1 + 5 *’, with a -‘3’ to come. The stack will have four elements, one for each token that -was shifted. - - But the stack does not always have an element for each token read. -When the last N tokens and groupings shifted match the components of a -grammar rule, they can be combined according to that rule. This is -called “reduction”. 
Those tokens and groupings are replaced on the -stack by a single grouping whose symbol is the result (left hand side) -of that rule. Running the rule's action is part of the process of -reduction, because this is what computes the semantic value of the -resulting grouping. - - For example, if the infix calculator's parser stack contains this: - - 1 + 5 * 3 - -and the next input token is a newline character, then the last three -elements can be reduced to 15 via the rule: - - expr: expr '*' expr; - -Then the stack contains just these three elements: - - 1 + 15 - -At this point, another reduction can be made, resulting in the single -value 16. Then the newline token can be shifted. - - The parser tries, by shifts and reductions, to reduce the entire -input down to a single grouping whose symbol is the grammar's -start-symbol (*note Language and Grammar::). - - This kind of parser is known in the literature as a bottom-up parser. - -* Menu: - -* Lookahead:: Parser looks one token ahead when deciding what to do. -* Shift/Reduce:: Conflicts: when either shifting or reduction is valid. -* Precedence:: Operator precedence works by resolving conflicts. -* Contextual Precedence:: When an operator's precedence depends on context. -* Parser States:: The parser is a finite-state-machine with stack. -* Reduce/Reduce:: When two rules are applicable in the same situation. -* Mysterious Conflicts:: Conflicts that look unjustified. -* Tuning LR:: How to tune fundamental aspects of LR-based parsing. -* Generalized LR Parsing:: Parsing arbitrary context-free grammars. -* Memory Management:: What happens when memory is exhausted. How to avoid it. - - -File: bison.info, Node: Lookahead, Next: Shift/Reduce, Up: Algorithm - -5.1 Lookahead Tokens -==================== - -The Bison parser does _not_ always reduce immediately as soon as the -last N tokens and groupings match a rule. This is because such a simple -strategy is inadequate to handle most languages. 
Instead, when a -reduction is possible, the parser sometimes "looks ahead" at the next -token in order to decide what to do. - - When a token is read, it is not immediately shifted; first it becomes -the “lookahead token”, which is not on the stack. Now the parser can -perform one or more reductions of tokens and groupings on the stack, -while the lookahead token remains off to the side. When no more -reductions should take place, the lookahead token is shifted onto the -stack. This does not mean that all possible reductions have been done; -depending on the token kind of the lookahead token, some rules may -choose to delay their application. - - Here is a simple case where lookahead is needed. These three rules -define expressions which contain binary addition operators and postfix -unary factorial operators (‘!’), and allow parentheses for grouping. - - expr: - term '+' expr - | term - ; - - term: - '(' expr ')' - | term '!' - | "number" - ; - - Suppose that the tokens ‘1 + 2’ have been read and shifted; what -should be done? If the following token is ‘)’, then the first three -tokens must be reduced to form an ‘expr’. This is the only valid -course, because shifting the ‘)’ would produce a sequence of symbols -‘term ')'’, and no rule allows this. - - If the following token is ‘!’, then it must be shifted immediately so -that ‘2 !’ can be reduced to make a ‘term’. If instead the parser were -to reduce before shifting, ‘1 + 2’ would become an ‘expr’. It would -then be impossible to shift the ‘!’ because doing so would produce on -the stack the sequence of symbols ‘expr '!'’. No rule allows that -sequence. - - The lookahead token is stored in the variable ‘yychar’. Its semantic -value and location, if any, are stored in the variables ‘yylval’ and -‘yylloc’. *Note Action Features::. 
- - -File: bison.info, Node: Shift/Reduce, Next: Precedence, Prev: Lookahead, Up: Algorithm - -5.2 Shift/Reduce Conflicts -========================== - -Suppose we are parsing a language which has if-then and if-then-else -statements, with a pair of rules like this: - - if_stmt: - "if" expr "then" stmt - | "if" expr "then" stmt "else" stmt - ; - -Here ‘"if"’, ‘"then"’ and ‘"else"’ are terminal symbols for specific -keyword tokens. - - When the ‘"else"’ token is read and becomes the lookahead token, the -contents of the stack (assuming the input is valid) are just right for -reduction by the first rule. But it is also legitimate to shift the -‘"else"’, because that would lead to eventual reduction by the second -rule. - - This situation, where either a shift or a reduction would be valid, -is called a “shift/reduce conflict”. Bison is designed to resolve these -conflicts by choosing to shift, unless otherwise directed by operator -precedence declarations. To see the reason for this, let's contrast it -with the other alternative. - - Since the parser prefers to shift the ‘"else"’, the result is to -attach the else-clause to the innermost if-statement, making these two -inputs equivalent: - - if x then if y then win; else lose; - - if x then do; if y then win; else lose; end; - - But if the parser chose to reduce when possible rather than shift, -the result would be to attach the else-clause to the outermost -if-statement, making these two inputs equivalent: - - if x then if y then win; else lose; - - if x then do; if y then win; end; else lose; - - The conflict exists because the grammar as written is ambiguous: -either parsing of the simple nested if-statement is legitimate. The -established convention is that these ambiguities are resolved by -attaching the else-clause to the innermost if-statement; this is what -Bison accomplishes by choosing to shift rather than reduce. 
(It would -ideally be cleaner to write an unambiguous grammar, but that is very -hard to do in this case.) This particular ambiguity was first -encountered in the specifications of Algol 60 and is called the -"dangling ‘else’" ambiguity. - - To assist the grammar author in understanding the nature of each -conflict, Bison can be asked to generate "counterexamples". In the -present case it actually even proves that the grammar is ambiguous by -exhibiting a string with two different parses: - - Example: "if" expr "then" "if" expr "then" stmt • "else" stmt - Shift derivation - if_stmt - ↳ 3: "if" expr "then" stmt - ↳ 2: if_stmt - ↳ 4: "if" expr "then" stmt • "else" stmt - Example: "if" expr "then" "if" expr "then" stmt • "else" stmt - Reduce derivation - if_stmt - ↳ 4: "if" expr "then" stmt "else" stmt - ↳ 2: if_stmt - ↳ 3: "if" expr "then" stmt • - -*Note Counterexamples::, for more details. - - - To avoid warnings from Bison about predictable, _legitimate_ -shift/reduce conflicts, you can use the ‘%expect N’ declaration. There -will be no warning as long as the number of shift/reduce conflicts is -exactly N, and Bison will report an error if there is a different -number. *Note Expect Decl::. However, we don't recommend the use of -‘%expect’ (except ‘%expect 0’!), as an equal number of conflicts does -not mean that they are the _same_. When possible, you should rather use -precedence directives to _fix_ the conflicts explicitly (*note Non -Operators::). - - The definition of ‘if_stmt’ above is solely to blame for the -conflict, but the conflict does not actually appear without additional -rules. 
Here is a complete Bison grammar file that actually manifests -the conflict: - - %% - stmt: - expr - | if_stmt - ; - - if_stmt: - "if" expr "then" stmt - | "if" expr "then" stmt "else" stmt - ; - - expr: - "identifier" - ; - - -File: bison.info, Node: Precedence, Next: Contextual Precedence, Prev: Shift/Reduce, Up: Algorithm - -5.3 Operator Precedence -======================= - -Another situation where shift/reduce conflicts appear is in arithmetic -expressions. Here shifting is not always the preferred resolution; the -Bison declarations for operator precedence allow you to specify when to -shift and when to reduce. - -* Menu: - -* Why Precedence:: An example showing why precedence is needed. -* Using Precedence:: How to specify precedence and associativity. -* Precedence Only:: How to specify precedence only. -* Precedence Examples:: How these features are used in the previous example. -* How Precedence:: How they work. -* Non Operators:: Using precedence for general conflicts. - - -File: bison.info, Node: Why Precedence, Next: Using Precedence, Up: Precedence - -5.3.1 When Precedence is Needed -------------------------------- - -Consider the following ambiguous grammar fragment (ambiguous because the -input ‘1 - 2 * 3’ can be parsed in two different ways): - - expr: - expr '-' expr - | expr '*' expr - | expr '<' expr - | '(' expr ')' - ... - ; - -Suppose the parser has seen the tokens ‘1’, ‘-’ and ‘2’; should it -reduce them via the rule for the subtraction operator? It depends on -the next token. Of course, if the next token is ‘)’, we must reduce; -shifting is invalid because no single rule can reduce the token sequence -‘- 2 )’ or anything starting with that. But if the next token is ‘*’ or -‘<’, we have a choice: either shifting or reduction would allow the -parse to complete, but with different results. - - To decide which one Bison should do, we must consider the results. 
-If the next operator token OP is shifted, then it must be reduced first -in order to permit another opportunity to reduce the difference. The -result is (in effect) ‘1 - (2 OP 3)’. On the other hand, if the -subtraction is reduced before shifting OP, the result is ‘(1 - 2) OP 3’. -Clearly, then, the choice of shift or reduce should depend on the -relative precedence of the operators ‘-’ and OP: ‘*’ should be shifted -first, but not ‘<’. - - What about input such as ‘1 - 2 - 5’; should this be ‘(1 - 2) - 5’ or -should it be ‘1 - (2 - 5)’? For most operators we prefer the former, -which is called “left association”. The latter alternative, “right -association”, is desirable for assignment operators. The choice of left -or right association is a matter of whether the parser chooses to shift -or reduce when the stack contains ‘1 - 2’ and the lookahead token is -‘-’: shifting makes right-associativity. - - -File: bison.info, Node: Using Precedence, Next: Precedence Only, Prev: Why Precedence, Up: Precedence - -5.3.2 Specifying Operator Precedence ------------------------------------- - -Bison allows you to specify these choices with the operator precedence -declarations ‘%left’ and ‘%right’. Each such declaration contains a -list of tokens, which are operators whose precedence and associativity -is being declared. The ‘%left’ declaration makes all those operators -left-associative and the ‘%right’ declaration makes them -right-associative. A third alternative is ‘%nonassoc’, which declares -that it is a syntax error to find the same operator twice "in a row". -The last alternative, ‘%precedence’, allows you to define only precedence -and no associativity at all. As a result, any associativity-related -conflict that remains will be reported as a compile-time error. The -directive ‘%nonassoc’ creates a run-time error: using the operator in an -associative way is a syntax error.
The directive ‘%precedence’ creates -compile-time errors: an operator _can_ be involved in an -associativity-related conflict, contrary to what the grammar -author expected. - - The relative precedence of different operators is controlled by the -order in which they are declared. The first precedence/associativity -declaration in the file declares the operators whose precedence is -lowest, the next such declaration declares the operators whose -precedence is a little higher, and so on. - - -File: bison.info, Node: Precedence Only, Next: Precedence Examples, Prev: Using Precedence, Up: Precedence - -5.3.3 Specifying Precedence Only --------------------------------- - -Since POSIX Yacc defines only ‘%left’, ‘%right’, and ‘%nonassoc’, which -all define precedence and associativity, little attention is paid to -the fact that precedence cannot be defined without defining -associativity. Yet, sometimes, when trying to solve a conflict, -precedence suffices. In such a case, using ‘%left’, ‘%right’, or -‘%nonassoc’ might hide future (associativity related) conflicts that -would remain hidden. - - The dangling ‘else’ ambiguity (*note Shift/Reduce::) can be solved -explicitly. This shift/reduce conflict occurs in the following -situation, where the period denotes the current parsing state: - - if E1 then if E2 then S1 • else S2 - - The conflict involves the reduction of the rule ‘IF expr THEN stmt’, -whose precedence is by default that of its last token (‘THEN’), and the -shifting of the token ‘ELSE’. For the usual disambiguation (attach the -‘else’ to the closest ‘if’), shifting must be preferred, i.e., the -precedence of ‘ELSE’ must be higher than that of ‘THEN’. But neither is -expected to be involved in an associativity related conflict, which can -be specified as follows. - - %precedence THEN - %precedence ELSE - - The unary-minus is another typical example where associativity is -usually over-specified, see *note Infix Calc::.
The ‘%left’ directive -is traditionally used to declare the precedence of ‘NEG’, which is more -than needed since it also defines its associativity. While this is -harmless in the traditional example, who knows how ‘NEG’ might be used -in future evolutions of the grammar... - - -File: bison.info, Node: Precedence Examples, Next: How Precedence, Prev: Precedence Only, Up: Precedence - -5.3.4 Precedence Examples -------------------------- - -In our example, we would want the following declarations: - - %left '<' - %left '-' - %left '*' - - In a more complete example, which supports other operators as well, -we would declare them in groups of equal precedence. For example, ‘'+'’ -is declared with ‘'-'’: - - %left '<' '>' '=' "!=" "<=" ">=" - %left '+' '-' - %left '*' '/' - - -File: bison.info, Node: How Precedence, Next: Non Operators, Prev: Precedence Examples, Up: Precedence - -5.3.5 How Precedence Works --------------------------- - -The first effect of the precedence declarations is to assign precedence -levels to the terminal symbols declared. The second effect is to assign -precedence levels to certain rules: each rule gets its precedence from -the last terminal symbol mentioned in the components. (You can also -specify explicitly the precedence of a rule. *Note Contextual -Precedence::.) - - Finally, the resolution of conflicts works by comparing the -precedence of the rule being considered with that of the lookahead -token. If the token's precedence is higher, the choice is to shift. If -the rule's precedence is higher, the choice is to reduce. If they have -equal precedence, the choice is made based on the associativity of that -precedence level. The verbose output file made by ‘-v’ (*note -Invocation::) says how each conflict was resolved. - - Not all rules and not all tokens have precedence. If either the rule -or the lookahead token has no precedence, then the default is to shift. 
- - -File: bison.info, Node: Non Operators, Prev: How Precedence, Up: Precedence - -5.3.6 Using Precedence For Non Operators ----------------------------------------- - -Properly using precedence and associativity directives can help fix -shift/reduce conflicts that do not involve arithmetic-like operators. -For instance, the "dangling ‘else’" problem (*note Shift/Reduce::) can -be solved elegantly in two different ways. - - In the present case, the conflict is between the token ‘"else"’ -willing to be shifted, and the rule ‘if_stmt: "if" expr "then" stmt’, -asking for reduction. By default, the precedence of a rule is that of -its last token, here ‘"then"’, so the conflict will be solved -appropriately by giving ‘"else"’ a precedence higher than that of -‘"then"’, for instance as follows: - - %precedence "then" - %precedence "else" - - Alternatively, you may give both tokens the same precedence, in which -case associativity is used to solve the conflict. To preserve the shift -action, use right associativity: - - %right "then" "else" - - Neither solution is perfect however. Since Bison does not provide, -so far, "scoped" precedence, both force you to declare the precedence of -these keywords with respect to the other operators of your grammar. -Therefore, instead of being warned about new conflicts you would be -unaware of (e.g., a shift/reduce conflict due to ‘if test then 1 else 2 -+ 3’ being ambiguous: ‘if test then 1 else (2 + 3)’ or ‘(if test then 1 -else 2) + 3’?), the conflict will already be "fixed". - - -File: bison.info, Node: Contextual Precedence, Next: Parser States, Prev: Precedence, Up: Algorithm - -5.4 Context-Dependent Precedence -================================ - -Often the precedence of an operator depends on the context. This sounds -outlandish at first, but it is really very common.
For example, a minus -sign typically has a very high precedence as a unary operator, and a -somewhat lower precedence (lower than multiplication) as a binary -operator. - - The Bison precedence declarations can only be used once for a given -token; so a token has only one precedence declared in this way. For -context-dependent precedence, you need to use an additional mechanism: -the ‘%prec’ modifier for rules. - - The ‘%prec’ modifier declares the precedence of a particular rule by -specifying a terminal symbol whose precedence should be used for that -rule. It's not necessary for that symbol to appear otherwise in the -rule. The modifier's syntax is: - - %prec TERMINAL-SYMBOL - -and it is written after the components of the rule. Its effect is to -assign the rule the precedence of TERMINAL-SYMBOL, overriding the -precedence that would be deduced for it in the ordinary way. The -altered rule precedence then affects how conflicts involving that rule -are resolved (*note Precedence::). - - Here is how ‘%prec’ solves the problem of unary minus. First, -declare a precedence for a fictitious terminal symbol named ‘UMINUS’. -There are no tokens of this type, but the symbol serves to stand for its -precedence: - - ... - %left '+' '-' - %left '*' - %left UMINUS - - Now the precedence of ‘UMINUS’ can be used in specific rules: - - exp: - ... - | exp '-' exp - ... - | '-' exp %prec UMINUS - - -File: bison.info, Node: Parser States, Next: Reduce/Reduce, Prev: Contextual Precedence, Up: Algorithm - -5.5 Parser States -================= - -The function ‘yyparse’ is implemented using a finite-state machine. The -values pushed on the parser stack are not simply token kind codes; they -represent the entire sequence of terminal and nonterminal symbols at or -near the top of the stack. The current state collects all the -information about previous input which is relevant to deciding what to -do next. 
- - Each time a lookahead token is read, the current parser state -together with the kind of lookahead token are looked up in a table. -This table entry can say, "Shift the lookahead token." In this case, it -also specifies the new parser state, which is pushed onto the top of the -parser stack. Or it can say, "Reduce using rule number N." This means -that a certain number of tokens or groupings are taken off the top of -the stack, and replaced by one grouping. In other words, that number of -states are popped from the stack, and one new state is pushed. - - There is one other alternative: the table can say that the lookahead -token is erroneous in the current state. This causes error processing -to begin (*note Error Recovery::). - - -File: bison.info, Node: Reduce/Reduce, Next: Mysterious Conflicts, Prev: Parser States, Up: Algorithm - -5.6 Reduce/Reduce Conflicts -=========================== - -A reduce/reduce conflict occurs if there are two or more rules that -apply to the same sequence of input. This usually indicates a serious -error in the grammar. - - For example, here is an erroneous attempt to define a sequence of -zero or more ‘word’ groupings. - - sequence: - %empty { printf ("empty sequence\n"); } - | maybeword - | sequence word { printf ("added word %s\n", $2); } - ; - - maybeword: - %empty { printf ("empty maybeword\n"); } - | word { printf ("single word %s\n", $1); } - ; - -The error is an ambiguity: as counterexample generation would -demonstrate (*note Counterexamples::), there is more than one way to -parse a single ‘word’ into a ‘sequence’. It could be reduced to a -‘maybeword’ and then into a ‘sequence’ via the second rule. -Alternatively, nothing-at-all could be reduced into a ‘sequence’ via the -first rule, and this could be combined with the ‘word’ using the third -rule for ‘sequence’. - - There is also more than one way to reduce nothing-at-all into a -‘sequence’. 
This can be done directly via the first rule, or indirectly -via ‘maybeword’ and then the second rule. - - You might think that this is a distinction without a difference, -because it does not change whether any particular input is valid or not. -But it does affect which actions are run. One parsing order runs the -second rule's action; the other runs the first rule's action and the -third rule's action. In this example, the output of the program -changes. - - Bison resolves a reduce/reduce conflict by choosing to use the rule -that appears first in the grammar, but it is very risky to rely on this. -Every reduce/reduce conflict must be studied and usually eliminated. -Here is the proper way to define ‘sequence’: - - sequence: - %empty { printf ("empty sequence\n"); } - | sequence word { printf ("added word %s\n", $2); } - ; - - Here is another common error that yields a reduce/reduce conflict: - - sequence: - %empty - | sequence words - | sequence redirects - ; - - words: - %empty - | words word - ; - - redirects: - %empty - | redirects redirect - ; - -The intention here is to define a sequence which can contain either -‘word’ or ‘redirect’ groupings. The individual definitions of -‘sequence’, ‘words’ and ‘redirects’ are error-free, but the three -together make a subtle ambiguity: even an empty input can be parsed in -infinitely many ways! - - Consider: nothing-at-all could be a ‘words’. Or it could be two -‘words’ in a row, or three, or any number. It could equally well be a -‘redirects’, or two, or any number. Or it could be a ‘words’ followed -by three ‘redirects’ and another ‘words’. And so on. - - Here are two ways to correct these rules. 
First, to make it a single -level of sequence: - - sequence: - %empty - | sequence word - | sequence redirect - ; - - Second, to prevent either a ‘words’ or a ‘redirects’ from being -empty: - - sequence: - %empty - | sequence words - | sequence redirects - ; - - words: - word - | words word - ; - - redirects: - redirect - | redirects redirect - ; - - Yet this proposal introduces another kind of ambiguity! The input -‘word word’ can be parsed as a single ‘words’ composed of two ‘word’s, -or as two one-‘word’ ‘words’ (and likewise for ‘redirect’/‘redirects’). -However this ambiguity is now a shift/reduce conflict, and therefore it -can now be addressed with precedence directives. - - To simplify the matter, we will proceed with ‘word’ and ‘redirect’ -being tokens: ‘"word"’ and ‘"redirect"’. - - To prefer the longest ‘words’, the conflict between the token -‘"word"’ and the rule ‘sequence: sequence words’ must be resolved as a -shift. To this end, we use the same techniques as exposed above, see -*note Non Operators::. One solution relies on precedences: use ‘%prec’ -to give a lower precedence to the rule: - - %precedence "word" - %precedence "sequence" - %% - sequence: - %empty - | sequence word %prec "sequence" - | sequence redirect %prec "sequence" - ; - - words: - word - | words "word" - ; - - Another solution relies on associativity: provide both the token and -the rule with the same precedence, but make them right-associative: - - %right "word" "redirect" - %% - sequence: - %empty - | sequence word %prec "word" - | sequence redirect %prec "redirect" - ; - - -File: bison.info, Node: Mysterious Conflicts, Next: Tuning LR, Prev: Reduce/Reduce, Up: Algorithm - -5.7 Mysterious Conflicts -======================== - -Sometimes reduce/reduce conflicts can occur that don't look warranted. 
-Here is an example: - - %% - def: param_spec return_spec ','; - param_spec: - type - | name_list ':' type - ; - - return_spec: - type - | name ':' type - ; - - type: "id"; - - name: "id"; - name_list: - name - | name ',' name_list - ; - - It would seem that this grammar can be parsed with only a single -token of lookahead: when a ‘param_spec’ is being read, an ‘"id"’ is a -‘name’ if a comma or colon follows, or a ‘type’ if another ‘"id"’ -follows. In other words, this grammar is LR(1). Yet Bison finds one -reduce/reduce conflict, for which counterexample generation (*note -Counterexamples::) would find a _nonunifying_ example. - - This is because Bison does not handle all LR(1) grammars _by -default_, for historical reasons. In this grammar, two contexts, that -after an ‘"id"’ at the beginning of a ‘param_spec’ and likewise at the -beginning of a ‘return_spec’, are similar enough that Bison assumes they -are the same. They appear similar because the same set of rules would -be active--the rule for reducing to a ‘name’ and that for reducing to a -‘type’. Bison is unable to determine at that stage of processing that -the rules would require different lookahead tokens in the two contexts, -so it makes a single parser state for them both. Combining the two -contexts causes a conflict later. In parser terminology, this -occurrence means that the grammar is not LALR(1). - - For many practical grammars (specifically those that fall into the -non-LR(1) class), the limitations of LALR(1) result in difficulties -beyond just mysterious reduce/reduce conflicts. The best way to fix all -these problems is to select a different parser table construction -algorithm. Either IELR(1) or canonical LR(1) would suffice, but the -former is more efficient and easier to debug during development. *Note -LR Table Construction::, for details. 
- - If you instead wish to work around LALR(1)'s limitations, you can -often fix a mysterious conflict by identifying the two parser states -that are being confused, and adding something to make them look -distinct. In the above example, adding one rule to ‘return_spec’ as -follows makes the problem go away: - - ... - return_spec: - type - | name ':' type - | "id" "bogus" /* This rule is never used. */ - ; - - This corrects the problem because it introduces the possibility of an -additional active rule in the context after the ‘"id"’ at the beginning -of ‘return_spec’. This rule is not active in the corresponding context -in a ‘param_spec’, so the two contexts receive distinct parser states. -As long as the token ‘"bogus"’ is never generated by ‘yylex’, the added -rule cannot alter the way actual input is parsed. - - In this particular example, there is another way to solve the -problem: rewrite the rule for ‘return_spec’ to use ‘"id"’ directly -instead of via ‘name’. This also causes the two confusing contexts to -have different sets of active rules, because the one for ‘return_spec’ -activates the altered rule for ‘return_spec’ rather than the one for -‘name’. - - param_spec: - type - | name_list ':' type - ; - - return_spec: - type - | "id" ':' type - ; - - For a more detailed exposition of LALR(1) parsers and parser -generators, see *note DeRemer 1982::. - - -File: bison.info, Node: Tuning LR, Next: Generalized LR Parsing, Prev: Mysterious Conflicts, Up: Algorithm - -5.8 Tuning LR -============= - -The default behavior of Bison's LR-based parsers is chosen mostly for -historical reasons, but that behavior is often not robust. For example, -in the previous section, we discussed the mysterious conflicts that can -be produced by LALR(1), Bison's default parser table construction -algorithm. 
Another example is Bison's ‘%define parse.error verbose’ -directive, which instructs the generated parser to produce verbose -syntax error messages, which can sometimes contain incorrect -information. - - In this section, we explore several modern features of Bison that -allow you to tune fundamental aspects of the generated LR-based parsers. -Some of these features easily eliminate shortcomings like those -mentioned above. Others can be helpful purely for understanding your -parser. - -* Menu: - -* LR Table Construction:: Choose a different construction algorithm. -* Default Reductions:: Disable default reductions. -* LAC:: Correct lookahead sets in the parser states. -* Unreachable States:: Keep unreachable parser states for debugging. - - -File: bison.info, Node: LR Table Construction, Next: Default Reductions, Up: Tuning LR - -5.8.1 LR Table Construction ---------------------------- - -For historical reasons, Bison constructs LALR(1) parser tables by -default. However, LALR does not possess the full language-recognition -power of LR. As a result, the behavior of parsers employing LALR parser -tables is often mysterious. We presented a simple example of this -effect in *note Mysterious Conflicts::. - - As we also demonstrated in that example, the traditional approach to -eliminating such mysterious behavior is to restructure the grammar. -Unfortunately, doing so correctly is often difficult. Moreover, merely -discovering that LALR causes mysterious behavior in your parser can be -difficult as well. - - Fortunately, Bison provides an easy way to eliminate the possibility -of such mysterious behavior altogether. You simply need to activate a -more powerful parser table construction algorithm by using the ‘%define -lr.type’ directive. - - -- Directive: %define lr.type TYPE - Specify the type of parser tables within the LR(1) family. 
The - accepted values for TYPE are: - - • ‘lalr’ (default) - • ‘ielr’ - • ‘canonical-lr’ - - For example, to activate IELR, you might add the following directive -to your grammar file: - - %define lr.type ielr - -For the example in *note Mysterious Conflicts::, the mysterious conflict -is then eliminated, so there is no need to invest time in comprehending -the conflict or restructuring the grammar to fix it. If, during future -development, the grammar evolves such that all mysterious behavior would -have disappeared using just LALR, you need not fear that continuing to -use IELR will result in unnecessarily large parser tables. That is, -IELR generates LALR tables when LALR (using a deterministic parsing -algorithm) is sufficient to support the full language-recognition power -of LR. Thus, by enabling IELR at the start of grammar development, you -can safely and completely eliminate the need to consider LALR's -shortcomings. - - While IELR is almost always preferable, there are circumstances where -LALR or the canonical LR parser tables described by Knuth (*note Knuth -1965::) can be useful. Here we summarize the relative advantages of -each parser table construction algorithm within Bison: - - • LALR - - There are at least two scenarios where LALR can be worthwhile: - - • GLR without static conflict resolution. - - When employing GLR parsers (*note GLR Parsers::), if you do - not resolve any conflicts statically (for example, with - ‘%left’ or ‘%precedence’), then the parser explores all - potential parses of any given input. In this case, the choice - of parser table construction algorithm is guaranteed not to - alter the language accepted by the parser. LALR parser tables - are the smallest parser tables Bison can currently construct, - so they may then be preferable.
Nevertheless, once you begin - to resolve conflicts statically, GLR behaves more like a - deterministic parser in the syntactic contexts where those - conflicts appear, and so either IELR or canonical LR can then - be helpful to avoid LALR's mysterious behavior. - - • Malformed grammars. - - Occasionally during development, an especially malformed - grammar with a major recurring flaw may severely impede the - IELR or canonical LR parser table construction algorithm. - LALR can be a quick way to construct parser tables in order to - investigate such problems while ignoring the more subtle - differences from IELR and canonical LR. - - • IELR - - IELR (Inadequacy Elimination LR) is a minimal LR algorithm. That - is, given any grammar (LR or non-LR), parsers using IELR or - canonical LR parser tables always accept exactly the same set of - sentences. However, like LALR, IELR merges parser states during - parser table construction so that the number of parser states is - often an order of magnitude less than for canonical LR. More - importantly, because canonical LR's extra parser states may contain - duplicate conflicts in the case of non-LR grammars, the number of - conflicts for IELR is often an order of magnitude less as well. - This effect can significantly reduce the complexity of developing a - grammar. - - • Canonical LR - - While inefficient, canonical LR parser tables can be an interesting - means to explore a grammar because they possess a property that - IELR and LALR tables do not. That is, if ‘%nonassoc’ is not used - and default reductions are left disabled (*note Default - Reductions::), then, for every left context of every canonical LR - state, the set of tokens accepted by that state is guaranteed to be - the exact set of tokens that is syntactically acceptable in that - left context. 
It might then seem that an advantage of canonical LR - parsers in production is that, under the above constraints, they - are guaranteed to detect a syntax error as soon as possible without - performing any unnecessary reductions. However, IELR parsers that - use LAC are also able to achieve this behavior without sacrificing - ‘%nonassoc’ or default reductions. For details and a few caveats - of LAC, *note LAC::. - - For a more detailed exposition of the mysterious behavior in LALR -parsers and the benefits of IELR, see *note Denny 2008::, and *note -Denny 2010 November::. - - -File: bison.info, Node: Default Reductions, Next: LAC, Prev: LR Table Construction, Up: Tuning LR - -5.8.2 Default Reductions ------------------------- - -After parser table construction, Bison identifies the reduction with the -largest lookahead set in each parser state. To reduce the size of the -parser state, traditional Bison behavior is to remove that lookahead set -and to assign that reduction to be the default parser action. Such a -reduction is known as a “default reduction”. - - Default reductions affect more than the size of the parser tables. -They also affect the behavior of the parser: - - • Delayed ‘yylex’ invocations. - - A “consistent state” is a state that has only one possible parser - action. If that action is a reduction and is encoded as a default - reduction, then that consistent state is called a “defaulted - state”. Upon reaching a defaulted state, a Bison-generated parser - does not bother to invoke ‘yylex’ to fetch the next token before - performing the reduction. In other words, whether default - reductions are enabled in consistent states determines how soon a - Bison-generated parser invokes ‘yylex’ for a token: immediately - when it _reaches_ that token in the input or when it eventually - _needs_ that token as a lookahead to determine the next parser - action. Traditionally, default reductions are enabled, and so the - parser exhibits the latter behavior. 
- - The presence of defaulted states is an important consideration when - designing ‘yylex’ and the grammar file. That is, if the behavior - of ‘yylex’ can influence or be influenced by the semantic actions - associated with the reductions in defaulted states, then the delay - of the next ‘yylex’ invocation until after those reductions is - significant. For example, the semantic actions might pop a scope - stack that ‘yylex’ uses to determine what token to return. Thus, - the delay might be necessary to ensure that ‘yylex’ does not look - up the next token in a scope that should already be considered - closed. - - • Delayed syntax error detection. - - When the parser fetches a new token by invoking ‘yylex’, it checks - whether there is an action for that token in the current parser - state. The parser detects a syntax error if and only if either (1) - there is no action for that token or (2) the action for that token - is the error action (due to the use of ‘%nonassoc’). However, if - there is a default reduction in that state (which might or might - not be a defaulted state), then it is impossible for condition 1 to - exist. That is, all tokens have an action. Thus, the parser - sometimes fails to detect the syntax error until it reaches a later - state. - - While default reductions never cause the parser to accept - syntactically incorrect sentences, the delay of syntax error - detection can have unexpected effects on the behavior of the - parser. However, the delay can be caused anyway by parser state - merging and the use of ‘%nonassoc’, and it can be fixed by another - Bison feature, LAC. We discuss the effects of delayed syntax error - detection and LAC more in the next section (*note LAC::). - - For canonical LR, the only default reduction that Bison enables by -default is the accept action, which appears only in the accepting state, -which has no other action and is thus a defaulted state. 
However, the -default accept action does not delay any ‘yylex’ invocation or syntax -error detection because the accept action ends the parse. - - For LALR and IELR, Bison enables default reductions in nearly all -states by default. There are only two exceptions. First, states that -have a shift action on the ‘error’ token do not have default reductions -because delayed syntax error detection could then prevent the ‘error’ -token from ever being shifted in that state. However, parser state -merging can cause the same effect anyway, and LAC fixes it in both -cases, so future versions of Bison might drop this exception when LAC is -activated. Second, GLR parsers do not record the default reduction as -the action on a lookahead token for which there is a conflict. The -correct action in this case is to split the parse instead. - - To adjust which states have default reductions enabled, use the -‘%define lr.default-reduction’ directive. - - -- Directive: %define lr.default-reduction WHERE - Specify the kind of states that are permitted to contain default - reductions. The accepted values of WHERE are: - • ‘most’ (default for LALR and IELR) - • ‘consistent’ - • ‘accepting’ (default for canonical LR) - - -File: bison.info, Node: LAC, Next: Unreachable States, Prev: Default Reductions, Up: Tuning LR - -5.8.3 LAC ---------- - -Canonical LR, IELR, and LALR can suffer from a couple of problems upon -encountering a syntax error. First, the parser might perform additional -parser stack reductions before discovering the syntax error. Such -reductions can perform user semantic actions that are unexpected because -they are based on an invalid token, and they cause error recovery to -begin in a different syntactic context than the one in which the invalid -token was encountered. Second, when verbose error messages are enabled -(*note Error Reporting::), the expected token list in the syntax error -message can both contain invalid tokens and omit valid tokens. 
- - The culprits for the above problems are ‘%nonassoc’, default -reductions in inconsistent states (*note Default Reductions::), and -parser state merging. Because IELR and LALR merge parser states, they -suffer the most. Canonical LR can suffer only if ‘%nonassoc’ is used or -if default reductions are enabled for inconsistent states. - - LAC (Lookahead Correction) is a new mechanism within the parsing -algorithm that solves these problems for canonical LR, IELR, and LALR -without sacrificing ‘%nonassoc’, default reductions, or state merging. -You can enable LAC with the ‘%define parse.lac’ directive. - - -- Directive: %define parse.lac VALUE - Enable LAC to improve syntax error handling. - • ‘none’ (default) - • ‘full’ - This feature is currently only available for deterministic parsers - in C and C++. - - Conceptually, the LAC mechanism is straight-forward. Whenever the -parser fetches a new token from the scanner so that it can determine the -next parser action, it immediately suspends normal parsing and performs -an exploratory parse using a temporary copy of the normal parser state -stack. During this exploratory parse, the parser does not perform user -semantic actions. If the exploratory parse reaches a shift action, -normal parsing then resumes on the normal parser stacks. If the -exploratory parse reaches an error instead, the parser reports a syntax -error. If verbose syntax error messages are enabled, the parser must -then discover the list of expected tokens, so it performs a separate -exploratory parse for each token in the grammar. - - There is one subtlety about the use of LAC. That is, when in a -consistent parser state with a default reduction, the parser will not -attempt to fetch a token from the scanner because no lookahead is needed -to determine the next parser action. 
Thus, whether default reductions -are enabled in consistent states (*note Default Reductions::) affects -how soon the parser detects a syntax error: immediately when it -_reaches_ an erroneous token or when it eventually _needs_ that token as -a lookahead to determine the next parser action. The latter behavior is -probably more intuitive, so Bison currently provides no way to achieve -the former behavior while default reductions are enabled in consistent -states. - - Thus, when LAC is in use, for some fixed decision of whether to -enable default reductions in consistent states, canonical LR and IELR -behave almost exactly the same for both syntactically acceptable and -syntactically unacceptable input. While LALR still does not support the -full language-recognition power of canonical LR and IELR, LAC at least -enables LALR's syntax error handling to correctly reflect LALR's -language-recognition power. - - There are a few caveats to consider when using LAC: - - • Infinite parsing loops. - - IELR plus LAC does have one shortcoming relative to canonical LR. - Some parsers generated by Bison can loop infinitely. LAC does not - fix infinite parsing loops that occur between encountering a syntax - error and detecting it, but enabling canonical LR or disabling - default reductions sometimes does. - - • Verbose error message limitations. - - Because of internationalization considerations, Bison-generated - parsers limit the size of the expected token list they are willing - to report in a verbose syntax error message. If the number of - expected tokens exceeds that limit, the list is simply dropped from - the message. Enabling LAC can increase the size of the list and - thus cause the parser to drop it. Of course, dropping the list is - better than reporting an incorrect list. - - • Performance. - - Because LAC requires many parse actions to be performed twice, it - can have a performance penalty. However, not all parse actions - must be performed twice. 
Specifically, during a series of default - reductions in consistent states and shift actions, the parser never - has to initiate an exploratory parse. Moreover, the most - time-consuming tasks in a parse are often the file I/O, the lexical - analysis performed by the scanner, and the user's semantic actions, - but none of these are performed during the exploratory parse. - Finally, the base of the temporary stack used during an exploratory - parse is a pointer into the normal parser state stack so that the - stack is never physically copied. In our experience, the - performance penalty of LAC has proved insignificant for practical - grammars. - - While the LAC algorithm shares techniques that have been recognized -in the parser community for years, for the publication that introduces -LAC, see *note Denny 2010 May::. - - -File: bison.info, Node: Unreachable States, Prev: LAC, Up: Tuning LR - -5.8.4 Unreachable States ------------------------- - -If there exists no sequence of transitions from the parser's start state -to some state S, then Bison considers S to be an “unreachable state”. A -state can become unreachable during conflict resolution if Bison -disables a shift action leading to it from a predecessor state. - - By default, Bison removes unreachable states from the parser after -conflict resolution because they are useless in the generated parser. -However, keeping unreachable states is sometimes useful when trying to -understand the relationship between the parser and the grammar. - - -- Directive: %define lr.keep-unreachable-state VALUE - Request that Bison allow unreachable states to remain in the parser - tables. VALUE must be a Boolean. The default is ‘false’. - - There are a few caveats to consider: - - • Missing or extraneous warnings. - - Unreachable states may contain conflicts and may use rules not used - in any other state. 
Thus, keeping unreachable states may induce - warnings that are irrelevant to your parser's behavior, and it may - eliminate warnings that are relevant. Of course, the change in - warnings may actually be relevant to a parser table analysis that - wants to keep unreachable states, so this behavior will likely - remain in future Bison releases. - - • Other useless states. - - While Bison is able to remove unreachable states, it is not - guaranteed to remove other kinds of useless states. Specifically, - when Bison disables reduce actions during conflict resolution, some - goto actions may become useless, and thus some additional states - may become useless. If Bison were to compute which goto actions - were useless and then disable those actions, it could identify such - states as unreachable and then remove those states. However, Bison - does not compute which goto actions are useless. - - -File: bison.info, Node: Generalized LR Parsing, Next: Memory Management, Prev: Tuning LR, Up: Algorithm - -5.9 Generalized LR (GLR) Parsing -================================ - -Bison produces _deterministic_ parsers that choose uniquely when to -reduce and which reduction to apply based on a summary of the preceding -input and on one extra token of lookahead. As a result, normal Bison -handles a proper subset of the family of context-free languages. -Ambiguous grammars, since they have strings with more than one possible -sequence of reductions cannot have deterministic parsers in this sense. -The same is true of languages that require more than one symbol of -lookahead, since the parser lacks the information necessary to make a -decision at the point it must be made in a shift/reduce parser. -Finally, as previously mentioned (*note Mysterious Conflicts::), there -are languages where Bison's default choice of how to summarize the input -seen so far loses necessary information. 
- - When you use the ‘%glr-parser’ declaration in your grammar file, -Bison generates a parser that uses a different algorithm, called -Generalized LR (or GLR). A Bison GLR parser uses the same basic -algorithm for parsing as an ordinary Bison parser, but behaves -differently in cases where there is a shift/reduce conflict that has not -been resolved by precedence rules (*note Precedence::) or a -reduce/reduce conflict. When a GLR parser encounters such a situation, -it effectively _splits_ into a several parsers, one for each possible -shift or reduction. These parsers then proceed as usual, consuming -tokens in lock-step. Some of the stacks may encounter other conflicts -and split further, with the result that instead of a sequence of states, -a Bison GLR parsing stack is what is in effect a tree of states. - - In effect, each stack represents a guess as to what the proper parse -is. Additional input may indicate that a guess was wrong, in which case -the appropriate stack silently disappears. Otherwise, the semantics -actions generated in each stack are saved, rather than being executed -immediately. When a stack disappears, its saved semantic actions never -get executed. When a reduction causes two stacks to become equivalent, -their sets of semantic actions are both saved with the state that -results from the reduction. We say that two stacks are equivalent when -they both represent the same sequence of states, and each pair of -corresponding states represents a grammar symbol that produces the same -segment of the input token stream. - - Whenever the parser makes a transition from having multiple states to -having one, it reverts to the normal deterministic parsing algorithm, -after resolving and executing the saved-up actions. At this transition, -some of the states on the stack will have semantic values that are sets -(actually multisets) of possible actions. 
The parser tries to pick one -of the actions by first finding one whose rule has the highest dynamic -precedence, as set by the ‘%dprec’ declaration. Otherwise, if the -alternative actions are not ordered by precedence, but there the same -merging function is declared for both rules by the ‘%merge’ declaration, -Bison resolves and evaluates both and then calls the merge function on -the result. Otherwise, it reports an ambiguity. - - It is possible to use a data structure for the GLR parsing tree that -permits the processing of any LR(1) grammar in linear time (in the size -of the input), any unambiguous (not necessarily LR(1)) grammar in -quadratic worst-case time, and any general (possibly ambiguous) -context-free grammar in cubic worst-case time. However, Bison currently -uses a simpler data structure that requires time proportional to the -length of the input times the maximum number of stacks required for any -prefix of the input. Thus, really ambiguous or nondeterministic -grammars can require exponential time and space to process. Such badly -behaving examples, however, are not generally of practical interest. -Usually, nondeterminism in a grammar is local--the parser is "in doubt" -only for a few tokens at a time. Therefore, the current data structure -should generally be adequate. On LR(1) portions of a grammar, in -particular, it is only slightly slower than with the deterministic LR(1) -Bison parser. - - For a more detailed exposition of GLR parsers, see *note Scott -2000::. - - -File: bison.info, Node: Memory Management, Prev: Generalized LR Parsing, Up: Algorithm - -5.10 Memory Management, and How to Avoid Memory Exhaustion -========================================================== - -The Bison parser stack can run out of memory if too many tokens are -shifted and not reduced. When this happens, the parser function -‘yyparse’ calls ‘yyerror’ and then returns 2. 
- - Because Bison parsers have growing stacks, hitting the upper limit -usually results from using a right recursion instead of a left -recursion, see *note Recursion::. - - By defining the macro ‘YYMAXDEPTH’, you can control how deep the -parser stack can become before memory is exhausted. Define the macro -with a value that is an integer. This value is the maximum number of -tokens that can be shifted (and not reduced) before overflow. - - The stack space allowed is not necessarily allocated. If you specify -a large value for ‘YYMAXDEPTH’, the parser normally allocates a small -stack at first, and then makes it bigger by stages as needed. This -increasing allocation happens automatically and silently. Therefore, -you do not need to make ‘YYMAXDEPTH’ painfully small merely to save -space for ordinary inputs that do not need much stack. - - However, do not allow ‘YYMAXDEPTH’ to be a value so large that -arithmetic overflow could occur when calculating the size of the stack -space. Also, do not allow ‘YYMAXDEPTH’ to be less than ‘YYINITDEPTH’. - - The default value of ‘YYMAXDEPTH’, if you do not define it, is 10000. - - You can control how much stack is allocated initially by defining the -macro ‘YYINITDEPTH’ to a positive integer. For the deterministic parser -in C, this value must be a compile-time constant unless you are assuming -C99 or some other target language or compiler that allows -variable-length arrays. The default is 200. - - Do not allow ‘YYINITDEPTH’ to be greater than ‘YYMAXDEPTH’. - - You can generate a deterministic parser containing C++ user code from -the default (C) skeleton, as well as from the C++ skeleton (*note C++ -Parsers::). However, if you do use the default skeleton and want to -allow the parsing stack to grow, be careful not to use semantic types or -location types that require non-trivial copy constructors. The C -skeleton bypasses these constructors when copying data to new, larger -stacks. 
- - -File: bison.info, Node: Error Recovery, Next: Context Dependency, Prev: Algorithm, Up: Top - -6 Error Recovery -**************** - -It is not usually acceptable to have a program terminate on a syntax -error. For example, a compiler should recover sufficiently to parse the -rest of the input file and check it for errors; a calculator should -accept another expression. - - In a simple interactive command parser where each input is one line, -it may be sufficient to allow ‘yyparse’ to return 1 on error and have -the caller ignore the rest of the input line when that happens (and then -call ‘yyparse’ again). But this is inadequate for a compiler, because -it forgets all the syntactic context leading up to the error. A syntax -error deep within a function in the compiler input should not cause the -compiler to treat the following line like the beginning of a source -file. - - You can define how to recover from a syntax error by writing rules to -recognize the special token ‘error’. This is a terminal symbol that is -always defined (you need not declare it) and reserved for error -handling. The Bison parser generates an ‘error’ token whenever a syntax -error happens; if you have provided a rule to recognize this token in -the current context, the parse can continue. - - For example: - - stmts: - %empty - | stmts '\n' - | stmts exp '\n' - | stmts error '\n' - - The fourth rule in this example says that an error followed by a -newline makes a valid addition to any ‘stmts’. - - What happens if a syntax error occurs in the middle of an ‘exp’? The -error recovery rule, interpreted strictly, applies to the precise -sequence of a ‘stmts’, an ‘error’ and a newline. If an error occurs in -the middle of an ‘exp’, there will probably be some additional tokens -and subexpressions on the stack after the last ‘stmts’, and there will -be tokens to read before the next newline. So the rule is not -applicable in the ordinary way. 
- - But Bison can force the situation to fit the rule, by discarding part -of the semantic context and part of the input. First it discards states -and objects from the stack until it gets back to a state in which the -‘error’ token is acceptable. (This means that the subexpressions -already parsed are discarded, back to the last complete ‘stmts’.) At -this point the ‘error’ token can be shifted. Then, if the old lookahead -token is not acceptable to be shifted next, the parser reads tokens and -discards them until it finds a token which is acceptable. In this -example, Bison reads and discards input until the next newline so that -the fourth rule can apply. Note that discarded symbols are possible -sources of memory leaks, see *note Destructor Decl::, for a means to -reclaim this memory. - - The choice of error rules in the grammar is a choice of strategies -for error recovery. A simple and useful strategy is simply to skip the -rest of the current input line or current statement if an error is -detected: - - stmt: error ';' /* On error, skip until ';' is read. */ - - It is also useful to recover to the matching close-delimiter of an -opening-delimiter that has already been parsed. Otherwise the -close-delimiter will probably appear to be unmatched, and generate -another, spurious error message: - - primary: - '(' expr ')' - | '(' error ')' - ... - ; - - Error recovery strategies are necessarily guesses. When they guess -wrong, one syntax error often leads to another. In the above example, -the error recovery rule guesses that an error is due to bad input within -one ‘stmt’. Suppose that instead a spurious semicolon is inserted in -the middle of a valid ‘stmt’. After the error recovery rule recovers -from the first error, another syntax error will be found straight away, -since the text following the spurious semicolon is also an invalid -‘stmt’. 
- - To prevent an outpouring of error messages, the parser will output no -error message for another syntax error that happens shortly after the -first; only after three consecutive input tokens have been successfully -shifted will error messages resume. - - Note that rules which accept the ‘error’ token may have actions, just -as any other rules can. - - You can make error messages resume immediately by using the macro -‘yyerrok’ in an action. If you do this in the error rule's action, no -error messages will be suppressed. This macro requires no arguments; -‘yyerrok;’ is a valid C statement. - - The previous lookahead token is reanalyzed immediately after an -error. If this is unacceptable, then the macro ‘yyclearin’ may be used -to clear this token. Write the statement ‘yyclearin;’ in the error -rule's action. *Note Action Features::. - - For example, suppose that on a syntax error, an error handling -routine is called that advances the input stream to some point where -parsing should once again commence. The next symbol returned by the -lexical scanner is probably correct. The previous lookahead token ought -to be discarded with ‘yyclearin;’. - - The expression ‘YYRECOVERING ()’ yields 1 when the parser is -recovering from a syntax error, and 0 otherwise. Syntax error -diagnostics are suppressed while recovering from a syntax error. - - -File: bison.info, Node: Context Dependency, Next: Debugging, Prev: Error Recovery, Up: Top - -7 Handling Context Dependencies -******************************* - -The Bison paradigm is to parse tokens first, then group them into larger -syntactic units. In many languages, the meaning of a token is affected -by its context. Although this violates the Bison paradigm, certain -techniques (known as “kludges”) may enable you to write Bison parsers -for such languages. - -* Menu: - -* Semantic Tokens:: Token parsing can depend on the semantic context. -* Lexical Tie-ins:: Token parsing can depend on the syntactic context. 
-* Tie-in Recovery:: Lexical tie-ins have implications for how - error recovery rules must be written. - - (Actually, "kludge" means any technique that gets its job done but is -neither clean nor robust.) - - -File: bison.info, Node: Semantic Tokens, Next: Lexical Tie-ins, Up: Context Dependency - -7.1 Semantic Info in Token Kinds -================================ - -The C language has a context dependency: the way an identifier is used -depends on what its current meaning is. For example, consider this: - - foo (x); - - This looks like a function call statement, but if ‘foo’ is a typedef -name, then this is actually a declaration of ‘x’. How can a Bison -parser for C decide how to parse this input? - - The method used in GNU C is to have two different token kinds, -‘IDENTIFIER’ and ‘TYPENAME’. When ‘yylex’ finds an identifier, it looks -up the current declaration of the identifier in order to decide which -token kind to return: ‘TYPENAME’ if the identifier is declared as a -typedef, ‘IDENTIFIER’ otherwise. - - The grammar rules can then express the context dependency by the -choice of token kind to recognize. ‘IDENTIFIER’ is accepted as an -expression, but ‘TYPENAME’ is not. ‘TYPENAME’ can start a declaration, -but ‘IDENTIFIER’ cannot. In contexts where the meaning of the -identifier is _not_ significant, such as in declarations that can shadow -a typedef name, either ‘TYPENAME’ or ‘IDENTIFIER’ is accepted--there is -one rule for each of the two token kinds. - - This technique is simple to use if the decision of which kinds of -identifiers to allow is made at a place close to where the identifier is -parsed. 
But in C this is not always so: C allows a declaration to -redeclare a typedef name provided an explicit type has been specified -earlier: - - typedef int foo, bar; - int baz (void) - { - static bar (bar); /* redeclare ‘bar’ as static variable */ - extern foo foo (foo); /* redeclare ‘foo’ as function */ - return foo (bar); - } - - Unfortunately, the name being declared is separated from the -declaration construct itself by a complicated syntactic structure--the -"declarator". - - As a result, part of the Bison parser for C needs to be duplicated, -with all the nonterminal names changed: once for parsing a declaration -in which a typedef name can be redefined, and once for parsing a -declaration in which that can't be done. Here is a part of the -duplication, with actions omitted for brevity: - - initdcl: - declarator maybeasm '=' init - | declarator maybeasm - ; - - notype_initdcl: - notype_declarator maybeasm '=' init - | notype_declarator maybeasm - ; - -Here ‘initdcl’ can redeclare a typedef name, but ‘notype_initdcl’ -cannot. The distinction between ‘declarator’ and ‘notype_declarator’ is -the same sort of thing. - - There is some similarity between this technique and a lexical tie-in -(described next), in that information which alters the lexical analysis -is changed during parsing by other parts of the program. The difference -is here the information is global, and is used for other purposes in the -program. A true lexical tie-in has a special-purpose flag controlled by -the syntactic context. - - -File: bison.info, Node: Lexical Tie-ins, Next: Tie-in Recovery, Prev: Semantic Tokens, Up: Context Dependency - -7.2 Lexical Tie-ins -=================== - -One way to handle context-dependency is the “lexical tie-in”: a flag -which is set by Bison actions, whose purpose is to alter the way tokens -are parsed. - - For example, suppose we have a language vaguely like C, but with a -special construct ‘hex (HEX-EXPR)’. 
After the keyword ‘hex’ comes an -expression in parentheses in which all integers are hexadecimal. In -particular, the token ‘a1b’ must be treated as an integer rather than as -an identifier if it appears in that context. Here is how you can do it: - - %{ - int hexflag; - int yylex (void); - void yyerror (char const *); - %} - %% - ... - expr: - IDENTIFIER - | constant - | HEX '(' { hexflag = 1; } - expr ')' { hexflag = 0; $$ = $4; } - | expr '+' expr { $$ = make_sum ($1, $3); } - ... - ; - - constant: - INTEGER - | STRING - ; - -Here we assume that ‘yylex’ looks at the value of ‘hexflag’; when it is -nonzero, all integers are parsed in hexadecimal, and tokens starting -with letters are parsed as integers if possible. - - The declaration of ‘hexflag’ shown in the prologue of the grammar -file is needed to make it accessible to the actions (*note Prologue::). -You must also write the code in ‘yylex’ to obey the flag. - - -File: bison.info, Node: Tie-in Recovery, Prev: Lexical Tie-ins, Up: Context Dependency - -7.3 Lexical Tie-ins and Error Recovery -====================================== - -Lexical tie-ins make strict demands on any error recovery rules you -have. *Note Error Recovery::. - - The reason for this is that the purpose of an error recovery rule is -to abort the parsing of one construct and resume in some larger -construct. For example, in C-like languages, a typical error recovery -rule is to skip tokens until the next semicolon, and then start a new -statement, like this: - - stmt: - expr ';' - | IF '(' expr ')' stmt { ... } - ... - | error ';' { hexflag = 0; } - ; - - If there is a syntax error in the middle of a ‘hex (EXPR)’ construct, -this error rule will apply, and then the action for the completed ‘hex -(EXPR)’ will never run. So ‘hexflag’ would remain set for the entire -rest of the input, or until the next ‘hex’ keyword, causing identifiers -to be misinterpreted as integers. 
- - To avoid this problem the error recovery rule itself clears -‘hexflag’. - - There may also be an error recovery rule that works within -expressions. For example, there could be a rule which applies within -parentheses and skips to the close-parenthesis: - - expr: - ... - | '(' expr ')' { $$ = $2; } - | '(' error ')' - ... - - If this rule acts within the ‘hex’ construct, it is not going to -abort that construct (since it applies to an inner level of parentheses -within the construct). Therefore, it should not clear the flag: the -rest of the ‘hex’ construct should be parsed with the flag still in -effect. - - What if there is an error recovery rule which might abort out of the -‘hex’ construct or might not, depending on circumstances? There is no -way you can write the action to determine whether a ‘hex’ construct is -being aborted or not. So if you are using a lexical tie-in, you had -better make sure your error recovery rules are not of this kind. Each -rule must be such that you can be sure that it always will, or always -won't, have to clear the flag. - - -File: bison.info, Node: Debugging, Next: Invocation, Prev: Context Dependency, Up: Top - -8 Debugging Your Parser -*********************** - -Developing a parser can be a challenge, especially if you don't -understand the algorithm (*note Algorithm::). This chapter explains how -to understand and debug a parser. - - The most frequent issue users face is solving their conflicts. To -fix them, the first step is understanding how they arise in a given -grammar. This is made much easier by automated generation of -counterexamples, cover in the first section (*note Counterexamples::). - - In most cases though, looking at the structure of the automaton is -still needed. The following sections explain how to generate and read -the detailed structural description of the automaton. 
There are several -formats available: - − as text, see *note Understanding::; - - − as a graph, see *note Graphviz::; - - − or as a markup report that can be turned, for instance, into HTML, - see *note Xml::. - - The last section focuses on the dynamic part of the parser: how to -enable and understand the parser run-time traces (*note Tracing::). - -* Menu: - -* Counterexamples:: Understanding conflicts. -* Understanding:: Understanding the structure of your parser. -* Graphviz:: Getting a visual representation of the parser. -* Xml:: Getting a markup representation of the parser. -* Tracing:: Tracing the execution of your parser. - - -File: bison.info, Node: Counterexamples, Next: Understanding, Up: Debugging - -8.1 Generation of Counterexamples -================================= - -Solving conflicts is probably the most delicate part of the design of an -LR parser, as demonstrated by the number of sections devoted to them in -this very documentation. To solve a conflict, one must understand it: -when does it occur? Is it because of a flaw in the grammar? Is it -rather because LR(1) cannot cope with this grammar? - - One difficulty is that conflicts occur in the _automaton_, and it can -be tricky to relate them to issues in the _grammar_ itself. With -experience and patience, analysis of the detailed description of the -automaton (*note Understanding::) allows one to find example strings -that reach these conflicts. - - That task is made much easier thanks to the generation of -counterexamples, initially developed by Chinawat Isradisaikul and Andrew -Myers (*note Isradisaikul 2015::). 
- - As a first example, see the grammar of *note Shift/Reduce::, which -features one shift/reduce conflict: - - $ bison else.y - else.y: warning: 1 shift/reduce conflict [-Wconflicts-sr] - else.y: note: rerun with option '-Wcounterexamples' to generate conflict counterexamples - -Let's rerun ‘bison’ with the option ‘-Wcex’/‘-Wcounterexamples’(the -following output is actually in color): - - else.y: warning: 1 shift/reduce conflict [-Wconflicts-sr] - else.y: warning: shift/reduce conflict on token "else" [-Wcounterexamples] - Example: "if" expr "then" "if" expr "then" stmt • "else" stmt - Shift derivation - if_stmt - ↳ 3: "if" expr "then" stmt - ↳ 2: if_stmt - ↳ 4: "if" expr "then" stmt • "else" stmt - Example: "if" expr "then" "if" expr "then" stmt • "else" stmt - Reduce derivation - if_stmt - ↳ 4: "if" expr "then" stmt "else" stmt - ↳ 2: if_stmt - ↳ 3: "if" expr "then" stmt • - - This shows two different derivations for one single expression, which -proves that the grammar is ambiguous. 
- - - As a more delicate example, consider the example grammar of *note -Reduce/Reduce::, which features a reduce/reduce conflict: - - %% - sequence: - %empty - | maybeword - | sequence "word" - ; - maybeword: - %empty - | "word" - ; - - Bison generates the following counterexamples: - - $ bison -Wcex sequence.y - sequence.y: warning: 1 shift/reduce conflict [-Wconflicts-sr] - sequence.y: warning: 2 reduce/reduce conflicts [-Wconflicts-rr] - sequence.y: warning: shift/reduce conflict on token "word" [-Wcounterexamples] - Example: • "word" - Shift derivation - sequence - ↳ 2: maybeword - ↳ 5: • "word" - Example: • "word" - Reduce derivation - sequence - ↳ 3: sequence "word" - ↳ 1: • - sequence.y: warning: reduce/reduce conflict on tokens $end, "word" [-Wcounterexamples] - Example: • - First reduce derivation - sequence - ↳ 1: • - Example: • - Second reduce derivation - sequence - ↳ 2: maybeword - ↳ 4: • - sequence.y: warning: shift/reduce conflict on token "word" [-Wcounterexamples] - Example: • "word" - Shift derivation - sequence - ↳ 2: maybeword - ↳ 5: • "word" - Example: • "word" - Reduce derivation - sequence - ↳ 3: sequence "word" - ↳ 2: maybeword - ↳ 4: • - sequence.y:8.3-45: warning: rule useless in parser due to conflicts [-Wother] - 8 | %empty { printf ("empty maybeword\n"); } - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - Each of these three conflicts, again, prove that the grammar is -ambiguous. For instance, the second conflict (the reduce/reduce one) -shows that the grammar accepts the empty input in two different ways. - - - Sometimes, the search will not find an example that can be derived in -two ways. In these cases, counterexample generation will provide two -examples that are the same up until the dot. Most notably, this will -happen when your grammar requires a stronger parser (more lookahead, LR -instead of LALR). 
The following example isn't LR(1): - - %token ID - %% - s: a ID - a: expr - expr: %empty | expr ID ',' - - ‘bison’ reports: - - ids.y: warning: 1 shift/reduce conflict [-Wconflicts-sr] - ids.y: warning: shift/reduce conflict on token ID [-Wcounterexamples] - First example: expr • ID ',' ID $end - Shift derivation - $accept - ↳ 0: s $end - ↳ 1: a ID - ↳ 2: expr - ↳ 4: expr • ID ',' - Second example: expr • ID $end - Reduce derivation - $accept - ↳ 0: s $end - ↳ 1: a ID - ↳ 2: expr • - ids.y:4.4-7: warning: rule useless in parser due to conflicts [-Wother] - 4 | a: expr - | ^~~~ - - This conflict is caused by the parser not having enough information -to know the difference between these two examples. The parser would -need an additional lookahead token to know whether or not a comma -follows the ‘ID’ after ‘expr’. These types of conflicts tend to be more -difficult to fix, and usually need a rework of the grammar. In this -case, it can be fixed by changing around the recursion: ‘expr: ID | ',' -expr ID’. - - Alternatively, you might also want to consider using a GLR parser -(*note GLR Parsers::). - - - On occasions, it is useful to look at counterexamples _in situ_: with -the automaton report (*Note Understanding::, in particular *note State -8: state-8.). - - -File: bison.info, Node: Understanding, Next: Graphviz, Prev: Counterexamples, Up: Debugging - -8.2 Understanding Your Parser -============================= - -Bison parsers are “shift/reduce automata” (*note Algorithm::). In some -cases (much more frequent than one would hope), looking at this -automaton is required to tune or simply fix a parser. - - The textual file is generated when the options ‘--report’ or -‘--verbose’ are specified, see *note Invocation::. Its name is made by -removing ‘.tab.c’ or ‘.c’ from the parser implementation file name, and -adding ‘.output’ instead. Therefore, if the grammar file is ‘foo.y’, -then the parser implementation file is called ‘foo.tab.c’ by default. 
-As a consequence, the verbose output file is called ‘foo.output’. - - The following grammar file, ‘calc.y’, will be used in the sequel: - - %union - { - int ival; - const char *sval; - } - %token <ival> NUM - %nterm <ival> exp - %token <sval> STR - %nterm <sval> useless - %left '+' '-' - %left '*' - %% - exp: - exp '+' exp - | exp '-' exp - | exp '*' exp - | exp '/' exp - | NUM - ; - useless: STR; - %% - - ‘bison’ reports: - - calc.y: warning: 1 nonterminal useless in grammar [-Wother] - calc.y: warning: 1 rule useless in grammar [-Wother] - calc.y:19.1-7: warning: nonterminal useless in grammar: useless [-Wother] - 19 | useless: STR; - | ^~~~~~~ - calc.y: warning: 7 shift/reduce conflicts [-Wconflicts-sr] - calc.y: note: rerun with option '-Wcounterexamples' to generate conflict counterexamples - - Going back to the calc example, when given ‘--report=state’, in -addition to ‘calc.tab.c’, it creates a file ‘calc.output’ with contents -detailed below. The order of the output and the exact presentation -might vary, but the interpretation is the same. - -The first section reports useless tokens, nonterminals and rules. -Useless nonterminals and rules are removed in order to produce a smaller -parser, but useless tokens are preserved, since they might be used by -the scanner (note the difference between "useless" and "unused" below): - - Nonterminals useless in grammar - useless - - Terminals unused in grammar - STR - - Rules useless in grammar - 6 useless: STR - -The next section lists states that still have conflicts.
- - State 8 conflicts: 1 shift/reduce - State 9 conflicts: 1 shift/reduce - State 10 conflicts: 1 shift/reduce - State 11 conflicts: 4 shift/reduce - -Then Bison reproduces the exact grammar it used: - - Grammar - - 0 $accept: exp $end - - 1 exp: exp '+' exp - 2 | exp '-' exp - 3 | exp '*' exp - 4 | exp '/' exp - 5 | NUM - -and reports the uses of the symbols: - - Terminals, with rules where they appear - - $end (0) 0 - '*' (42) 3 - '+' (43) 1 - '-' (45) 2 - '/' (47) 4 - error (256) - NUM (258) 5 - STR (259) - - Nonterminals, with rules where they appear - - $accept (9) - on left: 0 - exp (10) - on left: 1 2 3 4 5 - on right: 0 1 2 3 4 - -Bison then proceeds onto the automaton itself, describing each state -with its set of “items”, also known as “dotted rules”. Each item is a -production rule together with a point (‘.’) marking the location of the -input cursor. - - State 0 - - 0 $accept: • exp $end - - NUM shift, and go to state 1 - - exp go to state 2 - - This reads as follows: "state 0 corresponds to being at the very -beginning of the parsing, in the initial rule, right before the start -symbol (here, ‘exp’). When the parser returns to this state right after -having reduced a rule that produced an ‘exp’, the control flow jumps to -state 2. If there is no such transition on a nonterminal symbol, and -the lookahead is a ‘NUM’, then this token is shifted onto the parse -stack, and the control flow jumps to state 1. Any other lookahead -triggers a syntax error." - - Even though the only active rule in state 0 seems to be rule 0, the -report lists ‘NUM’ as a lookahead token because ‘NUM’ can be at the -beginning of any rule deriving an ‘exp’. 
By default Bison reports the -so-called “core” or “kernel” of the item set, but if you want to see -more detail you can invoke ‘bison’ with ‘--report=itemset’ to list the -derived items as well: - - State 0 - - 0 $accept: • exp $end - 1 exp: • exp '+' exp - 2 | • exp '-' exp - 3 | • exp '*' exp - 4 | • exp '/' exp - 5 | • NUM - - NUM shift, and go to state 1 - - exp go to state 2 - -In the state 1... - - State 1 - - 5 exp: NUM • - - $default reduce using rule 5 (exp) - -the rule 5, ‘exp: NUM;’, is completed. Whatever the lookahead token -(‘$default’), the parser will reduce it. If it was coming from State 0, -then, after this reduction it will return to state 0, and will jump to -state 2 (‘exp: go to state 2’). - - State 2 - - 0 $accept: exp • $end - 1 exp: exp • '+' exp - 2 | exp • '-' exp - 3 | exp • '*' exp - 4 | exp • '/' exp - - $end shift, and go to state 3 - '+' shift, and go to state 4 - '-' shift, and go to state 5 - '*' shift, and go to state 6 - '/' shift, and go to state 7 - -In state 2, the automaton can only shift a symbol. For instance, -because of the item ‘exp: exp • '+' exp’, if the lookahead is ‘+’ it is -shifted onto the parse stack, and the automaton jumps to state 4, -corresponding to the item ‘exp: exp '+' • exp’. Since there is no -default action, any lookahead not listed triggers a syntax error. - - The state 3 is named the “final state”, or the “accepting state”: - - State 3 - - 0 $accept: exp $end • - - $default accept - -the initial rule is completed (the start symbol and the end-of-input -were read), the parsing exits successfully. - - The interpretation of states 4 to 7 is straightforward, and is left -to the reader. 
- - State 4 - - 1 exp: exp '+' • exp - - NUM shift, and go to state 1 - - exp go to state 8 - - - State 5 - - 2 exp: exp '-' • exp - - NUM shift, and go to state 1 - - exp go to state 9 - - - State 6 - - 3 exp: exp '*' • exp - - NUM shift, and go to state 1 - - exp go to state 10 - - - State 7 - - 4 exp: exp '/' • exp - - NUM shift, and go to state 1 - - exp go to state 11 - - As was announced at the beginning of the report, ‘State 8 conflicts: 1 -shift/reduce’: - - State 8 - - 1 exp: exp • '+' exp - 1 | exp '+' exp • - 2 | exp • '-' exp - 3 | exp • '*' exp - 4 | exp • '/' exp - - '*' shift, and go to state 6 - '/' shift, and go to state 7 - - '/' [reduce using rule 1 (exp)] - $default reduce using rule 1 (exp) - - Indeed, there are two actions associated with the lookahead ‘/’: either -shifting (and going to state 7), or reducing rule 1. The conflict means -that either the grammar is ambiguous, or the parser lacks information to -make the right decision. Indeed the grammar is ambiguous, as, since we -did not specify the precedence of ‘/’, the sentence ‘NUM + NUM / NUM’ -can be parsed as ‘NUM + (NUM / NUM)’, which corresponds to shifting ‘/’, -or as ‘(NUM + NUM) / NUM’, which corresponds to reducing rule 1. - - Because in deterministic parsing only a single decision can be made, Bison -arbitrarily chose to disable the reduction, see *note Shift/Reduce::. -Discarded actions are reported between square brackets. - - Note that all the previous states had a single possible action: -either shifting the next token and going to the corresponding state, or -reducing a single rule. In the other cases, i.e., when shifting _and_ -reducing is possible or when _several_ reductions are possible, the -lookahead is required to select the action. State 8 is one such state: -if the lookahead is ‘*’ or ‘/’ then the action is shifting, otherwise -the action is reducing rule 1.
In other words, the first two items, -corresponding to rule 1, are not eligible when the lookahead token is -‘*’, since we specified that ‘*’ has higher precedence than ‘+’. More -generally, some items are eligible only with some set of possible -lookahead tokens. When run with ‘--report=lookahead’, Bison specifies -these lookahead tokens: - - State 8 - - 1 exp: exp • '+' exp - 1 | exp '+' exp • [$end, '+', '-', '/'] - 2 | exp • '-' exp - 3 | exp • '*' exp - 4 | exp • '/' exp - - '*' shift, and go to state 6 - '/' shift, and go to state 7 - - '/' [reduce using rule 1 (exp)] - $default reduce using rule 1 (exp) - - Note however that while ‘NUM + NUM / NUM’ is ambiguous (which results -in the conflicts on ‘/’), ‘NUM + NUM * NUM’ is not: the conflict was -solved thanks to associativity and precedence directives. If invoked -with ‘--report=solved’, Bison includes information about the solved -conflicts in the report: - - Conflict between rule 1 and token '+' resolved as reduce (%left '+'). - Conflict between rule 1 and token '-' resolved as reduce (%left '-'). - Conflict between rule 1 and token '*' resolved as shift ('+' < '*'). - - When given ‘--report=counterexamples’, ‘bison’ will generate -counterexamples within the report, augmented with the corresponding -items (*note Counterexamples::). - - shift/reduce conflict on token '/': - 1 exp: exp '+' exp • - 4 exp: exp • '/' exp - Example: exp '+' exp • '/' exp - Shift derivation - exp - ↳ 1: exp '+' exp - ↳ 4: exp • '/' exp - Example: exp '+' exp • '/' exp - Reduce derivation - exp - ↳ 4: exp '/' exp - ↳ 1: exp '+' exp • - - This shows two separate derivations in the grammar for the same -‘exp’: ‘e1 + e2 / e3’. The derivations show how your rules would parse -the given example. Here, the first derivation completes a reduction -when seeing ‘/’, causing ‘e1 + e2’ to be grouped as an ‘exp’. The -second derivation shifts on ‘/’, resulting in ‘e2 / e3’ being grouped as -an ‘exp’. 
Therefore, it is easy to see that adding -precedence/associativity directives would fix this conflict. - - The remaining states are similar: - - State 9 - - 1 exp: exp • '+' exp - 2 | exp • '-' exp - 2 | exp '-' exp • - 3 | exp • '*' exp - 4 | exp • '/' exp - - '*' shift, and go to state 6 - '/' shift, and go to state 7 - - '/' [reduce using rule 2 (exp)] - $default reduce using rule 2 (exp) - - State 10 - - 1 exp: exp • '+' exp - 2 | exp • '-' exp - 3 | exp • '*' exp - 3 | exp '*' exp • - 4 | exp • '/' exp - - '/' shift, and go to state 7 - - '/' [reduce using rule 3 (exp)] - $default reduce using rule 3 (exp) - - State 11 - - 1 exp: exp • '+' exp - 2 | exp • '-' exp - 3 | exp • '*' exp - 4 | exp • '/' exp - 4 | exp '/' exp • - - '+' shift, and go to state 4 - '-' shift, and go to state 5 - '*' shift, and go to state 6 - '/' shift, and go to state 7 - - '+' [reduce using rule 4 (exp)] - '-' [reduce using rule 4 (exp)] - '*' [reduce using rule 4 (exp)] - '/' [reduce using rule 4 (exp)] - $default reduce using rule 4 (exp) - -Observe that state 11 contains conflicts not only due to the lack of -precedence of ‘/’ with respect to ‘+’, ‘-’, and ‘*’, but also because -the associativity of ‘/’ is not specified. - - Bison may also produce an HTML version of this output, via an XML -file and XSLT processing (*note Xml::). - - -File: bison.info, Node: Graphviz, Next: Xml, Prev: Understanding, Up: Debugging - -8.3 Visualizing Your Parser -=========================== - -As another means to gain better understanding of the shift/reduce -automaton corresponding to the Bison parser, a DOT file can be -generated. Note that debugging a real grammar with this is tedious at -best, and impractical most of the times, because the generated files are -huge (the generation of a PDF or PNG file from it will take very long, -and more often than not it will fail due to memory exhaustion). This -option was rather designed for beginners, to help them understand LR -parsers. 
- - This file is generated when the ‘--graph’ option is specified (*note -Invocation::). Its name is made by removing ‘.tab.c’ or ‘.c’ from the -parser implementation file name, and adding ‘.gv’ instead. If the -grammar file is ‘foo.y’, the Graphviz output file is called ‘foo.gv’. A -DOT file may also be produced via an XML file and XSLT processing (*note -Xml::). - - The following grammar file, ‘rr.y’, will be used in the sequel: - - %% - exp: a ";" | b "."; - a: "0"; - b: "0"; - - The graphical output is very similar to the textual one, and as such -it is more easily understood by making direct comparisons between them. -*Note Debugging::, for a detailed analysis of the textual report. - -Graphical Representation of States ----------------------------------- - -The items (dotted rules) for each state are grouped together in graph -nodes. Their numbering is the same as in the verbose file. See the -following points, about transitions, for examples. - - When invoked with ‘--report=lookaheads’, the lookahead tokens, when -needed, are shown next to the relevant rule between square brackets as a -comma separated list. This is the case in the figure for the -representation of reductions, below. - - - The transitions are represented as directed edges between the current -and the target states. - -Graphical Representation of Shifts ----------------------------------- - -Shifts are shown as solid arrows, labeled with the lookahead token for -that shift. The following describes a shift in the ‘rr.output’ -file: - - State 3 - - 1 exp: a • ";" - - ";" shift, and go to state 6 - - A Graphviz rendering of this portion of the graph could be: - -[image src="figs/example-shift.svg" text=".----------------. -| State 3 | -| 1 exp: a • \";\" | -`----------------' - | - | \";\" - | - v -.----------------.
-| State 6 | -| 1 exp: a \";\" • | -`----------------'"] - -Graphical Representation of Reductions --------------------------------------- - -Reductions are shown as solid arrows, leading to a diamond-shaped node -bearing the number of the reduction rule. The arrow is labeled with the -appropriate comma separated lookahead tokens. If the reduction is the -default action for the given state, there is no such label. - - This is how reductions are represented in the verbose file -‘rr.output’: - State 1 - - 3 a: "0" • [";"] - 4 b: "0" • ["."] - - "." reduce using rule 4 (b) - $default reduce using rule 3 (a) - - A Graphviz rendering of this portion of the graph could be: - -[image src="figs/example-reduce.svg" text=" .------------------. - | State 1 | - | 3 a: \"0\" • [\";\"] | - | 4 b: \"0\" • [\".\"] | - `------------------' - / \\ - / \\ [\".\"] - / \\ - v v - / \\ / \\ - / R \\ / R \\ -(green) \\ 3 / \\ 4 / (green) - \\ / \\ /"] - - When unresolved conflicts are present, because in deterministic -parsing a single decision can be made, Bison can arbitrarily choose to -disable a reduction, see *note Shift/Reduce::. Discarded actions are -distinguished by a red filling color on these nodes, just like how they -are reported between square brackets in the verbose file. - - The reduction corresponding to the rule number 0 is the acceptation -state. It is shown as a blue diamond, labeled "Acc". - -Graphical Representation of Gotos ---------------------------------- - -The ‘go to’ jump transitions are represented as dotted lines bearing the -name of the rule being jumped to. - - -File: bison.info, Node: Xml, Next: Tracing, Prev: Graphviz, Up: Debugging - -8.4 Visualizing your parser in multiple formats -=============================================== - -Bison supports two major report formats: textual output (*note -Understanding::) when invoked with option ‘--verbose’, and DOT (*note -Graphviz::) when invoked with option ‘--graph’. 
However, another -alternative is to output an XML file that may then be, with ‘xsltproc’, -rendered as either a raw text format equivalent to the verbose file, or -as an HTML version of the same file, with clickable transitions, or even -as a DOT. The ‘.output’ and DOT files obtained via XSLT have no -difference whatsoever with those obtained by invoking ‘bison’ with -options ‘--verbose’ or ‘--graph’. - - The XML file is generated when the options ‘-x’ or ‘--xml[=FILE]’ are -specified, see *note Invocation::. If not specified, its name is made -by removing ‘.tab.c’ or ‘.c’ from the parser implementation file name, -and adding ‘.xml’ instead. For instance, if the grammar file is -‘foo.y’, the default XML output file is ‘foo.xml’. - - Bison ships with a ‘data/xslt’ directory, containing XSL -Transformation files to apply to the XML file. Their names are -non-ambiguous: - -‘xml2dot.xsl’ - Used to output a copy of the DOT visualization of the automaton. -‘xml2text.xsl’ - Used to output a copy of the ‘.output’ file. -‘xml2xhtml.xsl’ - Used to output an xhtml enhancement of the ‘.output’ file. - - Sample usage (requires ‘xsltproc’): - $ bison -x gr.y - $ bison --print-datadir - /usr/local/share/bison - $ xsltproc /usr/local/share/bison/xslt/xml2xhtml.xsl gr.xml >gr.html - - -File: bison.info, Node: Tracing, Prev: Xml, Up: Debugging - -8.5 Tracing Your Parser -======================= - -When a Bison grammar compiles properly but parses "incorrectly", the -‘yydebug’ parser-trace feature helps figuring out why. 
- -* Menu: - -* Enabling Traces:: Activating run-time trace support -* Mfcalc Traces:: Extending ‘mfcalc’ to support traces - - -File: bison.info, Node: Enabling Traces, Next: Mfcalc Traces, Up: Tracing - -8.5.1 Enabling Traces ---------------------- - -There are several means to enable compilation of trace facilities, in -decreasing order of preference: - -the variable ‘parse.trace’ - Add the ‘%define parse.trace’ directive (*note %define Summary::), - or pass the ‘-Dparse.trace’ option (*note Tuning the Parser::). - This is a Bison extension. Unless POSIX and Yacc portability - matter to you, this is the preferred solution. - -the option ‘-t’ (POSIX Yacc compliant) -the option ‘--debug’ (Bison extension) - Use the ‘-t’ option when you run Bison (*note Invocation::). With - ‘%define api.prefix {c}’, it defines ‘CDEBUG’ to 1, otherwise it - defines ‘YYDEBUG’ to 1. - -the directive ‘%debug’ (deprecated) - Add the ‘%debug’ directive (*note Decl Summary::). This Bison - extension is maintained for backward compatibility; use ‘%define - parse.trace’ instead. - -the macro ‘YYDEBUG’ (C/C++ only) - Define the macro ‘YYDEBUG’ to a nonzero value when you compile the - parser. This is compliant with POSIX Yacc. You could use - ‘-DYYDEBUG=1’ as a compiler option or you could put ‘#define - YYDEBUG 1’ in the prologue of the grammar file (*note Prologue::). - - If the ‘%define’ variable ‘api.prefix’ is used (*note Multiple - Parsers::), for instance ‘%define api.prefix {c}’, then if ‘CDEBUG’ - is defined, its value controls the tracing feature (enabled if and - only if nonzero); otherwise tracing is enabled if and only if - ‘YYDEBUG’ is nonzero. - - In C++, where POSIX compliance makes no sense, avoid this option, - and prefer ‘%define parse.trace’. If you ‘#define’ the ‘YYDEBUG’ - macro at the wrong place (e.g., in ‘%code top’ instead of ‘%code - require’), the parser class will have two different definitions, - thus leading to ODR violations and happy debugging times. 
- - We suggest that you always enable the trace option so that debugging -is always possible. - - In C the trace facility outputs messages with macro calls of the form -‘YYFPRINTF (stderr, FORMAT, ARGS)’ where FORMAT and ARGS are the usual -‘printf’ format and variadic arguments. If you define ‘YYDEBUG’ to a -nonzero value but do not define ‘YYFPRINTF’, ‘<stdio.h>’ is -automatically included and ‘YYFPRINTF’ is defined to ‘fprintf’. - - Once you have compiled the program with trace facilities, the way to -request a trace is to store a nonzero value in the variable ‘yydebug’. -You can do this by making the C code do it (in ‘main’, perhaps), or you -can alter the value with a C debugger. - - Each step taken by the parser when ‘yydebug’ is nonzero produces a -line or two of trace information, written on ‘stderr’. The trace -messages tell you these things: - - • Each time the parser calls ‘yylex’, what kind of token was read. - - • Each time a token is shifted, the depth and complete contents of - the state stack (*note Parser States::). - - • Each time a rule is reduced, which rule it is, and the complete - contents of the state stack afterward. - - To make sense of this information, it helps to refer to the automaton -description file (*note Understanding::). This file shows the meaning -of each state in terms of positions in various rules, and also what each -state will do with each possible input token. As you read the -successive trace messages, you can see that the parser is functioning -according to its specification in the listing file. Eventually you will -arrive at the place where something undesirable happens, and you will -see which parts of the grammar are to blame. - - The parser implementation file is a C/C++/D/Java program and you can -use debuggers on it, but it's not easy to interpret what it is doing. -The parser function is a finite-state machine interpreter, and aside -from the actions it executes the same code over and over.
Only the -values of variables show where in the grammar it is working. - - -File: bison.info, Node: Mfcalc Traces, Prev: Enabling Traces, Up: Tracing - -8.5.2 Enabling Debug Traces for ‘mfcalc’ ----------------------------------------- - -The debugging information normally gives the token kind of each token -read, but not its semantic value. The ‘%printer’ directive allows you to -specify how semantic values are reported, see *note Printer Decl::. - - As a demonstration of ‘%printer’, consider the multi-function -calculator, ‘mfcalc’ (*note Multi-function Calc::). To enable run-time -traces, and semantic value reports, insert the following directives in -its prologue: - - /* Generate the parser description file. */ - %verbose - /* Enable run-time traces (yydebug). */ - %define parse.trace - - /* Formatting semantic values. */ - %printer { fprintf (yyo, "%s", $$->name); } VAR; - %printer { fprintf (yyo, "%s()", $$->name); } FUN; - %printer { fprintf (yyo, "%g", $$); } <double>; - - The ‘%define’ directive instructs Bison to generate run-time trace -support. Then, activation of these traces is controlled at run-time by -the ‘yydebug’ variable, which is disabled by default. Because these -traces will refer to the "states" of the parser, it is helpful to ask -for the creation of a description of that parser; this is the purpose of the -(admittedly ill-named) ‘%verbose’ directive. - - The set of ‘%printer’ directives demonstrates how to format the -semantic value in the traces. Note that the specification can be done -either on the symbol type (e.g., ‘VAR’ or ‘FUN’), or on the type tag: -since ‘<double>’ is the type for both ‘NUM’ and ‘exp’, this printer will -be used for them. - - Here is a sample of the information provided by run-time traces. The -traces are sent onto standard error.
- - $ echo 'sin(1-1)' | ./mfcalc -p - Starting parse - Entering state 0 - Reducing stack by rule 1 (line 34): - -> $$ = nterm input () - Stack now 0 - Entering state 1 - -This first batch shows a specific feature of this grammar: the first -rule (which is in line 34 of ‘mfcalc.y’) can be reduced without even -having to look for the first token. The resulting left-hand symbol -(‘$$’) is a valueless (‘()’) ‘input’ nonterminal (‘nterm’). - - Then the parser calls the scanner. - Reading a token - Next token is token FUN (sin()) - Shifting token FUN (sin()) - Entering state 6 - -That token (‘token’) is a function (‘FUN’) whose value is ‘sin’ as -formatted per our ‘%printer’ specification: ‘sin()’. The parser stores -(‘Shifting’) that token, and others, until it can do something about it. - - Reading a token - Next token is token '(' () - Shifting token '(' () - Entering state 14 - Reading a token - Next token is token NUM (1.000000) - Shifting token NUM (1.000000) - Entering state 4 - Reducing stack by rule 6 (line 44): - $1 = token NUM (1.000000) - -> $$ = nterm exp (1.000000) - Stack now 0 1 6 14 - Entering state 24 - -The previous reduction demonstrates the ‘%printer’ directive for -‘<double>’: both the token ‘NUM’ and the resulting nonterminal ‘exp’ -have ‘1’ as value. - - Reading a token - Next token is token '-' () - Shifting token '-' () - Entering state 17 - Reading a token - Next token is token NUM (1.000000) - Shifting token NUM (1.000000) - Entering state 4 - Reducing stack by rule 6 (line 44): - $1 = token NUM (1.000000) - -> $$ = nterm exp (1.000000) - Stack now 0 1 6 14 24 17 - Entering state 26 - Reading a token - Next token is token ')' () - Reducing stack by rule 11 (line 49): - $1 = nterm exp (1.000000) - $2 = token '-' () - $3 = nterm exp (1.000000) - -> $$ = nterm exp (0.000000) - Stack now 0 1 6 14 - Entering state 24 - -The rule for the subtraction was just reduced. The parser is about to -discover the end of the call to ‘sin’.
- - Next token is token ')' () - Shifting token ')' () - Entering state 31 - Reducing stack by rule 9 (line 47): - $1 = token FUN (sin()) - $2 = token '(' () - $3 = nterm exp (0.000000) - $4 = token ')' () - -> $$ = nterm exp (0.000000) - Stack now 0 1 - Entering state 11 - -Finally, the end-of-line allow the parser to complete the computation, -and display its result. - - Reading a token - Next token is token '\n' () - Shifting token '\n' () - Entering state 22 - Reducing stack by rule 4 (line 40): - $1 = nterm exp (0.000000) - $2 = token '\n' () - ⇒ 0 - -> $$ = nterm line () - Stack now 0 1 - Entering state 10 - Reducing stack by rule 2 (line 35): - $1 = nterm input () - $2 = nterm line () - -> $$ = nterm input () - Stack now 0 - Entering state 1 - - The parser has returned into state 1, in which it is waiting for the -next expression to evaluate, or for the end-of-file token, which causes -the completion of the parsing. - - Reading a token - Now at end of input. - Shifting token $end () - Entering state 2 - Stack now 0 1 2 - Cleanup: popping token $end () - Cleanup: popping nterm input () - - -File: bison.info, Node: Invocation, Next: Other Languages, Prev: Debugging, Up: Top - -9 Invoking Bison -**************** - -The usual way to invoke Bison is as follows: - - $ bison FILE - - Here FILE is the grammar file name, which usually ends in ‘.y’. The -parser implementation file's name is made by replacing the ‘.y’ with -‘.tab.c’ and removing any leading directory. Thus, the ‘bison foo.y’ -file name yields ‘foo.tab.c’, and the ‘bison hack/foo.y’ file name -yields ‘foo.tab.c’. It's also possible, in case you are writing C++ -code instead of C in your grammar file, to name it ‘foo.ypp’ or -‘foo.y++’. Then, the output files will take an extension like the given -one as input (respectively ‘foo.tab.cpp’ and ‘foo.tab.c++’). This -feature takes effect with all options that manipulate file names like -‘-o’ or ‘-d’. 
- - For example: - - $ bison -d FILE.YXX -will produce ‘file.tab.cxx’ and ‘file.tab.hxx’, and - - $ bison -d -o OUTPUT.C++ FILE.Y -will produce ‘output.c++’ and ‘output.h++’. - - For compatibility with POSIX, the standard Bison distribution also -contains a shell script called ‘yacc’ that invokes Bison with the ‘-y’ -option. - - - The exit status of ‘bison’ is: -0 (success) - when there were no errors. Warnings, which are diagnostics about - dubious constructs, do not change the exit status, unless they are - turned into errors (*note ‘-Werror’: Werror.). - -1 (failure) - when there were errors. No file was generated (except the reports - generated by ‘--verbose’, etc.). In particular, the output files - that possibly existed were not changed. - -63 (mismatch) - when ‘bison’ does not meet the version requirements of the grammar - file. *Note Require Decl::. No file was generated or changed. - -* Menu: - -* Bison Options:: All the options described in detail, - in alphabetical order by short options. -* Option Cross Key:: Alphabetical list of long options. -* Yacc Library:: Yacc-compatible ‘yylex’ and ‘main’. - - -File: bison.info, Node: Bison Options, Next: Option Cross Key, Up: Invocation - -9.1 Bison Options -================= - -Bison supports both traditional single-letter options and mnemonic long -option names. Long option names are indicated with ‘--’ instead of ‘-’. -Abbreviations for option names are allowed as long as they are unique. -When a long option takes an argument, like ‘--file-prefix’, connect the -option name and the argument with ‘=’. - - Here is a list of options that can be used with Bison. It is -followed by a cross key alphabetized by long option. 
- -* Menu: - -* Operation Modes:: Options controlling the global behavior of ‘bison’ -* Diagnostics:: Options controlling the diagnostics -* Tuning the Parser:: Options changing the generated parsers -* Output Files:: Options controlling the output - - -File: bison.info, Node: Operation Modes, Next: Diagnostics, Up: Bison Options - -9.1.1 Operation Modes ---------------------- - -Options controlling the global behavior of ‘bison’. - -‘-h’ -‘--help’ - Print a summary of the command-line options to Bison and exit. - -‘-V’ -‘--version’ - Print the version number of Bison and exit. - -‘--print-localedir’ - Print the name of the directory containing locale-dependent data. - -‘--print-datadir’ - Print the name of the directory containing skeletons, CSS and XSLT. - -‘-u’ -‘--update’ - Update the grammar file (remove duplicates, update deprecated - directives, etc.) and exit (i.e., do not generate any of the - output files). Leaves a backup of the original file with a ‘~’ - appended. For instance: - - $ cat foo.y - %error-verbose - %define parse.error verbose - %% - exp:; - $ bison -u foo.y - foo.y:1.1-14: warning: deprecated directive, use '%define parse.error verbose' [-Wdeprecated] - 1 | %error-verbose - | ^~~~~~~~~~~~~~ - foo.y:2.1-27: warning: %define variable 'parse.error' redefined [-Wother] - 2 | %define parse.error verbose - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ - foo.y:1.1-14: previous definition - 1 | %error-verbose - | ^~~~~~~~~~~~~~ - bison: file 'foo.y' was updated (backup: 'foo.y~') - $ cat foo.y - %define parse.error verbose - %% - exp:; - - See the documentation of ‘--feature=fixit’ below for more details. - -‘-f [FEATURE]’ -‘--feature[=FEATURE]’ - Activate miscellaneous FEATUREs. FEATURE can be one of: - ‘caret’ - ‘diagnostics-show-caret’ - Show caret errors, in a manner similar to GCC's - ‘-fdiagnostics-show-caret’, or Clang's ‘-fcaret-diagnostics’. 
- The location provided with the message is used to quote the - corresponding line of the source file, underlining the - important part of it with carets (‘^’). Here is an example, - using the following file ‘in.y’: - - %nterm exp - %% - exp: exp '+' exp { $exp = $1 + $2; }; - - When invoked with ‘-fcaret’ (or nothing), Bison will report: - - in.y:3.20-23: error: ambiguous reference: '$exp' - 3 | exp: exp '+' exp { $exp = $1 + $2; }; - | ^~~~ - in.y:3.1-3: refers to: $exp at $$ - 3 | exp: exp '+' exp { $exp = $1 + $2; }; - | ^~~ - in.y:3.6-8: refers to: $exp at $1 - 3 | exp: exp '+' exp { $exp = $1 + $2; }; - | ^~~ - in.y:3.14-16: refers to: $exp at $3 - 3 | exp: exp '+' exp { $exp = $1 + $2; }; - | ^~~ - in.y:3.32-33: error: $2 of 'exp' has no declared type - 3 | exp: exp '+' exp { $exp = $1 + $2; }; - | ^~ - - Whereas, when invoked with ‘-fno-caret’, Bison will only - report: - - in.y:3.20-23: error: ambiguous reference: '$exp' - in.y:3.1-3: refers to: $exp at $$ - in.y:3.6-8: refers to: $exp at $1 - in.y:3.14-16: refers to: $exp at $3 - in.y:3.32-33: error: $2 of 'exp' has no declared type - - This option is activated by default. - - ‘fixit’ - ‘diagnostics-parseable-fixits’ - Show machine-readable fixes, in a manner similar to GCC's and - Clang's ‘-fdiagnostics-parseable-fixits’. - - Fix-its are generated for duplicate directives: - - $ cat foo.y - %define api.prefix {foo} - %define api.prefix {bar} - %% - exp:; - - $ bison -ffixit foo.y - foo.y:2.1-24: error: %define variable 'api.prefix' redefined - 2 | %define api.prefix {bar} - | ^~~~~~~~~~~~~~~~~~~~~~~~ - foo.y:1.1-24: previous definition - 1 | %define api.prefix {foo} - | ^~~~~~~~~~~~~~~~~~~~~~~~ - fix-it:"foo.y":{2:1-2:25}:"" - foo.y: warning: fix-its can be applied. Rerun with option '--update'. 
[-Wother] - - They are also generated to update deprecated directives, - unless ‘-Wno-deprecated’ was given: - - $ cat /tmp/foo.yy - %error-verbose - %name-prefix "foo" - %% - exp:; - $ bison foo.y - foo.y:1.1-14: warning: deprecated directive, use '%define parse.error verbose' [-Wdeprecated] - 1 | %error-verbose - | ^~~~~~~~~~~~~~ - foo.y:2.1-18: warning: deprecated directive, use '%define api.prefix {foo}' [-Wdeprecated] - 2 | %name-prefix "foo" - | ^~~~~~~~~~~~~~~~~~ - foo.y: warning: fix-its can be applied. Rerun with option '--update'. [-Wother] - - The fix-its are applied by ‘bison’ itself when given the - option ‘-u’/‘--update’. See its documentation above. - - ‘syntax-only’ - Do not generate the output files. The name of this feature is - somewhat misleading as more than just checking the syntax is - done: every stage is run (including checking for conflicts for - instance), except the generation of the output files. - - -File: bison.info, Node: Diagnostics, Next: Tuning the Parser, Prev: Operation Modes, Up: Bison Options - -9.1.2 Diagnostics ------------------ - -Options controlling the diagnostics. - -‘-W [CATEGORY]’ -‘--warnings[=CATEGORY]’ - Output warnings falling in CATEGORY. CATEGORY can be one of: - ‘conflicts-sr’ - ‘conflicts-rr’ - S/R and R/R conflicts. These warnings are enabled by default. - However, if the ‘%expect’ or ‘%expect-rr’ directive is - specified, an unexpected number of conflicts is an error, and - an expected number of conflicts is not reported, so ‘-W’ and - ‘--warning’ then have no effect on the conflict report. - - ‘counterexamples’ - ‘cex’ - Provide counterexamples for conflicts. *Note - Counterexamples::. Counterexamples take time to compute. The - option ‘-Wcex’ should be used by the developer when working on - the grammar; it hardly makes sense to use it in a CI. - - ‘dangling-alias’ - Report string literals that are not bound to a token symbol. 
- - String literals, which allow for better error messages, are - (too) liberally accepted by Bison, which might result in - silent errors. For instance - - %type <exVal> cond "condition" - - does not define "condition" as a string alias to - ‘cond’--nonterminal symbols do not have string aliases. It is - rather equivalent to - - %nterm <exVal> cond - %token <exVal> "condition" - - i.e., it gives the ‘"condition"’ token the type ‘exVal’. - - Also, because string aliases do not need to be defined, typos - such as ‘"baz"’ instead of ‘"bar"’ will not be reported. - - The option ‘-Wdangling-alias’ catches these situations. On - - %token BAR "bar" - %type <ival> foo "foo" - %% - foo: "baz" {} - - ‘bison -Wdangling-alias’ reports - - warning: string literal not attached to a symbol - | %type <ival> foo "foo" - | ^~~~~ - warning: string literal not attached to a symbol - | foo: "baz" {} - | ^~~~~ - - ‘deprecated’ - Deprecated constructs whose support will be removed in future - versions of Bison. - - ‘empty-rule’ - Empty rules without ‘%empty’. *Note Empty Rules::. Disabled - by default, but enabled by uses of ‘%empty’, unless - ‘-Wno-empty-rule’ was specified. - - ‘midrule-values’ - Warn about midrule values that are set but not used within any - of the actions of the parent rule. For example, warn about - unused ‘$2’ in: - - exp: '1' { $$ = 1; } '+' exp { $$ = $1 + $4; }; - - Also warn about midrule values that are used but not set. For - example, warn about unset ‘$$’ in the midrule action in: - - exp: '1' { $1 = 1; } '+' exp { $$ = $2 + $4; }; - - These warnings are not enabled by default since they sometimes - prove to be false alarms in existing grammars employing the - Yacc constructs ‘$0’ or ‘$-N’ (where N is some positive - integer). - - ‘precedence’ - Useless precedence and associativity directives. Disabled by - default. 
- - Consider for instance the following grammar: - - %nonassoc "=" - %left "+" - %left "*" - %precedence "(" - %% - stmt: - exp - | "var" "=" exp - ; - - exp: - exp "+" exp - | exp "*" "number" - | "(" exp ")" - | "number" - ; - - Bison reports: - - warning: useless precedence and associativity for "=" - | %nonassoc "=" - | ^~~ - warning: useless associativity for "*", use %precedence - | %left "*" - | ^~~ - warning: useless precedence for "(" - | %precedence "(" - | ^~~ - - One would get the exact same parser with the following - directives instead: - - %left "+" - %precedence "*" - - ‘yacc’ - Incompatibilities with POSIX Yacc. - - ‘other’ - All warnings not categorized above. These warnings are - enabled by default. - - This category is provided merely for the sake of completeness. - Future releases of Bison may move warnings from this category - to new, more specific categories. - - ‘all’ - All the warnings except ‘counterexamples’, ‘dangling-alias’ - and ‘yacc’. - - ‘none’ - Turn off all the warnings. - - ‘error’ - See ‘-Werror’, below. - - A category can be turned off by prefixing its name with ‘no-’. For - instance, ‘-Wno-yacc’ will hide the warnings about POSIX Yacc - incompatibilities. - -‘-Werror’ - Turn enabled warnings for every CATEGORY into errors, unless they - are explicitly disabled by ‘-Wno-error=CATEGORY’. - -‘-Werror=CATEGORY’ - Enable warnings falling in CATEGORY, and treat them as errors. - - CATEGORY is the same as for ‘--warnings’, with the exception that - it may not be prefixed with ‘no-’ (see above). - - Note that the precedence of the ‘=’ and ‘,’ operators is such that - the following commands are _not_ equivalent, as the first will not - treat S/R conflicts as errors. - - $ bison -Werror=yacc,conflicts-sr input.y - $ bison -Werror=yacc,error=conflicts-sr input.y - -‘-Wno-error’ - Do not turn enabled warnings for every CATEGORY into errors, unless - they are explicitly enabled by ‘-Werror=CATEGORY’. 
- -‘-Wno-error=CATEGORY’ - Deactivate the error treatment for this CATEGORY. However, the - warning itself won't be disabled, or enabled, by this option. - -‘--color’ - Equivalent to ‘--color=always’. - -‘--color=WHEN’ - Control whether diagnostics are colorized, depending on WHEN: - ‘always’ - ‘yes’ - Enable colorized diagnostics. - - ‘never’ - ‘no’ - Disable colorized diagnostics. - - ‘auto (default)’ - ‘tty’ - Diagnostics will be colorized if the output device is a tty, - i.e. when the output goes directly to a text screen or - terminal emulator window. - -‘--style=FILE’ - Specifies the CSS style FILE to use when colorizing. It has an - effect only when the ‘--color’ option is effective. The - ‘bison-default.css’ file provides a good example from which to - define your own style file. See the documentation of libtextstyle - for more details. - - -File: bison.info, Node: Tuning the Parser, Next: Output Files, Prev: Diagnostics, Up: Bison Options - -9.1.3 Tuning the Parser ------------------------ - -Options changing the generated parsers. - -‘-t’ -‘--debug’ - In the parser implementation file, define the macro ‘YYDEBUG’ to 1 - if it is not already defined, so that the debugging facilities are - compiled. *Note Tracing::. - -‘-D NAME[=VALUE]’ -‘--define=NAME[=VALUE]’ -‘-F NAME[=VALUE]’ -‘--force-define=NAME[=VALUE]’ - Each of these is equivalent to ‘%define NAME VALUE’ (*note %define - Summary::). Note that the delimiters are part of VALUE: - ‘-Dapi.value.type=union’, ‘-Dapi.value.type={union}’ and - ‘-Dapi.value.type="union"’ correspond to ‘%define api.value.type - union’, ‘%define api.value.type {union}’ and ‘%define - api.value.type "union"’. - - Bison processes multiple definitions for the same NAME as follows: - - • Bison quietly ignores all command-line definitions for NAME - except the last. - • If that command-line definition is specified by a ‘-D’ or - ‘--define’, Bison reports an error for any ‘%define’ - definition for NAME. 
- • If that command-line definition is specified by a ‘-F’ or - ‘--force-define’ instead, Bison quietly ignores all ‘%define’ - definitions for NAME. - • Otherwise, Bison reports an error if there are multiple - ‘%define’ definitions for NAME. - - You should avoid using ‘-F’ and ‘--force-define’ in your make files - unless you are confident that it is safe to quietly ignore any - conflicting ‘%define’ that may be added to the grammar file. - -‘-L LANGUAGE’ -‘--language=LANGUAGE’ - Specify the programming language for the generated parser, as if - ‘%language’ was specified (*note Decl Summary::). Currently - supported languages include C, C++, D and Java. LANGUAGE is - case-insensitive. - -‘--locations’ - Pretend that ‘%locations’ was specified. *Note Decl Summary::. - -‘-p PREFIX’ -‘--name-prefix=PREFIX’ - Pretend that ‘%name-prefix "PREFIX"’ was specified (*note Decl - Summary::). The option ‘-p’ is specified by POSIX. When POSIX - compatibility is not a requirement, ‘-Dapi.prefix=PREFIX’ is a - better option (*note Multiple Parsers::). - -‘-l’ -‘--no-lines’ - Don't put any ‘#line’ preprocessor commands in the parser - implementation file. Ordinarily Bison puts them in the parser - implementation file so that the C compiler and debuggers will - associate errors with your source file, the grammar file. This - option causes them to associate errors with the parser - implementation file, treating it as an independent source file in - its own right. - -‘-S FILE’ -‘--skeleton=FILE’ - Specify the skeleton to use, similar to ‘%skeleton’ (*note Decl - Summary::). - - If FILE does not contain a ‘/’, FILE is the name of a skeleton file - in the Bison installation directory. If it does, FILE is an - absolute file name or a file name relative to the current working - directory. This is similar to how most shells resolve commands. - -‘-k’ -‘--token-table’ - Pretend that ‘%token-table’ was specified. *Note Decl Summary::. 
- -‘-y’ -‘--yacc’ - Act more like the traditional ‘yacc’ command: - • Generate different diagnostics (it implies ‘-Wyacc’). - • Generate ‘#define’ statements in addition to an ‘enum’ to - associate token codes with token kind names. - • If the ‘POSIXLY_CORRECT’ environment variable is defined, - generate prototypes for ‘yyerror’ and ‘yylex’(1) (since Bison - 3.8): - int yylex (void); - void yyerror (const char *); - As a Bison extension, additional arguments required by - ‘%pure-parser’, ‘%locations’, ‘%lex-param’ and ‘%parse-param’ - are taken into account. You may disable ‘yyerror’'s prototype - with ‘#define yyerror yyerror’ (as specified by POSIX), or - with ‘#define YYERROR_IS_DECLARED’ (a Bison extension). - Likewise for ‘yylex’. - • Imitate Yacc's output file name conventions, so that the - parser implementation file is called ‘y.tab.c’, and the other - outputs are called ‘y.output’ and ‘y.tab.h’. Do not use - ‘--yacc’ just to change the output file names since it also - triggers all the aforementioned behavior changes; rather use - ‘-o y.tab.c’. - - The ‘-y’/‘--yacc’ option is intended for use with traditional Yacc - grammars. This option only makes sense for the default C skeleton, - ‘yacc.c’. If your grammar uses Bison extensions Bison cannot be - Yacc-compatible, even if this option is specified. - - Thus, the following shell script can substitute for Yacc, and the - Bison distribution contains such a ‘yacc’ script for compatibility - with POSIX: - - #! /bin/sh - bison -y "$@" - - ---------- Footnotes ---------- - - (1) See . - - -File: bison.info, Node: Output Files, Prev: Tuning the Parser, Up: Bison Options - -9.1.4 Output Files ------------------- - -Options controlling the output. - -‘-H [FILE]’ -‘--header=[FILE]’ - Pretend that ‘%header’ was specified, i.e., write an extra output - file containing definitions for the token kind names defined in the - grammar, as well as a few other declarations. *Note Decl - Summary::. 
- -‘--defines[=FILE]’ - Historical name for option ‘--header’ before Bison 3.8. - -‘-d’ - This is the same as ‘--header’ except ‘-d’ does not accept a FILE - argument since POSIX Yacc requires that ‘-d’ can be bundled with - other short options. - -‘-b FILE-PREFIX’ -‘--file-prefix=PREFIX’ - Pretend that ‘%file-prefix’ was specified, i.e., specify prefix to - use for all Bison output file names. *Note Decl Summary::. - -‘-r THINGS’ -‘--report=THINGS’ - Write an extra output file containing verbose description of the - comma separated list of THINGS among: - - ‘state’ - Description of the grammar, conflicts (resolved and - unresolved), and parser's automaton. - - ‘itemset’ - Implies ‘state’ and augments the description of the automaton - with the full set of items for each state, instead of its core - only. - - ‘lookahead’ - Implies ‘state’ and augments the description of the automaton - with each rule's lookahead set. - - ‘solved’ - Implies ‘state’. Explain how conflicts were solved thanks to - precedence and associativity directives. - - ‘counterexamples’ - ‘cex’ - Look for counterexamples for the conflicts. *Note - Counterexamples::. Counterexamples take time to compute. The - option ‘-rcex’ should be used by the developer when working on - the grammar; it hardly makes sense to use it in a CI. - - ‘all’ - Enable all the items. - - ‘none’ - Do not generate the report. - -‘--report-file=FILE’ - Specify the FILE for the verbose description. - -‘-v’ -‘--verbose’ - Pretend that ‘%verbose’ was specified, i.e., write an extra output - file containing verbose descriptions of the grammar and parser. - *Note Decl Summary::. - -‘-o FILE’ -‘--output=FILE’ - Specify the FILE for the parser implementation file. - - The names of the other output files are constructed from FILE as - described under the ‘-v’ and ‘-d’ options. 
- -‘-g [FILE]’ -‘--graph[=FILE]’ - Output a graphical representation of the parser's automaton - computed by Bison, in Graphviz (https://www.graphviz.org/) DOT - (https://www.graphviz.org/doc/info/lang.html) format. ‘FILE’ is - optional. If omitted and the grammar file is ‘foo.y’, the output - file will be ‘foo.gv’. - -‘-x [FILE]’ -‘--xml[=FILE]’ - Output an XML report of the parser's automaton computed by Bison. - ‘FILE’ is optional. If omitted and the grammar file is ‘foo.y’, - the output file will be ‘foo.xml’. - -‘-M OLD=NEW’ -‘--file-prefix-map=OLD=NEW’ - Replace prefix OLD with NEW when writing file paths in output - files. - - -File: bison.info, Node: Option Cross Key, Next: Yacc Library, Prev: Bison Options, Up: Invocation - -9.2 Option Cross Key -==================== - -Here is a list of options, alphabetized by long option, to help you find -the corresponding short option and directive. - -Long Option Short Option Bison Directive ---------------------------------------------------------------------------------- -‘--color[=WHEN]’ -‘--debug’ ‘-t’ ‘%debug’ -‘--define=NAME[=VALUE]’ ‘-D NAME[=VALUE]’ ‘%define NAME [VALUE]’ -‘--feature[=FEATURES]’ ‘-f [FEATURES]’ -‘--file-prefix-map=OLD=NEW’ ‘-M OLD=NEW’ -‘--file-prefix=PREFIX’ ‘-b PREFIX’ ‘%file-prefix "PREFIX"’ -‘--force-define=NAME[=VALUE]’ ‘-F NAME[=VALUE]’ ‘%define NAME [VALUE]’ -‘--graph[=FILE]’ ‘-g [FILE]’ -‘--header=[FILE]’ ‘-H [FILE]’ ‘%header ["FILE"]’ -‘--help’ ‘-h’ -‘--html[=FILE]’ -‘--language=LANGUAGE’ ‘-L LANGUAGE’ ‘%language "LANGUAGE"’ -‘--locations’ ‘%locations’ -‘--name-prefix=PREFIX’ ‘-p PREFIX’ ‘%name-prefix "PREFIX"’ -‘--no-lines’ ‘-l’ ‘%no-lines’ -‘--output=FILE’ ‘-o FILE’ ‘%output "FILE"’ -‘--print-datadir’ -‘--print-localedir’ -‘--report-file=FILE’ -‘--report=THINGS’ ‘-r THINGS’ -‘--skeleton=FILE’ ‘-S FILE’ ‘%skeleton "FILE"’ -‘--style=FILE’ -‘--token-table’ ‘-k’ ‘%token-table’ -‘--update’ ‘-u’ -‘--verbose’ ‘-v’ ‘%verbose’ -‘--version’ ‘-V’ -‘--warnings[=CATEGORY]’ ‘-W [CATEGORY]’ 
-‘--xml[=FILE]’ ‘-x [FILE]’ -‘--yacc’ ‘-y’ ‘%yacc’ - - -File: bison.info, Node: Yacc Library, Prev: Option Cross Key, Up: Invocation - -9.3 Yacc Library -================ - -The Yacc library contains default implementations of the ‘yyerror’ and -‘main’ functions. These default implementations are normally not -useful, but POSIX requires them. To use the Yacc library, link your -program with the ‘-ly’ option. Note that Bison's implementation of the -Yacc library is distributed under the terms of the GNU General Public -License (*note Copying::). - - If you use the Yacc library's ‘yyerror’ function, you should declare -‘yyerror’ as follows: - - int yyerror (char const *); - -The ‘int’ value returned by this ‘yyerror’ is ignored. - - The implementation of Yacc library's ‘main’ function is: - - int main (void) - { - setlocale (LC_ALL, ""); - return yyparse (); - } - -so if you use it, the internationalization support is enabled (e.g., -error messages are translated), and your ‘yyparse’ function should have -the following type signature: - - int yyparse (void); - - -File: bison.info, Node: Other Languages, Next: History, Prev: Invocation, Up: Top - -10 Parsers Written In Other Languages -************************************* - -In addition to C, Bison can generate parsers in C++, D and Java. This -chapter is devoted to these languages. The reader is expected to -understand how Bison works; read the introductory chapters first if you -don't. - -* Menu: - -* C++ Parsers:: The interface to generate C++ parser classes -* D Parsers:: The interface to generate D parser classes -* Java Parsers:: The interface to generate Java parser classes - - -File: bison.info, Node: C++ Parsers, Next: D Parsers, Up: Other Languages - -10.1 C++ Parsers -================ - -The Bison parser in C++ is an object, an instance of the class -‘yy::parser’. 
- -* Menu: - -* A Simple C++ Example:: A short introduction to C++ parsers -* C++ Bison Interface:: Asking for C++ parser generation -* C++ Parser Interface:: Instantiating and running the parser -* C++ Semantic Values:: %union vs. C++ -* C++ Location Values:: The position and location classes -* C++ Parser Context:: You can supply a ‘report_syntax_error’ function. -* C++ Scanner Interface:: Exchanges between yylex and parse -* A Complete C++ Example:: Demonstrating their use - - -File: bison.info, Node: A Simple C++ Example, Next: C++ Bison Interface, Up: C++ Parsers - -10.1.1 A Simple C++ Example ---------------------------- - -This tutorial about C++ parsers is based on a simple, self contained -example.(1) The following sections are the reference manual for Bison -with C++, the last one showing a fully blown example (*note A Complete -C++ Example::). - - To look nicer, our example will be in C++14. It is not required: -Bison supports the original C++98 standard. - - A Bison file has three parts. In the first part, the prologue, we -start by making sure we run a version of Bison which is recent enough, -and that we generate C++. - - %require "3.2" - %language "c++" - - Let's dive directly into the middle part: the grammar. Our input is -a simple list of strings, that we display once the parsing is done. - - %% - result: - list { std::cout << $1 << '\n'; } - ; - - %nterm <std::vector<std::string>> list; - list: - %empty { /* Generates an empty string list */ } - | list item { $$ = $1; $$.push_back ($2); } - ; - - We used a vector of strings as a semantic value! To use genuine C++ -objects as semantic values--not just PODs--we cannot rely on the union -that Bison uses by default to store them, we need _variants_ (*note C++ -Variants::): - - %define api.value.type variant - - Obviously, the rule for ‘result’ needs to print a vector of strings. -In the prologue, we add: - - %code - { - // Print a list of strings. 
- auto - operator<< (std::ostream& o, const std::vector<std::string>& ss) - -> std::ostream& - { - o << '{'; - const char *sep = ""; - for (const auto& s: ss) - { - o << sep << s; - sep = ", "; - } - return o << '}'; - } - } - -You may want to move it into the ‘yy’ namespace to avoid leaking it in -your default namespace. We recommend that you keep the actions simple, -and move details into auxiliary functions, as we did with ‘operator<<’. - - Our list of strings will be built from two types of items: numbers -and strings: - - %nterm <std::string> item; - %token <std::string> TEXT; - %token <int> NUMBER; - item: - TEXT - | NUMBER { $$ = std::to_string ($1); } - ; - - In the case of ‘TEXT’, the implicit default action applies: -‘$$ = $1’. - - - Our scanner deserves some attention. The traditional interface of -‘yylex’ is not type safe: since the token kind and the token value are -not correlated, you may return a ‘NUMBER’ with a string as semantic -value. To avoid this, we use _token constructors_ (*note Complete -Symbols::). This directive: - - %define api.token.constructor - -requests that Bison generates the functions ‘make_TEXT’ and -‘make_NUMBER’, but also ‘make_YYEOF’, for the end of input. - - Everything is in place for our scanner: - - %code - { - namespace yy - { - // Return the next token. - auto yylex () -> parser::symbol_type - { - static int count = 0; - switch (int stage = count++) - { - case 0: - return parser::make_TEXT ("I have three numbers for you."); - case 1: case 2: case 3: - return parser::make_NUMBER (stage); - case 4: - return parser::make_TEXT ("And that's all!"); - default: - return parser::make_YYEOF (); - } - } - } - } - - In the epilogue, the third part of a Bison grammar file, we leave -simple details: the error reporting function, and the main function. - - %% - namespace yy - { - // Report an error to the user. 
- auto parser::error (const std::string& msg) -> void - { - std::cerr << msg << '\n'; - } - } - - int main () - { - yy::parser parse; - return parse (); - } - - Compile, and run! - - $ bison simple.yy -o simple.cc - $ g++ -std=c++14 simple.cc -o simple - $ ./simple - {I have three numbers for you., 1, 2, 3, And that's all!} - - ---------- Footnotes ---------- - - (1) The sources of this example are available as -‘examples/c++/simple.yy’. - - -File: bison.info, Node: C++ Bison Interface, Next: C++ Parser Interface, Prev: A Simple C++ Example, Up: C++ Parsers - -10.1.2 C++ Bison Interface --------------------------- - -The C++ deterministic parser is selected using the skeleton directive, -‘%skeleton "lalr1.cc"’. *Note Decl Summary::. - - When run, ‘bison’ will create several entities in the ‘yy’ namespace. -Use the ‘%define api.namespace’ directive to change the namespace name, -see *note %define Summary::. The various classes are generated in the -following files: - -‘FILE.hh’ - (Assuming the extension of the grammar file was ‘.yy’.) The - declaration of the C++ parser class and auxiliary types. By - default, this file is not generated (*note Decl Summary::). - -‘FILE.cc’ - The implementation of the C++ parser class. The basename and - extension of these two files (‘FILE.hh’ and ‘FILE.cc’) follow the - same rules as with regular C parsers (*note Invocation::). - -‘location.hh’ - Generated when both ‘%header’ and ‘%locations’ are enabled, this - file contains the definition of the classes ‘position’ and - ‘location’, used for location tracking. It is not generated if - ‘%define api.location.file none’ is specified, or if user defined - locations are used. *Note C++ Location Values::. - -‘position.hh’ -‘stack.hh’ - Useless legacy files. To get rid of them, use ‘%require "3.2"’ or - newer. - - All these files are documented using Doxygen; run ‘doxygen’ for a -complete and accurate documentation. 
- - -File: bison.info, Node: C++ Parser Interface, Next: C++ Semantic Values, Prev: C++ Bison Interface, Up: C++ Parsers - -10.1.3 C++ Parser Interface ---------------------------- - -The output files ‘FILE.hh’ and ‘FILE.cc’ declare and define the parser -class in the namespace ‘yy’. The class name defaults to ‘parser’, but -may be changed using ‘%define api.parser.class {NAME}’. The interface -of this class is detailed below. It can be extended using the -‘%parse-param’ feature: its semantics is slightly changed since it -describes an additional member of the parser class, and an additional -argument for its constructor. - - -- Type of parser: token - A structure that contains (only) the ‘token_kind_type’ enumeration, - which defines the tokens. To refer to the token ‘FOO’, use - ‘yy::parser::token::FOO’. The scanner can use ‘typedef - yy::parser::token token;’ to "import" the token enumeration (*note - Calc++ Scanner::). - - -- Type of parser: token_kind_type - An enumeration of the token kinds. Its enumerators are forged from - the token names, with a possible token prefix (*note - ‘api.token.prefix’: api-token-prefix.): - - /// Token kinds. - struct token - { - enum token_kind_type - { - YYEMPTY = -2, // No token. - YYEOF = 0, // "end of file" - YYerror = 256, // error - YYUNDEF = 257, // "invalid token" - PLUS = 258, // "+" - MINUS = 259, // "-" - [...] - VAR = 271, // "variable" - NEG = 272 // NEG - }; - }; - - /// Token kind, as returned by yylex. - typedef token::token_kind_type token_kind_type; - - -- Type of parser: value_type - The types for semantic values. *Note C++ Semantic Values::. - - -- Type of parser: location_type - The type of locations, if location tracking is enabled. *Note C++ - Location Values::. - - -- Type of parser: syntax_error - This class derives from ‘std::runtime_error’. Throw instances of - it from the scanner or from the actions to raise parse errors. 
- This is equivalent with first invoking ‘error’ to report the - location and message of the syntax error, and then to invoke - ‘YYERROR’ to enter the error-recovery mode. But contrary to - ‘YYERROR’ which can only be invoked from user actions (i.e., - written in the action itself), the exception can be thrown from - functions invoked from the user action. - - -- Constructor on parser: parser () - -- Constructor on parser: parser (TYPE1 ARG1, ...) - Build a new parser object. There are no arguments, unless - ‘%parse-param {TYPE1 ARG1}’ was used. - - -- Constructor on syntax_error: syntax_error (const location_type& L, - const std::string& M) - -- Constructor on syntax_error: syntax_error (const std::string& M) - Instantiate a syntax-error exception. - - -- Method on parser: int operator() () - -- Method on parser: int parse () - Run the syntactic analysis, and return 0 on success, 1 otherwise. - Both routines are equivalent, ‘operator()’ being more C++ish. - - The whole function is wrapped in a ‘try’/‘catch’ block, so that - when an exception is thrown, the ‘%destructor’s are called to - release the lookahead symbol, and the symbols pushed on the stack. - - Exception related code in the generated parser is protected by CPP - guards (‘#if’) and disabled when exceptions are not supported - (i.e., passing ‘-fno-exceptions’ to the C++ compiler). - - -- Method on parser: std::ostream& debug_stream () - -- Method on parser: void set_debug_stream (std::ostream& O) - Get or set the stream used for tracing the parsing. It defaults to - ‘std::cerr’. - - -- Method on parser: debug_level_type debug_level () - -- Method on parser: void set_debug_level (debug_level_type L) - Get or set the tracing level (an integral). Currently its value is - either 0, no trace, or nonzero, full tracing. 
- - -- Method on parser: void error (const location_type& L, const - std::string& M) - -- Method on parser: void error (const std::string& M) - The definition for this member function must be supplied by the - user: the parser uses it to report a parser error occurring at L, - described by M. If location tracking is not enabled, the second - signature is used. - - -File: bison.info, Node: C++ Semantic Values, Next: C++ Location Values, Prev: C++ Parser Interface, Up: C++ Parsers - -10.1.4 C++ Semantic Values --------------------------- - -Bison supports two different means to handle semantic values in C++. -One is alike the C interface, and relies on unions. As C++ -practitioners know, unions are inconvenient in C++, therefore another -approach is provided, based on variants. - -* Menu: - -* C++ Unions:: Semantic values cannot be objects -* C++ Variants:: Using objects as semantic values - - -File: bison.info, Node: C++ Unions, Next: C++ Variants, Up: C++ Semantic Values - -10.1.4.1 C++ Unions -................... - -The ‘%union’ directive works as for C, see *note Union Decl::. In -particular it produces a genuine ‘union’, which have a few specific -features in C++. - − The value type is ‘yy::parser::value_type’, not ‘YYSTYPE’. - − Non POD (Plain Old Data) types cannot be used. C++98 forbids any - instance of classes with constructors in unions: only _pointers_ to - such objects are allowed. C++11 relaxed this constraints, but at - the cost of safety. - - Because objects have to be stored via pointers, memory is not -reclaimed automatically: using the ‘%destructor’ directive is the only -means to avoid leaks. *Note Destructor Decl::. - - -File: bison.info, Node: C++ Variants, Prev: C++ Unions, Up: C++ Semantic Values - -10.1.4.2 C++ Variants -..................... - -Bison provides a _variant_ based implementation of semantic values for -C++. 
This alleviates all the limitations reported in the previous -section, and in particular, object types can be used without pointers. - - To enable variant-based semantic values, set the ‘%define’ variable -‘api.value.type’ to ‘variant’ (*note %define Summary::). Then ‘%union’ -is ignored; instead of using the name of the fields of the ‘%union’ to -"type" the symbols, use genuine types. - - For instance, instead of: - - %union - { - int ival; - std::string* sval; - } - %token NUMBER; - %token STRING; - -write: - - %token NUMBER; - %token STRING; - - ‘STRING’ is no longer a pointer, which should fairly simplify the -user actions in the grammar and in the scanner (in particular the memory -management). - - Since C++ features destructors, and since it is customary to -specialize ‘operator<<’ to support uniform printing of values, variants -also typically simplify Bison printers and destructors. - - Variants are stricter than unions. When based on unions, you may -play any dirty game with ‘yylval’, say storing an ‘int’, reading a -‘char*’, and then storing a ‘double’ in it. This is no longer possible -with variants: they must be initialized, then assigned to, and -eventually, destroyed. As a matter of fact, Bison variants forbid the -use of alternative types such as ‘$2’ or ‘$$’, even in -midrule actions. It is mandatory to use typed midrule actions (*note -Typed Midrule Actions::). - - -- Method on value_type: T& emplace () - -- Method on value_type: T& emplace (const T& T) - Available in C++98/C++03 only. Default construct/copy-construct - from T. Return a reference to where the actual value may be - stored. Requires that the variant was not initialized yet. - - -- Method on value_type: T& emplace (U&&... U) - Available in C++11 and later only. Build a variant of type ‘T’ - from the variadic forwarding references U.... - - *Warning*: We do not use Boost.Variant, for two reasons. 
First, it -appeared unacceptable to require Boost on the user's machine (i.e., the -machine on which the generated parser will be compiled, not the machine -on which ‘bison’ was run). Second, for each possible semantic value, -Boost.Variant not only stores the value, but also a tag specifying its -type. But the parser already "knows" the type of the semantic value, so -that would be duplicating the information. - - We do not use C++17's ‘std::variant’ either: we want to support all -the C++ standards, and of course ‘std::variant’ also stores a tag to -record the current type. - - Therefore we developed light-weight variants whose type tag is -external (so they are really like ‘unions’ for C++ actually). There is -a number of limitations in (the current implementation of) variants: - • Alignment must be enforced: values should be aligned in memory - according to the most demanding type. Computing the smallest - alignment possible requires meta-programming techniques that are - not currently implemented in Bison, and therefore, since, as far as - we know, ‘double’ is the most demanding type on all platforms, - alignments are enforced for ‘double’ whatever types are actually - used. This may waste space in some cases. - - • There might be portability issues we are not aware of. - - As far as we know, these limitations _can_ be alleviated. All it -takes is some time and/or some talented C++ hacker willing to contribute -to Bison. - - -File: bison.info, Node: C++ Location Values, Next: C++ Parser Context, Prev: C++ Semantic Values, Up: C++ Parsers - -10.1.5 C++ Location Values --------------------------- - -When the directive ‘%locations’ is used, the C++ parser supports -location tracking, see *note Tracking Locations::. - - By default, two auxiliary classes define a ‘position’, a single point -in a file, and a ‘location’, a range composed of a pair of ‘position’s -(possibly spanning several files). 
If the ‘%define’ variable -‘api.location.type’ is defined, then these classes will not be -generated, and the user defined type will be used. - -* Menu: - -* C++ position:: One point in the source file -* C++ location:: Two points in the source file -* Exposing the Location Classes:: Using the Bison location class in your - project -* User Defined Location Type:: Required interface for locations - - -File: bison.info, Node: C++ position, Next: C++ location, Up: C++ Location Values - -10.1.5.1 C++ ‘position’ -....................... - - -- Type of position: filename_type - The base type for file names. Defaults to ‘const std::string’. - *Note ‘api.filename.type’: api-filename-type, to change its - definition. - - -- Type of position: counter_type - The type used to store line and column numbers. Defined as ‘int’. - - -- Constructor on position: position (filename_type* FILE = nullptr, - counter_type LINE = 1, counter_type COL = 1) - Create a ‘position’ denoting a given point. Note that ‘file’ is - not reclaimed when the ‘position’ is destroyed: memory managed must - be handled elsewhere. - - -- Method on position: void initialize (filename_type* FILE = nullptr, - counter_type LINE = 1, counter_type COL = 1) - Reset the position to the given values. - - -- Instance Variable of position: filename_type* file - The name of the file. It will always be handled as a pointer, the - parser will never duplicate nor deallocate it. - - -- Instance Variable of position: counter_type line - The line, starting at 1. - - -- Method on position: void lines (counter_type HEIGHT = 1) - If HEIGHT is not null, advance by HEIGHT lines, resetting the - column number. The resulting line number cannot be less than 1. - - -- Instance Variable of position: counter_type column - The column, starting at 1. - - -- Method on position: void columns (counter_type WIDTH = 1) - Advance by WIDTH columns, without changing the line number. The - resulting column number cannot be less than 1. 
- - -- Method on position: position& operator+= (counter_type WIDTH) - -- Method on position: position operator+ (counter_type WIDTH) - -- Method on position: position& operator-= (counter_type WIDTH) - -- Method on position: position operator- (counter_type WIDTH) - Various forms of syntactic sugar for ‘columns’. - - -- Method on position: bool operator== (const position& THAT) - -- Method on position: bool operator!= (const position& THAT) - Whether ‘*this’ and ‘that’ denote equal/different positions. - - -- Function: std::ostream& operator<< (std::ostream& O, const position& - P) - Report P on O like this: ‘FILE:LINE.COLUMN’, or ‘LINE.COLUMN’ if - FILE is null. - - -File: bison.info, Node: C++ location, Next: Exposing the Location Classes, Prev: C++ position, Up: C++ Location Values - -10.1.5.2 C++ ‘location’ -....................... - - -- Constructor on location: location (const position& BEGIN, const - position& END) - Create a ‘Location’ from the endpoints of the range. - - -- Constructor on location: location (const position& POS = position()) - -- Constructor on location: location (filename_type* FILE, counter_type - LINE, counter_type COL) - Create a ‘Location’ denoting an empty range located at a given - point. - - -- Method on location: void initialize (filename_type* FILE = nullptr, - counter_type LINE = 1, counter_type COL = 1) - Reset the location to an empty range at the given values. - - -- Instance Variable of location: position begin - -- Instance Variable of location: position end - The first, inclusive, position of the range, and the first beyond. - - -- Method on location: void columns (counter_type WIDTH = 1) - -- Method on location: void lines (counter_type HEIGHT = 1) - Forwarded to the ‘end’ position. 
- - -- Method on location: location operator+ (counter_type WIDTH) - -- Method on location: location operator+= (counter_type WIDTH) - -- Method on location: location operator- (counter_type WIDTH) - -- Method on location: location operator-= (counter_type WIDTH) - Various forms of syntactic sugar for ‘columns’. - - -- Method on location: location operator+ (const location& END) - -- Method on location: location operator+= (const location& END) - Join two locations: starts at the position of the first one, and - ends at the position of the second. - - -- Method on location: void step () - Move ‘begin’ onto ‘end’. - - -- Method on location: bool operator== (const location& THAT) - -- Method on location: bool operator!= (const location& THAT) - Whether ‘*this’ and ‘that’ denote equal/different ranges of - positions. - - -- Function: std::ostream& operator<< (std::ostream& O, const location& - P) - Report P on O, taking care of special cases such as: no ‘filename’ - defined, or equal filename/line or column. - - -File: bison.info, Node: Exposing the Location Classes, Next: User Defined Location Type, Prev: C++ location, Up: C++ Location Values - -10.1.5.3 Exposing the Location Classes -...................................... - -When both ‘%header’ and ‘%locations’ are enabled, Bison generates an -additional file: ‘location.hh’. If you don't use locations outside of -the parser, you may avoid its creation with ‘%define api.location.file -none’. - - However this file is useful if, for instance, your parser builds an -abstract syntax tree decorated with locations: you may use Bison's -‘location’ type independently of Bison's parser. You may name the file -differently, e.g., ‘%define api.location.file -"include/ast/location.hh"’: this name can have directory components, or -even be absolute. The way the location file is included is controlled -by ‘api.location.include’. - - This way it is possible to have several parsers share the same -location file. 
- - For instance, in ‘src/foo/parser.yy’, generate the -‘include/ast/loc.hh’ file: - - // src/foo/parser.yy - %locations - %define api.namespace {foo} - %define api.location.file "include/ast/loc.hh" - %define api.location.include {} - -and use it in ‘src/bar/parser.yy’: - - // src/bar/parser.yy - %locations - %define api.namespace {bar} - %code requires {#include } - %define api.location.type {bar::location} - - Absolute file names are supported; it is safe in your ‘Makefile’ to -pass the flag ‘-Dapi.location.file='"$(top_srcdir)/include/ast/loc.hh"'’ -to ‘bison’ for ‘src/foo/parser.yy’. The generated file will not have -references to this absolute path, thanks to ‘%define -api.location.include {}’. Adding ‘-I $(top_srcdir)/include’ -to your ‘CPPFLAGS’ will suffice for the compiler to find ‘ast/loc.hh’. - - -File: bison.info, Node: User Defined Location Type, Prev: Exposing the Location Classes, Up: C++ Location Values - -10.1.5.4 User Defined Location Type -................................... - -Instead of using the built-in types you may use the ‘%define’ variable -‘api.location.type’ to specify your own type: - - %define api.location.type {LOCATIONTYPE} - - The requirements over your LOCATIONTYPE are: - • it must be copyable; - - • in order to compute the (default) value of ‘@$’ in a reduction, the - parser basically runs - @$.begin = @1.begin; - @$.end = @N.end; // The location of last right-hand side symbol. - so there must be copyable ‘begin’ and ‘end’ members; - - • alternatively you may redefine the computation of the default - location, in which case these members are not required (*note - Location Default Action::); - - • if traces are enabled, then there must exist an ‘std::ostream& - operator<< (std::ostream& o, const LOCATIONTYPE& s)’ function. - - - In programs with several C++ parsers, you may also use the ‘%define’ -variable ‘api.location.type’ to share a common set of built-in -definitions for ‘position’ and ‘location’. 
For instance, one parser -‘master/parser.yy’ might use: - - %header - %locations - %define api.namespace {master::} - -to generate the ‘master/position.hh’ and ‘master/location.hh’ files, -reused by other parsers as follows: - - %define api.location.type {master::location} - %code requires { #include } - - -File: bison.info, Node: C++ Parser Context, Next: C++ Scanner Interface, Prev: C++ Location Values, Up: C++ Parsers - -10.1.6 C++ Parser Context -------------------------- - -When ‘%define parse.error custom’ is used (*note Syntax Error Reporting -Function::), the user must define the following function. - - -- Method on parser: void report_syntax_error (const context_type&CTX) - const - Report a syntax error to the user. Whether it uses ‘yyerror’ is up - to the user. - - Use the following types and functions to build the error message. - - -- Type of parser: context - A type that captures the circumstances of the syntax error. - - -- Type of parser: symbol_kind_type - An enum of all the grammar symbols, tokens and nonterminals. Its - enumerators are forged from the symbol names: - - struct symbol_kind - { - enum symbol_kind_type - { - S_YYEMPTY = -2, // No symbol. - S_YYEOF = 0, // "end of file" - S_YYERROR = 1, // error - S_YYUNDEF = 2, // "invalid token" - S_PLUS = 3, // "+" - S_MINUS = 4, // "-" - [...] - S_VAR = 14, // "variable" - S_NEG = 15, // NEG - S_YYACCEPT = 16, // $accept - S_exp = 17, // exp - S_input = 18 // input - }; - }; - typedef symbol_kind::symbol_kind_t symbol_kind_type; - - -- Method on context: const symbol_type& lookahead () const - The "unexpected" token: the lookahead that caused the syntax error. - - -- Method on context: symbol_kind_type token () const - The symbol kind of the lookahead token that caused the syntax - error. Returns ‘symbol_kind::S_YYEMPTY’ if there is no lookahead. - - -- Method on context: const location& location () const - The location of the syntax error (that of the lookahead). 
- - -- Method on context: int expected_tokens (symbol_kind_type ARGV[], int - ARGC) const - Fill ARGV with the expected tokens, which never includes - ‘symbol_kind::S_YYEMPTY’, ‘symbol_kind::S_YYERROR’, or - ‘symbol_kind::S_YYUNDEF’. - - Never put more than ARGC elements into ARGV, and on success return - the number of tokens stored in ARGV. If there are more expected - tokens than ARGC, fill ARGV up to ARGC and return 0. If there are - no expected tokens, also return 0, but set ‘argv[0]’ to - ‘symbol_kind::S_YYEMPTY’. - - If ARGV is null, return the size needed to store all the possible - values, which is always less than ‘YYNTOKENS’. - - -- Method on parser: const char * symbol_name (symbol_kind_t SYMBOL) - const - The name of the symbol whose kind is SYMBOL, possibly translated. - - Returns a ‘std::string’ when ‘parse.error’ is ‘verbose’. - - A custom syntax error function looks as follows. This implementation -is inappropriate for internationalization, see the ‘c/bistromathic’ -example for a better alternative. - - void - yy::parser::report_syntax_error (const context& ctx) - { - int res = 0; - std::cerr << ctx.location () << ": syntax error"; - // Report the tokens expected at this point. - { - enum { TOKENMAX = 5 }; - symbol_kind_type expected[TOKENMAX]; - int n = ctx.expected_tokens (ctx, expected, TOKENMAX); - for (int i = 0; i < n; ++i) - std::cerr << i == 0 ? ": expected " : " or " - << symbol_name (expected[i]); - } - // Report the unexpected token. - { - symbol_kind_type lookahead = ctx.token (); - if (lookahead != symbol_kind::S_YYEMPTY) - std::cerr << " before " << symbol_name (lookahead)); - } - std::cerr << '\n'; - } - - You still must provide a ‘yyerror’ function, used for instance to -report memory exhaustion. - - -File: bison.info, Node: C++ Scanner Interface, Next: A Complete C++ Example, Prev: C++ Parser Context, Up: C++ Parsers - -10.1.7 C++ Scanner Interface ----------------------------- - -The parser invokes the scanner by calling ‘yylex’. 
Contrary to C -parsers, C++ parsers are always pure: there is no point in using the -‘%define api.pure’ directive. The actual interface with ‘yylex’ depends -whether you use unions, or variants. - -* Menu: - -* Split Symbols:: Passing symbols as two/three components -* Complete Symbols:: Making symbols a whole - - -File: bison.info, Node: Split Symbols, Next: Complete Symbols, Up: C++ Scanner Interface - -10.1.7.1 Split Symbols -...................... - -The generated parser expects ‘yylex’ to have the following prototype. - - -- Function: int yylex (value_type* YYLVAL, location_type* YYLLOC, - TYPE1 ARG1, ...) - -- Function: int yylex (value_type* YYLVAL, TYPE1 ARG1, ...) - Return the next token. Its kind is the return value, its semantic - value and location (if enabled) being YYLVAL and YYLLOC. - Invocations of ‘%lex-param {TYPE1 ARG1}’ yield additional - arguments. - - Note that when using variants, the interface for ‘yylex’ is the same, -but ‘yylval’ is handled differently. - - Regular union-based code in Lex scanner typically looks like: - - [0-9]+ { - yylval->ival = text_to_int (yytext); - return yy::parser::token::INTEGER; - } - [a-z]+ { - yylval->sval = new std::string (yytext); - return yy::parser::token::IDENTIFIER; - } - - Using variants, ‘yylval’ is already constructed, but it is not -initialized. So the code would look like: - - [0-9]+ { - yylval->emplace () = text_to_int (yytext); - return yy::parser::token::INTEGER; - } - [a-z]+ { - yylval->emplace () = yytext; - return yy::parser::token::IDENTIFIER; - } - -or - - [0-9]+ { - yylval->emplace (text_to_int (yytext)); - return yy::parser::token::INTEGER; - } - [a-z]+ { - yylval->emplace (yytext); - return yy::parser::token::IDENTIFIER; - } - - -File: bison.info, Node: Complete Symbols, Prev: Split Symbols, Up: C++ Scanner Interface - -10.1.7.2 Complete Symbols -......................... 
- -With both ‘%define api.value.type variant’ and ‘%define -api.token.constructor’, the parser defines the type ‘symbol_type’, and -expects ‘yylex’ to have the following prototype. - - -- Function: parser::symbol_type yylex () - -- Function: parser::symbol_type yylex (TYPE1 ARG1, ...) - Return a _complete_ symbol, aggregating its type (i.e., the - traditional value returned by ‘yylex’), its semantic value, and - possibly its location. Invocations of ‘%lex-param {TYPE1 ARG1}’ - yield additional arguments. - - -- Type of parser: symbol_type - A "complete symbol", that binds together its kind, value and (when - applicable) location. - - -- Method on symbol_type: symbol_kind_type kind () const - The kind of this symbol. - - -- Method on symbol_type: const char * name () const - The name of the kind of this symbol. - - Returns a ‘std::string’ when ‘parse.error’ is ‘verbose’. - - - For each token kind, Bison generates named constructors as follows. - - -- Constructor on parser::symbol_type: symbol_type (int TOKEN, const - VALUE_TYPE& VALUE, const location_type& LOCATION) - -- Constructor on parser::symbol_type: symbol_type (int TOKEN, const - location_type& LOCATION) - -- Constructor on parser::symbol_type: symbol_type (int TOKEN, const - VALUE_TYPE& VALUE) - -- Constructor on parser::symbol_type: symbol_type (int TOKEN) - Build a complete terminal symbol for the token kind TOKEN - (including the ‘api.token.prefix’), whose semantic value, if it has - one, is VALUE of adequate VALUE_TYPE. Pass the LOCATION iff - location tracking is enabled. - - Consistency between TOKEN and VALUE_TYPE is checked via an - ‘assert’. 
- - For instance, given the following declarations: - - %define api.token.prefix {TOK_} - %token IDENTIFIER; - %token INTEGER; - %token ':'; - -you may use these constructors: - - symbol_type (int token, const std::string&, const location_type&); - symbol_type (int token, const int&, const location_type&); - symbol_type (int token, const location_type&); - - Correct matching between token kinds and value types is checked via -‘assert’; for instance, ‘symbol_type (ID, 42)’ would abort. Named -constructors are preferable (see below), as they offer better type -safety (for instance ‘make_ID (42)’ would not even compile), but -symbol_type constructors may help when token kinds are discovered at -run-time, e.g., - - [a-z]+ { - if (auto i = lookup_keyword (yytext)) - return yy::parser::symbol_type (i, loc); - else - return yy::parser::make_ID (yytext, loc); - } - - - Note that it is possible to generate and compile type incorrect code -(e.g. ‘symbol_type (':', yytext, loc)’). It will fail at run time, -provided the assertions are enabled (i.e., ‘-DNDEBUG’ was not passed to -the compiler). Bison supports an alternative that guarantees that type -incorrect code will not even compile. Indeed, it generates _named -constructors_ as follows. - - -- Method on parser: symbol_type make_TOKEN (const VALUE_TYPE& VALUE, - const location_type& LOCATION) - -- Method on parser: symbol_type make_TOKEN (const location_type& - LOCATION) - -- Method on parser: symbol_type make_TOKEN (const VALUE_TYPE& VALUE) - -- Method on parser: symbol_type make_TOKEN () - Build a complete terminal symbol for the token kind TOKEN (not - including the ‘api.token.prefix’), whose semantic value, if it has - one, is VALUE of adequate VALUE_TYPE. Pass the LOCATION iff - location tracking is enabled. 
- - For instance, given the following declarations: - - %define api.token.prefix {TOK_} - %token IDENTIFIER; - %token INTEGER; - %token COLON; - %token EOF 0; - -Bison generates: - - symbol_type make_IDENTIFIER (const std::string&, const location_type&); - symbol_type make_INTEGER (const int&, const location_type&); - symbol_type make_COLON (const location_type&); - symbol_type make_EOF (const location_type&); - -which should be used in a scanner as follows. - - [a-z]+ return yy::parser::make_IDENTIFIER (yytext, loc); - [0-9]+ return yy::parser::make_INTEGER (text_to_int (yytext), loc); - ":" return yy::parser::make_COLON (loc); - <> return yy::parser::make_EOF (loc); - - Tokens that do not have an identifier are not accessible: you cannot -simply use characters such as ‘':'’, they must be declared with -‘%token’, including the end-of-file token. - - -File: bison.info, Node: A Complete C++ Example, Prev: C++ Scanner Interface, Up: C++ Parsers - -10.1.8 A Complete C++ Example ------------------------------ - -This section demonstrates the use of a C++ parser with a simple but -complete example. This example should be available on your system, -ready to compile, in the directory ‘examples/c++/calc++’. It focuses on -the use of Bison, therefore the design of the various C++ classes is -very naive: no accessors, no encapsulation of members etc. We will use -a Lex scanner, and more precisely, a Flex scanner, to demonstrate the -various interactions. A hand-written scanner is actually easier to -interface with. - -* Menu: - -* Calc++ --- C++ Calculator:: The specifications -* Calc++ Parsing Driver:: An active parsing context -* Calc++ Parser:: A parser class -* Calc++ Scanner:: A pure C++ Flex scanner -* Calc++ Top Level:: Conducting the band - - -File: bison.info, Node: Calc++ --- C++ Calculator, Next: Calc++ Parsing Driver, Up: A Complete C++ Example - -10.1.8.1 Calc++ -- C++ Calculator -................................. 
- -Of course the grammar is dedicated to arithmetic, a single expression, -possibly preceded by variable assignments. An environment containing -possibly predefined variables such as ‘one’ and ‘two’, is exchanged with -the parser. An example of valid input follows. - - three := 3 - seven := one + two * three - seven * seven - - -File: bison.info, Node: Calc++ Parsing Driver, Next: Calc++ Parser, Prev: Calc++ --- C++ Calculator, Up: A Complete C++ Example - -10.1.8.2 Calc++ Parsing Driver -.............................. - -To support a pure interface with the parser (and the scanner) the -technique of the "parsing context" is convenient: a structure containing -all the data to exchange. Since, in addition to simply launch the -parsing, there are several auxiliary tasks to execute (open the file for -scanning, instantiate the parser etc.), we recommend transforming the -simple parsing context structure into a fully blown “parsing driver” -class. - - The declaration of this driver class, in ‘driver.hh’, is as follows. -The first part includes the CPP guard and imports the required standard -library components, and the declaration of the parser class. - - #ifndef DRIVER_HH - # define DRIVER_HH - # include - # include - # include "parser.hh" - -Then comes the declaration of the scanning function. Flex expects the -signature of ‘yylex’ to be defined in the macro ‘YY_DECL’, and the C++ -parser expects it to be declared. We can factor both as follows. - - // Give Flex the prototype of yylex we want ... - # define YY_DECL \ - yy::parser::symbol_type yylex (driver& drv) - // ... and declare it for the parser's sake. - YY_DECL; - -The ‘driver’ class is then declared with its most obvious members. - - // Conducting the whole scanning and parsing of Calc++. - class driver - { - public: - driver (); - - std::map variables; - - int result; - -The main routine is of course calling the parser. - - // Run the parser on file F. Return 0 on success. 
- int parse (const std::string& f); - // The name of the file being parsed. - std::string file; - // Whether to generate parser debug traces. - bool trace_parsing; - -To encapsulate the coordination with the Flex scanner, it is useful to -have member functions to open and close the scanning phase. - - // Handling the scanner. - void scan_begin (); - void scan_end (); - // Whether to generate scanner debug traces. - bool trace_scanning; - // The token's location used by the scanner. - yy::location location; - }; - #endif // ! DRIVER_HH - - The implementation of the driver (‘driver.cc’) is straightforward. - - #include "driver.hh" - #include "parser.hh" - - driver::driver () - : trace_parsing (false), trace_scanning (false) - { - variables["one"] = 1; - variables["two"] = 2; - } - - The ‘parse’ member function deserves some attention. - - int - driver::parse (const std::string &f) - { - file = f; - location.initialize (&file); - scan_begin (); - yy::parser parse (*this); - parse.set_debug_level (trace_parsing); - int res = parse (); - scan_end (); - return res; - } - - -File: bison.info, Node: Calc++ Parser, Next: Calc++ Scanner, Prev: Calc++ Parsing Driver, Up: A Complete C++ Example - -10.1.8.3 Calc++ Parser -...................... - -The grammar file ‘parser.yy’ starts by asking for the C++ deterministic -parser skeleton, the creation of the parser header file. Because the -C++ skeleton changed several times, it is safer to require the version -you designed the grammar for. - - %skeleton "lalr1.cc" // -*- C++ -*- - %require "3.8.2" - %header - -Because our scanner returns only genuine tokens and never simple -characters (i.e., it returns ‘PLUS’, not ‘'+'’), we can avoid -conversions. - - %define api.token.raw - -This example uses genuine C++ objects as semantic values, therefore, we -require the variant-based storage of semantic values. To make sure we -properly use it, we enable assertions. 
To fully benefit from -type-safety and more natural definition of "symbol", we enable -‘api.token.constructor’. - - %define api.token.constructor - %define api.value.type variant - %define parse.assert - -Then come the declarations/inclusions needed by the semantic values. -Because the parser uses the parsing driver and reciprocally, both would -like to include the header of the other, which is, of course, insane. -This mutual dependency will be broken using forward declarations. -Because the driver's header needs detailed knowledge about the parser -class (in particular its inner types), it is the parser's header which -will use a forward declaration of the driver. *Note %code Summary::. - - %code requires { - # include - class driver; - } - -The driver is passed by reference to the parser and to the scanner. -This provides a simple but effective pure interface, not relying on -global variables. - - // The parsing context. - %param { driver& drv } - -Then we request location tracking. - - %locations - -Use the following two directives to enable parser tracing and detailed -error messages. However, detailed error messages can contain incorrect -information if lookahead correction is not enabled (*note LAC::). - - %define parse.trace - %define parse.error detailed - %define parse.lac full - -The code between ‘%code {’ and ‘}’ is output in the ‘*.cc’ file; it -needs detailed knowledge about the driver. - - %code { - # include "driver.hh" - } - -User friendly names are provided for each symbol. To avoid name clashes -in the generated files (*note Calc++ Scanner::), prefix tokens with -‘TOK_’ (*note %define Summary::). - - %define api.token.prefix {TOK_} - %token - ASSIGN ":=" - MINUS "-" - PLUS "+" - STAR "*" - SLASH "/" - LPAREN "(" - RPAREN ")" - ; - -Since we use variant-based semantic values, ‘%union’ is not used, and -‘%token’, ‘%nterm’ and ‘%type’ expect genuine types, not type tags. 
- - %token IDENTIFIER "identifier" - %token NUMBER "number" - %nterm exp - -No ‘%destructor’ is needed to enable memory deallocation during error -recovery; the memory, for strings for instance, will be reclaimed by the -regular destructors. All the values are printed using their -‘operator<<’ (*note Printer Decl::). - - %printer { yyo << $$; } <*>; - -The grammar itself is straightforward (*note Location Tracking Calc::). - - %% - %start unit; - unit: assignments exp { drv.result = $2; }; - - assignments: - %empty {} - | assignments assignment {}; - - assignment: - "identifier" ":=" exp { drv.variables[$1] = $3; }; - - %left "+" "-"; - %left "*" "/"; - exp: - "number" - | "identifier" { $$ = drv.variables[$1]; } - | exp "+" exp { $$ = $1 + $3; } - | exp "-" exp { $$ = $1 - $3; } - | exp "*" exp { $$ = $1 * $3; } - | exp "/" exp { $$ = $1 / $3; } - | "(" exp ")" { $$ = $2; } - %% - -Finally the ‘error’ member function reports the errors. - - void - yy::parser::error (const location_type& l, const std::string& m) - { - std::cerr << l << ": " << m << '\n'; - } - - -File: bison.info, Node: Calc++ Scanner, Next: Calc++ Top Level, Prev: Calc++ Parser, Up: A Complete C++ Example - -10.1.8.4 Calc++ Scanner -....................... - -In addition to standard headers, the Flex scanner includes the driver's, -then the parser's to get the set of defined tokens. - - %{ /* -*- C++ -*- */ - # include - # include - # include - # include // strerror - # include - # include "driver.hh" - # include "parser.hh" - %} - -Since our calculator has no ‘#include’-like feature, we don't need -‘yywrap’. We don't need the ‘unput’ and ‘input’ functions either, and -we parse an actual file, this is not an interactive session with the -user. Finally, we enable scanner tracing. - - %option noyywrap nounput noinput batch debug - -The following function will be handy to convert a string denoting a -number into a ‘NUMBER’ token. - - %{ - // A number symbol corresponding to the value in S. 
- yy::parser::symbol_type - make_NUMBER (const std::string &s, const yy::parser::location_type& loc); - %} - -Abbreviations allow for more readable rules. - - id [a-zA-Z][a-zA-Z_0-9]* - int [0-9]+ - blank [ \t\r] - -The following paragraph suffices to track locations accurately. Each -time ‘yylex’ is invoked, the begin position is moved onto the end -position. Then when a pattern is matched, its width is added to the end -column. When matching ends of lines, the end cursor is adjusted, and -each time blanks are matched, the begin cursor is moved onto the end -cursor to effectively ignore the blanks preceding tokens. Comments -would be treated equally. - - %{ - // Code run each time a pattern is matched. - # define YY_USER_ACTION loc.columns (yyleng); - %} - %% - %{ - // A handy shortcut to the location held by the driver. - yy::location& loc = drv.location; - // Code run each time yylex is called. - loc.step (); - %} - {blank}+ loc.step (); - \n+ loc.lines (yyleng); loc.step (); - -The rules are simple. The driver is used to report errors. - - "-" return yy::parser::make_MINUS (loc); - "+" return yy::parser::make_PLUS (loc); - "*" return yy::parser::make_STAR (loc); - "/" return yy::parser::make_SLASH (loc); - "(" return yy::parser::make_LPAREN (loc); - ")" return yy::parser::make_RPAREN (loc); - ":=" return yy::parser::make_ASSIGN (loc); - - {int} return make_NUMBER (yytext, loc); - {id} return yy::parser::make_IDENTIFIER (yytext, loc); - . { - throw yy::parser::syntax_error - (loc, "invalid character: " + std::string(yytext)); - } - <> return yy::parser::make_YYEOF (loc); - %% - -You should keep your rules simple, both in the parser and in the -scanner. Throwing from the auxiliary functions is then very handy to -report errors. - - yy::parser::symbol_type - make_NUMBER (const std::string &s, const yy::parser::location_type& loc) - { - errno = 0; - long n = strtol (s.c_str(), NULL, 10); - if (! 
(INT_MIN <= n && n <= INT_MAX && errno != ERANGE)) - throw yy::parser::syntax_error (loc, "integer is out of range: " + s); - return yy::parser::make_NUMBER ((int) n, loc); - } - -Finally, because the scanner-related driver's member-functions depend on -the scanner's data, it is simpler to implement them in this file. - - void - driver::scan_begin () - { - yy_flex_debug = trace_scanning; - if (file.empty () || file == "-") - yyin = stdin; - else if (!(yyin = fopen (file.c_str (), "r"))) - { - std::cerr << "cannot open " << file << ": " << strerror (errno) << '\n'; - exit (EXIT_FAILURE); - } - } - - void - driver::scan_end () - { - fclose (yyin); - } - - -File: bison.info, Node: Calc++ Top Level, Prev: Calc++ Scanner, Up: A Complete C++ Example - -10.1.8.5 Calc++ Top Level -......................... - -The top level file, ‘calc++.cc’, poses no problem. - - #include - #include "driver.hh" - - int - main (int argc, char *argv[]) - { - int res = 0; - driver drv; - for (int i = 1; i < argc; ++i) - if (argv[i] == std::string ("-p")) - drv.trace_parsing = true; - else if (argv[i] == std::string ("-s")) - drv.trace_scanning = true; - else if (!drv.parse (argv[i])) - std::cout << drv.result << '\n'; - else - res = 1; - return res; - } - - -File: bison.info, Node: D Parsers, Next: Java Parsers, Prev: C++ Parsers, Up: Other Languages - -10.2 D Parsers -============== - -* Menu: - -* D Bison Interface:: Asking for D parser generation -* D Semantic Values:: %token and %nterm vs. 
D -* D Location Values:: The position and location classes -* D Parser Interface:: Instantiating and running the parser -* D Parser Context Interface:: Circumstances of a syntax error -* D Scanner Interface:: Specifying the scanner for the parser -* D Action Features:: Special features for use in actions -* D Push Parser Interface:: Instantiating and running the push parser -* D Complete Symbols:: Using token constructors - - -File: bison.info, Node: D Bison Interface, Next: D Semantic Values, Up: D Parsers - -10.2.1 D Bison Interface ------------------------- - -The D parser skeletons are selected using the ‘%language "D"’ directive -or the ‘-L D’/‘--language=D’ option. - - When generating a D parser, ‘bison BASENAME.y’ will create a single D -source file named ‘BASENAME.d’ containing the parser implementation. -Using a grammar file without a ‘.y’ suffix is currently broken. The -basename of the parser implementation file can be changed by the -‘%file-prefix’ directive or the ‘-b’/‘--file-prefix’ option. The entire -parser implementation file name can be changed by the ‘%output’ -directive or the ‘-o’/‘--output’ option. The parser implementation file -contains a single class for the parser. - - You can create documentation for generated parsers using Ddoc. - - GLR parsers are currently unsupported in D. Do not use the -‘glr-parser’ directive. - - No header file can be generated for D parsers. Do not use the -‘%header’ directive or the ‘-d’/‘--header’ options. - - -File: bison.info, Node: D Semantic Values, Next: D Location Values, Prev: D Bison Interface, Up: D Parsers - -10.2.2 D Semantic Values ------------------------- - -Semantic types are handled by ‘%union’ and ‘%define api.value.type -union’, similar to C/C++ parsers. In the latter case, the union of the -values is handled by the backend. In D, unions can hold classes, -structs, etc., so this directive is more similar to ‘%define -api.value.type variant’ from C++. 
- - D parsers do not support ‘%destructor’, since the language adopts -garbage collection. The parser will try to hold references to semantic -values for as little time as needed. - - D parsers support ‘%printer’. An example for the output of type -‘int’, where ‘yyo’ is the parser's debug output: - - %printer { yyo.write($$); } - - -File: bison.info, Node: D Location Values, Next: D Parser Interface, Prev: D Semantic Values, Up: D Parsers - -10.2.3 D Location Values ------------------------- - -When the directive ‘%locations’ is used, the D parser supports location -tracking, see *note Tracking Locations::. The position and the location -structures are provided. - - -- Instance Variable of Location: Position begin - -- Instance Variable of Location: Position end - The first, inclusive, position of the range, and the first beyond. - - -- Constructor on Location: this(Position LOC) - Create a ‘Location’ denoting an empty range located at a given - point. - - -- Constructor on Location: this(Position BEGIN, Position END) - Create a ‘Location’ from the endpoints of the range. - - -- Method on Location: string toString() - The range represented by the location as a string. - - -File: bison.info, Node: D Parser Interface, Next: D Parser Context Interface, Prev: D Location Values, Up: D Parsers - -10.2.4 D Parser Interface -------------------------- - -The name of the generated parser class defaults to ‘YYParser’. The ‘YY’ -prefix may be changed using the ‘%define api.prefix’. Alternatively, -use ‘%define api.parser.class {NAME}’ to give a custom name to the -class. The interface of this class is detailed below. - - By default, the parser class has public visibility. To add modifiers -to the parser class, ‘%define’ ‘api.parser.public’, -‘api.parser.abstract’ and/or ‘api.parser.final’. - - The superclass and the implemented interfaces of the parser class can -be specified with the ‘%define api.parser.extends’ and ‘%define -api.parser.implements’ directives. 
- - The parser class defines an interface, ‘Lexer’ (*note D Scanner -Interface::). Other than this interface and the members described in -the interface below, all the other members and fields are preceded with -a ‘yy’ or ‘YY’ prefix to avoid clashes with user code. - - The parser class can be extended using the ‘%parse-param’ directive. -Each occurrence of the directive will add a by default public field to -the parser class, and an argument to its constructor, which initializes -them automatically. - - -- Constructor on YYParser: this(LEX_PARAM, ..., PARSE_PARAM, ...) - Build a new parser object with embedded ‘%code lexer’. There are - no parameters, unless ‘%param’s and/or ‘%parse-param’s and/or - ‘%lex-param’s are used. - - -- Constructor on YYParser: this(Lexer LEXER, PARSE_PARAM, ...) - Build a new parser object using the specified scanner. There are - no additional parameters unless ‘%param’s and/or ‘%parse-param’s - are used. - - -- Method on YYParser: boolean parse() - Run the syntactic analysis, and return ‘true’ on success, ‘false’ - otherwise. - - -- Method on YYParser: boolean getErrorVerbose() - -- Method on YYParser: void setErrorVerbose(boolean VERBOSE) - Get or set the option to produce verbose error messages. These are - only available with ‘%define parse.error detailed’, which also - turns on verbose error messages. - - -- Method on YYParser: void yyerror(string MSG) - -- Method on YYParser: void yyerror(Location LOC, string MSG) - Print an error message using the ‘yyerror’ method of the scanner - instance in use. The ‘Location’ and ‘Position’ parameters are - available only if location tracking is active. - - -- Method on YYParser: boolean recovering() - During the syntactic analysis, return ‘true’ if recovering from a - syntax error. *Note Error Recovery::. - - -- Method on YYParser: File getDebugStream() - -- Method on YYParser: void setDebugStream(File O) - Get or set the stream used for tracing the parsing. It defaults to - ‘stderr’. 
- - -- Method on YYParser: int getDebugLevel() - -- Method on YYParser: void setDebugLevel(int L) - Get or set the tracing level. Currently its value is either 0, no - trace, or nonzero, full tracing. - - -- Constant of YYParser: string bisonVersion - -- Constant of YYParser: string bisonSkeleton - Identify the Bison version and skeleton used to generate this - parser. - - The internationalization in D is very similar to the one in C. The D -parser uses ‘dgettext’ for translating Bison messages. - - To enable internationalization, compile using ‘-version ENABLE_NLS --version YYENABLE_NLS’ and import ‘bindtextdomain’ and ‘textdomain’ from -C: - - extern(C) char* bindtextdomain(const char* domainname, const char* dirname); - extern(C) char* textdomain(const char* domainname); - - The main function should load the translation catalogs, similarly to -the ‘c/bistromathic’ example: - - int main() - { - import core.stdc.locale; - - // Set up internationalization. - setlocale(LC_ALL, ""); - // Use Bison's standard translation catalog for error messages - // (the generated messages). - bindtextdomain("bison-runtime", BISON_LOCALEDIR); - // For the translation catalog of your own project, use the - // name of your project. - bindtextdomain("bison", LOCALEDIR); - textdomain("bison"); - - // usual main content - ... - } - - For user message translations, the user must implement the ‘string -_(const char* MSG)’ function. 
It is recommended to use ‘gettext’: - - %code imports { - static if (!is(typeof(_))) - { - version(ENABLE_NLS) - { - extern(C) char* gettext(const char*); - string _(const char* s) - { - return to!string(gettext(s)); - } - } - } - static if (!is(typeof(_))) - { - pragma(inline, true) - string _(string msg) { return msg; } - } - } - - -File: bison.info, Node: D Parser Context Interface, Next: D Scanner Interface, Prev: D Parser Interface, Up: D Parsers - -10.2.5 D Parser Context Interface ---------------------------------- - -The parser context provides information to build error reports when you -invoke ‘%define parse.error custom’. - - -- Type of YYParser: SymbolKind - A struct containing an enum of all the grammar symbols, tokens and - nonterminals. Its enumerators are forged from the symbol names. - Use ‘void toString(W)(W sink)’ to get the symbol names. - - -- Method on YYParser.Context: YYParser.SymbolKind getToken() - The kind of the lookahead. Return ‘null’ iff there is no - lookahead. - - -- Method on YYParser.Context: YYParser.Location getLocation() - The location of the lookahead. - - -- Method on YYParser.Context: int - getExpectedTokens(YYParser.SymbolKind[] ARGV, int ARGC) - Fill ARGV with the expected tokens, which never includes - ‘SymbolKind.YYERROR’, or ‘SymbolKind.YYUNDEF’. - - Never put more than ARGC elements into ARGV, and on success return - the number of tokens stored in ARGV. If there are more expected - tokens than ARGC, fill ARGV up to ARGC and return 0. If there are - no expected tokens, also return 0, but set ‘argv[0]’ to ‘null’. - - If ARGV is null, return the size needed to store all the possible - values, which is always less than ‘YYNTOKENS’. 
- - -File: bison.info, Node: D Scanner Interface, Next: D Action Features, Prev: D Parser Context Interface, Up: D Parsers - -10.2.6 D Scanner Interface --------------------------- - -There are two possible ways to interface a Bison-generated D parser with -a scanner: the scanner may be defined by ‘%code lexer’, or defined -elsewhere. In either case, the scanner has to implement the ‘Lexer’ -inner interface of the parser class. This interface also contains -constants for all user-defined token names and the predefined ‘YYEOF’ -token. - - In the first case, the body of the scanner class is placed in ‘%code -lexer’ blocks. If you want to pass parameters from the parser -constructor to the scanner constructor, specify them with ‘%lex-param’; -they are passed before ‘%parse-param’s to the constructor. - - In the second case, the scanner has to implement the ‘Lexer’ -interface, which is defined within the parser class (e.g., -‘YYParser.Lexer’). The constructor of the parser object will then -accept an object implementing the interface; ‘%lex-param’ is not used in -this case. - - In both cases, the scanner has to implement the following methods. - - -- Method on Lexer: void yyerror(Location LOC, string MSG) - This method is defined by the user to emit an error message. The - first parameter is omitted if location tracking is not active. - - -- Method on Lexer: Symbol yylex() - Return the next token. The return value is of type ‘Symbol’, which - binds together the kind, the semantic value and the location. - - -- Method on Lexer: void reportSyntaxError(YYParser.Context CTX) - If you invoke ‘%define parse.error custom’ (*note Bison - Declarations::), then the parser no longer passes syntax error - messages to ‘yyerror’, rather it delegates that task to the user by - calling the ‘reportSyntaxError’ function. - - Whether it uses ‘yyerror’ is up to the user. - - Here is an example of a reporting function (*note D Parser Context - Interface::). 
- - public void reportSyntaxError(YYParser.Context ctx) - { - stderr.write(ctx.getLocation(), ": syntax error"); - // Report the expected tokens. - { - immutable int TOKENMAX = 5; - YYParser.SymbolKind[] arg = new YYParser.SymbolKind[TOKENMAX]; - int n = ctx.getExpectedTokens(arg, TOKENMAX); - if (n < TOKENMAX) - for (int i = 0; i < n; ++i) - stderr.write((i == 0 ? ": expected " : " or "), arg[i]); - } - // Report the unexpected token which triggered the error. - { - YYParser.SymbolKind lookahead = ctx.getToken(); - stderr.writeln(" before ", lookahead); - } - } - - This implementation is inappropriate for internationalization, see - the ‘c/bistromathic’ example for a better alternative. - - -File: bison.info, Node: D Action Features, Next: D Push Parser Interface, Prev: D Scanner Interface, Up: D Parsers - -10.2.7 Special Features for Use in D Actions --------------------------------------------- - -Here is a table of Bison constructs, variables and functions that are -useful in actions. - - -- Variable: $$ - Acts like a variable that contains the semantic value for the - grouping made by the current rule. *Note Actions::. - - -- Variable: $N - Acts like a variable that contains the semantic value for the Nth - component of the current rule. *Note Actions::. - - -- Function: yyerrok - Resume generating error messages immediately for subsequent syntax - errors. This is useful primarily in error rules. *Note Error - Recovery::. - - -File: bison.info, Node: D Push Parser Interface, Next: D Complete Symbols, Prev: D Action Features, Up: D Parsers - -10.2.8 D Push Parser Interface ------------------------------- - -Normally, Bison generates a pull parser for D. The following Bison -declaration says that you want the parser to be a push parser (*note -%define Summary::): - - %define api.push-pull push - - Most of the discussion about the D pull Parser Interface, (*note D -Parser Interface::) applies to the push parser interface as well. 
- - When generating a push parser, the method ‘pushParse’ is created with -the following signature: - - -- Method on YYParser: int pushParse (Symbol SYM) - - The primary difference with respect to a pull parser is that the -parser method ‘pushParse’ is invoked repeatedly to parse each token. -This function is available if either the ‘%define api.push-pull push’ or -‘%define api.push-pull both’ declaration is used (*note %define -Summary::). - - The value returned by the ‘pushParse’ method is one of the following: -‘ACCEPT’, ‘ABORT’, or ‘PUSH_MORE’. This new value, ‘PUSH_MORE’, may be -returned if more input is required to finish parsing the input. - - If ‘api.push-pull’ is defined as ‘both’, then the generated parser -class will also implement the ‘parse’ method. This method's body is a -loop that repeatedly invokes the scanner and then passes the values -obtained from the scanner to the ‘pushParse’ method. - - -File: bison.info, Node: D Complete Symbols, Prev: D Push Parser Interface, Up: D Parsers - -10.2.9 D Complete Symbols -------------------------- - -To build return values for ‘yylex’, call the ‘Symbol’ method of the same -name as the token kind reported, and adding the parameters for value and -location if necessary. These methods generate compile-time errors if -the parameters are inconsistent. Token constructors work with both -‘%union’ and ‘%define api.value.type union’. - - The order of the parameters is the same as for the ‘Symbol’ -constructor. An example for the token kind ‘NUM’, which has value -‘ival’ and with location tracking activated: - - Symbol.NUM(ival, location); - - -File: bison.info, Node: Java Parsers, Prev: D Parsers, Up: Other Languages - -10.3 Java Parsers -================= - -* Menu: - -* Java Bison Interface:: Asking for Java parser generation -* Java Semantic Values:: %token and %nterm vs. 
Java -* Java Location Values:: The position and location classes -* Java Parser Interface:: Instantiating and running the parser -* Java Parser Context Interface:: Circumstances of a syntax error -* Java Scanner Interface:: Specifying the scanner for the parser -* Java Action Features:: Special features for use in actions -* Java Push Parser Interface:: Instantiating and running the push parser -* Java Differences:: Differences between C/C++ and Java Grammars -* Java Declarations Summary:: List of Bison declarations used with Java - - -File: bison.info, Node: Java Bison Interface, Next: Java Semantic Values, Up: Java Parsers - -10.3.1 Java Bison Interface ---------------------------- - -The Java parser skeletons are selected using the ‘%language "Java"’ -directive or the ‘-L java’/‘--language=java’ option. - - When generating a Java parser, ‘bison BASENAME.y’ will create a -single Java source file named ‘BASENAME.java’ containing the parser -implementation. Using a grammar file without a ‘.y’ suffix is currently -broken. The basename of the parser implementation file can be changed -by the ‘%file-prefix’ directive or the ‘-b’/‘--file-prefix’ option. The -entire parser implementation file name can be changed by the ‘%output’ -directive or the ‘-o’/‘--output’ option. The parser implementation file -contains a single class for the parser. - - You can create documentation for generated parsers using Javadoc. - - Contrary to C parsers, Java parsers do not use global variables; the -state of the parser is always local to an instance of the parser class. -Therefore, all Java parsers are "pure", and the ‘%define api.pure’ -directive does nothing when used in Java. - - GLR parsers are currently unsupported in Java. Do not use the -‘glr-parser’ directive. - - No header file can be generated for Java parsers. Do not use the -‘%header’ directive or the ‘-d’/‘-H’/‘--header’ options. - - Currently, support for tracing is always compiled in. 
Thus the -‘%define parse.trace’ and ‘%token-table’ directives and the -‘-t’/‘--debug’ and ‘-k’/‘--token-table’ options have no effect. This -may change in the future to eliminate unused code in the generated -parser, so use ‘%define parse.trace’ explicitly if needed. Also, in the -future the ‘%token-table’ directive might enable a public interface to -access the token names and codes. - - Getting a "code too large" error from the Java compiler means the -code hit the 64KB bytecode per method limitation of the Java class file. -Try reducing the amount of code in actions and static initializers; -otherwise, report a bug so that the parser skeleton will be improved. - - -File: bison.info, Node: Java Semantic Values, Next: Java Location Values, Prev: Java Bison Interface, Up: Java Parsers - -10.3.2 Java Semantic Values ---------------------------- - -There is no ‘%union’ directive in Java parsers. Instead, the semantic -values' types (class names) should be specified in the ‘%nterm’ or -‘%token’ directive: - - %nterm expr assignment_expr term factor - %nterm number - - By default, the semantic stack is declared to have ‘Object’ members, -which means that the class types you specify can be of any class. To -improve the type safety of the parser, you can declare the common -superclass of all the semantic values using the ‘%define api.value.type’ -directive. For example, after the following declaration: - - %define api.value.type {ASTNode} - -any ‘%token’, ‘%nterm’ or ‘%type’ specifying a semantic type which is -not a subclass of ‘ASTNode’, will cause a compile-time error. - - Types used in the directives may be qualified with a package name. -Primitive data types are accepted for Java version 1.5 or later. Note -that in this case the autoboxing feature of Java 1.5 will be used. -Generic types may not be used; this is due to a limitation in the -implementation of Bison, and may change in future releases. 
- - Java parsers do not support ‘%destructor’, since the language adopts -garbage collection. The parser will try to hold references to semantic -values for as little time as needed. - - Java parsers do not support ‘%printer’, as ‘toString()’ can be used -to print the semantic values. This however may change (in a -backwards-compatible way) in future versions of Bison. - - -File: bison.info, Node: Java Location Values, Next: Java Parser Interface, Prev: Java Semantic Values, Up: Java Parsers - -10.3.3 Java Location Values ---------------------------- - -When the directive ‘%locations’ is used, the Java parser supports -location tracking, see *note Tracking Locations::. An auxiliary -user-defined class defines a “position”, a single point in a file; Bison -itself defines a class representing a “location”, a range composed of a -pair of positions (possibly spanning several files). The location class -is an inner class of the parser; the name is ‘Location’ by default, and -may also be renamed using ‘%define api.location.type {CLASS-NAME}’. - - The location class treats the position as a completely opaque value. -By default, the class name is ‘Position’, but this can be changed with -‘%define api.position.type {CLASS-NAME}’. This class must be supplied -by the user. - - -- Instance Variable of Location: Position begin - -- Instance Variable of Location: Position end - The first, inclusive, position of the range, and the first beyond. - - -- Constructor on Location: Location (Position LOC) - Create a ‘Location’ denoting an empty range located at a given - point. - - -- Constructor on Location: Location (Position BEGIN, Position END) - Create a ‘Location’ from the endpoints of the range. - - -- Method on Location: String toString () - Prints the range represented by the location. For this to work - properly, the position class should override the ‘equals’ and - ‘toString’ methods appropriately. 
- - -File: bison.info, Node: Java Parser Interface, Next: Java Parser Context Interface, Prev: Java Location Values, Up: Java Parsers - -10.3.4 Java Parser Interface ----------------------------- - -The name of the generated parser class defaults to ‘YYParser’. The ‘YY’ -prefix may be changed using the ‘%define api.prefix’. Alternatively, -use ‘%define api.parser.class {NAME}’ to give a custom name to the -class. The interface of this class is detailed below. - - By default, the parser class has package visibility. A declaration -‘%define api.parser.public’ will change to public visibility. Remember -that, according to the Java language specification, the name of the -‘.java’ file should match the name of the class in this case. -Similarly, you can use ‘api.parser.abstract’, ‘api.parser.final’ and -‘api.parser.strictfp’ with the ‘%define’ declaration to add other -modifiers to the parser class. A single ‘%define api.parser.annotations -{ANNOTATIONS}’ directive can be used to add any number of annotations to -the parser class. - - The Java package name of the parser class can be specified using the -‘%define package’ directive. The superclass and the implemented -interfaces of the parser class can be specified with the ‘%define -api.parser.extends’ and ‘%define api.parser.implements’ directives. - - The parser class defines an inner class, ‘Location’, that is used for -location tracking (see *note Java Location Values::), and a inner -interface, ‘Lexer’ (see *note Java Scanner Interface::). Other than -these inner class/interface, and the members described in the interface -below, all the other members and fields are preceded with a ‘yy’ or ‘YY’ -prefix to avoid clashes with user code. - - The parser class can be extended using the ‘%parse-param’ directive. -Each occurrence of the directive will add a ‘protected final’ field to -the parser class, and an argument to its constructor, which initializes -them automatically. 
- - -- Constructor on YYParser: YYParser (LEX_PARAM, ..., PARSE_PARAM, ...) - Build a new parser object with embedded ‘%code lexer’. There are - no parameters, unless ‘%param’s and/or ‘%parse-param’s and/or - ‘%lex-param’s are used. - - Use ‘%code init’ for code added to the start of the constructor - body. This is especially useful to initialize superclasses. Use - ‘%define init_throws’ to specify any uncaught exceptions. - - -- Constructor on YYParser: YYParser (Lexer LEXER, PARSE_PARAM, ...) - Build a new parser object using the specified scanner. There are - no additional parameters unless ‘%param’s and/or ‘%parse-param’s - are used. - - If the scanner is defined by ‘%code lexer’, this constructor is - declared ‘protected’ and is called automatically with a scanner - created with the correct ‘%param’s and/or ‘%lex-param’s. - - Use ‘%code init’ for code added to the start of the constructor - body. This is especially useful to initialize superclasses. Use - ‘%define init_throws’ to specify any uncaught exceptions. - - -- Method on YYParser: boolean parse () - Run the syntactic analysis, and return ‘true’ on success, ‘false’ - otherwise. - - -- Method on YYParser: boolean getErrorVerbose () - -- Method on YYParser: void setErrorVerbose (boolean VERBOSE) - Get or set the option to produce verbose error messages. These are - only available with ‘%define parse.error detailed’ (or ‘verbose’), - which also turns on verbose error messages. - - -- Method on YYParser: void yyerror (String MSG) - -- Method on YYParser: void yyerror (Position POS, String MSG) - -- Method on YYParser: void yyerror (Location LOC, String MSG) - Print an error message using the ‘yyerror’ method of the scanner - instance in use. The ‘Location’ and ‘Position’ parameters are - available only if location tracking is active. - - -- Method on YYParser: boolean recovering () - During the syntactic analysis, return ‘true’ if recovering from a - syntax error. *Note Error Recovery::. 
- - -- Method on YYParser: java.io.PrintStream getDebugStream () - -- Method on YYParser: void setDebugStream (java.io.PrintStream O) - Get or set the stream used for tracing the parsing. It defaults to - ‘System.err’. - - -- Method on YYParser: int getDebugLevel () - -- Method on YYParser: void setDebugLevel (int L) - Get or set the tracing level. Currently its value is either 0, no - trace, or nonzero, full tracing. - - -- Constant of YYParser: String bisonVersion - -- Constant of YYParser: String bisonSkeleton - Identify the Bison version and skeleton used to generate this - parser. - - If you enabled token internationalization (*note Token I18n::), you -must provide the parser with the following function: - - -- Static Method of YYParser: String i18n (string S) - Return the translation of S in the user's language. As an example: - - %code { - static ResourceBundle myResources - = ResourceBundle.getBundle("domain-name"); - static final String i18n(String s) { - return myResources.getString(s); - } - } - - -File: bison.info, Node: Java Parser Context Interface, Next: Java Scanner Interface, Prev: Java Parser Interface, Up: Java Parsers - -10.3.5 Java Parser Context Interface ------------------------------------- - -The parser context provides information to build error reports when you -invoke ‘%define parse.error custom’. - - -- Type of YYParser: SymbolKind - An enum of all the grammar symbols, tokens and nonterminals. Its - enumerators are forged from the symbol names: - - public enum SymbolKind - { - S_YYEOF(0), /* "end of file" */ - S_YYERROR(1), /* error */ - S_YYUNDEF(2), /* "invalid token" */ - S_BANG(3), /* "!" */ - S_PLUS(4), /* "+" */ - S_MINUS(5), /* "-" */ - [...] - S_NUM(13), /* "number" */ - S_NEG(14), /* NEG */ - S_YYACCEPT(15), /* $accept */ - S_input(16), /* input */ - S_line(17); /* line */ - }; - - -- Method on YYParser.SymbolKind: String getName () - The name of this symbol, possibly translated. 
- - -- Method on YYParser.Context: YYParser.SymbolKind getToken () - The kind of the lookahead. Return ‘null’ iff there is no - lookahead. - - -- Method on YYParser.Context: YYParser.Location getLocation () - The location of the lookahead. - - -- Method on YYParser.Context: int getExpectedTokens - (YYParser.SymbolKind[] ARGV, int ARGC) - Fill ARGV with the expected tokens, which never includes - ‘SymbolKind.S_YYERROR’, or ‘SymbolKind.S_YYUNDEF’. - - Never put more than ARGC elements into ARGV, and on success return - the number of tokens stored in ARGV. If there are more expected - tokens than ARGC, fill ARGV up to ARGC and return 0. If there are - no expected tokens, also return 0, but set ‘argv[0]’ to ‘null’. - - If ARGV is null, return the size needed to store all the possible - values, which is always less than ‘YYNTOKENS’. - - -File: bison.info, Node: Java Scanner Interface, Next: Java Action Features, Prev: Java Parser Context Interface, Up: Java Parsers - -10.3.6 Java Scanner Interface ------------------------------ - -There are two possible ways to interface a Bison-generated Java parser -with a scanner: the scanner may be defined by ‘%code lexer’, or defined -elsewhere. In either case, the scanner has to implement the ‘Lexer’ -inner interface of the parser class. This interface also contains -constants for all user-defined token names and the predefined ‘YYEOF’ -token. - - In the first case, the body of the scanner class is placed in ‘%code -lexer’ blocks. If you want to pass parameters from the parser -constructor to the scanner constructor, specify them with ‘%lex-param’; -they are passed before ‘%parse-param’s to the constructor. - - In the second case, the scanner has to implement the ‘Lexer’ -interface, which is defined within the parser class (e.g., -‘YYParser.Lexer’). The constructor of the parser object will then -accept an object implementing the interface; ‘%lex-param’ is not used in -this case. 
- - In both cases, the scanner has to implement the following methods. - - -- Method on Lexer: void yyerror (Location LOC, String MSG) - This method is defined by the user to emit an error message. The - first parameter is omitted if location tracking is not active. Its - type can be changed using ‘%define api.location.type {CLASS-NAME}’. - - -- Method on Lexer: int yylex () - Return the next token. Its type is the return value, its semantic - value and location are saved and returned by the their methods in - the interface. Not needed for push-only parsers. - - Use ‘%define lex_throws’ to specify any uncaught exceptions. - Default is ‘java.io.IOException’. - - -- Method on Lexer: Position getStartPos () - -- Method on Lexer: Position getEndPos () - Return respectively the first position of the last token that - ‘yylex’ returned, and the first position beyond it. These methods - are not needed unless location tracking and pull parsing are - active. - - They should return new objects for each call, to avoid that all the - symbol share the same Position boundaries. - - The return type can be changed using ‘%define api.position.type - {CLASS-NAME}’. - - -- Method on Lexer: Object getLVal () - Return the semantic value of the last token that yylex returned. - Not needed for push-only parsers. - - The return type can be changed using ‘%define api.value.type - {CLASS-NAME}’. - - -- Method on Lexer: void reportSyntaxError (YYParser.Context CTX) - If you invoke ‘%define parse.error custom’ (*note Bison - Declarations::), then the parser no longer passes syntax error - messages to ‘yyerror’, rather it delegates that task to the user by - calling the ‘reportSyntaxError’ function. - - Whether it uses ‘yyerror’ is up to the user. - - Here is an example of a reporting function (*note Java Parser - Context Interface::). - - public void reportSyntaxError(YYParser.Context ctx) { - System.err.print(ctx.getLocation() + ": syntax error"); - // Report the expected tokens. 
- { - final int TOKENMAX = 5; - YYParser.SymbolKind[] arg = new YYParser.SymbolKind[TOKENMAX]; - int n = ctx.getExpectedTokens(arg, TOKENMAX); - for (int i = 0; i < n; ++i) - System.err.print((i == 0 ? ": expected " : " or ") - + arg[i].getName()); - } - // Report the unexpected token which triggered the error. - { - YYParser.SymbolKind lookahead = ctx.getToken(); - if (lookahead != null) - System.err.print(" before " + lookahead.getName()); - } - System.err.println(""); - } - - This implementation is inappropriate for internationalization, see - the ‘c/bistromathic’ example for a better alternative. - - -File: bison.info, Node: Java Action Features, Next: Java Push Parser Interface, Prev: Java Scanner Interface, Up: Java Parsers - -10.3.7 Special Features for Use in Java Actions ------------------------------------------------ - -The following special constructs can be uses in Java actions. Other -analogous C action features are currently unavailable for Java. - - Use ‘%define throws’ to specify any uncaught exceptions from parser -actions, and initial actions specified by ‘%initial-action’. - - -- Variable: $N - The semantic value for the Nth component of the current rule. This - may not be assigned to. *Note Java Semantic Values::. - - -- Variable: $N - Like ‘$N’ but specifies a alternative type TYPEALT. *Note Java - Semantic Values::. - - -- Variable: $$ - The semantic value for the grouping made by the current rule. As a - value, this is in the base type (‘Object’ or as specified by - ‘%define api.value.type’) as in not cast to the declared subtype - because casts are not allowed on the left-hand side of Java - assignments. Use an explicit Java cast if the correct subtype is - needed. *Note Java Semantic Values::. - - -- Variable: $$ - Same as ‘$$’ since Java always allow assigning to the base type. - Perhaps we should use this and ‘$<>$’ for the value and ‘$$’ for - setting the value but there is currently no easy way to distinguish - these constructs. 
*Note Java Semantic Values::. - - -- Variable: @N - The location information of the Nth component of the current rule. - This may not be assigned to. *Note Java Location Values::. - - -- Variable: @$ - The location information of the grouping made by the current rule. - *Note Java Location Values::. - - -- Statement: return YYABORT ; - Return immediately from the parser, indicating failure. *Note Java - Parser Interface::. - - -- Statement: return YYACCEPT ; - Return immediately from the parser, indicating success. *Note Java - Parser Interface::. - - -- Statement: return YYERROR ; - Start error recovery (without printing an error message). *Note - Error Recovery::. - - -- Function: boolean recovering () - Return whether error recovery is being done. In this state, the - parser reads token until it reaches a known state, and then - restarts normal operation. *Note Error Recovery::. - - -- Function: void yyerror (String MSG) - -- Function: void yyerror (Position LOC, String MSG) - -- Function: void yyerror (Location LOC, String MSG) - Print an error message using the ‘yyerror’ method of the scanner - instance in use. The ‘Location’ and ‘Position’ parameters are - available only if location tracking is active. - - -File: bison.info, Node: Java Push Parser Interface, Next: Java Differences, Prev: Java Action Features, Up: Java Parsers - -10.3.8 Java Push Parser Interface ---------------------------------- - -Normally, Bison generates a pull parser for Java. The following Bison -declaration says that you want the parser to be a push parser (*note -%define Summary::): - - %define api.push-pull push - - Most of the discussion about the Java pull Parser Interface, (*note -Java Parser Interface::) applies to the push parser interface as well. - - When generating a push parser, the method ‘push_parse’ is created -with the following signature (depending on if locations are enabled). 
- - -- Method on YYParser: void push_parse (int TOKEN, Object YYLVAL) - -- Method on YYParser: void push_parse (int TOKEN, Object YYLVAL, - Location YYLOC) - -- Method on YYParser: void push_parse (int TOKEN, Object YYLVAL, - Position YYPOS) - - The primary difference with respect to a pull parser is that the -parser method ‘push_parse’ is invoked repeatedly to parse each token. -This function is available if either the ‘%define api.push-pull push’ or -‘%define api.push-pull both’ declaration is used (*note %define -Summary::). The ‘Location’ and ‘Position’ parameters are available only -if location tracking is active. - - The value returned by the ‘push_parse’ method is one of the -following: 0 (success), 1 (abort), 2 (memory exhaustion), or -‘YYPUSH_MORE’. This new value, ‘YYPUSH_MORE’, may be returned if more -input is required to finish parsing the grammar. - - If ‘api.push-pull’ is defined as ‘both’, then the generated parser -class will also implement the ‘parse’ method. This method's body is a -loop that repeatedly invokes the scanner and then passes the values -obtained from the scanner to the ‘push_parse’ method. - - There is one additional complication. Technically, the push parser -does not need to know about the scanner (i.e. an object implementing -the ‘YYParser.Lexer’ interface), but it does need access to the -‘yyerror’ method. Currently, the ‘yyerror’ method is defined in the -‘YYParser.Lexer’ interface. Hence, an implementation of that interface -is still required in order to provide an implementation of ‘yyerror’. -The current approach (and subject to change) is to require the -‘YYParser’ constructor to be given an object implementing the -‘YYParser.Lexer’ interface. This object need only implement the -‘yyerror’ method; the other methods can be stubbed since they will never -be invoked. The simplest way to do this is to add a trivial scanner -implementation to your grammar file using whatever implementation of -‘yyerror’ is desired. 
The following code sample shows a simple way to -accomplish this. - - %code lexer - { - public Object getLVal () {return null;} - public int yylex () {return 0;} - public void yyerror (String s) {System.err.println(s);} - } - - -File: bison.info, Node: Java Differences, Next: Java Declarations Summary, Prev: Java Push Parser Interface, Up: Java Parsers - -10.3.9 Differences between C/C++ and Java Grammars --------------------------------------------------- - -The different structure of the Java language forces several differences -between C/C++ grammars, and grammars designed for Java parsers. This -section summarizes these differences. - - • Java has no a preprocessor, so obviously the ‘YYERROR’, ‘YYACCEPT’, - ‘YYABORT’ symbols (*note Table of Symbols::) cannot be macros. - Instead, they should be preceded by ‘return’ when they appear in an - action. The actual definition of these symbols is opaque to the - Bison grammar, and it might change in the future. The only - meaningful operation that you can do, is to return them. *Note - Java Action Features::. - - Note that of these three symbols, only ‘YYACCEPT’ and ‘YYABORT’ - will cause a return from the ‘yyparse’ method(1). - - • Java lacks unions, so ‘%union’ has no effect. Instead, semantic - values have a common base type: ‘Object’ or as specified by - ‘%define api.value.type’. Angle brackets on ‘%token’, ‘type’, ‘$N’ - and ‘$$’ specify subtypes rather than fields of an union. The type - of ‘$$’, even with angle brackets, is the base type since Java - casts are not allow on the left-hand side of assignments. Also, - ‘$N’ and ‘@N’ are not allowed on the left-hand side of assignments. - *Note Java Semantic Values::, and *note Java Action Features::. - - • The prologue declarations have a different meaning than in C/C++ - code. - ‘%code imports’ - blocks are placed at the beginning of the Java source code. - They may include copyright notices. For a ‘package’ - declarations, use ‘%define api.package’ instead. 
- - unqualified ‘%code’ - blocks are placed inside the parser class. - - ‘%code lexer’ - blocks, if specified, should include the implementation of the - scanner. If there is no such block, the scanner can be any - class that implements the appropriate interface (*note Java - Scanner Interface::). - - Other ‘%code’ blocks are not supported in Java parsers. In - particular, ‘%{ ... %}’ blocks should not be used and may give an - error in future versions of Bison. - - The epilogue has the same meaning as in C/C++ code and it can be - used to define other classes used by the parser _outside_ the - parser class. - - ---------- Footnotes ---------- - - (1) Java parsers include the actions in a separate method than -‘yyparse’ in order to have an intuitive syntax that corresponds to these -C macros. - - -File: bison.info, Node: Java Declarations Summary, Prev: Java Differences, Up: Java Parsers - -10.3.10 Java Declarations Summary ---------------------------------- - -This summary only include declarations specific to Java or have special -meaning when used in a Java parser. - - -- Directive: %language "Java" - Generate a Java class for the parser. - - -- Directive: %lex-param {TYPE NAME} - A parameter for the lexer class defined by ‘%code lexer’ _only_, - added as parameters to the lexer constructor and the parser - constructor that _creates_ a lexer. Default is none. *Note Java - Scanner Interface::. - - -- Directive: %parse-param {TYPE NAME} - A parameter for the parser class added as parameters to - constructor(s) and as fields initialized by the constructor(s). - Default is none. *Note Java Parser Interface::. - - -- Directive: %token TOKEN ... - Declare tokens. Note that the angle brackets enclose a Java - _type_. *Note Java Semantic Values::. - - -- Directive: %nterm NONTERMINAL ... - Declare the type of nonterminals. Note that the angle brackets - enclose a Java _type_. *Note Java Semantic Values::. - - -- Directive: %code { CODE ... 
} - Code appended to the inside of the parser class. *Note Java - Differences::. - - -- Directive: %code imports { CODE ... } - Code inserted just after the ‘package’ declaration. *Note Java - Differences::. - - -- Directive: %code init { CODE ... } - Code inserted at the beginning of the parser constructor body. - *Note Java Parser Interface::. - - -- Directive: %code lexer { CODE ... } - Code added to the body of an inner lexer class within the parser - class. *Note Java Scanner Interface::. - - -- Directive: %% CODE ... - Code (after the second ‘%%’) appended to the end of the file, - _outside_ the parser class. *Note Java Differences::. - - -- Directive: %{ CODE ... %} - Not supported. Use ‘%code imports’ instead. *Note Java - Differences::. - - -- Directive: %define api.prefix {PREFIX} - The prefix of the parser class name ‘PREFIXParser’ if ‘%define - api.parser.class’ is not used. Default is ‘YY’. *Note Java Bison - Interface::. - - -- Directive: %define api.parser.abstract - Whether the parser class is declared ‘abstract’. Default is false. - *Note Java Bison Interface::. - - -- Directive: %define api.parser.annotations {ANNOTATIONS} - The Java annotations for the parser class. Default is none. *Note - Java Bison Interface::. - - -- Directive: %define api.parser.class {NAME} - The name of the parser class. Default is ‘YYParser’ or - ‘API.PREFIXParser’. *Note Java Bison Interface::. - - -- Directive: %define api.parser.extends {SUPERCLASS} - The superclass of the parser class. Default is none. *Note Java - Bison Interface::. - - -- Directive: %define api.parser.final - Whether the parser class is declared ‘final’. Default is false. - *Note Java Bison Interface::. - - -- Directive: %define api.parser.implements {INTERFACES} - The implemented interfaces of the parser class, a comma-separated - list. Default is none. *Note Java Bison Interface::. - - -- Directive: %define api.parser.public - Whether the parser class is declared ‘public’. Default is false.
- *Note Java Bison Interface::. - - -- Directive: %define api.parser.strictfp - Whether the parser class is declared ‘strictfp’. Default is false. - *Note Java Bison Interface::. - - -- Directive: %define init_throws {EXCEPTIONS} - The exceptions thrown by ‘%code init’ from the parser class - constructor. Default is none. *Note Java Parser Interface::. - - -- Directive: %define lex_throws {EXCEPTIONS} - The exceptions thrown by the ‘yylex’ method of the lexer, a - comma-separated list. Default is ‘java.io.IOException’. *Note - Java Scanner Interface::. - - -- Directive: %define api.location.type {CLASS} - The name of the class used for locations (a range between two - positions). This class is generated as an inner class of the - parser class by ‘bison’. Default is ‘Location’. Formerly named - ‘location_type’. *Note Java Location Values::. - - -- Directive: %define api.package {PACKAGE} - The package to put the parser class in. Default is none. *Note - Java Bison Interface::. Renamed from ‘package’ in Bison 3.7. - - -- Directive: %define api.position.type {CLASS} - The name of the class used for positions. This class must be - supplied by the user. Default is ‘Position’. Formerly named - ‘position_type’. *Note Java Location Values::. - - -- Directive: %define api.value.type {CLASS} - The base type of semantic values. Default is ‘Object’. *Note Java - Semantic Values::. - - -- Directive: %define throws {EXCEPTIONS} - The exceptions thrown by user-supplied parser actions and - ‘%initial-action’, a comma-separated list. Default is none. *Note - Java Parser Interface::. 
- - -File: bison.info, Node: History, Next: Versioning, Prev: Other Languages, Up: Top - -11 A Brief History of the Greater Ungulates -******************************************* - -* Menu: - -* Yacc:: The original Yacc -* yacchack:: An obscure early implementation of reentrancy -* Byacc:: Berkeley Yacc -* Bison:: This program -* Other Ungulates:: Similar programs - - -File: bison.info, Node: Yacc, Next: yacchack, Up: History - -11.1 The ancestral Yacc -======================= - -Bison originated as a workalike of a program called Yacc -- Yet Another -Compiler Compiler.(1) Yacc was written at Bell Labs as part of the very -early development of Unix; one of its first uses was to develop the -original Portable C Compiler, pcc. The same person, Steven C. Johnson, -wrote Yacc and the original pcc. - - According to the author (2), Yacc was first invented in 1971 and -reached a form recognizably similar to the C version in 1973. Johnson -published ‘A Portable Compiler: Theory and Practice’ (*note Johnson -1978::). - - Yacc was not itself originally written in C but in its predecessor -language, B. This goes far to explain its odd interface, which exposes a -large number of global variables rather than bundling them into a C -struct. All other Yacc-like programs are descended from the C port of -Yacc. - - Yacc, through both its deployment in pcc and as a standalone tool for -generating other parsers, helped drive the early spread of Unix. Yacc -itself, however, passed out of use after around 1990 when workalikes -with less restrictive licenses and more features became available. - - Original Yacc became generally available when Caldera released the -sources of old versions of Unix up to V7 and 32V in 2002. By that time -it had been long superseded in practical use by Bison even on Yacc's -native Unix variants. 
- - ---------- Footnotes ---------- - - (1) Because of the acronym, the name is sometimes given as "YACC", -but Johnson used "Yacc" in the descriptive paper included in the Version -7 Unix Manual -(https://s3.amazonaws.com/plan9-bell-labs/7thEdMan/v7vol2b.pdf). - - (2) - - -File: bison.info, Node: yacchack, Next: Byacc, Prev: Yacc, Up: History - -11.2 yacchack -============= - -One of the deficiencies of original Yacc was its inability to produce -reentrant parsers. This was first remedied by a set of drop-in -modifications called "yacchack", published by Eric S. Raymond on USENET -around 1983. This code was quickly forgotten when zoo and Berkeley Yacc -became available a few years later. - - -File: bison.info, Node: Byacc, Next: Bison, Prev: yacchack, Up: History - -11.3 Berkeley Yacc -================== - -Berkeley Yacc was originated in 1985 by Robert Corbett (*note Corbett -1984::). It was originally named "zoo", but by October 1989 it became -known as Berkeley Yacc or byacc. - - Berkeley Yacc had three advantages over the ancestral Yacc: it -generated faster parsers, it could generate reentrant parsers, and the -source code was released to the public domain rather than being under an -AT&T proprietary license. The better performance came from implementing -techniques from DeRemer and Penello's seminal paper on LALR parsing -(*note DeRemer 1982::). - - Use of byacc spread rapidly due to its public domain license. -However, once Bison became available, byacc itself passed out of general -use. - - -File: bison.info, Node: Bison, Next: Other Ungulates, Prev: Byacc, Up: History - -11.4 Bison -========== - -Robert Corbett actually wrote two (closely related) LALR parsers in -1985, both using the DeRemer/Penello techniques. One was "zoo", the -other was "Byson". In 1987 Richard Stallman began working on Byson; the -name changed to Bison and the interface became Yacc-compatible. 
- - The main visible difference between Yacc and Byson/Bison at the time -of Byson's first release is that Byson supported the ‘@N’ construct -(giving access to the starting and ending line number and character -number associated with any of the symbols in the current rule). - - There was also the command ‘%expect N’ which said not to mention the -conflicts if there are N shift/reduce conflicts and no reduce/reduce -conflicts. In more recent versions of Bison, ‘%expect’ and its -‘%expect-rr’ variant for reduce/reduce conflicts can be applied to -individual rules. - - Later versions of Bison added many more new features. - - Bison error reporting has been improved in various ways. Notably. -ancestral Yacc and Byson did not have carets in error messages. - - Compared to Yacc Bison uses a faster but less space-efficient -encoding for the parse tables (*note Corbett 1984::), and more modern -techniques for generating the lookahead sets (*note DeRemer 1982::). -This approach is the standard one since then. - - (It has also been plausibly alleged the differences in the algorithms -stem mainly from the horrible kludges that Johnson had to perpetrate to -make the original Yacc fit in a PDP-11.) - - Named references, semantic predicates, ‘%locations’, ‘%glr-parser’, -‘%printer’, %destructor, dumps to DOT, ‘%parse-param’, ‘%lex-param’, and -dumps to XSLT, LAC, and IELR(1) generation are new in Bison. - - Bison also has many features to support C++ that were not present in -the ancestral Yacc or Byson. - - Bison obsolesced all previous Yacc variants and workalikes generating -C by 1995. - - -File: bison.info, Node: Other Ungulates, Prev: Bison, Up: History - -11.5 Other Ungulates -==================== - -The Yacc concept has frequently been ported to other languages. Some of -the early ports are extinct along with the languages that hosted them; -others have been superseded by parser skeletons shipped with Bison. - - However, independent implementations persist. 
One of the best-known -still in use is David Beazley's "PLY" (Python Lex-Yacc) for Python. -Another is goyacc, supporting the Go language. An "ocamlyacc" is -shipped as part of the Ocaml compiler suite. - - -File: bison.info, Node: Versioning, Next: FAQ, Prev: History, Up: Top - -12 Bison Version Compatibility: Best Practices -********************************************** - -Bison provides a Yacc compatibility mode in which it strives to conform -with the POSIX standard. Grammar files which are written to the POSIX -standard, and do not take advantage of any of the special capabilities -of Bison, should work with many versions of Bison without modification. - - All other features of Bison are particular to Bison, and are -changing. Bison is actively maintained and continuously evolving. It -should come as no surprise that an older version of Bison will not -accept Bison source code which uses newer features that do no not exist -at all in the older Bison. Regrettably, in spite of reasonable effort -to maintain compatibility, the reverse situation may also occur: it may -happen that code developed using an older version of Bison does not -build with a newer version of Bison without modifications. - - Because Bison is a code generation tool, it is possible to retain its -output and distribute that to the users of the program. The users are -then not required to have Bison installed at all, only an implementation -of the programming language, such as C, which is required for processing -the generated output. - - It is the output of Bison that is intended to be of the utmost -portability. So, that is to say, whereas the Bison grammar source code -may have a dependency on specific versions of Bison, the generated -parser from any version of Bison should work with with a large number of -implementations of C, or whatever language is applicable. 
- - The recommended best practice for using Bison (in the context of -software that is distributed in source code form) is to ship the -generated parser to the downstream users. Only those downstream users -who engage in active development of the program who need to make changes -to the grammar file need to have Bison installed at all, and those users -can install the specific version of Bison which is required. - - Following this recommended practice also makes it possible to use a -more recent Bison than what is available to users through operating -system distributions, thereby taking advantage of the latest techniques -that Bison allows. - - Some features of Bison have been, or are being adopted into other -Yacc-like programs. Therefore it might seem that is a good idea to -write grammar code which targets multiple implementations, similarly to -the way C programs are often written to target multiple compilers and -language versions. Other than the Yacc subset described by POSIX, the -Bison language is not rigorously standardized. When a Bison feature is -adopted by another parser generator, it may be initially compatible with -that version of Bison on which it was based, but the compatibility may -degrade going forward. Developers who strive to make their Bison code -simultaneously compatible with other parser generators are encouraged to -nevertheless use specific versions of all generators, and still follow -the recommended practice of shipping generated output. For example, a -project can internally maintain compatibility with multiple generators, -and choose the output of a particular one to ship to the users. Or -else, the project could ship all of the outputs, arranging for a way for -the user to specify which one is used to build the program. - - -File: bison.info, Node: FAQ, Next: Table of Symbols, Prev: Versioning, Up: Top - -13 Frequently Asked Questions -***************************** - -Several questions about Bison come up occasionally. 
Here some of them -are addressed. - -* Menu: - -* Memory Exhausted:: Breaking the Stack Limits -* How Can I Reset the Parser:: ‘yyparse’ Keeps some State -* Strings are Destroyed:: ‘yylval’ Loses Track of Strings -* Implementing Gotos/Loops:: Control Flow in the Calculator -* Multiple start-symbols:: Factoring closely related grammars -* Secure? Conform?:: Is Bison POSIX safe? -* Enabling Relocatability:: Moving Bison/using it through network shares -* I can't build Bison:: Troubleshooting -* Where can I find help?:: Troubleshouting -* Bug Reports:: Troublereporting -* More Languages:: Parsers in C++, Java, and so on -* Beta Testing:: Experimenting development versions -* Mailing Lists:: Meeting other Bison users - - -File: bison.info, Node: Memory Exhausted, Next: How Can I Reset the Parser, Up: FAQ - -13.1 Memory Exhausted -===================== - - My parser returns with error with a ‘memory exhausted’ message. - What can I do? - - This question is already addressed elsewhere, see *note Recursion::. - - -File: bison.info, Node: How Can I Reset the Parser, Next: Strings are Destroyed, Prev: Memory Exhausted, Up: FAQ - -13.2 How Can I Reset the Parser -=============================== - -The following phenomenon has several symptoms, resulting in the -following typical questions: - - I invoke ‘yyparse’ several times, and on correct input it works - properly; but when a parse error is found, all the other calls fail - too. How can I reset the error flag of ‘yyparse’? - -or - - My parser includes support for an ‘#include’-like feature, in which - case I run ‘yyparse’ from ‘yyparse’. This fails although I did - specify ‘%define api.pure full’. - - These problems typically come not from Bison itself, but from -Lex-generated scanners. Because these scanners use large buffers for -speed, they might not notice a change of input file. 
As a -demonstration, consider the following source file, ‘first-line.l’: - - %{ - #include - #include - %} - %% - .*\n ECHO; return 1; - %% - int - yyparse (char const *file) - { - yyin = fopen (file, "r"); - if (!yyin) - { - perror ("fopen"); - exit (EXIT_FAILURE); - } - /* One token only. */ - yylex (); - if (fclose (yyin) != 0) - { - perror ("fclose"); - exit (EXIT_FAILURE); - } - return 0; - } - - int - main (void) - { - yyparse ("input"); - yyparse ("input"); - return 0; - } - -If the file ‘input’ contains - - input:1: Hello, - input:2: World! - -then instead of getting the first line twice, you get: - - $ flex -ofirst-line.c first-line.l - $ gcc -ofirst-line first-line.c -ll - $ ./first-line - input:1: Hello, - input:2: World! - - Therefore, whenever you change ‘yyin’, you must tell the -Lex-generated scanner to discard its current buffer and switch to the -new one. This depends upon your implementation of Lex; see its -documentation for more. For Flex, it suffices to call ‘YY_FLUSH_BUFFER’ -after each change to ‘yyin’. If your Flex-generated scanner needs to -read from several input streams to handle features like include files, -you might consider using Flex functions like ‘yy_switch_to_buffer’ that -manipulate multiple input buffers. - - If your Flex-generated scanner uses start conditions (*note Start -conditions: (flex)Start conditions.), you might also want to reset the -scanner's state, i.e., go back to the initial start condition, through a -call to ‘BEGIN (0)’. - - -File: bison.info, Node: Strings are Destroyed, Next: Implementing Gotos/Loops, Prev: How Can I Reset the Parser, Up: FAQ - -13.3 Strings are Destroyed -========================== - - My parser seems to destroy old strings, or maybe it loses track of - them. Instead of reporting ‘"foo", "bar"’, it reports ‘"bar", - "bar"’, or even ‘"foo\nbar", "bar"’. 
- - This error is probably the single most frequent "bug report" sent to -Bison lists, but is only concerned with a misunderstanding of the role -of the scanner. Consider the following Lex code: - - %{ - #include - char *yylval = NULL; - %} - %% - .* yylval = yytext; return 1; - \n continue; - %% - int - main () - { - /* Similar to using $1, $2 in a Bison action. */ - char *fst = (yylex (), yylval); - char *snd = (yylex (), yylval); - printf ("\"%s\", \"%s\"\n", fst, snd); - return 0; - } - - If you compile and run this code, you get: - - $ flex -osplit-lines.c split-lines.l - $ gcc -osplit-lines split-lines.c -ll - $ printf 'one\ntwo\n' | ./split-lines - "one - two", "two" - -this is because ‘yytext’ is a buffer provided for _reading_ in the -action, but if you want to keep it, you have to duplicate it (e.g., -using ‘strdup’). Note that the output may depend on how your -implementation of Lex handles ‘yytext’. For instance, when given the -Lex compatibility option ‘-l’ (which triggers the option ‘%array’) Flex -generates a different behavior: - - $ flex -l -osplit-lines.c split-lines.l - $ gcc -osplit-lines split-lines.c -ll - $ printf 'one\ntwo\n' | ./split-lines - "two", "two" - - -File: bison.info, Node: Implementing Gotos/Loops, Next: Multiple start-symbols, Prev: Strings are Destroyed, Up: FAQ - -13.4 Implementing Gotos/Loops -============================= - - My simple calculator supports variables, assignments, and - functions, but how can I implement gotos, or loops? - - Although very pedagogical, the examples included in the document blur -the distinction to make between the parser--whose job is to recover the -structure of a text and to transmit it to subsequent modules of the -program--and the processing (such as the execution) of this structure. -This works well with so called straight line programs, i.e., precisely -those that have a straightforward execution model: execute simple -instructions one after the others. 
- - If you want a richer model, you will probably need to use the parser -to construct a tree that does represent the structure it has recovered; -this tree is usually called the “abstract syntax tree”, or “AST” for -short. Then, walking through this tree, traversing it in various ways, -will enable treatments such as its execution or its translation, which -will result in an interpreter or a compiler. - - This topic is way beyond the scope of this manual, and the reader is -invited to consult the dedicated literature. - - -File: bison.info, Node: Multiple start-symbols, Next: Secure? Conform?, Prev: Implementing Gotos/Loops, Up: FAQ - -13.5 Multiple start-symbols -=========================== - - I have several closely related grammars, and I would like to share - their implementations. In fact, I could use a single grammar but - with multiple entry points. - - Bison does not support multiple start-symbols, but there is a very -simple means to simulate them. If ‘foo’ and ‘bar’ are the two pseudo -start-symbols, then introduce two new tokens, say ‘START_FOO’ and -‘START_BAR’, and use them as switches from the real start-symbol: - - %token START_FOO START_BAR; - %start start; - start: - START_FOO foo - | START_BAR bar; - - These tokens prevent the introduction of new conflicts. As far as -the parser goes, that is all that is needed. - - Now the difficult part is ensuring that the scanner will send these -tokens first. If your scanner is hand-written, that should be -straightforward. If your scanner is generated by Lex, them there is -simple means to do it: recall that anything between ‘%{ ... %}’ after -the first ‘%%’ is copied verbatim in the top of the generated ‘yylex’ -function. Make sure a variable ‘start_token’ is available in the -scanner (e.g., a global variable or using ‘%lex-param’ etc.), and use -the following: - - /* Prologue. */ - %% - %{ - if (start_token) - { - int t = start_token; - start_token = 0; - return t; - } - %} - /* The rules. 
*/ - - -File: bison.info, Node: Secure? Conform?, Next: Enabling Relocatability, Prev: Multiple start-symbols, Up: FAQ - -13.6 Secure? Conform? -===================== - - Is Bison secure? Does it conform to POSIX? - - If you're looking for a guarantee or certification, we don't provide -it. However, Bison is intended to be a reliable program that conforms -to the POSIX specification for Yacc. If you run into problems, please -send us a bug report. - - -File: bison.info, Node: Enabling Relocatability, Next: I can't build Bison, Prev: Secure? Conform?, Up: FAQ - -13.7 Enabling Relocatability -============================ - -It has been a pain for many users of GNU packages for a long time that -packages are not relocatable. It means a user cannot copy a program, -installed by another user on the same machine, to his home directory, -and have it work correctly (including i18n). So many users need to go -through ‘configure; make; make install’ with all its dependencies, -options, and hurdles. - - Most package management systems, that allow the user to install -pre-built binaries of the packages, solve the "ease of installation" -problem, but they hardwire path names, usually to ‘/usr’ or -‘/usr/local’. This means that users need root privileges to install a -binary package, and prevents installing two different versions of the -same binary package. - - A relocatable program can be moved or copied to a different location -on the file system. It is possible to make symlinks to the installed -and moved programs, and invoke them through the symlink. It is possible -to do the same thing with a hard link _only_ if the hard link file is in -the same directory as the real program. - - To configure a program to be relocatable, add ‘--enable-relocatable’ -to the ‘configure’ command line. - - On some OSes the executables remember the location of shared -libraries and prefer them over any other search path. 
Therefore, such -an executable will look for its shared libraries first in the original -installation directory and only then in the current installation -directory. Thus, for reliability, it is best to also give a ‘--prefix’ -option pointing to a directory that does not exist now and which never -will be created, e.g. ‘--prefix=/nonexistent’. You may use -‘DESTDIR=DEST-DIR’ on the ‘make’ command line to avoid installing into -that directory. - - We do not recommend using a prefix writable by unprivileged users -(e.g. ‘/tmp/inst$$’) because such a directory can be recreated by an -unprivileged user after the original directory has been removed. We -also do not recommend prefixes that might be behind an automounter (e.g. -‘$HOME/inst$$’) because of the performance impact of directory -searching. - - Here's a sample installation run that takes into account all these -recommendations: - - ./configure --enable-relocatable --prefix=/nonexistent - make - make install DESTDIR=/tmp/inst$$ - - Installation with ‘--enable-relocatable’ will not work for setuid or -setgid executables, because such executables search only system library -paths for security reasons. - - The runtime penalty and size penalty are negligible on GNU/Linux -(just one system call more when an executable is launched), and small on -other systems (the wrapper program just sets an environment variable and -executes the real program). - - -File: bison.info, Node: I can't build Bison, Next: Where can I find help?, Prev: Enabling Relocatability, Up: FAQ - -13.8 I can't build Bison -======================== - - I can't build Bison because ‘make’ complains that ‘msgfmt’ is not - found. What should I do? - - Like most GNU packages with internationalization support, that -feature is turned on by default. If you have problems building in the -‘po’ subdirectory, it indicates that your system's internationalization -support is lacking. 
You can re-configure Bison with ‘--disable-nls’ to -turn off this support, or you can install GNU gettext from -<https://www.gnu.org/software/gettext/> and re-configure Bison. See the file -‘ABOUT-NLS’ for more information. - - I can't build Bison because my C compiler is too old. - - Except for GLR parsers (which require C99), the C code that Bison -generates requires only C89 or later. However, Bison itself requires -common C99 features such as declarations after statements. Bison's -‘configure’ script attempts to enable C99 (or later) support on -compilers that default to pre-C99. If your compiler lacks these C99 -features entirely, GCC may well be a better choice; or you can try -upgrading to your compiler's latest version. - - -File: bison.info, Node: Where can I find help?, Next: Bug Reports, Prev: I can't build Bison, Up: FAQ - -13.9 Where can I find help? -=========================== - - I'm having trouble using Bison. Where can I find help? - - First, read this fine manual. Beyond that, you can send mail to -<help-bison@gnu.org>. This mailing list is intended to be populated -with people who are willing to answer questions about using and -installing Bison. Please keep in mind that (most of) the people on the -list have aspects of their lives which are not related to Bison (!), so -you may not receive an answer to your question right away. This can be -frustrating, but please try not to honk them off; remember that any help -they provide is purely voluntary and out of the kindness of their -hearts. - - -File: bison.info, Node: Bug Reports, Next: More Languages, Prev: Where can I find help?, Up: FAQ - -13.10 Bug Reports -================= - - I found a bug. What should I include in the bug report? - - Before sending a bug report, make sure you are using the latest -version. Check <https://ftp.gnu.org/pub/gnu/bison/> or one of its -mirrors. Be sure to include the version number in your bug report. If -the bug is present in the latest version but not in a previous version, -try to determine the most recent version which did not contain the bug.
- - If the bug is parser-related, you should include the smallest grammar -you can which demonstrates the bug. The grammar file should also be -complete (i.e., I should be able to run it through Bison without having -to edit or add anything). The smaller and simpler the grammar, the -easier it will be to fix the bug. - - Include information about your compilation environment, including -your operating system's name and version and your compiler's name and -version. If you have trouble compiling, you should also include a -transcript of the build session, starting with the invocation of -‘configure’. Depending on the nature of the bug, you may be asked to -send additional files as well (such as ‘config.h’ or ‘config.cache’). - - Patches are most welcome, but not required. That is, do not hesitate -to send a bug report just because you cannot provide a fix. - - Send bug reports to <bug-bison@gnu.org>. - - -File: bison.info, Node: More Languages, Next: Beta Testing, Prev: Bug Reports, Up: FAQ - -13.11 More Languages -==================== - - Will Bison ever have C++ and Java support? How about INSERT YOUR - FAVORITE LANGUAGE HERE? - - C++, D and Java are supported. We'd love to add other languages; -contributions are welcome. - - -File: bison.info, Node: Beta Testing, Next: Mailing Lists, Prev: More Languages, Up: FAQ - -13.12 Beta Testing -================== - - What is involved in being a beta tester? - - It's not terribly involved. Basically, you would download a test -release, compile it, and use it to build and run a parser or two. After -that, you would submit either a bug report or a message saying that -everything is okay. It is important to report successes as well as -failures because test releases eventually become mainstream releases, -but only if they are adequately tested. If no one tests, development is -essentially halted. - - Beta testers are particularly needed for operating systems to which -the developers do not have easy access.
They currently have easy access -to recent GNU/Linux and Solaris versions. Reports about other operating -systems are especially welcome. - - -File: bison.info, Node: Mailing Lists, Prev: Beta Testing, Up: FAQ - -13.13 Mailing Lists -=================== - - How do I join the help-bison and bug-bison mailing lists? - - See <https://lists.gnu.org/>. - - -File: bison.info, Node: Table of Symbols, Next: Glossary, Prev: FAQ, Up: Top - -Appendix A Bison Symbols -************************ - - -- Variable: @$ - In an action, the location of the left-hand side of the rule. - *Note Tracking Locations::. - - -- Variable: @N - -- Symbol: @N - In an action, the location of the N-th symbol of the right-hand - side of the rule. *Note Tracking Locations::. - - In a grammar, the Bison-generated nonterminal symbol for a midrule - action with a semantic value. *Note Midrule Action Translation::. - - -- Variable: @NAME - -- Variable: @[NAME] - In an action, the location of a symbol addressed by NAME. *Note - Tracking Locations::. - - -- Symbol: $@N - In a grammar, the Bison-generated nonterminal symbol for a midrule - action with no semantic value. *Note Midrule Action - Translation::. - - -- Variable: $$ - In an action, the semantic value of the left-hand side of the rule. - *Note Actions::. - - -- Variable: $N - In an action, the semantic value of the N-th symbol of the - right-hand side of the rule. *Note Actions::. - - -- Variable: $NAME - -- Variable: $[NAME] - In an action, the semantic value of a symbol addressed by NAME. - *Note Actions::. - - -- Delimiter: %% - Delimiter used to separate the grammar rule section from the Bison - declarations section or the epilogue. *Note Grammar Layout::. - - -- Delimiter: %{CODE%} - All code listed between ‘%{’ and ‘%}’ is copied verbatim to the - parser implementation file. Such code forms the prologue of the - grammar file. *Note Grammar Outline::. - - -- Directive: %?{EXPRESSION} - Predicate actions. This is a type of action clause that may appear - in rules.
The expression is evaluated, and if false, causes a - syntax error. In GLR parsers during nondeterministic operation, - this silently causes an alternative parse to die. During - deterministic operation, it is the same as the effect of YYERROR. - *Note Semantic Predicates::. - - -- Construct: /* ... */ - -- Construct: // ... - Comments, as in C/C++. - - -- Delimiter: : - Separates a rule's result from its components. *Note Rules::. - - -- Delimiter: ; - Terminates a rule. *Note Rules::. - - -- Delimiter: | - Separates alternate rules for the same result nonterminal. *Note - Rules::. - - -- Directive: <*> - Used to define a default tagged ‘%destructor’ or default tagged - ‘%printer’. - - *Note Destructor Decl::. - - -- Directive: <> - Used to define a default tagless ‘%destructor’ or default tagless - ‘%printer’. - - *Note Destructor Decl::. - - -- Symbol: $accept - The predefined nonterminal whose only rule is ‘$accept: START - $end’, where START is the start symbol. *Note Start Decl::. It - cannot be used in the grammar. - - -- Directive: %code {CODE} - -- Directive: %code QUALIFIER {CODE} - Insert CODE verbatim into the output parser source at the default - location or at the location specified by QUALIFIER. *Note %code - Summary::. - - -- Directive: %debug - Equip the parser for debugging. *Note Decl Summary::. - - -- Directive: %define VARIABLE - -- Directive: %define VARIABLE VALUE - -- Directive: %define VARIABLE {VALUE} - -- Directive: %define VARIABLE "VALUE" - Define a variable to adjust Bison's behavior. *Note %define - Summary::. - - -- Directive: %defines - -- Directive: %defines DEFINES-FILE - Historical name for ‘%header’. *Note Decl Summary::. - - -- Directive: %destructor - Specify how the parser should reclaim the memory associated to - discarded symbols. *Note Destructor Decl::. - - -- Directive: %dprec - Bison declaration to assign a precedence to a rule that is used at - parse time to resolve reduce/reduce conflicts. *Note GLR - Parsers::. 
- - -- Directive: %empty - Bison declaration to make explicit that a rule has an empty - right-hand side. *Note Empty Rules::. - - -- Symbol: $end - The predefined token marking the end of the token stream. It - cannot be used in the grammar. - - -- Symbol: error - A token name reserved for error recovery. This token may be used - in grammar rules so as to allow the Bison parser to recognize an - error in the grammar without halting the process. In effect, a - sentence containing an error may be recognized as valid. On a - syntax error, the token ‘error’ becomes the current lookahead - token. Actions corresponding to ‘error’ are then executed, and the - lookahead token is reset to the token that originally caused the - violation. *Note Error Recovery::. - - -- Directive: %error-verbose - An obsolete directive standing for ‘%define parse.error verbose’. - - -- Directive: %file-prefix "PREFIX" - Bison declaration to set the prefix of the output files. *Note - Decl Summary::. - - -- Directive: %glr-parser - Bison declaration to produce a GLR parser. *Note GLR Parsers::. - - -- Directive: %header - Bison declaration to create a parser header file, which is usually - meant for the scanner. *Note Decl Summary::. - - -- Directive: %header HEADER-FILE - Same as above, but save in the file HEADER-FILE. *Note Decl - Summary::. - - -- Directive: %initial-action - Run user code before parsing. *Note Initial Action Decl::. - - -- Directive: %language - Specify the programming language for the generated parser. *Note - Decl Summary::. - - -- Directive: %left - Bison declaration to assign precedence and left associativity to - token(s). *Note Precedence Decl::. - - -- Directive: %lex-param {ARGUMENT-DECLARATION} ... - Bison declaration to specify additional arguments that ‘yylex’ - should accept. *Note Pure Calling::. - - -- Directive: %merge - Bison declaration to assign a merging function to a rule.
If there - is a reduce/reduce conflict with a rule having the same merging - function, the function is applied to the two semantic values to get - a single result. *Note GLR Parsers::. - - -- Directive: %name-prefix "PREFIX" - Obsoleted by the ‘%define’ variable ‘api.prefix’ (*note Multiple - Parsers::). - - Rename the external symbols (variables and functions) used in the - parser so that they start with PREFIX instead of ‘yy’. Contrary to - ‘api.prefix’, do not rename types and macros. - - The precise list of symbols renamed in C parsers is ‘yyparse’, - ‘yylex’, ‘yyerror’, ‘yynerrs’, ‘yylval’, ‘yychar’, ‘yydebug’, and - (if locations are used) ‘yylloc’. If you use a push parser, - ‘yypush_parse’, ‘yypull_parse’, ‘yypstate’, ‘yypstate_new’ and - ‘yypstate_delete’ will also be renamed. For example, if you use - ‘%name-prefix "c_"’, the names become ‘c_parse’, ‘c_lex’, and so - on. For C++ parsers, see the ‘%define api.namespace’ documentation - in this section. - - -- Directive: %no-lines - Bison declaration to avoid generating ‘#line’ directives in the - parser implementation file. *Note Decl Summary::. - - -- Directive: %nonassoc - Bison declaration to assign precedence and nonassociativity to - token(s). *Note Precedence Decl::. - - -- Directive: %nterm - Bison declaration to declare nonterminals. *Note Type Decl::. - - -- Directive: %output "FILE" - Bison declaration to set the name of the parser implementation - file. *Note Decl Summary::. - - -- Directive: %param {ARGUMENT-DECLARATION} ... - Bison declaration to specify additional arguments that both ‘yylex’ - and ‘yyparse’ should accept. *Note Parser Function::. - - -- Directive: %parse-param {ARGUMENT-DECLARATION} ... - Bison declaration to specify additional arguments that ‘yyparse’ - should accept. *Note Parser Function::. - - -- Directive: %prec - Bison declaration to assign a precedence to a specific rule. *Note - Contextual Precedence::.
- - -- Directive: %precedence - Bison declaration to assign precedence to token(s), but no - associativity. *Note Precedence Decl::. - - -- Directive: %pure-parser - Deprecated version of ‘%define api.pure’ (*note %define Summary::), - for which Bison is more careful to warn about unreasonable usage. - - -- Directive: %require "VERSION" - Require version VERSION or higher of Bison. *Note Require Decl::. - - -- Directive: %right - Bison declaration to assign precedence and right associativity to - token(s). *Note Precedence Decl::. - - -- Directive: %skeleton - Specify the skeleton to use; usually for development. *Note Decl - Summary::. - - -- Directive: %start - Bison declaration to specify the start symbol. *Note Start Decl::. - - -- Directive: %token - Bison declaration to declare token(s) without specifying - precedence. *Note Token Decl::. - - -- Directive: %token-table - Bison declaration to include a token name table in the parser - implementation file. *Note Decl Summary::. - - -- Directive: %type - Bison declaration to declare symbol value types. *Note Type - Decl::. - - -- Symbol: $undefined - The predefined token onto which all undefined values returned by - ‘yylex’ are mapped. It cannot be used in the grammar, rather, use - ‘error’. - - -- Directive: %union - Bison declaration to specify several possible data types for - semantic values. *Note Union Decl::. - - -- Macro: YYABORT - Macro to pretend that an unrecoverable syntax error has occurred, - by making ‘yyparse’ return 1 immediately. The error reporting - function ‘yyerror’ is not called. *Note Parser Function::. - - For Java parsers, this functionality is invoked using ‘return - YYABORT;’ instead. - - -- Macro: YYACCEPT - Macro to pretend that a complete utterance of the language has been - read, by making ‘yyparse’ return 0 immediately. *Note Parser - Function::. - - For Java parsers, this functionality is invoked using ‘return - YYACCEPT;’ instead.
- - -- Macro: YYBACKUP - Macro to discard a value from the parser stack and fake a lookahead - token. *Note Action Features::. - - -- Macro: YYBISON - The version of Bison as an integer, for instance 30704 for version - 3.7.4. Defined in ‘yacc.c’ only. Before version 3.7.4, ‘YYBISON’ - was defined to 1. - - -- Variable: yychar - External integer variable that contains the integer value of the - lookahead token. (In a pure parser, it is a local variable within - ‘yyparse’.) Error-recovery rule actions may examine this variable. - *Note Action Features::. - - -- Variable: yyclearin - Macro used in error-recovery rule actions. It clears the previous - lookahead token. *Note Error Recovery::. - - -- Macro: YYDEBUG - Macro to define to equip the parser with tracing code. *Note - Tracing::. - - -- Variable: yydebug - External integer variable set to zero by default. If ‘yydebug’ is - given a nonzero value, the parser will output information on input - symbols and parser action. *Note Tracing::. - - -- Value: YYEMPTY - The pseudo token kind when there is no lookahead token. - - -- Value: YYEOF - The token kind denoting the end of the input stream. - - -- Macro: yyerrok - Macro to cause parser to recover immediately to its normal mode - after a syntax error. *Note Error Recovery::. - - -- Macro: YYERROR - Cause an immediate syntax error. This statement initiates error - recovery just as if the parser itself had detected an error; - however, it does not call ‘yyerror’, and does not print any - message. If you want to print an error message, call ‘yyerror’ - explicitly before the ‘YYERROR;’ statement. *Note Error - Recovery::. - - For Java parsers, this functionality is invoked using ‘return - YYERROR;’ instead. - - -- Function: yyerror - User-supplied function to be called by ‘yyparse’ on error. *Note - Error Reporting Function::. - - -- Macro: YYFPRINTF - Macro used to output run-time traces in C. *Note Enabling Traces::.
- - -- Macro: YYINITDEPTH - Macro for specifying the initial size of the parser stack. *Note - Memory Management::. - - -- Function: yylex - User-supplied lexical analyzer function, called with no arguments - to get the next token. *Note Lexical::. - - -- Variable: yylloc - External variable in which ‘yylex’ should place the line and column - numbers associated with a token. (In a pure parser, it is a local - variable within ‘yyparse’, and its address is passed to ‘yylex’.) - You can ignore this variable if you don't use the ‘@’ feature in - the grammar actions. *Note Token Locations::. In semantic - actions, it stores the location of the lookahead token. *Note - Actions and Locations::. - - -- Type: YYLTYPE - Data type of ‘yylloc’. By default in C, a structure with four - members (start/end line/column). *Note Location Type::. - - -- Variable: yylval - External variable in which ‘yylex’ should place the semantic value - associated with a token. (In a pure parser, it is a local variable - within ‘yyparse’, and its address is passed to ‘yylex’.) *Note - Token Values::. In semantic actions, it stores the semantic value - of the lookahead token. *Note Actions::. - - -- Macro: YYMAXDEPTH - Macro for specifying the maximum size of the parser stack. *Note - Memory Management::. - - -- Variable: yynerrs - Global variable which Bison increments each time it reports a - syntax error. (In a pure parser, it is a local variable within - ‘yyparse’. In a pure push parser, it is a member of ‘yypstate’.) - *Note Error Reporting Function::. - - -- Macro: YYNOMEM - Macro to pretend that memory is exhausted, by making ‘yyparse’ - return 2 immediately. The error reporting function ‘yyerror’ is - called. *Note Parser Function::. - - -- Function: yyparse - The parser function produced by Bison; call this function to start - parsing. *Note Parser Function::. 
- - -- Function: yypstate_delete - The function to delete a parser instance, produced by Bison in push - mode; call this function to delete the memory associated with a - parser. *Note ‘yypstate_delete’: yypstate_delete. Does nothing - when called with a null pointer. - - -- Function: yypstate_new - The function to create a parser instance, produced by Bison in push - mode; call this function to create a new parser. *Note - ‘yypstate_new’: yypstate_new. - - -- Function: yypull_parse - The parser function produced by Bison in push mode; call this - function to parse the rest of the input stream. *Note - ‘yypull_parse’: yypull_parse. - - -- Function: yypush_parse - The parser function produced by Bison in push mode; call this - function to parse a single token. *Note ‘yypush_parse’: - yypush_parse. - - -- Macro: YYRECOVERING - The expression ‘YYRECOVERING ()’ yields 1 when the parser is - recovering from a syntax error, and 0 otherwise. *Note Action - Features::. - - -- Macro: YYSTACK_USE_ALLOCA - Macro used to control the use of ‘alloca’ when the deterministic - parser in C needs to extend its stacks. If defined to 0, the - parser will use ‘malloc’ to extend its stacks and memory exhaustion - occurs if ‘malloc’ fails (*note Memory Management::). If defined - to 1, the parser will use ‘alloca’. Values other than 0 and 1 are - reserved for future Bison extensions. If not defined, - ‘YYSTACK_USE_ALLOCA’ defaults to 0. - - In the all-too-common case where your code may run on a host with a - limited stack and with unreliable stack-overflow checking, you - should set ‘YYMAXDEPTH’ to a value that cannot possibly result in - unchecked stack overflow on any of your target hosts when ‘alloca’ - is called. You can inspect the code that Bison generates in order - to determine the proper numeric values. This will require some - expertise in low-level implementation details. - - -- Type: YYSTYPE - In C, data type of semantic values; ‘int’ by default. 
Deprecated - in favor of the ‘%define’ variable ‘api.value.type’. *Note Value - Type::. - - -- Type: yysymbol_kind_t - An enum of all the symbols, tokens and nonterminals, of the - grammar. *Note Syntax Error Reporting Function::. The symbol - kinds are used internally by the parser, and should not be confused - with the token kinds: the symbol kind of a terminal symbol is not - equal to its token kind! (Unless ‘%define api.token.raw’ was - used.) - - -- Type: yytoken_kind_t - An enum of all the “token kinds” declared with ‘%token’ (*note - Token Decl::). These are the return values for ‘yylex’. They - should not be confused with the _symbol kinds_, used internally by - the parser. - - -- Value: YYUNDEF - The token kind denoting an unknown token. - - -File: bison.info, Node: Glossary, Next: GNU Free Documentation License, Prev: Table of Symbols, Up: Top - -Appendix B Glossary -******************* - -Accepting state - A state whose only action is the accept action. The accepting - state is thus a consistent state. *Note Understanding::. - -Backus-Naur Form (BNF; also called "Backus Normal Form") - Formal method of specifying context-free grammars originally - proposed by John Backus, and slightly improved by Peter Naur in his - 1960-01-02 committee document contributing to what became the Algol - 60 report. *Note Language and Grammar::. - -Consistent state - A state containing only one possible action. *Note Default - Reductions::. - -Context-free grammars - Grammars specified as rules that can be applied regardless of - context. Thus, if there is a rule which says that an integer can - be used as an expression, integers are allowed _anywhere_ an - expression is permitted. *Note Language and Grammar::. - -Counterexample - A sequence of tokens and/or nonterminals, with one dot, that - demonstrates a conflict. The dot marks the place where the - conflict occurs. 
- - A _unifying_ counterexample is a single string that has two - different parses; its existence proves that the grammar is - ambiguous. When a unifying counterexample cannot be found in - reasonable time, a _nonunifying_ counterexample is built: _two_ - different strings sharing the prefix up to the dot. - - *Note Counterexamples::. - -Default reduction - The reduction that a parser should perform if the current parser - state contains no other action for the lookahead token. In - permitted parser states, Bison declares the reduction with the - largest lookahead set to be the default reduction and removes that - lookahead set. *Note Default Reductions::. - -Defaulted state - A consistent state with a default reduction. *Note Default - Reductions::. - -Dynamic allocation - Allocation of memory that occurs during execution, rather than at - compile time or on entry to a function. - -Empty string - Analogous to the empty set in set theory, the empty string is a - character string of length zero. - -Finite-state stack machine - A "machine" that has discrete states in which it is said to exist - at each instant in time. As input to the machine is processed, the - machine moves from state to state as specified by the logic of the - machine. In the case of the parser, the input is the language - being parsed, and the states correspond to various stages in the - grammar rules. *Note Algorithm::. - -Generalized LR (GLR) - A parsing algorithm that can handle all context-free grammars, - including those that are not LR(1). It resolves situations that - Bison's deterministic parsing algorithm cannot by effectively - splitting off multiple parsers, trying all possible parsers, and - discarding those that fail in the light of additional right - context. *Note Generalized LR Parsing::. - -Grouping - A language construct that is (in general) grammatically divisible; - for example, 'expression' or 'declaration' in C. *Note Language - and Grammar::.
- -IELR(1) (Inadequacy Elimination LR(1)) - A minimal LR(1) parser table construction algorithm. That is, - given any context-free grammar, IELR(1) generates parser tables - with the full language-recognition power of canonical LR(1) but - with nearly the same number of parser states as LALR(1). This - reduction in parser states is often an order of magnitude. More - importantly, because canonical LR(1)'s extra parser states may - contain duplicate conflicts in the case of non-LR(1) grammars, the - number of conflicts for IELR(1) is often an order of magnitude less - as well. This can significantly reduce the complexity of - developing a grammar. *Note LR Table Construction::. - -Infix operator - An arithmetic operator that is placed between the operands on which - it performs some operation. - -Input stream - A continuous flow of data between devices or programs. - -Kind - "Token" and "symbol" are each overloaded to mean either a grammar - symbol (kind) or all parse info (kind, value, location) associated - with occurrences of that grammar symbol from the input. To - disambiguate, - - • we use "token kind" and "symbol kind" to mean both grammar - symbols and the values that represent them in a base - programming language (C, C++, etc.). The names of the types - of these values are typically ‘token_kind_t’, or - ‘token_kind_type’, or ‘TokenKind’, depending on the - programming language. - - • we use "token" and "symbol" without the word "kind" to mean - parsed occurrences, and we append the word "type" to refer to - the types that represent them in a base programming language. - - In summary: When you see "kind", interpret "symbol" or "token" to - mean a _grammar symbol_. When you don't see "kind" (including when - you see "type"), interpret "symbol" or "token" to mean a _parsed - symbol_. 
- -LAC (Lookahead Correction) - A parsing mechanism that fixes the problem of delayed syntax error - detection, which is caused by LR state merging, default reductions, - and the use of ‘%nonassoc’. Delayed syntax error detection results - in unexpected semantic actions, initiation of error recovery in the - wrong syntactic context, and an incorrect list of expected tokens - in a verbose syntax error message. *Note LAC::. - -Language construct - One of the typical usage schemas of the language. For example, one - of the constructs of the C language is the ‘if’ statement. *Note - Language and Grammar::. - -Left associativity - Operators having left associativity are analyzed from left to - right: ‘a+b+c’ first computes ‘a+b’ and then combines with ‘c’. - *Note Precedence::. - -Left recursion - A rule whose result symbol is also its first component symbol; for - example, ‘expseq1 : expseq1 ',' exp;’. *Note Recursion::. - -Left-to-right parsing - Parsing a sentence of a language by analyzing it token by token - from left to right. *Note Algorithm::. - -Lexical analyzer (scanner) - A function that reads an input stream and returns tokens one by - one. *Note Lexical::. - -Lexical tie-in - A flag, set by actions in the grammar rules, which alters the way - tokens are parsed. *Note Lexical Tie-ins::. - -Literal string token - A token which consists of two or more fixed characters. *Note - Symbols::. - -Lookahead token - A token already read but not yet shifted. *Note Lookahead::. - -LALR(1) - The class of context-free grammars that Bison (like most other - parser generators) can handle by default; a subset of LR(1). *Note - Mysterious Conflicts::. - -LR(1) - The class of context-free grammars in which at most one token of - lookahead is needed to disambiguate the parsing of any piece of - input. 
- -Nonterminal symbol - A grammar symbol standing for a grammatical construct that can be - expressed through rules in terms of smaller constructs; in other - words, a construct that is not a token. *Note Symbols::. - -Parser - A function that recognizes valid sentences of a language by - analyzing the syntax structure of a set of tokens passed to it from - a lexical analyzer. - -Postfix operator - An arithmetic operator that is placed after the operands upon which - it performs some operation. - -Reduction - Replacing a string of nonterminals and/or terminals with a single - nonterminal, according to a grammar rule. *Note Algorithm::. - -Reentrant - A reentrant subprogram is a subprogram which can be invoked any - number of times in parallel, without interference between the - various invocations. *Note Pure Decl::. - -Reverse Polish Notation - A language in which all operators are postfix operators. - -Right recursion - A rule whose result symbol is also its last component symbol; for - example, ‘expseq1: exp ',' expseq1;’. *Note Recursion::. - -Semantics - In computer languages, the semantics are specified by the actions - taken for each instance of the language, i.e., the meaning of each - statement. *Note Semantics::. - -Shift - A parser is said to shift when it makes the choice of analyzing - further input from the stream rather than reducing immediately some - already-recognized rule. *Note Algorithm::. - -Single-character literal - A single character that is recognized and interpreted as is. *Note - Grammar in Bison::. - -Start symbol - The nonterminal symbol that stands for a complete valid utterance - in the language being parsed. The start symbol is usually listed - as the first nonterminal symbol in a language specification. *Note - Start Decl::. - -Symbol kind - A (finite) enumeration of the grammar symbols, as processed by the - parser. *Note Symbols::.
- -Symbol table - A data structure where symbol names and associated data are stored - during parsing to allow for recognition and use of existing - information in repeated uses of a symbol. *Note Multi-function - Calc::. - -Syntax error - An error encountered during parsing of an input stream due to - invalid syntax. *Note Error Recovery::. - -Terminal symbol - A grammar symbol that has no rules in the grammar and therefore is - grammatically indivisible. The piece of text it represents is a - token. *Note Language and Grammar::. - -Token - A basic, grammatically indivisible unit of a language. The symbol - that describes a token in the grammar is a terminal symbol. The - input of the Bison parser is a stream of tokens which comes from - the lexical analyzer. *Note Symbols::. - -Token kind - A (finite) enumeration of the grammar terminals, as discriminated - by the scanner. *Note Symbols::. - -Unreachable state - A parser state to which there does not exist a sequence of - transitions from the parser's start state. A state can become - unreachable during conflict resolution. *Note Unreachable - States::. - - -File: bison.info, Node: GNU Free Documentation License, Next: Bibliography, Prev: Glossary, Up: Top - -Appendix C GNU Free Documentation License -***************************************** - - Version 1.3, 3 November 2008 - - Copyright © 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc. - - - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - 0. PREAMBLE - - The purpose of this License is to make a manual, textbook, or other - functional and useful document “free” in the sense of freedom: to - assure everyone the effective freedom to copy and redistribute it, - with or without modifying it, either commercially or - noncommercially. 
Secondarily, this License preserves for the - author and publisher a way to get credit for their work, while not - being considered responsible for modifications made by others. - - This License is a kind of "copyleft", which means that derivative - works of the document must themselves be free in the same sense. - It complements the GNU General Public License, which is a copyleft - license designed for free software. - - We have designed this License in order to use it for manuals for - free software, because free software needs free documentation: a - free program should come with manuals providing the same freedoms - that the software does. But this License is not limited to - software manuals; it can be used for any textual work, regardless - of subject matter or whether it is published as a printed book. We - recommend this License principally for works whose purpose is - instruction or reference. - - 1. APPLICABILITY AND DEFINITIONS - - This License applies to any manual or other work, in any medium, - that contains a notice placed by the copyright holder saying it can - be distributed under the terms of this License. Such a notice - grants a world-wide, royalty-free license, unlimited in duration, - to use that work under the conditions stated herein. The - "Document", below, refers to any such manual or work. Any member - of the public is a licensee, and is addressed as "you". You accept - the license if you copy, modify or distribute the work in a way - requiring permission under copyright law. - - A "Modified Version" of the Document means any work containing the - Document or a portion of it, either copied verbatim, or with - modifications and/or translated into another language. 
- - A "Secondary Section" is a named appendix or a front-matter section - of the Document that deals exclusively with the relationship of the - publishers or authors of the Document to the Document's overall - subject (or to related matters) and contains nothing that could - fall directly within that overall subject. (Thus, if the Document - is in part a textbook of mathematics, a Secondary Section may not - explain any mathematics.) The relationship could be a matter of - historical connection with the subject or with related matters, or - of legal, commercial, philosophical, ethical or political position - regarding them. - - The "Invariant Sections" are certain Secondary Sections whose - titles are designated, as being those of Invariant Sections, in the - notice that says that the Document is released under this License. - If a section does not fit the above definition of Secondary then it - is not allowed to be designated as Invariant. The Document may - contain zero Invariant Sections. If the Document does not identify - any Invariant Sections then there are none. - - The "Cover Texts" are certain short passages of text that are - listed, as Front-Cover Texts or Back-Cover Texts, in the notice - that says that the Document is released under this License. A - Front-Cover Text may be at most 5 words, and a Back-Cover Text may - be at most 25 words. - - A "Transparent" copy of the Document means a machine-readable copy, - represented in a format whose specification is available to the - general public, that is suitable for revising the document - straightforwardly with generic text editors or (for images composed - of pixels) generic paint programs or (for drawings) some widely - available drawing editor, and that is suitable for input to text - formatters or for automatic translation to a variety of formats - suitable for input to text formatters. 
A copy made in an otherwise - Transparent file format whose markup, or absence of markup, has - been arranged to thwart or discourage subsequent modification by - readers is not Transparent. An image format is not Transparent if - used for any substantial amount of text. A copy that is not - "Transparent" is called "Opaque". - - Examples of suitable formats for Transparent copies include plain - ASCII without markup, Texinfo input format, LaTeX input format, - SGML or XML using a publicly available DTD, and standard-conforming - simple HTML, PostScript or PDF designed for human modification. - Examples of transparent image formats include PNG, XCF and JPG. - Opaque formats include proprietary formats that can be read and - edited only by proprietary word processors, SGML or XML for which - the DTD and/or processing tools are not generally available, and - the machine-generated HTML, PostScript or PDF produced by some word - processors for output purposes only. - - The "Title Page" means, for a printed book, the title page itself, - plus such following pages as are needed to hold, legibly, the - material this License requires to appear in the title page. For - works in formats which do not have any title page as such, "Title - Page" means the text near the most prominent appearance of the - work's title, preceding the beginning of the body of the text. - - The "publisher" means any person or entity that distributes copies - of the Document to the public. - - A section "Entitled XYZ" means a named subunit of the Document - whose title either is precisely XYZ or contains XYZ in parentheses - following text that translates XYZ in another language. (Here XYZ - stands for a specific section name mentioned below, such as - "Acknowledgements", "Dedications", "Endorsements", or "History".) - To "Preserve the Title" of such a section when you modify the - Document means that it remains a section "Entitled XYZ" according - to this definition. 
- - The Document may include Warranty Disclaimers next to the notice - which states that this License applies to the Document. These - Warranty Disclaimers are considered to be included by reference in - this License, but only as regards disclaiming warranties: any other - implication that these Warranty Disclaimers may have is void and - has no effect on the meaning of this License. - - 2. VERBATIM COPYING - - You may copy and distribute the Document in any medium, either - commercially or noncommercially, provided that this License, the - copyright notices, and the license notice saying this License - applies to the Document are reproduced in all copies, and that you - add no other conditions whatsoever to those of this License. You - may not use technical measures to obstruct or control the reading - or further copying of the copies you make or distribute. However, - you may accept compensation in exchange for copies. If you - distribute a large enough number of copies you must also follow the - conditions in section 3. - - You may also lend copies, under the same conditions stated above, - and you may publicly display copies. - - 3. COPYING IN QUANTITY - - If you publish printed copies (or copies in media that commonly - have printed covers) of the Document, numbering more than 100, and - the Document's license notice requires Cover Texts, you must - enclose the copies in covers that carry, clearly and legibly, all - these Cover Texts: Front-Cover Texts on the front cover, and - Back-Cover Texts on the back cover. Both covers must also clearly - and legibly identify you as the publisher of these copies. The - front cover must present the full title with all words of the title - equally prominent and visible. You may add other material on the - covers in addition. Copying with changes limited to the covers, as - long as they preserve the title of the Document and satisfy these - conditions, can be treated as verbatim copying in other respects. 
- - If the required texts for either cover are too voluminous to fit - legibly, you should put the first ones listed (as many as fit - reasonably) on the actual cover, and continue the rest onto - adjacent pages. - - If you publish or distribute Opaque copies of the Document - numbering more than 100, you must either include a machine-readable - Transparent copy along with each Opaque copy, or state in or with - each Opaque copy a computer-network location from which the general - network-using public has access to download using public-standard - network protocols a complete Transparent copy of the Document, free - of added material. If you use the latter option, you must take - reasonably prudent steps, when you begin distribution of Opaque - copies in quantity, to ensure that this Transparent copy will - remain thus accessible at the stated location until at least one - year after the last time you distribute an Opaque copy (directly or - through your agents or retailers) of that edition to the public. - - It is requested, but not required, that you contact the authors of - the Document well before redistributing any large number of copies, - to give them a chance to provide you with an updated version of the - Document. - - 4. MODIFICATIONS - - You may copy and distribute a Modified Version of the Document - under the conditions of sections 2 and 3 above, provided that you - release the Modified Version under precisely this License, with the - Modified Version filling the role of the Document, thus licensing - distribution and modification of the Modified Version to whoever - possesses a copy of it. In addition, you must do these things in - the Modified Version: - - A. Use in the Title Page (and on the covers, if any) a title - distinct from that of the Document, and from those of previous - versions (which should, if there were any, be listed in the - History section of the Document). 
You may use the same title - as a previous version if the original publisher of that - version gives permission. - - B. List on the Title Page, as authors, one or more persons or - entities responsible for authorship of the modifications in - the Modified Version, together with at least five of the - principal authors of the Document (all of its principal - authors, if it has fewer than five), unless they release you - from this requirement. - - C. State on the Title page the name of the publisher of the - Modified Version, as the publisher. - - D. Preserve all the copyright notices of the Document. - - E. Add an appropriate copyright notice for your modifications - adjacent to the other copyright notices. - - F. Include, immediately after the copyright notices, a license - notice giving the public permission to use the Modified - Version under the terms of this License, in the form shown in - the Addendum below. - - G. Preserve in that license notice the full lists of Invariant - Sections and required Cover Texts given in the Document's - license notice. - - H. Include an unaltered copy of this License. - - I. Preserve the section Entitled "History", Preserve its Title, - and add to it an item stating at least the title, year, new - authors, and publisher of the Modified Version as given on the - Title Page. If there is no section Entitled "History" in the - Document, create one stating the title, year, authors, and - publisher of the Document as given on its Title Page, then add - an item describing the Modified Version as stated in the - previous sentence. - - J. Preserve the network location, if any, given in the Document - for public access to a Transparent copy of the Document, and - likewise the network locations given in the Document for - previous versions it was based on. These may be placed in the - "History" section. 
You may omit a network location for a work - that was published at least four years before the Document - itself, or if the original publisher of the version it refers - to gives permission. - - K. For any section Entitled "Acknowledgements" or "Dedications", - Preserve the Title of the section, and preserve in the section - all the substance and tone of each of the contributor - acknowledgements and/or dedications given therein. - - L. Preserve all the Invariant Sections of the Document, unaltered - in their text and in their titles. Section numbers or the - equivalent are not considered part of the section titles. - - M. Delete any section Entitled "Endorsements". Such a section - may not be included in the Modified Version. - - N. Do not retitle any existing section to be Entitled - "Endorsements" or to conflict in title with any Invariant - Section. - - O. Preserve any Warranty Disclaimers. - - If the Modified Version includes new front-matter sections or - appendices that qualify as Secondary Sections and contain no - material copied from the Document, you may at your option designate - some or all of these sections as invariant. To do this, add their - titles to the list of Invariant Sections in the Modified Version's - license notice. These titles must be distinct from any other - section titles. - - You may add a section Entitled "Endorsements", provided it contains - nothing but endorsements of your Modified Version by various - parties--for example, statements of peer review or that the text - has been approved by an organization as the authoritative - definition of a standard. - - You may add a passage of up to five words as a Front-Cover Text, - and a passage of up to 25 words as a Back-Cover Text, to the end of - the list of Cover Texts in the Modified Version. Only one passage - of Front-Cover Text and one of Back-Cover Text may be added by (or - through arrangements made by) any one entity. 
If the Document - already includes a cover text for the same cover, previously added - by you or by arrangement made by the same entity you are acting on - behalf of, you may not add another; but you may replace the old - one, on explicit permission from the previous publisher that added - the old one. - - The author(s) and publisher(s) of the Document do not by this - License give permission to use their names for publicity for or to - assert or imply endorsement of any Modified Version. - - 5. COMBINING DOCUMENTS - - You may combine the Document with other documents released under - this License, under the terms defined in section 4 above for - modified versions, provided that you include in the combination all - of the Invariant Sections of all of the original documents, - unmodified, and list them all as Invariant Sections of your - combined work in its license notice, and that you preserve all - their Warranty Disclaimers. - - The combined work need only contain one copy of this License, and - multiple identical Invariant Sections may be replaced with a single - copy. If there are multiple Invariant Sections with the same name - but different contents, make the title of each such section unique - by adding at the end of it, in parentheses, the name of the - original author or publisher of that section if known, or else a - unique number. Make the same adjustment to the section titles in - the list of Invariant Sections in the license notice of the - combined work. - - In the combination, you must combine any sections Entitled - "History" in the various original documents, forming one section - Entitled "History"; likewise combine any sections Entitled - "Acknowledgements", and any sections Entitled "Dedications". You - must delete all sections Entitled "Endorsements." - - 6. 
COLLECTIONS OF DOCUMENTS - - You may make a collection consisting of the Document and other - documents released under this License, and replace the individual - copies of this License in the various documents with a single copy - that is included in the collection, provided that you follow the - rules of this License for verbatim copying of each of the documents - in all other respects. - - You may extract a single document from such a collection, and - distribute it individually under this License, provided you insert - a copy of this License into the extracted document, and follow this - License in all other respects regarding verbatim copying of that - document. - - 7. AGGREGATION WITH INDEPENDENT WORKS - - A compilation of the Document or its derivatives with other - separate and independent documents or works, in or on a volume of a - storage or distribution medium, is called an "aggregate" if the - copyright resulting from the compilation is not used to limit the - legal rights of the compilation's users beyond what the individual - works permit. When the Document is included in an aggregate, this - License does not apply to the other works in the aggregate which - are not themselves derivative works of the Document. - - If the Cover Text requirement of section 3 is applicable to these - copies of the Document, then if the Document is less than one half - of the entire aggregate, the Document's Cover Texts may be placed - on covers that bracket the Document within the aggregate, or the - electronic equivalent of covers if the Document is in electronic - form. Otherwise they must appear on printed covers that bracket - the whole aggregate. - - 8. TRANSLATION - - Translation is considered a kind of modification, so you may - distribute translations of the Document under the terms of section - 4. 
Replacing Invariant Sections with translations requires special - permission from their copyright holders, but you may include - translations of some or all Invariant Sections in addition to the - original versions of these Invariant Sections. You may include a - translation of this License, and all the license notices in the - Document, and any Warranty Disclaimers, provided that you also - include the original English version of this License and the - original versions of those notices and disclaimers. In case of a - disagreement between the translation and the original version of - this License or a notice or disclaimer, the original version will - prevail. - - If a section in the Document is Entitled "Acknowledgements", - "Dedications", or "History", the requirement (section 4) to - Preserve its Title (section 1) will typically require changing the - actual title. - - 9. TERMINATION - - You may not copy, modify, sublicense, or distribute the Document - except as expressly provided under this License. Any attempt - otherwise to copy, modify, sublicense, or distribute it is void, - and will automatically terminate your rights under this License. - - However, if you cease all violation of this License, then your - license from a particular copyright holder is reinstated (a) - provisionally, unless and until the copyright holder explicitly and - finally terminates your license, and (b) permanently, if the - copyright holder fails to notify you of the violation by some - reasonable means prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is - reinstated permanently if the copyright holder notifies you of the - violation by some reasonable means, this is the first time you have - received notice of violation of this License (for any work) from - that copyright holder, and you cure the violation prior to 30 days - after your receipt of the notice. 
- - Termination of your rights under this section does not terminate - the licenses of parties who have received copies or rights from you - under this License. If your rights have been terminated and not - permanently reinstated, receipt of a copy of some or all of the - same material does not give you any rights to use it. - - 10. FUTURE REVISIONS OF THIS LICENSE - - The Free Software Foundation may publish new, revised versions of - the GNU Free Documentation License from time to time. Such new - versions will be similar in spirit to the present version, but may - differ in detail to address new problems or concerns. See - <https://www.gnu.org/licenses/>. - - Each version of the License is given a distinguishing version - number. If the Document specifies that a particular numbered - version of this License "or any later version" applies to it, you - have the option of following the terms and conditions either of - that specified version or of any later version that has been - published (not as a draft) by the Free Software Foundation. If the - Document does not specify a version number of this License, you may - choose any version ever published (not as a draft) by the Free - Software Foundation. If the Document specifies that a proxy can - decide which future versions of this License can be used, that - proxy's public statement of acceptance of a version permanently - authorizes you to choose that version for the Document. - - 11. RELICENSING - - "Massive Multiauthor Collaboration Site" (or "MMC Site") means any - World Wide Web server that publishes copyrightable works and also - provides prominent facilities for anybody to edit those works. A - public wiki that anybody can edit is an example of such a server. - A "Massive Multiauthor Collaboration" (or "MMC") contained in the - site means any set of copyrightable works thus published on the MMC - site. 
- - "CC-BY-SA" means the Creative Commons Attribution-Share Alike 3.0 - license published by Creative Commons Corporation, a not-for-profit - corporation with a principal place of business in San Francisco, - California, as well as future copyleft versions of that license - published by that same organization. - - "Incorporate" means to publish or republish a Document, in whole or - in part, as part of another Document. - - An MMC is "eligible for relicensing" if it is licensed under this - License, and if all works that were first published under this - License somewhere other than this MMC, and subsequently - incorporated in whole or in part into the MMC, (1) had no cover - texts or invariant sections, and (2) were thus incorporated prior - to November 1, 2008. - - The operator of an MMC Site may republish an MMC contained in the - site under CC-BY-SA on the same site at any time before August 1, - 2009, provided the MMC is eligible for relicensing. - -ADDENDUM: How to use this License for your documents -==================================================== - -To use this License in a document you have written, include a copy of -the License in the document and put the following copyright and license -notices just after the title page: - - Copyright (C) YEAR YOUR NAME. - Permission is granted to copy, distribute and/or modify this document - under the terms of the GNU Free Documentation License, Version 1.3 - or any later version published by the Free Software Foundation; - with no Invariant Sections, no Front-Cover Texts, and no Back-Cover - Texts. A copy of the license is included in the section entitled ``GNU - Free Documentation License''. - - If you have Invariant Sections, Front-Cover Texts and Back-Cover -Texts, replace the "with...Texts." line with this: - - with the Invariant Sections being LIST THEIR TITLES, with - the Front-Cover Texts being LIST, and with the Back-Cover Texts - being LIST. 
- - If you have Invariant Sections without Cover Texts, or some other -combination of the three, merge those two alternatives to suit the -situation. - - If your document contains nontrivial examples of program code, we -recommend releasing these examples in parallel under your choice of free -software license, such as the GNU General Public License, to permit -their use in free software. - - -File: bison.info, Node: Bibliography, Next: Index of Terms, Prev: GNU Free Documentation License, Up: Top - -Bibliography -************ - -[Corbett 1984] - Robert Paul Corbett, Static Semantics in Compiler Error Recovery - Ph.D. Dissertation, Report No. UCB/CSD 85/251, Department of - Electrical Engineering and Computer Science, Computer Science - Division, University of California, Berkeley, California (June - 1985). - -[Denny 2008] - Joel E. Denny and Brian A. Malloy, IELR(1): Practical LR(1) Parser - Tables for Non-LR(1) Grammars with Conflict Resolution, in - ‘Proceedings of the 2008 ACM Symposium on Applied Computing’ - (SAC'08), ACM, New York, NY, USA, pp. 240-245. - - -[Denny 2010 May] - Joel E. Denny, PSLR(1): Pseudo-Scannerless Minimal LR(1) for the - Deterministic Parsing of Composite Languages, Ph.D. Dissertation, - Clemson University, Clemson, SC, USA (May 2010). - - -[Denny 2010 November] - Joel E. Denny and Brian A. Malloy, The IELR(1) Algorithm for - Generating Minimal LR(1) Parser Tables for Non-LR(1) Grammars with - Conflict Resolution, in ‘Science of Computer Programming’, Vol. 75, - Issue 11 (November 2010), pp. 943-979. - - -[DeRemer 1982] - Frank DeRemer and Thomas Pennello, Efficient Computation of LALR(1) - Look-Ahead Sets, in ‘ACM Transactions on Programming Languages and - Systems’, Vol. 4, No. 4 (October 1982), pp. 615-649. 
- - -[Isradisaikul 2015] - Chinawat Isradisaikul, Andrew Myers, Finding Counterexamples from - Parsing Conflicts, in ‘Proceedings of the 36th ACM SIGPLAN - Conference on Programming Language Design and Implementation’ (PLDI - '15), ACM, pp. 555-564. - - -[Johnson 1978] - Steven C. Johnson, A portable compiler: theory and practice, in - ‘Proceedings of the 5th ACM SIGACT-SIGPLAN symposium on Principles - of programming languages’ (POPL '78), pp. 97-104. - . - -[Knuth 1965] - Donald E. Knuth, On the Translation of Languages from Left to - Right, in ‘Information and Control’, Vol. 8, Issue 6 (December - 1965), pp. 607-639. - - -[Scott 2000] - Elizabeth Scott, Adrian Johnstone, and Shamsa Sadaf Hussain, - ‘Tomita-Style Generalised LR Parsers’, Royal Holloway, University - of London, Department of Computer Science, TR-00-12 (December - 2000). - - - -File: bison.info, Node: Index of Terms, Prev: Bibliography, Up: Top - -Index of Terms -************** - -[index] -* Menu: - -* ;: Table of Symbols. (line 65) -* :: Table of Symbols. (line 62) -* @[NAME]: Actions and Locations. - (line 6) -* @[NAME] <1>: Table of Symbols. (line 19) -* @$: Actions and Locations. - (line 6) -* @$ <1>: Java Action Features. - (line 38) -* @$ <2>: Action Features. (line 99) -* @$ <3>: Table of Symbols. (line 6) -* @N: Midrule Action Translation. - (line 6) -* @N <1>: Actions and Locations. - (line 6) -* @N <2>: Java Action Features. - (line 34) -* @N <3>: Action Features. (line 104) -* @N <4>: Action Features. (line 105) -* @N <5>: Table of Symbols. (line 10) -* @N <6>: Table of Symbols. (line 11) -* @NAME: Actions and Locations. - (line 6) -* @NAME <1>: Table of Symbols. (line 18) -* /*: Table of Symbols. (line 58) -* /* ... */: Grammar Outline. (line 6) -* //: Table of Symbols. (line 59) -* // ...: Grammar Outline. (line 6) -* %?: Semantic Predicates. (line 6) -* %?{EXPRESSION}: Table of Symbols. (line 50) -* %{: Java Declarations Summary. - (line 51) -* %{CODE%}: Table of Symbols. 
(line 45) -* %%: Java Declarations Summary. - (line 47) -* %% <1>: Table of Symbols. (line 41) -* %code: Prologue Alternatives. - (line 6) -* %code <1>: Decl Summary. (line 52) -* %code <2>: Decl Summary. (line 53) -* %code <3>: Decl Summary. (line 54) -* %code <4>: %code Summary. (line 6) -* %code <5>: %code Summary. (line 14) -* %code <6>: %code Summary. (line 25) -* %code <7>: Calc++ Parser. (line 63) -* %code <8>: Java Declarations Summary. - (line 31) -* %code <9>: Table of Symbols. (line 89) -* %code <10>: Table of Symbols. (line 90) -* %code imports: %code Summary. (line 87) -* %code imports <1>: Java Declarations Summary. - (line 35) -* %code init: Java Declarations Summary. - (line 39) -* %code lexer: Java Declarations Summary. - (line 43) -* %code provides: Prologue Alternatives. - (line 6) -* %code provides <1>: Decl Summary. (line 109) -* %code provides <2>: %code Summary. (line 58) -* %code requires: Prologue Alternatives. - (line 6) -* %code requires <1>: Decl Summary. (line 109) -* %code requires <2>: %code Summary. (line 41) -* %code requires <3>: Calc++ Parser. (line 31) -* %code top: Prologue Alternatives. - (line 6) -* %code top <1>: %code Summary. (line 71) -* %debug: Decl Summary. (line 58) -* %debug <1>: Enabling Traces. (line 22) -* %debug <2>: Table of Symbols. (line 95) -* %define: Decl Summary. (line 62) -* %define <1>: Decl Summary. (line 63) -* %define <2>: Decl Summary. (line 64) -* %define <3>: Decl Summary. (line 65) -* %define <4>: %define Summary. (line 13) -* %define <5>: %define Summary. (line 14) -* %define <6>: %define Summary. (line 15) -* %define <7>: %define Summary. (line 16) -* %define <8>: %define Summary. (line 561) -* %define <9>: %define Summary. (line 640) -* %define <10>: Table of Symbols. (line 98) -* %define <11>: Table of Symbols. (line 99) -* %define <12>: Table of Symbols. (line 100) -* %define <13>: Table of Symbols. (line 101) -* %define api.filename.type: %define Summary. 
(line 51) -* %define api.header.include: %define Summary. (line 68) -* %define api.header.include <1>: %define Summary. (line 69) -* %define api.location.file: %define Summary. (line 110) -* %define api.location.file <1>: %define Summary. (line 111) -* %define api.location.include: %define Summary. (line 137) -* %define api.location.include <1>: %define Summary. (line 138) -* %define api.location.type: %define Summary. (line 156) -* %define api.location.type <1>: User Defined Location Type. - (line 6) -* %define api.location.type <2>: Java Declarations Summary. - (line 101) -* %define api.namespace: %define Summary. (line 171) -* %define api.namespace <1>: C++ Bison Interface. (line 10) -* %define api.package: Java Declarations Summary. - (line 107) -* %define api.parser.abstract: Java Declarations Summary. - (line 60) -* %define api.parser.annotations: Java Declarations Summary. - (line 64) -* %define api.parser.class: %define Summary. (line 198) -* %define api.parser.class <1>: Java Declarations Summary. - (line 68) -* %define api.parser.extends: Java Declarations Summary. - (line 72) -* %define api.parser.final: Java Declarations Summary. - (line 76) -* %define api.parser.implements: Java Declarations Summary. - (line 80) -* %define api.parser.public: Java Declarations Summary. - (line 84) -* %define api.parser.strictfp: Java Declarations Summary. - (line 88) -* %define api.position.type: Java Declarations Summary. - (line 111) -* %define api.prefix: %define Summary. (line 211) -* %define api.prefix <1>: Java Declarations Summary. - (line 55) -* %define api.pure: Pure Decl. (line 6) -* %define api.pure <1>: %define Summary. (line 225) -* %define api.push-pull: Push Decl. (line 6) -* %define api.push-pull <1>: %define Summary. (line 264) -* %define api.push-pull <2>: D Push Parser Interface. - (line 6) -* %define api.push-pull <3>: Java Push Parser Interface. - (line 6) -* %define api.symbol.prefix: %define Summary. 
(line 275) -* %define api.token.constructor: %define Summary. (line 323) -* %define api.token.constructor <1>: Calc++ Parser. (line 21) -* %define api.token.prefix: %define Summary. (line 339) -* %define api.token.raw: %define Summary. (line 371) -* %define api.token.raw <1>: Calc++ Parser. (line 15) -* %define api.value.automove: %define Summary. (line 401) -* %define api.value.type: %define Summary. (line 440) -* %define api.value.type <1>: %define Summary. (line 441) -* %define api.value.type <2>: Java Declarations Summary. - (line 116) -* %define api.value.type union: Type Generation. (line 6) -* %define api.value.type variant: Calc++ Parser. (line 21) -* %define api.value.union.name: %define Summary. (line 512) -* %define init_throws: Java Declarations Summary. - (line 92) -* %define lex_throws: Java Declarations Summary. - (line 96) -* %define lr.default-reduction: %define Summary. (line 525) -* %define lr.default-reduction <1>: Default Reductions. (line 6) -* %define lr.default-reduction <2>: Default Reductions. (line 82) -* %define lr.keep-unreachable-state: %define Summary. (line 539) -* %define lr.keep-unreachable-state <1>: Unreachable States. (line 6) -* %define lr.keep-unreachable-state <2>: Unreachable States. (line 16) -* %define lr.type: %define Summary. (line 550) -* %define lr.type <1>: LR Table Construction. - (line 6) -* %define lr.type <2>: LR Table Construction. - (line 23) -* %define parse.assert: %define Summary. (line 564) -* %define parse.error: %define Summary. (line 582) -* %define parse.error custom: Syntax Error Reporting Function. - (line 6) -* %define parse.error detailed: Error Reporting Function. - (line 16) -* %define parse.error verbose: Error Reporting Function. - (line 16) -* %define parse.lac: %define Summary. (line 615) -* %define parse.lac <1>: LAC. (line 6) -* %define parse.lac <2>: LAC. (line 27) -* %define parse.trace: %define Summary. (line 624) -* %define parse.trace <1>: Enabling Traces. 
(line 10) -* %define throws: Java Declarations Summary. - (line 120) -* %defines: Decl Summary. (line 69) -* %defines <1>: Decl Summary. (line 70) -* %defines <2>: Table of Symbols. (line 105) -* %defines <3>: Table of Symbols. (line 106) -* %destructor: Typed Midrule Actions. - (line 6) -* %destructor <1>: Destructor Decl. (line 6) -* %destructor <2>: Destructor Decl. (line 21) -* %destructor <3>: Destructor Decl. (line 22) -* %destructor <4>: Decl Summary. (line 73) -* %destructor <5>: Table of Symbols. (line 109) -* %dprec: Merging GLR Parses. (line 6) -* %dprec <1>: Table of Symbols. (line 113) -* %empty: Empty Rules. (line 6) -* %empty <1>: Table of Symbols. (line 118) -* %error-verbose: Table of Symbols. (line 136) -* %expect: Expect Decl. (line 6) -* %expect <1>: Decl Summary. (line 40) -* %expect-rr: Simple GLR Parsers. (line 6) -* %expect-rr <1>: Expect Decl. (line 6) -* %expect-rr <2>: Decl Summary. (line 44) -* %file-prefix: Decl Summary. (line 77) -* %file-prefix <1>: Table of Symbols. (line 139) -* %glr-parser: GLR Parsers. (line 6) -* %glr-parser <1>: Simple GLR Parsers. (line 6) -* %glr-parser <2>: Table of Symbols. (line 143) -* %header: Decl Summary. (line 81) -* %header <1>: Decl Summary. (line 127) -* %header <2>: Table of Symbols. (line 146) -* %header <3>: Table of Symbols. (line 150) -* %initial-action: Initial Action Decl. (line 6) -* %initial-action <1>: Initial Action Decl. (line 10) -* %initial-action <2>: Initial Action Decl. (line 11) -* %initial-action <3>: Table of Symbols. (line 154) -* %language: Decl Summary. (line 130) -* %language <1>: Table of Symbols. (line 157) -* %language "Java": Java Declarations Summary. - (line 9) -* %left: Symbol Decls. (line 6) -* %left <1>: Decl Summary. (line 20) -* %left <2>: Using Precedence. (line 6) -* %left <3>: Table of Symbols. (line 161) -* %lex-param: Pure Calling. (line 31) -* %lex-param <1>: Pure Calling. (line 32) -* %lex-param <2>: Java Declarations Summary. 
- (line 12) -* %lex-param <3>: Table of Symbols. (line 165) -* %locations: Decl Summary. (line 135) -* %merge: Merging GLR Parses. (line 6) -* %merge <1>: Table of Symbols. (line 169) -* %name-prefix: Decl Summary. (line 141) -* %name-prefix <1>: Table of Symbols. (line 175) -* %no-lines: Decl Summary. (line 159) -* %no-lines <1>: Table of Symbols. (line 192) -* %nonassoc: Decl Summary. (line 24) -* %nonassoc <1>: Using Precedence. (line 6) -* %nonassoc <2>: LR Table Construction. - (line 97) -* %nonassoc <3>: Default Reductions. (line 6) -* %nonassoc <4>: Table of Symbols. (line 196) -* %nterm: Type Decl. (line 6) -* %nterm <1>: Symbol Decls. (line 6) -* %nterm <2>: Decl Summary. (line 29) -* %nterm <3>: Java Declarations Summary. - (line 27) -* %nterm <4>: Table of Symbols. (line 200) -* %output: Decl Summary. (line 168) -* %output <1>: Table of Symbols. (line 203) -* %param: Pure Calling. (line 36) -* %param <1>: Pure Calling. (line 37) -* %param <2>: Table of Symbols. (line 207) -* %parse-param: Parser Function. (line 38) -* %parse-param <1>: Parser Function. (line 39) -* %parse-param <2>: Java Declarations Summary. - (line 18) -* %parse-param <3>: Table of Symbols. (line 211) -* %prec: Contextual Precedence. - (line 6) -* %prec <1>: Table of Symbols. (line 215) -* %precedence: Using Precedence. (line 6) -* %precedence <1>: Precedence Only. (line 6) -* %precedence <2>: Table of Symbols. (line 219) -* %printer: Printer Decl. (line 6) -* %printer <1>: Printer Decl. (line 15) -* %printer <2>: Printer Decl. (line 16) -* %pure-parser: Decl Summary. (line 171) -* %pure-parser <1>: Table of Symbols. (line 223) -* %require: Require Decl. (line 6) -* %require <1>: Decl Summary. (line 175) -* %require <2>: Table of Symbols. (line 227) -* %right: Decl Summary. (line 16) -* %right <1>: Using Precedence. (line 6) -* %right <2>: Table of Symbols. (line 230) -* %skeleton: Decl Summary. (line 178) -* %skeleton <1>: Table of Symbols. (line 234) -* %start: Start Decl. 
(line 6) -* %start <1>: Decl Summary. (line 37) -* %start <2>: Table of Symbols. (line 238) -* %token: Token Decl. (line 6) -* %token <1>: Symbol Decls. (line 6) -* %token <2>: Decl Summary. (line 12) -* %token <3>: Java Declarations Summary. - (line 23) -* %token <4>: Table of Symbols. (line 241) -* %token-table: Decl Summary. (line 186) -* %token-table <1>: Table of Symbols. (line 245) -* %type: Type Decl. (line 6) -* %type <1>: Symbol Decls. (line 6) -* %type <2>: Decl Summary. (line 33) -* %type <3>: Table of Symbols. (line 249) -* %union: Union Decl. (line 6) -* %union <1>: Structured Value Type. - (line 6) -* %union <2>: Decl Summary. (line 8) -* %union <3>: Table of Symbols. (line 258) -* %verbose: Decl Summary. (line 241) -* %yacc: Decl Summary. (line 246) -* <*>: Destructor Decl. (line 6) -* <*> <1>: Printer Decl. (line 6) -* <*> <2>: Table of Symbols. (line 72) -* <>: Destructor Decl. (line 6) -* <> <1>: Printer Decl. (line 6) -* <> <2>: Table of Symbols. (line 78) -* |: Rules Syntax. (line 48) -* | <1>: Table of Symbols. (line 68) -* $[NAME]: Actions. (line 6) -* $[NAME] <1>: Table of Symbols. (line 37) -* $@N: Midrule Action Translation. - (line 6) -* $@N <1>: Table of Symbols. (line 23) -* $$: Java Action Features. - (line 28) -* $$ <1>: Action Features. (line 17) -* $N: Java Action Features. - (line 16) -* $N <1>: Action Features. (line 21) -* $$: Actions. (line 6) -* $$ <1>: Java Action Features. - (line 20) -* $$ <2>: Action Features. (line 9) -* $$ <3>: D Action Features. (line 9) -* $$ <4>: Table of Symbols. (line 28) -* $accept: Table of Symbols. (line 84) -* $end: Table of Symbols. (line 122) -* $N: Actions. (line 6) -* $N <1>: Java Action Features. - (line 12) -* $N <2>: Action Features. (line 13) -* $N <3>: D Action Features. (line 13) -* $N <4>: Table of Symbols. (line 32) -* $NAME: Actions. (line 6) -* $NAME <1>: Table of Symbols. (line 36) -* $undefined: Table of Symbols. (line 253) -* abstract syntax tree: Implementing Gotos/Loops. 
- (line 17) -* accepting state: Understanding. (line 185) -* action: Actions. (line 6) -* action data types: Action Types. (line 6) -* action features summary: Action Features. (line 6) -* actions in midrule: Midrule Actions. (line 6) -* actions in midrule <1>: Destructor Decl. (line 83) -* actions, location: Actions and Locations. - (line 6) -* actions, semantic: Semantic Actions. (line 6) -* additional C code section: Epilogue. (line 6) -* algorithm of parser: Algorithm. (line 6) -* ambiguous grammars: Language and Grammar. - (line 33) -* ambiguous grammars <1>: Generalized LR Parsing. - (line 6) -* associativity: Why Precedence. (line 34) -* AST: Implementing Gotos/Loops. - (line 17) -* Backus-Naur form: Language and Grammar. - (line 16) -* begin of location: C++ location. (line 20) -* begin of Location: D Location Values. (line 10) -* begin of Location <1>: Java Location Values. - (line 19) -* Bison declaration summary: Decl Summary. (line 6) -* Bison declarations: Declarations. (line 6) -* Bison declarations (introduction): Bison Declarations. (line 6) -* Bison grammar: Grammar in Bison. (line 6) -* Bison invocation: Invocation. (line 6) -* Bison parser: Bison Parser. (line 6) -* Bison parser algorithm: Algorithm. (line 6) -* Bison symbols, table of: Table of Symbols. (line 6) -* Bison utility: Bison Parser. (line 6) -* BISON_I18N: Enabling I18n. (line 18) -* BISON_LOCALEDIR: Enabling I18n. (line 18) -* bison-i18n.m4: Enabling I18n. (line 11) -* bison-po: Internationalization. - (line 6) -* bisonSkeleton of YYParser: D Parser Interface. (line 70) -* bisonSkeleton of YYParser <1>: Java Parser Interface. - (line 92) -* bisonVersion of YYParser: D Parser Interface. (line 69) -* bisonVersion of YYParser <1>: Java Parser Interface. - (line 91) -* BNF: Language and Grammar. - (line 16) -* braced code: Rules Syntax. (line 29) -* byacc: Byacc. (line 6) -* C code, section for additional: Epilogue. (line 6) -* C-language interface: Interface. 
(line 6) -* calc: Infix Calc. (line 6) -* calculator, infix notation: Infix Calc. (line 6) -* calculator, location tracking: Location Tracking Calc. - (line 6) -* calculator, multi-function: Multi-function Calc. (line 6) -* calculator, simple: RPN Calc. (line 6) -* canonical LR: Mysterious Conflicts. - (line 48) -* canonical LR <1>: LR Table Construction. - (line 6) -* cex: Counterexamples. (line 6) -* character token: Symbols. (line 37) -* column of position: C++ position. (line 35) -* columns on location: C++ location. (line 24) -* columns on position: C++ position. (line 38) -* comment: Grammar Outline. (line 6) -* compatibility: Versioning. (line 6) -* compiling the parser: Rpcalc Compile. (line 6) -* conflict counterexamples: Counterexamples. (line 6) -* conflicts: GLR Parsers. (line 6) -* conflicts <1>: Simple GLR Parsers. (line 6) -* conflicts <2>: Merging GLR Parses. (line 6) -* conflicts <3>: Shift/Reduce. (line 6) -* conflicts, reduce/reduce: Reduce/Reduce. (line 6) -* conflicts, suppressing warnings of: Expect Decl. (line 6) -* consistent states: Default Reductions. (line 17) -* context: C++ Parser Context. (line 16) -* context-dependent precedence: Contextual Precedence. - (line 6) -* context-free grammar: Language and Grammar. - (line 6) -* controlling function: Rpcalc Main. (line 6) -* core, item set: Understanding. (line 132) -* counter_type: C++ position. (line 11) -* counterexample, nonunifying: Glossary. (line 31) -* counterexample, unifying: Glossary. (line 31) -* counterexamples: Counterexamples. (line 6) -* dangling else: Shift/Reduce. (line 6) -* data type of locations: Location Type. (line 6) -* data types in actions: Action Types. (line 6) -* data types of semantic values: Value Type. (line 6) -* debug_level on parser: C++ Parser Interface. - (line 91) -* debug_stream on parser: C++ Parser Interface. - (line 86) -* debugging: Tracing. (line 6) -* declaration summary: Decl Summary. (line 6) -* declarations: Prologue. 
(line 6) -* declarations section: Prologue. (line 6) -* declarations, Bison: Declarations. (line 6) -* declarations, Bison (introduction): Bison Declarations. (line 6) -* declaring literal string tokens: Token Decl. (line 6) -* declaring operator precedence: Precedence Decl. (line 6) -* declaring the start symbol: Start Decl. (line 6) -* declaring token kind names: Token Decl. (line 6) -* declaring value types: Type Generation. (line 6) -* declaring value types <1>: Union Decl. (line 6) -* declaring value types <2>: Structured Value Type. - (line 6) -* declaring value types, nonterminals: Type Decl. (line 6) -* default action: Actions. (line 61) -* default data type: Value Type. (line 6) -* default location type: Location Type. (line 6) -* default reductions: Default Reductions. (line 6) -* default stack limit: Memory Management. (line 30) -* default start symbol: Start Decl. (line 6) -* defaulted states: Default Reductions. (line 17) -* deferred semantic actions: GLR Semantic Actions. - (line 12) -* defining language semantics: Semantics. (line 6) -* delayed syntax error detection: LR Table Construction. - (line 97) -* delayed syntax error detection <1>: Default Reductions. (line 43) -* delayed yylex invocations: Default Reductions. (line 17) -* discarded symbols: Destructor Decl. (line 92) -* discarded symbols, midrule actions: Typed Midrule Actions. - (line 6) -* dot: Graphviz. (line 6) -* dotted rule: Understanding. (line 110) -* else, dangling: Shift/Reduce. (line 6) -* emplace on value_type: C++ Variants. (line 53) -* emplace on value_type: C++ Variants. (line 47) -* emplace on value_type <1>: C++ Variants. (line 48) -* empty rule: Empty Rules. (line 6) -* end of location: C++ location. (line 21) -* end of Location: D Location Values. (line 11) -* end of Location <1>: Java Location Values. - (line 20) -* epilogue: Epilogue. (line 6) -* error: Error Recovery. (line 20) -* error <1>: Table of Symbols. (line 126) -* error on parser: C++ Parser Interface. 
- (line 96) -* error on parser <1>: C++ Parser Interface. - (line 98) -* error recovery: Error Recovery. (line 6) -* error recovery, midrule actions: Typed Midrule Actions. - (line 6) -* error recovery, simple: Simple Error Recovery. - (line 6) -* error reporting function: Error Reporting Function. - (line 6) -* error reporting routine: Rpcalc Error. (line 6) -* examples, simple: Examples. (line 6) -* exceptions: C++ Parser Interface. - (line 78) -* exercises: Exercises. (line 6) -* expected_tokens on context: C++ Parser Context. (line 53) -* file format: Grammar Layout. (line 6) -* file of position: C++ position. (line 24) -* filename_type: C++ position. (line 6) -* finite-state machine: Parser States. (line 6) -* formal grammar: Grammar in Bison. (line 6) -* format of grammar file: Grammar Layout. (line 6) -* freeing discarded symbols: Destructor Decl. (line 6) -* frequently asked questions: FAQ. (line 6) -* generalized LR (GLR) parsing: Language and Grammar. - (line 33) -* generalized LR (GLR) parsing <1>: GLR Parsers. (line 6) -* generalized LR (GLR) parsing <2>: Generalized LR Parsing. - (line 6) -* generalized LR (GLR) parsing, ambiguous grammars: Merging GLR Parses. - (line 6) -* generalized LR (GLR) parsing, unambiguous grammars: Simple GLR Parsers. - (line 6) -* getDebugLevel on YYParser: Java Parser Interface. - (line 86) -* getDebugLevel() on YYParser: D Parser Interface. (line 64) -* getDebugStream on YYParser: Java Parser Interface. - (line 81) -* getDebugStream() on YYParser: D Parser Interface. (line 59) -* getEndPos on Lexer: Java Scanner Interface. - (line 40) -* getErrorVerbose on YYParser: Java Parser Interface. - (line 64) -* getErrorVerbose() on YYParser: D Parser Interface. (line 43) -* getExpectedTokens on YYParser.Context: Java Parser Context Interface. - (line 39) -* getExpectedTokens(YYParser.SymbolKind[] on YYParser.Context: D Parser Context Interface. - (line 21) -* getLocation on YYParser.Context: Java Parser Context Interface. 
- (line 36) -* getLocation() on YYParser.Context: D Parser Context Interface. - (line 18) -* getLVal on Lexer: Java Scanner Interface. - (line 52) -* getName on YYParser.SymbolKind: Java Parser Context Interface. - (line 29) -* getStartPos on Lexer: Java Scanner Interface. - (line 39) -* gettext: Internationalization. - (line 6) -* getToken on YYParser.Context: Java Parser Context Interface. - (line 32) -* getToken() on YYParser.Context: D Parser Context Interface. - (line 14) -* glossary: Glossary. (line 6) -* GLR parsers and yychar: GLR Semantic Actions. - (line 16) -* GLR parsers and yyclearin: GLR Semantic Actions. - (line 24) -* GLR parsers and YYERROR: GLR Semantic Actions. - (line 37) -* GLR parsers and yylloc: GLR Semantic Actions. - (line 16) -* GLR parsers and YYLLOC_DEFAULT: Location Default Action. - (line 6) -* GLR parsers and yylval: GLR Semantic Actions. - (line 16) -* GLR parsing: Language and Grammar. - (line 33) -* GLR parsing <1>: GLR Parsers. (line 6) -* GLR parsing <2>: Generalized LR Parsing. - (line 6) -* GLR parsing, ambiguous grammars: Merging GLR Parses. (line 6) -* GLR parsing, unambiguous grammars: Simple GLR Parsers. (line 6) -* GLR with LALR: LR Table Construction. - (line 59) -* grammar file: Grammar Layout. (line 6) -* grammar rule syntax: Rules Syntax. (line 6) -* grammar rules section: Grammar Rules. (line 6) -* grammar, Bison: Grammar in Bison. (line 6) -* grammar, context-free: Language and Grammar. - (line 6) -* grouping, syntactic: Language and Grammar. - (line 46) -* Header guard: Decl Summary. (line 112) -* history: History. (line 6) -* i18n: Internationalization. - (line 6) -* i18n of YYParser: Java Parser Interface. - (line 99) -* IELR: Mysterious Conflicts. - (line 48) -* IELR <1>: LR Table Construction. - (line 6) -* IELR grammars: Language and Grammar. - (line 22) -* infix notation calculator: Infix Calc. (line 6) -* initialize on location: C++ location. (line 16) -* initialize on position: C++ position. 
(line 20) -* interface: Interface. (line 6) -* internationalization: Internationalization. - (line 6) -* introduction: Introduction. (line 6) -* invoking Bison: Invocation. (line 6) -* item: Understanding. (line 110) -* item set core: Understanding. (line 132) -* item set core <1>: Understanding. (line 132) -* kernel, item set: Understanding. (line 132) -* kind on symbol_type: Complete Symbols. (line 21) -* LAC: LR Table Construction. - (line 97) -* LAC <1>: Default Reductions. (line 54) -* LAC <2>: LAC. (line 6) -* LALR: Mysterious Conflicts. - (line 36) -* LALR <1>: LR Table Construction. - (line 6) -* LALR grammars: Language and Grammar. - (line 22) -* language semantics, defining: Semantics. (line 6) -* layout of Bison grammar: Grammar Layout. (line 6) -* left recursion: Recursion. (line 17) -* lexical analyzer: Lexical. (line 6) -* lexical analyzer, purpose: Bison Parser. (line 6) -* lexical analyzer, writing: Rpcalc Lexer. (line 6) -* lexical tie-in: Lexical Tie-ins. (line 6) -* line of position: C++ position. (line 28) -* lines on location: C++ location. (line 25) -* lines on position: C++ position. (line 31) -* literal string token: Symbols. (line 57) -* literal token: Symbols. (line 37) -* location: Locations. (line 6) -* location <1>: Tracking Locations. (line 6) -* location actions: Actions and Locations. - (line 6) -* location on context: C++ Parser Context. (line 50) -* location on location: C++ location. (line 6) -* location on location <1>: C++ location. (line 10) -* location on location <2>: C++ location. (line 11) -* Location on Location: Java Location Values. - (line 23) -* Location on Location <1>: Java Location Values. - (line 27) -* location tracking calculator: Location Tracking Calc. - (line 6) -* location_type: C++ Parser Interface. - (line 49) -* location, textual: Locations. (line 6) -* location, textual <1>: Tracking Locations. (line 6) -* lookahead correction: LAC. (line 6) -* lookahead on context: C++ Parser Context. 
(line 43) -* lookahead token: Lookahead. (line 6) -* LR: Mysterious Conflicts. - (line 36) -* LR grammars: Language and Grammar. - (line 22) -* ltcalc: Location Tracking Calc. - (line 6) -* main function in simple example: Rpcalc Main. (line 6) -* make_TOKEN on parser: Complete Symbols. (line 82) -* make_TOKEN on parser <1>: Complete Symbols. (line 84) -* make_TOKEN on parser <2>: Complete Symbols. (line 86) -* make_TOKEN on parser <3>: Complete Symbols. (line 87) -* memory exhaustion: Memory Management. (line 6) -* memory management: Memory Management. (line 6) -* mfcalc: Multi-function Calc. (line 6) -* midrule actions: Midrule Actions. (line 6) -* midrule actions <1>: Destructor Decl. (line 83) -* multi-function calculator: Multi-function Calc. (line 6) -* multicharacter literal: Symbols. (line 57) -* mutual recursion: Recursion. (line 34) -* Mysterious Conflict: LR Table Construction. - (line 6) -* Mysterious Conflicts: Mysterious Conflicts. - (line 6) -* name on symbol_type: Complete Symbols. (line 24) -* named references: Named References. (line 6) -* NLS: Internationalization. - (line 6) -* nondeterministic parsing: Language and Grammar. - (line 33) -* nondeterministic parsing <1>: Generalized LR Parsing. - (line 6) -* nonterminal symbol: Symbols. (line 6) -* nonterminal, useless: Understanding. (line 56) -* nonunifying counterexample: Glossary. (line 31) -* operator precedence: Precedence. (line 6) -* operator precedence, declaring: Precedence Decl. (line 6) -* operator- on location: C++ location. (line 30) -* operator- on position: C++ position. (line 45) -* operator-= on location: C++ location. (line 31) -* operator-= on position: C++ position. (line 44) -* operator!= on location: C++ location. (line 43) -* operator!= on position: C++ position. (line 49) -* operator() on parser: C++ Parser Interface. - (line 73) -* operator+ on location: C++ location. (line 28) -* operator+ on location <1>: C++ location. (line 34) -* operator+ on position: C++ position. 
(line 43) -* operator+= on location: C++ location. (line 29) -* operator+= on location <1>: C++ location. (line 35) -* operator+= on position: C++ position. (line 42) -* operator<<: C++ position. (line 52) -* operator<< <1>: C++ location. (line 47) -* operator== on location: C++ location. (line 42) -* operator== on position: C++ position. (line 48) -* options for invoking Bison: Invocation. (line 6) -* overflow of parser stack: Memory Management. (line 6) -* parse error: Error Reporting Function. - (line 6) -* parse on parser: C++ Parser Interface. - (line 74) -* parse on YYParser: Java Parser Interface. - (line 60) -* parse() on YYParser: D Parser Interface. (line 39) -* parser: Bison Parser. (line 6) -* parser on parser: C++ Parser Interface. - (line 63) -* parser on parser <1>: C++ Parser Interface. - (line 64) -* parser stack: Algorithm. (line 6) -* parser stack overflow: Memory Management. (line 6) -* parser state: Parser States. (line 6) -* position on position: C++ position. (line 14) -* precedence declarations: Precedence Decl. (line 6) -* precedence of operators: Precedence. (line 6) -* precedence, context-dependent: Contextual Precedence. - (line 6) -* precedence, unary operator: Contextual Precedence. - (line 6) -* preventing warnings about conflicts: Expect Decl. (line 6) -* printing semantic values: Printer Decl. (line 6) -* Prologue: Prologue. (line 6) -* Prologue <1>: %code Summary. (line 6) -* Prologue Alternatives: Prologue Alternatives. - (line 6) -* pure parser: Pure Decl. (line 6) -* push parser: Push Decl. (line 6) -* push parser <1>: Push Decl. (line 6) -* push_parse on YYParser: Java Push Parser Interface. - (line 18) -* push_parse on YYParser <1>: Java Push Parser Interface. - (line 19) -* push_parse on YYParser <2>: Java Push Parser Interface. - (line 21) -* pushParse on YYParser: D Push Parser Interface. - (line 18) -* questions: FAQ. (line 6) -* recovering: Java Action Features. 
- (line 54) -* recovering on YYParser: Java Parser Interface. - (line 77) -* recovering() on YYParser: D Parser Interface. (line 55) -* recovery from errors: Error Recovery. (line 6) -* recursive rule: Recursion. (line 6) -* reduce/reduce conflict: Reduce/Reduce. (line 6) -* reduce/reduce conflicts: GLR Parsers. (line 6) -* reduce/reduce conflicts <1>: Simple GLR Parsers. (line 6) -* reduce/reduce conflicts <2>: Merging GLR Parses. (line 6) -* reduction: Algorithm. (line 6) -* reentrant parser: Pure Decl. (line 6) -* report_syntax_error on parser: C++ Parser Context. (line 9) -* reportSyntaxError on Lexer: Java Scanner Interface. - (line 59) -* reportSyntaxError(YYParser.Context on Lexer: D Scanner Interface. - (line 34) -* requiring a version of Bison: Require Decl. (line 6) -* Reverse Polish Notation: RPN Calc. (line 6) -* right recursion: Recursion. (line 17) -* rpcalc: RPN Calc. (line 6) -* rule syntax: Rules Syntax. (line 6) -* rule, dotted: Understanding. (line 110) -* rule, empty: Empty Rules. (line 6) -* rule, recursive: Recursion. (line 6) -* rule, useless: Understanding. (line 56) -* rules section for grammar: Grammar Rules. (line 6) -* running Bison (introduction): Rpcalc Generate. (line 6) -* semantic actions: Semantic Actions. (line 6) -* Semantic predicates in GLR parsers: Semantic Predicates. (line 6) -* semantic value: Semantic Values. (line 6) -* semantic value type: Value Type. (line 6) -* set_debug_level on parser: C++ Parser Interface. - (line 92) -* set_debug_stream on parser: C++ Parser Interface. - (line 87) -* setDebugLevel on YYParser: Java Parser Interface. - (line 87) -* setDebugLevel(int on YYParser: D Parser Interface. (line 65) -* setDebugStream on YYParser: Java Parser Interface. - (line 82) -* setDebugStream(File on YYParser: D Parser Interface. (line 60) -* setErrorVerbose on YYParser: Java Parser Interface. - (line 65) -* setErrorVerbose(boolean on YYParser: D Parser Interface. (line 44) -* shift/reduce conflicts: GLR Parsers. 
(line 6) -* shift/reduce conflicts <1>: Simple GLR Parsers. (line 6) -* shift/reduce conflicts <2>: Shift/Reduce. (line 6) -* shifting: Algorithm. (line 6) -* simple examples: Examples. (line 6) -* single-character literal: Symbols. (line 37) -* stack overflow: Memory Management. (line 6) -* stack, parser: Algorithm. (line 6) -* stages in using Bison: Stages. (line 6) -* start symbol: Language and Grammar. - (line 94) -* start symbol, declaring: Start Decl. (line 6) -* state (of parser): Parser States. (line 6) -* step on location: C++ location. (line 39) -* string token: Symbols. (line 57) -* summary, action features: Action Features. (line 6) -* summary, Bison declaration: Decl Summary. (line 6) -* suppressing conflict warnings: Expect Decl. (line 6) -* symbol: Symbols. (line 6) -* symbol table example: Mfcalc Symbol Table. (line 6) -* symbol_kind_type: C++ Parser Context. (line 19) -* symbol_name on parser: C++ Parser Context. (line 68) -* symbol_type: Complete Symbols. (line 17) -* symbol_type on parser::symbol_type: Complete Symbols. (line 32) -* symbol_type on parser::symbol_type <1>: Complete Symbols. (line 34) -* symbol_type on parser::symbol_type <2>: Complete Symbols. (line 36) -* symbol_type on parser::symbol_type <3>: Complete Symbols. (line 38) -* SymbolKind: D Parser Context Interface. - (line 9) -* SymbolKind <1>: Java Parser Context Interface. - (line 9) -* symbols (abstract): Language and Grammar. - (line 46) -* symbols in Bison, table of: Table of Symbols. (line 6) -* syntactic grouping: Language and Grammar. - (line 46) -* syntax error: Error Reporting Function. - (line 6) -* syntax of grammar rules: Rules Syntax. (line 6) -* syntax_error: C++ Parser Interface. - (line 53) -* syntax_error on syntax_error: C++ Parser Interface. - (line 68) -* syntax_error on syntax_error <1>: C++ Parser Interface. - (line 70) -* terminal symbol: Symbols. (line 6) -* textual location: Locations. (line 6) -* textual location <1>: Tracking Locations. 
(line 6) -* this(LEX_PARAM, on YYParser: D Parser Interface. (line 29) -* this(Lexer on YYParser: D Parser Interface. (line 34) -* this(Position on Location: D Location Values. (line 14) -* this(Position on Location <1>: D Location Values. (line 18) -* token: Language and Grammar. - (line 46) -* token <1>: C++ Parser Interface. - (line 14) -* token kind: Symbols. (line 6) -* token kind names, declaring: Token Decl. (line 6) -* token on context: C++ Parser Context. (line 46) -* token_kind_type: C++ Parser Interface. - (line 21) -* token, useless: Understanding. (line 56) -* toString on Location: Java Location Values. - (line 30) -* toString() on Location: D Location Values. (line 21) -* tracing the parser: Tracing. (line 6) -* unary operator precedence: Contextual Precedence. - (line 6) -* ungulates: History. (line 6) -* unifying counterexample: Glossary. (line 31) -* unreachable states: Unreachable States. (line 6) -* useless nonterminal: Understanding. (line 56) -* useless rule: Understanding. (line 56) -* useless token: Understanding. (line 56) -* using Bison: Stages. (line 6) -* value type, semantic: Value Type. (line 6) -* value types, declaring: Type Generation. (line 6) -* value types, declaring <1>: Union Decl. (line 6) -* value types, declaring <2>: Structured Value Type. - (line 6) -* value types, nonterminals, declaring: Type Decl. (line 6) -* value_type: C++ Parser Interface. - (line 46) -* value, semantic: Semantic Values. (line 6) -* version: Versioning. (line 6) -* version requirement: Require Decl. (line 6) -* warnings, preventing: Expect Decl. (line 6) -* writing a lexical analyzer: Rpcalc Lexer. (line 6) -* xml: Xml. (line 6) -* yacchack: yacchack. (line 6) -* YYABORT: Parser Function. (line 28) -* YYABORT <1>: Parser Function. (line 29) -* YYABORT <2>: Action Features. (line 25) -* YYABORT <3>: Java Action Features. - (line 42) -* YYABORT <4>: Table of Symbols. (line 262) -* YYACCEPT: Parser Function. (line 25) -* YYACCEPT <1>: Parser Function. 
(line 26) -* YYACCEPT <2>: Action Features. (line 29) -* YYACCEPT <3>: Java Action Features. - (line 46) -* YYACCEPT <4>: Table of Symbols. (line 270) -* YYBACKUP: Action Features. (line 33) -* YYBACKUP <1>: Action Features. (line 34) -* YYBACKUP <2>: Table of Symbols. (line 278) -* YYBISON: Table of Symbols. (line 282) -* yychar: GLR Semantic Actions. - (line 16) -* yychar <1>: Lookahead. (line 49) -* yychar <2>: Action Features. (line 70) -* yychar <3>: Table of Symbols. (line 287) -* yyclearin: GLR Semantic Actions. - (line 24) -* yyclearin <1>: Action Features. (line 77) -* yyclearin <2>: Error Recovery. (line 99) -* yyclearin <3>: Table of Symbols. (line 293) -* yydebug: Tracing. (line 6) -* YYDEBUG: Enabling Traces. (line 27) -* YYDEBUG <1>: Table of Symbols. (line 297) -* yydebug <1>: Table of Symbols. (line 301) -* YYEMPTY: Action Features. (line 46) -* YYEMPTY <1>: Table of Symbols. (line 306) -* YYENABLE_NLS: Enabling I18n. (line 18) -* YYEOF: Action Features. (line 49) -* YYEOF <1>: Table of Symbols. (line 309) -* yyerrok: Action Features. (line 82) -* yyerrok <1>: Error Recovery. (line 94) -* yyerrok <2>: D Action Features. (line 17) -* yyerrok <3>: Table of Symbols. (line 312) -* YYERROR: GLR Semantic Actions. - (line 37) -* yyerror: Error Reporting Function. - (line 6) -* YYERROR <1>: Action Features. (line 53) -* YYERROR <2>: Java Action Features. - (line 50) -* yyerror <1>: Java Action Features. - (line 59) -* yyerror <2>: Java Action Features. - (line 60) -* yyerror <3>: Java Action Features. - (line 61) -* YYERROR <3>: Table of Symbols. (line 316) -* yyerror <4>: Table of Symbols. (line 327) -* yyerror on Lexer: Java Scanner Interface. - (line 26) -* yyerror on YYParser: Java Parser Interface. - (line 70) -* yyerror on YYParser <1>: Java Parser Interface. - (line 71) -* yyerror on YYParser <2>: Java Parser Interface. - (line 72) -* yyerror(Location on Lexer: D Scanner Interface. (line 26) -* yyerror(Location on YYParser: D Parser Interface. 
(line 50) -* yyerror(string on YYParser: D Parser Interface. (line 49) -* YYFPRINTF: Enabling Traces. (line 47) -* YYFPRINTF <1>: Table of Symbols. (line 331) -* YYINITDEPTH: Memory Management. (line 32) -* YYINITDEPTH <1>: Table of Symbols. (line 334) -* yylex: Lexical. (line 6) -* yylex <1>: Split Symbols. (line 8) -* yylex <2>: Split Symbols. (line 10) -* yylex <3>: Complete Symbols. (line 10) -* yylex <4>: Complete Symbols. (line 11) -* yylex <5>: Table of Symbols. (line 338) -* yylex on Lexer: Java Scanner Interface. - (line 31) -* yylex() on Lexer: D Scanner Interface. (line 30) -* yylloc: GLR Semantic Actions. - (line 16) -* yylloc <1>: Actions and Locations. - (line 63) -* yylloc <2>: Token Locations. (line 6) -* yylloc <3>: Lookahead. (line 49) -* yylloc <4>: Action Features. (line 87) -* yylloc <5>: Table of Symbols. (line 342) -* YYLLOC_DEFAULT: Location Default Action. - (line 6) -* YYLOCATION_PRINT: Printing Locations. (line 6) -* YYLOCATION_PRINT <1>: Printing Locations. (line 10) -* YYLTYPE: Token Locations. (line 19) -* YYLTYPE <1>: Table of Symbols. (line 351) -* yylval: GLR Semantic Actions. - (line 16) -* yylval <1>: Actions. (line 86) -* yylval <2>: Token Values. (line 6) -* yylval <3>: Lookahead. (line 49) -* yylval <4>: Action Features. (line 93) -* yylval <5>: Table of Symbols. (line 355) -* YYMAXDEPTH: Memory Management. (line 14) -* YYMAXDEPTH <1>: Table of Symbols. (line 362) -* yynerrs: Error Reporting Function. - (line 66) -* yynerrs <1>: Table of Symbols. (line 366) -* YYNOMEM: Parser Function. (line 31) -* YYNOMEM <1>: Parser Function. (line 32) -* YYNOMEM <2>: Action Features. (line 61) -* YYNOMEM <3>: Table of Symbols. (line 372) -* yyo: Printer Decl. (line 16) -* yyparse: Parser Function. (line 6) -* yyparse <1>: Parser Function. (line 12) -* yyparse <2>: Table of Symbols. (line 377) -* YYParser on YYParser: Java Parser Interface. - (line 38) -* YYParser on YYParser <1>: Java Parser Interface. 
- (line 47) -* yypcontext_expected_tokens: Syntax Error Reporting Function. - (line 55) -* yypcontext_location: Syntax Error Reporting Function. - (line 51) -* yypcontext_t: Syntax Error Reporting Function. - (line 21) -* yypcontext_token: Syntax Error Reporting Function. - (line 45) -* yypstate_delete: Push Parser Interface. - (line 15) -* yypstate_delete <1>: Push Parser Interface. - (line 19) -* yypstate_delete <2>: Table of Symbols. (line 381) -* yypstate_expected_tokens: Push Parser Interface. - (line 53) -* yypstate_new: Push Parser Interface. - (line 6) -* yypstate_new <1>: Push Parser Interface. - (line 10) -* yypstate_new <2>: Table of Symbols. (line 387) -* yypull_parse: Push Parser Interface. - (line 48) -* yypull_parse <1>: Table of Symbols. (line 392) -* yypush_parse: Push Parser Interface. - (line 23) -* yypush_parse <1>: Push Parser Interface. - (line 27) -* yypush_parse <2>: Table of Symbols. (line 397) -* YYRECOVERING: Error Recovery. (line 110) -* YYRECOVERING <1>: Action Features. (line 65) -* YYRECOVERING <2>: Action Features. (line 66) -* YYRECOVERING <3>: Table of Symbols. (line 402) -* yyreport_syntax_error: Syntax Error Reporting Function. - (line 15) -* YYSTACK_USE_ALLOCA: Table of Symbols. (line 407) -* YYSTYPE: Table of Symbols. (line 424) -* yysymbol_kind_t: Syntax Error Reporting Function. - (line 24) -* yysymbol_kind_t <1>: Table of Symbols. (line 429) -* yysymbol_name: Syntax Error Reporting Function. - (line 72) -* yytoken_kind_t: Table of Symbols. (line 437) -* YYUNDEF: Table of Symbols. (line 443) -* zoo: Bison. 
(line 6) - - -Tag Table: -Node: Top1040 -Node: Introduction18154 -Node: Conditions19727 -Node: Copying21636 -Node: Concepts59175 -Node: Language and Grammar60367 -Node: Grammar in Bison66345 -Node: Semantic Values68261 -Node: Semantic Actions70389 -Node: GLR Parsers71566 -Node: Simple GLR Parsers74339 -Node: Merging GLR Parses80810 -Ref: Merging GLR Parses-Footnote-186139 -Node: GLR Semantic Actions86280 -Node: Semantic Predicates88877 -Node: Locations91330 -Node: Bison Parser92808 -Node: Stages96001 -Node: Grammar Layout97226 -Node: Examples98588 -Node: RPN Calc100075 -Ref: RPN Calc-Footnote-1101127 -Node: Rpcalc Declarations101205 -Node: Rpcalc Rules103257 -Node: Rpcalc Input105148 -Node: Rpcalc Line106714 -Node: Rpcalc Exp107862 -Node: Rpcalc Lexer109871 -Node: Rpcalc Main112564 -Node: Rpcalc Error112975 -Node: Rpcalc Generate114003 -Node: Rpcalc Compile115247 -Node: Infix Calc116211 -Ref: Infix Calc-Footnote-1119054 -Node: Simple Error Recovery119207 -Node: Location Tracking Calc121142 -Node: Ltcalc Declarations121842 -Node: Ltcalc Rules122937 -Node: Ltcalc Lexer124777 -Node: Multi-function Calc127114 -Ref: Multi-function Calc-Footnote-1128891 -Node: Mfcalc Declarations128969 -Node: Mfcalc Rules130993 -Node: Mfcalc Symbol Table132271 -Node: Mfcalc Lexer135731 -Node: Mfcalc Main138304 -Node: Exercises139180 -Node: Grammar File139707 -Node: Grammar Outline140557 -Node: Prologue141415 -Node: Prologue Alternatives143214 -Ref: Prologue Alternatives-Footnote-1152898 -Node: Bison Declarations153003 -Node: Grammar Rules153413 -Node: Epilogue153869 -Node: Symbols154920 -Node: Rules161943 -Node: Rules Syntax162258 -Node: Empty Rules164323 -Node: Recursion165410 -Node: Semantics167068 -Node: Value Type168374 -Node: Multiple Types169658 -Node: Type Generation171108 -Node: Union Decl173038 -Node: Structured Value Type174431 -Node: Actions175457 -Node: Action Types179329 -Node: Midrule Actions180682 -Node: Using Midrule Actions181336 -Node: Typed Midrule Actions184873 -Node: 
Midrule Action Translation186421 -Node: Midrule Conflicts188913 -Node: Tracking Locations191522 -Node: Location Type192254 -Node: Actions and Locations193777 -Node: Printing Locations196159 -Node: Location Default Action196919 -Node: Named References200452 -Node: Declarations203048 -Node: Require Decl204735 -Node: Token Decl205249 -Node: Precedence Decl208162 -Node: Type Decl210438 -Node: Symbol Decls211389 -Node: Initial Action Decl212344 -Node: Destructor Decl213157 -Node: Printer Decl218801 -Node: Expect Decl221070 -Node: Start Decl225067 -Node: Pure Decl225461 -Node: Push Decl227209 -Node: Decl Summary231642 -Ref: %header234319 -Node: %define Summary242648 -Ref: api-filename-type244557 -Ref: api-token-prefix255495 -Node: %code Summary267498 -Node: Multiple Parsers271750 -Node: Interface275605 -Node: Parser Function276712 -Node: Push Parser Interface279203 -Ref: yypstate_new279588 -Ref: yypstate_delete280029 -Ref: yypush_parse280443 -Ref: yypull_parse281455 -Node: Lexical282458 -Node: Calling Convention284033 -Node: Special Tokens285562 -Node: Tokens from Literals286997 -Node: Token Values288078 -Node: Token Locations289242 -Node: Pure Calling290174 -Node: Error Reporting292771 -Node: Error Reporting Function293294 -Node: Syntax Error Reporting Function296664 -Node: Action Features301265 -Node: Internationalization305605 -Node: Enabling I18n306457 -Node: Token I18n308567 -Node: Algorithm309958 -Node: Lookahead312372 -Node: Shift/Reduce314582 -Node: Precedence318637 -Node: Why Precedence319409 -Node: Using Precedence321324 -Node: Precedence Only322820 -Node: Precedence Examples324622 -Node: How Precedence325146 -Node: Non Operators326287 -Node: Contextual Precedence327848 -Node: Parser States329590 -Node: Reduce/Reduce330839 -Node: Mysterious Conflicts335642 -Node: Tuning LR339264 -Node: LR Table Construction340464 -Node: Default Reductions346076 -Node: LAC350863 -Node: Unreachable States356363 -Node: Generalized LR Parsing358366 -Node: Memory Management362744 
-Node: Error Recovery365128 -Node: Context Dependency370404 -Node: Semantic Tokens371257 -Node: Lexical Tie-ins374325 -Node: Tie-in Recovery375781 -Node: Debugging377911 -Node: Counterexamples379374 -Node: Understanding385376 -Ref: state-8392088 -Node: Graphviz397639 -Node: Xml401956 -Node: Tracing403694 -Node: Enabling Traces404067 -Node: Mfcalc Traces408193 -Node: Invocation413400 -Node: Bison Options415502 -Node: Operation Modes416349 -Node: Diagnostics422058 -Ref: Wconflicts-sr422366 -Ref: Wconflicts-rr422390 -Ref: Wcounterexamples422778 -Ref: Wdangling-alias423091 -Ref: Wdeprecated424416 -Ref: Wempty-rule424540 -Ref: Wmidrule-values424752 -Ref: Wprecedence425463 -Ref: Wyacc426597 -Ref: Wother426659 -Ref: Wall426963 -Ref: Wnone427083 -Ref: Werror427359 -Node: Tuning the Parser429164 -Ref: option-yacc432576 -Ref: Tuning the Parser-Footnote-1434425 -Node: Output Files434491 -Node: Option Cross Key437760 -Node: Yacc Library439898 -Node: Other Languages440965 -Node: C++ Parsers441588 -Node: A Simple C++ Example442317 -Ref: A Simple C++ Example-Footnote-1446467 -Node: C++ Bison Interface446550 -Node: C++ Parser Interface448140 -Node: C++ Semantic Values452723 -Node: C++ Unions453273 -Node: C++ Variants454066 -Node: C++ Location Values457827 -Node: C++ position458766 -Node: C++ location461135 -Node: Exposing the Location Classes463325 -Node: User Defined Location Type465218 -Node: C++ Parser Context466797 -Node: C++ Scanner Interface470760 -Node: Split Symbols471328 -Node: Complete Symbols473063 -Node: A Complete C++ Example477814 -Node: Calc++ --- C++ Calculator478757 -Node: Calc++ Parsing Driver479278 -Node: Calc++ Parser482241 -Node: Calc++ Scanner486356 -Node: Calc++ Top Level490299 -Node: D Parsers490998 -Node: D Bison Interface491702 -Node: D Semantic Values492815 -Node: D Location Values493646 -Node: D Parser Interface494517 -Node: D Parser Context Interface499433 -Node: D Scanner Interface500876 -Node: D Action Features503816 -Node: D Push Parser 
Interface504585 -Node: D Complete Symbols506059 -Node: Java Parsers506775 -Node: Java Bison Interface507591 -Node: Java Semantic Values509774 -Node: Java Location Values511469 -Node: Java Parser Interface513067 -Node: Java Parser Context Interface518322 -Node: Java Scanner Interface520407 -Node: Java Action Features524479 -Node: Java Push Parser Interface527230 -Node: Java Differences530223 -Ref: Java Differences-Footnote-1532886 -Node: Java Declarations Summary533040 -Node: History538073 -Node: Yacc538529 -Ref: Yacc-Footnote-1540022 -Ref: Yacc-Footnote-2540245 -Node: yacchack540315 -Node: Byacc540738 -Node: Bison541551 -Node: Other Ungulates543559 -Node: Versioning544139 -Node: FAQ547562 -Node: Memory Exhausted548589 -Node: How Can I Reset the Parser548891 -Node: Strings are Destroyed551483 -Node: Implementing Gotos/Loops553184 -Node: Multiple start-symbols554475 -Node: Secure? Conform?556055 -Node: Enabling Relocatability556505 -Node: I can't build Bison559423 -Node: Where can I find help?560683 -Node: Bug Reports561476 -Node: More Languages562951 -Node: Beta Testing563286 -Node: Mailing Lists564160 -Node: Table of Symbols564372 -Node: Glossary581398 -Node: GNU Free Documentation License591676 -Node: Bibliography616829 -Ref: Corbett 1984616969 -Ref: Denny 2008617320 -Ref: Denny 2010 May617642 -Ref: Denny 2010 November617917 -Ref: DeRemer 1982618253 -Ref: Isradisaikul 2015618523 -Ref: Johnson 1978618848 -Ref: Knuth 1965619113 -Ref: Scott 2000619347 -Node: Index of Terms619664 - -End Tag Table - - -Local Variables: -coding: utf-8 -End: diff --git a/local/recipes/wayland/libwayland/recipe.toml.bak b/local/recipes/wayland/libwayland/recipe.toml.bak deleted file mode 100644 index 9213fdef1c..0000000000 --- a/local/recipes/wayland/libwayland/recipe.toml.bak +++ /dev/null @@ -1,22 +0,0 @@ -#TODO: Requires Redox compatibility patching for missing Linux header paths and -# some POSIX/Linux-only flags during cross-builds. 
-# redox.patch restores the Redox compatibility stubs plus Meson scanner detection. -[source] -tar = "https://gitlab.freedesktop.org/wayland/wayland/-/releases/1.24.0/downloads/wayland-1.24.0.tar.xz" -patches = ["redox.patch"] - -[build] -template = "custom" -dependencies = [ - "relibc", - "libffi", - "expat", - "libxml2", -] -script = """ -DYNAMIC_INIT -cookbook_meson -Ddocumentation=false -Dtests=false -Ddtd_validation=false -Dc_args=-Wno-error -""" - -[package] -dependencies = ["libffi"] diff --git a/recipes/libs/ncurses/recipe.toml.bak b/recipes/libs/ncurses/recipe.toml.bak deleted file mode 100644 index c4135c6092..0000000000 --- a/recipes/libs/ncurses/recipe.toml.bak +++ /dev/null @@ -1,34 +0,0 @@ -[source] -tar = "https://ftp.gnu.org/gnu/ncurses/ncurses-6.6.tar.gz" -blake3 = "fbec55697a01f99b9cc3f25be55e73ae7091f4c53e5d81a1ea15734c4e5b7238" -patches = [ - "redox.patch" -] - -[build] -template = "custom" -script = """ -DYNAMIC_INIT -COOKBOOK_CONFIGURE_FLAGS+=( - --disable-db-install - --disable-stripping - --disable-widec - --enable-pc-files - --without-ada - --without-manpages - --without-tests - --with-terminfo-dirs=/usr/share/terminfo - --with-pkg-config-libdir=/usr/lib/pkgconfig - cf_cv_func_mkstemp=yes -) -if [ "${COOKBOOK_DYNAMIC}" == "1" ] -then - COOKBOOK_CONFIGURE_FLAGS+=(--with-shared) -fi -cookbook_configure -""" - -[package] -dependencies = [ - "terminfo", -]