fix: noconfirm auto-selects first AUR match

This commit is contained in:
2026-05-08 11:01:02 +01:00
parent d39cdc3fd9
commit 153cca6132
8056 changed files with 1983098 additions and 779 deletions
@@ -0,0 +1,772 @@
/* IELR's inadequacy annotation list.
Copyright (C) 2009-2015, 2018-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "AnnotationList.h"
#include "system.h"
#include "ielr.h"
#include "lalr.h"
/**
* \pre
* - <tt>annotations_obstackp != NULL</tt>.
* \post
* - \c result is a new \c AnnotationList with one node whose:
* - \c inadequacyNode member is \c NULL.
* - \c contributions member is allocated with \c contribution_count
* uninitialized elements.
* - All memory was allocated on \c annotations_obstackp.
*/
static AnnotationList*
AnnotationList__alloc_on_obstack (ContributionIndex contribution_count,
                                  struct obstack *annotations_obstackp)
{
  AnnotationList *node;

  /* A node is a fixed header followed by CONTRIBUTION_COUNT rows, so the
     header is measured up to the trailing array rather than with
     sizeof (AnnotationList).  */
  size_t const header_size = offsetof (AnnotationList, contributions);
  size_t const rows_size = contribution_count * sizeof *node->contributions;

  node = obstack_alloc (annotations_obstackp, header_size + rows_size);

  /* Only the header is initialized; the contribution rows are left for
     the caller to fill in.  */
  node->inadequacyNode = NULL;
  node->next = NULL;
  return node;
}
/**
* \pre
* - <tt>self != NULL</tt>.
* - <tt>0 <= ci < self->inadequacyNode->contributionCount</tt>.
* \post
* - \c result = true iff contribution \c ci in \c self represents an
* "always" contribution.
*/
static bool
AnnotationList__isContributionAlways (AnnotationList const *self,
                                      ContributionIndex ci)
{
  /* Reject a contribution index outside this annotation's inadequacy.  */
  aver (0 <= ci && ci < self->inadequacyNode->contributionCount);

  /* A NULL row is the encoding of an "always" contribution.  */
  bool const is_always = !self->contributions[ci];
  return is_always;
}
/**
* \pre
* - \c self is a single node.
* - \c self annotates the same state as every other node in \c list, and
* that state has \c nitems kernel items.
* \post
* - If the list \c list already contains an identical annotation to \c self,
* \c self was discarded, \c result is false, and the caller is responsible
* for the memory of \c self.
* - Otherwise, \c list now contains the node \c self, \c result is true, and
* \c list assumes responsibility for the memory of \c self.
* - The sort in \c list is:
* - Sort in reverse order on the unique ID of the associated
* inadequacy node. Because these IDs are assigned in ascending
* order, this should mean that the insertion position within an
* annotation list is usually near the beginning with other
* annotations associated with the same inadequacy.
* - Next, sort on the first contribution that is different as follows:
* - Sort an always-contribution before a never-contribution before a
* potential-contribution.
* - Two always-contributions are identical.
* - Two never-contributions are identical.
* - For two potential-contributions, sort on the contributions' kernel
* item bitsets interpreted as binary numbers.
* - The sorting has a few effects:
* - It accelerates elimination of identical annotations during insertion.
* - It determines how the output of \c AnnotationList__debug is sorted.
* - Other than that, it's probably not important.
*/
/* Three-way comparison that defines the sort order of an annotation list.
   Returns a negative value when CANDIDATE belongs before EXISTING, zero
   when the two annotations are identical, and a positive value when
   CANDIDATE belongs somewhere after EXISTING.  Both nodes must annotate a
   state with NITEMS kernel items.  */
static int
AnnotationList__compareForInsertion (AnnotationList const *candidate,
                                     AnnotationList const *existing,
                                     size_t nitems)
{
  /* Reverse order on inadequacy ID: the larger ID sorts first.  */
  if (candidate->inadequacyNode->id < existing->inadequacyNode->id)
    return 1;
  if (existing->inadequacyNode->id < candidate->inadequacyNode->id)
    return -1;

  /* Same inadequacy: the first differing contribution decides.  */
  ContributionIndex const count =
    candidate->inadequacyNode->contributionCount;
  for (ContributionIndex ci = 0; ci < count; ++ci)
    {
      bool const candidate_always =
        AnnotationList__isContributionAlways (candidate, ci);
      bool const existing_always =
        AnnotationList__isContributionAlways (existing, ci);

      /* An always-contribution sorts before anything else.  */
      if (candidate_always != existing_always)
        return candidate_always ? -1 : 1;
      if (candidate_always)
        continue;

      /* Both rows are bitsets: at the first kernel item where they differ,
         the row with the bit clear sorts first.  */
      for (size_t item = 0; item < nitems; ++item)
        {
          bool const candidate_bit =
            Sbitset__test (candidate->contributions[ci], item);
          bool const existing_bit =
            Sbitset__test (existing->contributions[ci], item);
          if (candidate_bit != existing_bit)
            return candidate_bit ? 1 : -1;
        }
    }
  return 0;
}

static bool
AnnotationList__insertInto (AnnotationList *self, AnnotationList **list,
                            size_t nitems)
{
  AnnotationList **link = list;
  while (*link)
    {
      int const order =
        AnnotationList__compareForInsertion (self, *link, nitems);

      /* An identical annotation is already present: leave the list
         untouched and let the caller dispose of SELF.  */
      if (order == 0)
        return false;

      /* SELF sorts before the node at this position: splice it in.  */
      if (order < 0)
        {
          self->next = *link;
          *link = self;
          return true;
        }

      link = &(*link)->next;
    }

  /* Reached the end of the list (or the list was empty): append.  */
  *link = self;
  return self != NULL;
}
/* Return a newly created bitset, ntokens bits wide, in which the symbol of
   every transition of TRANS visited by FOR_EACH_SHIFT is set.  The caller
   owns the result and must release it with bitset_free.  */
static bitset
AnnotationList__compute_shift_tokens (transitions *trans)
{
  bitset result = bitset_create (ntokens, BITSET_FIXED);
  int shift_i;
  FOR_EACH_SHIFT (trans, shift_i)
    {
      bitset_set (result, TRANSITION_SYMBOL (trans, shift_i));
    }
  return result;
}
/* Return a newly created bitset of the tokens on which more than one
   action competes: a token is conflicted when it is a lookahead of some
   reduction in REDS and is also either in SHIFT_TOKENS or a lookahead of an
   earlier reduction in REDS.  The caller owns the result and must release
   it with bitset_free.  */
static bitset
AnnotationList__compute_conflicted_tokens (bitset shift_tokens,
                                           reductions *reds)
{
  bitset result = bitset_create (ntokens, BITSET_FIXED);
  /* Scratch: tokens on which the current rule clashes with earlier
     actions.  */
  bitset clash = bitset_create (ntokens, BITSET_FIXED);
  /* Tokens already claimed by the shifts and by the rules seen so far.  */
  bitset claimed = bitset_create (ntokens, BITSET_FIXED);

  bitset_copy (claimed, shift_tokens);

  int i = 0;
  while (i < reds->num)
    {
      bitset rule_lookaheads = reds->lookaheads[i];

      bitset_and (clash, claimed, rule_lookaheads);
      bitset_or (result, result, clash);
      bitset_or (claimed, claimed, rule_lookaheads);

      /* Check that rules are sorted on rule number or the next step in
         AnnotationList__compute_from_inadequacies will misbehave.  */
      aver (i == 0 || reds->rules[i-1] < reds->rules[i]);

      ++i;
    }

  bitset_free (claimed);
  bitset_free (clash);
  return result;
}
/* Work out how CONFLICTED_TOKEN can reach the goto that state S makes on
   the left-hand side of THE_RULE.

   Returns true when always_follows for that goto already contains
   CONFLICTED_TOKEN; in that case *ITEMS is not written and nothing is
   allocated.  Otherwise returns false after storing in *ITEMS a new Sbitset
   (allocated on ANNOTATIONS_OBSTACKP, s->nitems bits wide) in which a bit
   is set for each kernel item listed in follow_kernel_items for the goto
   for which ielr_item_has_lookahead reports CONFLICTED_TOKEN.  */
static bool
AnnotationList__compute_lhs_contributions (state *s, const rule *the_rule,
                                           symbol_number conflicted_token,
                                           bitsetv follow_kernel_items,
                                           bitsetv always_follows,
                                           state ***predecessors,
                                           bitset **item_lookahead_sets,
                                           Sbitset *items,
                                           struct obstack
                                             *annotations_obstackp)
{
  goto_number const lhs_goto = map_goto (s->number, the_rule->lhs->number);
  bool const always =
    bitset_test (always_follows[lhs_goto], conflicted_token);

  if (!always)
    {
      *items = Sbitset__new_on_obstack (s->nitems, annotations_obstackp);

      bitset_iterator iter;
      bitset_bindex kernel_item;
      BITSET_FOR_EACH (iter, follow_kernel_items[lhs_goto], kernel_item, 0)
        {
          if (ielr_item_has_lookahead (s, 0, kernel_item, conflicted_token,
                                       predecessors, item_lookahead_sets))
            Sbitset__set (*items, kernel_item);
        }
    }

  return always;
}
/* Propagate annotation SELF, which annotates state S, to the predecessors
   of S.

   For every state in the NULL-terminated array predecessors[s->number], a
   candidate annotation is built for the same inadequacy node as SELF:
   - A contribution that is "always" in SELF stays "always" (NULL row).
   - Any other contribution gets a new Sbitset row over the predecessor's
   kernel items, derived from the kernel items set in SELF's row.  The row
   is turned into an "always" contribution if
   AnnotationList__compute_lhs_contributions returns true for one of them.

   The candidate is released back to ANNOTATIONS_OBSTACKP when none of its
   rows has a bit set, when AnnotationList__computeDominantContribution
   (called with require_split_stable = true) reports a dominant
   contribution, or when AnnotationList__insertInto finds an identical
   annotation.  Otherwise it stays in the predecessor's entry of
   ANNOTATION_LISTS, the matching entry of ANNOTATION_COUNTS is
   incremented, and this function recurses on that predecessor.  */
static void
AnnotationList__computePredecessorAnnotations (
AnnotationList *self, state *s,
bitsetv follow_kernel_items,
bitsetv always_follows,
state ***predecessors,
bitset **item_lookahead_sets,
AnnotationList **annotation_lists,
AnnotationIndex *annotation_counts,
struct obstack *annotations_obstackp)
{
for (state **predecessor = predecessors[s->number]; *predecessor; ++predecessor)
{
AnnotationList *annotation_node =
AnnotationList__alloc_on_obstack (
self->inadequacyNode->contributionCount, annotations_obstackp);
annotation_node->inadequacyNode = self->inadequacyNode;
/* Becomes true as soon as any row of the candidate has a bit set.  */
bool potential_contribution = false;
/* One lookahead set per kernel item of the predecessor, built from the
   candidate's rows.  Allocated lazily below, so it stays NULL exactly
   when potential_contribution stays false.  */
bitset *lookaheads = NULL;
for (ContributionIndex ci = 0; ci < self->inadequacyNode->contributionCount; ++ci)
{
symbol_number contribution_token =
InadequacyList__getContributionToken (self->inadequacyNode, ci)
->content->number;
if (AnnotationList__isContributionAlways (self, ci))
{
annotation_node->contributions[ci] = NULL;
continue;
}
annotation_node->contributions[ci] =
Sbitset__new_on_obstack ((*predecessor)->nitems,
annotations_obstackp);
{
/* Search position within the predecessor's kernel items.  It is not
   reset between kernel items of S; see the comment on the search loop
   below.  */
size_t predecessor_item = 0;
Sbitset sbiter_item;
Sbitset__Index self_item;
SBITSET__FOR_EACH (self->contributions[ci], s->nitems,
sbiter_item, self_item)
{
/* If this kernel item is the beginning of a RHS, it must be
the kernel item in the start state, and so it has an empty
lookahead set. Thus, it can't contribute to inadequacies,
and so it should never have been identified as a
contribution. If, instead, this kernel item is the
successor of the start state's kernel item, the lookahead
set is still empty, and so it also should never have been
identified as a contribution. This situation is fortunate
because we want to avoid the - 2 below in both cases. */
aver (s->items[self_item] > 1);
/* If this kernel item is next to the beginning of the RHS,
then check all of the predecessor's goto follows for the
LHS. */
if (item_number_is_rule_number (ritem[s->items[self_item] - 2]))
{
Sbitset items;
if (AnnotationList__compute_lhs_contributions (
*predecessor,
item_rule (&ritem[s->items[self_item]]),
contribution_token,
follow_kernel_items, always_follows, predecessors,
item_lookahead_sets, &items, annotations_obstackp))
{
/* The token always follows the LHS here, so the whole contribution
   becomes "always": drop the row built so far and mark it NULL.  */
obstack_free (annotations_obstackp,
annotation_node->contributions[ci]);
annotation_node->contributions[ci] = NULL;
// "Break" out of SBITSET__FOR_EACH.
goto after_sbitset__for_each;
}
else
{
/* Merge the temporary ITEMS set into the row, then release it.  */
Sbitset__or (annotation_node->contributions[ci],
annotation_node->contributions[ci],
items, (*predecessor)->nitems);
obstack_free (annotations_obstackp, items);
}
}
/* If this kernel item is later in the RHS, then check the
predecessor item's lookahead set. */
else
{
/* We don't have to start the predecessor item search at
the beginning every time because items from both
states are sorted by their indices in ritem. */
for (;
predecessor_item < (*predecessor)->nitems;
++predecessor_item)
if ((*predecessor)->items[predecessor_item]
== s->items[self_item] - 1)
break;
aver (predecessor_item != (*predecessor)->nitems);
if (ielr_item_has_lookahead (*predecessor, 0,
predecessor_item,
contribution_token,
predecessors,
item_lookahead_sets))
Sbitset__set (annotation_node->contributions[ci],
predecessor_item);
}
}
after_sbitset__for_each:;
}
/* For a row that did not become "always", add CONTRIBUTION_TOKEN to the
   lookahead set of every predecessor kernel item set in that row.  */
if (annotation_node->contributions[ci])
{
Sbitset biter;
Sbitset__Index i;
SBITSET__FOR_EACH (annotation_node->contributions[ci],
(*predecessor)->nitems, biter, i)
{
potential_contribution = true;
if (!lookaheads)
{
lookaheads = xnmalloc ((*predecessor)->nitems,
sizeof *lookaheads);
for (size_t j = 0; j < (*predecessor)->nitems; ++j)
lookaheads[j] = NULL;
}
if (!lookaheads[i])
lookaheads[i] = bitset_create (ntokens, BITSET_FIXED);
bitset_set (lookaheads[i], contribution_token);
}
}
}
/* If the predecessor has any contributions besides just "always" and
"never" contributions:
- If the dominant contribution is split-stable, the annotation could
not affect merging on this predecessor state or its eventual
predecessor states. Moreover, all contributions that affect
whether the dominant contribution remains dominant must be "always"
or "never" contributions in order for the dominant contribution to
be split-stable. Thus, the dominant contribution computation result
in eventual successor states will not be affected by lookaheads
tracked for this predecessor state. (Also, as in the isocore
compatibility test, we depend on the fact that isocores with equal
dominant contributions will have the same dominant contribution when
merged. Otherwise, we might have to worry that the presence of a
potential contribution might somehow be the culprit of that behavior
and thus need to be tracked regardless of the split stability of the
dominant contribution.) Thus, go ahead and discard the annotation
to save space now plus time during state splitting.
- Otherwise, record the annotation, and compute any resulting
annotations needed on predecessor states. */
if (potential_contribution)
{
if (ContributionIndex__none
!= AnnotationList__computeDominantContribution (
annotation_node, (*predecessor)->nitems, lookaheads, true))
{
obstack_free (annotations_obstackp, annotation_node);
annotation_node = NULL;
}
/* The lookahead sets were only needed for the dominance test above.  */
{
for (size_t i = 0; i < (*predecessor)->nitems; ++i)
if (lookaheads[i])
bitset_free (lookaheads[i]);
free (lookaheads);
}
if (annotation_node)
{
if (AnnotationList__insertInto (annotation_node,
&annotation_lists[(*predecessor)
->number],
(*predecessor)->nitems))
{
++annotation_counts[(*predecessor)->number];
AnnotationList__computePredecessorAnnotations (
annotation_node, *predecessor,
follow_kernel_items, always_follows, predecessors,
item_lookahead_sets, annotation_lists, annotation_counts,
annotations_obstackp);
}
else
/* An identical annotation was already recorded for this predecessor.  */
obstack_free (annotations_obstackp, annotation_node);
}
}
else
obstack_free (annotations_obstackp, annotation_node);
}
}
/* Compute the inadequacies that manifest in state S and the annotations
   they induce; the full contract is documented with the declaration in
   AnnotationList.h.

   Nothing is done when s->consistent is set.  Otherwise, for each token
   found by AnnotationList__compute_conflicted_tokens, an annotation node is
   built with one contribution per reduction that has the token as a
   lookahead, plus one trailing contribution when the token is also
   shifted.  The node and a new InadequacyList conflict node are recorded,
   and propagated to predecessor states, only if at least one reduction
   contribution is not "always" and
   AnnotationList__computeDominantContribution (require_split_stable = true)
   returns ContributionIndex__none.  In every other case the node is
   released back to ANNOTATIONS_OBSTACKP.  */
void
AnnotationList__compute_from_inadequacies (
state *s, bitsetv follow_kernel_items, bitsetv always_follows,
state ***predecessors, bitset **item_lookahead_sets,
InadequacyList **inadequacy_lists, AnnotationList **annotation_lists,
AnnotationIndex *annotation_counts,
ContributionIndex *max_contributionsp,
struct obstack *annotations_obstackp,
InadequacyListNodeCount *inadequacy_list_node_count)
{
/* Return an empty list if s->lookaheads = NULL. */
if (s->consistent)
return;
/* A lookahead table with every token set on every kernel item of S; it is
   what the dominance test below is run against.  */
bitsetv all_lookaheads = bitsetv_create (s->nitems, ntokens, BITSET_FIXED);
bitsetv_ones (all_lookaheads);
bitset shift_tokens = AnnotationList__compute_shift_tokens (s->transitions);
bitset conflicted_tokens =
AnnotationList__compute_conflicted_tokens (shift_tokens, s->reductions);
/* Add an inadequacy annotation for each conflicted_token. */
bitset_iterator biter_conflict;
bitset_bindex conflicted_token;
BITSET_FOR_EACH (biter_conflict, conflicted_tokens, conflicted_token, 0)
{
AnnotationList *annotation_node;
ContributionIndex contribution_count = 0;
/* Allocate the annotation node. */
{
/* One contribution per reduction with this lookahead, plus one for the
   shift if the token is also shifted.  */
for (int rule_i = 0; rule_i < s->reductions->num; ++rule_i)
if (bitset_test (s->reductions->lookaheads[rule_i],
conflicted_token))
++contribution_count;
if (bitset_test (shift_tokens, conflicted_token))
++contribution_count;
annotation_node =
AnnotationList__alloc_on_obstack (contribution_count,
annotations_obstackp);
}
/* FIXME: Would a BITSET_FRUGAL or BITSET_SPARSE be more efficient? Now
or convert it inside InadequacyList__new_conflict? */
/* Bit rule_i stands for reduction rule_i; the extra last bit
   (s->reductions->num) stands for the shift.  */
bitset actions = bitset_create (s->reductions->num + 1, BITSET_FIXED);
bool potential_contribution = false;
/* Add a contribution for each reduction that has conflicted_token as a
lookahead. */
{
ContributionIndex ci = 0;
/* Index into s->items.  It only ever advances, across all rules of this
   conflicted token (see "Catch item_i up" below).  */
int item_i = 0;
for (int rule_i = 0; rule_i < s->reductions->num; ++rule_i)
{
rule *the_rule = s->reductions->rules[rule_i];
if (bitset_test (s->reductions->lookaheads[rule_i],
conflicted_token))
{
bitset_set (actions, rule_i);
/* If this reduction is on a kernel item, just add it. */
if (!item_number_is_rule_number (the_rule->rhs[0]))
{
annotation_node->contributions[ci] =
Sbitset__new_on_obstack (s->nitems,
annotations_obstackp);
/* Catch item_i up to rule_i. This works because both are
sorted on rule number. */
while (!item_number_is_rule_number (ritem[s->items[item_i]])
|| item_number_as_rule_number (ritem[s->items[item_i]]) != the_rule->number)
{
++item_i;
aver (item_i < s->nitems);
}
Sbitset__set (annotation_node->contributions[ci], item_i);
}
/* Otherwise, add the kernel items whose lookahead sets
contribute the conflicted token to this reduction's
lookahead set. */
else if (AnnotationList__compute_lhs_contributions (
s, the_rule, conflicted_token, follow_kernel_items,
always_follows, predecessors, item_lookahead_sets,
&annotation_node->contributions[ci],
annotations_obstackp))
{
/* A true result means an "always" contribution, encoded as a NULL row;
   it does not count as a potential contribution.  */
annotation_node->contributions[ci++] = NULL;
continue;
}
/* The lookahead token has to come from somewhere. */
aver (!Sbitset__isEmpty (annotation_node->contributions[ci],
s->nitems));
++ci;
potential_contribution = true;
}
}
}
/* If there are any contributions besides just "always" contributions:
- If there's also a shift contribution, record it.
- If the dominant contribution is split-stable, then the annotation
could not affect merging, so go ahead and discard the annotation and
the inadequacy to save space now plus time during state splitting.
- Otherwise, record the annotation and the inadequacy, and compute any
resulting annotations needed on predecessor states. */
if (potential_contribution)
{
if (bitset_test (shift_tokens, conflicted_token))
{
bitset_set (actions, s->reductions->num);
/* The shift is the last contribution and is stored as a NULL
   ("always") row.  */
annotation_node->contributions[contribution_count - 1] = NULL;
}
{
InadequacyList *conflict_node =
InadequacyList__new_conflict (
s, symbols[conflicted_token], actions,
inadequacy_list_node_count);
/* conflict_node now holds the bitset (InadequacyList__delete frees it),
   so forget the local reference.  */
actions = NULL;
annotation_node->inadequacyNode = conflict_node;
if (ContributionIndex__none
!= AnnotationList__computeDominantContribution (
annotation_node, s->nitems, all_lookaheads, true))
{
obstack_free (annotations_obstackp, annotation_node);
InadequacyList__delete (conflict_node);
}
else
{
InadequacyList__prependTo (conflict_node,
&inadequacy_lists[s->number]);
{
bool b =
AnnotationList__insertInto (annotation_node,
&annotation_lists[s->number],
s->nitems);
aver (b); (void) b;
}
/* This aver makes sure the
AnnotationList__computeDominantContribution check above
does discard annotations in the simplest case of a S/R
conflict with no token precedence. */
aver (!bitset_test (shift_tokens, conflicted_token)
|| symbols[conflicted_token]->content->prec);
++annotation_counts[s->number];
if (contribution_count > *max_contributionsp)
*max_contributionsp = contribution_count;
AnnotationList__computePredecessorAnnotations (
annotation_node, s,
follow_kernel_items, always_follows, predecessors,
item_lookahead_sets, annotation_lists, annotation_counts,
annotations_obstackp);
}
}
}
else
{
/* Only "always" contributions: no conflict node was created, so both the
   action set and the annotation node are released here.  */
bitset_free (actions);
obstack_free (annotations_obstackp, annotation_node);
}
}
bitsetv_free (all_lookaheads);
bitset_free (shift_tokens);
bitset_free (conflicted_tokens);
}
/* Print every annotation in the list SELF to stderr, each line preceded by
   SPACES spaces (contribution lines by two more).  NITEMS is the number of
   kernel items in the annotated state and is used to print contribution
   rows.  */
void
AnnotationList__debug (AnnotationList const *self, size_t nitems, int spaces)
{
  AnnotationIndex ai = 0;
  for (AnnotationList const *a = self; a; a = a->next, ++ai)
    {
      InadequacyList *node = a->inadequacyNode;

      fprintf (stderr, "%*sAnnotation %d (manifesting state %d):\n",
               spaces, "",
               ai, node->manifestingState->number);

      /* RULEI walks the set bits of the conflict's actions in step with
         the reduction contributions printed below.  */
      bitset_bindex rulei
        = bitset_first (node->inadequacy.conflict.actions);

      for (ContributionIndex ci = 0; ci < node->contributionCount; ++ci)
        {
          symbol_number token =
            InadequacyList__getContributionToken (node, ci)
              ->content->number;
          fprintf (stderr, "%*s", spaces+2, "");

          /* The shift contribution has no rule and no item row.  */
          if (ci == InadequacyList__getShiftContributionIndex (node))
            {
              fprintf (stderr, "Contributes shift of token %d.\n", token);
              continue;
            }

          fprintf (stderr, "Contributes token %d", token);
          aver (rulei != BITSET_BINDEX_MAX);
          fprintf (stderr, " as lookahead, rule number %d",
                   node->manifestingState
                     ->reductions->rules[rulei]->number);
          rulei = bitset_next (node->inadequacy.conflict.actions, rulei+1);

          if (AnnotationList__isContributionAlways (a, ci))
            fprintf (stderr, " always.");
          else
            {
              fprintf (stderr, ", items: ");
              Sbitset__fprint (a->contributions[ci], nitems, stderr);
            }
          fprintf (stderr, "\n");
        }
    }
}
/* Clear LOOKAHEAD_FILTER, then for every annotation in the list SELF and
   every contribution that is not "always", set the contribution's token in
   lookahead_filter[item] for each kernel item set in that contribution's
   row.  NITEMS is the number of kernel items in the annotated state.  */
void
AnnotationList__computeLookaheadFilter (AnnotationList const *self,
                                        size_t nitems,
                                        bitsetv lookahead_filter)
{
  bitsetv_zero (lookahead_filter);

  for (AnnotationList const *a = self; a; a = a->next)
    {
      ContributionIndex const count = a->inadequacyNode->contributionCount;
      for (ContributionIndex ci = 0; ci < count; ++ci)
        {
          /* "Always" contributions have no item row to scan.  */
          if (AnnotationList__isContributionAlways (a, ci))
            continue;

          symbol_number token =
            InadequacyList__getContributionToken (a->inadequacyNode, ci)
              ->content->number;
          Sbitset cursor;
          Sbitset__Index kernel_item;
          SBITSET__FOR_EACH (a->contributions[ci], nitems,
                             cursor, kernel_item)
            {
              bitset_set (lookahead_filter[kernel_item], token);
            }
        }
    }
}
/**
* \pre
* - <tt>self != NULL</tt>.
* - \c nitems is the number of kernel items in the LR(0) state that \c self
* annotates.
* - \c lookaheads describes the lookahead sets on the kernel items of some
* isocore of the LR(0) state that \c self annotates. Either:
* - <tt>lookaheads = NULL</tt> only if the lookahead set on every kernel
* item is empty.
* - For any <tt>0 <= i < nitems</tt>, <tt>lookaheads[i]</tt> is either:
* - \c NULL only if the lookahead set on kernel item \c i is empty.
* - The (possibly empty) lookahead set on kernel item \c i.
* - <tt>0 <= ci < self->inadequacyNode->contributionCount</tt>.
* \post
* - \c result = true iff contribution \c ci in \c self is made by the state
* described by \c lookaheads.
*/
static bool
AnnotationList__stateMakesContribution (AnnotationList const *self,
                                        size_t nitems, ContributionIndex ci,
                                        bitset *lookaheads)
{
  /* An "always" contribution is made regardless of lookaheads; otherwise a
     state with no lookahead sets at all cannot make it.  */
  bool const always = AnnotationList__isContributionAlways (self, ci);
  if (always || !lookaheads)
    return always;

  /* The contribution is made as soon as one kernel item listed in the row
     carries the contribution's token in its lookahead set.  */
  symbol_number token =
    InadequacyList__getContributionToken (self->inadequacyNode, ci)
      ->content->number;
  Sbitset cursor;
  Sbitset__Index kernel_item;
  SBITSET__FOR_EACH (self->contributions[ci], nitems, cursor, kernel_item)
    {
      bitset item_lookaheads = lookaheads[kernel_item];
      if (item_lookaheads && bitset_test (item_lookaheads, token))
        return true;
    }
  return false;
}
/* Decide which contribution of annotation SELF dominates for an isocore
   whose kernel item lookahead sets are LOOKAHEADS (NITEMS entries, or NULL
   when every set is empty).

   Returns the index of the dominating contribution,
   ContributionIndex__error_action when a %nonassoc error action dominates,
   or ContributionIndex__none.  With REQUIRE_SPLIT_STABLE set,
   ContributionIndex__none is also returned whenever the answer depends on a
   contribution that is not an "always" contribution.  */
ContributionIndex
AnnotationList__computeDominantContribution (AnnotationList const *self,
                                             size_t nitems, bitset *lookaheads,
                                             bool require_split_stable)
{
  ContributionIndex const ci_shift =
    InadequacyList__getShiftContributionIndex (self->inadequacyNode);
  symbol *token = self->inadequacyNode->inadequacy.conflict.token;

  /* S/R conflict.  */
  if (ci_shift != ContributionIndex__none)
    {
      bool find_stable_domination_over_shift = false;
      bool find_stable_error_action_domination = false;
      {
        int shift_precedence = token->content->prec;

        /* If the token has no precedence set, shift is always chosen.  */
        if (!shift_precedence)
          return ci_shift;

        /* Figure out which reductions contribute, which of those would
           dominate in a R/R comparison, and whether any reduction dominates
           the shift so that the R/R comparison is actually needed.  */
        ContributionIndex ci_rr_dominator = ContributionIndex__none;
        /* ACTIONI walks the set bits of the conflict's action set in step
           with CI.  It has the index type that bitset_first and bitset_next
           return, so their "no more bits" value is never narrowed.  */
        bitset_bindex actioni;
        ContributionIndex ci;
        for (ci = 0,
               actioni = bitset_first (self->inadequacyNode->inadequacy
                                       .conflict.actions);
             ci < self->inadequacyNode->contributionCount;
             ++ci,
               actioni = bitset_next (self->inadequacyNode->inadequacy
                                      .conflict.actions, actioni+1))
          {
            int reduce_precedence = 0;
            if (ci == ci_shift)
              continue;
            {
              rule *r = self->inadequacyNode->manifestingState
                          ->reductions->rules[actioni];
              if (r->prec)
                reduce_precedence = r->prec->prec;
            }
            /* If there's no need to check whether this reduction actually
               contributes because the shift eliminates it from the R/R
               comparison anyway, continue to the next reduction.  */
            if (reduce_precedence
                && (reduce_precedence < shift_precedence
                    || (reduce_precedence == shift_precedence
                        && token->content->assoc == right_assoc)))
              continue;
            if (!AnnotationList__stateMakesContribution (self, nitems, ci,
                                                         lookaheads))
              continue;
            /* This uneliminated reduction contributes, so see if it can cause
               an error action.  */
            if (reduce_precedence == shift_precedence
                && token->content->assoc == non_assoc)
              {
                /* It's not possible to find split-stable domination over
                   shift after a potential %nonassoc.  */
                if (find_stable_domination_over_shift)
                  return ContributionIndex__none;
                if (!require_split_stable
                    || AnnotationList__isContributionAlways (self, ci))
                  return ContributionIndex__error_action;
                find_stable_error_action_domination = true;
              }
            /* Consider this uneliminated contributing reduction in the R/R
               comparison.  */
            if (ci_rr_dominator == ContributionIndex__none)
              ci_rr_dominator = ci;
            /* If precedence is set for this uneliminated contributing
               reduction, it dominates the shift, so try to figure out which
               reduction dominates the R/R comparison.  */
            if (reduce_precedence)
              {
                /* It's not possible to find split-stable error action
                   domination after a potential reduction.  */
                if (find_stable_error_action_domination)
                  return ContributionIndex__none;
                if (!require_split_stable)
                  return ci_rr_dominator;
                if (!AnnotationList__isContributionAlways (self,
                                                           ci_rr_dominator))
                  return ContributionIndex__none;
                if (AnnotationList__isContributionAlways (self, ci))
                  return ci_rr_dominator;
                find_stable_domination_over_shift = true;
              }
          }
      }
      if (find_stable_domination_over_shift
          || find_stable_error_action_domination)
        return ContributionIndex__none;
      /* No reduce or error action domination found, so shift dominates.  */
      return ci_shift;
    }

  /* R/R conflict, so the reduction with the lowest rule number dominates.
     Fortunately, contributions are sorted by rule number.  */
  for (ContributionIndex ci = 0; ci < self->inadequacyNode->contributionCount; ++ci)
    if (AnnotationList__stateMakesContribution (self, nitems, ci, lookaheads))
      {
        if (require_split_stable
            && !AnnotationList__isContributionAlways (self, ci))
          return ContributionIndex__none;
        return ci;
      }
  return ContributionIndex__none;
}
@@ -0,0 +1,183 @@
/* IELR's inadequacy annotation list.
Copyright (C) 2009-2015, 2018-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef ANNOTATION_LIST_H_
# define ANNOTATION_LIST_H_
# include <bitsetv.h>
# include "Sbitset.h"
# include "InadequacyList.h"
# include "state.h"
/** Position of a node within an annotation list; also the element type of
    the per-state annotation counts.  */
typedef int AnnotationIndex;
/**
* A node in a list of annotations on a particular LR(0) state. Each
* annotation records how isocores of that LR(0) state might contribute to an
* individual inadequacy, which might manifest in a different state. Don't
* break encapsulation by modifying the fields directly. Use the provided
* interface functions.
*/
typedef struct AnnotationList
{
  /** The next node in the list or \c NULL if none.  */
  struct AnnotationList *next;
  /** The \c InadequacyList node describing how this inadequacy manifests.  */
  InadequacyList *inadequacyNode;
  /**
   * List of how the "always", "never", and potential contributions of the
   * inadequacy might be made by isocores of the annotated LR(0) state:
   *   - The number of rows is the number of contributions.  That is,
   *     <tt>AnnotationList::inadequacyNode->contributionCount</tt>.
   *   - The token associated with contribution \c i is
   *     <tt>InadequacyList__getContributionToken (AnnotationList::inadequacyNode, i)</tt>.
   *   - Iff <tt>AnnotationList::contributions[i] = NULL</tt>, contribution
   *     \c i is an "always" contribution.  That is, for every isocore of the
   *     annotated LR(0) state, its core or the core of one of its eventual
   *     successors will definitely make this contribution to the inadequacy.
   *     It may contribute by either:
   *     - Creating a shift of contribution <tt>i</tt>'s token in the state
   *       that can manifest the inadequacy.
   *     - Propagating that token to the lookahead set of contribution
   *       <tt>i</tt>'s reduction in the state that can manifest the
   *       inadequacy.
   *   - Otherwise:
   *     - The number of columns in <tt>AnnotationList::contributions[i]</tt>
   *       is the number of kernel items in any isocore of the annotated LR(0)
   *       state.
   *     - Iff <tt>AnnotationList::contributions[i]</tt> is empty, contribution
   *       \c i is a "never" contribution.  That is, no isocore of the
   *       annotated LR(0) state can make this contribution to the inadequacy.
   *     - Otherwise, for each bit \c j that is set in
   *       <tt>AnnotationList::contributions[i]</tt>, if the token associated
   *       with contribution \c i is present in the lookahead set of kernel
   *       item \c j of an isocore of the annotated LR(0) state, that isocore
   *       will make contribution \c i to the inadequacy by propagating the
   *       contribution's token to the lookahead set of the contribution's
   *       reduction in the state that can manifest the inadequacy.
   *
   * This is a C99 flexible array member.  A node must be allocated as
   * <tt>offsetof (AnnotationList, contributions)</tt> plus one \c Sbitset
   * per contribution; <tt>sizeof (AnnotationList)</tt> does not include any
   * row.
   */
  Sbitset contributions[];
} AnnotationList;
/**
* \pre
* - <tt>s != NULL</tt>.
* - \c follow_kernel_items, \c always_follows, and \c predecessors were
* computed by \c ielr_compute_auxiliary_tables.
* - The size of each of \c annotation_lists and \c annotation_counts is
* \c ::nstates.
* - If no \c InadequacyList nodes are currently allocated for the
* parser tables to which \c s belongs, then it is best if
* <tt>*inadequacy_list_node_count</tt> is zero to avoid overflow.
* Otherwise, <tt>*inadequacy_list_node_count</tt> has not been
* modified by any function except
* \c AnnotationList__compute_from_inadequacies since the invocation
* of \c AnnotationList__compute_from_inadequacies that constructed
* the first of the \c InadequacyList nodes currently allocated for
* those parser tables.
* \post
* - <tt>inadequacy_lists[s->number]</tt> now describes all inadequacies that
* manifest in \c s.
* - For every state <tt>states[i]</tt>, <tt>annotation_lists[i]</tt> now
* contains all annotations associated with all inadequacies that manifest
* in \c s.
* - <tt>annotation_counts[i]</tt> was incremented by the number of new
* annotations added to <tt>states[i]</tt>.
* - <tt>*max_contributionsp</tt> is the higher of:
* - The maximum number of contributions computed per annotation.
* - <tt>*max_contributionsp \@pre</tt>.
* - All memory for all new annotations was allocated on
* \c annotations_obstackp.
*/
void
AnnotationList__compute_from_inadequacies (
state *s, bitsetv follow_kernel_items, bitsetv always_follows,
state ***predecessors, bitset **item_lookahead_sets,
InadequacyList **inadequacy_lists, AnnotationList **annotation_lists,
AnnotationIndex *annotation_counts,
ContributionIndex *max_contributionsp,
struct obstack *annotations_obstackp,
InadequacyListNodeCount *inadequacy_list_node_count);
/**
* \pre
* - <tt>self != NULL</tt>.
* - \c nitems is the number of kernel items in the LR(0) state that every
* node in the list \c self annotates.
* \post
* - A textual representation of all nodes in the list \c self was printed to
* stderr. \c spaces spaces were printed before each line of the text.
*/
void AnnotationList__debug (AnnotationList const *self, size_t nitems,
int spaces);
/**
* \pre
* - <tt>self != NULL</tt>.
* - \c nitems is the number of kernel items in the LR(0) state that \c self
* annotates.
* - The number of rows in \c lookahead_filter is at least \c nitems, and the
* number of columns is \c ::ntokens.
* \post
* - <tt>lookahead_filter[i][j]</tt> is set iff some annotation in the list
* \c self lists token \c j in kernel item \c i as a contributor.
*/
void AnnotationList__computeLookaheadFilter (AnnotationList const *self,
size_t nitems,
bitsetv lookahead_filter);
/**
* \pre
* - <tt>self != NULL</tt>.
* - \c nitems is the number of kernel items in the LR(0) state that \c self
* annotates.
* - \c lookaheads describes the lookahead sets on the kernel items of some
* isocore of the LR(0) state that \c self annotates. Either:
* - <tt>lookaheads = NULL</tt> only if the lookahead set on every kernel
* item is empty.
* - For any <tt>0 <= i < nitems</tt>, <tt>lookaheads[i]</tt> is either:
* - \c NULL only if the lookahead set on kernel item \c i is empty.
* - The (possibly empty) lookahead set on kernel item \c i.
* \post
* - If <tt>require_split_stable = false</tt>, \c result = either:
* - \c ContributionIndex__none iff the state described by \c lookaheads
* makes none of the contributions in \c self.
* - The index of the dominating contribution in \c self that is made by
* that state.
* - \c ContributionIndex__error_action to indicate that the inadequacy
* manifests as a conflict and that a syntax error action (because of a
* %nonassoc) dominates instead.
* - Otherwise, \c result is the same as if <tt>require_split_stable =
* false</tt> except that it is also \c ContributionIndex__none if there
* are contributions made by the state but the dominating contribution is
* not split-stable. By split-stable, we mean that the dominating
* contribution cannot change due to loss of one or more potential
* contributions due to loss of lookaheads due to splitting of the state.
* - After determining which contributions are actually made by the state,
* the algorithm for determining which contribution dominates in the
* conflict is intended to choose exactly the same action as conflicts.c
* would choose... no matter how crazy conflicts.c's choice is.
*/
ContributionIndex
AnnotationList__computeDominantContribution (AnnotationList const *self,
size_t nitems, bitset *lookaheads,
bool require_split_stable);
#endif /* !ANNOTATION_LIST_H_ */
@@ -0,0 +1,84 @@
/* IELR's inadequacy list.
Copyright (C) 2009-2015, 2018-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include "InadequacyList.h"
#include <intprops.h>
/* Sentinel meaning "no contribution".  It is negative, whereas
   InadequacyList__getContributionToken requires a contribution index
   to be at least 0.  */
ContributionIndex const ContributionIndex__none = -1;
/* Sentinel meaning "the chosen action is a syntax error"; also negative
   and distinct from ContributionIndex__none.  */
ContributionIndex const ContributionIndex__error_action = -2;
/* Allocate a one-node list describing a conflict on TOKEN in
   MANIFESTING_STATE.  The node takes ownership of ACTIONS and receives
   the current value of *NODE_COUNT as its id; *NODE_COUNT is then
   incremented (the program aborts through aver if that would
   overflow).  */
InadequacyList *
InadequacyList__new_conflict (state *manifesting_state, symbol *token,
                              bitset actions,
                              InadequacyListNodeCount *node_count)
{
  InadequacyList *node = xmalloc (sizeof *node);

  /* Describe the conflict itself.  */
  node->next = NULL;
  node->manifestingState = manifesting_state;
  node->contributionCount = bitset_count (actions);
  node->inadequacy.conflict.token = token;
  node->inadequacy.conflict.actions = actions;

  /* Hand out the current id, then advance the shared counter.  */
  node->id = *node_count;
  IGNORE_TYPE_LIMITS_BEGIN
  if (INT_ADD_WRAPV (*node_count, 1, node_count))
    aver (false);
  IGNORE_TYPE_LIMITS_END

  return node;
}
/* Free every node of the list SELF, together with the ACTIONS bitset
   each node owns.  SELF may be NULL (empty list).  */
void
InadequacyList__delete (InadequacyList *self)
{
  for (InadequacyList *next; self; self = next)
    {
      /* Read the link before the node is released.  */
      next = self->next;
      bitset_free (self->inadequacy.conflict.actions);
      free (self);
    }
}
/* Return the index of the shift contribution of SELF, or
   ContributionIndex__none when the conflict involves no shift.  The
   shift is recorded in ACTIONS at bit number reductions->num of the
   manifesting state, and is reported as the last contribution.  */
ContributionIndex
InadequacyList__getShiftContributionIndex (InadequacyList const *self)
{
  return (bitset_test (self->inadequacy.conflict.actions,
                       self->manifestingState->reductions->num)
          ? self->contributionCount - 1
          : ContributionIndex__none);
}
/* Return the token of contribution I of SELF.  For a conflict every
   contribution shares the conflict's token, so I is only checked to
   be in range.  */
symbol *
InadequacyList__getContributionToken (InadequacyList const *self,
                                      ContributionIndex i)
{
  aver (0 <= i && i < self->contributionCount);
  /* I is otherwise unused.  */
  (void) i;
  return self->inadequacy.conflict.token;
}
/* Make the single node SELF the new first node of *LIST; the former
   head of *LIST becomes SELF's successor.  */
void
InadequacyList__prependTo (InadequacyList *self, InadequacyList **list)
{
  InadequacyList *former_head = *list;

  *list = self;
  self->next = former_head;
}
@@ -0,0 +1,153 @@
/* IELR's inadequacy list.
Copyright (C) 2009-2015, 2018-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef INADEQUACY_LIST_H_
# define INADEQUACY_LIST_H_
# include <bitset.h>
# include "gram.h"
# include "state.h"
# include "symtab.h"
/**
* A unique ID assigned to every \c InadequacyList node.
*/
typedef long long InadequacyListNodeCount;
/**
* For a conflict, each rule in the grammar can have at most one contributing
* reduction except that rule 0 cannot have any because the reduction on rule 0
* cannot have lookaheads. For a conflict, exactly one shift can contribute.
* Thus the number of rules in the grammar is an upper bound on the number of
* possible contributions to any conflict. The maximum number of possible
* items in a state is also an upper bound, but the \c nitems member of \c
* state is currently a \c size_t and thus, if changed, risks becoming out of
* sync with this type. Whatever the type, it must support negatives for sake
* of the special values below.
*/
typedef rule_number ContributionIndex;
/* Special \c ContributionIndex used to indicate null result when looking for a
contribution. */
extern ContributionIndex const ContributionIndex__none;
/* Special \c ContributionIndex used by
\c AnnotationList__computeDominantContribution to signal when the action
chosen in a conflict is a syntax error because of a %nonassoc. */
extern ContributionIndex const ContributionIndex__error_action;
/**
* The description of a conflict. Don't break encapsulation by modifying the
* fields directly. Use the provided interface functions for
* \c InadequacyList.
*/
typedef struct {
/** The \c token passed to \c InadequacyList__new_conflict. */
symbol *token;
/** The \c actions passed to \c InadequacyList__new_conflict. */
bitset actions;
} Conflict;
/**
 * A node in a list that describes all the inadequacies that manifest in a
 * particular state. Don't break encapsulation by modifying the fields
 * directly. Use the provided interface functions.
 */
typedef struct InadequacyList {
/** The next node of the list, or \c NULL for the last node. */
struct InadequacyList *next;
/** The value <tt>*node_count</tt> had when
    \c InadequacyList__new_conflict created this node. */
InadequacyListNodeCount id;
/** The \c manifesting_state passed to \c InadequacyList__new_conflict. */
state *manifestingState;
/** The number of bits set in the \c actions passed to
    \c InadequacyList__new_conflict. */
ContributionIndex contributionCount;
/** The description of the inadequacy; a conflict is the only kind
    declared here. */
union {
Conflict conflict;
} inadequacy;
} InadequacyList;
/**
* \pre
* - <tt>manifesting_state != NULL</tt>.
* - \c token is a token.
* - The size of \c actions is
* <tt>manifesting_state->reductions->num + 1</tt>.
* - If the set of all \c InadequacyList nodes with which the new
* \c InadequacyList node might be compared is currently empty, then
* it is best if <tt>*node_count</tt> is zero so that the node count
* does not eventually overflow. However, if that set is not
* currently empty, then <tt>*node_count</tt> has not been modified
* by any function except \c InadequacyList__new_conflict since the
* invocation of \c InadequacyList__new_conflict that constructed
* the first existing member of that set.
* \post
* - \c result is a new \c InadequacyList with one node indicating that, in
* \c manifesting_state, the following actions are in conflict on \c token:
* - Shift iff
* <tt>bitset_test (actions, manifesting_state->reductions->num)</tt>.
* - For any \c i such that
* <tt>0 <= i < manifesting_state->reductions->num</tt>, the reduction
* for the rule <tt>manifesting_state->reductions->rules[i]</tt> iff
* <tt>actions[i]</tt> is set.
* - Given any node \c n from the set of all existing
* \c InadequacyList nodes with which \c result might be compared
* such that <tt>n != result</tt>, then <tt>n->id < result->id</tt>.
* - \c result assumes responsibility for the memory of \c actions.
*/
InadequacyList *InadequacyList__new_conflict (
state *manifesting_state, symbol *token, bitset actions,
InadequacyListNodeCount *node_count);
/**
* \post
* - All memory associated with all nodes in the list \c self was freed.
*/
void InadequacyList__delete (InadequacyList *self);
/**
* \pre
* - <tt>self != NULL</tt>.
* \post
* - \c result = either:
* - \c ContributionIndex__none iff there is no shift contribution in
* \c self (perhaps because \c self isn't a conflict).
* - The index of the shift contribution, otherwise.
*/
ContributionIndex
InadequacyList__getShiftContributionIndex (InadequacyList const *self);
/**
* \pre
* - <tt>self != NULL</tt>.
* - <tt>0 <= i < self->contributionCount</tt>.
* \post
* - \c result = the token associated with contribution \c i in the
* inadequacy described by the node \c self.
*/
symbol *InadequacyList__getContributionToken (InadequacyList const *self,
ContributionIndex i);
/**
* \pre
* - \c self is a single node.
* - <tt>list != NULL</tt>.
* \post
* - \c list now contains \c self as its first node.
* - \c list assumes responsibility for the memory of \c self.
*/
void InadequacyList__prependTo (InadequacyList *self, InadequacyList **list);
#endif /* !INADEQUACY_LIST_H_ */
@@ -0,0 +1,80 @@
/* A simple, memory-efficient bitset implementation.
Copyright (C) 2009-2015, 2018-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include "Sbitset.h"
/* Return a new heap-allocated set able to hold NBITS bits, all of
   them reset.  NBITS must not be 0.  */
Sbitset
Sbitset__new (Sbitset__Index nbits)
{
  /* Some functions, like Sbitset__last_byte_mask, will fail if nbits = 0. */
  aver (nbits);

  Sbitset__Index nbytes = Sbitset__nbytes (nbits);
  return xcalloc (nbytes, 1);
}
/* Like Sbitset__new, but take the memory from OBSTACKP instead of the
   heap.  Every byte of the new set is cleared.  NBITS must not be 0.  */
Sbitset
Sbitset__new_on_obstack (Sbitset__Index nbits, struct obstack *obstackp)
{
  aver (nbits);

  Sbitset__Index nbytes = Sbitset__nbytes (nbits);
  Sbitset set = obstack_alloc (obstackp, nbytes);
  /* obstack_alloc does not clear the memory it hands out.  */
  memset (set, 0, nbytes);
  return set;
}
/* Release SELF by passing it to free.
   NOTE(review): presumably only sets created by Sbitset__new may be
   passed here; sets from Sbitset__new_on_obstack come from
   obstack_alloc -- confirm with the callers.  */
void
Sbitset__delete (Sbitset self)
{
free (self);
}
/* Return true iff none of the first NBITS bits of SELF is set.  Bits
   of the last byte beyond NBITS are ignored.  NBITS must not be 0.  */
bool
Sbitset__isEmpty (Sbitset self, Sbitset__Index nbits)
{
  Sbitset const last = self + Sbitset__nbytes (nbits) - 1;

  /* Every byte before the last one is fully significant.  */
  Sbitset cur = self;
  while (cur < last)
    if (*cur++ != 0)
      return false;

  /* Only the leading bits of the last byte belong to the set.  */
  return (*last & Sbitset__last_byte_mask (nbits)) == 0;
}
/* Print on FILE the size NBITS of SELF followed by the comma-separated
   list of the indices of its set bits.  */
void
Sbitset__fprint (Sbitset self, Sbitset__Index nbits, FILE *file)
{
  fprintf (file,
           "nbits = %" SBITSET__INDEX__CONVERSION_SPEC ", set = {",
           nbits);

  /* Empty before the first index, a comma before each later one.  */
  char const *separator = "";
  Sbitset__Index bit;
  Sbitset byte;
  SBITSET__FOR_EACH (self, nbits, byte, bit)
    {
      fprintf (file, "%s %" SBITSET__INDEX__CONVERSION_SPEC,
               separator, bit);
      separator = ",";
    }

  fprintf (file, " }");
}
@@ -0,0 +1,94 @@
/* A simple, memory-efficient bitset implementation.
Copyright (C) 2009-2015, 2018-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef SBITSET_H_
# define SBITSET_H_

/* Everything this header uses itself, so that it can be included
   without relying on what the including file pulled in before.  */
# include <limits.h>  /* CHAR_BIT, UCHAR_MAX */
# include <stdbool.h> /* bool */
# include <stddef.h>  /* size_t */
# include <stdio.h>   /* FILE */
# include <string.h>  /* memset */

/* Only pointers to obstacks appear in this interface.  */
struct obstack;

/* A set is a plain array of bytes.  Bit INDEX lives in byte
   INDEX / CHAR_BIT; within a byte, the lowest index is the most
   significant bit.  */
typedef unsigned char *Sbitset;
typedef size_t Sbitset__Index;
# define SBITSET__INDEX__CONVERSION_SPEC "zu"

/* Number of bytes needed to store NBITS bits.  */
# define Sbitset__nbytes(NBITS) \
  (((NBITS) + CHAR_BIT - 1) / CHAR_BIT)
/* Address of the byte of SELF that holds bit INDEX.  */
# define Sbitset__byteAddress(SELF, INDEX) \
  (((SELF) + (INDEX) / CHAR_BIT))
/* Mask selecting bit INDEX within its byte.  */
# define Sbitset__bit_mask(INDEX) \
  (1 << (CHAR_BIT - 1 - (INDEX) % CHAR_BIT))
/* Mask selecting, in the last byte of a set of NBITS bits, the bits
   that belong to the set.  NBITS must not be 0.  */
# define Sbitset__last_byte_mask(NBITS) \
  (UCHAR_MAX << (CHAR_BIT - 1 - ((NBITS) - 1) % CHAR_BIT))

/* nbits must not be 0. */
Sbitset Sbitset__new (Sbitset__Index nbits);
Sbitset Sbitset__new_on_obstack (Sbitset__Index nbits,
                                 struct obstack *obstackp);
void Sbitset__delete (Sbitset self);

/* Whether bit INDEX of SELF is set.  */
# define Sbitset__test(SELF, INDEX) \
  ((*Sbitset__byteAddress ((SELF), (INDEX)) & Sbitset__bit_mask (INDEX)) != 0)

bool Sbitset__isEmpty (Sbitset self, Sbitset__Index nbits);
void Sbitset__fprint (Sbitset self, Sbitset__Index nbits, FILE *file);

/* Set bit INDEX of SELF.  */
# define Sbitset__set(SELF, INDEX) \
  do { \
    *Sbitset__byteAddress ((SELF), (INDEX)) = \
      *Sbitset__byteAddress ((SELF), (INDEX)) | Sbitset__bit_mask (INDEX); \
  } while (0)

/* Reset bit INDEX of SELF.  */
# define Sbitset__reset(SELF, INDEX) \
  do { \
    *Sbitset__byteAddress ((SELF), (INDEX)) = \
      *Sbitset__byteAddress ((SELF), (INDEX)) & ~Sbitset__bit_mask (INDEX); \
  } while (0)

/* NBITS is the size of the bitset. More than NBITS bits might be reset. */
# define Sbitset__zero(SELF, NBITS) \
  do { \
    memset ((SELF), 0, Sbitset__nbytes (NBITS)); \
  } while (0)

/* NBITS is the size of the bitset. More than NBITS bits might be set. */
# define Sbitset__ones(SELF, NBITS) \
  do { \
    memset ((SELF), UCHAR_MAX, Sbitset__nbytes (NBITS)); \
  } while (0)

/* NBITS is the size of every bitset. More than NBITS bits might be set.
   The locals carry a macro-specific name so that an argument spelled
   like one of them cannot be captured.  */
# define Sbitset__or(SELF, OTHER1, OTHER2, NBITS) \
  do { \
    Sbitset Sbitset__or_self_ = (SELF); \
    Sbitset Sbitset__or_other1_ = (OTHER1); \
    Sbitset Sbitset__or_other2_ = (OTHER2); \
    Sbitset Sbitset__or_end_ = Sbitset__or_self_ + Sbitset__nbytes (NBITS); \
    for (; Sbitset__or_self_ < Sbitset__or_end_; \
         ++Sbitset__or_self_, ++Sbitset__or_other1_, ++Sbitset__or_other2_) \
      *Sbitset__or_self_ = *Sbitset__or_other1_ | *Sbitset__or_other2_; \
  } while (0)

/* Run the following statement once per set bit of SELF, lowest index
   first, with INDEX holding the bit's index and ITER pointing to the
   byte that contains it.

   ATTENTION: there are *two* loops here, "break" and "continue" will
   not apply to the whole loop, just the inner one. */
# define SBITSET__FOR_EACH(SELF, NBITS, ITER, INDEX) \
  for ((ITER) = (SELF); (ITER) < (SELF) + Sbitset__nbytes (NBITS); ++(ITER)) \
    if (*(ITER) != 0) \
      for ((INDEX) = ((ITER) - (SELF)) * CHAR_BIT; \
           (INDEX) < (NBITS) && (SELF) + (INDEX) / CHAR_BIT < (ITER) + 1; \
           ++(INDEX)) \
        if ((*(ITER) & Sbitset__bit_mask (INDEX)) != 0)

#endif /* !SBITSET_H_ */
@@ -0,0 +1,48 @@
/* Associativity information.
Copyright (C) 2002, 2005-2006, 2008-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include "assoc.h"
/* Return the directive that declares associativity A, or a descriptive
   phrase for undef_assoc.  Abort on a value that is not an
   enumerator of assoc.  */
const char *
assoc_to_string (assoc a)
{
  const char *name = NULL;

  switch (a)
    {
    case undef_assoc:
      name = "undefined associativity";
      break;
    case right_assoc:
      name = "%right";
      break;
    case left_assoc:
      name = "%left";
      break;
    case non_assoc:
      name = "%nonassoc";
      break;
    case precedence_assoc:
      name = "%precedence";
      break;
    }

  /* No enumerator matched.  */
  if (!name)
    abort ();
  return name;
}
@@ -0,0 +1,36 @@
/* Associativity information.
Copyright (C) 2002, 2006, 2008-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef ASSOC_H_
# define ASSOC_H_
/* Associativity values for tokens and rules. */
typedef enum
{
undef_assoc, /**< Not defined. */
right_assoc, /**< %right */
left_assoc, /**< %left */
non_assoc, /**< %nonassoc */
precedence_assoc /**< %precedence */
} assoc;
/* Return a printable name for A: the declaring directive, or
   "undefined associativity" for undef_assoc.  */
char const *assoc_to_string (assoc a);
#endif /* !ASSOC_H_ */
@@ -0,0 +1,235 @@
/* Closures for Bison
Copyright (C) 1984, 1989, 2000-2002, 2004-2005, 2007, 2009-2015,
2018-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include <bitset.h>
#include <bitsetv.h>
#include "closure.h"
#include "derives.h"
#include "getargs.h"
#include "gram.h"
#include "reader.h"
#include "symtab.h"
/* ITEMSET is the output vector of closure (); closure_new () allocates
   it.  NITEMSET is the number of entries of ITEMSET filled in by the
   last call to closure ().  */
item_index *itemset;
size_t nitemset;
/* RULESET contains a bit for each rule. CLOSURE sets the bits for
all rules which could potentially describe the next input to be
read. */
static bitset ruleset;
/* internal data. See comments before set_fderives and set_firsts. */
static bitsetv fderives = NULL;
static bitsetv firsts = NULL;
/* Retrieve the FDERIVES/FIRSTS sets of the nonterminals numbered Var.
   Var is a symbol number; both vectors are indexed from the first
   nonterminal, hence the NTOKENS offset.  */
#define FDERIVES(Var) fderives[(Var) - ntokens]
#define FIRSTS(Var) firsts[(Var) - ntokens]
/*-----------------.
| Debugging code. |
`-----------------*/
/* Dump on stderr, under the heading TITLE, the SIZE items of ARRAY:
   for each item its index, the symbols that follow the dot, and the
   number of its rule.  */
static void
closure_print (char const *title, item_index const *array, size_t size)
{
  fprintf (stderr, "Closure: %s\n", title);
  for (item_index const *it = array; it < array + size; ++it)
    {
      fprintf (stderr, " %2d: .", *it);
      /* The remaining right-hand side ends at the first negative
         entry of RITEM, which encodes the rule number.  */
      item_number const *sym = &ritem[*it];
      while (0 <= *sym)
        {
          fprintf (stderr, " %s", symbols[*sym]->tag);
          ++sym;
        }
      fprintf (stderr, " (rule %d)\n", item_number_as_rule_number (*sym));
    }
  fputs ("\n\n", stderr);
}
static void
print_firsts (void)
{
fprintf (stderr, "FIRSTS\n");
for (symbol_number i = ntokens; i < nsyms; ++i)
{
fprintf (stderr, " %s firsts\n", symbols[i]->tag);
bitset_iterator iter;
symbol_number j;
BITSET_FOR_EACH (iter, FIRSTS (i), j, 0)
fprintf (stderr, " %s\n", symbols[j + ntokens]->tag);
}
fprintf (stderr, "\n\n");
}
static void
print_fderives (void)
{
fprintf (stderr, "FDERIVES\n");
for (symbol_number i = ntokens; i < nsyms; ++i)
{
fprintf (stderr, " %s derives\n", symbols[i]->tag);
bitset_iterator iter;
rule_number r;
BITSET_FOR_EACH (iter, FDERIVES (i), r, 0)
{
fprintf (stderr, " %3d ", r);
rule_rhs_print (&rules[r], stderr);
fprintf (stderr, "\n");
}
}
fprintf (stderr, "\n\n");
}
/*-------------------------------------------------------------------.
| Set FIRSTS to an NNTERMS by NNTERMS matrix of bits indicating      |
| which nonterminals can begin the input derived from which other    |
| nonterminals.                                                      |
|                                                                    |
| For example, if some rule expands symbol 5 into the sequence of    |
| symbols 8 3 20, the symbol 8 can be the beginning of the data for  |
| symbol 5, so the bit [8 - ntokens] in firsts[5 - ntokens]          |
| (= FIRSTS (5)) is set.  The matrix is then closed reflexively and  |
| transitively.                                                      |
`-------------------------------------------------------------------*/
static void
set_firsts (void)
{
  int const tracing = trace_flag & trace_sets;

  firsts = bitsetv_create (nnterms, nnterms, BITSET_FIXED);

  /* Seed the relation with the first right-hand side symbol of each
     rule, when that symbol is a nonterminal.  */
  for (symbol_number lhs = ntokens; lhs < nsyms; ++lhs)
    for (symbol_number k = 0; derives[lhs - ntokens][k]; ++k)
      {
        item_number head = derives[lhs - ntokens][k]->rhs[0];
        if (ISVAR (head))
          bitset_set (FIRSTS (lhs), head - ntokens);
      }

  if (tracing)
    bitsetv_matrix_dump (stderr, "RTC: Firsts Input", firsts);

  bitsetv_reflexive_transitive_closure (firsts);

  if (tracing)
    {
      bitsetv_matrix_dump (stderr, "RTC: Firsts Output", firsts);
      print_firsts ();
    }
}
/*-------------------------------------------------------------------.
| Set FDERIVES to an NNTERMS by NRULES matrix of bits indicating |
| which rules can help derive the beginning of the data for each |
| nonterminal. |
| |
| For example, if symbol 5 can be derived as the sequence of symbols |
| 8 3 20, and one of the rules for deriving symbol 8 is rule 4, then |
| the [5 - NTOKENS, 4] bit in FDERIVES is set. |
`-------------------------------------------------------------------*/
static void
set_fderives (void)
{
fderives = bitsetv_create (nnterms, nrules, BITSET_FIXED);
set_firsts ();
for (symbol_number i = ntokens; i < nsyms; ++i)
for (symbol_number j = ntokens; j < nsyms; ++j)
if (bitset_test (FIRSTS (i), j - ntokens))
for (rule_number k = 0; derives[j - ntokens][k]; ++k)
bitset_set (FDERIVES (i), derives[j - ntokens][k]->number);
if (trace_flag & trace_sets)
print_fderives ();
bitsetv_free (firsts);
}
/* Prepare for calls to closure (): allocate ITEMSET with room for N
   items, create RULESET with one bit per rule, and compute
   FDERIVES.  */
void
closure_new (int n)
{
  itemset = xnmalloc (n, sizeof itemset[0]);
  ruleset = bitset_create (nrules, BITSET_FIXED);

  set_fderives ();
}
/* Compute in ITEMSET/NITEMSET the closure of the N kernel items CORE:
   the kernel items themselves, merged in increasing order with the
   first item of every rule selected in RULESET.  */
void
closure (item_index const *core, size_t n)
{
  if (trace_flag & trace_closure)
    closure_print ("input", core, n);

  /* RULESET: union of FDERIVES over the nonterminals that follow the
     dot of a kernel item.  */
  bitset_zero (ruleset);
  for (size_t k = 0; k < n; ++k)
    {
      item_number next_sym = ritem[core[k]];
      if (ISVAR (next_sym))
        bitset_or (ruleset, ruleset, FDERIVES (next_sym));
    }

  /* core is sorted on item index in ritem, which is sorted on rule number.
     Compute itemset with the same sort. */
  nitemset = 0;
  /* Index of the first kernel item not yet copied.  */
  size_t pending = 0;
  /* A bit index over RULESET. */
  rule_number ruleno;
  bitset_iterator iter;
  BITSET_FOR_EACH (iter, ruleset, ruleno, 0)
    {
      item_index first_item = rules[ruleno].rhs - ritem;
      /* Kernel items that sort before this rule's first item.  */
      for (; pending < n && core[pending] < first_item; ++pending)
        itemset[nitemset++] = core[pending];
      itemset[nitemset++] = first_item;
    }

  /* Kernel items that sort after every selected rule.  */
  for (; pending < n; ++pending)
    itemset[nitemset++] = core[pending];

  if (trace_flag & trace_closure)
    closure_print ("output", itemset, nitemset);
}
/* Release what closure_new () set up: FDERIVES, RULESET and
   ITEMSET.  */
void
closure_free (void)
{
  bitsetv_free (fderives);
  bitset_free (ruleset);
  free (itemset);
}
@@ -0,0 +1,54 @@
/* Subroutines for bison
Copyright (C) 1984, 1989, 2000-2002, 2007, 2009-2015, 2018-2021 Free
Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef CLOSURE_H_
# define CLOSURE_H_
# include "gram.h"
/* Allocates the itemset and ruleset vectors, and precomputes useful
data so that closure can be called. n is the number of elements to
allocate for itemset. */
void closure_new (int n);
/* Given the kernel (aka core) of a state (a sorted vector of item indices
ITEMS, of length N), set up RULESET and ITEMSET to indicate what
rules could be run and which items could be accepted when those
items are the active ones. */
void closure (item_index const *items, size_t n);
/* Free ITEMSET, RULESET and internal data. */
void closure_free (void);
/* ITEMSET is a sorted vector of item indices; NITEMSET is the number
of its leading entries that are significant (the vector itself may be
allocated larger). CLOSURE places there the indices of all items which
represent units of input that could arrive next. */
extern item_index *itemset;
extern size_t nitemset;
#endif /* !CLOSURE_H_ */
@@ -0,0 +1,680 @@
/* Declaration for error-reporting function for Bison.
Copyright (C) 2000-2002, 2004-2006, 2009-2015, 2018-2021 Free
Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
/* Based on error.c and error.h,
written by David MacKenzie <djm@gnu.ai.mit.edu>. */
#include <config.h>
#include "system.h"
#include <argmatch.h>
#include <c-ctype.h>
#include <progname.h>
#include <stdarg.h>
#include <sys/stat.h>
#include <textstyle.h>
#include "complain.h"
#include "files.h"
#include "fixits.h"
#include "getargs.h"
#include "quote.h"
// The URL of the manual page about diagnostics. Use the per-node
// manual, to avoid downloading repeatedly the whole manual over the
// Internet.
static const char *diagnostics_url
= "https://www.gnu.org/software/bison/manual/html_node/Diagnostics.html";
err_status complaint_status = status_none;
bool warnings_are_errors = false;
/** Whether -Werror/-Wno-error was applied to a warning. */
typedef enum
{
errority_unset = 0, /**< No explicit status. */
errority_disabled = 1, /**< Explicitly disabled with -Wno-error=foo. */
errority_enabled = 2 /**< Explicitly enabled with -Werror=foo. */
} errority;
/** For each warning type, its errority. */
static errority errority_flag[warnings_size];
/** Diagnostics severity. */
typedef enum
{
severity_disabled = 0, /**< Explicitly disabled via -Wno-foo. */
severity_unset = 1, /**< Unspecified status. */
severity_warning = 2, /**< A warning. */
severity_error = 3, /**< An error (continue, but die soon). */
severity_fatal = 4 /**< Fatal error (die now). */
} severity;
/** For each warning type, its severity. */
static severity warnings_flag[warnings_size];
styled_ostream_t errstream = NULL;
/* Start applying the style class S to what is printed on OUT.  Only
   stderr is styled: with color_debug the class is shown as an opening
   tag, otherwise it is forwarded to the styled stream ERRSTREAM.  */
void
begin_use_class (const char *s, FILE *out)
{
  if (out != stderr)
    return;

  if (color_debug)
    {
      fprintf (out, "<%s>", s);
      return;
    }

  styled_ostream_begin_use_class (errstream, s);
  styled_ostream_flush_to_current_style (errstream);
}
/* Stop applying the style class S to what is printed on OUT.  Only
   stderr is styled: with color_debug the class is shown as a closing
   tag, otherwise it is forwarded to the styled stream ERRSTREAM.  */
void
end_use_class (const char *s, FILE *out)
{
  if (out != stderr)
    return;

  if (color_debug)
    {
      fprintf (out, "</%s>", s);
      return;
    }

  styled_ostream_end_use_class (errstream, s);
  styled_ostream_flush_to_current_style (errstream);
}
/* Make what is subsequently printed on ERRSTREAM a hyperlink to REF.
   Does nothing unless OUT is stderr.  */
static void
begin_hyperlink (FILE *out, const char *ref)
{
  if (out != stderr)
    return;

  styled_ostream_set_hyperlink (errstream, ref, NULL);
}
/* End the hyperlink started by begin_hyperlink.  Does nothing unless
   OUT is stderr.  */
static void
end_hyperlink (FILE *out)
{
  if (out != stderr)
    return;

  styled_ostream_set_hyperlink (errstream, NULL, NULL);
}
/* Flush OUT.  When OUT is stderr, the styled stream ERRSTREAM is
   flushed first, before the stdio stream itself.  */
void
flush (FILE *out)
{
if (out == stderr)
ostream_flush (errstream, FLUSH_THIS_STREAM);
fflush (out);
}
/* Whether output on OUT is styled.  Only stderr can be: always with
   color_debug; otherwise, when built with libtextstyle, according to
   color_mode (for color_tty, only if stderr is a terminal).  */
bool
is_styled (FILE *out)
{
  if (out == stderr)
    {
      if (color_debug)
        return true;
#if HAVE_LIBTEXTSTYLE
      if (color_mode == color_yes || color_mode == color_html)
        return true;
      if (color_mode == color_tty)
        return isatty (STDERR_FILENO) != 0;
#endif
    }
  return false;
}
/*------------------------.
| --warnings's handling. |
`------------------------*/
ARGMATCH_DEFINE_GROUP (warning, warnings)
/* Documentation of the warning categories: each entry pairs an
   argument name with its translatable description.  The list ends
   with a { NULL, NULL } entry.  */
static const argmatch_warning_doc argmatch_warning_docs[] =
{
{ "conflicts-sr", N_("S/R conflicts (enabled by default)") },
{ "conflicts-rr", N_("R/R conflicts (enabled by default)") },
{ "counterexamples", N_("generate conflict counterexamples") },
{ "dangling-alias", N_("string aliases not attached to a symbol") },
{ "deprecated", N_("obsolete constructs") },
{ "empty-rule", N_("empty rules without %empty") },
{ "midrule-values", N_("unset or unused midrule values") },
{ "precedence", N_("useless precedence and associativity") },
{ "yacc", N_("incompatibilities with POSIX Yacc") },
{ "other", N_("all other warnings (enabled by default)") },
{ "all", N_("all the warnings except 'counterexamples', 'dangling-alias' and 'yacc'") },
{ "no-CATEGORY", N_("turn off warnings in CATEGORY") },
{ "none", N_("turn off all the warnings") },
{ "error[=CATEGORY]", N_("treat warnings as errors") },
{ NULL, NULL }
};
/* Accepted argument names and the warnings value each one stands for.
   "cex" is a second name for Wcounterexamples.  The list ends with an
   entry whose name is NULL.  */
static const argmatch_warning_arg argmatch_warning_args[] =
{
{ "all", Wall },
{ "conflicts-rr", Wconflicts_rr },
{ "conflicts-sr", Wconflicts_sr },
{ "counterexamples", Wcounterexamples }, { "cex", Wcounterexamples }, // Show cex second.
{ "dangling-alias", Wdangling_alias },
{ "deprecated", Wdeprecated },
{ "empty-rule", Wempty_rule },
{ "everything", Weverything },
{ "midrule-values", Wmidrule_values },
{ "none", Wnone },
{ "other", Wother },
{ "precedence", Wprecedence },
{ "yacc", Wyacc },
{ NULL, Wnone }
};
/* The group tying the two tables above together with the heading
   printed before the documentation.
   NOTE(review): the meaning of the final NULL member depends on the
   group type generated by ARGMATCH_DEFINE_GROUP -- confirm in
   argmatch.h.  */
const argmatch_warning_group_type argmatch_warning_group =
{
argmatch_warning_args,
argmatch_warning_docs,
N_("Warning categories include:"),
NULL
};
/* Print the documentation of the warning categories on OUT, by
   forwarding to argmatch_warning_usage.  */
void
warning_usage (FILE *out)
{
argmatch_warning_usage (out);
}
/* Apply one -W argument.  ARG is the whole argument; NO is nonzero iff
   it starts with "no-", and ERR is nonzero iff "error=" comes next.
   NO + ERR is the length of the prefix to skip to reach the category
   name.  */
void
warning_argmatch (char const *arg, size_t no, size_t err)
{
  int value = *argmatch_warning_value ("--warning", arg + no + err);

  /* -Wnone == -Wno-everything, and -Wno-none == -Weverything. */
  if (value == 0)
    {
      value = Weverything;
      no = !no;
    }

  for (size_t b = 0; b < warnings_size; ++b)
    {
      if (!(value & 1 << b))
        continue;

      if (err)
        {
          if (no)
            /* -Wno-error=foo. */
            errority_flag[b] = errority_disabled;
          else
            {
              /* -Werror=foo: enables -Wfoo. */
              errority_flag[b] = errority_enabled;
              warnings_flag[b] = severity_warning;
            }
        }
      else
        /* -Wno-foo, or -Wfoo. */
        warnings_flag[b] = no ? severity_disabled : severity_warning;
    }
}
/** Decode a comma-separated list of arguments from -W.
 *
 * \param args  comma separated list of effective subarguments to
 *              decode; it is split in place with strtok.  If 0, then
 *              behave as for "all".  If "help", print the warning
 *              categories on stdout and exit successfully.
 */
void
warnings_argmatch (char *args)
{
  if (!args)
    {
      warning_argmatch ("all", 0, 0);
      return;
    }

  if (STREQ (args, "help"))
    {
      warning_usage (stdout);
      exit (EXIT_SUCCESS);
    }

  for (char *tok = strtok (args, ","); tok; tok = strtok (NULL, ","))
    {
      if (STREQ (tok, "error"))
        {
          warnings_are_errors = true;
          continue;
        }
      if (STREQ (tok, "no-error"))
        {
          warnings_are_errors = false;
          continue;
        }

      /* The length of the possible 'no-' prefix: 3, or 0. */
      size_t no = STRPREFIX_LIT ("no-", tok) ? 3 : 0;
      /* The length of the possible 'error=' (possibly after
         'no-') prefix: 6, or 0. */
      size_t err = STRPREFIX_LIT ("error=", tok + no) ? 6 : 0;
      warning_argmatch (tok, no, err);
    }
}
/* The name of the style class used for messages of severity S.
   Abort on a value that is not an enumerator of severity.  */
static const char*
severity_style (severity s)
{
  const char *style = NULL;

  switch (s)
    {
    case severity_disabled:
    case severity_unset:
      style = "note";
      break;
    case severity_warning:
      style = "warning";
      break;
    case severity_error:
    case severity_fatal:
      style = "error";
      break;
    }

  if (!style)
    abort ();
  return style;
}
/* The (translated) prefix printed before messages of severity S; it
   is empty for severity_disabled and severity_unset.  Abort on a
   value that is not an enumerator of severity.  */
static const char*
severity_prefix (severity s)
{
  switch (s)
    {
    case severity_fatal:
      return _("fatal error");
    case severity_error:
      return _("error");
    case severity_warning:
      return _("warning");
    case severity_unset:
    case severity_disabled:
      return "";
    }
  abort ();
}
/* Print on OUT the prefix of severity S, styled with the class of S
   and followed by a colon and a space.  Print nothing for
   severity_disabled.  */
static void
severity_print (severity s, FILE *out)
{
  if (s == severity_disabled)
    return;

  const char *style_class = severity_style (s);
  begin_use_class (style_class, out);
  fprintf (out, "%s:", severity_prefix (s));
  end_use_class (style_class, out);
  /* The separating space is printed outside of the styled part.  */
  fputc (' ', out);
}
/*-----------.
| complain. |
`-----------*/
/* Select the style file (when built with libtextstyle) and create
   ERRSTREAM, the styled stream used for diagnostics on stderr: an
   HTML stream when color_mode is color_html, a terminal stream
   otherwise.  */
void
complain_init_color (void)
{
#if HAVE_LIBTEXTSTYLE
if (is_styled (stderr))
{
style_file_prepare ("BISON_STYLE", "BISON_STYLEDIR", pkgdatadir (),
"bison-default.css");
/* As a fallback, use the default in the current directory. */
struct stat statbuf;
/* The fallback is taken only if the prepared style file is missing
   (no name, or stat fails) and the local file exists.  */
if ((style_file_name == NULL || stat (style_file_name, &statbuf) < 0)
&& stat ("bison-default.css", &statbuf) == 0)
style_file_name = "bison-default.css";
}
else
/* No styling. */
style_file_name = NULL;
#endif
/* Workaround clang's warning (starting at Clang 3.5) about the stub
code of html_styled_ostream_create:
| src/complain.c:274:7: error: code will never be executed [-Werror,-Wunreachable-code]
| ? html_styled_ostream_create (file_ostream_create (stderr),
| ^~~~~~~~~~~~~~~~~~~~~~~~~~ */
#if defined __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wunreachable-code"
#endif
errstream =
color_mode == color_html
? html_styled_ostream_create (file_ostream_create (stderr),
style_file_name)
: styled_ostream_create (STDERR_FILENO, "(stderr)", TTYCTL_AUTO,
style_file_name);
#if defined __clang__
# pragma clang diagnostic pop
#endif
}
void
complain_init (void)
{
caret_init ();
warnings warnings_default =
Wconflicts_sr | Wconflicts_rr | Wdeprecated | Wother;
for (size_t b = 0; b < warnings_size; ++b)
{
warnings_flag[b] = (1 << b & warnings_default
? severity_warning
: severity_unset);
errority_flag[b] = errority_unset;
}
}
/* Release the diagnostics machinery: the caret module first, then the
   styled stream ERRSTREAM.  */
void
complain_free (void)
{
caret_free ();
styled_ostream_free (errstream);
}
/* A diagnostic with FLAGS is about to be issued. With what severity?
   FLAGS containing `fatal' or `complaint' yield severity_fatal or
   severity_error respectively.  Otherwise the result is the highest
   severity among the warning categories in FLAGS, a warning being
   turned into an error when -Werror applies to its category.  */
static severity
warning_severity (warnings flags)
{
  /* Diagnostics about fatal errors. */
  if (flags & fatal)
    return severity_fatal;

  /* Diagnostics about errors. */
  if (flags & complaint)
    return severity_error;

  /* Diagnostics about warnings. */
  severity res = severity_disabled;
  for (size_t b = 0; b < warnings_size; ++b)
    {
      if (!(flags & 1 << b))
        continue;

      if (res < warnings_flag[b])
        res = warnings_flag[b];

      /* If the diagnostic is enabled, and -Werror is enabled,
         and -Wno-error=foo was not explicitly requested, this
         is an error. */
      bool werror_applies
        = (errority_flag[b] == errority_enabled
           || (warnings_are_errors
               && errority_flag[b] != errority_disabled));
      if (res == severity_warning && werror_applies)
        res = severity_error;
    }
  return res;
}
/** Whether none of the warning categories in \a flags was ever given
    a severity, i.e., all of them are still severity_unset.  */
bool
warning_is_unset (warnings flags)
{
  bool all_unset = true;
  for (size_t bit = 0; all_unset && bit < warnings_size; ++bit)
    if ((flags & 1 << bit) && warnings_flag[bit] != severity_unset)
      all_unset = false;
  return all_unset;
}
/** Whether a diagnostic with \a flags would be reported, i.e., its
    severity is at least severity_warning.  */
bool
warning_is_enabled (warnings flags)
{
  const severity s = warning_severity (flags);
  return s >= severity_warning;
}
/** Display a "[-Wyacc]" like message on \a out for the lowest warning
    category set in \a warn_flags; only that first category is shown.

    NOTE(review): the category text itself goes through \c errstream,
    not \a out -- confirm callers only ever pass stderr.  */
static void
warnings_print_categories (warnings warn_flags, FILE *out)
{
  for (int bit = 0; bit < warnings_size; ++bit)
    {
      if (!(warn_flags & (1 << bit)))
        continue;

      warnings category = 1 << bit;
      severity s = warning_severity (category);
      const char *css_class = severity_style (s);

      fputs (" [", out);
      begin_use_class (css_class, out);

      // E.g., "counterexamples".
      const char *name = argmatch_warning_argument (&category);

      // Link the category to its entry in the documentation.
      char url[200];
      snprintf (url, sizeof url, "%s#W%s", diagnostics_url, name);
      begin_hyperlink (out, url);
      ostream_printf (errstream,
                      "-W%s%s",
                      s == severity_error ? "error=" : "",
                      name);
      end_hyperlink (out);

      // Because we mix stdio with ostream I/O, we need to flush
      // here for sake of color == debug.
      flush (out);
      end_use_class (css_class, out);
      fputc (']', out);

      /* Display only the first match, the second is "-Wall".  */
      return;
    }
}
/** Emit one diagnostic on stderr.
 *
 * \param loc      where the problem is; when null, the grammar file
 *                 name is used, or else the program name.
 * \param flags    the category for this message; also carries the
 *                 \c note, \c silent and \c no_caret modifiers.
 * \param sever    selects the prefix put before the message; with
 *                 severity_disabled no prefix is printed.
 * \param message  the error message, a printf format string.  Iff it
 *                 ends with ": ", then no trailing newline is printed,
 *                 and the caller should print the remaining
 *                 newline-terminated message to stderr.
 * \param args     the arguments of the format string.
 */
static
void
error_message (const location *loc, warnings flags,
               severity sever, const char *message, va_list args)
{
  const bool is_note = (flags & note) != 0;
  const char *style = is_note ? "note" : severity_style (sever);

  /* Where.  */
  if (loc)
    location_print (*loc, stderr);
  else if (grammar_file)
    fprintf (stderr, "%s", grammar_file);
  else
    fprintf (stderr, "%s", program_name);
  fprintf (stderr, ": ");

  /* How bad.  */
  if (sever != severity_disabled)
    {
      begin_use_class (style, stderr);
      const char *prefix = is_note ? _("note") : severity_prefix (sever);
      fprintf (stderr, "%s:", prefix);
      end_use_class (style, stderr);
      fputc (' ', stderr);
    }

  /* What.  */
  vfprintf (stderr, message, args);

  /* Print the type of warning, only if this is not a sub message
     (in which case the prefix is null).  */
  if (sever != severity_disabled && !(flags & silent))
    warnings_print_categories (flags, stderr);

  /* A message ending with ": " is continued by the caller.  */
  const size_t len = strlen (message);
  const bool continued
    = 2 <= len && message[len - 2] == ':' && message[len - 1] == ' ';
  if (!continued)
    {
      putc ('\n', stderr);
      flush (stderr);
      if (loc && !(flags & no_caret))
        location_caret (*loc, style, stderr);
    }
  flush (stderr);
}
/** Raise a complaint (fatal error, error or just warning).

    Update \c complaint_status, print the message when its severity is
    at least a warning, and exit with EXIT_FAILURE when \a flags has
    the \c fatal bit.  */
static void
complains (const location *loc, warnings flags,
           const char *message, va_list args)
{
  if (complaint_status < status_complaint && (flags & complaint))
    complaint_status = status_complaint;

  const severity s = warning_severity (flags);
  if (s >= severity_warning)
    {
      /* Remember that something at least as bad as an error was
         issued, unless a status was already recorded.  */
      if (s >= severity_error && complaint_status == status_none)
        complaint_status = status_warning_as_error;
      error_message (loc, flags, s, message, args);
    }

  if (flags & fatal)
    exit (EXIT_FAILURE);
}
/** Make a complaint, with maybe a location: the printf-like front end
    of complains ().  \a message is the format string for the trailing
    arguments.  */
void
complain (location const *loc, warnings flags, const char *message, ...)
{
va_list args;
va_start (args, message);
complains (loc, flags, message, args);
va_end (args);
}
/** Make a sub-complaint: like complain (), but \a flags is extended
    with \c note and \c silent before being handed to complains ().  */
void
subcomplain (location const *loc, warnings flags, const char *message, ...)
{
va_list args;
va_start (args, message);
complains (loc, flags | note | silent, message, args);
va_end (args);
}
/** Like complain (), but with an \a argc/argv interface.

    \a argv[0] is the message; with several arguments it is used as a
    format string for \a argv[1] onwards.  Any \a argc outside 1..5
    raises a fatal error.  */
void
complain_args (location const *loc, warnings w,
               int argc, char *argv[])
{
  if (argc == 1)
    complain (loc, w, "%s", _(argv[0]));
  else if (argc == 2)
    complain (loc, w, _(argv[0]), argv[1]);
  else if (argc == 3)
    complain (loc, w, _(argv[0]), argv[1], argv[2]);
  else if (argc == 4)
    complain (loc, w, _(argv[0]), argv[1], argv[2], argv[3]);
  else if (argc == 5)
    complain (loc, w, _(argv[0]), argv[1], argv[2], argv[3], argv[4]);
  else
    complain (loc, fatal, "too many arguments for complains");
}
/** Report, under the -Wyacc category, that \a directive at \a loc is
    not supported by POSIX Yacc.  */
void
bison_directive (location const *loc, char const *directive)
{
complain (loc, Wyacc,
_("POSIX Yacc does not support %s"), directive);
}
/** Report that the directive \a old at \a loc is deprecated, and
    suggest \a upd instead.  Does nothing unless -Wdeprecated is
    enabled.  */
void
deprecated_directive (location const *loc, char const *old, char const *upd)
{
  if (!warning_is_enabled (Wdeprecated))
    return;

  complain (loc, Wdeprecated,
            _("deprecated directive: %s, use %s"),
            quote (old), quote_n (1, upd));
  location_caret_suggestion (*loc, upd, stderr);
  /* Register updates only if -Wdeprecated is enabled.  */
  fixits_register (loc, upd);
}
/** Report that \a directive, first seen at \a first, is repeated at
    \a second, and register a fix-it replacing the repetition with the
    empty string.  */
void
duplicate_directive (char const *directive,
                     location first, location second)
{
  const bool with_caret = (feature_flag & feature_caret) != 0;

  /* The directive is only quoted in the message when the caret
     feature is off.  */
  if (!with_caret)
    complain (&second, Wother, _("duplicate directive: %s"), quote (directive));
  else
    complain (&second, Wother, _("duplicate directive"));

  subcomplain (&first, Wother, _("previous declaration"));
  fixits_register (&second, "");
}
/** Report as an error that \a directive, allowed once per rule and
    first seen at \a first, is repeated at \a second; then register a
    fix-it replacing the repetition with the empty string.  */
void
duplicate_rule_directive (char const *directive,
location first, location second)
{
complain (&second, complaint, _("only one %s allowed per rule"), directive);
subcomplain (&first, complaint, _("previous declaration"));
fixits_register (&second, "");
}
/** Report a syntax error at \a loc, where argv[0] is the unexpected
    token, and the following entries of \a argv are the expected ones.

    The message is picked according to \a argc (at most 5); its
    "%N$s" placeholders are replaced by argv[N], styled as
    "unexpected" for N = 0 and as "expected" otherwise.  */
void
syntax_error (location loc,
              int argc, const char* argv[])
{
  if (complaint_status < status_complaint)
    complaint_status = status_complaint;
  assert (argc <= 5);

  const char *format = NULL;
  switch (argc)
    {
    default: /* Avoid compiler warnings.  */
    case 0:
      format = _("syntax error");
      break;
    case 1:
      format = _("unexpected %0$s");
      break;
    case 2:
      format = _("expected %1$s before %0$s");
      break;
    case 3:
      format = _("expected %1$s or %2$s before %0$s");
      break;
    case 4:
      format = _("expected %1$s or %2$s or %3$s before %0$s");
      break;
    case 5:
      format = _("expected %1$s or %2$s or %3$s or %4$s before %0$s");
      break;
    }

  location_print (loc, stderr);
  fputs (": ", stderr);
  severity_print (severity_error, stderr);

  for (const char *cp = format; *cp; )
    {
      /* A placeholder is "%N$s" with N a digit naming an available
         argument; anything else is copied verbatim.  */
      const bool is_placeholder
        = (cp[0] == '%'
           && c_isdigit (cp[1])
           && cp[2] == '$'
           && cp[3] == 's'
           && (cp[1] - '0') < argc);
      if (!is_placeholder)
        {
          fputc (*cp, stderr);
          ++cp;
          continue;
        }

      const int arg = cp[1] - '0';
      const char *style = arg == 0 ? "unexpected" : "expected";
      begin_use_class (style, stderr);
      fputs (argv[arg], stderr);
      end_use_class (style, stderr);
      cp += 4;
    }
  fputc ('\n', stderr);
  location_caret (loc, "error", stderr);
}
@@ -0,0 +1,186 @@
/* Declaration for error-reporting function for Bison.
Copyright (C) 2000-2002, 2006, 2009-2015, 2018-2021 Free Software
Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef COMPLAIN_H_
# define COMPLAIN_H_ 1
# include <attribute.h>
# include "location.h"
/*---------------.
| Error stream. |
`---------------*/
/** Enable a style on \a out provided it's stderr. */
void begin_use_class (const char *style, FILE *out);
/** Disable a style on \a out provided it's stderr. */
void end_use_class (const char *style, FILE *out);
/** Flush \a out. */
void flush (FILE *out);
/** Whether there's styling on OUT. */
bool is_styled (FILE *out);
/*-------------.
| --warnings. |
`-------------*/
/** The bit position assigned to each warning category.  The matching
    masks (Wconflicts_rr, etc.) are defined as 1 << warning_xxx in the
    \c warnings enum, and \c warnings_size is the bound of the loops
    over all the categories.  */
typedef enum
{
warning_conflicts_rr,
warning_conflicts_sr,
warning_counterexamples,
warning_dangling_alias,
warning_deprecated,
warning_empty_rule,
warning_midrule_values,
warning_other,
warning_precedence,
warning_yacc, /**< Features that POSIX Yacc does not support. */
warnings_size /**< The number of warnings. Must be last. */
} warning_bit;
/** Whether -Werror was set. */
extern bool warnings_are_errors;
/** Document --warning arguments. */
void warning_usage (FILE *out);
/** Decode a single argument from -W.
*
* \param arg the subarguments to decode.
* If null, then activate all the flags.
* \param no length of the potential "no-" prefix.
* Can be 0 or 3. If 3, negate the action of the subargument.
* \param err length of a potential "error=".
* Can be 0 or 6. If 6, treat the subargument as a CATEGORY.
*
* If VALUE != 0 then KEY sets flags and no-KEY clears them.
* If VALUE == 0 then KEY clears all flags from \c all and no-KEY sets all
* flags from \c all. Thus no-none = all and no-all = none.
*/
void warning_argmatch (char const *arg, size_t no, size_t err);
/** Decode a comma-separated list of arguments from -W.
*
* \param args comma separated list of effective subarguments to decode.
* If 0, then activate all the flags.
*/
void warnings_argmatch (char *args);
/*-----------.
| complain. |
`-----------*/
/** Initialize this module. */
void complain_init (void);
/** Reclaim resources. */
void complain_free (void);
/** Initialize support for colored messages. */
void complain_init_color (void);
/** Flags passed to diagnostics functions: one bit per warning
    category (see \c warning_bit), plus modifier bits that select the
    kind of diagnostic and how it is displayed.  */
typedef enum
{
Wnone = 0, /**< Issue no warnings. */
Wconflicts_rr = 1 << warning_conflicts_rr,
Wconflicts_sr = 1 << warning_conflicts_sr,
Wcounterexamples = 1 << warning_counterexamples,
Wdangling_alias = 1 << warning_dangling_alias,
Wdeprecated = 1 << warning_deprecated,
Wempty_rule = 1 << warning_empty_rule,
Wmidrule_values = 1 << warning_midrule_values,
Wother = 1 << warning_other,
Wprecedence = 1 << warning_precedence,
Wyacc = 1 << warning_yacc,
complaint = 1 << 11, /**< All complaints. */
fatal = 1 << 12, /**< All fatal errors. */
silent = 1 << 13, /**< Do not display the warning type. */
no_caret = 1 << 14, /**< Do not display caret location. */
note = 1 << 15, /**< Display as a note. */
/** All the warning categories: every bit but complaint, fatal and
    silent.  NOTE(review): the no_caret and note bits are not masked
    out here -- confirm this is intended.  */
Weverything = ~complaint & ~fatal & ~silent,
/** Weverything without counterexamples, dangling-alias and yacc. */
Wall = Weverything & ~Wcounterexamples & ~Wdangling_alias & ~Wyacc
} warnings;
/** Whether the warnings of \a flags are all unset.
(Never enabled, never disabled). */
bool warning_is_unset (warnings flags);
/** Whether warnings of \a flags should be reported. */
bool warning_is_enabled (warnings flags);
/** Make a complaint, with maybe a location. */
void complain (location const *loc, warnings flags, char const *message, ...)
ATTRIBUTE_FORMAT ((__printf__, 3, 4));
/** Likewise, but with an \a argc/argv interface. */
void complain_args (location const *loc, warnings w,
int argc, char *arg[]);
/** Make a subcomplain with location and note. */
void subcomplain (location const *loc, warnings flags,
char const *message, ...)
ATTRIBUTE_FORMAT ((__printf__, 3, 4));
/** GNU Bison extension not valid with POSIX Yacc. */
void bison_directive (location const *loc, char const *directive);
/** Report an obsolete syntax, suggest the updated one. */
void deprecated_directive (location const *loc,
char const *obsolete, char const *updated);
/** Report a repeated directive. */
void duplicate_directive (char const *directive,
location first, location second);
/** Report a repeated directive for a rule. */
void duplicate_rule_directive (char const *directive,
location first, location second);
/** Report a syntax error, where argv[0] is the unexpected
token, and argv[1...argc] are the expected ones. */
void syntax_error (location loc,
int argc, const char* argv[]);
/** Warnings treated as errors shouldn't stop the execution as regular
errors should (because due to their nature, it is safe to go
on). Thus, there are three possible execution statuses, in
increasing order of gravity. */
typedef enum
{
status_none, /**< No diagnostic issued so far. */
status_warning_as_error, /**< A diagnostic with at least the severity
                              of an error was issued, but not through
                              the `complaint' flag (e.g., a warning
                              turned into an error). */
status_complaint /**< An error was issued. */
} err_status;
/** Whether an error was reported. */
extern err_status complaint_status;
#endif /* !COMPLAIN_H_ */
@@ -0,0 +1,767 @@
/* Find and resolve or report lookahead conflicts for bison,
Copyright (C) 1984, 1989, 1992, 2000-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include <bitset.h>
#include "complain.h"
#include "conflicts.h"
#include "counterexample.h"
#include "files.h"
#include "getargs.h"
#include "gram.h"
#include "lalr.h"
#include "lr0.h"
#include "print-xml.h"
#include "reader.h"
#include "state.h"
#include "symtab.h"
/* -1 stands for not specified. */
int expected_sr_conflicts = -1;
int expected_rr_conflicts = -1;
/* CONFLICTS[STATE-NUM] -- Whether that state has unresolved conflicts. */
static bool *conflicts;
static struct obstack solved_conflicts_obstack;
static struct obstack solved_conflicts_xml_obstack;
static bitset shift_set;
static bitset lookahead_set;
/* Whether state S was flagged as having unresolved conflicts, as
   recorded in CONFLICTS under its state number.  */
bool
has_conflicts (const state *s)
{
return conflicts[s->number];
}
/* The ways an S/R conflict between a rule and a token can be settled
   by precedence and associativity; used to log the resolution.  */
enum conflict_resolution
{
/* The token has a higher precedence than the rule: shift. */
shift_resolution,
/* The token has a lower precedence than the rule: reduce. */
reduce_resolution,
/* Same precedence, left associative token: reduce. */
left_resolution,
/* Same precedence, right associative token: shift. */
right_resolution,
/* Same precedence, nonassociative token: syntax error. */
nonassoc_resolution
};
/* Append to the textual report (solved_conflicts_obstack) how the S/R
   conflict between rule R and TOKEN was resolved: first the decision,
   then the reason in parentheses.  */
static void
log_resolution_text (rule *r, symbol_number token,
                     enum conflict_resolution resolution)
{
  struct obstack *out = &solved_conflicts_obstack;
  const char *tag = symbols[token]->tag;

  /* The description of the resolution.  */
  switch (resolution)
    {
    case shift_resolution:
    case right_resolution:
      obstack_sgrow (out, " ");
      obstack_printf (out,
                      _("Conflict between rule %d and token %s"
                        " resolved as shift"),
                      r->number,
                      tag);
      break;

    case reduce_resolution:
    case left_resolution:
      obstack_sgrow (out, " ");
      obstack_printf (out,
                      _("Conflict between rule %d and token %s"
                        " resolved as reduce"),
                      r->number,
                      tag);
      break;

    case nonassoc_resolution:
      obstack_sgrow (out, " ");
      obstack_printf (out,
                      _("Conflict between rule %d and token %s"
                        " resolved as an error"),
                      r->number,
                      tag);
      break;
    }

  /* The reason.  */
  switch (resolution)
    {
    case shift_resolution:
      obstack_printf (out,
                      " (%s < %s)",
                      r->prec->symbol->tag,
                      tag);
      break;

    case reduce_resolution:
      obstack_printf (out,
                      " (%s < %s)",
                      tag,
                      r->prec->symbol->tag);
      break;

    case left_resolution:
      obstack_printf (out,
                      " (%%left %s)",
                      tag);
      break;

    case right_resolution:
      obstack_printf (out,
                      " (%%right %s)",
                      tag);
      break;

    case nonassoc_resolution:
      obstack_printf (out,
                      " (%%nonassoc %s)",
                      tag);
      break;
    }

  obstack_sgrow (out, ".\n");
}

/* Append to the XML report (solved_conflicts_xml_obstack) a
   <resolution> element describing how the S/R conflict between rule R
   and TOKEN was resolved; its content is the reason.  */
static void
log_resolution_xml (rule *r, symbol_number token,
                    enum conflict_resolution resolution)
{
  struct obstack *out = &solved_conflicts_xml_obstack;
  const char *tag = symbols[token]->tag;

  /* The description of the resolution.  */
  switch (resolution)
    {
    case shift_resolution:
    case right_resolution:
      obstack_printf (out,
                      " <resolution rule=\"%d\" symbol=\"%s\""
                      " type=\"shift\">",
                      r->number,
                      xml_escape (tag));
      break;

    case reduce_resolution:
    case left_resolution:
      obstack_printf (out,
                      " <resolution rule=\"%d\" symbol=\"%s\""
                      " type=\"reduce\">",
                      r->number,
                      xml_escape (tag));
      break;

    case nonassoc_resolution:
      obstack_printf (out,
                      " <resolution rule=\"%d\" symbol=\"%s\""
                      " type=\"error\">",
                      r->number,
                      xml_escape (tag));
      break;
    }

  /* The reason.  */
  switch (resolution)
    {
    case shift_resolution:
      obstack_printf (out,
                      "%s &lt; %s",
                      xml_escape_n (0, r->prec->symbol->tag),
                      xml_escape_n (1, tag));
      break;

    case reduce_resolution:
      obstack_printf (out,
                      "%s &lt; %s",
                      xml_escape_n (0, tag),
                      xml_escape_n (1, r->prec->symbol->tag));
      break;

    case left_resolution:
      obstack_printf (out,
                      "%%left %s",
                      xml_escape (tag));
      break;

    case right_resolution:
      obstack_printf (out,
                      "%%right %s",
                      xml_escape (tag));
      break;

    case nonassoc_resolution:
      obstack_printf (out,
                      "%%nonassoc %s",
                      xml_escape (tag));
      break;
    }

  obstack_sgrow (out, "</resolution>\n");
}

/*----------------------------------------------------------------.
| Explain how an SR conflict between TOKEN and RULE was resolved: |
| RESOLUTION.  The explanation goes to the textual report when    |
| solved conflicts are reported, and to the XML report when XML   |
| output is requested.                                            |
`----------------------------------------------------------------*/
static inline void
log_resolution (rule *r, symbol_number token,
                enum conflict_resolution resolution)
{
  if (report_flag & report_solved_conflicts)
    log_resolution_text (r, token, resolution);

  /* XML report */
  if (xml_flag)
    log_resolution_xml (r, token, resolution);
}
/*------------------------------------------------------------------.
| Turn off the shift recorded for the specified token in the        |
| specified state.  Used when we resolve a shift/reduce conflict in |
| favor of the reduction or as an error (%nonassoc).                |
|                                                                   |
| TOKEN is also removed from LOOKAHEAD_SET.                         |
`------------------------------------------------------------------*/
static void
flush_shift (state *s, int token)
{
  transitions *trans = s->transitions;

  bitset_reset (lookahead_set, token);
  for (int i = 0; i < trans->num; ++i)
    {
      if (TRANSITION_IS_DISABLED (trans, i))
        continue;
      if (TRANSITION_SYMBOL (trans, i) == token)
        TRANSITION_DISABLE (trans, i);
    }
}
/*--------------------------------------------------------------------.
| Turn off the reduce recorded for the specified token in the |
| specified lookahead set. Used when we resolve a shift/reduce |
| conflict in favor of the shift or as an error (%nonassoc). |
| |
| This simply clears bit TOKEN in LOOKAHEADS. |
`--------------------------------------------------------------------*/
static void
flush_reduce (bitset lookaheads, int token)
{
bitset_reset (lookaheads, token);
}
/*------------------------------------------------------------------.
| Attempt to resolve shift/reduce conflict for one rule by means of |
| precedence declarations.  It has already been checked that the    |
| rule has a precedence.  A conflict is resolved by modifying the   |
| shift or reduce tables so that there is no longer a conflict.     |
|                                                                   |
| RULENO is the index, in the reductions of S, of the rule and of   |
| its lookahead bitset.                                             |
|                                                                   |
| Tokens made explicit errors (%nonassoc) are appended to ERRORS,   |
| and *NERRS is incremented accordingly.                            |
`------------------------------------------------------------------*/
static void
resolve_sr_conflict (state *s, int ruleno, symbol **errors, int *nerrs)
{
  reductions *reds = s->reductions;
  /* Find the rule to reduce by to get precedence of reduction.  */
  rule *redrule = reds->rules[ruleno];
  int redprec = redrule->prec->prec;
  bitset lookaheads = reds->lookaheads[ruleno];

  for (symbol_number tok = 0; tok < ntokens; ++tok)
    {
      /* Only tokens that are both a lookahead of the rule and
         shiftable, and that have a precedence, can be settled.  */
      if (!bitset_test (lookaheads, tok)
          || !bitset_test (lookahead_set, tok)
          || !symbols[tok]->content->prec)
        continue;

      /* The precedence of shifting is that of token TOK.  */
      if (symbols[tok]->content->prec < redprec)
        {
          /* The rule wins: reduce.  */
          register_precedence (redrule->prec->number, tok);
          log_resolution (redrule, tok, reduce_resolution);
          flush_shift (s, tok);
        }
      else if (symbols[tok]->content->prec > redprec)
        {
          /* The token wins: shift.  */
          register_precedence (tok, redrule->prec->number);
          log_resolution (redrule, tok, shift_resolution);
          flush_reduce (lookaheads, tok);
        }
      else
        {
          /* Matching precedence levels.
             For non-defined associativity, keep both: unexpected
             associativity conflict.
             For left associativity, keep only the reduction.
             For right associativity, keep only the shift.
             For nonassociativity, keep neither.  */
          switch (symbols[tok]->content->assoc)
            {
            case undef_assoc:
              abort ();

            case precedence_assoc:
              break;

            case right_assoc:
              register_assoc (tok, redrule->prec->number);
              log_resolution (redrule, tok, right_resolution);
              flush_reduce (lookaheads, tok);
              break;

            case left_assoc:
              register_assoc (tok, redrule->prec->number);
              log_resolution (redrule, tok, left_resolution);
              flush_shift (s, tok);
              break;

            case non_assoc:
              register_assoc (tok, redrule->prec->number);
              log_resolution (redrule, tok, nonassoc_resolution);
              flush_shift (s, tok);
              flush_reduce (lookaheads, tok);
              /* Record an explicit error for this token.  */
              errors[(*nerrs)++] = symbols[tok];
              break;
            }
        }
    }
}
/*-------------------------------------------------------------------.
| Solve the S/R conflicts of state S using the                       |
| precedence/associativity, and flag it inconsistent if it still has |
| conflicts.  ERRORS can be used as storage to compute the list of   |
| lookahead tokens on which S raises a syntax error (%nonassoc).     |
|                                                                    |
| Consistent states are left untouched.  When S has no lookahead     |
| sets at all, there is nothing to resolve nor to flag.              |
`-------------------------------------------------------------------*/
static void
set_conflicts (state *s, symbol **errors)
{
  if (s->consistent)
    return;

  reductions *reds = s->reductions;
  int nerrs = 0;

  /* LOOKAHEAD_SET starts as the set of the tokens S can shift.  */
  bitset_zero (lookahead_set);
  {
    transitions *trans = s->transitions;
    int i;
    FOR_EACH_SHIFT (trans, i)
      bitset_set (lookahead_set, TRANSITION_SYMBOL (trans, i));
  }

  /* Loop over all rules which require lookahead in this state.  First
     check for shift/reduce conflict, and try to resolve using
     precedence.

     reds->lookaheads can be NULL if the LR type is LR(0): guard this
     loop exactly like the detection loop below, instead of
     dereferencing a null array.  */
  if (reds->lookaheads)
    for (int i = 0; i < reds->num; ++i)
      if (reds->rules[i]->prec
          && reds->rules[i]->prec->prec
          && !bitset_disjoint_p (reds->lookaheads[i], lookahead_set))
        resolve_sr_conflict (s, i, errors, &nerrs);

  if (nerrs)
    /* Some tokens have been explicitly made errors.  Allocate a
       permanent errs structure for this state, to record them.  */
    state_errs_set (s, nerrs, errors);

  /* Hand over to S the explanations logged while resolving.  */
  if (obstack_object_size (&solved_conflicts_obstack))
    s->solved_conflicts = obstack_finish0 (&solved_conflicts_obstack);
  if (obstack_object_size (&solved_conflicts_xml_obstack))
    s->solved_conflicts_xml = obstack_finish0 (&solved_conflicts_xml_obstack);

  /* Loop over all rules which require lookahead in this state.  Check
     for conflicts not resolved above: a lookahead that is already
     claimed by a shift or by a previous reduction.

     reds->lookaheads can be NULL if the LR type is LR(0).  */
  if (reds->lookaheads)
    for (int i = 0; i < reds->num; ++i)
      {
        if (!bitset_disjoint_p (reds->lookaheads[i], lookahead_set))
          conflicts[s->number] = true;
        bitset_or (lookahead_set, lookahead_set, reds->lookaheads[i]);
      }
}
/*----------------------------------------------------------------.
| Solve all the S/R conflicts using the precedence/associativity, |
| and flag as inconsistent the states that still have conflicts. |
`----------------------------------------------------------------*/
void
conflicts_solve (void)
{
/* List of lookahead tokens on which we explicitly raise a syntax error. */
symbol **errors = xnmalloc (ntokens + 1, sizeof *errors);
conflicts = xcalloc (nstates, sizeof *conflicts);
shift_set = bitset_create (ntokens, BITSET_FIXED);
lookahead_set = bitset_create (ntokens, BITSET_FIXED);
obstack_init (&solved_conflicts_obstack);
obstack_init (&solved_conflicts_xml_obstack);
for (state_number i = 0; i < nstates; ++i)
{
set_conflicts (states[i], errors);
/* For uniformity of the code, make sure all the states have a valid
'errs' member. */
if (!states[i]->errs)
states[i]->errs = errs_new (0, 0);
}
free (errors);
}
/* Renumber the entries of CONFLICTS after states were removed:
   OLD_TO_NEW maps each of the NSTATES_OLD former state numbers to its
   new number, or to NSTATES_OLD when the state no longer exists.  */
void
conflicts_update_state_numbers (state_number old_to_new[],
                                state_number nstates_old)
{
  for (state_number old = 0; old < nstates_old; ++old)
    {
      const state_number new = old_to_new[old];
      /* Removed states have no entry to carry over.  */
      if (new == nstates_old)
        continue;
      conflicts[new] = conflicts[old];
    }
}
/*-------------------------------------------------------------.
| Count the number of shift/reduce conflicts of state S: the   |
| tokens that are both shiftable and a lookahead of one of its |
| reductions.  Uses SHIFT_SET and LOOKAHEAD_SET as scratch.    |
`-------------------------------------------------------------*/
static size_t
count_state_sr_conflicts (const state *s)
{
  transitions *trans = s->transitions;
  reductions *reds = s->reductions;

  /* No transitions: nothing to shift, hence no S/R conflict.  */
  if (trans == NULL)
    return 0;

  /* The tokens that can be shifted.  */
  bitset_zero (shift_set);
  {
    int i;
    FOR_EACH_SHIFT (trans, i)
      bitset_set (shift_set, TRANSITION_SYMBOL (trans, i));
  }

  /* The tokens on which some reduction is possible.  */
  bitset_zero (lookahead_set);
  for (int r = 0; r < reds->num; ++r)
    bitset_or (lookahead_set, lookahead_set, reds->lookaheads[r]);

  /* The conflicts are the tokens in both sets.  */
  bitset_and (lookahead_set, lookahead_set, shift_set);
  return bitset_count (lookahead_set);
}
/*--------------------------------------------------------------.
| The total number of shift/reduce conflicts, summed over all   |
| the states flagged in CONFLICTS.                              |
`--------------------------------------------------------------*/
static size_t
count_sr_conflicts (void)
{
  size_t total = 0;
  for (state_number sn = 0; sn < nstates; ++sn)
    {
      if (!conflicts[sn])
        continue;
      total += count_state_sr_conflicts (states[sn]);
    }
  return total;
}
/*-----------------------------------------------------------------.
| Count the number of reduce/reduce conflicts of state S.  Count   |
| one conflict for each reduction after the first for a given      |
| token.                                                           |
`-----------------------------------------------------------------*/
static size_t
count_state_rr_conflicts (const state *s)
{
  reductions *reds = s->reductions;
  size_t res = 0;

  for (symbol_number tok = 0; tok < ntokens; ++tok)
    {
      /* How many reductions are possible on TOK.  */
      int nreds = 0;
      for (int j = 0; j < reds->num; ++j)
        if (bitset_test (reds->lookaheads[j], tok))
          ++nreds;

      if (nreds > 1)
        res += nreds - 1;
    }
  return res;
}
/*--------------------------------------------------------------.
| The total number of reduce/reduce conflicts, summed over all  |
| the states flagged in CONFLICTS.                              |
`--------------------------------------------------------------*/
static size_t
count_rr_conflicts (void)
{
  size_t total = 0;
  for (state_number sn = 0; sn < nstates; ++sn)
    {
      if (!conflicts[sn])
        continue;
      total += count_state_rr_conflicts (states[sn]);
    }
  return total;
}
/*------------------------------------------------------------------.
| For a given rule, the number of shift/reduce conflicts in a given |
| state: the number of shiftable tokens of S that are lookaheads    |
| of a reduction of S by R.                                         |
`------------------------------------------------------------------*/
static size_t
count_rule_state_sr_conflicts (rule *r, state *s)
{
  size_t res = 0;
  transitions *trans = s->transitions;
  reductions *reds = s->reductions;

  for (int i = 0; i < reds->num; ++i)
    {
      /* Only the reductions by R matter.  */
      if (reds->rules[i] != r)
        continue;

      bitset lookaheads = reds->lookaheads[i];
      int j;
      FOR_EACH_SHIFT (trans, j)
        res += bitset_test (lookaheads, TRANSITION_SYMBOL (trans, j));
    }
  return res;
}
/*---------------------------------------------------------------------.
| For a given rule, the number of shift/reduce conflicts it is         |
| involved in, summed over all the states flagged in CONFLICTS.        |
`---------------------------------------------------------------------*/
static size_t
count_rule_sr_conflicts (rule *r)
{
  size_t total = 0;
  for (state_number sn = 0; sn < nstates; ++sn)
    {
      if (!conflicts[sn])
        continue;
      total += count_rule_state_sr_conflicts (r, states[sn]);
    }
  return total;
}
/*-------------------------------------------------------------------.
| For a given rule, the number of reduce/reduce conflicts in a given |
| state: for each reduction of S by R, and each reduction of S by    |
| another rule, the number of lookaheads they have in common.        |
`-------------------------------------------------------------------*/
static size_t
count_rule_state_rr_conflicts (rule *r, state *s)
{
  size_t res = 0;
  const reductions *reds = s->reductions;
  /* Scratch set for the lookaheads shared by two reductions.  */
  bitset common = bitset_create (ntokens, BITSET_FIXED);

  for (int i = 0; i < reds->num; ++i)
    {
      if (reds->rules[i] != r)
        continue;

      for (int j = 0; j < reds->num; ++j)
        {
          if (reds->rules[j] == r)
            continue;

          bitset_and (common,
                      reds->lookaheads[i],
                      reds->lookaheads[j]);
          res += bitset_count (common);
        }
    }

  bitset_free (common);
  return res;
}
/*-----------------------------------------------------------------.
| For a given rule, the number of reduce/reduce conflicts it is    |
| involved in, summed over all the states flagged in CONFLICTS.    |
|                                                                  |
| Like the other counters of this file, only the states with       |
| unresolved conflicts are visited: a state is flagged as soon as  |
| two of its lookahead sets overlap, so the other states cannot    |
| contribute, and skipping them avoids allocating a scratch bitset |
| and reading lookahead sets for states that were never flagged.   |
`-----------------------------------------------------------------*/
static size_t
count_rule_rr_conflicts (rule *r)
{
  size_t res = 0;
  for (state_number i = 0; i < nstates; ++i)
    if (conflicts[i])
      res += count_rule_state_rr_conflicts (r, states[i]);
  return res;
}
/*-----------------------------------------------------------.
| Output on OUT, for each state with conflicts, its number   |
| of shift/reduce and reduce/reduce conflicts.  The list is  |
| followed by two newlines if at least one state was listed. |
`-----------------------------------------------------------*/
void
conflicts_output (FILE *out)
{
  bool printed_sth = false;

  for (state_number sn = 0; sn < nstates; ++sn)
    {
      if (!conflicts[sn])
        continue;

      const state *s = states[sn];
      int src = count_state_sr_conflicts (s);
      int rrc = count_state_rr_conflicts (s);

      fprintf (out, _("State %d "), sn);
      if (src && rrc)
        fprintf (out,
                 _("conflicts: %d shift/reduce, %d reduce/reduce\n"),
                 src, rrc);
      else if (src)
        fprintf (out, _("conflicts: %d shift/reduce\n"), src);
      else if (rrc)
        fprintf (out, _("conflicts: %d reduce/reduce\n"), rrc);
      printed_sth = true;
    }

  if (printed_sth)
    fputs ("\n\n", out);
}
/*--------------------------------------------.
| Total the number of S/R and R/R conflicts.  |
`--------------------------------------------*/
int
conflicts_total_count (void)
{
  const size_t sr = count_sr_conflicts ();
  const size_t rr = count_rr_conflicts ();
  return sr + rr;
}
/*--------------------------------------------------------------.
| Report on stderr the counterexamples of every state that has  |
| conflicts.                                                    |
`--------------------------------------------------------------*/
static void
report_counterexamples (void)
{
  for (state_number sn = 0; sn < nstates; ++sn)
    {
      if (!conflicts[sn])
        continue;
      counterexample_report_state (states[sn], stderr, "");
    }
}
/*-----------------------------------------------------------------.
| Report per-rule %expect/%expect-rr mismatches.  Only the rules   |
| with at least one expectation (a value other than -1) are        |
| checked; an unspecified expectation tolerates zero conflicts.    |
`-----------------------------------------------------------------*/
static void
report_rule_expectation_mismatches (void)
{
  for (rule_number i = 0; i < nrules; i += 1)
    {
      rule *r = &rules[i];
      const int expected_sr = r->expected_sr_conflicts;
      const int expected_rr = r->expected_rr_conflicts;

      /* No expectation at all for this rule.  */
      if (expected_sr == -1 && expected_rr == -1)
        continue;

      int sr = count_rule_sr_conflicts (r);
      if (sr != expected_sr && (sr != 0 || expected_sr != -1))
        complain (&r->location, complaint,
                  _("shift/reduce conflicts for rule %d:"
                    " %d found, %d expected"),
                  r->code, sr, expected_sr);

      int rr = count_rule_rr_conflicts (r);
      if (rr != expected_rr && (rr != 0 || expected_rr != -1))
        complain (&r->location, complaint,
                  _("reduce/reduce conflicts for rule %d:"
                    " %d found, %d expected"),
                  r->code, rr, expected_rr);
    }
}
/*-----------------------------------------------------------------.
| Reporting numbers of conflicts. |
| |
| Check the per-rule expectations, then compare the total numbers |
| of S/R and R/R conflicts against %expect and %expect-rr: a |
| mismatch is an error, and conflicts without any expectation are |
| warnings. Finally either report the counterexamples, or tell |
| the user how to get them. |
`-----------------------------------------------------------------*/
void
conflicts_print (void)
{
report_rule_expectation_mismatches ();
/* %expect-rr is ignored (after a warning) for non-GLR parsers. */
if (! glr_parser && expected_rr_conflicts != -1)
{
complain (NULL, Wother, _("%%expect-rr applies only to GLR parsers"));
expected_rr_conflicts = -1;
}
// The warning flags used to emit a diagnostic, if we did.
warnings unexpected_conflicts_warning = Wnone;
/* The following two blocks scream for factoring, but i18n support
would make it ugly. */
/* Shift/reduce conflicts. */
{
int total = count_sr_conflicts ();
/* If %expect is not used, but %expect-rr is, then expect 0 sr. */
int expected =
(expected_sr_conflicts == -1 && expected_rr_conflicts != -1)
? 0
: expected_sr_conflicts;
if (expected != -1)
{
if (expected != total)
{
complain (NULL, complaint,
_("shift/reduce conflicts: %d found, %d expected"),
total, expected);
/* Only actual conflicts call for the counterexamples hint. */
if (total)
unexpected_conflicts_warning = complaint;
}
}
else if (total)
{
complain (NULL, Wconflicts_sr,
ngettext ("%d shift/reduce conflict",
"%d shift/reduce conflicts",
total),
total);
unexpected_conflicts_warning = Wconflicts_sr;
}
}
/* Reduce/reduce conflicts. */
{
int total = count_rr_conflicts ();
/* If %expect-rr is not used, but %expect is, then expect 0 rr. */
int expected =
(expected_rr_conflicts == -1 && expected_sr_conflicts != -1)
? 0
: expected_rr_conflicts;
if (expected != -1)
{
if (expected != total)
{
complain (NULL, complaint,
_("reduce/reduce conflicts: %d found, %d expected"),
total, expected);
/* Only actual conflicts call for the counterexamples hint. */
if (total)
unexpected_conflicts_warning = complaint;
}
}
else if (total)
{
complain (NULL, Wconflicts_rr,
ngettext ("%d reduce/reduce conflict",
"%d reduce/reduce conflicts",
total),
total);
unexpected_conflicts_warning = Wconflicts_rr;
}
}
/* With -Wcounterexamples, show them; otherwise, if a diagnostic about
   unexpected conflicts was issued, attach a note in the same category
   that advertises the option. */
if (warning_is_enabled (Wcounterexamples))
report_counterexamples ();
else if (unexpected_conflicts_warning != Wnone)
subcomplain (NULL, unexpected_conflicts_warning,
_("rerun with option '-Wcounterexamples'"
" to generate conflict counterexamples"));
}
/* Release the storage set up by conflicts_solve: the CONFLICTS flags,
   the two scratch bitsets, and the two report obstacks.  */
void
conflicts_free (void)
{
free (conflicts);
bitset_free (shift_set);
bitset_free (lookahead_set);
obstack_free (&solved_conflicts_obstack, NULL);
obstack_free (&solved_conflicts_xml_obstack, NULL);
}
@@ -0,0 +1,50 @@
/* Find and resolve or report lookahead conflicts for bison,
Copyright (C) 2000-2002, 2004, 2007, 2009-2015, 2018-2021 Free
Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef CONFLICTS_H_
# define CONFLICTS_H_
# include "state.h"
void conflicts_solve (void);
/**
* Update state numbers recorded in internal arrays such that:
* - \c nstates_old is the old number of states.
* - Where \c i is the old state number, <tt>old_to_new[i]</tt> is either:
* - \c nstates_old if state \c i is removed because it is unreachable.
* - The new state number.
* - The highest new state number is the number of remaining states - 1.
* - The numerical order of the remaining states has not changed.
*/
void conflicts_update_state_numbers (state_number old_to_new[],
state_number nstates_old);
void conflicts_print (void);
int conflicts_total_count (void);
void conflicts_output (FILE *out);
void conflicts_free (void);
bool has_conflicts (const state *s);
/* Expected numbers of shift/reduce and reduce/reduce conflicts;
   -1 when no expectation was given.  */
extern int expected_sr_conflicts;
extern int expected_rr_conflicts;
#endif /* !CONFLICTS_H_ */
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,36 @@
/* Conflict counterexample generation
Copyright (C) 2020-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef COUNTEREXAMPLE_H
# define COUNTEREXAMPLE_H
# include "state.h"
// Init/deinit this module.
void counterexample_init (void);
void counterexample_free (void);
// Print the counterexamples for the conflicts of state S.
//
// Used both for the warnings on the terminal (OUT = stderr, PREFIX =
// ""), and for the reports (OUT != stderr, PREFIX != "").
void
counterexample_report_state (const state *s, FILE *out, const char *prefix);
#endif /* COUNTEREXAMPLE_H */
@@ -0,0 +1,476 @@
/* Counterexample derivation trees
Copyright (C) 2020-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "derivation.h"
#include "glyphs.h"
#include <c-ctype.h>
#include <gl_linked_list.h>
#include <mbswidth.h>
#include <vasnprintf.h>
#include "system.h"
#include "complain.h"
// A node of a counterexample derivation tree.
//
// Note: the field order matters, d_dot is initialized positionally.
struct derivation
{
// Number of the symbol this node stands for.
symbol_number sym;
// The sub-derivations; NULL for leaves and for the dot.
derivation_list children;
// Incremented by derivation_retain, decremented by derivation_free;
// the node is released when the count drops to 0.
int reference_count;
// The rule SYM -> CHILDREN.
const rule *rule;
// Color assigned for styling. Guarantees that the derivation is
// always displayed with the same color, independently of the order
// in which the derivations are traversed.
int color;
};
static derivation d_dot = { -1, NULL, -1, NULL, -1 };
derivation *
derivation_dot (void)
{
return &d_dot;
}
// Add D at the end of DL, taking a reference on D.
void
derivation_list_append (derivation_list dl, derivation *d)
{
  // The list holds a reference on each of its members.
  derivation_retain (d);
  gl_list_add_last (dl, d);
}
// Add D at the beginning of DL, taking a reference on D.
void
derivation_list_prepend (derivation_list dl, derivation *d)
{
  // The list holds a reference on each of its members.
  derivation_retain (d);
  gl_list_add_first (dl, d);
}
// Drop the reference DL holds on each of its members (the dot
// excepted, since it is static), then destroy DL itself.
void
derivation_list_free (derivation_list dl)
{
  gl_list_iterator_t iter = gl_list_iterator (dl);
  derivation *member = NULL;
  while (derivation_list_next (&iter, &member))
    {
      if (member == &d_dot)
        continue;
      derivation_free (member);
    }
  gl_list_free (dl);
}
// Allocate a derivation for SYM whose sub-derivations are CHILDREN
// and which was produced by rule R.  The new node starts with a
// reference count of 0 and no color (-1).
derivation *
derivation_new (symbol_number sym, derivation_list children,
                const rule *r)
{
  derivation *node = xmalloc (sizeof *node);
  *node = (derivation)
    {
      .sym = sym,
      .children = children,
      .reference_count = 0,
      .rule = r,
      .color = -1,
    };
  return node;
}
// Take one more reference on D.
void
derivation_retain (derivation *d)
{
  d->reference_count += 1;
}
// Drop one reference on D (which may be NULL).  Every derivation whose
// count reaches 0 is released, and the reference it held on each of
// its children (the dot excepted) is dropped in turn.  The traversal
// uses an explicit work list rather than recursion.
void
derivation_free (derivation *d)
{
  if (d == NULL)
    return;

  // Work list of derivations that must lose one reference.
  derivation_list pending =
    gl_list_create (GL_LINKED_LIST, NULL, NULL, NULL, true,
                    1, (const void **)&d);
  for (; gl_list_size (pending) > 0; gl_list_remove_at (pending, 0))
    {
      derivation *cur = (derivation *) gl_list_get_at (pending, 0);
      cur->reference_count -= 1;
      if (cur->reference_count != 0)
        continue;

      // Last reference gone: schedule the children, then release CUR.
      if (cur->children)
        {
          gl_list_iterator_t it = gl_list_iterator (cur->children);
          derivation *kid = NULL;
          while (derivation_list_next (&it, &kid))
            if (kid != &d_dot)
              gl_list_add_last (pending, kid);
          gl_list_free (cur->children);
        }
      free (cur);
    }
  gl_list_free (pending);
}
// Number of nodes in DERIV: DERIV itself plus, recursively, all its
// sub-derivations.  A derivation without a children list counts
// for 1.
size_t
derivation_size (const derivation *deriv)
{
  // Accumulate in size_t, the return type, rather than in an int: no
  // implicit signed/unsigned conversion on each addition nor on return.
  size_t res = 1;
  if (deriv->children)
    {
      derivation *child = NULL;
      for (gl_list_iterator_t it = gl_list_iterator (deriv->children);
           derivation_list_next (&it, &child);
           )
        res += derivation_size (child);
    }
  return res;
}
// Number of levels of DERIV, i.e., the longest distance from the root
// to a leaf, counted in nodes.
static int
derivation_depth (const derivation *deriv)
{
  // Leaves (and the dot) span a single level.
  if (!deriv->children)
    return 1;

  // The level below is counted even when the list of children is
  // empty (derivation with an empty RHS), hence the initial 1.
  int below = 1;
  gl_list_iterator_t it = gl_list_iterator (deriv->children);
  derivation *kid;
  while (derivation_list_next (&it, &kid))
    below = max_int (below, derivation_depth (kid));
  return below + 1;
}
// Whether S consists only of white space (true for the empty string).
static bool
all_spaces (const char *s)
{
  const char *cp = s;
  for (; c_isspace (*cp); ++cp)
    continue;
  // Only white space iff the scan stopped on the terminator.
  return *cp == '\0';
}
// Printing the derivation as trees without trailing spaces is
// painful: we cannot simply pad one "column" before moving to the
// next:
//
// exp
// ↳ x1 e1 foo1 x1
// ↳ x2 ↳ ε ↳ foo2 ↳ x2
// ↳ x3 ↳ foo3 ↳ x3
// ↳ "X" • ↳ x1 foo4 ↳ "X"
// ↳ x2 ↳ "quuux"
// ↳ x3
// ↳ "X"
//
// It's hard for a column to know that it's "last" to decide whether
// to output the right-padding or not. So when we need to pad on the
// right to complete a column, we don't output the spaces, we
// accumulate the width of padding in *PADDING.
//
// Each time we actually print something (non space), we flush that
// padding. When we _don't_ print something, its width is added to
// the current padding.
//
// This function implements this.
//
// When COND is true, put S on OUT, preceded by *PADDING white spaces.
// Otherwise add the width to *PADDING. Return the width of S.
static int
fputs_if (bool cond, FILE *out, int *padding, const char *s)
{
  const int width = mbswidth (s, 0);
  if (!cond || all_spaces (s))
    {
      // Nothing visible to emit: just remember the room it takes.
      *padding += width;
      return width;
    }
  // Flush the pending padding before the actual text.
  fprintf (out, "%*s%s", *padding, "", s);
  *padding = 0;
  return width;
}
// Like fputs_if, but the string is built from the printf-like format
// FMT and its arguments.  Return the width of the formatted string.
static int
fprintf_if (bool cond, FILE *out, int *padding, const char *fmt, ...)
{
  char stack_buf[256];
  size_t size = sizeof stack_buf;

  va_list ap;
  va_start (ap, fmt);
  char *text = vasnprintf (stack_buf, &size, fmt, ap);
  va_end (ap);
  if (text == NULL)
    xalloc_die ();

  const int width = fputs_if (cond, out, padding, text);
  // Only release TEXT when it is not the on-stack buffer.
  if (text != stack_buf)
    free (text);
  return width;
}
// Number of columns needed to display DERIV together with everything
// below it, down to its leaves.
static int
derivation_width (const derivation *deriv)
{
  if (!deriv->children)
    {
      // Either the dot, or a leaf.
      if (deriv == &d_dot)
        return dot_width;
      return mbswidth (symbols[deriv->sym]->tag, 0);
    }

  // Width of the line showing the LHS.
  const int lhs_width = mbswidth (symbols[deriv->sym]->tag, 0);

  // Width of the line showing the RHS: the arrow and the rule number...
  int rhs_width = down_arrow_width;
  rhs_width += snprintf (NULL, 0, "%d: ", deriv->rule->number);

  // ... followed by the children, or the "empty" glyph.
  const size_t nchildren = gl_list_size (deriv->children);
  if (nchildren == 0)
    rhs_width += empty_width;
  else
    {
      // A RHS reduced to the dot is displayed as "empty, then dot".
      if (nchildren == 1
          && gl_list_get_first (deriv->children) == &d_dot)
        rhs_width += empty_width + derivation_separator_width;

      gl_list_iterator_t it = gl_list_iterator (deriv->children);
      derivation *kid;
      while (derivation_list_next (&it, &kid))
        rhs_width += derivation_separator_width + derivation_width (kid);
      // One separator too many was counted: there is none up front.
      rhs_width -= derivation_separator_width;
    }
  return max_int (lhs_width, rhs_width);
}
// Print on OUT the part of DERIV that belongs to the line DEPTH.
//
// The tree is printed from top to bottom, one line per level, with
// DEPTH ranging from 0 to the total depth of the tree.  DERIV should
// only be printed when we reach its depth, i.e., when DEPTH is 0.
//
// When DEPTH is 1 and we're on a subderivation, then we print the RHS
// of the derivation (in DEPTH 0 we printed its LHS).
//
// Return the "logical printed" width.  We might not have reached
// that width, in which case the missing spaces are in *PADDING.
static int
derivation_print_tree_impl (const derivation *deriv, FILE *out,
int depth, int *padding)
{
const int width = derivation_width (deriv);
int res = 0;
if (deriv->children)
{
const symbol *sym = symbols[deriv->sym];
// Style class derived from the color stored in DERIV.
char style[20];
snprintf (style, 20, "cex-%d", deriv->color);
// Styling is only opened on the two lines where this node itself
// prints something: its LHS (DEPTH 0) and its RHS (DEPTH 1).
if (depth == 0 || depth == 1)
{
begin_use_class (style, out);
begin_use_class ("cex-step", out);
}
if (depth == 0)
{
res += fputs_if (true, out, padding, sym->tag);
}
else
{
// For DEPTH other than 1 nothing is output here: fputs_if and
// fprintf_if merely account for the widths in *PADDING.
res += fputs_if (depth == 1, out, padding, down_arrow);
res += fprintf_if (depth == 1, out, padding, "%d: ", deriv->rule->number);
if (gl_list_size (deriv->children) == 0)
// Empty rhs.
res += fputs_if (depth == 1, out, padding, empty);
else
{
// A RHS reduced to the dot: show the "empty" glyph before the dot.
if (gl_list_size (deriv->children) == 1
&& gl_list_get_first (deriv->children) == &d_dot)
{
res += fputs_if (depth == 1, out, padding, empty);
res += fputs_if (depth == 1, out, padding, derivation_separator);
}
bool first = true;
derivation *child;
for (gl_list_iterator_t it = gl_list_iterator (deriv->children);
derivation_list_next (&it, &child);
)
{
if (!first)
res += fputs_if (depth == 1, out, padding, derivation_separator);
// The children are one level closer to their own line.
res += derivation_print_tree_impl (child, out, depth - 1, padding);
first = false;
}
}
}
if (depth == 0 || depth == 1)
{
end_use_class ("cex-step", out);
end_use_class (style, out);
}
// Complete the column: what is missing to reach WIDTH becomes
// pending padding.
*padding += width - res;
res = width;
}
else if (deriv == &d_dot)
{
if (depth == 0)
begin_use_class ("cex-dot", out);
res += fputs_if (depth == 0, out, padding, dot);
if (depth == 0)
end_use_class ("cex-dot", out);
}
else // leaf.
{
const symbol *sym = symbols[deriv->sym];
if (depth == 0)
begin_use_class ("cex-leaf", out);
res += fputs_if (depth == 0, out, padding, sym->tag);
if (depth == 0)
end_use_class ("cex-leaf", out);
}
return res;
}
// Print DERIV on OUT as a tree, one line per level, each line being
// introduced by PREFIX.  The tree is preceded by an empty line.
static void
derivation_print_tree (const derivation *deriv, FILE *out, const char *prefix)
{
  fputc ('\n', out);
  const int nlevels = derivation_depth (deriv);
  int level = 0;
  while (level < nlevels)
    {
      // Each line starts without pending padding.
      int pending = 0;
      fprintf (out, " %s", prefix);
      derivation_print_tree_impl (deriv, out, level, &pending);
      fputc ('\n', out);
      ++level;
    }
}
/* Print DERIV on OUT on a single line, colored according to COUNTER.

   Each derivation with children takes the current value of *COUNTER
   as its color (stored in DERIV), and increments *COUNTER.

   When LEAVES_ONLY, the "LHS arrow [ ... ]" decorations of inner
   nodes are not printed: only the leaves and the dot are.

   PREFIX is printed before the first thing this call outputs.

   Return false if nothing is printed. */
static bool
derivation_print_flat_impl (derivation *deriv, FILE *out,
bool leaves_only,
int *counter, const char *prefix)
{
if (deriv->children)
{
const symbol *sym = symbols[deriv->sym];
deriv->color = *counter;
++*counter;
char style[20];
snprintf (style, 20, "cex-%d", deriv->color);
begin_use_class (style, out);
if (!leaves_only)
{
fputs (prefix, out);
begin_use_class ("cex-step", out);
fprintf (out, "%s %s [ ", sym->tag, arrow);
end_use_class ("cex-step", out);
/* PREFIX was just consumed: the first child needs none. */
prefix = "";
}
/* Whether some child printed something. */
bool res = false;
derivation *child;
for (gl_list_iterator_t it = gl_list_iterator (deriv->children);
derivation_list_next (&it, &child);
)
{
if (derivation_print_flat_impl (child, out,
leaves_only, counter, prefix))
{
/* Something was output: use this prefix for what follows. */
prefix = " ";
res = true;
}
else if (!leaves_only)
prefix = " ";
}
if (!leaves_only)
{
begin_use_class ("cex-step", out);
if (res)
fputs (" ]", out);
else
fputs ("]", out);
end_use_class ("cex-step", out);
}
end_use_class (style, out);
return res;
}
else if (deriv == &d_dot)
{
fputs (prefix, out);
begin_use_class ("cex-dot", out);
fputs (dot, out);
end_use_class ("cex-dot", out);
}
else // leaf.
{
fputs (prefix, out);
const symbol *sym = symbols[deriv->sym];
begin_use_class ("cex-leaf", out);
fprintf (out, "%s", sym->tag);
end_use_class ("cex-leaf", out);
}
/* The dot and the leaves always print something. */
return true;
}
// Print DERIV on OUT on a single line, preceded by PREFIX and
// followed by a newline.  Inner nodes are displayed, not only the
// leaves.
static void
derivation_print_flat (const derivation *deriv, FILE *out, const char *prefix)
{
  // Colors are numbered from 0 for each printed derivation.
  int next_color = 0;
  fputs (prefix, out);
  derivation_print_flat_impl ((derivation *)deriv, out,
                              false, &next_color, "");
  fputc ('\n', out);
}
// Print only the leaves (and the dot) of DERIV on OUT, on a single
// line followed by a newline.
void
derivation_print_leaves (const derivation *deriv, FILE *out)
{
  // Colors are numbered from 0 for each printed derivation.
  int next_color = 0;
  derivation_print_flat_impl ((derivation *)deriv, out,
                              true, &next_color, "");
  fputc ('\n', out);
}
// Print DERIV on OUT, using PREFIX: as a single line when the
// environment variable YYFLAT is set, as a tree otherwise.
void
derivation_print (const derivation *deriv, FILE *out, const char *prefix)
{
  const bool flat = getenv ("YYFLAT") != NULL;
  if (!flat)
    {
      derivation_print_tree (deriv, out, prefix);
      return;
    }
  derivation_print_flat (deriv, out, prefix);
}
@@ -0,0 +1,77 @@
/* Counterexample derivation trees
Copyright (C) 2020-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef DERIVATION_H
# define DERIVATION_H
# include <gl_linked_list.h>
# include <gl_xlist.h>
# include "gram.h"
/* Derivations are trees of symbols such that each nonterminal's
children are symbols that produce that nonterminal if they are
relevant to the counterexample. The leaves of a derivation form a
counterexample when printed. */
typedef gl_list_t derivation_list;
typedef struct derivation derivation;
// Return a new, empty, list of derivations.
static inline derivation_list
derivation_list_new (void)
{
  derivation_list res
    = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
  return res;
}
// Advance the iterator IT.  If there is a next element, store it in
// *D and return true.  Otherwise release IT, leave *D untouched, and
// return false.
static inline bool
derivation_list_next (gl_list_iterator_t *it, derivation **d)
{
  const void *elt = NULL;
  if (!gl_list_iterator_next (it, &elt, NULL))
    {
      // End of the list: the iterator is no longer needed.
      gl_list_iterator_free (it);
      return false;
    }
  *d = (derivation *) elt;
  return true;
}
void derivation_list_append (derivation_list dl, derivation *d);
void derivation_list_prepend (derivation_list dl, derivation *d);
void derivation_list_free (derivation_list dl);
// R is the rule SYM -> CHILDREN.
derivation *
derivation_new (symbol_number sym, derivation_list children,
const rule *r);
// Return a new derivation for SYM with neither children nor rule.
static inline derivation *
derivation_new_leaf (symbol_number sym)
{
  derivation *leaf = derivation_new (sym, NULL, NULL);
  return leaf;
}
// Number of symbols.
size_t derivation_size (const derivation *deriv);
void derivation_print (const derivation *deriv, FILE *out, const char *prefix);
void derivation_print_leaves (const derivation *deriv, FILE *out);
void derivation_free (derivation *deriv);
void derivation_retain (derivation *deriv);
// A derivation denoting the position of the dot.
derivation *derivation_dot (void);
#endif /* DERIVATION_H */
@@ -0,0 +1,117 @@
/* Match rules with nonterminals for bison,
Copyright (C) 1984, 1989, 2000-2003, 2005, 2009-2015, 2018-2021 Free
Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include "getargs.h"
#include "derives.h"
#include "gram.h"
#include "reader.h"
#include "symtab.h"
/* Singly linked list of rules: each node points to a rule, it does
   not store a rule number. */
typedef struct rule_list
{
/* Next node, or NULL at the end of the list. */
struct rule_list *next;
/* The rule held by this node. */
rule *value;
} rule_list;
rule ***derives;
static void
print_derives (void)
{
fputs ("DERIVES\n", stderr);
for (symbol_number i = ntokens; i < nsyms; ++i)
{
fprintf (stderr, " %s derives\n", symbols[i]->tag);
for (rule **rp = derives[i - ntokens]; *rp; ++rp)
{
fprintf (stderr, " %3d ", (*rp)->code);
rule_rhs_print (*rp, stderr);
fprintf (stderr, "\n");
}
}
fputs ("\n\n", stderr);
}
void
derives_compute (void)
{
/* DSET[NTERM - NTOKENS] -- A linked list of the numbers of the rules
whose LHS is NTERM. */
rule_list **dset = xcalloc (nnterms, sizeof *dset);
/* DELTS[RULE] -- There are NRULES rule number to attach to nterms.
Instead of performing NRULES allocations for each, have an array
indexed by rule numbers. */
rule_list *delts = xnmalloc (nrules, sizeof *delts);
for (rule_number r = nrules - 1; r >= 0; --r)
{
symbol_number lhs = rules[r].lhs->number;
rule_list *p = &delts[r];
/* A new LHS is found. */
p->next = dset[lhs - ntokens];
p->value = &rules[r];
dset[lhs - ntokens] = p;
}
/* DSET contains what we need under the form of a linked list. Make
it a single array. */
derives = xnmalloc (nnterms, sizeof *derives);
/* Q is the storage for DERIVES[...] (DERIVES[0] = q). */
rule **q = xnmalloc (nnterms + nrules, sizeof *q);
for (symbol_number i = ntokens; i < nsyms; ++i)
{
rule_list *p = dset[i - ntokens];
derives[i - ntokens] = q;
while (p)
{
*q++ = p->value;
p = p->next;
}
*q++ = NULL;
}
if (trace_flag & trace_sets)
print_derives ();
free (dset);
free (delts);
}
/* Release DERIVES, if it was computed.  */
void
derives_free (void)
{
  if (!derives)
    return;
  /* DERIVES[0] is the storage shared by all the vectors.  */
  free (derives[0]);
  free (derives);
}
@@ -0,0 +1,35 @@
/* Match rules with nonterminals for bison,
Copyright (C) 1984, 1989, 2000-2002, 2009-2015, 2018-2021 Free
Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef DERIVES_H_
# define DERIVES_H_
# include "gram.h"
/* DERIVES[SYMBOL - NTOKENS] points to a vector of the rules that
SYMBOL derives, terminated with NULL. */
extern rule ***derives;
/* Compute DERIVES. */
void derives_compute (void);
void derives_free (void);
#endif /* !DERIVES_H_ */
+597
View File
@@ -0,0 +1,597 @@
/* Open and close files for Bison.
Copyright (C) 1984, 1986, 1989, 1992, 2000-2015, 2018-2021 Free
Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include <configmake.h> /* PKGDATADIR */
#include <dirname.h>
#include <error.h>
#include <get-errno.h>
#include <gl_array_list.h>
#include <gl_hash_map.h>
#include <gl_xlist.h>
#include <gl_xmap.h>
#include <quote.h>
#include <quotearg.h>
#include <relocatable.h> /* relocate2 */
#include <stdio-safer.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <xstrndup.h>
#include "complain.h"
#include "files.h"
#include "getargs.h"
#include "gram.h"
/* Initializing some values below (such as SPEC_NAME_PREFIX to 'yy') is
tempting, but don't do that: for the time being our handling of the
%directive vs --option leaves precedence to the options by deciding
that if a %directive sets a variable which is really set (i.e., not
NULL), then the %directive is ignored. As a result, %name-prefix,
for instance, will not be honored. */
char const *spec_outfile = NULL; /* for -o. */
char const *spec_file_prefix = NULL; /* for -b. */
location spec_file_prefix_loc = EMPTY_LOCATION_INIT;
char const *spec_name_prefix = NULL; /* for -p. */
location spec_name_prefix_loc = EMPTY_LOCATION_INIT;
char *spec_verbose_file = NULL; /* for --verbose. */
char *spec_graph_file = NULL; /* for -g. */
char *spec_html_file = NULL; /* for --html. */
char *spec_xml_file = NULL; /* for -x. */
char *spec_header_file = NULL; /* for --header. */
char *parser_file_name;
/* A computed output file name.  The names registered by
   output_file_name_check are stored in GENERATED_FILES. */
typedef struct generated_file
{
/** File name. */
char *name;
/** Whether this is a generated source file (e.g., *.c, *.java...),
as opposed to a report file (e.g., *.output).  When late errors
are detected, generated source files are removed. */
bool is_source;
} generated_file;
static generated_file *generated_files = NULL;
static int generated_files_size = 0;
uniqstr grammar_file = NULL;
/* If --output=dir/foo.c was specified,
DIR_PREFIX is 'dir/' and ALL_BUT_EXT and ALL_BUT_TAB_EXT are 'dir/foo'.
If --output=dir/foo.tab.c was specified, DIR_PREFIX is 'dir/',
ALL_BUT_EXT is 'dir/foo.tab', and ALL_BUT_TAB_EXT is 'dir/foo'.
If --output was not specified but --file-prefix=dir/foo was specified,
ALL_BUT_EXT = 'foo.tab' and ALL_BUT_TAB_EXT = 'foo'.
If neither --output nor --file was specified but the input grammar
is named dir/foo.y, ALL_BUT_EXT and ALL_BUT_TAB_EXT are 'foo'.
If neither --output nor --file was specified, DIR_PREFIX is the
empty string (meaning the current directory); otherwise it is
'dir/'. */
char *all_but_ext;
static char *all_but_tab_ext;
char *dir_prefix;
/* C source file extension (the parser source). */
static char *src_extension = NULL;
/* Header file extension (if option '`-d'' is specified). */
static char *header_extension = NULL;
/* A file name prefix substitution: in a file name that starts with
   OLDPREFIX, OLDPREFIX is replaced with NEWPREFIX.  Both strings are
   owned by the structure. */
struct prefix_map
{
/* The leading part of the file name to replace. */
char *oldprefix;
/* What to put instead. */
char *newprefix;
};
static gl_list_t prefix_maps = NULL;
/* Map file names to prefix-mapped file names. */
static gl_map_t mapped_files = NULL;
/*-----------------------------------------------------------------.
| Return STR1 followed by STR2, as a newly allocated string that   |
| the caller must free.                                            |
`-----------------------------------------------------------------*/
static char *
concat2 (char const *str1, char const *str2)
{
  size_t len1 = strlen (str1);
  size_t len2 = strlen (str2);
  char *res = xmalloc (len1 + len2 + 1);
  memcpy (res, str1, len1);
  /* Also copy the terminating NUL of STR2.  */
  memcpy (res + len1, str2, len2 + 1);
  return res;
}
/*-----------------------------------------------------------------.
| Open file NAME with mode MODE and return the stream.  On failure |
| report the error and exit.                                       |
`-----------------------------------------------------------------*/
FILE *
xfopen (const char *name, const char *mode)
{
  FILE *stream = fopen_safer (name, mode);
  if (stream == NULL)
    error (EXIT_FAILURE, get_errno (),
           _("%s: cannot open"), quotearg_colon (name));
  return stream;
}
/*-------------------------------------------------------------.
| Close file PTR (which may be NULL).  If PTR is in error, or  |
| if closing it fails, report the error and exit.              |
`-------------------------------------------------------------*/
void
xfclose (FILE *ptr)
{
  if (ptr != NULL)
    {
      /* An earlier read or write on PTR failed.  */
      if (ferror (ptr))
        error (EXIT_FAILURE, 0, _("input/output error"));
      if (fclose (ptr) != 0)
        error (EXIT_FAILURE, get_errno (), _("cannot close file"));
    }
}
/* Return a stream for the file descriptor FD, opened with mode MODE.
   On failure report the error and exit.  */
FILE *
xfdopen (int fd, char const *mode)
{
  FILE *stream = fdopen (fd, mode);
  if (stream == NULL)
    error (EXIT_FAILURE, get_errno (),
           /* On a separate line to please the "unmarked_diagnostics"
              syntax-check.  */
           "fdopen");
  return stream;
}
/* The mapped name of FILENAME, allocated, if there are prefix maps.
Otherwise NULL. */
static char *
map_file_name_alloc (char const *filename)
{
struct prefix_map const *p = NULL;
assert (prefix_maps);
{
void const *ptr;
gl_list_iterator_t iter = gl_list_iterator (prefix_maps);
while (gl_list_iterator_next (&iter, &ptr, NULL))
{
p = ptr;
if (strncmp (p->oldprefix, filename, strlen (p->oldprefix)) == 0)
break;
p = NULL;
}
gl_list_iterator_free (&iter);
}
if (!p)
return xstrdup (filename);
size_t oldprefix_len = strlen (p->oldprefix);
size_t newprefix_len = strlen (p->newprefix);
char *res = xmalloc (newprefix_len + strlen (filename) - oldprefix_len + 1);
char *end = stpcpy (res, p->newprefix);
stpcpy (end, filename + oldprefix_len);
return res;
}
/* Whether X1 and X2, two NUL-terminated strings passed as generic
   pointers, are equal.  */
static bool
string_equals (const void *x1, const void *x2)
{
  const char *lhs = x1;
  const char *rhs = x2;
  return STREQ (lhs, rhs);
}
/* Hash the NUL-terminated string X, following the method described
   by Bruno Haible: for each character, rotate the hash by 9 bits to
   the left and add the character.
   See https://www.haible.de/bruno/hashfunc.html. */
static size_t
string_hash (const void *x)
{
#define SIZE_BITS (sizeof (size_t) * CHAR_BIT)
  size_t hash = 0;
  const char *cp = x;
  while (*cp != '\0')
    {
      size_t rotated = (hash << 9) | (hash >> (SIZE_BITS - 9));
      hash = *cp + rotated;
      ++cp;
    }
  return hash;
}
/* Free CP.  The argument is const-qualified so that this function can
   be passed as a disposal callback (see map_file_name).  */
static void
string_free (const void *cp)
{
  free ((void *) cp);
}
/* Return FILENAME with the prefix maps applied.

   FILENAME itself is returned when it is NULL, or when no prefix map
   was registered.  Otherwise the result is computed once and cached
   in MAPPED_FILES, which owns both its keys and its values: the
   caller must not free the returned string.  */
const char *
map_file_name (char const *filename)
{
  if (!filename || !prefix_maps)
    return filename;
  if (!mapped_files)
    /* Use the checked constructor from gl_xmap.h: the "nx" variant
       returns NULL on memory exhaustion, which would then be passed
       unchecked to gl_map_get below.  */
    mapped_files
      = gl_map_create_empty (GL_HASH_MAP,
                             string_equals, string_hash,
                             string_free, string_free);
  const void *res = gl_map_get (mapped_files, filename);
  if (!res)
    {
      /* Not cached yet.  The map takes ownership of the copy of the
         key and of the mapped name.  */
      res = map_file_name_alloc (filename);
      gl_map_put (mapped_files, xstrdup (filename), res);
    }
  return res;
}
/* Release P together with the two strings it owns.  */
static void
prefix_map_free (struct prefix_map *p)
{
  free (p->newprefix);
  free (p->oldprefix);
  free (p);
}
void
add_prefix_map (char const *oldprefix, char const *newprefix)
{
if (!prefix_maps)
prefix_maps
= gl_list_create_empty (GL_ARRAY_LIST,
/* equals */ NULL,
/* hashcode */ NULL,
(gl_listelement_dispose_fn) prefix_map_free,
true);
struct prefix_map *p = xmalloc (sizeof (*p));
p->oldprefix = xstrdup (oldprefix);
p->newprefix = xstrdup (newprefix);
gl_list_add_last (prefix_maps, p);
}
/*------------------------------------------------------------------.
| Compute ALL_BUT_EXT, ALL_BUT_TAB_EXT and output files extensions. |
`------------------------------------------------------------------*/
/* Set SRC_EXTENSION and HEADER_EXTENSION from EXT, the extension of
   the grammar file.  */
static void
compute_exts_from_gf (const char *ext)
{
  if (!STREQ (ext, ".y"))
    {
      /* Derive them from EXT, 'y' becoming 'c' for the source and 'h'
         for the header (likewise in upper case).  */
      src_extension = xstrdup (ext);
      tr (src_extension, 'y', 'c');
      tr (src_extension, 'Y', 'C');

      header_extension = xstrdup (ext);
      tr (header_extension, 'y', 'h');
      tr (header_extension, 'Y', 'H');
      return;
    }

  /* Plain '.y': use the extensions of the output language.  */
  src_extension = xstrdup (language->src_extension);
  header_extension = xstrdup (language->header_extension);
}
/* Set SRC_EXTENSION and HEADER_EXTENSION from EXT, the extension of
   the C source file.  */
static void
compute_exts_from_src (const char *ext)
{
  /* This function is used when the user specifies `-o' or `--output':
     the extensions are computed unconditionally from the file name
     given by this option.  */
  src_extension = xstrdup (ext);

  /* The header extension is EXT with 'c' turned into 'h' (likewise in
     upper case).  */
  header_extension = xstrdup (ext);
  tr (header_extension, 'c', 'h');
  tr (header_extension, 'C', 'H');
}
/* Decompose FILE_NAME in four parts: *BASE, *TAB, and *EXT, the fourth
part, (the directory) is ranging from FILE_NAME to the char before
*BASE, so we don't need an additional parameter.
*EXT points to the last period in the basename, or NULL if none.
If there is no *EXT, *TAB is NULL. Otherwise, *TAB points to
'.tab' or '_tab' if present right before *EXT, or is NULL. *TAB
cannot be equal to *BASE.
None are allocated, they are simply pointers to parts of FILE_NAME.
Examples:
'/tmp/foo.tab.c' -> *BASE = 'foo.tab.c', *TAB = '.tab.c', *EXT =
'.c'
'foo.c' -> *BASE = 'foo.c', *TAB = NULL, *EXT = '.c'
'tab.c' -> *BASE = 'tab.c', *TAB = NULL, *EXT = '.c'
'.tab.c' -> *BASE = '.tab.c', *TAB = NULL, *EXT = '.c'
'foo.tab' -> *BASE = 'foo.tab', *TAB = NULL, *EXT = '.tab'
'foo_tab' -> *BASE = 'foo_tab', *TAB = NULL, *EXT = NULL
'foo' -> *BASE = 'foo', *TAB = NULL, *EXT = NULL. */
static void
file_name_split (const char *file_name,
                 const char **base, const char **tab, const char **ext)
{
  const char *name = last_component (file_name);
  /* The extension starts at the last dot of the base name.  */
  const char *dot = strrchr (name, '.');

  *base = name;
  *ext = dot;
  *tab = NULL;

  /* Without an extension there cannot be a '.tab' part.  */
  if (!dot)
    return;

  /* Is there a '.tab' part right before the extension?  It must be
     strictly shorter than what precedes the extension, so that *TAB
     never equals *BASE.  */
  size_t stem_len = dot - name;
  size_t tab_len = sizeof (TAB_EXT) - 1;
  if (tab_len < stem_len
      && STRPREFIX_LIT (TAB_EXT, dot - tab_len))
    *tab = dot - tab_len;
}
/* Compute DIR_PREFIX, ALL_BUT_EXT and ALL_BUT_TAB_EXT (and possibly
   the output extensions) from SPEC_OUTFILE if it is set, from
   GRAMMAR_FILE and the other options otherwise.

   The precise -o name will be used for FTABLE.  For other output
   files, remove the ".c" or ".tab.c" suffix.  */
static void
compute_file_name_parts (void)
{
  const char *base, *tab, *ext;

  if (!spec_outfile)
    {
      file_name_split (grammar_file, &base, &tab, &ext);

      if (spec_file_prefix)
        {
          /* If --file-prefix=foo was specified, ALL_BUT_TAB_EXT = 'foo'.  */
          size_t dir_len
            = last_component (spec_file_prefix) - spec_file_prefix;
          dir_prefix = xstrndup (spec_file_prefix, dir_len);
          all_but_tab_ext = xstrdup (spec_file_prefix);
        }
      else if (! location_empty (yacc_loc))
        {
          /* If --yacc, then the output is 'y.tab.c'.  */
          dir_prefix = xstrdup ("");
          all_but_tab_ext = xstrdup ("y");
        }
      else
        {
          /* Otherwise, ALL_BUT_TAB_EXT is computed from the input
             grammar: 'foo/bar.yy' => 'bar'.  */
          size_t ext_len = ext ? strlen (ext) : 0;
          dir_prefix = xstrdup ("");
          all_but_tab_ext = xstrndup (base, strlen (base) - ext_len);
        }

      all_but_ext = (language->add_tab
                     ? concat2 (all_but_tab_ext, TAB_EXT)
                     : xstrdup (all_but_tab_ext));

      /* Compute the extensions from the grammar file name.  */
      if (ext && location_empty (yacc_loc))
        compute_exts_from_gf (ext);
    }
  else
    {
      file_name_split (spec_outfile, &base, &tab, &ext);

      size_t outfile_len = strlen (spec_outfile);
      size_t ext_len = ext ? strlen (ext) : 0;
      /* Without a '.tab' part, only the extension is removed.  */
      size_t tab_len = tab ? strlen (tab) : ext_len;

      dir_prefix = xstrndup (spec_outfile, base - spec_outfile);
      /* ALL_BUT_EXT goes up to EXT, excluding it.  */
      all_but_ext = xstrndup (spec_outfile, outfile_len - ext_len);
      /* ALL_BUT_TAB_EXT goes up to TAB, excluding it.  */
      all_but_tab_ext = xstrndup (spec_outfile, outfile_len - tab_len);

      if (ext)
        compute_exts_from_src (ext);
    }
}
/* Compute the names of the output files: the parser, and depending on
   the options, the header, the graph, the HTML, XML and verbose
   reports.  Names given by the user are kept.  Warn if we detect
   conflicting outputs to the same file.  */
void
compute_output_file_names (void)
{
  compute_file_name_parts ();

  /* Default extensions, when none was deduced from the file names.  */
  if (src_extension == NULL)
    src_extension = xstrdup (".c");
  if (header_extension == NULL)
    header_extension = xstrdup (".h");

  if (spec_outfile)
    parser_file_name = xstrdup (spec_outfile);
  else
    parser_file_name = concat2 (all_but_ext, src_extension);

  if (header_flag && !spec_header_file)
    spec_header_file = concat2 (all_but_ext, header_extension);

  /* None of the following outputs is registered as a source file.  */
  if (graph_flag)
    {
      if (spec_graph_file == NULL)
        spec_graph_file = concat2 (all_but_tab_ext, ".gv");
      output_file_name_check (&spec_graph_file, false);
    }

  if (html_flag)
    {
      if (spec_html_file == NULL)
        spec_html_file = concat2 (all_but_tab_ext, ".html");
      output_file_name_check (&spec_html_file, false);
    }

  if (xml_flag)
    {
      if (spec_xml_file == NULL)
        spec_xml_file = concat2 (all_but_tab_ext, ".xml");
      output_file_name_check (&spec_xml_file, false);
    }

  if (report_flag)
    {
      if (spec_verbose_file == NULL)
        spec_verbose_file = concat2 (all_but_tab_ext, OUTPUT_EXT);
      output_file_name_check (&spec_verbose_file, false);
    }

  /* These were only needed to build the names above.  */
  free (header_extension);
  free (src_extension);
  free (all_but_tab_ext);
}
/* Check that the output file *FILE_NAME is neither the grammar file,
   nor an output file that was already registered.

   If it is, complain, free *FILE_NAME, and replace it with a newly
   allocated "/dev/null".  Otherwise register it in GENERATED_FILES,
   SOURCE telling whether it is a generated source file (to be removed
   by unlink_generated_sources) as opposed to a report.  */
void
output_file_name_check (char **file_name, bool source)
{
  bool conflict = false;
  if (STREQ (*file_name, grammar_file))
    {
      complain (NULL, complaint, _("refusing to overwrite the input file %s"),
                quote (*file_name));
      conflict = true;
    }
  else
    for (int i = 0; i < generated_files_size; i++)
      if (STREQ (generated_files[i].name, *file_name))
        {
          complain (NULL, Wother, _("conflicting outputs to file %s"),
                    quote (generated_files[i].name));
          conflict = true;
        }
  if (conflict)
    {
      free (*file_name);
      /* Use the checked xstrdup rather than strdup, so that memory
         exhaustion cannot leave NULL in *FILE_NAME.  */
      *file_name = xstrdup ("/dev/null");
    }
  else
    {
      generated_files = xnrealloc (generated_files, ++generated_files_size,
                                   sizeof *generated_files);
      /* GENERATED_FILES keeps its own copy of the name.  */
      generated_files[generated_files_size-1].name = xstrdup (*file_name);
      generated_files[generated_files_size-1].is_source = source;
    }
}
/* Delete every registered output that was recorded as a source file.  */
void
unlink_generated_sources (void)
{
  for (int idx = 0; idx < generated_files_size; ++idx)
    {
      if (!generated_files[idx].is_source)
        continue;
      /* Failures are deliberately ignored: the file might not even
         exist.  */
      unlink (generated_files[idx].name);
    }
}
/* Buffer handed out by relocate2; released in output_file_names_free.  */
static char *relocate_buffer = NULL;

/* Return the directory holding Bison's data files: the buffer saved by
   a previous relocation if any, else $BISON_PKGDATADIR when set, else
   PKGDATADIR passed through relocate2.  */
char const *
pkgdatadir (void)
{
  if (relocate_buffer)
    return relocate_buffer;

  char const *env_dir = getenv ("BISON_PKGDATADIR");
  if (env_dir)
    return env_dir;

  return relocate2 (PKGDATADIR, &relocate_buffer);
}
char const *
m4path (void)
{
char const *m4 = getenv ("M4");
if (m4)
return m4;
/* We don't use relocate2() to store the temporary buffer and re-use
it, because m4path() is only called once. */
char const *m4_relocated = relocate (M4);
struct stat buf;
if (stat (m4_relocated, &buf) == 0)
return m4_relocated;
return M4;
}
/* Release the file names computed by this module, the table of
   generated files, the relocation buffer, and the prefix-mapping
   containers when they were created.  */
void
output_file_names_free (void)
{
  /* Individual names.  */
  free (all_but_ext);
  free (spec_verbose_file);
  free (spec_graph_file);
  free (spec_html_file);
  free (spec_xml_file);
  free (spec_header_file);
  free (parser_file_name);
  free (dir_prefix);

  /* Registered outputs: each name, then the table itself.  */
  int idx = 0;
  while (idx < generated_files_size)
    {
      free (generated_files[idx].name);
      ++idx;
    }
  free (generated_files);

  free (relocate_buffer);

  /* The mapping containers exist only if a prefix map was used.  */
  if (prefix_maps != NULL)
    gl_list_free (prefix_maps);
  if (mapped_files != NULL)
    gl_map_free (mapped_files);
}
+100
View File
@@ -0,0 +1,100 @@
/* File names and variables for bison,
Copyright (C) 1984, 1989, 2000-2002, 2006-2007, 2009-2015, 2018-2021
Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef FILES_H_
# define FILES_H_
# include "location.h"
# include "uniqstr.h"
/* File name specified with -o for the output file, or 0 if no -o. */
extern char const *spec_outfile;
/* File name for the parser (i.e., the one above, or its default:
ALL_BUT_EXT followed by the source extension). */
extern char *parser_file_name;
/* Symbol prefix specified with -p, or 0 if no -p. */
extern const char *spec_name_prefix;
extern location spec_name_prefix_loc;
/* File name prefix specified with -b, or 0 if no -b. */
extern char const *spec_file_prefix;
extern location spec_file_prefix_loc;
/* File name of the report (--report/--verbose output). When reports
are enabled and no name was given, compute_output_file_names sets a
default. */
extern char *spec_verbose_file;
/* File name specified for the output graph (defaults to a ".gv" file
when the graph is requested without a name). */
extern char *spec_graph_file;
/* File name specified for the HTML output (defaults to a ".html"
file). */
extern char *spec_html_file;
/* File name specified for the XML output (defaults to a ".xml"
file). */
extern char *spec_xml_file;
/* File name specified with --header (defaults to ALL_BUT_EXT followed
by the header extension when a header is requested). */
extern char *spec_header_file;
/* Directory prefix of output file names. */
extern char *dir_prefix;
/* The file name as given on the command line.
Not named "input_file" because Flex uses this name for an argument,
and therefore GCC warns about a name clash. */
extern uniqstr grammar_file;
/* The computed base for output file names. */
extern char *all_but_ext;
/* Where our data files are installed: $BISON_PKGDATADIR if set,
otherwise the (relocated) configured directory. */
char const *pkgdatadir (void);
/* Where the m4 program is installed: $M4 if set, otherwise the
relocated configured path if it exists, otherwise the configured
path. */
char const *m4path (void);
/* Compute the output file names. Warn if we detect conflicting
outputs to the same file. */
void compute_output_file_names (void);
/* Free the file names and the bookkeeping allocated by this module. */
void output_file_names_free (void);
/** Record that we generate a file.
*
* On conflict (the name is the input grammar, or is already recorded),
* a diagnostic is issued and \c *file_name is freed and replaced by a
* newly allocated "/dev/null".
*
* \param file_name the name of file being generated.
* \param source whether this is a source file (*.c, *.java...)
* as opposed to a report (*.output, *.dot...).
*/
void output_file_name_check (char **file_name, bool source);
/** Remove all the generated source files. Errors are ignored. */
void unlink_generated_sources (void);
/* Wrappers around fopen, fclose and fdopen.
NOTE(review): presumably these report failures themselves (callers
such as fixits_run do not check the results); confirm in files.c. */
FILE *xfopen (const char *name, char const *mode);
void xfclose (FILE *ptr);
FILE *xfdopen (int fd, char const *mode);
/* Given an input file path, return a string that contains the path
with the file prefix mapping rules applied, or NULL if the input
was NULL. Do not free the return value. */
const char *map_file_name (char const *filename);
/* Add a new file prefix mapping. If a file path starts with
oldprefix, it will be replaced with newprefix. */
void add_prefix_map (char const *oldprefix, char const *newprefix);
#endif /* !FILES_H_ */
+229
View File
@@ -0,0 +1,229 @@
/* Support for fixing grammar files.
Copyright (C) 2019-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "fixits.h"
#include <error.h>
#include <get-errno.h>
#include <gl_array_list.h>
#include <gl_xlist.h>
#include <progname.h>
#include <quote.h>
#include <quotearg.h>
#include <vasnprintf.h>
#include "system.h"
#include "files.h"
#include "getargs.h"
/* A single suggested edit to the grammar file. */
typedef struct
{
/* The span of the input to replace. */
location location;
/* The replacement text; owned by the fixit (copied in fixit_new,
released in fixit_free). */
char *fix;
} fixit;
/* The registered fixits, kept sorted by location. NULL until the
first call to fixits_register, and again after fixits_free. */
gl_list_t fixits = NULL;
/* Allocate a fixit replacing the text at *LOC with a private copy of
   FIX.  To be released with fixit_free.  */
static fixit *
fixit_new (location const *loc, char const* fix)
{
  fixit *node = xmalloc (sizeof *node);
  node->fix = xstrdup (fix);
  node->location = *loc;
  return node;
}
/* Order fixits A and B by the location they apply to; this is the
   comparison used to keep the list of fixits sorted.  */
static int
fixit_cmp (const fixit *a, const fixit *b)
{
  int const order = location_cmp (a->location, b->location);
  return order;
}
/* Release F together with the replacement text it owns.  */
static void
fixit_free (fixit *f)
{
  char *text = f->fix;
  free (f);
  free (text);
}
/* Write F on OUT in the machine-readable "fix-it:" format, the same
   pattern GCC and Clang follow:
   https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Message-Formatting-Options.html
   https://clang.llvm.org/docs/UsersManual.html#cmdoption-fdiagnostics-parseable-fixits */
static void
fixit_print (fixit const *f, FILE *out)
{
  /* Distinct quotearg slots, so that both quoted strings are alive at
     the same time.  */
  char const *quoted_file
    = quotearg_n_style (1, c_quoting_style, f->location.start.file);
  char const *quoted_fix
    = quotearg_n_style (2, c_quoting_style, f->fix);

  fprintf (out, "fix-it:%s:{%d:%d-%d:%d}:%s\n",
           quoted_file,
           f->location.start.line, f->location.start.byte,
           f->location.end.line, f->location.end.byte,
           quoted_fix);
}
/* Record that the text at *LOC should be replaced by FIX.  The list
   of fixits is created on first use and kept sorted by location.
   When the "fixit" feature is enabled, the fix is also printed on
   stderr in machine-readable form.  */
void
fixits_register (location const *loc, char const* fix)
{
  if (fixits == NULL)
    fixits = gl_list_create_empty (GL_ARRAY_LIST,
                                   /* equals */ NULL,
                                   /* hashcode */ NULL,
                                   (gl_listelement_dispose_fn) fixit_free,
                                   true);

  fixit *entry = fixit_new (loc, fix);
  gl_sortedlist_add (fixits, (gl_listelement_compar_fn) fixit_cmp, entry);

  if ((feature_flag & feature_fixit) != 0)
    fixit_print (entry, stderr);
}
/* Whether no fixit was registered (the list is created by the first
   registration).  */
bool
fixits_empty (void)
{
  return fixits == NULL;
}
/* Apply the registered fixits to the grammar file, in place.
The file is first renamed to "FILE~" (the backup), then rewritten by
copying the backup in a single pass, substituting each fixit's text
for the span it designates. Does nothing when no fixit was
registered. Exits with failure if the backup cannot be made. */
void
fixits_run (void)
{
if (!fixits)
return;
/* This is not unlike what is done in location_caret. */
/* The file to update is taken from the first fixit only; every
fixit is then applied to that file. */
uniqstr input = ((fixit *) gl_list_get_at (fixits, 0))->location.start.file;
/* Backup the file. */
/* BUF is used when the name fits, otherwise asnprintf returns a
heap-allocated string (freed at the end of this function). */
char buf[256];
size_t len = sizeof (buf);
char *backup = asnprintf (buf, &len, "%s~", input);
if (!backup)
xalloc_die ();
if (rename (input, backup))
error (EXIT_FAILURE, get_errno (),
_("%s: cannot backup"), quotearg_colon (input));
FILE *in = xfopen (backup, "r");
FILE *out = xfopen (input, "w");
/* Position of the next character to read from IN, both counted
from 1. */
size_t line = 1;
size_t offset = 1;
void const *p = NULL;
/* The fixits were inserted with gl_sortedlist_add, so they are
visited in increasing location order, which is what allows a single
forward pass over IN. */
gl_list_iterator_t iter = gl_list_iterator (fixits);
while (gl_list_iterator_next (&iter, &p, NULL))
{
fixit const *f = p;
/* Look for the correct line. */
/* Copy whole lines verbatim until the fixit's first line. */
while (line < f->location.start.line)
{
int c = getc (in);
if (c == EOF)
break;
if (c == '\n')
{
++line;
offset = 1;
}
putc (c, out);
}
/* Look for the right offset. */
bool need_eol = false;
while (offset < f->location.start.byte)
{
int c = getc (in);
if (c == EOF)
break;
++offset;
if (c == '\n')
/* The position we are asked for is beyond the actual
line: pad with spaces, and remember we need a \n. */
need_eol = true;
/* Once the end of line was met, every character read (the
newline included) is emitted as a space. */
putc (need_eol ? ' ' : c, out);
}
/* Paste the fix instead. */
fputs (f->fix, out);
/* Maybe install the eol afterwards. */
if (need_eol)
putc ('\n', out);
/* Skip the bad input. */
/* Same two-step walk as above (lines, then bytes), up to the end
of the fixit's span, but nothing is copied to OUT. */
while (line < f->location.end.line)
{
int c = getc (in);
if (c == EOF)
break;
if (c == '\n')
{
++line;
offset = 1;
}
}
while (offset < f->location.end.byte)
{
int c = getc (in);
if (c == EOF)
break;
++offset;
}
/* If erasing the content of a full line, also remove the
end-of-line. */
if (f->fix[0] == 0 && f->location.start.byte == 1)
{
int c = getc (in);
if (c == EOF)
/* End of input: no remaining fixit can apply. */
break;
else if (c == '\n')
{
++line;
offset = 1;
}
else
/* Not an end-of-line after all: leave it for the next copy. */
ungetc (c, in);
}
}
/* Paste the rest of the file. */
{
int c;
while ((c = getc (in)) != EOF)
putc (c, out);
}
gl_list_iterator_free (&iter);
xfclose (out);
xfclose (in);
fprintf (stderr, "%s: file %s was updated (backup: %s)\n",
program_name, quote_n (0, input), quote_n (1, backup));
/* Free BACKUP only if asnprintf had to allocate it. */
if (backup != buf)
free (backup);
}
/* Free the registered fixits, and reset the list so that
   fixits_empty holds again.  */
void
fixits_free (void)
{
  if (!fixits)
    return;
  gl_list_free (fixits);
  fixits = NULL;
}
@@ -0,0 +1,35 @@
/* Support for fixing grammar files.
Copyright (C) 2019-2021 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef FIXITS_H_
# define FIXITS_H_ 1
# include "location.h"
/* Declare a fix to apply: the text at LOC is to be replaced by
UPDATE (which is copied). When the "fixit" feature is enabled, the
fix is also printed on stderr. */
void fixits_register (location const *loc, char const* update);
/* Apply the fixits: update the source file, after renaming the
original to a backup whose name ends with "~". No-op if no fixit
was registered. */
void fixits_run (void);
/* Whether there are no fixits. */
bool fixits_empty (void);
/* Free the registered fixits. */
void fixits_free (void);
#endif /* !FIXITS_H_ */
@@ -0,0 +1,126 @@
/* Common parts between scan-code.l, scan-gram.l, and scan-skel.l.
Copyright (C) 2006, 2009-2015, 2018-2021 Free Software Foundation,
Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
/* The including scanner must define FLEX_PREFIX, the macro used below
to build the prefixed names of the scanner's symbols. */
#ifndef FLEX_PREFIX
# error "FLEX_PREFIX not defined"
#endif
/* Flex full version as a number: MAJOR * 1000000 + MINOR * 1000
+ SUBMINOR, e.g., 2006004 for Flex 2.6.4. */
#define FLEX_VERSION \
((YY_FLEX_MAJOR_VERSION) * 1000000 \
+ (YY_FLEX_MINOR_VERSION) * 1000 \
+ (YY_FLEX_SUBMINOR_VERSION))
// Pacify warnings in yy_init_buffer (observed with Flex 2.6.4 and GCC
// 6.4.0 and 7.3.0).
//
// ./src/scan-skel.c: In function 'skel_restart':
// ./src/scan-skel.c:2035:20: error: potential null pointer dereference [-Werror=null-dereference]
// b->yy_fill_buffer = 1;
// ~~~~~~~~~~~~~~~~~~^~~
// ./src/scan-skel.c:2031:19: error: potential null pointer dereference [-Werror=null-dereference]
// b->yy_input_file = file;
// ~~~~~~~~~~~~~~~~~^~~~~~
#if defined __GNUC__ && ! defined __clang__ && 6 <= __GNUC__
# pragma GCC diagnostic ignored "-Wnull-dereference"
#endif
// Old versions of Flex (2.5.35) generate an incomplete documentation comment.
//
// In file included from src/scan-code-c.c:3:
// src/scan-code.c:2198:21: error: empty paragraph passed to '@param' command
// [-Werror,-Wdocumentation]
// * @param line_number
// ~~~~~~~~~~~~~~~~~^
// 1 error generated.
//
// NOTE(review): the threshold below has eight digits, whereas
// FLEX_VERSION as defined above yields seven-digit values for Flex 2.x
// (2.6.0 is 2006000). As written the test holds for every Flex 2.x,
// so the warning is disabled for all Clang builds. Confirm whether
// 2006000 (or 2005035) was intended.
#if FLEX_VERSION <= 20060000 && defined __clang__
# pragma clang diagnostic ignored "-Wdocumentation"
#endif
/* Pacify "gcc -Wmissing-prototypes" when flex 2.5.31 is used. */
#if FLEX_VERSION <= 2005031
int FLEX_PREFIX (get_lineno) (void);
FILE *FLEX_PREFIX (get_in) (void);
FILE *FLEX_PREFIX (get_out) (void);
int FLEX_PREFIX (get_leng) (void);
char *FLEX_PREFIX (get_text) (void);
void FLEX_PREFIX (set_lineno) (int);
void FLEX_PREFIX (set_in) (FILE *);
void FLEX_PREFIX (set_out) (FILE *);
int FLEX_PREFIX (get_debug) (void);
void FLEX_PREFIX (set_debug) (int);
int FLEX_PREFIX (lex_destroy) (void);
#endif
/* Give each scanner its own, prefixed, LAST_STRING variable. */
#define last_string FLEX_PREFIX (last_string)
/* It seems to be a nice "feature" of Flex that one cannot use yytext,
yyleng etc. when a prefix is given, since there is no longer a
#define, but rather the token is actually changed in the output.
However, this is not true for Flex 2.5.4. */
#ifndef yyleng
# define yyleng FLEX_PREFIX (leng)
#endif
#ifndef yytext
# define yytext FLEX_PREFIX (text)
#endif
/* Non-reentrant scanners generated by Flex 2.5.9 and later (and some earlier
versions according to the Flex manual) leak memory if yylex_destroy is not
invoked. However, yylex_destroy is not defined before Flex 2.5.9, so give
an implementation here that at least appears to work with Flex 2.5.4. */
#if FLEX_VERSION <= 2005009
# define yylex_destroy() yy_delete_buffer (YY_CURRENT_BUFFER)
#endif
/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
keep (to construct ID, STRINGS etc.). Use the following macros to
use it.
Use STRING_GROW () to append what has just been matched (yytext,
yyleng), STRING_1GROW (Char) to append a single character, and
STRING_FINISH () to end the string (it puts the ending 0).
STRING_FINISH () also stores this string in LAST_STRING, which can be
used, and which is used by STRING_FREE () to free the last string.
Unless NDEBUG is defined, STRING_FREE () also resets LAST_STRING to
NULL.
Define FLEX_NO_OBSTACK before including this file to get none of
this. */
#ifndef FLEX_NO_OBSTACK
static struct obstack obstack_for_string;
# define STRING_GROW() \
obstack_grow (&obstack_for_string, yytext, yyleng)
# define STRING_FINISH() \
(last_string = obstack_finish0 (&obstack_for_string))
# define STRING_1GROW(Char) \
obstack_1grow (&obstack_for_string, Char)
# ifdef NDEBUG
# define STRING_FREE() \
obstack_free (&obstack_for_string, last_string)
# else
# define STRING_FREE() \
do { \
obstack_free (&obstack_for_string, last_string); \
last_string = NULL; \
} while (0)
# endif
#endif
@@ -0,0 +1,918 @@
/* Parse command line arguments for Bison.
Copyright (C) 1984, 1986, 1989, 1992, 2000-2015, 2018-2021 Free
Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "getargs.h"
#include "system.h"
#include <argmatch.h>
#include <c-strcase.h>
#include <configmake.h>
#include <error.h>
#include <getopt.h>
#include <progname.h>
#include <quote.h>
#include <textstyle.h>
#include "complain.h"
#include "files.h"
#include "muscle-tab.h"
#include "output.h"
#include "uniqstr.h"
/* Whether a header file is requested (set by -H and -d). */
bool header_flag = false;
/* Whether the graph, HTML and XML outputs are requested; they
control which output file names are computed. */
bool graph_flag = false;
bool html_flag = false;
bool xml_flag = false;
bool no_lines_flag = false;
bool token_table_flag = false;
/* Where Yacc mode was requested (see set_yacc); empty if it was
not. */
location yacc_loc = EMPTY_LOCATION_INIT;
bool update_flag = false; /* for -u */
/* Set by --color=debug. */
bool color_debug = false;
bool nondeterministic_parser = false;
bool glr_parser = false;
/* Bit sets of enabled features, reports and traces. Carets are
enabled by default. */
int feature_flag = feature_caret;
int report_flag = report_none;
int trace_flag = trace_none;
/* The supported output languages. The table ends with an entry
whose language name is empty. */
static struct bison_language const valid_languages[] = {
/* lang, skeleton, ext, hdr, add_tab */
{ "c", "c-skel.m4", ".c", ".h", true },
{ "c++", "c++-skel.m4", ".cc", ".hh", true },
{ "d", "d-skel.m4", ".d", ".d", false },
{ "java", "java-skel.m4", ".java", ".java", false },
{ "", "", "", "", false }
};
/* The selected skeleton and language, with the priority of the
declaration that selected them: a declaration replaces the current
choice only if its priority value is strictly smaller (see
skeleton_arg and language_argmatch). The default language is the
first entry of the table, C. */
int skeleton_prio = default_prio;
const char *skeleton = NULL;
int language_prio = default_prio;
struct bison_language const *language = &valid_languages[0];
/* Type of the functions that map the key ARG of option CONTEXT to a
pointer to its integer value. */
typedef int* (xargmatch_fn) (const char *context, const char *arg);
/* Record that Yacc mode was requested at LOC.  In addition, when
   POSIXLY_CORRECT is set in the environment, define the "posix"
   %define variable, as -D would.  */
void
set_yacc (location loc)
{
  yacc_loc = loc;
  if (getenv ("POSIXLY_CORRECT") == NULL)
    return;
  muscle_percent_define_insert ("posix", loc, muscle_keyword, "",
                                MUSCLE_PERCENT_DEFINE_D);
}
/** Decode one key of an option and update the flags accordingly.
 *
 * \param opt        option being decoded.
 * \param xargmatch  matching function.
 * \param all        the value of the argument 'all'.
 * \param flags      the flags to update
 * \param arg        the subargument to decode, "no-" prefix included.
 * \param no         length of the potential "no-" prefix.
 *                   Can be 0 or 3.  If 3, negate the action of the
 *                   subargument.
 *
 * A key whose value is nonzero sets its flags, and its no- form
 * clears them.  A key whose value is zero ("none") clears all the
 * flags of \c all, and its no- form sets them all.  Thus
 * no-none = all and no-all = none.
 */
static void
flag_argmatch (const char *opt, xargmatch_fn xargmatch,
               int all, int *flags, char *arg, size_t no)
{
  int mask = *xargmatch (opt, arg + no);
  bool negate = no != 0;

  /* -rnone == -rno-all, and -rno-none == -rall. */
  if (mask == 0)
    {
      mask = all;
      negate = !negate;
    }

  if (negate)
    *flags &= ~mask;
  else
    *flags |= mask;
}
typedef void (usage_fn) (FILE *out);
/** Decode the comma-separated keys of an option.
 *
 * \param opt        option being decoded (e.g., --report).
 * \param xargmatch  matching function.
 * \param usage      function that implements --help for this option.
 * \param all        the value of the argument 'all'.
 * \param flags      the flags to update
 * \param args       comma separated list of effective subarguments to
 *                   decode.  If 0, then activate all the flags.
 *                   The string is split in place.
 *
 * The single argument "help" prints the option's usage on stdout and
 * exits successfully.
 */
static void
flags_argmatch (const char *opt,
                xargmatch_fn xargmatch,
                usage_fn usage,
                int all, int *flags, char *args)
{
  if (!args)
    {
      *flags |= all;
      return;
    }

  if (STREQ (args, "help"))
    {
      usage (stdout);
      exit (EXIT_SUCCESS);
    }

  char *key = strtok (args, ",");
  while (key)
    {
      size_t no = STRPREFIX_LIT ("no-", key) ? 3 : 0;
      flag_argmatch (opt, xargmatch, all, flags, key, no);
      key = strtok (NULL, ",");
    }
}
/** Decode a set of sub arguments.
*
* \param FlagName the flag family to update.
* \param Args the effective sub arguments to decode.
* \param All the "all" value.
*
* Relies on the following names, derived from FlagName:
* \arg argmatch_FlagName_value the function matching a key to its value.
* \arg argmatch_FlagName_usage the function printing the help.
* \arg FlagName_flag the flag to update.
*
* The option name passed to the diagnostics is "--FlagName".
*/
#define FLAGS_ARGMATCH(FlagName, Args, All) \
flags_argmatch ("--" #FlagName, \
(xargmatch_fn*) argmatch_## FlagName ## _value, \
argmatch_ ## FlagName ## _usage, \
All, &FlagName ## _flag, Args)
/*---------------------.
| --color's handling. |
`---------------------*/
/* The possible values of --color=WHEN. */
enum color
{
color_always,
color_never,
color_auto
};
/* NOTE(review): presumably this gnulib macro provides the
argmatch_color_* types and functions used below and in usage;
confirm in argmatch.h. */
ARGMATCH_DEFINE_GROUP (color, enum color)
/* The documented keys, with their help strings. */
static const argmatch_color_doc argmatch_color_docs[] =
{
{ "always", N_("colorize the output") },
{ "never", N_("don't colorize the output") },
{ "auto", N_("colorize if the output device is a tty") },
{ NULL, NULL },
};
/* The accepted keys: the documented ones, plus the aliases "yes",
"no" and "tty". The last entry, whose key is NULL, ends the
table. */
static const argmatch_color_arg argmatch_color_args[] =
{
{ "always", color_always },
{ "yes", color_always },
{ "never", color_never },
{ "no", color_never },
{ "auto", color_auto },
{ "tty", color_auto },
{ NULL, color_always },
};
const argmatch_color_group_type argmatch_color_group =
{
argmatch_color_args,
argmatch_color_docs,
/* TRANSLATORS: Use the same translation for WHEN as in the
--color=WHEN help message. */
N_("WHEN can be one of the following:"),
NULL
};
/*----------------------.
| --report's handling. |
`----------------------*/
ARGMATCH_DEFINE_GROUP (report, enum report)
/* The documented keys, with their help strings. */
static const argmatch_report_doc argmatch_report_docs[] =
{
{ "states", N_("describe the states") },
{ "itemsets", N_("complete the core item sets with their closure") },
{ "lookaheads", N_("explicitly associate lookahead tokens to items") },
{ "solved", N_("describe shift/reduce conflicts solving") },
{ "counterexamples", N_("generate conflict counterexamples") },
{ "all", N_("include all the above information") },
{ "none", N_("disable the report") },
{ NULL, NULL },
};
/* The accepted keys and the flags they stand for. "itemsets",
"lookaheads" and "solved" include report_states. "cex" is an
undocumented alias of "counterexamples". The last entry, whose key
is NULL, ends the table. */
static const argmatch_report_arg argmatch_report_args[] =
{
{ "none", report_none },
{ "states", report_states },
{ "itemsets", report_states | report_itemsets },
{ "lookaheads", report_states | report_lookaheads },
{ "solved", report_states | report_solved_conflicts },
{ "counterexamples", report_cex },
{ "cex", report_cex },
{ "all", report_all },
{ NULL, report_none },
};
const argmatch_report_group_type argmatch_report_group =
{
argmatch_report_args,
argmatch_report_docs,
/* TRANSLATORS: Use the same translation for THINGS as in the
--report=THINGS help message. */
N_("THINGS is a list of comma separated words that can include:"),
NULL
};
/*---------------------.
| --trace's handling. |
`---------------------*/
ARGMATCH_DEFINE_GROUP (trace, enum trace)
/* The documented keys, with their help strings. */
static const argmatch_trace_doc argmatch_trace_docs[] =
{
/* Meant for developers only, don't translate them. */
{ "none", "no traces" },
{ "locations", "full display of the locations" },
{ "scan", "grammar scanner traces" },
{ "parse", "grammar parser traces" },
{ "automaton", "construction of the automaton" },
{ "bitsets", "use of bitsets" },
{ "closure", "input/output of closure" },
{ "grammar", "reading, reducing the grammar" },
{ "resource", "memory consumption (where available)" },
{ "sets", "grammar sets: firsts, nullable etc." },
{ "muscles", "m4 definitions passed to the skeleton" },
{ "tools", "m4 invocation" },
{ "m4-early", "m4 traces starting from the start" },
{ "m4", "m4 traces starting from the skeleton evaluation" },
{ "skeleton", "skeleton postprocessing" },
{ "time", "time consumption" },
{ "ielr", "IELR conversion" },
{ "cex", "counterexample generation"},
{ "all", "all of the above" },
{ NULL, NULL},
};
/* The accepted keys and the flag each one stands for, in the same
order as the documentation above. The last entry, whose key is
NULL, ends the table. */
static const argmatch_trace_arg argmatch_trace_args[] =
{
{ "none", trace_none },
{ "locations", trace_locations },
{ "scan", trace_scan },
{ "parse", trace_parse },
{ "automaton", trace_automaton },
{ "bitsets", trace_bitsets },
{ "closure", trace_closure },
{ "grammar", trace_grammar },
{ "resource", trace_resource },
{ "sets", trace_sets },
{ "muscles", trace_muscles },
{ "tools", trace_tools },
{ "m4-early", trace_m4_early },
{ "m4", trace_m4 },
{ "skeleton", trace_skeleton },
{ "time", trace_time },
{ "ielr", trace_ielr },
{ "cex", trace_cex },
{ "all", trace_all },
{ NULL, trace_none},
};
const argmatch_trace_group_type argmatch_trace_group =
{
argmatch_trace_args,
argmatch_trace_docs,
N_("TRACES is a list of comma separated words that can include:"),
NULL
};
/*-----------------------.
| --feature's handling. |
`-----------------------*/
ARGMATCH_DEFINE_GROUP (feature, enum feature)
/* The documented keys, with their help strings. */
static const argmatch_feature_doc argmatch_feature_docs[] =
{
{ "caret", N_("show errors with carets") },
{ "fixit", N_("show machine-readable fixes") },
{ "syntax-only", N_("do not generate any file") },
{ "all", N_("all of the above") },
{ "none", N_("disable all of the above") },
{ NULL, NULL }
};
/* The accepted keys and the flag each one stands for.
"diagnostics-show-caret" and "diagnostics-parseable-fixits" are
undocumented aliases of "caret" and "fixit". The last entry, whose
key is NULL, ends the table. */
static const argmatch_feature_arg argmatch_feature_args[] =
{
{ "none", feature_none },
{ "caret", feature_caret },
{ "diagnostics-show-caret", feature_caret },
{ "fixit", feature_fixit },
{ "diagnostics-parseable-fixits", feature_fixit },
{ "syntax-only", feature_syntax_only },
{ "all", feature_all },
{ NULL, feature_none}
};
const argmatch_feature_group_type argmatch_feature_group =
{
argmatch_feature_args,
argmatch_feature_docs,
/* TRANSLATORS: Use the same translation for FEATURES as in the
--feature=FEATURES help message. */
N_("FEATURES is a list of comma separated words that can include:"),
NULL
};
/*-------------------------------------------.
| Display the help message and exit STATUS. |
`-------------------------------------------*/
_Noreturn
static void usage (int);

/* If STATUS is nonzero, print a one-line hint on stderr; otherwise
   print the complete help on stdout.  In both cases exit with STATUS:
   this function does not return.  */
static void
usage (int status)
{
  if (status != 0)
    fprintf (stderr, _("Try '%s --help' for more information.\n"),
             program_name);
  else
    {
      /* For ../build-aux/cross-options.pl to work, use the format:
         ^ -S, --long[=ARGS] (whitespace)
         A --long option is required.
         Otherwise, add exceptions to ../build-aux/cross-options.pl. */
      printf (_("Usage: %s [OPTION]... FILE\n"), program_name);
      fputs (_("\
Generate a deterministic LR or generalized LR (GLR) parser employing\n\
LALR(1), IELR(1), or canonical LR(1) parser tables.\n\
\n\
"), stdout);
      fputs (_("\
Mandatory arguments to long options are mandatory for short options too.\n\
"), stdout);
      fputs (_("\
The same is true for optional arguments.\n\
"), stdout);
      putc ('\n', stdout);
      fputs (_("\
Operation Modes:\n\
-h, --help display this help and exit\n\
-V, --version output version information and exit\n\
--print-localedir output directory containing locale-dependent data\n\
and exit\n\
--print-datadir output directory containing skeletons and XSLT\n\
and exit\n\
-u, --update apply fixes to the source grammar file and exit\n\
-f, --feature[=FEATURES] activate miscellaneous features\n\
\n\
"), stdout);
      argmatch_feature_usage (stdout);
      putc ('\n', stdout);
      fputs (_("\
Diagnostics:\n\
-W, --warnings[=CATEGORY] report the warnings falling in CATEGORY\n\
--color[=WHEN] whether to colorize the diagnostics\n\
--style=FILE specify the CSS FILE for colorizer diagnostics\n\
\n\
"), stdout);
      warning_usage (stdout);
      putc ('\n', stdout);
      argmatch_color_usage (stdout);
      putc ('\n', stdout);
      fputs (_("\
Tuning the Parser:\n\
-L, --language=LANGUAGE specify the output programming language\n\
-S, --skeleton=FILE specify the skeleton to use\n\
-t, --debug instrument the parser for tracing\n\
same as '-Dparse.trace'\n\
--locations enable location support\n\
-D, --define=NAME[=VALUE] similar to '%define NAME VALUE'\n\
-F, --force-define=NAME[=VALUE] override '%define NAME VALUE'\n\
-p, --name-prefix=PREFIX prepend PREFIX to the external symbols\n\
deprecated by '-Dapi.prefix={PREFIX}'\n\
-l, --no-lines don't generate '#line' directives\n\
-k, --token-table include a table of token names\n\
-y, --yacc emulate POSIX Yacc\n\
"), stdout);
      putc ('\n', stdout);
      fputs (_("\
Output Files:\n\
-H, --header=[FILE] also produce a header file\n\
-d likewise but cannot specify FILE (for POSIX Yacc)\n\
-r, --report=THINGS also produce details on the automaton\n\
--report-file=FILE write report to FILE\n\
-v, --verbose same as '--report=state'\n\
-b, --file-prefix=PREFIX specify a PREFIX for output files\n\
-o, --output=FILE leave output to FILE\n\
-g, --graph[=FILE] also output a graph of the automaton\n\
--html[=FILE] also output an HTML report of the automaton\n\
-x, --xml[=FILE] also output an XML report of the automaton\n\
-M, --file-prefix-map=OLD=NEW replace prefix OLD with NEW when writing file paths\n\
in output files\n\
"), stdout);
      putc ('\n', stdout);
      argmatch_report_usage (stdout);
      putc ('\n', stdout);
      printf (_("Report bugs to <%s>.\n"), PACKAGE_BUGREPORT);
      printf (_("%s home page: <%s>.\n"), PACKAGE_NAME, PACKAGE_URL);
      fputs (_("General help using GNU software: "
               "<https://www.gnu.org/gethelp/>.\n"),
             stdout);
#if (defined __GLIBC__ && __GLIBC__ >= 2) && !defined __UCLIBC__
      /* Don't output this redundant message for English locales.
         Note we still output for 'C' so that it gets included in the
         man page.

         The locale name must be tested as a prefix: actual names look
         like "en_US.UTF-8", which an exact comparison with "en_"
         never matches.  */
      const char *lc_messages = setlocale (LC_MESSAGES, NULL);
      if (lc_messages && !(STRPREFIX_LIT ("en_", lc_messages)))
        /* TRANSLATORS: Replace LANG_CODE in this URL with your language code to
           form one of the URLs at https://translationproject.org/team/.
           Otherwise, replace the entire URL with your translation team's
           email address. */
        fputs (_("Report translation bugs to "
                 "<https://translationproject.org/team/>.\n"), stdout);
#endif
      fputs (_("For complete documentation, run: info bison.\n"), stdout);
    }
  exit (status);
}
/*------------------------------.
| Display the version message. |
`------------------------------*/

/* Print the program name and version, the authors, the copyright
   notice and the warranty disclaimer on stdout.  */
static void
version (void)
{
  /* Some efforts were made to ease the translators' task, please
     continue. */
  printf ("bison (GNU Bison) %s", VERSION);
  putchar ('\n');
  fputs (_("Written by Robert Corbett and Richard Stallman.\n"), stdout);
  putchar ('\n');

  printf (_("Copyright (C) %d Free Software Foundation, Inc.\n"),
          PACKAGE_COPYRIGHT_YEAR);

  fputs (_("\
This is free software; see the source for copying conditions. There is NO\n\
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
"),
         stdout);
}
/*-------------------------------------.
| --skeleton and --language handling. |
`--------------------------------------*/
/* Select skeleton ARG, declared at LOC with priority PRIO.  The
   declaration is taken into account only if PRIO is strictly smaller
   than the priority of the current choice; a second declaration with
   the same priority is an error; otherwise it is silently ignored.
   ARG is not copied.  */
void
skeleton_arg (char const *arg, int prio, location loc)
{
  if (prio == skeleton_prio)
    {
      complain (&loc, complaint,
                _("multiple skeleton declarations are invalid"));
      return;
    }

  if (prio < skeleton_prio)
    {
      skeleton_prio = prio;
      skeleton = arg;
    }
}
/* Select the output language named ARG (case-insensitively), declared
   at LOC with priority PRIO.  The declaration is taken into account
   only if PRIO is strictly smaller than the priority of the current
   choice, in which case an unknown name is an error; a second
   declaration with the same priority is an error too; otherwise it
   is silently ignored.  */
void
language_argmatch (char const *arg, int prio, location loc)
{
  /* Overridden by a previous declaration: nothing to do.  */
  if (language_prio < prio)
    return;

  if (language_prio == prio)
    {
      complain (&loc, complaint,
                _("multiple language declarations are invalid"),
                quotearg_colon (arg));
      return;
    }

  /* The table ends with an entry whose name is empty.  */
  for (struct bison_language const *lang = valid_languages;
       lang->language[0]; ++lang)
    if (c_strcasecmp (arg, lang->language) == 0)
      {
        language_prio = prio;
        language = lang;
        return;
      }

  complain (&loc, complaint, _("%s: invalid language"),
            quotearg_colon (arg));
}
/*----------------------.
| Process the options. |
`----------------------*/
/* Short options.
Should be computed from long_options. */
static char const short_options[] =
"D:"
"F:"
"H::"
"L:"
"S:"
"T::"
"V"
"W::"
"b:"
"d"
"f::"
"g::"
"h"
"k"
"l"
"M:"
"o:"
"p:"
"r:"
"t"
"u" /* --update */
"v"
"x::"
"y"
;
/* Values for long options that do not have single-letter equivalents. */
enum
{
COLOR_OPTION = CHAR_MAX + 1,
FIXED_OUTPUT_FILES_OPTION,
HTML_OPTION,
LOCATIONS_OPTION,
PRINT_DATADIR_OPTION,
PRINT_LOCALEDIR_OPTION,
REPORT_FILE_OPTION,
STYLE_OPTION
};
/* The long options, with the value getopt_long returns for each:
the equivalent short option letter, or one of the *_OPTION codes
above for the long-only options.
In the same order as in usage(), and in the documentation. */
static struct option const long_options[] =
{
/* Operation modes. */
{ "help", no_argument, 0, 'h' },
{ "version", no_argument, 0, 'V' },
{ "print-localedir", no_argument, 0, PRINT_LOCALEDIR_OPTION },
{ "print-datadir", no_argument, 0, PRINT_DATADIR_OPTION },
{ "update", no_argument, 0, 'u' },
{ "feature", optional_argument, 0, 'f' },
/* Diagnostics. */
{ "warnings", optional_argument, 0, 'W' },
{ "color", optional_argument, 0, COLOR_OPTION },
/* NOTE(review): the help message documents --style=FILE with a
mandatory argument, yet it is declared optional here; confirm this
is intended. */
{ "style", optional_argument, 0, STYLE_OPTION },
/* Tuning the Parser. */
{ "language", required_argument, 0, 'L' },
{ "skeleton", required_argument, 0, 'S' },
{ "debug", no_argument, 0, 't' },
{ "locations", no_argument, 0, LOCATIONS_OPTION },
{ "define", required_argument, 0, 'D' },
{ "force-define", required_argument, 0, 'F' },
{ "name-prefix", required_argument, 0, 'p' },
{ "no-lines", no_argument, 0, 'l' },
{ "token-table", no_argument, 0, 'k' },
{ "yacc", no_argument, 0, 'y' },
/* Output Files. */
{ "header", optional_argument, 0, 'H' },
/* --defines accepts an optional file name, although it shares its
code with the short option -d, which takes no argument. */
{ "defines", optional_argument, 0, 'd' },
{ "report", required_argument, 0, 'r' },
{ "report-file", required_argument, 0, REPORT_FILE_OPTION },
{ "verbose", no_argument, 0, 'v' },
{ "file-prefix", required_argument, 0, 'b' },
{ "output", required_argument, 0, 'o' },
{ "graph", optional_argument, 0, 'g' },
{ "html", optional_argument, 0, HTML_OPTION },
{ "xml", optional_argument, 0, 'x' },
{ "file-prefix-map", required_argument, 0, 'M' },
/* Hidden. */
{ "fixed-output-files", no_argument, 0, FIXED_OUTPUT_FILES_OPTION },
/* Same as --output. */
{ "output-file", required_argument, 0, 'o' },
{ "trace", optional_argument, 0, 'T' },
{0, 0, 0, 0}
};
/* Build a location for the current command line argument. */
static location
command_line_location (void)
{
location res;
/* "<command line>" is used in GCC's messages about -D. */
boundary_set (&res.start, uniqstr_new ("<command line>"), optind - 1, -1, -1);
res.end = res.start;
return res;
}
/* Scan the whole command line for --color, --color=ARG and
   --style=ARG, and apply them right away.  This runs before the
   regular option processing so that the diagnostics getargs itself
   issues are already colored as the user requested, which is how GCC
   and Clang behave.  --color=debug only sets COLOR_DEBUG.  */
static void
getargs_colors (int argc, char *argv[])
{
  for (int i = 1; i < argc; i++)
    {
      const char *arg = argv[i];

      if (STREQ ("--color", arg))
        {
          handle_color_option (NULL);
          continue;
        }

      if (STRPREFIX_LIT ("--style=", arg))
        {
          handle_style_option (arg + strlen ("--style="));
          continue;
        }

      if (!STRPREFIX_LIT ("--color=", arg))
        continue;

      const char *color = arg + strlen ("--color=");
      if (STREQ (color, "debug"))
        color_debug = true;
      else
        handle_color_option (color);
    }
  complain_init_color ();
}
/* Process the command line ARGC/ARGV.

   The color related options are applied first (getargs_colors), then
   every option is decoded with getopt_long and recorded in the
   corresponding flag, spec_* variable or %define muscle.  Exactly one
   operand must remain: it is stored in GRAMMAR_FILE and in the
   "file_name" muscle.  Any other operand count reports an error and
   calls usage (EXIT_FAILURE).  */
void
getargs (int argc, char *argv[])
{
getargs_colors (argc, argv);
int c;
while ((c = getopt_long (argc, argv, short_options, long_options, NULL))
!= -1)
{
/* Location attached to what this option defines or complains about. */
location loc = command_line_location ();
switch (c)
{
/* ASCII Sorting for short options (i.e., upper case then
lower case), and then long-only options. */
case 0:
/* Certain long options cause getopt_long to return 0. */
break;
case 'D': /* -DNAME[=(VALUE|"VALUE"|{VALUE})]. */
case 'F': /* -FNAME[=(VALUE|"VALUE"|{VALUE})]. */
{
/* OPTARG is split in place: the '=' is overwritten with a NUL so
that NAME and VALUE become two strings.  */
char *name = optarg;
char *value = strchr (optarg, '=');
muscle_kind kind = muscle_keyword;
if (value)
{
/* END is the last character of the argument; it is computed
before VALUE is moved past the '='.  */
char *end = value + strlen (value) - 1;
*value++ = 0;
/* Strip the enclosing {...} or "...".  Note that when VALUE
is a lone '"', VALUE and END designate the same character,
so the second test holds and the result is an empty
string.  */
if (*value == '{' && *end == '}')
{
kind = muscle_code;
++value;
*end = 0;
}
else if (*value == '"' && *end == '"')
{
kind = muscle_string;
++value;
*end = 0;
}
}
muscle_percent_define_insert (name, loc,
kind, value ? value : "",
c == 'D' ? MUSCLE_PERCENT_DEFINE_D
: MUSCLE_PERCENT_DEFINE_F);
}
break;
case 'H':
case 'd':
header_flag = true;
/* The file name is optional; the last one given wins. */
if (optarg)
{
free (spec_header_file);
spec_header_file = xstrdup (optarg);
}
break;
case 'L':
language_argmatch (optarg, command_line_prio, loc);
break;
case 'M': // -MOLDPREFIX=NEWPREFIX
{
/* Split OPTARG in place at the first '='. */
char *newprefix = strchr (optarg, '=');
if (newprefix)
{
*newprefix = '\0';
add_prefix_map (optarg, newprefix + 1);
}
else
{
complain (&loc, complaint, _("invalid argument for %s: %s"),
quote ("--file-prefix-map"), quotearg_n (1, optarg));
}
}
break;
case 'S':
skeleton_arg (optarg, command_line_prio, loc);
break;
case 'T':
FLAGS_ARGMATCH (trace, optarg, trace_all);
break;
case 'V':
version ();
exit (EXIT_SUCCESS);
case 'f':
FLAGS_ARGMATCH (feature, optarg, feature_all);
break;
case 'W':
warnings_argmatch (optarg);
break;
case 'b':
/* OPTARG is stored as is, not copied. */
spec_file_prefix = optarg;
break;
case 'g':
graph_flag = true;
if (optarg)
{
free (spec_graph_file);
spec_graph_file = xstrdup (optarg);
}
break;
case 'h':
/* No break: NOTE(review): usage presumably does not return --
confirm against its declaration.  */
usage (EXIT_SUCCESS);
case 'k':
token_table_flag = true;
break;
case 'l':
no_lines_flag = true;
break;
case 'o':
/* OPTARG is stored as is, not copied. */
spec_outfile = optarg;
break;
case 'p':
/* OPTARG is stored as is, not copied. */
spec_name_prefix = optarg;
break;
case 'r':
FLAGS_ARGMATCH (report, optarg, report_all);
break;
case 't':
/* Behaves like -Dparse.trace. */
muscle_percent_define_insert ("parse.trace",
loc,
muscle_keyword, "",
MUSCLE_PERCENT_DEFINE_D);
break;
case 'u':
/* Updating the grammar file also turns on feature_syntax_only. */
update_flag = true;
feature_flag |= feature_syntax_only;
break;
case 'v':
report_flag |= report_states;
break;
case 'x':
xml_flag = true;
if (optarg)
{
free (spec_xml_file);
spec_xml_file = xstrdup (optarg);
}
break;
case 'y':
warning_argmatch ("yacc", 0, 0);
set_yacc (loc);
break;
case COLOR_OPTION:
/* Handled in getargs_colors. */
break;
case HTML_OPTION:
/* --html also turns on the XML flag. */
html_flag = true;
xml_flag = true;
if (optarg)
{
free (spec_html_file);
spec_html_file = xstrdup (optarg);
}
break;
case FIXED_OUTPUT_FILES_OPTION:
complain (&loc, Wdeprecated,
_("deprecated option: %s, use %s"),
quote ("--fixed-output-files"), quote_n (1, "-o y.tab.c"));
spec_outfile = "y.tab.c";
break;
case LOCATIONS_OPTION:
muscle_percent_define_ensure ("locations", loc, true);
break;
case PRINT_LOCALEDIR_OPTION:
printf ("%s\n", LOCALEDIR);
exit (EXIT_SUCCESS);
case PRINT_DATADIR_OPTION:
printf ("%s\n", pkgdatadir ());
exit (EXIT_SUCCESS);
case REPORT_FILE_OPTION:
free (spec_verbose_file);
spec_verbose_file = xstrdup (optarg);
break;
case STYLE_OPTION:
/* Handled in getargs_colors. */
break;
default:
usage (EXIT_FAILURE);
}
}
/* Exactly one operand, the grammar file, is expected. */
if (argc - optind != 1)
{
if (argc - optind < 1)
error (0, 0, _("missing operand"));
else
/* argv[optind] would be the grammar file, so the first operand in
excess is the next one.  */
error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
usage (EXIT_FAILURE);
}
grammar_file = uniqstr_new (argv[optind]);
MUSCLE_INSERT_C_STRING ("file_name", grammar_file);
}
/* Replace, in place, every occurrence of the character FROM by the
   character TO in the NUL-terminated string S.  */
void
tr (char *s, char from, char to)
{
  while (*s != '\0')
    {
      if (*s == from)
        *s = to;
      ++s;
    }
}
@@ -0,0 +1,148 @@
/* Parse command line arguments for bison.
Copyright (C) 1984, 1986, 1989, 1992, 2000-2015, 2018-2021 Free
Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef GETARGS_H_
# define GETARGS_H_
# include "location.h"
/* Origin of a language or skeleton request, passed as the PRIO
   argument of language_argmatch and skeleton_arg; getargs passes
   command_line_prio.  NOTE(review): presumably a smaller value takes
   precedence over a larger one -- confirm against those functions.  */
enum { command_line_prio, grammar_prio, default_prio };
/* Flags set by the command line options (see getargs) and by
   % directives.  */
/* for -S */
extern char const *skeleton;
extern int skeleton_prio;
/* for -I */
extern char const *include;
extern bool header_flag; /* for -d/-H */
extern bool graph_flag; /* for -g */
extern bool html_flag; /* for --html */
extern bool xml_flag; /* for -x, also set by --html */
extern bool no_lines_flag; /* for -l */
extern bool token_table_flag; /* for -k */
extern location yacc_loc; /* for -y */
extern bool update_flag; /* for -u */
extern bool color_debug; /* --color=debug. */
/* GLR_PARSER is true if the input file says to use the GLR
(Generalized LR) parser, and to output some additional information
used by the GLR algorithm. */
extern bool glr_parser;
/* NONDETERMINISTIC_PARSER is true iff conflicts are accepted. This
is used by the GLR parser, and might be used in BackTracking
parsers too. */
extern bool nondeterministic_parser;
/* --language.

   Description of one output language.  The members are fixed-size
   arrays whose sizes are taken from the literals "Java",
   "java-skel.m4" and ".java": a language whose name, skeleton or
   extension is longer than those does not fit unless the sizes are
   enlarged.  */
struct bison_language
{
char language[sizeof "Java"];
char skeleton[sizeof "java-skel.m4"];
char src_extension[sizeof ".java"];
char header_extension[sizeof ".java"];
bool add_tab;
};
/* Priority and description of the currently selected language. */
extern int language_prio;
extern struct bison_language const *language;
/*-----------.
| --report. |
`-----------*/
/* Bit flags, one bit per kind of item, combined in REPORT_FLAG.
   getargs ORs in report_states for -v and decodes the argument of
   -r/--report into these values.  */
enum report
{
report_none = 0,
report_states = 1 << 0,
report_itemsets = 1 << 1,
report_lookaheads = 1 << 2,
report_solved_conflicts = 1 << 3,
report_cex = 1 << 4,
report_all = ~0 /* Every bit set. */
};
/** What appears in the *.output file. */
extern int report_flag;
/*----------.
| --trace. |
`----------*/
/* Bit flags, one bit per kind of trace, combined in TRACE_FLAG.
   getargs decodes the argument of -T/--trace into these values.  */
enum trace
{
trace_none = 0, /**< No traces. */
trace_scan = 1 << 0, /**< Grammar scanner traces. */
trace_parse = 1 << 1, /**< Grammar parser traces. */
trace_resource = 1 << 2, /**< Memory allocation. */
trace_sets = 1 << 3, /**< Grammar sets: firsts, nullable etc. */
trace_bitsets = 1 << 4, /**< Use of bitsets. */
trace_tools = 1 << 5, /**< m4 invocation. */
trace_automaton = 1 << 6, /**< Construction of the automaton. */
trace_grammar = 1 << 7, /**< Reading, reducing the grammar. */
trace_time = 1 << 8, /**< Time consumption. */
trace_skeleton = 1 << 9, /**< Skeleton postprocessing. */
trace_m4_early = 1 << 10, /**< M4 early traces. */
trace_m4 = 1 << 11, /**< M4 traces. */
trace_muscles = 1 << 12, /**< M4 definitions of the muscles. */
trace_ielr = 1 << 13, /**< IELR conversion. */
trace_closure = 1 << 14, /**< Input/output of closure(). */
trace_locations = 1 << 15, /**< Full display of locations. */
trace_cex = 1 << 16, /**< Counterexample generation. */
trace_all = ~0 /**< All of the above. */
};
/** What debug items bison displays during its run. */
extern int trace_flag;
/*------------.
| --feature. |
`------------*/
/* Bit flags, one bit per feature, combined in FEATURE_FLAG.  getargs
   decodes the argument of -f/--feature into these values, and -u
   turns on feature_syntax_only.  */
enum feature
{
feature_none = 0, /**< No additional feature. */
feature_caret = 1 << 0, /**< Output errors with carets. */
feature_fixit = 1 << 1, /**< Issue instructions to fix the sources. */
feature_syntax_only = 1 << 2, /**< Don't generate output. */
feature_all = ~0 /**< All above features. */
};
/** What additional features to use. */
extern int feature_flag;
/** Process the command line arguments.
*
* Exactly one operand, the grammar file, is expected; otherwise an
* error is reported and usage (EXIT_FAILURE) is called.
*
* \param argc size of \a argv
* \param argv list of arguments.
*/
void getargs (int argc, char *argv[]);
/* Used by parse-gram.y.  getargs calls them too, with
   command_line_prio as PRIO and a location that stands for the
   command line argument.  */
void language_argmatch (char const *arg, int prio, location loc);
void skeleton_arg (const char *arg, int prio, location loc);
void set_yacc (location loc);
/** In the string \c s, replace all characters \c from by \c to.
    The string is modified in place.  */
void tr (char *s, char from, char to);
#endif /* !GETARGS_H_ */
@@ -0,0 +1,93 @@
/* Graphical symbols.
Copyright (C) 2020-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "glyphs.h"
#include <assert.h>
#include <attribute.h>
#include <stdbool.h>
#include <string.h>
#include <mbswidth.h>
#include <unicodeio.h>
/* The glyphs and their display widths.  Those of type glyph_buffer_t
   are filled in by glyphs_init; the derivation separator is a
   constant.  */
glyph_buffer_t arrow;
int arrow_width;
glyph_buffer_t down_arrow;
int down_arrow_width;
glyph_buffer_t dot;
int dot_width;
glyph_buffer_t empty;
int empty_width;
const char *derivation_separator = " ";
int derivation_separator_width = 1;
/* The data passed to the callbacks of unicode_to_mb (on_success and
   on_failure) through their CALLBACK_ARG parameter.  */
typedef struct
{
/* The buffer to fill in. */
glyph_buffer_t *pbuf;
/* The text to store in *PBUF when on_failure is called. */
const char *fallback;
} callback_arg_t;
/* Callback of unicode_to_mb for success.  Store the BUFLEN bytes of
   BUF, followed by a NUL, in the glyph buffer designated by
   CALLBACK_ARG (a callback_arg_t).  The assertion keeps one spare
   byte in the buffer.  Return 1.  */
static long
on_success (const char *buf, size_t buflen, void *callback_arg)
{
  callback_arg_t *ctx = callback_arg;
  assert (buflen + 1 < sizeof *ctx->pbuf);
  char *end = stpncpy (*ctx->pbuf, buf, buflen);
  *end = '\0';
  return 1;
}
/* Callback of unicode_to_mb for failure.  Store the fallback text
   from CALLBACK_ARG (a callback_arg_t) in its glyph buffer.  CODE and
   MSG are not used.  Return 0.  */
static long
on_failure (unsigned code MAYBE_UNUSED, const char *msg MAYBE_UNUSED,
            void *callback_arg)
{
  callback_arg_t *ctx = callback_arg;
  const char *text = ctx->fallback;
  assert (strlen (text) + 1 < sizeof *ctx->pbuf);
  strcpy (*ctx->pbuf, text);
  return 0;
}
/* Fill GLYPH with the multibyte representation of the Unicode
   character CODE, or with FALLBACK if on_failure is invoked instead,
   and set *WIDTH to the width mbswidth reports for the stored text.
   Return the status from unicode_to_mb, converted to bool.  */
static bool
glyph_set (glyph_buffer_t *glyph, int *width,
           unsigned code, const char *fallback)
{
  callback_arg_t ctx;
  ctx.pbuf = glyph;
  ctx.fallback = fallback;

  int const status = unicode_to_mb (code, on_success, on_failure, &ctx);
  /* The callbacks have filled *GLYPH by now. */
  *width = mbswidth (*glyph, 0);
  return status;
}
/* Set up all the glyphs and their widths, each with its ASCII
   fallback, then append a space to DOWN_ARROW.  */
void
glyphs_init (void)
{
  glyph_set (&arrow, &arrow_width, 0x2192, "->");
  glyph_set (&dot, &dot_width, 0x2022, ".");
  glyph_set (&down_arrow, &down_arrow_width, 0x21b3, "`->");
  glyph_set (&empty, &empty_width, 0x03b5, "%empty");

  /* Room left in DOWN_ARROW, not counting its terminating NUL. */
  size_t const room = sizeof down_arrow - strlen (down_arrow) - 1;
  strncat (down_arrow, " ", room);
  down_arrow_width += 1;
}
@@ -0,0 +1,50 @@
/* Graphical symbols.
Copyright (C) 2020-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef GLYPHS_H
# define GLYPHS_H
/* Initialize the following variables.  To be called before any of
   the glyphs is used.  */
void glyphs_init (void);
/* In gnulib/lib/unicodeio.h unicode_to_mb uses a buffer of 25 bytes.
In down_arrow, we append one space. */
typedef char glyph_buffer_t[26];
/* "→", separates the lhs of a rule from its rhs. */
extern glyph_buffer_t arrow;
extern int arrow_width;
/* "•", a point in an item (aka, a dotted rule). */
extern glyph_buffer_t dot;
extern int dot_width;
/* "↳ ", below an lhs to announce the rhs. */
extern glyph_buffer_t down_arrow;
extern int down_arrow_width;
/* "ε", an empty rhs. */
extern glyph_buffer_t empty;
extern int empty_width;
/* " ", separates symbols in the rhs of a derivation. */
extern const char *derivation_separator;
extern int derivation_separator_width;
#endif /* GLYPHS_H */
+356
View File
@@ -0,0 +1,356 @@
/* Allocate input grammar variables for Bison.
Copyright (C) 1984, 1986, 1989, 2001-2003, 2005-2015, 2018-2021 Free
Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include "complain.h"
#include "getargs.h"
#include "glyphs.h"
#include "gram.h"
#include "print-xml.h"
#include "reader.h"
#include "reduce.h"
#include "symtab.h"
/* Definitions of the variables that describe the grammar.  Comments
   for these variables are in gram.h. */
item_number *ritem = NULL;
int nritems = 0;
rule *rules = NULL;
rule_number nrules = 0;
symbol **symbols = NULL;
int nsyms = 0;
/* NOTE(review): presumably starts at 1 to account for the
   end-of-input token, which gram.h says is counted in ntokens --
   confirm.  */
int ntokens = 1;
int nnterms = 0;
symbol_number *token_translations = NULL;
int max_code = 256;
int required_version = 0;
/* Print on OUT the item ITEM, which points inside RITEM: the number
   and lhs of its rule, then its rhs with the dot glyph in front of
   the symbol ITEM designates.  An empty rhs is displayed as the
   "empty" glyph followed by the dot.  PREVIOUS_RULE, if not NULL, is
   the previously printed rule; it lets rule_lhs_print factor a common
   lhs.  */
void
item_print (item_number *item, rule const *previous_rule, FILE *out)
{
  rule const *r = item_rule (item);
  sym_content const *previous_lhs
    = previous_rule ? previous_rule->lhs : NULL;
  rule_lhs_print (r, previous_lhs, out);

  if (*r->rhs < 0)
    {
      /* Empty rhs.  */
      fprintf (out, " %s %s", empty, dot);
      return;
    }

  /* Symbols before the dot.  */
  for (item_number *before = r->rhs; before < item; before++)
    fprintf (out, " %s", symbols[*before]->tag);
  fprintf (out, " %s", dot);
  /* Symbols after the dot.  */
  for (item_number *after = item; 0 <= *after; ++after)
    fprintf (out, " %s", symbols[*after]->tag);
}
/* Whether the 'number' of R is smaller than NRULES.  */
bool
rule_useful_in_grammar_p (rule const *r)
{
  rule_number const number = r->number;
  return number < nrules;
}
/* The converse of rule_useful_in_grammar_p: whether the 'number' of
   R is not smaller than NRULES.  */
bool
rule_useless_in_grammar_p (rule const *r)
{
  if (rule_useful_in_grammar_p (r))
    return false;
  return true;
}
/* Whether R is not flagged as useful although it is useful in the
   grammar.  */
bool
rule_useless_in_parser_p (rule const *r)
{
  if (r->useful)
    return false;
  return rule_useful_in_grammar_p (r);
}
/* Whether R has exactly one symbol in its rhs and no action.  */
bool
rule_useless_chain_p (rule const *r)
{
  if (rule_rhs_length (r) != 1)
    return false;
  return !r->action;
}
/* Print on OUT the number of R, followed by its lhs and a colon.
   When the lhs is PREVIOUS_LHS, i.e., the one already displayed,
   print instead as many spaces as its tag is long, and a '|'.  */
void
rule_lhs_print (rule const *r, sym_content const *previous_lhs, FILE *out)
{
  fprintf (out, " %3d ", r->number);
  if (previous_lhs == r->lhs)
    {
      int const width = (int) strlen (previous_lhs->symbol->tag);
      fprintf (out, "%*s|", width, "");
    }
  else
    fprintf (out, "%s:", r->lhs->symbol->tag);
}
/* Print on OUT, at indentation LEVEL, the lhs of R as an XML <lhs>
   element.  The tag goes through xml_escape, like the symbols of the
   rhs in rule_rhs_print_xml and the percent_prec attribute in
   grammar_rules_print_xml, so that the element stays well-formed
   whatever characters the tag contains.  */
void
rule_lhs_print_xml (rule const *r, FILE *out, int level)
{
  xml_printf (out, level, "<lhs>%s</lhs>",
              xml_escape (r->lhs->symbol->tag));
}
/* The number of symbols in the rhs of R, i.e., the number of items
   before the first negative one.  */
size_t
rule_rhs_length (rule const *r)
{
  item_number const *end = r->rhs;
  while (0 <= *end)
    ++end;
  return end - r->rhs;
}
/* Print on OUT the symbols of the rhs of R, each preceded by a
   space, or the "empty" glyph if the rhs is empty.  */
void
rule_rhs_print (rule const *r, FILE *out)
{
  if (*r->rhs < 0)
    {
      fprintf (out, " %s", empty);
      return;
    }

  for (item_number *rhsp = r->rhs; 0 <= *rhsp; ++rhsp)
    fprintf (out, " %s", symbols[*rhsp]->tag);
}
/* Print on OUT, at indentation LEVEL, the rhs of R as an XML <rhs>
   element containing one <symbol> per symbol, or <empty/> if the rhs
   is empty.  */
static void
rule_rhs_print_xml (rule const *r, FILE *out, int level)
{
  xml_puts (out, level, "<rhs>");
  if (*r->rhs < 0)
    xml_puts (out, level + 1, "<empty/>");
  else
    for (item_number *rhsp = r->rhs; 0 <= *rhsp; ++rhsp)
      xml_printf (out, level + 1, "<symbol>%s</symbol>",
                  xml_escape (symbols[*rhsp]->tag));
  xml_puts (out, level, "</rhs>");
}
/* Print the rule R on OUT: number and lhs, then rhs.  PREV_RULE, if
   not NULL, is the previously printed rule; its lhs is handed to
   rule_lhs_print so that a common lhs is not repeated.  */
void
rule_print (rule const *r, rule const *prev_rule, FILE *out)
{
  sym_content const *previous_lhs = NULL;
  if (prev_rule)
    previous_lhs = prev_rule->lhs;
  rule_lhs_print (r, previous_lhs, out);
  rule_rhs_print (r, out);
}
void
ritem_print (FILE *out)
{
fputs ("RITEM\n", out);
bool first = true;
for (int i = 0; i < nritems; ++i)
{
if (first)
{
fprintf (out, " %d: ", i);
first = false;
}
if (ritem[i] >= 0)
fprintf (out, " %s", symbols[ritem[i]]->tag);
else
{
fprintf (out, " (rule %d)\n", item_number_as_rule_number (ritem[i]));
first = true;
}
}
fputs ("\n\n", out);
}
/* The length of the longest rhs among the first NRULES rules, 0 if
   there are none.  The running maximum is a size_t, like the lengths
   it is compared with and like the return type, so there is neither a
   signed/unsigned comparison nor a narrowing conversion.  */
size_t
ritem_longest_rhs (void)
{
  size_t longest = 0;
  for (rule_number r = 0; r < nrules; ++r)
    {
      size_t const length = rule_rhs_length (&rules[r]);
      if (longest < length)
        longest = length;
    }
  return longest;
}
/* Print on OUT the rules that FILTER accepts (all of them if FILTER
   is NULL), among the NRULES + NUSELESS_PRODUCTIONS rules.  TITLE is
   printed before the first rule, a blank line separates rules with
   different lhs, and two newlines end the list.  Nothing at all is
   printed when no rule is selected.  */
void
grammar_rules_partial_print (FILE *out, const char *title,
                             rule_filter filter)
{
  /* The last rule printed; NULL as long as nothing was printed.  */
  rule *previous_rule = NULL;

  /* rule # : LHS -> RHS */
  for (rule_number r = 0; r < nrules + nuseless_productions; r++)
    {
      rule *current = &rules[r];
      if (filter && !filter (current))
        continue;

      if (!previous_rule)
        fprintf (out, "%s\n\n", title);
      else if (previous_rule->lhs != current->lhs)
        putc ('\n', out);

      rule_print (current, previous_rule, out);
      putc ('\n', out);
      previous_rule = current;
    }

  if (previous_rule)
    fputs ("\n\n", out);
}
/* Print on OUT, under the translated title "Grammar", the rules for
   which rule_useful_in_grammar_p holds.  */
void
grammar_rules_print (FILE *out)
{
grammar_rules_partial_print (out, _("Grammar"), rule_useful_in_grammar_p);
}
void
grammar_rules_print_xml (FILE *out, int level)
{
bool first = true;
for (rule_number r = 0; r < nrules + nuseless_productions; r++)
{
if (first)
xml_puts (out, level + 1, "<rules>");
first = false;
{
char const *usefulness
= rule_useless_in_grammar_p (&rules[r]) ? "useless-in-grammar"
: rule_useless_in_parser_p (&rules[r]) ? "useless-in-parser"
: "useful";
xml_indent (out, level + 2);
fprintf (out, "<rule number=\"%d\" usefulness=\"%s\"",
rules[r].number, usefulness);
if (rules[r].precsym)
fprintf (out, " percent_prec=\"%s\"",
xml_escape (rules[r].precsym->symbol->tag));
fputs (">\n", out);
}
rule_lhs_print_xml (&rules[r], out, level + 3);
rule_rhs_print_xml (&rules[r], out, level + 3);
xml_puts (out, level + 2, "</rule>");
}
if (!first)
xml_puts (out, level + 1, "</rules>");
else
xml_puts (out, level + 1, "<rules/>");
}
/* Print on OUT the section title S, underlined with one dash per
   byte of S, and followed by an empty line.  */
static void
section (FILE *out, const char *s)
{
  fputs (s, out);
  putc ('\n', out);
  for (int left = strlen (s); 0 < left; --left)
    putc ('-', out);
  fputs ("\n\n", out);
}
/* Dump the grammar on OUT for debugging, under TITLE: the counters,
   then the tokens (number, precedence, associativity, tag), the
   nonterminals, the rules with their attributes and the range of
   RITEM they occupy, and finally the rules in symbolic form.

   The bounds of the RITEM range are ints, so they are printed with
   %d; they used to be printed with %u, which does not match the type
   of the arguments.  */
void
grammar_dump (FILE *out, const char *title)
{
  fprintf (out, "%s\n\n", title);
  fprintf (out,
           "ntokens = %d, nnterms = %d, nsyms = %d, nrules = %d, nritems = %d\n\n",
           ntokens, nnterms, nsyms, nrules, nritems);

  section (out, "Tokens");
  {
    fprintf (out, "Value Sprec Sassoc Tag\n");
    for (symbol_number i = 0; i < ntokens; i++)
      fprintf (out, "%5d %5d %5d %s\n",
               i,
               symbols[i]->content->prec, symbols[i]->content->assoc,
               symbols[i]->tag);
    fprintf (out, "\n\n");
  }

  section (out, "Nonterminals");
  {
    fprintf (out, "Value Tag\n");
    for (symbol_number i = ntokens; i < nsyms; i++)
      fprintf (out, "%5d %s\n",
               i, symbols[i]->tag);
    fprintf (out, "\n\n");
  }

  section (out, "Rules");
  {
    fprintf (out,
             "Num (Prec, Assoc, Useful, UselessChain) Lhs"
             " -> (Ritem Range) Rhs\n");
    for (rule_number i = 0; i < nrules + nuseless_productions; ++i)
      {
        rule const *rule_i = &rules[i];
        /* Index in RITEM of the first item of the rhs.  */
        int const rhs_itemno = rule_i->rhs - ritem;
        int length = rule_rhs_length (rule_i);
        /* The item that ends the rhs must encode this very rule.  */
        aver (item_number_as_rule_number (rule_i->rhs[length]) == i);
        fprintf (out, "%3d (%2d, %2d, %2s, %2s) %2d -> (%2d-%2d)",
                 i,
                 rule_i->prec ? rule_i->prec->prec : 0,
                 rule_i->prec ? rule_i->prec->assoc : 0,
                 rule_i->useful ? "t" : "f",
                 rule_useless_chain_p (rule_i) ? "t" : "f",
                 rule_i->lhs->number,
                 rhs_itemno, rhs_itemno + length - 1);
        /* Dump the RHS.  */
        for (item_number *rhsp = rule_i->rhs; 0 <= *rhsp; ++rhsp)
          fprintf (out, " %3d", *rhsp);
        putc ('\n', out);
      }
  }
  fprintf (out, "\n\n");

  section (out, "Rules interpreted");
  for (rule_number r = 0; r < nrules + nuseless_productions; ++r)
    {
      fprintf (out, "%-5d %s:", r, rules[r].lhs->symbol->tag);
      rule_rhs_print (&rules[r], out);
      putc ('\n', out);
    }
  fprintf (out, "\n\n");
}
/* Issue MESSAGE as a Wother diagnostic at the location of each of
   the first NRULES rules that is not flagged as useful.  */
void
grammar_rules_useless_report (const char *message)
{
  for (rule_number r = 0; r < nrules; ++r)
    {
      /* Don't complain about rules whose LHS is useless, we already
         complained about it.  */
      if (reduce_nonterminal_useless_in_grammar (rules[r].lhs))
        continue;
      if (rules[r].useful)
        continue;
      complain (&rules[r].location, Wother, "%s", message);
    }
}
/* Free the memory held by the grammar: RITEM, RULES,
   TOKEN_TRANSLATIONS, the symbol table and the merger functions.  */
void
grammar_free (void)
{
/* What is freed is the address one item before RITEM.
NOTE(review): presumably RITEM points one past the start of its
allocation -- confirm where it is allocated.  */
if (ritem)
free (ritem - 1);
free (rules);
free (token_translations);
/* Free the symbol table data structure. */
symbols_free ();
free_merger_functions ();
}
+330
View File
@@ -0,0 +1,330 @@
/* Data definitions for internal representation of Bison's input.
Copyright (C) 1984, 1986, 1989, 1992, 2001-2007, 2009-2015, 2018-2021
Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef GRAM_H_
# define GRAM_H_
/* Representation of the grammar rules:
NTOKENS is the number of tokens, and NNTERMS is the number of
nonterminals (aka variables). NSYMS is the total number, NTOKENS +
NNTERMS.
Each symbol (either token or nterm) receives a symbol number.
Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1
are for nterms. Symbol number zero is the end-of-input token.
This token is counted in ntokens. The true number of token values
assigned is NTOKENS reduced by one for each alias declaration.
The rules receive rule numbers 0 to NRULES - 1 in the order they are
written. More precisely Bison augments the grammar with the
initial rule, '$accept: START-SYMBOL $end', which is numbered 0.
Rule numbers are presented to the user as they are.
RITEM stores both symbols (the RHS) and rule numbers: the
symbols are integers >= 0, and rule numbers are stored negative.
Since 0 is already the token $end, the rule number R cannot be
stored as -R: it is stored as -1 - R instead (see
rule_number_as_item_number), so that the rule number 0 is
distinct from every symbol.
Actions are accessed via the rule number.
The rules themselves are described by several arrays: amongst which
RITEM, and RULES.
RULES is an array of rules, whose members are:
RULES[R].lhs -- the symbol of the left hand side of rule R.
RULES[R].rhs -- the beginning of the portion of RITEM for rule R.
RULES[R].prec -- the symbol providing the precedence level of R.
RULES[R].precsym -- the symbol attached (via %prec) to give its
precedence to R. Of course, if set, it is equal to 'prec', but we
need to distinguish one from the other when reducing: a symbol used
in a %prec is not useless.
RULES[R].assoc -- the associativity of R.
RULES[R].dprec -- the dynamic precedence level of R (for GLR
parsing).
RULES[R].merger -- index of merging function for R (for GLR
parsing).
RULES[R].line -- the line where R was defined.
RULES[R].useful -- whether the rule is used. False if thrown away
by reduce().
The right hand side of rules is stored as symbol numbers in a
portion of RITEM.
The length of the portion is one greater than the number of symbols
in the rule's right hand side. The last element in the portion
contains -1 - R (see rule_number_as_item_number), which identifies
it as the end of a portion and says which rule it is for.
The portions of RITEM come in order of increasing rule number.
NRITEMS is the total length of RITEM. Each element of RITEM is
called an "item" of type item_number and its index in RITEM is an
item_index.
Item numbers are used in the finite state machine to represent
places that parsing can get to.
SYMBOLS[I]->prec records the precedence level of each symbol.
Precedence levels are assigned in increasing order starting with 1
so that numerically higher precedence values mean tighter binding
as they ought to. Zero as a symbol or rule's precedence means none
is assigned.
Associativities are recorded similarly in SYMBOLS[I]->assoc. */
# include "system.h"
# include "location.h"
# include "symtab.h"
/* Whether the symbol number I is that of a token, or that of a
   nonterminal: tokens are numbered below NTOKENS.  */
# define ISTOKEN(i) ((i) < ntokens)
# define ISVAR(i) ((i) >= ntokens)
/* Number of symbols, of tokens, and of nonterminals. */
extern int nsyms;
extern int ntokens;
extern int nnterms;
/* Elements of ritem. */
typedef int item_number;
# define ITEM_NUMBER_MAX INT_MAX
/* The items of all the rules, and their number. */
extern item_number *ritem;
extern int nritems;
/* Indices into ritem. */
typedef unsigned int item_index;
/* There is a weird relationship between OT1H item_number and OTOH
symbol_number and rule_number: we store the latter in
item_number. symbol_number values are stored as-is, while
the negation of (rule_number + 1) is stored.
Therefore, a symbol_number must be a valid item_number, and we
sometimes have to perform the converse transformation. */
/* The item number that stands for the symbol SYM: the same value.  */
static inline item_number
symbol_number_as_item_number (symbol_number sym)
{
  item_number const res = sym;
  return res;
}
/* The symbol number that the item number I stands for: the same
   value.  */
static inline symbol_number
item_number_as_symbol_number (item_number i)
{
  symbol_number const res = i;
  return res;
}
/* Whether the item number I stands for a symbol, i.e., is not
   negative.  */
static inline bool
item_number_is_symbol_number (item_number i)
{
  return 0 <= i;
}
/* Rule numbers.  They are stored in item numbers as -1 - R, see the
   conversion functions below.  */
typedef int rule_number;
# define RULE_NUMBER_MAX INT_MAX
/* The item number that stands for the rule number R: -1 - R, which
   is always negative for a nonnegative R.  */
static inline item_number
rule_number_as_item_number (rule_number r)
{
  item_number const encoded = -1 - r;
  return encoded;
}
/* The rule number that the item number I stands for: -1 - I, the
   converse of rule_number_as_item_number.  */
static inline rule_number
item_number_as_rule_number (item_number i)
{
  rule_number const decoded = -1 - i;
  return decoded;
}
/* Whether the item number I stands for a rule number, i.e., is
   negative.  */
static inline bool
item_number_is_rule_number (item_number i)
{
  return !(0 <= i);
}
/*--------.
| Rules. |
`--------*/
/* A rule of the grammar. */
typedef struct
{
/* The number of the rule in the source. It is usually the index in
RULES too, except if there are useless rules. */
rule_number code;
/* The index in RULES. Usually the rule number in the source,
except if some rules are useless. */
rule_number number;
/* The left hand side. */
sym_content *lhs;
/* The right hand side: the beginning of the portion of RITEM for
this rule, which ends with a negative item. */
item_number *rhs;
/* This symbol provides both the associativity, and the precedence. */
sym_content *prec;
/* The dynamic precedence level, and the index of the merging
function (for GLR parsing). */
int dprec;
int merger;
/* This symbol was attached to the rule via %prec. */
sym_content *precsym;
/* Location of the rhs. */
location location;
/* Whether the rule is used. False if thrown away by reduce(). */
bool useful;
bool is_predicate;
/* Counts of the numbers of expected conflicts for this rule, or -1 if none
given. */
int expected_sr_conflicts;
int expected_rr_conflicts;
/* The user action, NULL if there is none (see rule_useless_chain_p),
and its location. */
const char *action;
location action_loc;
} rule;
/* The used rules (size NRULES).  The printing functions of gram.c
   also read the NUSELESS_PRODUCTIONS entries that follow them.  */
extern rule *rules;
extern rule_number nrules;
/* Get the rule associated to this item.  ITEM points inside RITEM:
   skip forward to the negative item that ends the portion of the
   rule, and decode the rule number it holds.  */
static inline rule const *
item_rule (item_number const *item)
{
  item_number const *end = item;
  for (; !item_number_is_rule_number (*end); ++end)
    continue;
  return &rules[item_number_as_rule_number (*end)];
}
/* Pretty-print this ITEM (as in the report) on OUT. ITEM points inside
RITEM. PREVIOUS_RULE is used to see if the lhs is common, in which
case LHS is factored. Passing NULL is fine. */
void item_print (item_number *item, rule const *previous_rule,
FILE *out);
/*--------.
| Rules. |
`--------*/
/* A function that selects a rule: it returns true for the rules to
   keep.  See grammar_rules_partial_print. */
typedef bool (*rule_filter) (rule const *);
/* Whether R is an accepting rule (i.e., its reduction terminates
   parsing with success).  */
static inline bool
rule_is_initial (rule const *r)
{
  /* In the case of multistart, we need to check whether the LHS is
     $accept.  In the case of "unistart", it would suffice to
     check whether this is rule number 0.  */
  sym_content const *accept = acceptsymbol->content;
  return r->lhs == accept;
}
/* Whether the rule has a 'number' smaller than NRULES. That is, it
is useful in the grammar. */
bool rule_useful_in_grammar_p (rule const *r);
/* Whether the rule has a 'number' greater than or equal to NRULES.
That is, it is useless in the grammar. */
bool rule_useless_in_grammar_p (rule const *r);
/* Whether the rule is not flagged as useful but is useful in the
grammar. In other words, it was discarded because of conflicts. */
bool rule_useless_in_parser_p (rule const *r);
/* Whether the rule has a single symbol in its RHS, and no user
action. */
bool rule_useless_chain_p (rule const *r);
/* Print this rule's number and lhs on OUT. If a PREVIOUS_LHS was
already displayed (by a previous call for another rule), avoid
useless repetitions. */
void rule_lhs_print (rule const *r, sym_content const *previous_lhs,
FILE *out);
/* Print this rule's lhs on OUT as an XML <lhs> element, at
indentation LEVEL. */
void rule_lhs_print_xml (rule const *r, FILE *out, int level);
/* The length of the RHS. */
size_t rule_rhs_length (rule const *r);
/* Print this rule's RHS on OUT. */
void rule_rhs_print (rule const *r, FILE *out);
/* Print this rule on OUT. If a PREVIOUS_RULE was already displayed,
avoid useless repetitions of their LHS. */
void rule_print (rule const *r, rule const *prev_rule, FILE *out);
/* Table of the symbols, indexed by the symbol number. */
extern symbol **symbols;
/* TOKEN_TRANSLATIONS -- a table indexed by a token number as returned
by the user's yylex routine, it yields the internal token number
used by the parser and throughout bison. */
extern symbol_number *token_translations;
extern int max_code;
/* Dump RITEM for traces. */
void ritem_print (FILE *out);
/* The size of the longest rule RHS. */
size_t ritem_longest_rhs (void);
/* Print the grammar's rules that match FILTER on OUT under TITLE.
A NULL FILTER selects every rule. */
void grammar_rules_partial_print (FILE *out, const char *title,
rule_filter filter);
/* Print the grammar's useful rules on OUT. */
void grammar_rules_print (FILE *out);
/* Print all of the grammar's rules with a "usefulness" attribute. */
void grammar_rules_print_xml (FILE *out, int level);
/* Dump the grammar on OUT, under TITLE. */
void grammar_dump (FILE *out, const char *title);
/* Report on STDERR the rules that are not flagged USEFUL, using the
MESSAGE (which can be 'rule useless in grammar' when invoked after grammar
reduction, or 'rule useless in parser due to conflicts' after conflicts
were taken into account). */
void grammar_rules_useless_report (const char *message);
/* Free the packed grammar. */
void grammar_free (void);
/* The version %required by the grammar file, as an int (100 * major +
minor). 0 if unspecified. */
extern int required_version;
#endif /* !GRAM_H_ */
@@ -0,0 +1,218 @@
/* Output Graphviz specification of a state machine generated by Bison.
Copyright (C) 2006-2007, 2009-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
/* Written by Paul Eggert and Satya Kiran Popuri. */
#include <config.h>
#include "system.h"
#include <quotearg.h>
#include "files.h"
#include "gram.h"
#include "graphviz.h"
#include "tables.h"
/* Return an unambiguous printable representation for NAME, suitable
for C strings. Use slot 2 since the user may use slots 0 and 1.
The result is the one of quotearg_n_style for slot 2. */
static char *
quote (char const *name)
{
return quotearg_n_style (2, c_quoting_style, name);
}
/* Start the Graphviz output on FOUT: a translated header comment
   about the package, the opening of a digraph named after the quoted
   GRAMMAR_FILE, and the default attributes of nodes and edges.  */
void
start_graph (FILE *fout)
{
  fprintf (fout,
           _("// Generated by %s.\n"
             "// Report bugs to <%s>.\n"
             "// Home page: <%s>.\n"
             "\n"),
           PACKAGE_STRING, PACKAGE_BUGREPORT, PACKAGE_URL);
  fprintf (fout, "digraph %s\n{\n", quote (grammar_file));
  fputs (" node [fontname = courier, shape = box, colorscheme = paired6]\n"
         " edge [fontname = courier]\n"
         "\n",
         fout);
}
/* Output on FOUT the node ID with the label LABEL.  LABEL is
   written as is between double quotes, contrary to what output_edge
   does for its own label.  NOTE(review): presumably the callers pass
   an already escaped label -- confirm.  */
void
output_node (int id, char const *label, FILE *fout)
{
fprintf (fout, " %d [label=\"%s\"]\n", id, label);
}
/* Output on FOUT an edge from the node SOURCE to the node
   DESTINATION, with the style STYLE.  If LABEL is not NULL, it is
   attached as a double-quoted label in which double quotes and
   backslashes are preceded by a backslash.  */
void
output_edge (int source, int destination, char const *label,
             char const *style, FILE *fout)
{
  fprintf (fout, " %d -> %d [style=%s", source, destination, style);
  if (label)
    {
      fputs (" label=\"", fout);
      for (char const *p = label; *p != '\0'; ++p)
        {
          if (*p == '"' || *p == '\\')
            putc ('\\', fout);
          putc (*p, fout);
        }
      putc ('"', fout);
    }
  fputs ("]\n", fout);
}
/* Store in *NO_REDUCE_SET a freshly created set of NTOKENS bits, in
   which the bits set are the symbols labelling the shifts of S and the
   symbols recorded in S->errs.  */
static void
no_reduce_bitset_init (state const *s, bitset *no_reduce_set)
{
  bitset blocked = bitset_create (ntokens, BITSET_FIXED);
  bitset_zero (blocked);

  /* Symbols on which S shifts.  */
  {
    int t;
    FOR_EACH_SHIFT (s->transitions, t)
      bitset_set (blocked, TRANSITION_SYMBOL (s->transitions, t));
  }

  /* Symbols listed in S's errs (null entries are skipped).  */
  for (int e = 0; e < s->errs->num; ++e)
    {
      if (!s->errs->symbols[e])
        continue;
      bitset_set (blocked, s->errs->symbols[e]->content->number);
    }

  *no_reduce_set = blocked;
}
/* Emit on FOUT the reduction of rule RULENO in state SOURCE: an edge
   from the state to a diamond node, then the diamond node itself.

   OUT holds the list of lookahead tokens accumulated by the caller.
   ENABLED tells whether this is the enabled or the disabled flavor of
   the reduction.  FIRST is true when nothing is to be printed for
   this flavor.  */
static void
conclude_red (struct obstack *out, int source, rule_number ruleno,
              bool enabled, bool first, FILE *fout)
{
  /* No lookahead token was a valid transition: the reduction is
     hidden.  Just close the pending obstack object and give up.  */
  if (first)
    {
      (void) obstack_finish0 (out);
      return;
    }

  /* Disabled reductions get a "d" suffix in their node name.  */
  char const *suffix = enabled ? "" : "d";

  /* Head of the edge.  Reduction nodes are named "nRm", n being the
     source state and m the rule number, so that reductions by the same
     rule from different states do not all point to the same node.  */
  fprintf (fout, " %d -> \"%dR%d%s\" [",
           source, source, ruleno, suffix);

  /* The lookahead tokens, if any, were grown on the obstack by the
     caller.  */
  if (! obstack_empty_p (out))
    {
      char *lookaheads = obstack_finish0 (out);
      fprintf (fout, "label=\"[%s]\", ", lookaheads);
      obstack_free (out, lookaheads);
    }

  /* Tail of the edge.  */
  fprintf (fout, "style=solid]\n");

  /* The diamond standing for the target rule.  */
  fprintf (fout, " \"%dR%d%s\" [label=\"",
           source, ruleno, suffix);
  bool const is_accept = rule_is_initial (&rules[ruleno]);
  if (is_accept)
    fprintf (fout, "Acc");
  else
    fprintf (fout, "R%d", ruleno);

  char const *fill;
  if (!enabled)
    fill = "5";
  else if (is_accept)
    fill = "1";
  else
    fill = "3";
  fprintf (fout, "\", fillcolor=%s, shape=diamond, style=filled]\n",
           fill);
}
/* Append token name TOK (with backslash escaping) to the list being
   grown on OUT, preceded by ", " unless FIRST says it is the first
   item.  Always return false, i.e., the new value of the caller's
   "first" flag.  */
static bool
print_token (struct obstack *out, bool first, char const *tok)
{
  bool const need_separator = !first;
  if (need_separator)
    obstack_sgrow (out, ", ");
  obstack_backslash (out, tok);
  return false;
}
/* Output on FOUT the reductions REDS of state S.  For each rule, up to
   two edges are produced: one for the lookaheads on which the
   reduction is disabled, one for those on which it is enabled.  */
void
output_red (state const *s, reductions const *reds, FILE *fout)
{
  int const source = s->number;

  /* Tokens on which a reduction cannot (or can no longer) apply.  */
  bitset no_reduce_set;
  no_reduce_bitset_init (s, &no_reduce_set);

  rule *default_reduction = NULL;
  if (yydefact[s->number])
    default_reduction = &rules[yydefact[s->number] - 1];

  /* One obstack per flavor (disabled, enabled): in the end we want two
     separate edges, even though most of the time only one is actually
     printed.  */
  struct obstack dout;
  struct obstack eout;
  obstack_init (&dout);
  obstack_init (&eout);

  for (int j = 0; j < reds->num; ++j)
    {
      rule const *r = reds->rules[j];
      rule_number const ruleno = r->number;
      bool const defaulted = default_reduction && default_reduction == r;

      /* Collect the lookahead tokens: disabled ones in DOUT, enabled
         ones in EOUT.  */
      bool firstd = true;
      bool firste = true;
      if (reds->lookaheads)
        for (int i = 0; i < ntokens; i++)
          {
            if (!bitset_test (reds->lookaheads[j], i))
              continue;
            if (bitset_test (no_reduce_set, i))
              {
                firstd = print_token (&dout, firstd, symbols[i]->tag);
                continue;
              }
            if (! defaulted)
              firste = print_token (&eout, firste, symbols[i]->tag);
            /* Later rules see this token as already taken.  */
            bitset_set (no_reduce_set, i);
          }

      /* Emit both flavors.  */
      conclude_red (&dout, source, ruleno, false, firstd, fout);
      conclude_red (&eout, source, ruleno, true, firste && !defaulted, fout);
    }

  obstack_free (&dout, 0);
  obstack_free (&eout, 0);
  bitset_free (no_reduce_set);
}
/* Write on FOUT the closing brace of the Dot graph.  */
void
finish_graph (FILE *fout)
{
  fputc ('}', fout);
  fputc ('\n', fout);
}
@@ -0,0 +1,66 @@
/* Output Graphviz specification of a state machine generated by Bison.
Copyright (C) 2006, 2010-2015, 2018-2021 Free Software Foundation,
Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
/* Written by Paul Eggert and Satya Kiran Popuri. */
#ifndef GRAPHVIZ_H_
# define GRAPHVIZ_H_
# include "state.h"
/** Begin a Dot graph.
*
* \param fout output stream.
*/
void start_graph (FILE *fout);
/** Output a Dot node.
*
* \param id identifier of the node
* \param label human readable label of the node; it is written verbatim
* between double quotes, so it must already be escaped for Dot.
* \param fout output stream.
*/
void output_node (int id, char const *label, FILE *fout);
/** Output a Dot edge.
* \param source id of the source node
* \param destination id of the target node
* \param label human readable label of the edge
* (no Dot escaping needed). Can be 0.
* \param style Dot style of the edge (e.g., "dotted" or "solid").
* \param fout output stream.
*/
void output_edge (int source, int destination, char const *label,
char const *style, FILE *fout);
/** Output a reduction.
* \param s current state
* \param reds the set of reductions
* \param fout output stream.
*/
void output_red (state const *s, reductions const *reds, FILE *fout);
/** End a Dot graph.
*
* \param fout output stream.
*/
void finish_graph (FILE *fout);
#endif /* ! GRAPHVIZ_H_ */
@@ -0,0 +1,37 @@
/* This file is not compiled in, it is used only to expose more
strings to gettextize.
Why is this needed?
Bison emits strings to translate in the generated code, for builtin
tokens. So they appear only in generated parsers, which are not
shipped, so they are not in the src tree, so we cannot use them in
our POTFILE.
Except src/parse-gram.c, which is in the source tree. And even in
the git repo. But to avoid useless diffs in the repo, we do not
keep the #line directives in the src tree. Yet, for the user, we
ship a src/parse-gram.c _with_ the #lines. This is done in a
dist-hook which regenerates src/parse-gram.c when we run "make
dist".
Unfortunately, then, update-po traverses the whole tree and sees
that the location of the strings to translate in src/parse-gram.c
have changed, so the bison.pot is to be updated. And that is not
possible in the "make dist" which is run within "make distcheck"
(not the one preparing the dist for distcheck, the one run by
distcheck to check that a distributed tarball can build a tarball)
because then the src tree is read-only.
So let's not put src/parse-gram.c in the POTFILE, and expose these
strings to gettextize by hand.
*/
// Please syntax-check.
#include <config.h>
/* Strings emitted by generated parsers, listed here only so that the
   gettext tools can extract them.  */
static const char *const msgid[] =
{
  N_("end of file"),
  N_("invalid token")
};
File diff suppressed because it is too large Load Diff
+46
View File
@@ -0,0 +1,46 @@
/* IELR main implementation.
Copyright (C) 2009-2015, 2018-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef IELR_H_
# define IELR_H_
# include <bitset.h>
# include "state.h"
/**
* \pre
* - \c ::states is of size \c ::nstates and defines an LR(0) parser
* for the user's grammar.
* - \c ::ntokens is the number of tokens in the grammar.
* \post
* - \c ::states is of size \c ::nstates (which might be greater than
* <tt>::nstates \@pre</tt>) and defines the type of parser specified by
* the value of the \c \%define variable \c lr.type. Its value can be:
* - \c "lalr".
* - \c "ielr".
* - \c "canonical-lr".
*/
void ielr (void);
bool ielr_item_has_lookahead (state *s, symbol_number lhs, size_t item,
symbol_number lookahead, state ***predecessors,
bitset **item_lookahead_sets);
#endif /* !IELR_H_ */
+624
View File
@@ -0,0 +1,624 @@
/* Compute lookahead criteria for Bison.
Copyright (C) 1984, 1986, 1989, 2000-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
/* Find which rules need lookahead in each state, and which lookahead
tokens they accept. */
#include <config.h>
#include "system.h"
#include <bitset.h>
#include <bitsetv.h>
#include "complain.h"
#include "derives.h"
#include "getargs.h"
#include "gram.h"
#include "lalr.h"
#include "lr0.h"
#include "muscle-tab.h"
#include "nullable.h"
#include "reader.h"
#include "relation.h"
#include "symtab.h"
/* goto_map[N - ntokens] is the index in from_state/to_state of the
first goto labeled with nterm N; the entry past the last nterm holds
ngotos. Built by set_goto_map. */
goto_number *goto_map = NULL;
/* Number of gotos, i.e., the size of from_state and to_state. */
goto_number ngotos = 0;
/* from_state[g] is the number of the state goto g leaves. */
state_number *from_state = NULL;
/* to_state[g] is the number of the state goto g leads to. */
state_number *to_state = NULL;
/* goto_follows[g] is the set of tokens following goto g. */
bitsetv goto_follows = NULL;
/* Linked list of goto numbers. Lists are singly linked and end with
a null NEXT; new nodes are created by goto_list_new. */
typedef struct goto_list
{
/* The following node, or NULL at the end of the list. */
struct goto_list *next;
/* The goto number stored in this node. */
goto_number value;
} goto_list;
/* Return a newly allocated list node holding VALUE, placed in front
   of the list NEXT (which may be null).  */
static goto_list *
goto_list_new (goto_number value, struct goto_list *next)
{
  goto_list *node = xmalloc (sizeof *node);
  node->value = value;
  node->next = next;
  return node;
}
/* LA is an nLA by NTOKENS matrix of bits. LA[l, i] is 1 if the rule
LArule[l] is applicable in the appropriate state when the next
token is symbol i. If LA[l, i] and LA[l, j] are both 1 for i != j,
it is a conflict. */
static bitsetv LA = NULL;
/* Number of rows of LA. Computed by initialize_LA as the total number
of reductions requiring a lookahead, but never less than 1. */
size_t nLA;
/* "(p, A) includes (p', B)" iff
B → βAγ, γ nullable, and p'-- β --> p (i.e., state p' reaches p on label β).
Definition p.621 [DeRemer 1982].
INCLUDES[(p, A)] = [(p', B),...] */
static goto_number **includes;
/* "(q, A → ω) lookback (p, A)" iff state p reaches state q on label ω.
Definition p.621 [DeRemer 1982]. */
static goto_list **lookback;
/* Print goto number I on OUT as "goto[I] = (source state, symbol,
   destination state)", without a trailing newline.  */
static void
goto_print (goto_number i, FILE *out)
{
  state_number const from = from_state[i];
  state_number const to = to_state[i];
  /* The goto is labeled by the symbol that accesses its target.  */
  symbol_number const label = states[to]->accessing_symbol;
  fprintf (out,
           "goto[%zu] = (%d, %s, %d)",
           i, from, symbols[label]->tag, to);
}
/* Build goto_map, ngotos, from_state and to_state from the automaton.

   Gotos are grouped by nterm: those labeled with nterm N occupy the
   indexes goto_map[N - ntokens] .. goto_map[N - ntokens + 1] - 1 of
   from_state and to_state.  Aborts (via aver) if the number of gotos
   reaches GOTO_NUMBER_MAXIMUM.  */
void
set_goto_map (void)
{
  /* Count the number of gotos (ngotos) per nterm (goto_map). */
  if (trace_flag & trace_automaton)
    fprintf (stderr, "nnterms: %d\n", nnterms);

  goto_map = xcalloc (nnterms + 1, sizeof *goto_map);
  ngotos = 0;
  for (state_number s = 0; s < nstates; ++s)
    {
      transitions *trans = states[s]->transitions;
      /* Gotos are scanned from the end of the transitions.  */
      for (int i = trans->num - 1; 0 <= i && TRANSITION_IS_GOTO (trans, i); --i)
        {
          ngotos++;

          /* Abort if (ngotos + 1) would overflow.  */
          aver (ngotos != GOTO_NUMBER_MAXIMUM);

          goto_map[TRANSITION_SYMBOL (trans, i) - ntokens]++;
        }
    }

  /* Turn the per-nterm counts into start indexes.  TEMP_MAP keeps a
     second copy that is used below as a per-nterm insertion cursor.  */
  goto_number *temp_map = xnmalloc (nnterms + 1, sizeof *temp_map);
  {
    goto_number k = 0;
    for (symbol_number i = ntokens; i < nsyms; ++i)
      {
        temp_map[i - ntokens] = k;
        k += goto_map[i - ntokens];
      }

    for (symbol_number i = ntokens; i < nsyms; ++i)
      goto_map[i - ntokens] = temp_map[i - ntokens];

    goto_map[nsyms - ntokens] = ngotos;
    temp_map[nsyms - ntokens] = ngotos;
  }

  /* Fill from_state and to_state, each goto going to the next free
     slot of its nterm.  */
  from_state = xcalloc (ngotos, sizeof *from_state);
  to_state = xcalloc (ngotos, sizeof *to_state);

  for (state_number s = 0; s < nstates; ++s)
    {
      const transitions *trans = states[s]->transitions;
      for (int i = trans->num - 1; 0 <= i && TRANSITION_IS_GOTO (trans, i); --i)
        {
          goto_number k = temp_map[TRANSITION_SYMBOL (trans, i) - ntokens]++;
          from_state[k] = s;
          to_state[k] = trans->states[i]->number;
        }
    }

  free (temp_map);

  if (trace_flag & trace_automaton)
    {
      /* goto_number is an unsigned type: convert explicitly to match
         the %ld conversions.  Converting before subtracting makes an
         empty range starting at 0 read "0 .. -1".  */
      for (int i = 0; i < nnterms; ++i)
        fprintf (stderr, "goto_map[%d (%s)] = %ld .. %ld\n",
                 i, symbols[ntokens + i]->tag,
                 (long) goto_map[i], (long) goto_map[i + 1] - 1);
      for (goto_number i = 0; i < ngotos; ++i)
        {
          goto_print (i, stderr);
          fputc ('\n', stderr);
        }
    }
}
/* Return the number of the goto from state SRC labeled with nterm SYM.

   The gotos on SYM occupy a contiguous range of from_state, which is
   searched by dichotomy (this assumes the range is sorted by source
   state, as set_goto_map produces it).  The goto must exist: if it
   does not, the function aborts via aver.  */
goto_number
map_goto (state_number src, symbol_number sym)
{
  /* Search the half-open range [LOW, HIGH).  goto_number is unsigned,
     so an inclusive upper bound would wrap around when the search
     moves below index 0, and the failure would go unnoticed.  */
  goto_number low = goto_map[sym - ntokens];
  goto_number high = goto_map[sym - ntokens + 1];
  assert (low != high);

  for (;;)
    {
      /* An empty range means there is no such goto.  */
      aver (low < high);
      goto_number middle = low + (high - low) / 2;
      state_number s = from_state[middle];
      if (s == src)
        return middle;
      else if (s < src)
        low = middle + 1;
      else
        high = middle;
    }
}
/* Debugging aid: print on OUT, under the heading TITLE, the tokens
   currently recorded in goto_follows for each goto.  */
static void
follows_print (const char* title, FILE *out)
{
  fprintf (out, "%s:\n", title);
  goto_number g = 0;
  while (g < ngotos)
    {
      fputs (" FOLLOWS[", out);
      goto_print (g, out);
      fputs ("] =", out);

      bitset_iterator it;
      symbol_number tok;
      BITSET_FOR_EACH (it, goto_follows[g], tok, 0)
        fprintf (out, " %s", symbols[tok]->tag);

      fputc ('\n', out);
      ++g;
    }
  fputc ('\n', out);
}
/* Build goto_follows.
For each goto, first record the tokens shifted from its destination
state, then propagate the sets along the READS relation (gotos on
nullable nterms leaving that destination state) with
relation_digraph. READS is local and freed before returning. */
static void
initialize_goto_follows (void)
{
/* READS[i] is the END_NODE-terminated list of gotos related to goto i,
or NULL if there are none. */
goto_number **reads = xnmalloc (ngotos, sizeof *reads);
/* Scratch buffer holding the edges of the goto being processed. */
goto_number *edge = xnmalloc (ngotos, sizeof *edge);
goto_follows = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
for (goto_number i = 0; i < ngotos; ++i)
{
state_number dst = to_state[i];
const transitions *trans = states[dst]->transitions;
/* J is deliberately shared by the two loops below: the second one
resumes where FOR_EACH_SHIFT stopped (presumably at the first
goto, the shifts coming first in TRANS -- confirm against
state.h). */
int j;
FOR_EACH_SHIFT (trans, j)
bitset_set (goto_follows[i], TRANSITION_SYMBOL (trans, j));
/* Gotos outgoing from DST. */
goto_number nedges = 0;
for (; j < trans->num; ++j)
{
symbol_number sym = TRANSITION_SYMBOL (trans, j);
/* Only gotos on nullable nterms contribute an edge. */
if (nullable[sym - ntokens])
{
assert (nedges < ngotos);
edge[nedges++] = map_goto (dst, sym);
}
}
if (nedges == 0)
reads[i] = NULL;
else
{
/* One extra slot for the END_NODE terminator. */
reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]);
memcpy (reads[i], edge, nedges * sizeof edge[0]);
reads[i][nedges] = END_NODE;
}
}
if (trace_flag & trace_automaton)
{
follows_print ("follows after shifts", stderr);
relation_print ("reads", reads, ngotos, goto_print, stderr);
}
/* Propagate the sets along READS. */
relation_digraph (reads, ngotos, goto_follows);
if (trace_flag & trace_automaton)
follows_print ("follows after read", stderr);
for (goto_number i = 0; i < ngotos; ++i)
free (reads[i]);
free (reads);
free (edge);
}
/* Find the state which LOOKBACK[LOOKBACK_INDEX] is about. */
static const state *
lookback_find_state (int lookback_index)
{
state *res = NULL;
for (int j = 0; j < nstates; ++j)
if (states[j]->reductions
&& states[j]->reductions->lookaheads)
{
if (states[j]->reductions->lookaheads - LA > lookback_index)
/* Went too far. */
break;
else
res = states[j];
}
/* Pacify "potential null pointer dereference" warning. */
if (!res)
abort ();
return res;
}
/* Debugging aid: print on OUT every non-empty entry of LOOKBACK, as
   "index = (state, rule) -> goto goto ...".  */
static void
lookback_print (FILE *out)
{
  fputs ("lookback:\n", out);
  for (int i = 0; i < nLA; ++i)
    {
      if (!lookback[i])
        continue;

      fprintf (out, " %3d = ", i);

      /* Recover the (state, rule) pair this row of LA belongs to.  */
      const state *s = lookback_find_state (i);
      int rnum = i - (s->reductions->lookaheads - LA);
      fprintf (out, "(%3d, ", s->number);
      rule_print (s->reductions->rules[rnum], NULL, out);
      fputs (") ->", out);

      goto_list *node = lookback[i];
      while (node)
        {
          fputc (' ', out);
          goto_print (node->value, out);
          node = node->next;
        }
      fputc ('\n', out);
    }
  fputc ('\n', out);
}
/* Add (S, R) -> GOTONO to LOOKBACK.

   "(q, A → ω) lookback (p, A)" iff state p reaches state q on label ω.

   S is the state in which rule R is reduced; its lookahead sets must
   have been allocated in LA (the caller only passes inconsistent
   states).  GOTONO is the goto number to prepend to the list of
   (S, R).  */
static void
add_lookback_edge (state *s, rule const *r, goto_number gotono)
{
  /* Row of LA (hence entry of LOOKBACK) for reduction R in S.  */
  int const first_row = s->reductions->lookaheads - LA;
  int const slot = first_row + state_reduction_find (s, r);
  lookback[slot] = goto_list_new (gotono, lookback[slot]);
}
/* Compute INCLUDES and LOOKBACK. Corresponds to step E in Sec. 6 of
[DeRemer 1982].
LOOKBACK must already be allocated: it is filled through
add_lookback_edge. INCLUDES is allocated here, and transposed before
returning. */
static void
build_relations (void)
{
/* Scratch buffer: the edges collected for the goto being processed. */
goto_number *edge = xnmalloc (ngotos, sizeof *edge);
/* Scratch buffer: the states traversed along a rule's RHS, starting
with the source state (hence the + 1). */
state_number *path = xnmalloc (ritem_longest_rhs () + 1, sizeof *path);
includes = xnmalloc (ngotos, sizeof *includes);
/* For each goto (from SRC to DST labeled by nterm VAR), iterate
over each rule with VAR as LHS, and find the path PATH from SRC
labeled with the RHS of the rule. */
for (goto_number i = 0; i < ngotos; ++i)
{
const state_number src = from_state[i];
const state_number dst = to_state[i];
symbol_number var = states[dst]->accessing_symbol;
/* Size of EDGE. */
int nedges = 0;
for (rule **rulep = derives[var - ntokens]; *rulep; ++rulep)
{
rule const *r = *rulep;
state *s = states[src];
path[0] = s->number;
/* Length of PATH. */
int length = 1;
/* Follow the transitions on each RHS symbol; a negative item ends
the RHS. */
for (item_number const *rp = r->rhs; 0 <= *rp; rp++)
{
symbol_number sym = item_number_as_symbol_number (*rp);
s = transitions_to (s, sym);
path[length++] = s->number;
}
/* S is the end of PATH. */
if (!s->consistent)
add_lookback_edge (s, r, i);
/* Walk back PATH from penultimate to beginning.
The "0 <= p" part is actually useless: each rhs ends in a
rule number (for which ISVAR(...) is false), and there is
a sentinel (ritem[-1]=0) before the first rhs. */
for (int p = length - 2; 0 <= p && ISVAR (r->rhs[p]); --p)
{
symbol_number sym = item_number_as_symbol_number (r->rhs[p]);
goto_number g = map_goto (path[p], sym);
/* Insert G if not already in EDGE.
FIXME: linear search. A bitset instead? */
{
bool found = false;
for (int j = 0; !found && j < nedges; ++j)
found = edge[j] == g;
if (!found)
{
assert (nedges < ngotos);
edge[nedges++] = g;
}
}
/* Keep walking back only while the symbols just passed are
nullable. */
if (!nullable[sym - ntokens])
break;
}
}
if (trace_flag & trace_automaton)
{
goto_print (i, stderr);
fputs (" edges = ", stderr);
for (int j = 0; j < nedges; ++j)
{
fputc (' ', stderr);
goto_print (edge[j], stderr);
}
fputc ('\n', stderr);
}
/* Save the collected edges as an END_NODE-terminated array, or NULL
when there are none. */
if (nedges == 0)
includes[i] = NULL;
else
{
includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]);
for (int j = 0; j < nedges; ++j)
includes[i][j] = edge[j];
includes[i][nedges] = END_NODE;
}
}
free (edge);
free (path);
relation_transpose (&includes, ngotos);
if (trace_flag & trace_automaton)
relation_print ("includes", includes, ngotos, goto_print, stderr);
}
/* Propagate goto_follows along the INCLUDES relation, then release
   INCLUDES, which is no longer needed.  */
static void
compute_follows (void)
{
  relation_digraph (includes, ngotos, goto_follows);
  if (trace_flag & trace_sets)
    follows_print ("follows after includes", stderr);

  /* Free each row, then the table itself.  */
  goto_number g = 0;
  while (g < ngotos)
    {
      free (includes[g]);
      ++g;
    }
  free (includes);
}
/* Compute the lookahead sets: each row of LA receives the union of
   the goto_follows of the gotos in its LOOKBACK list.  LOOKBACK is
   freed afterwards.  */
static void
compute_lookaheads (void)
{
  if (trace_flag & trace_automaton)
    lookback_print (stderr);

  for (size_t row = 0; row < nLA; ++row)
    {
      goto_list *node = lookback[row];
      while (node)
        {
          bitset_or (LA[row], LA[row], goto_follows[node->value]);
          node = node->next;
        }
    }

  /* Free LOOKBACK.  */
  for (size_t row = 0; row < nLA; ++row)
    LIST_FREE (goto_list, lookback[row]);
  free (lookback);
}
/*------------------------------------------------------------------.
| Set S->consistent, and return the number of lookahead sets S      |
| requires: 0 if S is consistent, its number of reductions          |
| otherwise.                                                        |
`------------------------------------------------------------------*/

static int
state_lookaheads_count (state *s, bool default_reduction_only_for_accept)
{
  const reductions *reds = s->reductions;
  const transitions *trans = s->transitions;

  /* Transitions are only disabled during conflict resolution, which
     has not happened yet, so transition 0 cannot be disabled.  This
     used to be checked at run time; it is kept as an aver.  */
  aver (trans->num == 0 || !TRANSITION_IS_DISABLED (trans, 0));

  /* A lookahead is needed to tell several reductions apart, or to
     tell a reduction from a shift.  Besides, when the user restricted
     default reductions to the accepting state, any state with a
     reduction other than the initial rule also needs lookaheads (in
     the accepting state no lookahead token makes sense, so none should
     be read).  Every other state is 'consistent'.  */
  bool needs_lookahead = false;
  if (reds->num > 1)
    needs_lookahead = true;
  else if (reds->num == 1)
    {
      if (trans->num && TRANSITION_IS_SHIFT (trans, 0))
        needs_lookahead = true;
      else if (!rule_is_initial (reds->rules[0])
               && default_reduction_only_for_accept)
        needs_lookahead = true;
    }

  s->consistent = !needs_lookahead;

  if (s->consistent)
    return 0;
  return reds->num;
}
/*----------------------------------------------.
| Compute LA, NLA, and the lookaheads members. |
`----------------------------------------------*/
void
initialize_LA (void)
{
bool default_reduction_only_for_accept;
{
char *default_reductions =
muscle_percent_define_get ("lr.default-reduction");
default_reduction_only_for_accept = STREQ (default_reductions, "accepting");
free (default_reductions);
}
/* Compute the total number of reductions requiring a lookahead. */
nLA = 0;
for (state_number i = 0; i < nstates; ++i)
nLA += state_lookaheads_count (states[i],
default_reduction_only_for_accept);
/* Avoid having to special case 0. */
if (!nLA)
nLA = 1;
bitsetv pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED);
/* Initialize the members LOOKAHEADS for each state whose reductions
require lookahead tokens. */
for (state_number i = 0; i < nstates; ++i)
{
int count = state_lookaheads_count (states[i],
default_reduction_only_for_accept);
if (count)
{
states[i]->reductions->lookaheads = pLA;
pLA += count;
}
}
}
/*-----------------------------------------------------------------.
| Print on OUT, for each state with reductions, the lookahead      |
| tokens of each of its rules.                                     |
`-----------------------------------------------------------------*/

static void
lookaheads_print (FILE *out)
{
  fputs ("Lookaheads:\n", out);
  for (state_number sn = 0; sn < nstates; ++sn)
    {
      const reductions *reds = states[sn]->reductions;
      if (!reds->num)
        continue;

      fprintf (out, " State %d:\n", sn);
      for (int r = 0; r < reds->num; ++r)
        {
          fprintf (out, " rule %d:", reds->rules[r]->number);
          /* States without lookahead sets only get the rule number.  */
          if (reds->lookaheads)
            {
              bitset_iterator it;
              int tok;
              BITSET_FOR_EACH (it, reds->lookaheads[r], tok, 0)
                fprintf (out, " %s", symbols[tok]->tag);
            }
          fputc ('\n', out);
        }
    }
  fputc ('\n', out);
}
/* Compute the lookaheads: allocate the lookahead sets, build the goto
   tables, then run the successive relation computations.  Begin and
   end banners are printed on stderr when tracing the automaton.  */
void
lalr (void)
{
  if (trace_flag & trace_automaton)
    {
      fputc ('\n', stderr);
      begin_use_class ("trace0", stderr);
      fputs ("lalr: begin", stderr);
      end_use_class ("trace0", stderr);
      fputc ('\n', stderr);
    }

  /* The order of these steps matters: each one uses what the previous
     ones built.  */
  initialize_LA ();
  set_goto_map ();
  initialize_goto_follows ();
  lookback = xcalloc (nLA, sizeof *lookback);
  build_relations ();
  compute_follows ();
  compute_lookaheads ();

  if (trace_flag & trace_sets)
    lookaheads_print (stderr);

  if (trace_flag & trace_automaton)
    {
      begin_use_class ("trace0", stderr);
      fputs ("lalr: done", stderr);
      end_use_class ("trace0", stderr);
      fputc ('\n', stderr);
    }
}
/* Renumber the states recorded in goto_map, from_state and to_state.

   OLD_TO_NEW[i] is the new number of old state i, or NSTATES_OLD if
   that state is removed.  Gotos leaving a removed state are dropped,
   the remaining ones are compacted in place, and goto_map and ngotos
   are adjusted accordingly.  */
void
lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old)
{
  aver (nsyms == nnterms + ntokens);

  /* Number of gotos kept so far, i.e., the next free slot.  */
  goto_number kept = 0;
  /* Next entry of goto_map to update.  */
  symbol_number nt = 0;

  for (goto_number old = 0; old < ngotos; ++old)
    {
      /* Every nterm whose range started at OLD now starts at KEPT.  */
      for (; old == goto_map[nt]; ++nt)
        goto_map[nt] = kept;

      /* The source state is removed: drop this goto.  */
      state_number const new_src = old_to_new[from_state[old]];
      if (new_src == nstates_old)
        continue;

      /* The source is kept, so it is reachable, and so is the target:
         it cannot have been removed.  */
      state_number const new_dst = old_to_new[to_state[old]];
      aver (new_dst != nstates_old);

      from_state[kept] = new_src;
      to_state[kept] = new_dst;
      ++kept;
    }

  /* The remaining entries all pointed to the old end of the tables.  */
  for (; nt <= nnterms; ++nt)
    {
      aver (ngotos == goto_map[nt]);
      goto_map[nt] = kept;
    }

  ngotos = kept;
}
/* Detach every state from its lookahead sets, then free LA.  */
void
lalr_free (void)
{
  state_number s = 0;
  while (s < nstates)
    {
      states[s]->reductions->lookaheads = NULL;
      ++s;
    }
  bitsetv_free (LA);
}
+105
View File
@@ -0,0 +1,105 @@
/* Compute lookahead criteria for bison,
Copyright (C) 1984, 1986, 1989, 2000, 2002, 2004, 2006-2007,
2009-2015, 2018-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef LALR_H_
# define LALR_H_
# include <bitset.h>
# include <bitsetv.h>
/* Import the definition of RULE_T. */
# include "gram.h"
/* Import the definition of CORE, TRANSITIONS and REDUCTIONS. */
# include "state.h"
/** Build the LALR(1) automaton.
Find which rules need lookahead in each state, and which lookahead
tokens they accept.
Also builds:
- #goto_map
- #from_state
- #to_state
- #goto_follows
*/
void lalr (void);
/**
* Set #nLA and allocate all reduction lookahead sets. Normally invoked by
* #lalr.
*/
void initialize_LA (void);
/**
* Build only:
* - #goto_map
* - #from_state
* - #to_state
* Normally invoked by #lalr.
*/
void set_goto_map (void);
/**
* Update state numbers recorded in #goto_map, #from_state, and #to_state such
* that:
* - \c nstates_old is the old number of states.
* - Where \c i is the old state number, <tt>old_to_new[i]</tt> is either:
* - \c nstates_old if state \c i is removed because it is unreachable.
* Thus, remove all goto entries involving this state.
* - The new state number.
*/
void lalr_update_state_numbers (state_number old_to_new[],
state_number nstates_old);
/** Release the information related to lookahead tokens.
Can be performed once the action tables are computed. */
void lalr_free (void);
typedef size_t goto_number;
# define GOTO_NUMBER_MAXIMUM ((goto_number) -1)
/** Index into #from_state and #to_state.
All the transitions that accept a particular variable are grouped
together in FROM_STATE and TO_STATE, with indexes from GOTO_MAP[I -
NTOKENS] to GOTO_MAP[I - NTOKENS + 1] - 1 (including both). */
extern goto_number *goto_map;
/** The size of #from_state and #to_state. */
extern goto_number ngotos;
/** State number which a transition leads from. */
extern state_number *from_state;
/** State number it leads to. */
extern state_number *to_state;
/** The number of the goto from state SRC labeled with nterm SYM. */
goto_number map_goto (state_number src, symbol_number sym);
/* goto_follows[i] is the set of tokens following goto i. */
extern bitsetv goto_follows;
#endif /* !LALR_H_ */
+162
View File
@@ -0,0 +1,162 @@
## Copyright (C) 2001-2015, 2018-2021 Free Software Foundation, Inc.
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <https://www.gnu.org/licenses/>.
CLEANDIRS += %D%/*.dSYM
bin_PROGRAMS = src/bison
# Prettify Automake-computed names of compiled objects.
src_bison_SHORTNAME = bison
src_bison_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=\"$(bindir)\"
if RELOCATABLE_VIA_LD
src_bison_LDFLAGS = `$(RELOCATABLE_LDFLAGS) $(bindir)`
endif
src_bison_CFLAGS = $(AM_CFLAGS) $(WERROR_CFLAGS)
src_bison_SOURCES = \
src/AnnotationList.c \
src/AnnotationList.h \
src/InadequacyList.c \
src/InadequacyList.h \
src/Sbitset.c \
src/Sbitset.h \
src/assoc.c \
src/assoc.h \
src/closure.c \
src/closure.h \
src/complain.c \
src/complain.h \
src/conflicts.c \
src/conflicts.h \
src/counterexample.c \
src/counterexample.h \
src/derivation.c \
src/derivation.h \
src/derives.c \
src/derives.h \
src/files.c \
src/files.h \
src/fixits.c \
src/fixits.h \
src/flex-scanner.h \
src/getargs.c \
src/getargs.h \
src/glyphs.c \
src/glyphs.h \
src/gram.c \
src/gram.h \
src/graphviz.c \
src/graphviz.h \
src/ielr.c \
src/ielr.h \
src/lalr.c \
src/lalr.h \
src/location.c \
src/location.h \
src/lr0.c \
src/lr0.h \
src/lssi.c \
src/lssi.h \
src/main.c \
src/muscle-tab.c \
src/muscle-tab.h \
src/named-ref.c \
src/named-ref.h \
src/nullable.c \
src/nullable.h \
src/output.c \
src/output.h \
src/parse-gram.y \
src/parse-simulation.c \
src/parse-simulation.h \
src/print-graph.c \
src/print-graph.h \
src/print-xml.c \
src/print-xml.h \
src/print.c \
src/print.h \
src/reader.c \
src/reader.h \
src/reduce.c \
src/reduce.h \
src/relation.c \
src/relation.h \
src/scan-code-c.c \
src/scan-code.h \
src/scan-gram-c.c \
src/scan-gram.h \
src/scan-skel-c.c \
src/scan-skel.h \
src/state.c \
src/state.h \
src/state-item.c \
src/state-item.h \
src/strversion.c \
src/strversion.h \
src/symlist.c \
src/symlist.h \
src/symtab.c \
src/symtab.h \
src/system.h \
src/tables.c \
src/tables.h \
src/uniqstr.c \
src/uniqstr.h
EXTRA_src_bison_SOURCES = \
src/scan-code.l \
src/scan-gram.l \
src/scan-skel.l
BUILT_SOURCES += \
src/parse-gram.c \
src/parse-gram.h \
src/scan-code.c \
src/scan-gram.c \
src/scan-skel.c
# Although conceptually most of these guys would make more sense in the
# definition of libbison, beware that they might expand as flags such as
# `-lm`. Keep them here. Or use a Libtool convenience library.
src_bison_LDADD = \
lib/libbison.a \
$(ISNAND_LIBM) \
$(ISNANF_LIBM) \
$(ISNANL_LIBM) \
$(LDEXPL_LIBM) \
$(LDEXP_LIBM) \
$(LIBTHREAD) \
$(LIB_CLOCK_GETTIME) \
$(LIB_GETHRXTIME) \
$(LIB_HARD_LOCALE) \
$(LIB_MBRTOWC) \
$(LIB_SETLOCALE_NULL) \
$(LIBICONV) \
$(LIBINTL) \
$(LIBTEXTSTYLE)
EXTRA_DIST += %D%/i18n-strings.c
## ------ ##
## yacc. ##
## ------ ##
if ENABLE_YACC
nodist_bin_SCRIPTS = src/yacc
endif
EXTRA_SCRIPTS = src/yacc
MOSTLYCLEANFILES += src/yacc
@@ -0,0 +1,547 @@
/* Locations for Bison
Copyright (C) 2002, 2005-2015, 2018-2021 Free Software Foundation,
Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include <mbfile.h>
#include <mbswidth.h>
#include <quotearg.h>
#include <stdio.h> /* fileno */
#include <sys/ioctl.h>
#include <sys/stat.h> /* fstat */
#include <termios.h>
#ifdef WINSIZE_IN_PTEM
# include <sys/stream.h>
# include <sys/ptem.h>
#endif
#include "complain.h"
#include "getargs.h"
#include "location.h"
location const empty_loc = EMPTY_LOCATION_INIT;
/* Return the terminal width, never less than 40.

   A non-empty COLUMNS environment variable takes precedence: its value
   is used if it fits in 0 .. INT_MAX, otherwise INT_MAX is used.
   Failing that, where TIOCGWINSZ is available, the width of the
   terminal attached to stderr is used.  The fallback is 80.  */
static int
columns (void)
{
  int width = 80;
  const char *env = getenv ("COLUMNS");
  if (env && *env)
    {
      long parsed = strtol (env, NULL, 10);
      if (0 <= parsed && parsed <= INT_MAX)
        width = parsed;
      else
        width = INT_MAX;
    }
  else
    {
#ifdef TIOCGWINSZ
      struct winsize ws;
      if (ioctl (STDERR_FILENO, TIOCGWINSZ, &ws) != -1
          && 0 < ws.ws_col && ws.ws_col == (size_t) ws.ws_col)
        width = ws.ws_col;
#endif
    }

  return max_int (width, 40);
}
/* Available screen width, in columns. Starts at 80; caret_init
replaces it with the result of columns (). */
static int screen_width = 80;
/* The ellipsis symbol to use for this locale, and the number of
screen-columns it uses. Both are refreshed by caret_init. */
static const char *ellipsis = "...";
static int ellipsize = 3;
/* Return COLUMN advanced by some width, saturating at INT_MAX rather
   than overflowing.  When BUF is non-null the width is
   mbsnwidth (BUF, BUFSIZE, 0); when BUF is null the width is BUFSIZE
   itself, clamped to INT_MAX.  */
static inline int
add_column_width (int column, char const *buf, size_t bufsize)
{
  int width;
  if (buf)
    width = mbsnwidth (buf, bufsize, 0);
  else if (INT_MAX <= bufsize)
    width = INT_MAX;
  else
    width = bufsize;

  if (INT_MAX - width < column)
    return INT_MAX;
  return column + width;
}
/* Advance the line, column and byte counters of *CUR over the SIZE
   bytes starting at TOKEN.

   A newline bumps the line and resets column and byte to 1.  A tab
   first accounts for the pending text, then moves the column to the
   next multiple-of-8 tab stop.  Every non-newline byte bumps the byte
   counter.  All counters saturate at INT_MAX.  */
static void
boundary_compute (boundary *cur, char const *token, size_t size)
{
  char const *const end = token + size;
  /* Start of the bytes whose display width is not yet counted.  */
  char const *pending = token;
  char const *cp;
  int line = cur->line;
  int col = cur->column;
  int nbyte = cur->byte;

  for (cp = token; cp < end; ++cp)
    {
      if (*cp == '\n')
        {
          if (line < INT_MAX)
            ++line;
          col = 1;
          nbyte = 1;
          pending = cp + 1;
          continue;
        }
      if (*cp == '\t')
        {
          col = add_column_width (col, pending, cp - pending);
          col = add_column_width (col, NULL, 8 - ((col - 1) & 7));
          pending = cp + 1;
        }
      if (nbyte < INT_MAX)
        ++nbyte;
    }
  /* Account for whatever follows the last newline or tab.  */
  col = add_column_width (col, pending, cp - pending);

  cur->line = line;
  cur->column = col;
  cur->byte = nbyte;
}
/* Make *LOC span the token TOKEN of SIZE bytes that starts at *CUR,
   and advance *CUR past it.  Warn the first time the line, column or
   byte counter saturates at INT_MAX.  */
void
location_compute (location *loc, boundary *cur, char const *token, size_t size)
{
  loc->start = *cur;
  boundary_compute (cur, token, size);
  loc->end = *cur;

  boundary const *from = &loc->start;
  boundary const *to = &loc->end;

  if (from->line != INT_MAX && to->line == INT_MAX)
    complain (loc, Wother, _("line number overflow"));
  if (from->column != INT_MAX && to->column == INT_MAX)
    complain (loc, Wother, _("column number overflow"));
  /* TRANSLATORS: we are counting bytes, and there are too many.  */
  if (from->byte != INT_MAX && to->byte == INT_MAX)
    complain (loc, Wother, _("byte number overflow"));
}
/* Print boundary B on OUT as "FILE:LINE.COLUMN@BYTE", and return what
   fprintf returned.  Uses quotearg's slot 3.  */
static int
boundary_print (boundary const *b, FILE *out)
{
  char const *quoted_file
    = quotearg_n_style (3, escape_quoting_style, b->file);
  return fprintf (out, "%s:%d.%d@%d",
                  quoted_file, b->line, b->column, b->byte);
}
/* Print LOC on OUT and return the sum of the fprintf results.

   The empty location prints as "(empty location)".  With
   --trace=locations both boundaries are printed in full.  Otherwise
   the shortest unambiguous form is used: the end boundary only shows
   the parts (file, line, column) that differ from the start, and the
   end column is printed as the last column inside the location
   (end.column - 1).  Uses quotearg's slot 3.  */
int
location_print (location loc, FILE *out)
{
  if (location_empty (loc))
    return fprintf (out, "(empty location)");

  if (trace_flag & trace_locations)
    {
      int n = boundary_print (&loc.start, out);
      n += fprintf (out, "-");
      n += boundary_print (&loc.end, out);
      return n;
    }

  aver (loc.start.file);
  aver (loc.end.file);

  boundary const *s = &loc.start;
  boundary const *e = &loc.end;
  int const end_col = e->column ? e->column - 1 : 0;

  int n = fprintf (out, "%s",
                   quotearg_n_style (3, escape_quoting_style, s->file));
  if (0 < s->line)
    {
      n += fprintf (out, ":%d", s->line);
      if (0 < s->column)
        n += fprintf (out, ".%d", s->column);
    }

  if (s->file != e->file)
    {
      /* Different files: print the end boundary in full.  */
      n += fprintf (out, "-%s",
                    quotearg_n_style (3, escape_quoting_style, e->file));
      if (0 < e->line)
        {
          n += fprintf (out, ":%d", e->line);
          if (0 <= end_col)
            n += fprintf (out, ".%d", end_col);
        }
    }
  else if (0 < e->line)
    {
      if (s->line < e->line)
        {
          /* Same file, later line.  */
          n += fprintf (out, "-%d", e->line);
          if (0 <= end_col)
            n += fprintf (out, ".%d", end_col);
        }
      else if (0 <= end_col && s->column < end_col)
        /* Same line, several columns.  */
        n += fprintf (out, "-%d", end_col);
    }

  return n;
}
/* Persistent data used by location_caret to avoid reopening and rereading the
same file all over for each error. */
static struct
{
/* Raw input file. NULL when nothing is open (see caret_free). */
FILE *file;
/* Input file as a stream of multibyte characters. */
mb_file_t mbfile;
/* The position within the last file we quoted. If POS.FILE is non
NULL, but FILE is NULL, it means this file is special and should
not be quoted. */
boundary pos;
/* Offset in FILE of the current line (i.e., where line POS.LINE
starts). Set from ftell and passed to fseek by caret_set_line. */
size_t offset;
/* Length of the current line: caret_set_line stores here the value of
POS.COLUMN reached after scanning the whole line, columns starting
at 1. */
int line_len;
/* Given the initial column to display, the offset (number of
characters to skip at the beginning of the line). */
int skip;
/* Available width to quote the source file. Eight chars are
consumed by the left-margin (with line number). */
int width;
} caret_info;
/* Prepare the use of location_caret: pick the ellipsis for the
   current locale, measure it, and compute the screen width.  */
void
caret_init (void)
{
  /* TRANSLATORS: This is used when a line is too long, and is
     displayed truncated.  Use an ellipsis appropriate for your
     language, remembering that "…" (U+2026 HORIZONTAL ELLIPSIS)
     sometimes misdisplays and that "..."  (three ASCII periods) is a
     safer choice in some locales.  */
  ellipsis = _("...");
  ellipsize = mbswidth (ellipsis, 0);

  screen_width = columns ();
}
/* Close the file kept open by location_caret, if any, and forget it.
   Safe to call repeatedly.  */
void
caret_free (void)
{
  if (!caret_info.file)
    return;

  fclose (caret_info.file);
  caret_info.file = NULL;
}
/* Open FILE for quoting, if needed, and if possible.  Return whether
   the file can be quoted.

   FILE is compared by address with the previously quoted file name:
   when it is the same, the already open stream (or the remembered
   failure) is reused; when it differs, the previous stream is closed
   and FILE is opened from scratch, with the cached line number and
   line offset reset to the beginning of the file.  */
static bool
caret_set_file (const char *file)
{
  /* If a different file than before, close and let the rest open
     the new one.  */
  if (caret_info.pos.file && caret_info.pos.file != file)
    {
      caret_free ();
      caret_info.pos.file = NULL;
    }
  if (!caret_info.pos.file)
    {
      caret_info.pos.file = file;
      if ((caret_info.file = fopen (caret_info.pos.file, "r")))
        {
          /* If the file is not regular (imagine #line 1 "/dev/stdin"
             in the input file for instance), don't try to quote the
             file.  Keep caret_info.pos.file set so that we don't try
             to open it again, but leave caret_info.file NULL so that
             we don't try to quote it.  */
          struct stat buf;
          if (fstat (fileno (caret_info.file), &buf) == 0
              && S_ISREG (buf.st_mode))
            {
              caret_info.pos.line = 1;
              /* The cached offset refers to the previously quoted
                 file; line 1 of the new one starts at offset 0.  */
              caret_info.offset = 0;
              mbf_init (caret_info.mbfile, caret_info.file);
            }
          else
            caret_free ();
        }
    }
  return !!caret_info.file;
}
/* Read the next multibyte character of caret_info.mbfile into *RES,
   folding a "\r\n" pair into a single '\n'.  A lone '\r' is returned
   as is, and the character read after it is pushed back.  */
static void
caret_getc_internal (mbchar_t *res)
{
  mbf_getc (*res, caret_info.mbfile);
  if (!mb_iseq (*res, '\r'))
    return;

  mbchar_t next;
  mbf_getc (next, caret_info.mbfile);
  if (mb_iseq (next, '\n'))
    mb_copy (res, &next);
  else
    mbf_ungetc (next, caret_info.mbfile);
}

#define caret_getc(Var) caret_getc_internal(&Var)
/* Move CARET_INFO (which has a valid FILE) to the line number LINE.
Compute and cache that line's length in CARET_INFO.LINE_LEN.
Return whether successful.
On success the stream is positioned at the start of LINE,
CARET_INFO.OFFSET is the offset of that line, and
CARET_INFO.POS.COLUMN is 1. Returns false when a seek fails or when
the file has fewer than LINE lines. */
static bool
caret_set_line (int line)
{
/* If the line we want to quote is seekable (the same line as the previous
location), just seek it. If it was a previous line, we lost track of it,
so return to the start of file. */
if (line < caret_info.pos.line)
{
caret_info.pos.line = 1;
caret_info.offset = 0;
}
if (fseek (caret_info.file, caret_info.offset, SEEK_SET))
return false;
/* If this is the same line as the previous one, we are done.
NOTE(review): the test is `<', not `=='; after the reset above it
can only hold when LINE is less than 1, so the "same line" case
falls through and the line is measured again. Confirm this is
intended before changing it: the code below also resets
POS.COLUMN, which the caller relies on. */
if (line < caret_info.pos.line)
return true;
/* Advance to the line's position, keeping track of the offset. */
while (caret_info.pos.line < line)
{
mbchar_t c;
caret_getc (c);
if (mb_iseof (c))
/* Something is wrong, that line number does not exist. */
return false;
caret_info.pos.line += mb_iseq (c, '\n');
}
/* NOTE(review): the result of ftell is stored unchecked into a
size_t. */
caret_info.offset = ftell (caret_info.file);
caret_info.pos.column = 1;
/* Reset mbf's internal state.
FIXME: should be done in mbfile. */
caret_info.mbfile.eof_seen = 0;
/* Find the number of columns of this line. */
while (true)
{
mbchar_t c;
caret_getc (c);
if (mb_iseof (c) || mb_iseq (c, '\n'))
break;
boundary_compute (&caret_info.pos, mb_ptr (c), mb_len (c));
}
/* POS.COLUMN started at 1, so this is one more than the number of
columns the line occupies. */
caret_info.line_len = caret_info.pos.column;
/* Go back to the beginning of line. */
if (fseek (caret_info.file, caret_info.offset, SEEK_SET))
return false;
/* Reset mbf's internal state.
FIXME: should be done in mbfile. */
caret_info.mbfile.eof_seen = 0;
caret_info.pos.column = 1;
return true;
}
/* Compute CARET_INFO.WIDTH and CARET_INFO.SKIP, given that COL is the
   first column that must be visible in the current line.

   The left margin (with the line number) takes eight columns.  When
   the line does not fit and COL lies beyond the available width, the
   beginning of the line is skipped so that the quote starts ten
   columns before COL.  Each truncated side costs one ellipsis.
   Always returns true.  */
static bool
caret_set_column (int col)
{
  int const line_len = caret_info.line_len;
  int avail = screen_width - 8;
  int skip = 0;

  /* We cannot quote the whole line: make sure the beginning of the
     location is visible.  */
  if (avail < line_len && avail < col)
    skip = col - 10;

  /* Room for the leading "...".  */
  if (skip)
    avail -= ellipsize;

  /* Room for the trailing "..." if the end does not fit either.  */
  if (avail < line_len - skip)
    avail -= ellipsize;

  caret_info.skip = skip;
  caret_info.width = avail;
  return true;
}
/* If -fcaret is enabled, quote on OUT the first line of LOC, then
   underline the part covered by LOC with '^' followed by '~'.

   Nothing is printed when carets are disabled, when LOC has no line
   number, or when the file or line cannot be read.  STYLE is the
   class passed to begin_use_class/end_use_class around the
   highlighted text and around the carets.  Long lines are truncated
   on either side with the locale's ellipsis.  */
void
location_caret (location loc, const char *style, FILE *out)
{
  if (!(feature_flag & feature_caret))
    return;
  if (!loc.start.line)
    return;
  if (!caret_set_file (loc.start.file))
    return;
  if (!caret_set_line (loc.start.line))
    return;
  if (!caret_set_column (loc.start.column))
    return;

  const int width = caret_info.width;
  const int skip = caret_info.skip;

  /* Read the actual line.  Don't update the offset, so that we keep a pointer
     to the start of the line.  */
  {
    mbchar_t c;
    caret_getc (c);
    if (!mb_iseof (c))
      {
        /* The last column to highlight.  Only the first line of
           multiline locations are quoted, in which case the ending
           column is the end of line.

           We used to work with byte offsets, and that was much
           easier.  However, we went back to using (visual) columns to
           support truncating of long lines.  */
        const int col_end
          = loc.start.line == loc.end.line
          ? loc.end.column
          : caret_info.line_len;

        /* Quote the file (at most the first line in the case of
           multiline locations).  */
        {
          fprintf (out, "%5d | %s", loc.start.line, skip ? ellipsis : "");
          /* Whether we opened the style.  If the line is not as
             expected (maybe the file was changed since the scanner
             ran), we might reach the end before we actually saw the
             opening column.  */
          enum { before, inside, after } state = before;
          while (!mb_iseof (c) && !mb_iseq (c, '\n'))
            {
              // We might have already opened (and even closed!) the
              // style and yet have the equality of the columns if we
              // just saw zero-width characters.
              if (state == before
                  && caret_info.pos.column == loc.start.column)
                {
                  begin_use_class (style, out);
                  state = inside;
                }
              if (skip < caret_info.pos.column)
                mb_putc (c, out);
              boundary_compute (&caret_info.pos, mb_ptr (c), mb_len (c));
              caret_getc (c);
              if (state == inside
                  && (caret_info.pos.column == col_end
                      || width < caret_info.pos.column - skip))
                {
                  end_use_class (style, out);
                  state = after;
                }
              if (width < caret_info.pos.column - skip)
                {
                  fputs (ellipsis, out);
                  break;
                }
            }
          if (state == inside)
            {
              // The line is shorter than expected.
              end_use_class (style, out);
              state = after;
            }
          putc ('\n', out);
        }

        /* Print the carets with the same indentation as above: the
           "%5s" with an empty string stands for the blank line-number
           field, so that this gutter is exactly as wide as the
           "%5d | " printed before the quoted line.  */
        {
          fprintf (out, "%5s | %*s", "",
                   loc.start.column - 1 - skip + (skip ? ellipsize : 0), "");
          begin_use_class (style, out);
          putc ('^', out);
          /* Underlining a multiline location ends with the first
             line.  */
          for (int i = loc.start.column - 1 - skip + 1,
                 i_end = min_int (col_end - 1 - skip, width);
               i < i_end; ++i)
            putc ('~', out);
          end_use_class (style, out);
          putc ('\n', out);
        }
      }
  }
}
/* If -fcaret is enabled, print on OUT the suggestion S under the
   start column of LOC, in the "fixit-insert" style.

   The indentation reuses CARET_INFO.SKIP, so this is meant to be
   called right after location_caret for the same location.  The
   gutter is written as "%5s | " with an empty string so that it is
   exactly as wide as the "%5d | " gutter of the quoted line.  */
void
location_caret_suggestion (location loc, const char *s, FILE *out)
{
  if (!(feature_flag & feature_caret))
    return;
  const char *style = "fixit-insert";
  fprintf (out, "%5s | %*s", "",
           loc.start.column - 1 - caret_info.skip
           + (caret_info.skip ? ellipsize : 0),
           "");
  begin_use_class (style, out);
  fputs (s, out);
  end_use_class (style, out);
  putc ('\n', out);
}
/* Whether LOC is the empty location: both boundaries have a null file
   and zero line and column.  The byte counters are not consulted.  */
bool
location_empty (location loc)
{
  boundary const *s = &loc.start;
  boundary const *e = &loc.end;
  bool const start_unset = !s->file && !s->line && !s->column;
  bool const end_unset = !e->file && !e->line && !e->column;
  return start_unset && end_unset;
}
/* Convert the decimal number at the start of S to an int.  Any
   negative value yields -1; values above INT_MAX yield INT_MAX.  */
static inline int
str_to_int (const char *s)
{
  long value = strtol (s, NULL, 10);
  if (value < 0)
    return -1;
  if (INT_MAX < value)
    return INT_MAX;
  return value;
}
/* Fill *BOUND from STR, formatted as 'file:line.column@byte' or
   'file:line.column'.  Without '@byte', the byte is set to the
   column.  STR is modified: its separators are overwritten with
   NULs.  Aborts (via aver) if the '.' or ':' separator is missing.  */
void
boundary_set_from_string (boundary *bound, char *str)
{
  /* Search backwards: the file name may contain '.' or ':'.  */
  char *byte_str = strrchr (str, '@');
  if (byte_str)
    *byte_str++ = '\0';

  char *column_str = strrchr (str, '.');
  aver (column_str);
  *column_str++ = '\0';

  char *line_str = strrchr (str, ':');
  aver (line_str);
  *line_str++ = '\0';

  bound->column = str_to_int (column_str);
  bound->byte = byte_str ? str_to_int (byte_str) : bound->column;
  bound->line = str_to_int (line_str);
  bound->file = uniqstr_new (str);
}
@@ -0,0 +1,154 @@
/* Locations for Bison
Copyright (C) 2002, 2004-2015, 2018-2021 Free Software Foundation,
Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef LOCATION_H_
# define LOCATION_H_
# include <stdbool.h>
# include <stdio.h>
# include <string.h> /* strcmp */
# include "uniqstr.h"
/* A boundary between two characters. */
typedef struct
{
/* The name of the file that contains the boundary. */
uniqstr file;
/* If positive, the line (starting at 1) that contains the boundary.
If this is INT_MAX, the line number has overflowed.
Meaningless and not displayed if nonpositive.
*/
int line;
/* If positive, the column (starting at 1) just after the boundary.
This is neither a byte count, nor a character count; it is a
(visual) column count. If this is INT_MAX, the column number has
overflowed.
Meaningless and not displayed if nonpositive. */
int column;
/* If nonnegative, the byte number (starting at 0) in the current
line. Not displayed (unless --trace=location).
NOTE(review): the scanner-side update resets this to 1, not 0,
after a newline; confirm which origin is intended. */
int byte;
} boundary;
/* Set *P to file F, line L, column C and byte B.  */
static inline void
boundary_set (boundary *p, const char *f, int l, int c, int b)
{
  *p = (boundary) { .file = f, .line = l, .column = c, .byte = b };
}
/* Return -1, 0, 1, depending whether a is before, equal, or
   after b.

   Boundaries are ordered by file name (boundaries with no file come
   first), then by line, then by column.  The byte is ignored.  The
   comparisons never subtract, so they cannot overflow even when a
   field is negative and the other is INT_MAX.  */
static inline int
boundary_cmp (boundary a, boundary b)
{
  int res;
  /* Locations with no file first.  */
  if (a.file && b.file)
    res = strcmp (a.file, b.file);
  else
    res = (a.file != NULL) - (b.file != NULL);
  if (!res)
    res = (a.line > b.line) - (a.line < b.line);
  if (!res)
    res = (a.column > b.column) - (a.column < b.column);
  /* strcmp only promises a sign: normalize to -1, 0, 1.  */
  return (0 < res) - (res < 0);
}
/* Whether A and B have the same line, column and file.  The byte is
   not compared.  */
static inline bool
equal_boundaries (boundary a, boundary b)
{
  if (a.line != b.line || a.column != b.column)
    return false;
  return UNIQSTR_EQ (a.file, b.file);
}
/* A location, that is, a region of source code, delimited by two
boundaries. */
typedef struct
{
/* Boundary just before the location starts. */
boundary start;
/* Boundary just after the location ends. */
boundary end;
} location;
/* NOTE(review): presumably the location type used by the generated
grammar parser; confirm against the grammar file. */
# define GRAM_LTYPE location
/* Initializer for a location whose two boundaries have a null file
and zero line, column and byte. */
# define EMPTY_LOCATION_INIT {{NULL, 0, 0, 0}, {NULL, 0, 0, 0}}
extern location const empty_loc;
/* Set *LOC and adjust scanner cursor to account for token TOKEN of
size SIZE. */
void location_compute (location *loc,
boundary *cur, char const *token, size_t size);
/* Print location to file.
Return number of actually printed characters.
Warning: uses quotearg's slot 3. */
int location_print (location loc, FILE *out);
/* Prepare the use of location_caret. */
void caret_init (void);
/* Free any allocated resources and close any open file handles that are
left-over by the usage of location_caret. */
void caret_free (void);
/* If -fcaret is enabled, quote the line containing LOC onto OUT.
Highlight the part of LOC with the color STYLE. */
void location_caret (location loc, const char* style, FILE *out);
/* If -fcaret is enabled, display a suggestion of replacement for LOC
with S. To be called after location_caret. */
void location_caret_suggestion (location loc, const char *s, FILE *out);
/* Compare locations A and B: first by their start boundaries, then,
   if those are equal, by their end boundaries.  The result is that
   of boundary_cmp: negative, zero or positive depending on whether A
   is before, equal to, or after B.  */
static inline int
location_cmp (location a, location b)
{
  int const by_start = boundary_cmp (a.start, b.start);
  return by_start ? by_start : boundary_cmp (a.end, b.end);
}
/* Whether this is the empty location. */
bool location_empty (location loc);
/* Set *BOUND from STR.
STR must be formatted as 'file:line.column@byte' or 'file:line.column'.
It may be '<command line>:3.-1@-1', with -1 to denote no-column/no-byte.
STR will be modified. */
void boundary_set_from_string (boundary *bound, char *str);
#endif /* ! defined LOCATION_H_ */
+433
View File
@@ -0,0 +1,433 @@
/* Generate the LR(0) parser states for Bison.
Copyright (C) 1984, 1986, 1989, 2000-2002, 2004-2015, 2018-2021 Free
Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
/* See comments in state.h for the data structures that represent it.
The entry point is generate_states. */
#include <config.h>
#include "system.h"
#include <bitset.h>
#include "closure.h"
#include "complain.h"
#include "getargs.h"
#include "gram.h"
#include "lalr.h"
#include "lr0.h"
#include "reader.h"
#include "reduce.h"
#include "state.h"
#include "symtab.h"
/* A singly linked list of states, used as the queue of states that
still have to be processed. */
typedef struct state_list
{
struct state_list *next;
state *state;
} state_list;
/* Head and tail of the queue; state_list_append adds at the tail. */
static state_list *first_state = NULL;
static state_list *last_state = NULL;
/* Print on OUT, one per line, the CORE_SIZE items of CORE (indices
   into RITEM).  For debugging.  */
static void
core_print (size_t core_size, item_index *core, FILE *out)
{
  /* The index has the type of the bound, to avoid a signed/unsigned
     comparison.  */
  for (size_t i = 0; i < core_size; ++i)
    {
      item_print (ritem + core[i], NULL, out);
      fputc ('\n', out);
    }
}
/*------------------------------------------------------------------.
| Create the state whose core is CORE (of CORE_SIZE items), reached |
| by a transition on SYM, and queue it at the tail of the list of   |
| states to process.  Return the new state.                         |
`------------------------------------------------------------------*/

static state *
state_list_append (symbol_number sym, size_t core_size, item_index *core)
{
  state_list *cell = xmalloc (sizeof *cell);
  state *new_state = state_new (sym, core_size, core);

  if (trace_flag & trace_automaton)
    fprintf (stderr, "state_list_append (state = %d, symbol = %d (%s))\n",
             nstates, sym, symbols[sym]->tag);

  cell->state = new_state;
  cell->next = NULL;

  /* Link the cell at the tail; it is also the head if the queue was
     empty.  */
  if (last_state)
    last_state->next = cell;
  if (!first_state)
    first_state = cell;
  last_state = cell;

  return new_state;
}
/* Symbols that can be "shifted" (including nonterminals) from the
current state. */
bitset shift_symbol;
/* Scratch array of NRULES entries, filled by save_reductions with the
rules that can be reduced in the current state. */
static rule **redset;
/* For the current state, the list of pointers to states that can be
reached via a shift/goto. Could be indexed by the reaching symbol,
but labels of incoming transitions can be recovered by the state
itself. */
static state **shiftset;
/* KERNEL_BASE[symbol-number] -> list of item indices (offsets inside
RITEM) of length KERNEL_SIZE[symbol-number]. */
static item_index **kernel_base;
static int *kernel_size;
/* A single dimension array that serves as storage for
KERNEL_BASE. */
static item_index *kernel_items;
/* Allocate KERNEL_BASE, KERNEL_ITEMS and KERNEL_SIZE.

   All the vectors of items live inside KERNEL_ITEMS.  The number of
   active items after some symbol S cannot be more than the number of
   times S appears in the right-hand sides, so KERNEL_BASE[S] is given
   exactly that many slots.  KERNEL_SIZE is left uninitialized.  */
static void
allocate_itemsets (void)
{
  /* Count the occurrences of each symbol in the right-hand sides.
     Useless productions (hence useless nonterminals) are browsed
     too, hence the room for _all_ the symbols.  */
  size_t nitems = 0;
  size_t *occurrences = xcalloc (nsyms + nuseless_nonterminals,
                                 sizeof *occurrences);
  for (rule_number r = 0; r < nrules; ++r)
    for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
      {
        ++occurrences[item_number_as_symbol_number (*rhsp)];
        ++nitems;
      }

  kernel_base = xnmalloc (nsyms, sizeof *kernel_base);
  kernel_items = xnmalloc (nitems, sizeof *kernel_items);
  kernel_size = xnmalloc (nsyms, sizeof *kernel_size);

  /* Carve KERNEL_ITEMS into one slice per symbol.  */
  size_t next_free = 0;
  for (symbol_number i = 0; i < nsyms; i++)
    {
      kernel_base[i] = kernel_items + next_free;
      next_free += occurrences[i];
    }

  free (occurrences);
}
/* Print on OUT the non-empty item vectors of the current kernel
   (KERNEL_BASE / KERNEL_SIZE), one symbol at a time.  */
static void
kernel_print (FILE *out)
{
  for (symbol_number i = 0; i < nsyms; ++i)
    {
      if (!kernel_size[i])
        continue;
      fprintf (out, "kernel[%s] =\n", symbols[i]->tag);
      core_print (kernel_size[i], kernel_base[i], out);
    }
}
/* Make sure the kernel is in sane state. */
static void
kernel_check (void)
{
for (symbol_number i = 0; i < nsyms - 1; ++i)
assert (kernel_base[i] + kernel_size[i] <= kernel_base[i + 1]);
}
/* Allocate the working storage of generate_states: the kernel
   arrays, SHIFT_SYMBOL, SHIFTSET (one slot per symbol), REDSET (one
   slot per rule) and the state hash table.  */
static void
allocate_storage (void)
{
  allocate_itemsets ();

  shift_symbol = bitset_create (nsyms, BITSET_FIXED);
  shiftset = xnmalloc (nsyms, sizeof *shiftset);
  redset = xnmalloc (nrules, sizeof *redset);

  state_hash_new ();
}
/* Release everything obtained by allocate_storage.  */
static void
free_storage (void)
{
  /* Kernel arrays.  */
  free (kernel_items);
  free (kernel_size);
  free (kernel_base);

  /* Per-state scratch storage.  */
  free (shiftset);
  free (redset);
  bitset_free (shift_symbol);

  state_hash_free ();
}
/*------------------------------------------------------------------.
| Find which term/nterm symbols can be "shifted" in S, and for each |
| one record which items would be active after that transition.     |
| Uses the contents of itemset.                                     |
|                                                                   |
| On return, shift_symbol is the set of such symbols, and for each  |
| symbol, kernel_base[symbol] holds the kernel_size[symbol] items   |
| activated by shifting it (each being the item that follows, in    |
| ritem, an item of itemset whose next symbol is that symbol).      |
|                                                                   |
| itemset is sorted on item index in ritem, which is sorted on rule |
| number, so each kernel_base[symbol] ends up sorted the same way.  |
`------------------------------------------------------------------*/

static void
new_itemsets (state *s)
{
  if (trace_flag & trace_automaton)
    fprintf (stderr, "new_itemsets: begin: state = %d\n", s->number);

  /* Start from an empty kernel.  */
  memset (kernel_size, 0, nsyms * sizeof *kernel_size);
  bitset_zero (shift_symbol);

  if (trace_flag & trace_automaton)
    {
      fprintf (stderr, "initial kernel:\n");
      kernel_print (stderr);
    }

  for (size_t i = 0; i < nitemset; ++i)
    {
      /* Items at the end of a rule cannot be shifted.  */
      if (!item_number_is_symbol_number (ritem[itemset[i]]))
        continue;

      if (trace_flag & trace_automaton)
        {
          fputs ("working on: ", stderr);
          item_print (ritem + itemset[i], NULL, stderr);
          fputc ('\n', stderr);
        }

      symbol_number sym = item_number_as_symbol_number (ritem[itemset[i]]);
      bitset_set (shift_symbol, sym);
      /* Shifting SYM moves the dot one item further.  */
      kernel_base[sym][kernel_size[sym]] = itemset[i] + 1;
      kernel_size[sym]++;
    }

  if (trace_flag & trace_automaton)
    {
      fprintf (stderr, "final kernel:\n");
      kernel_print (stderr);
      fprintf (stderr, "new_itemsets: end: state = %d\n\n", s->number);
    }
  kernel_check ();
}
/*---------------------------------------------------------------.
| Return the state whose core is CORE (of CORE_SIZE items),      |
| reached by shifting SYM: the existing one if the state hash    |
| table knows it, otherwise a new state that is also queued for  |
| processing.  Used by append_states.                            |
`---------------------------------------------------------------*/

static state *
get_state (symbol_number sym, size_t core_size, item_index *core)
{
  if (trace_flag & trace_automaton)
    {
      fprintf (stderr, "Entering get_state, symbol = %d (%s), core:\n",
               sym, symbols[sym]->tag);
      core_print (core_size, core, stderr);
      fputc ('\n', stderr);
    }

  state *res = state_hash_lookup (core_size, core);
  if (res == NULL)
    res = state_list_append (sym, core_size, core);

  if (trace_flag & trace_automaton)
    fprintf (stderr, "Exiting get_state => %d\n", res->number);

  return res;
}
/*---------------------------------------------------------------.
| Use the information computed by new_itemsets to find the state |
| reached by each shift transition from S.                       |
|                                                                |
| SHIFTSET receives those states, in the order in which the      |
| symbols of SHIFT_SYMBOL are enumerated.                        |
`---------------------------------------------------------------*/

static void
append_states (state *s)
{
  if (trace_flag & trace_automaton)
    fprintf (stderr, "append_states: begin: state = %d\n", s->number);

  int ntargets = 0;
  bitset_iterator iter;
  symbol_number sym;
  BITSET_FOR_EACH (iter, shift_symbol, sym, 0)
    shiftset[ntargets++]
      = get_state (sym, kernel_size[sym], kernel_base[sym]);

  if (trace_flag & trace_automaton)
    fprintf (stderr, "append_states: end: state = %d\n", s->number);
}
/*-----------------------------------------------------------------.
| Collect in REDSET the rules whose end is reached by an item of   |
| itemset, and record them as the reductions of S.  If rule 0 is   |
| among them, S is the final state (there must be only one).       |
`-----------------------------------------------------------------*/

static void
save_reductions (state *s)
{
  int nreds = 0;

  /* Find and count the active items that represent ends of rules.  */
  for (size_t i = 0; i < nitemset; ++i)
    {
      item_number item = ritem[itemset[i]];
      if (!item_number_is_rule_number (item))
        continue;

      rule_number r = item_number_as_rule_number (item);
      redset[nreds] = &rules[r];
      ++nreds;

      if (r == 0)
        {
          /* This is "reduce 0", i.e., accept.  */
          aver (!final_state);
          final_state = s;
        }
    }

  if (trace_flag & trace_automaton)
    {
      fprintf (stderr, "reduction[%d] = {\n", s->number);
      for (int j = 0; j < nreds; ++j)
        {
          rule_print (redset[j], NULL, stderr);
          fputc ('\n', stderr);
        }
      fputs ("}\n", stderr);
    }

  /* Make a reductions structure and copy the data into it.  */
  state_reductions_set (s, nreds, redset);
}
/*------------------------------------------------------------------.
| Build STATES: move every queued state into the STATES array,      |
| indexed by state number, and dispose of the queue.                |
`------------------------------------------------------------------*/

static void
set_states (void)
{
  states = xcalloc (nstates, sizeof *states);

  for (state_list *cell = first_state, *next; cell; cell = next)
    {
      next = cell->next;
      state *s = cell->state;

      /* Pessimization, but simplification of the code: make sure all
         the states have valid transitions and reductions members,
         even if reduced to 0.  It is too soon for errs, which are
         computed later, by set_conflicts.  */
      if (!s->transitions)
        state_transitions_set (s, 0, NULL);
      if (!s->reductions)
        state_reductions_set (s, 0, NULL);

      states[s->number] = s;
      free (cell);
    }

  first_state = NULL;
  last_state = NULL;
}
/*-------------------------------------------------------------------.
| Compute the LR(0) parser states (see state.h for details) from the |
| grammar. |
`-------------------------------------------------------------------*/
void
generate_states (void)
{
allocate_storage ();
closure_new (nritems);
/* Create the initial state, whose accessing symbol (by convention)
is 0, aka $end. */
{
/* The items of its core: beginning of all the rules of $accept. */
kernel_size[0] = 0;
for (rule_number r = 0; r < nrules && rules[r].lhs->symbol == acceptsymbol; ++r)
kernel_base[0][kernel_size[0]++] = rules[r].rhs - ritem;
state_list_append (0, kernel_size[0], kernel_base[0]);
}
/* States are queued when they are created; process them all. */
for (state_list *list = first_state; list; list = list->next)
{
state *s = list->state;
if (trace_flag & trace_automaton)
fprintf (stderr, "Processing state %d (reached by %s)\n",
s->number,
symbols[s->accessing_symbol]->tag);
/* Set up itemset for the transitions out of this state. itemset gets a
vector of all the items that could be accepted next. */
closure (s->items, s->nitems);
/* Record the reductions allowed out of this state. */
save_reductions (s);
/* Find the itemsets of the states that shifts/gotos can reach. */
new_itemsets (s);
/* Find or create the core structures for those states. */
append_states (s);
/* Create the shifts structures for the shifts to those states,
now that the state numbers transitioning to are known. */
state_transitions_set (s, bitset_count (shift_symbol), shiftset);
}
/* discard various storage */
free_storage ();
/* Set up STATES. */
set_states ();
}
+26
View File
@@ -0,0 +1,26 @@
/* Generate the LR(0) parser states for Bison.
Copyright (C) 1984, 1986, 1989, 2000-2002, 2009-2015, 2018-2021 Free
Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef LR0_H_
# define LR0_H_
/* Compute the LR(0) parser states from the grammar. */
void generate_states (void);
#endif /* !LR0_H_ */
+373
View File
@@ -0,0 +1,373 @@
/* Lookahead sensitive state item searches for counterexample generation
Copyright (C) 2020-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "lssi.h"
#include <gl_linked_list.h>
#include <gl_xlist.h>
#include <stdlib.h>
#include "getargs.h"
#include "nullable.h"
// Lookahead sensitive state item: a node of the search, pairing a
// state-item with a lookahead set.
typedef struct lssi
{
// index of the state-item in state_items
state_item_number si;
// node this one was reached from (NULL for the initial node)
struct lssi *parent;
// this is the precise lookahead set (follow_L from the CupEx paper)
bitset lookahead;
// whether lssi_free must also free LOOKAHEAD
bool free_lookahead;
} lssi;
// Allocate a node for state-item SI, reached from P, with lookahead
// set L.  FREE_L tells whether the node owns L (see lssi_free).
static lssi *
new_lssi (state_item_number si, lssi *p, bitset l, bool free_l)
{
  lssi *node = xmalloc (sizeof *node);
  *node = (lssi) {
    .si = si,
    .parent = p,
    .lookahead = l,
    .free_lookahead = free_l,
  };
  return node;
}
// Free SN, and its lookahead set too if SN owns it.  A null SN is
// accepted.
static void
lssi_free (lssi *sn)
{
  if (sn != NULL)
    {
      if (sn->free_lookahead)
        bitset_free (sn->lookahead);
      free (sn);
    }
}
// Hash SN into [0, MAX): the state-item index plus the sum of the
// symbol numbers in the lookahead set, modulo MAX.
static size_t
lssi_hasher (lssi *sn, size_t max)
{
  size_t sum = sn->si;
  bitset_iterator iter;
  symbol_number sym;
  BITSET_FOR_EACH (iter, sn->lookahead, sym, 0)
    sum += sym;
  return sum % max;
}
// Whether S1 and S2 denote the same state-item with equal lookahead
// sets.  Sharing the very same bitset is a shortcut for equality.
static bool
lssi_comparator (lssi *s1, lssi *s2)
{
  if (s1->si != s2->si)
    return false;
  return (s1->lookahead == s2->lookahead
          || bitset_equal_p (s1->lookahead, s2->lookahead));
}
typedef gl_list_t lssi_list;

// Queue SN for exploration unless an equal node was already visited.
// Return true if SN was recorded in VISITED and appended to QUEUE.
// Otherwise SN is freed, but not its lookahead set, which stays
// under the caller's responsibility, and false is returned.
static inline bool
append_lssi (lssi *sn, Hash_table *visited, lssi_list queue)
{
  bool const is_new = !hash_lookup (visited, sn);
  if (is_new)
    {
      hash_xinsert (visited, sn);
      gl_list_add_last (queue, sn);
    }
  else
    {
      // Discard the node only: do not touch its lookahead set.
      sn->free_lookahead = false;
      lssi_free (sn);
    }
  return is_new;
}
#if 0
// Debugging helper, currently compiled out: print on stderr the
// state-item of L and, if it has one, its lookahead set.
static void
lssi_print (lssi *l)
{
FILE *out = stderr;
print_state_item (&state_items[l->si], out);
if (l->lookahead)
{
fprintf (out, "FOLLOWL = { ");
bitset_iterator biter;
symbol_number sin;
BITSET_FOR_EACH (biter, l->lookahead, sin, 0)
fprintf (out, "%s, \n", symbols[sin]->tag);
fprintf (out, "}\n");
}
}
#endif
/**
 * Compute the set of state-items that can reach TARGET via a
 * combination of transitions or production steps, TARGET included.
 *
 * This walks the reverse edges (the `revs' sets) breadth-first from
 * TARGET.  The result is a new bitset indexed by state-item number,
 * to be freed by the caller.
 */
static bitset
eligible_state_items (state_item *target)
{
  bitset reachable = bitset_create (nstate_items, BITSET_FIXED);
  state_item_list worklist =
    gl_list_create (GL_LINKED_LIST, NULL, NULL, NULL, true, 1,
                    (const void **) &target);

  while (gl_list_size (worklist) > 0)
    {
      state_item *cur = (state_item *) gl_list_get_at (worklist, 0);
      gl_list_remove_at (worklist, 0);

      if (!bitset_test (reachable, cur - state_items))
        {
          bitset_set (reachable, cur - state_items);

          // Queue everything that has an edge into CUR.
          bitset_iterator iter;
          state_item_number pred;
          BITSET_FOR_EACH (iter, cur->revs, pred, 0)
            gl_list_add_last (worklist, &state_items[pred]);
        }
    }

  gl_list_free (worklist);
  return reachable;
}
/**
 * Compute the shortest lookahead-sensitive path from the start
 * state-item (number 0) to the state-item TARGET, such that NEXT_SYM
 * belongs to the lookahead set computed for TARGET at the end of the
 * path.  Only state-items that can reach TARGET are explored.
 *
 * Return the path as a newly created list of state_item pointers,
 * ordered from the start to TARGET.  Abort if no such path exists.
 */
state_item_list
shortest_path_from_start (state_item_number target, symbol_number next_sym)
{
bitset eligible = eligible_state_items (&state_items[target]);
/* VISITED is created with lssi_free as its data freer.  */
Hash_table *visited = hash_initialize (32,
NULL,
(Hash_hasher) lssi_hasher,
(Hash_comparator) lssi_comparator,
(Hash_data_freer) lssi_free);
/* The initial lookahead set contains only symbol number 0.  */
bitset il = bitset_create (nsyms, BITSET_FIXED);
bitset_set (il, 0);
lssi *init = new_lssi (0, NULL, il, true);
lssi_list queue = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL,
NULL, true);
append_lssi (init, visited, queue);
// breadth-first search
bool finished = false;
lssi *n;
while (gl_list_size (queue) > 0)
{
n = (lssi *) gl_list_get_at (queue, 0);
gl_list_remove_at (queue, 0);
state_item_number last = n->si;
/* Done once TARGET is reached with NEXT_SYM in the lookahead.  */
if (target == last && bitset_test (n->lookahead, next_sym))
{
finished = true;
break;
}
state_item *si = &state_items[last];
// Transitions don't change follow_L
if (si->trans >= 0)
{
if (bitset_test (eligible, si->trans))
{
/* The lookahead set of N is shared, hence the `false' argument
   (presumably the "free_lookahead" ownership flag -- confirm
   against new_lssi).  */
lssi *next = new_lssi (si->trans, n, n->lookahead, false);
append_lssi (next, visited, queue);
}
}
// For production steps, follow_L is based on the symbol after the
// nonterminal being produced.
// if no such symbol exists, follow_L is unchanged
// if the symbol is a terminal, follow_L only contains that terminal
// if the symbol is not nullable, follow_L is its FIRSTS set
// if the symbol is nullable, follow_L is its FIRSTS set unioned with
// this logic applied to the next symbol in the rule
if (si->prods)
{
// Compute follow_L as above
bitset lookahead = bitset_create (nsyms, BITSET_FIXED);
item_number *pos = si->item + 1;
for (; !item_number_is_rule_number (*pos); ++pos)
{
item_number it = *pos;
if (ISTOKEN (it))
{
bitset_set (lookahead, it);
break;
}
else
{
bitset_union (lookahead, lookahead, FIRSTS (it));
if (!nullable[it - ntokens])
break;
}
}
/* The end of the rule was reached: everything after the
   nonterminal is nullable, so the current lookahead carries over.  */
if (item_number_is_rule_number (*pos))
bitset_union (lookahead, n->lookahead, lookahead);
/* Whether some queued node has taken LOOKAHEAD.  Only the first
   node that is actually queued is created with the `true' flag.  */
bool lookahead_used = false;
// Try all possible production steps within this parser state.
bitset_iterator biter;
state_item_number nextSI;
BITSET_FOR_EACH (biter, si->prods, nextSI, 0)
{
if (!bitset_test (eligible, nextSI))
continue;
lssi *next = new_lssi (nextSI, n, lookahead,
!lookahead_used);
lookahead_used = append_lssi (next, visited, queue)
|| lookahead_used;
}
/* No node kept LOOKAHEAD: release it here.  */
if (!lookahead_used)
bitset_free (lookahead);
}
}
bitset_free (eligible);
if (!finished)
{
gl_list_free (queue);
fputs ("Cannot find shortest path to conflict state.", stderr);
abort ();
}
state_item_list res =
gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
/* Follow the parent links back from the final node, prepending each
   state-item so that RES runs from the start to TARGET.  */
for (lssi *sn = n; sn != NULL; sn = sn->parent)
gl_list_add_first (res, &state_items[sn->si]);
hash_free (visited);
gl_list_free (queue);
if (trace_flag & trace_cex)
{
fputs ("REDUCE ITEM PATH:\n", stderr);
gl_list_iterator_t it = gl_list_iterator (res);
const void *sip;
while (gl_list_iterator_next (&it, &sip, NULL))
state_item_print ((state_item *) sip, stderr, "");
}
return res;
}
/**
 * Whether the symbol SYM is a member of SYMS, or belongs to the FIRSTS
 * set of a nonterminal member of SYMS.  A null SYMS matches anything.
 */
bool
intersect_symbol (symbol_number sym, bitset syms)
{
  if (!syms)
    return true;
  bitset_iterator iter;
  symbol_number member;
  BITSET_FOR_EACH (iter, syms, member, 0)
    {
      if (member == sym
          || (ISVAR (member) && bitset_test (FIRSTS (member), sym)))
        return true;
    }
  return false;
}
/**
 * Whether some symbol of TS is a member of SYMS, or belongs to the
 * FIRSTS set of a nonterminal member of SYMS.  If either set is null,
 * the answer is true.
 */
bool
intersect (bitset ts, bitset syms)
{
  if (!ts || !syms)
    return true;
  bitset_iterator iter;
  symbol_number member;
  BITSET_FOR_EACH (iter, syms, member, 0)
    {
      if (bitset_test (ts, member)
          || (ISVAR (member) && !bitset_disjoint_p (ts, FIRSTS (member))))
        return true;
    }
  return false;
}
/**
 * Compute the list of state-items that have a production step to SI
 * and are compatible with LOOKAHEAD.
 *
 * The result is a newly created list of pointers into state_items.  It
 * is empty when SI_TRANSITION (SI) holds (presumably: SI is the target
 * of a transition rather than of a production step -- confirm against
 * state-item.h).  A null LOOKAHEAD imposes no lookahead restriction.
 */
state_item_list
lssi_reverse_production (const state_item *si, bitset lookahead)
{
state_item_list result =
gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
if (SI_TRANSITION (si))
return result;
// A production step was made to the current lalr_item.
// Check that the next symbol in the parent lalr_item is
// compatible with the lookahead.
bitset_iterator biter;
state_item_number sin;
BITSET_FOR_EACH (biter, si->revs, sin, 0)
{
state_item *prevsi = &state_items[sin];
if (!production_allowed (prevsi, si))
continue;
bitset prev_lookahead = prevsi->lookahead;
if (item_number_is_rule_number (*(prevsi->item)))
{
// reduce item
// Check that some lookaheads can be preserved.
if (!intersect (prev_lookahead, lookahead))
continue;
}
else
{
// shift item
if (lookahead)
{
// Check that lookahead is compatible with the first
// possible symbols in the rest of the production.
// Alternatively, if the rest of the production is
// nullable, the lookahead must be compatible with
// the lookahead of the corresponding item.
bool applicable = false;
/* Whether every symbol scanned so far is nullable.  */
bool nlable = true;
for (item_number *pos = prevsi->item + 1;
!applicable && nlable && item_number_is_symbol_number (*pos);
++pos)
{
symbol_number next_sym = item_number_as_symbol_number (*pos);
if (ISTOKEN (next_sym))
{
applicable = intersect_symbol (next_sym, lookahead);
nlable = false;
}
else
{
applicable = intersect (FIRSTS (next_sym), lookahead);
if (!applicable)
nlable = nullable[next_sym - ntokens];
}
}
/* Reject PREVSI only when no symbol matched and a non-nullable
   symbol stopped the scan.  */
if (!applicable && !nlable)
continue;
}
}
gl_list_add_last (result, prevsi);
}
return result;
}
+57
View File
@@ -0,0 +1,57 @@
/* Lookahead sensitive state item searches for counterexample generation
Copyright (C) 2020-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef LSSI_H
# define LSSI_H
# include "state-item.h"
/*
All state-item graph nodes should also include a precise follow set (follow_L).
However, ignoring follow_L saves a lot of memory and is a pretty good approximation.
These functions exist to enforce restrictions caused by follow_L sets.
*/
/*
* find shortest lookahead-sensitive path of state-items to target such that
* next_sym is in the follow_L set of target in that position.
*/
state_item_list shortest_path_from_start (state_item_number target,
symbol_number next_sym);
/**
* Determine if the given terminal is in the given symbol set or can begin
* a nonterminal in the given symbol set.
*/
bool intersect_symbol (symbol_number sym, bitset syms);
/**
* Determine if any symbol in ts is in syms
* or can begin a nonterminal in syms.
*/
bool intersect (bitset ts, bitset syms);
/**
* Compute the list of state-items that can make a production step
* to this state-item such that the resulting possible lookahead symbols
* are compatible with the given ones.
*/
state_item_list lssi_reverse_production (const state_item *si, bitset lookahead);
#endif /* LSSI_H */
+283
View File
@@ -0,0 +1,283 @@
/* Top level entry point of Bison.
Copyright (C) 1984, 1986, 1989, 1992, 1995, 2000-2002, 2004-2015,
2018-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include <bitset.h>
#include <bitset/stats.h>
#include <closeout.h>
#include <configmake.h>
#include <progname.h>
#include <quote.h>
#include <quotearg.h>
#include <relocatable.h> /* relocate2 */
#include <timevar.h>
#include "complain.h"
#include "conflicts.h"
#include "counterexample.h"
#include "derives.h"
#include "files.h"
#include "fixits.h"
#include "getargs.h"
#include "glyphs.h"
#include "gram.h"
#include "ielr.h"
#include "lalr.h"
#include "lr0.h"
#include "muscle-tab.h"
#include "nullable.h"
#include "output.h"
#include "parse-gram.h"
#include "print-graph.h"
#include "print-xml.h"
#include "print.h"
#include "reader.h"
#include "reduce.h"
#include "scan-code.h"
#include "scan-gram.h"
#include "scan-skel.h"
#include "symtab.h"
#include "tables.h"
#include "uniqstr.h"
/* Entry point of Bison: set up the program name, locale and message
   domains, parse the command line, read and analyze the grammar,
   build the parser tables, emit the requested reports and the parser,
   then release everything.  Return EXIT_FAILURE when complaint_status
   is nonzero at the end, EXIT_SUCCESS otherwise.  */
int
main (int argc, char *argv[])
{
/* The environment variable BISON_PROGRAM_NAME, when set, overrides
   the program name taken from argv[0].  */
{
char *cp = getenv ("BISON_PROGRAM_NAME");
if (cp)
argv[0] = cp;
}
#define DEPENDS_ON_LIBINTL 1
set_program_name (argv[0]);
setlocale (LC_ALL, "");
{
char *cp = NULL;
char const *localedir = relocate2 (LOCALEDIR, &cp);
bindtextdomain ("bison", localedir);
bindtextdomain ("bison-gnulib", localedir);
bindtextdomain ("bison-runtime", localedir);
free (cp);
}
textdomain ("bison");
/* When LC_CTYPE is exactly "C", quote with plain apostrophes;
   otherwise use the locale's quoting style.  */
{
char const *cp = getenv ("LC_CTYPE");
if (cp && STREQ (cp, "C"))
set_custom_quoting (&quote_quoting_options, "'", "'");
else
set_quoting_style (&quote_quoting_options, locale_quoting_style);
}
atexit (close_stdout);
glyphs_init ();
uniqstrs_new ();
muscle_init ();
complain_init ();
code_scanner_init ();
getargs (argc, argv);
if (trace_flag)
fprintf (stderr, "bison (GNU Bison) %s\n", VERSION);
timevar_enabled = trace_flag & trace_time;
timevar_init ();
timevar_start (tv_total);
if (trace_flag & trace_bitsets)
bitset_stats_enable ();
/* Read the input. Copy some parts of it to FGUARD, FACTION, FTABLE
and FATTRS. In file reader.c. The other parts are recorded in
the grammar; see gram.h. */
timevar_push (tv_reader);
reader (grammar_file);
timevar_pop (tv_reader);
if (complaint_status == status_complaint)
goto finish;
/* Find useless nonterminals and productions and reduce the grammar. */
timevar_push (tv_reduce);
reduce_grammar ();
timevar_pop (tv_reduce);
/* Record other info about the grammar. In files derives and
nullable. */
timevar_push (tv_sets);
derives_compute ();
nullable_compute ();
timevar_pop (tv_sets);
/* Compute LR(0) parser states. See state.h for more info. */
timevar_push (tv_lr0);
generate_states ();
timevar_pop (tv_lr0);
/* Add lookahead sets to parser states. Except when LALR(1) is
requested, split states to eliminate LR(1)-relative
inadequacies. */
ielr ();
/* Find and record any conflicts: places where one token of
lookahead is not enough to disambiguate the parsing. In file
conflicts. Also resolve s/r conflicts based on precedence
declarations. */
timevar_push (tv_conflicts);
conflicts_solve ();
/* Unless lr.keep-unreachable-state is set, remove unreachable states
   and renumber the remaining ones in the dependent tables.  */
if (!muscle_percent_define_flag_if ("lr.keep-unreachable-state"))
{
state_number *old_to_new = xnmalloc (nstates, sizeof *old_to_new);
state_number nstates_old = nstates;
state_remove_unreachable_states (old_to_new);
lalr_update_state_numbers (old_to_new, nstates_old);
conflicts_update_state_numbers (old_to_new, nstates_old);
free (old_to_new);
}
if (report_flag & report_cex
|| warning_is_enabled (Wcounterexamples))
counterexample_init ();
conflicts_print ();
timevar_pop (tv_conflicts);
/* Compute the parser tables. */
timevar_push (tv_actions);
tables_generate ();
timevar_pop (tv_actions);
grammar_rules_useless_report (_("rule useless in parser due to conflicts"));
print_precedence_warnings ();
/* Whether to generate output files. */
bool generate = !(feature_flag & feature_syntax_only);
if (generate)
{
/* Output file names. */
compute_output_file_names ();
/* Output the detailed report on the grammar. */
if (report_flag)
{
timevar_push (tv_report);
print_results ();
timevar_pop (tv_report);
}
/* Output the graph. */
if (graph_flag)
{
timevar_push (tv_graph);
print_graph ();
timevar_pop (tv_graph);
}
/* Output xml.  It is also produced when HTML is requested
   (presumably because the HTML report is derived from it --
   confirm against print_html).  */
if (html_flag || xml_flag)
{
timevar_push (tv_xml);
print_xml ();
timevar_pop (tv_xml);
}
/* Output html. */
if (html_flag)
{
timevar_push (tv_html);
print_html ();
timevar_pop (tv_html);
}
}
/* Stop if there were errors, to avoid trashing previous output
files. */
if (complaint_status == status_complaint)
goto finish;
/* Lookahead tokens are no longer needed. */
timevar_push (tv_free);
lalr_free ();
timevar_pop (tv_free);
/* Output the tables and the parser to ftable. In file output. */
if (generate)
{
timevar_push (tv_parser);
output ();
timevar_pop (tv_parser);
}
/* Common exit path, also reached early after errors: release
   everything.  */
finish:
timevar_push (tv_free);
nullable_free ();
derives_free ();
tables_free ();
states_free ();
reduce_free ();
conflicts_free ();
grammar_free ();
counterexample_free ();
output_file_names_free ();
/* The scanner and parser memory cannot be released right after
parsing, as it contains things such as user actions, prologue,
epilogue etc. */
gram_scanner_free ();
parser_free ();
muscle_free ();
code_scanner_free ();
skel_scanner_free ();
timevar_pop (tv_free);
if (trace_flag & trace_bitsets)
bitset_stats_dump (stderr);
/* Stop timing and print the times. */
timevar_stop (tv_total);
timevar_print (stderr);
/* Fix input file now, even if there are errors: that's less
warnings in the following runs. */
if (!fixits_empty ())
{
if (update_flag)
fixits_run ();
else
complain (NULL, Wother,
_("fix-its can be applied. Rerun with option '--update'."));
fixits_free ();
}
uniqstrs_free ();
complain_free ();
quotearg_free ();
return complaint_status ? EXIT_FAILURE : EXIT_SUCCESS;
}
@@ -0,0 +1,790 @@
/* Muscle table manager for Bison.
Copyright (C) 2001-2015, 2018-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include <hash.h>
#include <quote.h>
#include "complain.h"
#include "files.h"
#include "fixits.h"
#include "getargs.h"
#include "muscle-tab.h"
/* Convert the kind name K ("code", "keyword" or "string") into the
   corresponding muscle_kind.  Abort on any other name.  */
muscle_kind
muscle_kind_new (char const *k)
{
  static struct
  {
    char const *name;
    muscle_kind kind;
  } const known_kinds[] =
    {
      { "code",    muscle_code },
      { "keyword", muscle_keyword },
      { "string",  muscle_string },
    };
  for (size_t i = 0; i < sizeof known_kinds / sizeof known_kinds[0]; ++i)
    if (STREQ (k, known_kinds[i].name))
      return known_kinds[i].kind;
  abort ();
}
/* The name of kind K: "code", "keyword" or "string".  Abort if K is
   none of the known kinds.  */
char const *
muscle_kind_string (muscle_kind k)
{
  char const *name = NULL;
  switch (k)
    {
    case muscle_code:
      name = "code";
      break;
    case muscle_keyword:
      name = "keyword";
      break;
    case muscle_string:
      name = "string";
      break;
    }
  if (!name)
    abort ();
  return name;
}
/* A key-value pair, along with storage that can be reclaimed when
this pair is no longer needed. */
typedef struct
{
/* The muscle name; entries are hashed and compared on it.  */
char const *key;
/* The current value of the muscle.  */
char const *value;
/* Heap storage holding VALUE when the entry owns it (set by
   muscle_grow), NULL otherwise (muscle_insert stores the caller's
   pointer as is).  Freed along with the entry.  */
char *storage;
/* NOTE(review): not set by muscle_entry_new nor read in this part of
   the file -- confirm whether it is still used.  */
muscle_kind kind;
} muscle_entry;
/* The muscle name that stores the %define variable VAR:
   "percent_define(VAR)" when FIELD is null, otherwise
   "percent_define_FIELD(VAR)" for the attribute FIELD of VAR.  */
static uniqstr
muscle_name (char const *var, char const *field)
{
  if (!field)
    return UNIQSTR_CONCAT ("percent_define(", var, ")");
  return UNIQSTR_CONCAT ("percent_define_", field, "(", var, ")");
}
/* An obstack used to create some entries.  It is initialized by
   muscle_init and released by muscle_free.  */
struct obstack muscle_obstack;
/* Initial capacity of muscles hash table. */
#define HT_INITIAL_CAPACITY 257
/* The table of all the muscles, keyed by name.  Created by
   muscle_init, destroyed by muscle_free.  */
static struct hash_table *muscle_table = NULL;
static bool
hash_compare_muscles (void const *x, void const *y)
{
muscle_entry const *m1 = x;
muscle_entry const *m2 = y;
return STREQ (m1->key, m2->key);
}
static size_t
hash_muscle (const void *x, size_t tablesize)
{
muscle_entry const *m = x;
return hash_string (m->key, tablesize);
}
/* Allocate an entry for the muscle named KEY, with no value and no
   owned storage, register it in muscle_table, and return it.  KEY is
   stored as is, not copied.  */
static void *
muscle_entry_new (char const *key)
{
  muscle_entry *entry = xmalloc (sizeof *entry);
  entry->storage = NULL;
  entry->value = NULL;
  entry->key = key;
  hash_xinsert (muscle_table, entry);
  return entry;
}
/* Hash table data freer: release the muscle ENTRY together with the
   storage it owns, if any.  */
static void
muscle_entry_free (void *entry)
{
  muscle_entry *m = entry;
  free (m->storage);
  free (m);
}
/* Set up the module: the scratch obstack and the (empty) table of
   muscles, whose entries are released with muscle_entry_free.  */
void
muscle_init (void)
{
  obstack_init (&muscle_obstack);

  muscle_table
    = hash_xinitialize (HT_INITIAL_CAPACITY, NULL,
                        hash_muscle, hash_compare_muscles,
                        muscle_entry_free);
}
/* Tear down the module: destroy the table of muscles, then release
   the whole scratch obstack.  */
void
muscle_free (void)
{
  hash_free (muscle_table);
  obstack_free (&muscle_obstack, NULL);
}
/* Look for the muscle named KEY. Return NULL if does not exist. */
static muscle_entry *
muscle_lookup (char const *key)
{
muscle_entry probe;
probe.key = key;
return hash_lookup (muscle_table, &probe);
}
/* Define the muscle KEY as VALUE, creating it if needed.  VALUE is
   stored as is (not copied), so the entry owns no storage afterwards;
   storage owned by a previous definition is released.  */
void
muscle_insert (char const *key, char const *value)
{
  muscle_entry *entry = muscle_lookup (key);
  if (!entry)
    /* First insertion in the hash.  */
    entry = muscle_entry_new (key);
  else
    free (entry->storage);
  entry->storage = NULL;
  entry->value = value;
}
/* Append VAL to the current value of KEY, creating KEY if it did not
   exist.  When KEY already had a value, SEPARATOR is inserted between
   the old value and VAL.  TERMINATOR is then appended, but only when
   VAL is at least as long as TERMINATOR and does not already end with
   it.  The result is built on MUSCLE_OBSTACK and stored in freshly
   allocated storage owned by the entry; the previously owned storage
   is released.  */
static void
muscle_grow (const char *key, const char *val,
             const char *separator, const char *terminator)
{
  muscle_entry *entry = muscle_lookup (key);
  if (!entry)
    entry = muscle_entry_new (key);
  else
    {
      /* Copy the old value before releasing the storage, which may be
         what holds it.  */
      obstack_sgrow (&muscle_obstack, entry->value);
      obstack_sgrow (&muscle_obstack, separator);
      free (entry->storage);
    }

  obstack_sgrow (&muscle_obstack, val);

  size_t val_len = strlen (val);
  size_t term_len = strlen (terminator);
  bool needs_terminator
    = (term_len <= val_len
       && STRNEQ (val + val_len - term_len, terminator));
  if (needs_terminator)
    obstack_sgrow (&muscle_obstack, terminator);

  /* Move the result from the obstack to the heap.  */
  char const *grown = obstack_finish0 (&muscle_obstack);
  char *copy = xstrdup (grown);
  entry->storage = copy;
  entry->value = copy;
  obstack_free (&muscle_obstack, grown);
}
/* Append to the value of KEY (via muscle_grow) a synchronization line
   for the start of LOC: a b4_syncline invocation carrying the line
   number and the C-quoted, mapped file name.  */
static void
muscle_syncline_grow (char const *key, location loc)
{
  /* NOTE(review): whether map_file_name returns storage that the
     caller must free is not visible from here -- confirm.  */
  obstack_printf (&muscle_obstack, "]b4_syncline(%d, ", loc.start.line);
  obstack_quote (&muscle_obstack,
                 quotearg_style (c_quoting_style,
                                 map_file_name (loc.start.file)));
  obstack_sgrow (&muscle_obstack, ")dnl\n[");

  char const *syncline = obstack_finish0 (&muscle_obstack);
  muscle_grow (key, syncline, "", "");
  obstack_free (&muscle_obstack, syncline);
}
/* Append the code VAL to the value of KEY, preceded by a
   synchronization line for LOC (see muscle_syncline_grow).  The code
   is grown with "\n" as terminator (see muscle_grow).  */
void
muscle_code_grow (const char *key, const char *val, location loc)
{
  /* First the syncline...  */
  muscle_syncline_grow (key, loc);
  /* ...then the code itself.  */
  muscle_grow (key, val, "", "\n");
}
/* Append the pair "[A1, A2]", both members being quoted with
   obstack_quote, to the value of MUSCLE.  Successive pairs are
   separated by ",\n".  */
void
muscle_pair_list_grow (const char *muscle,
                       const char *a1, const char *a2)
{
  obstack_sgrow (&muscle_obstack, "[");
  obstack_quote (&muscle_obstack, a1);
  obstack_sgrow (&muscle_obstack, ", ");
  obstack_quote (&muscle_obstack, a2);
  obstack_sgrow (&muscle_obstack, "]");

  char const *item = obstack_finish0 (&muscle_obstack);
  muscle_grow (muscle, item, ",\n", "");
  obstack_free (&muscle_obstack, item);
}
char const *
muscle_find_const (char const *key)
{
muscle_entry *entry = muscle_lookup (key);
return entry ? entry->value : NULL;
}
/* The modifiable value of the muscle KEY, or NULL if KEY is not
   defined.  The entry must own its value (i.e., its value is its
   storage); this is asserted.  */
char *
muscle_find (char const *key)
{
  muscle_entry *entry = muscle_lookup (key);
  if (!entry)
    return NULL;
  aver (entry->value == entry->storage);
  return entry->storage;
}
/* Append BOUND to the value of KEY, as
   "[[FILE:LINE.COLUMN@@BYTE]]", where FILE is escaped with
   obstack_escape.  */
static void
muscle_boundary_grow (char const *key, boundary bound)
{
  obstack_sgrow (&muscle_obstack, "[[");
  obstack_escape (&muscle_obstack, bound.file);
  obstack_printf (&muscle_obstack, ":%d.%d@@%d]]",
                  bound.line, bound.column, bound.byte);

  char const *text = obstack_finish0 (&muscle_obstack);
  muscle_grow (key, text, "", "");
  obstack_free (&muscle_obstack, text);
}
/* Append LOC to the value of KEY: its start boundary, then its end
   boundary (see muscle_boundary_grow), separated by ", ".  */
void
muscle_location_grow (char const *key, location loc)
{
  muscle_boundary_grow (key, loc.start);
  /* Growing by the empty string only contributes the separator.  */
  muscle_grow (key, "", ", ", "");
  muscle_boundary_grow (key, loc.end);
}
/* Switch cases shared by string_decode and location_decode, meant to
   be expanded inside a `switch (*Value)'.  They undo escapes while
   appending to muscle_obstack: "$][" yields "$", "@@" yields "@",
   "@{" yields "[" and "@}" yields "]"; any other character is copied
   as is.  Value is advanced over the extra characters of an escape,
   and is left on its last character.  A malformed escape trips an
   assertion.  */
#define COMMON_DECODE(Value) \
case '$': \
++(Value); aver (*(Value) == ']'); \
++(Value); aver (*(Value) == '['); \
obstack_sgrow (&muscle_obstack, "$"); \
break; \
case '@': \
switch (*++(Value)) \
{ \
case '@': obstack_sgrow (&muscle_obstack, "@" ); break; \
case '{': obstack_sgrow (&muscle_obstack, "[" ); break; \
case '}': obstack_sgrow (&muscle_obstack, "]" ); break; \
default: aver (false); break; \
} \
break; \
default: \
obstack_1grow (&muscle_obstack, *(Value)); \
break;
/* Reverse of obstack_escape.

   Return a newly allocated (xstrdup) copy of the value of the muscle
   KEY with its escapes decoded (see COMMON_DECODE), or NULL when
   muscle_find_const yields no value for KEY.  A bare '[' or ']' in
   the value trips an assertion.  */
static char *
string_decode (char const *key)
{
char const *value = muscle_find_const (key);
if (!value)
return NULL;
/* The body also runs on the terminating NUL, which the default case
   of COMMON_DECODE copies to the obstack: the decoded string is thus
   already terminated, hence the plain obstack_finish below.  */
do {
switch (*value)
{
COMMON_DECODE (value)
case '[':
case ']':
aver (false);
break;
}
} while (*value++);
char const *value_decoded = obstack_finish (&muscle_obstack);
char *res = xstrdup (value_decoded);
obstack_free (&muscle_obstack, value_decoded);
return res;
}
/* Reverse of muscle_location_grow.

   Parse VALUE, which must be of the form "[[START]], [[END]]" where
   START and END are escaped boundaries (see COMMON_DECODE), and return
   the corresponding location.  Any deviation from that format,
   including a null VALUE, trips an assertion.  */
static location
location_decode (char const *value)
{
aver (value);
aver (*value == '[');
++value; aver (*value == '[');
location loc;
/* Accumulate the decoded text of the current boundary on the obstack
   until its closing "]]".  */
while (*++value)
switch (*value)
{
COMMON_DECODE (value)
case '[':
aver (false);
break;
case ']':
++value; aver (*value == ']');
char *boundary_str = obstack_finish0 (&muscle_obstack);
switch (*++value)
{
/* End of the first boundary: store it as the start, then skip
   the ", [[" that introduces the second one.  */
case ',':
boundary_set_from_string (&loc.start, boundary_str);
obstack_free (&muscle_obstack, boundary_str);
++value; aver (*value == ' ');
++value; aver (*value == '[');
++value; aver (*value == '[');
break;
/* End of the string: this was the second boundary, we are done.  */
case '\0':
boundary_set_from_string (&loc.end, boundary_str);
obstack_free (&muscle_obstack, boundary_str);
return loc;
break;
default:
aver (false);
break;
}
break;
}
/* VALUE ended before the closing "]]" of the end boundary.  */
aver (false);
return loc;
}
/* Append the item "[[[[USER_NAME]], LOC]]" to the value of KEY, where
   LOC is formatted by muscle_location_grow.  Successive items are
   separated by ",".  */
void
muscle_user_name_list_grow (char const *key, char const *user_name,
                            location loc)
{
  /* Open the item; the separator only applies when KEY already has a
     value.  */
  muscle_grow (key, "[[[[", ",", "");
  muscle_grow (key, user_name, "", "");
  muscle_grow (key, "]], ", "", "");
  muscle_location_grow (key, loc);
  /* Close the item.  */
  muscle_grow (key, "]]", "", "");
}
/** Return a newly allocated string that spells the %define directive
    performing \a assignment.

    @param assignment  "VAR", or "VAR=VAL".
    @param kind        how to delimit \a value: braces for muscle_code,
                       double quotes for muscle_string, nothing
                       otherwise.
    @param value       the value to use when \a assignment has no '='.
                       Ignored if null or empty.

    For instance, with keyword values:
    "foo", NULL      => "%define foo"
    "foo", "baz"     => "%define foo baz"
    "foo=bar", NULL  => "%define foo bar"
    "foo=bar", "baz" => "%define foo bar"
    "foo=", NULL     => "%define foo"
    "foo=", "baz"    => "%define foo"
 */
static
char *
define_directive (char const *assignment,
                  muscle_kind kind,
                  char const *value)
{
  bool const has_value = value && *value;

  /* VALUE is printed only if ASSIGNMENT does not carry its own.  */
  char const *fmt;
  if (strchr (assignment, '=') || !has_value)
    fmt = "%%define %s";
  else if (kind == muscle_code)
    fmt = "%%define %s {%s}";
  else if (kind == muscle_string)
    fmt = "%%define %s \"%s\"";
  else
    fmt = "%%define %s %s";

  size_t size = strlen (fmt) + strlen (assignment);
  if (value)
    size += strlen (value);
  char *res = xmalloc (size);
  sprintf (res, fmt, assignment, value);

  /* Turn "VAR=VAL" into "VAR VAL", and "VAR=" into "VAR".  */
  char *eq = strchr (res, '=');
  if (eq)
    *eq = eq[1] ? ' ' : '\0';
  return res;
}
/** If the \a variable name is obsolete, return the name to use,
 * otherwise \a variable.  If the \a value is obsolete, update it too.
 *
 * An obsolete entry of the form "VAR=VAL" applies only when
 * \a variable is exactly VAR and \a *value is exactly VAL; an entry
 * without '=' applies when \a variable is exactly that name.
 *
 * When a conversion applies:
 * - \a *old and \a *upd are set to newly allocated strings spelling the
 *   obsolete and the updated directives (for the deprecation warning);
 *   the caller must free them;
 * - the returned name is newly allocated, and the caller must free it;
 * - if the updated entry has the form "VAR=VAL", \a *value is set to
 *   point to VAL *inside the returned string*, so it must not be used
 *   after the returned name is freed.
 *
 * Otherwise nothing is allocated, \a *old, \a *upd and \a *value are
 * left untouched, and the returned value is exactly \a variable.  */
static
char const *
muscle_percent_variable_update (char const *variable,
                                muscle_kind kind,
                                char const **value,
                                char **old, char **upd)
{
  typedef struct
  {
    const char *obsolete;
    const char *updated;
    muscle_kind kind;
  } conversion_type;
  /* Read-only table: build it once rather than on each call.  */
  static const conversion_type conversion[] =
  {
    { "%error-verbose",             "parse.error=verbose",       muscle_keyword },
    { "%error_verbose",             "parse.error=verbose",       muscle_keyword },
    { "abstract",                   "api.parser.abstract",       muscle_keyword },
    { "annotations",                "api.parser.annotations",    muscle_code },
    { "api.push_pull",              "api.push-pull",             muscle_keyword },
    { "api.tokens.prefix",          "api.token.prefix",          muscle_code },
    { "extends",                    "api.parser.extends",        muscle_keyword },
    { "filename_type",              "api.filename.type",         muscle_code },
    { "final",                      "api.parser.final",          muscle_keyword },
    { "implements",                 "api.parser.implements",     muscle_keyword },
    { "lex_symbol",                 "api.token.constructor",     -1 },
    { "location_type",              "api.location.type",         muscle_code },
    { "lr.default-reductions",      "lr.default-reduction",      muscle_keyword },
    { "lr.keep-unreachable-states", "lr.keep-unreachable-state", muscle_keyword },
    { "lr.keep_unreachable_states", "lr.keep-unreachable-state", muscle_keyword },
    { "namespace",                  "api.namespace",             muscle_code },
    { "package",                    "api.package",               muscle_code },
    { "parser_class_name",          "api.parser.class",          muscle_code },
    { "public",                     "api.parser.public",         muscle_keyword },
    { "strictfp",                   "api.parser.strictfp",       muscle_keyword },
    { "stype",                      "api.value.type",            -1 },
    { "variant=",                   "api.value.type=variant",    -1 },
    { "variant=true",               "api.value.type=variant",    -1 },
    { NULL, NULL, -1, }
  };

  for (conversion_type const *c = conversion; c->obsolete; ++c)
    {
      char const *eq = strchr (c->obsolete, '=');
      bool match;
      if (eq)
        {
          /* "VAR=VAL": VARIABLE must be exactly VAR, not merely start
             with it.  When strncmp succeeds, VARIABLE has at least
             NAME_LEN characters, so reading variable[name_len] is
             safe.  */
          size_t name_len = eq - c->obsolete;
          match = (!strncmp (c->obsolete, variable, name_len)
                   && variable[name_len] == '\0'
                   && STREQ (eq + 1, *value));
        }
      else
        match = STREQ (c->obsolete, variable);
      if (!match)
        continue;

      /* Generate the deprecation warning. */
      *old = c->obsolete[0] == '%'
        ? xstrdup (c->obsolete)
        : define_directive (c->obsolete, kind, *value);
      *upd = define_directive (c->updated, c->kind, *value);

      /* Update the variable and its value. */
      char *res = xstrdup (c->updated);
      char *eq2 = strchr (res, '=');
      if (eq2)
        {
          /* Split "VAR=VAL" in place: RES becomes "VAR", and the new
             value points to "VAL" in the same buffer.  */
          *eq2 = '\0';
          *value = eq2 + 1;
        }
      return res;
    }
  return variable;
}
/* Define the %define variable VAR as VALUE, of kind KIND, defined at
   VARIABLE_LOC; HOW tells where the definition comes from.

   Obsolete variable names and values are first converted (see
   muscle_percent_variable_update), and a deprecation warning is issued
   for them.  A definition from the grammar file of an already defined
   variable is diagnosed as a redefinition: a warning when the value is
   unchanged, an error otherwise.  Besides the value, this records the
   location, the syncline, HOW and KIND of the variable in companion
   muscles, and appends the variable to
   "percent_define_user_variables".  */
void
muscle_percent_define_insert (char const *var, location variable_loc,
muscle_kind kind,
char const *value,
muscle_percent_define_how how)
{
/* Backward compatibility. */
char *old = NULL;
char *upd = NULL;
/* VARIABLE is VAR itself, or a newly allocated name (freed at the end
   of this function) when VAR is obsolete; in that case VALUE may have
   been redirected inside that allocated name.  */
char const *variable
= muscle_percent_variable_update (var, kind,
&value, &old, &upd);
uniqstr name = muscle_name (variable, NULL);
uniqstr loc_name = muscle_name (variable, "loc");
uniqstr syncline_name = muscle_name (variable, "syncline");
uniqstr how_name = muscle_name (variable, "how");
uniqstr kind_name = muscle_name (variable, "kind");
/* Command-line options are processed before the grammar file. */
bool warned = false;
if (how == MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
{
char const *current_value = muscle_find_const (name);
if (current_value)
{
/* NOTE(review): muscle_percent_define_default defines a variable
   without recording its "how" muscle; if such a variable can reach
   this point, muscle_find_const returns NULL here -- confirm that
   callers rule this out.  */
long l = strtol (muscle_find_const (how_name), NULL, 10);
muscle_percent_define_how how_old
= 0 <= l && l <= INT_MAX ? l : INT_MAX;
/* Keep the previous definition untouched, silently (presumably it
   comes from the command line and takes precedence -- confirm
   against the definition of MUSCLE_PERCENT_DEFINE_F).  */
if (how_old == MUSCLE_PERCENT_DEFINE_F)
goto end;
/* If assigning the same value, make it a warning. */
warnings warn = STREQ (value, current_value) ? Wother : complaint;
complain (&variable_loc, warn,
_("%%define variable %s redefined"),
quote (variable));
location loc = muscle_percent_define_get_loc (variable);
subcomplain (&loc, warn, _("previous definition"));
fixits_register (&variable_loc, "");
warned = true;
}
}
/* Report the obsolete spelling only if no redefinition was reported
   at this location.  */
if (!warned && old && upd)
deprecated_directive (&variable_loc, old, upd);
MUSCLE_INSERT_STRING (name, value);
muscle_insert (loc_name, "");
muscle_location_grow (loc_name, variable_loc);
muscle_insert (syncline_name, "");
muscle_syncline_grow (syncline_name, variable_loc);
muscle_user_name_list_grow ("percent_define_user_variables", variable,
variable_loc);
MUSCLE_INSERT_INT (how_name, how);
MUSCLE_INSERT_STRING (kind_name, muscle_kind_string (kind));
end:
free (old);
free (upd);
/* VARIABLE was allocated only if it differs from VAR.  */
if (variable != var)
free ((char *) variable);
}
/* Make sure the Boolean %define variable VARIABLE has the value VALUE,
   as if set at LOC in the grammar file.  This is used for backward
   compatibility, e.g., "%define api.pure" supersedes "%pure-parser".

   Nothing is done if VARIABLE is already defined with that value;
   otherwise it is (re)defined as "" for true, or "false".  */
void
muscle_percent_define_ensure (char const *variable, location loc,
                              bool value)
{
  bool const defined
    = muscle_find_const (muscle_name (variable, NULL)) != NULL;
  /* The flag may be queried only when VARIABLE is defined.  */
  if (defined && muscle_percent_define_flag_if (variable) == value)
    return;
  muscle_percent_define_insert (variable, loc, muscle_keyword,
                                value ? "" : "false",
                                MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE);
}
/* Mark %define VARIABLE as used. */
static void
muscle_percent_define_use (char const *variable)
{
muscle_insert (muscle_name (variable, "bison_variables"), "");
}
/* The raw (still escaped) value of the %define variable VARIABLE, or
   of its attribute FIELD if FIELD is not null.  The variable is not
   registered as used.  A fatal error is raised if there is no such
   muscle.  */
static
char const *
muscle_percent_define_get_raw (char const *variable, char const *field)
{
  char const *res = muscle_find_const (muscle_name (variable, field));
  if (res)
    return res;
  complain (NULL, fatal, _("%s: undefined %%define variable %s"),
            "muscle_percent_define_get_raw", quote (variable));
  return NULL;
}
/* Return a newly allocated copy of the decoded value of the %define
   variable VARIABLE, or of the empty string when it has no value, and
   register VARIABLE as used.  The caller must free the result.  */
char *
muscle_percent_define_get (char const *variable)
{
  char *decoded = string_decode (muscle_name (variable, NULL));
  char *res = decoded ? decoded : xstrdup ("");
  muscle_percent_define_use (variable);
  return res;
}
/* The kind of the %define variable VARIABLE, as recorded in its
   "kind" attribute.  An error if undefined.  */
static muscle_kind
muscle_percent_define_get_kind (char const *variable)
{
  char const *kind_name = muscle_percent_define_get_raw (variable, "kind");
  return muscle_kind_new (kind_name);
}
/* Issue a Wdeprecated diagnostic, located at the definition of
   VARIABLE, if the kind of the %define variable VARIABLE is not KIND.
   An error if VARIABLE is undefined.  */
static void
muscle_percent_define_check_kind (char const *variable, muscle_kind kind)
{
  if (muscle_percent_define_get_kind (variable) == kind)
    return;

  location loc = muscle_percent_define_get_loc (variable);
  switch (kind)
    {
    case muscle_code:
      complain (&loc, Wdeprecated,
                _("%%define variable '%s' requires '{...}' values"),
                variable);
      break;
    case muscle_keyword:
      complain (&loc, Wdeprecated,
                _("%%define variable '%s' requires keyword values"),
                variable);
      break;
    case muscle_string:
      complain (&loc, Wdeprecated,
                _("%%define variable '%s' requires '\"...\"' values"),
                variable);
      break;
    }
}
/* The location recorded for the %define variable VARIABLE, decoded
   from its "loc" attribute.  An error if undefined.  */
location
muscle_percent_define_get_loc (char const *variable)
{
  char const *encoded = muscle_percent_define_get_raw (variable, "loc");
  return location_decode (encoded);
}
/* The raw "syncline" attribute recorded for the %define variable
   VARIABLE.  An error if undefined.  */
char const *
muscle_percent_define_get_syncline (char const *variable)
{
  char const *syncline = muscle_percent_define_get_raw (variable, "syncline");
  return syncline;
}
/* Whether the %define variable VARIABLE is defined.  If so, it is also
   registered as used.  */
bool
muscle_percent_define_ifdef (char const *variable)
{
  if (!muscle_find_const (muscle_name (variable, NULL)))
    return false;
  muscle_percent_define_use (variable);
  return true;
}
/* The Boolean value of the %define variable VARIABLE: true if its
   value is empty or "true", false if it is "false".  Any other value
   is reported as an error (once per variable, thanks to the
   "invalid_boolean" companion muscle) and yields false.  VARIABLE is
   registered as used, and checked to be of keyword kind.  A fatal
   error is raised if VARIABLE is undefined.  */
bool
muscle_percent_define_flag_if (char const *variable)
{
  uniqstr invalid_boolean_name = muscle_name (variable, "invalid_boolean");

  if (!muscle_percent_define_ifdef (variable))
    {
      complain (NULL, fatal, _("%s: undefined %%define variable %s"),
                "muscle_percent_define_flag", quote (variable));
      return false;
    }

  char *value = muscle_percent_define_get (variable);
  muscle_percent_define_check_kind (variable, muscle_keyword);

  bool res = false;
  if (value[0] == '\0' || STREQ (value, "true"))
    res = true;
  else if (!STREQ (value, "false")
           && !muscle_find_const (invalid_boolean_name))
    {
      /* Remember that this variable was already diagnosed.  */
      muscle_insert (invalid_boolean_name, "");
      location loc = muscle_percent_define_get_loc (variable);
      complain (&loc, complaint,
                _("invalid value for %%define Boolean variable %s"),
                quote (variable));
    }
  free (value);
  return res;
}
/* Unless the %define variable VARIABLE is already defined, define it
   as VALUE, of keyword kind, with the placeholder location
   "<default value>" (line, column and byte set to -1) and an empty
   syncline.  */
void
muscle_percent_define_default (char const *variable, char const *value)
{
  uniqstr name = muscle_name (variable, NULL);
  if (muscle_find_const (name))
    return;

  MUSCLE_INSERT_STRING (name, value);
  MUSCLE_INSERT_STRING (muscle_name (variable, "kind"), "keyword");

  /* There is no real location for a default value.  */
  location loc;
  loc.start.file = "<default value>";
  loc.start.line = -1;
  loc.start.column = -1;
  loc.start.byte = -1;
  loc.end = loc.start;

  uniqstr loc_name = muscle_name (variable, "loc");
  muscle_insert (loc_name, "");
  muscle_location_grow (loc_name, loc);

  muscle_insert (muscle_name (variable, "syncline"), "");
}
/* Check that %define variables have accepted values.

   VALUES is a sequence of groups, terminated by a null pointer.  Each
   group is made of a variable name, followed by its accepted values,
   followed by a null pointer.  For each group, the variable must be
   defined (fatal error otherwise) and is checked to be of keyword
   kind; an error listing the accepted values is reported if its
   decoded value is not one of them.  */
void
muscle_percent_define_check_values (char const * const *values)
{
for (; *values; ++values)
{
/* Start of the current group: the variable name.  */
char const * const *variablep = values;
uniqstr name = muscle_name (*variablep, NULL);
char *value = string_decode (name);
muscle_percent_define_check_kind (*variablep, muscle_keyword);
if (value)
{
/* Look for VALUE among the accepted values of the group.  */
for (++values; *values; ++values)
if (STREQ (value, *values))
break;
if (!*values)
{
/* Not found.  VALUES is on the null pointer that ends the group.  */
location loc = muscle_percent_define_get_loc (*variablep);
complain (&loc, complaint,
_("invalid value for %%define variable %s: %s"),
quote (*variablep), quote_n (1, value));
/* This walk also leaves VALUES on the end of the group.  */
for (values = variablep + 1; *values; ++values)
subcomplain (&loc, complaint | no_caret | silent,
_("accepted value: %s"), quote (*values));
}
else
/* Found: skip the remaining accepted values of the group.  */
while (*values)
++values;
free (value);
}
else
complain (NULL, fatal, _("%s: undefined %%define variable %s"),
"muscle_percent_define_check_values", quote (*variablep));
}
}
/* Append CODE, located at CODE_LOC, to the muscle
   "percent_code(QUALIFIER)", and record QUALIFIER together with
   QUALIFIER_LOC in the "percent_code_user_qualifiers" list.  */
void
muscle_percent_code_grow (char const *qualifier, location qualifier_loc,
                          char const *code, location code_loc)
{
  char const *code_muscle = UNIQSTR_CONCAT ("percent_code(", qualifier, ")");
  muscle_code_grow (code_muscle, code, code_loc);
  muscle_user_name_list_grow ("percent_code_user_qualifiers",
                              qualifier, qualifier_loc);
}
/*--------------------------------------------------------------.
| Write to OUT the m4_define that binds b4_<key of ENTRY> to    |
| the double-quoted value of ENTRY.  Always return true.        |
`--------------------------------------------------------------*/

static inline bool
muscle_m4_output (muscle_entry *entry, FILE *out)
{
  /* First the macro name, then its value on its own line.  */
  fprintf (out, "m4_define([b4_%s],\n", entry->key);
  fprintf (out, "[[%s]])\n\n\n", entry->value);
  return true;
}
/* Adapter with untyped arguments around muscle_m4_output: ENTRY is
forwarded as the muscle entry and OUT as the output stream. It is the
callback handed to hash_do_for_each by muscles_m4_output. */
static bool
muscle_m4_output_processor (void *entry, void *out)
{
return muscle_m4_output (entry, out);
}
/* Write to OUT one m4_define per entry of MUSCLE_TABLE, by applying
muscle_m4_output_processor to every entry of the table. */
void
muscles_m4_output (FILE *out)
{
hash_do_for_each (muscle_table, muscle_m4_output_processor, out);
}
@@ -0,0 +1,223 @@
/* Muscle table manager for Bison,
Copyright (C) 2001-2003, 2006-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef MUSCLE_TAB_H_
# define MUSCLE_TAB_H_
# include <quotearg.h>
# include "location.h"
/* The kind of value associated to this muscle, depending on the
syntax of the value: keyword (no delimiter, e.g., true), string
(double quotes, e.g., "foo.h"), or code (braces, e.g., {int}). */
typedef enum
{
muscle_code,
muscle_keyword,
muscle_string
} muscle_kind;
/* Conversion from string. */
muscle_kind muscle_kind_new (char const *k);
/* Conversion to string. */
char const *muscle_kind_string (muscle_kind k);
/* Create the MUSCLE_TABLE, and initialize it with default values.
Also set up the MUSCLE_OBSTACK. */
void muscle_init (void);
/* Insert (KEY, VALUE). If KEY already existed, overwrite the
previous value. Otherwise create as a muscle_string type. */
void muscle_insert (char const *key, char const *value);
/* Find the value of muscle KEY. Unlike MUSCLE_FIND, this is always
reliable to determine whether KEY has a value. */
char const *muscle_find_const (char const *key);
/* Find the value of muscle KEY. Abort if muscle_insert was invoked
more recently than muscle_grow for KEY since muscle_find can't
return a char const *. */
char *muscle_find (char const *key);
/* Free all the memory consumed by the muscle machinery only. */
void muscle_free (void);
/* An obstack dedicated to receive muscle keys and values. */
extern struct obstack muscle_obstack;
# define MUSCLE_INSERT_BOOL(Key, Value) \
do { \
int v__ = Value; \
MUSCLE_INSERT_INT (Key, v__); \
} while (0)
# define MUSCLE_INSERTF(Key, Format, Value) \
do { \
obstack_printf (&muscle_obstack, Format, Value); \
muscle_insert (Key, obstack_finish0 (&muscle_obstack)); \
} while (0)
# define MUSCLE_INSERT_INT(Key, Value) \
MUSCLE_INSERTF (Key, "%d", Value)
# define MUSCLE_INSERT_LONG_INT(Key, Value) \
MUSCLE_INSERTF (Key, "%ld", Value)
/* Key -> Value, but don't apply escaping to Value. */
# define MUSCLE_INSERT_STRING_RAW(Key, Value) \
MUSCLE_INSERTF (Key, "%s", Value)
/* Key -> Value, applying M4 escaping to Value. */
# define MUSCLE_INSERT_STRING(Key, Value) \
do { \
obstack_escape (&muscle_obstack, Value); \
muscle_insert (Key, obstack_finish0 (&muscle_obstack)); \
} while (0)
/* Key -> Value, applying C escaping to Value (and then M4). */
# define MUSCLE_INSERT_C_STRING(Key, Value) \
MUSCLE_INSERT_STRING (Key, quotearg_style (c_quoting_style, Value))
/* Append VALUE to the current value of KEY, using muscle_grow. But
in addition, issue a synchronization line for the location LOC.
Be sure to append on a new line. */
void muscle_code_grow (const char *key, const char *value, location loc);
/* MUSCLE is an M4 list of pairs. Create or extend it with the pair
(A1, A2) after escaping both values with digraphs. Note that because the
muscle values are output *double* quoted, one needs to strip the first level
of quotes to reach the list itself. */
void muscle_pair_list_grow (const char *muscle,
const char *a1, const char *a2);
/* Grow KEY for the occurrence of the name USER_NAME at LOC appropriately for
use with b4_check_user_names in ../data/bison.m4. USER_NAME is not escaped
with digraphs, so it must not contain '[' or ']'. */
void muscle_user_name_list_grow (char const *key, char const *user_name,
location loc);
/* In the format '[[file_name:line.column]], [[file_name:line.column]]',
append LOC to MUSCLE. Use digraphs for special characters in each
file name. */
void muscle_location_grow (char const *key, location loc);
/* Indicates whether a variable's value was specified with -D/--define, with
-F/--force-define, or in the grammar file. */
typedef enum {
/* Specified on the command line with -D/--define. */
MUSCLE_PERCENT_DEFINE_D = 0,
/* Specified on the command line with -F/--force-define. */
MUSCLE_PERCENT_DEFINE_F,
/* Specified in the grammar file. */
MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE
} muscle_percent_define_how;
/* Define the muscles for %define variable VARIABLE with VALUE specified
at VARIABLE_LOC in the manner HOW unless it was specified in the
grammar file while the previous definition for VARIABLE was specified
with -F/--force-define. Complain if a previous definition is being
overridden and the new definition is specified in the grammar file.
(These rules support the documented behavior as long as command-line
definitions are processed before grammar file definitions.) Record
this as a user occurrence of VARIABLE by invoking
muscle_user_name_list_grow. */
void muscle_percent_define_insert (char const *variable, location variable_loc,
muscle_kind kind,
char const *value,
muscle_percent_define_how how);
/* Make sure that VARIABLE is set to the boolean VALUE. Warn on mismatches
only, but accept repeated declaration. Used for backward compatibility
between old directives such as %pure-parser, and the recommended use of
variables (%define api.pure). */
void muscle_percent_define_ensure (char const *variable, location variable_loc,
bool value);
/* Mimic b4_percent_define_get in ../data/bison.m4 exactly. That is, if the
%define variable VARIABLE is defined, return its value. Otherwise, return
the empty string. Also, record Bison's usage of VARIABLE by defining
b4_percent_define_bison_variables(VARIABLE). The caller is responsible for
freeing the memory of the returned string. */
char *muscle_percent_define_get (char const *variable);
/* Mimic b4_percent_define_get_loc in ../data/bison.m4 exactly. That is,
if the %define variable VARIABLE is undefined, complain fatally since that's
a Bison error. Otherwise, return its definition location in a form
appropriate for passing (by address) as the first argument of complain.
Don't record this as a Bison usage of VARIABLE as there's no reason to
suspect that the user-supplied value has yet influenced the output. */
location muscle_percent_define_get_loc (char const *variable);
/* Mimic b4_percent_define_get_syncline in ../data/bison.m4 exactly. That
is, if the %define variable VARIABLE is undefined, complain fatally since
that's a Bison error. Otherwise, return its definition location as a
b4_syncline invocation. Don't record this as a Bison usage of VARIABLE as
there's no reason to suspect that the user-supplied value has yet influenced
the output. */
char const *muscle_percent_define_get_syncline (char const *variable);
/* Mimic b4_percent_define_ifdef in ../data/bison.m4 exactly. That is, if the
%define variable VARIABLE is defined, return true. Otherwise, return false.
Also, record Bison's usage of VARIABLE by defining
b4_percent_define_bison_variables(VARIABLE). */
bool muscle_percent_define_ifdef (char const *variable);
/* Mimic b4_percent_define_flag_if in ../data/bison.m4 exactly. That is, if
the %define variable VARIABLE is defined to "" or "true", return true. If
it is defined to "false", return false. Complain if it is undefined (a
Bison error since the default value should have been set already) or defined
to any other value (possibly a user error). Also, record Bison's usage of
VARIABLE by defining b4_percent_define_bison_variables(VARIABLE). */
bool muscle_percent_define_flag_if (char const *variable);
/* Mimic b4_percent_define_default in ../data/bison.m4 exactly. That is, if
the %define variable VARIABLE is undefined, set its value to VALUE.
Don't record this as a Bison usage of VARIABLE as there's no reason to
suspect that the value has yet influenced the output. */
void muscle_percent_define_default (char const *variable, char const *value);
/* Mimic b4_percent_define_check_values in ../data/bison.m4 exactly except that
the VALUES structure is more appropriate for C. That is, VALUES points to a
list of strings that is partitioned into sublists by NULL's, one terminating
each sublist. The last sublist is followed by a second NULL. For each
sublist, the first string is the name of a %define variable, and all
remaining strings in that sublist are the valid values for that variable.
Complain if such a variable is undefined (a Bison error since the default
value should have been set already) or defined to any other value (possibly
a user error). Don't record this as a Bison usage of the variable as
there's no reason to suspect that the value has yet influenced the
output. */
void muscle_percent_define_check_values (char const * const *values);
/* Grow the muscle for the %code qualifier QUALIFIER appearing at
QUALIFIER_LOC with code CODE appearing at CODE_LOC. Record this as a
user occurrence of QUALIFIER by invoking
muscle_user_name_list_grow. */
void muscle_percent_code_grow (char const *qualifier, location qualifier_loc,
char const *code, location code_loc);
/* Output the definition of all the current muscles into a list of
m4_defines. */
void muscles_m4_output (FILE *out);
#endif /* not MUSCLE_TAB_H_ */
@@ -0,0 +1,46 @@
/* Named symbol references for Bison
Copyright (C) 2009-2015, 2018-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include "named-ref.h"
/* Return a newly allocated named reference holding the name ID and
   its location LOC.  Release it with named_ref_free.  */
named_ref *
named_ref_new (uniqstr id, location loc)
{
  named_ref *ref = xmalloc (sizeof *ref);
  *ref = (named_ref) { .id = id, .loc = loc };
  return ref;
}
/* Return a newly allocated named reference with the same id and
location as R. R must not be NULL: it is dereferenced
unconditionally. */
named_ref *
named_ref_copy (const named_ref *r)
{
return named_ref_new (r->id, r->loc);
}
/* Release the memory of the named reference R. Only the object itself
is freed; its id is left untouched. */
void
named_ref_free (named_ref *r)
{
free (r);
}
@@ -0,0 +1,46 @@
/* Named symbol references for Bison
Copyright (C) 2009-2015, 2018-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef NAMED_REF_H_
# define NAMED_REF_H_
# include "location.h"
# include "uniqstr.h"
/* Named reference object. Keeps information about
the symbolic name of a symbol in a rule. */
typedef struct named_ref
{
/* Symbolic name given by the user. */
uniqstr id;
/* Location of the symbolic name, not including the brackets. */
location loc;
} named_ref;
/* Allocate a named reference object. */
named_ref *named_ref_new (uniqstr id, location loc);
/* Allocate and return a copy. */
named_ref *named_ref_copy (const named_ref *r);
/* Free a named reference object. */
void named_ref_free (named_ref *r);
#endif /* !NAMED_REF_H_ */
@@ -0,0 +1,135 @@
/* Calculate which nonterminals can expand into the null string for Bison.
Copyright (C) 1984, 1989, 2000-2006, 2009-2015, 2018-2021 Free
Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
/* Set up NULLABLE, a vector saying which nonterminals can expand into
the null string. NULLABLE[I - NTOKENS] is nonzero if symbol I can
do so. */
#include <config.h>
#include "system.h"
#include "getargs.h"
#include "gram.h"
#include "nullable.h"
#include "reduce.h"
#include "symtab.h"
/* Singly linked list of rules. nullable_compute keeps one such list
per nonterminal: the rules in whose right-hand side that nonterminal
occurs. */
typedef struct rule_list
{
/* Next cell of the list, or NULL at the end. */
struct rule_list *next;
/* The rule held by this cell. */
const rule *value;
} rule_list;
/* NULLABLE[I - NTOKENS] is true if nonterminal I can derive the empty
string. Allocated by nullable_compute, released by nullable_free. */
bool *nullable = NULL;
static void
nullable_print (FILE *out)
{
fputs ("NULLABLE\n", out);
for (int i = ntokens; i < nsyms; i++)
fprintf (out, " %s: %s\n", symbols[i]->tag,
nullable[i - ntokens] ? "yes" : "no");
fputs ("\n\n", out);
}
void
nullable_compute (void)
{
nullable = xcalloc (nnterms, sizeof *nullable);
size_t *rcount = xcalloc (nrules, sizeof *rcount);
/* RITEM contains all the rules, including useless productions.
Hence we must allocate room for useless nonterminals too. */
rule_list **rsets = xcalloc (nnterms, sizeof *rsets);
/* This is said to be more elements than we actually use.
Supposedly NRITEMS - NRULES is enough. But why take the risk? */
rule_list *relts = xnmalloc (nritems + nnterms + 1, sizeof *relts);
symbol_number *squeue = xnmalloc (nnterms, sizeof *squeue);
symbol_number *s2 = squeue;
{
rule_list *p = relts;
for (rule_number ruleno = 0; ruleno < nrules; ++ruleno)
if (rules[ruleno].useful)
{
const rule *r = &rules[ruleno];
if (r->rhs[0] >= 0)
{
/* This rule has a non empty RHS. */
bool any_tokens = false;
for (item_number *rp = r->rhs; *rp >= 0; ++rp)
if (ISTOKEN (*rp))
any_tokens = true;
/* This rule has only nonterminals: schedule it for the second
pass. */
if (!any_tokens)
for (item_number *rp = r->rhs; *rp >= 0; ++rp)
{
rcount[ruleno]++;
p->next = rsets[*rp - ntokens];
p->value = r;
rsets[*rp - ntokens] = p;
p++;
}
}
else
{
/* This rule has an empty RHS. */
if (r->useful
&& ! nullable[r->lhs->number - ntokens])
{
nullable[r->lhs->number - ntokens] = true;
*s2++ = r->lhs->number;
}
}
}
}
symbol_number *s1 = squeue;
while (s1 < s2)
for (rule_list *p = rsets[*s1++ - ntokens]; p; p = p->next)
{
const rule *r = p->value;
if (--rcount[r->number] == 0)
if (r->useful && ! nullable[r->lhs->number - ntokens])
{
nullable[r->lhs->number - ntokens] = true;
*s2++ = r->lhs->number;
}
}
free (squeue);
free (rcount);
free (rsets);
free (relts);
if (trace_flag & trace_sets)
nullable_print (stderr);
}
/* Release NULLABLE.  The pointer is reset to NULL so that it does not
   dangle: calling this function again is harmless, and a stale read
   after the release is a NULL dereference rather than a
   use-after-free.  */
void
nullable_free (void)
{
  free (nullable);
  nullable = NULL;
}
@@ -0,0 +1,33 @@
/* Part of the bison parser generator,
Copyright (C) 2000, 2002, 2009-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef NULLABLE_H_
# define NULLABLE_H_
/* A vector saying which nonterminals can expand into the null string.
NULLABLE[I - NTOKENS] is nonzero if symbol I can do so. */
extern bool *nullable;
/* Set up NULLABLE. */
extern void nullable_compute (void);
/* Free NULLABLE. */
extern void nullable_free (void);
#endif /* !NULLABLE_H_ */
+911
View File
@@ -0,0 +1,911 @@
/* Output the generated parsing program for Bison.
Copyright (C) 1984, 1986, 1989, 1992, 2000-2015, 2018-2021 Free
Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include <filename.h> /* IS_PATH_WITH_DIR */
#include <get-errno.h>
#include <mbswidth.h>
#include <path-join.h>
#include <quotearg.h>
#include <spawn-pipe.h>
#include <timevar.h>
#include <wait-process.h>
#include "complain.h"
#include "files.h"
#include "getargs.h"
#include "gram.h"
#include "muscle-tab.h"
#include "output.h"
#include "reader.h"
#include "reduce.h"
#include "scan-code.h" /* max_left_semantic_context */
#include "scan-skel.h"
#include "symtab.h"
#include "tables.h"
#include "strversion.h"
/* Scratch obstack on which the functions of this file build muscle
names and values before passing them to muscle_insert. */
static struct obstack format_obstack;
/* GENERATE_MUSCLE_INSERT_TABLE (Name, Type) defines a static function

Name (name, table_data, first, begin, end)

which defines three muscles:

- NAME: the comma-separated list made of FIRST followed by
TABLE_DATA[BEGIN], ..., TABLE_DATA[END - 1], each formatted with
"%6d", with a line break after every ten values;
- NAME_min and NAME_max: the smallest and the largest of these values
(FIRST included), stored as long integers.

For the typical case of outputting a complete table from 0, pass
TABLE[0] as FIRST, and 1 as BEGIN. For instance

muscle_insert_base_table ("pact", base, base[0], 1, nstates);

Since the values are formatted with "%6d", Type is expected to be an
integer type that promotes to int. */
#define GENERATE_MUSCLE_INSERT_TABLE(Name, Type) \
\
static void \
Name (char const *name, Type *table_data, Type first, \
int begin, int end) \
{ \
Type min = first; \
Type max = first; \
int j = 1; \
\
obstack_printf (&format_obstack, "%6d", first); \
for (int i = begin; i < end; ++i) \
{ \
obstack_1grow (&format_obstack, ','); \
if (j >= 10) \
{ \
obstack_sgrow (&format_obstack, "\n "); \
j = 1; \
} \
else \
++j; \
obstack_printf (&format_obstack, "%6d", table_data[i]); \
if (table_data[i] < min) \
min = table_data[i]; \
if (max < table_data[i]) \
max = table_data[i]; \
} \
muscle_insert (name, obstack_finish0 (&format_obstack)); \
\
long lmin = min; \
long lmax = max; \
/* Build 'NAME_min' and 'NAME_max' in the obstack. */ \
obstack_printf (&format_obstack, "%s_min", name); \
MUSCLE_INSERT_LONG_INT (obstack_finish0 (&format_obstack), lmin); \
obstack_printf (&format_obstack, "%s_max", name); \
MUSCLE_INSERT_LONG_INT (obstack_finish0 (&format_obstack), lmax); \
}
/* One table-insertion function per element type used in this file. */
GENERATE_MUSCLE_INSERT_TABLE (muscle_insert_int_table, int)
GENERATE_MUSCLE_INSERT_TABLE (muscle_insert_base_table, base_number)
GENERATE_MUSCLE_INSERT_TABLE (muscle_insert_rule_number_table, rule_number)
GENERATE_MUSCLE_INSERT_TABLE (muscle_insert_symbol_number_table, symbol_number)
GENERATE_MUSCLE_INSERT_TABLE (muscle_insert_item_number_table, item_number)
GENERATE_MUSCLE_INSERT_TABLE (muscle_insert_state_number_table, state_number)
/*-----------------------------------------------------------------.
| Print CP to OUT with the characters special to M4 escaped: '$'   |
| is followed by "][", '@' is doubled, and '[' and ']' become the  |
| digraphs "@{" and "@}".  Other characters are copied unchanged.  |
`-----------------------------------------------------------------*/

static void
output_escaped (FILE *out, const char *cp)
{
  for (const char *s = cp; *s != '\0'; ++s)
    {
      const char c = *s;
      if (c == '$')
        fputs ("$][", out);
      else if (c == '@')
        fputs ("@@", out);
      else if (c == '[')
        fputs ("@{", out);
      else if (c == ']')
        fputs ("@}", out);
      else
        fputc (c, out);
    }
}
/* Print CP to OUT escaped for M4 and enclosed in a double pair of
   brackets.  */
static void
output_quoted (FILE *out, char const *cp)
{
  fputs ("[[", out);
  output_escaped (out, cp);
  fputs ("]]", out);
}
/* Print to OUT a representation of STRING quoted and escaped both for
C and M4: STRING is first quoted in C style by quotearg_style, and
the result is then written by output_quoted. */
static void
string_output (FILE *out, char const *string)
{
output_quoted (out, quotearg_style (c_quoting_style, string));
}
/* Copy SRC into BUFFER with every trigraph neutralized: whenever two
   question marks are followed by a character that would complete a
   trigraph, a pair of double quotes is inserted between the two
   question marks.  At most BUFFERSIZE bytes are written, but the
   return value is always the size of the complete result, including
   the final NUL.  Hence, called with BUFFERSIZE = 0, this returns the
   size needed for BUFFER and writes nothing.  */
static ptrdiff_t
escape_trigraphs (char *buffer, ptrdiff_t buffersize, const char *src)
{
  /* The characters that complete a trigraph.  */
  static const char trigraph_tails[] = "!'()-/<=>";
  /* What replaces the two leading question marks.  */
  static const char split_marks[] = { '?', '"', '"', '?' };

  const ptrdiff_t len = strlen (src);
  ptrdiff_t res = 0;
  ptrdiff_t i = 0;
  while (i < len)
    {
      if (i + 2 < len
          && src[i] == '?'
          && src[i + 1] == '?'
          && strchr (trigraph_tails, src[i + 2]) != NULL)
        {
          for (size_t k = 0; k < sizeof split_marks; ++k)
            {
              if (res < buffersize)
                buffer[res] = split_marks[k];
              ++res;
            }
          /* Both question marks are consumed; the third character is
             copied by the next iteration.  */
          i += 2;
        }
      else
        {
          if (res < buffersize)
            buffer[res] = src[i];
          ++res;
          ++i;
        }
    }

  /* The terminating NUL is part of the reported size.  */
  if (res < buffersize)
    buffer[res] = '\0';
  ++res;
  return res;
}
/* Return a newly allocated copy of SRC in which trigraphs are
   escaped, as xstrdup would for a plain copy.  A first call to
   escape_trigraphs measures the result, a second one fills it.  */
static char *
xescape_trigraphs (const char *src)
{
  const ptrdiff_t needed = escape_trigraphs (NULL, 0, src);
  char *res = xcharalloc (needed);
  escape_trigraphs (res, needed, src);
  return res;
}
/* The tag of SYM as shown in the generated parsers.  Use "end of
   file" rather than "$end" (but keep "$end" in the reports, it's
   shorter and more consistent), and "invalid token" for the
   undefined token.  The end token keeps its own tag when the user
   already named it, which supports i18n.  */
static const char *
symbol_tag (const symbol *sym)
{
  /* The end token has its default name when it has an alias whose
     tag is "$end".  */
  const bool eof_has_default_name
    = eoftoken->alias && !STRNEQ (eoftoken->alias->tag, "$end");

  if (eof_has_default_name && sym->content == eoftoken->content)
    return "\"end of file\"";
  if (sym->content == undeftoken->content)
    return "\"invalid token\"";
  return sym->tag;
}
/* Define the muscle MUSCLE_NAME (e.g., "tname") as the table of the
   symbol names (aka tags), and define "has_translations_flag"
   according to whether any emitted name was marked translatable.  */
static void
prepare_symbol_names (char const *muscle_name)
{
  /* Only for "tname" is every name wrapped in an extra pair of
     quotes; there, no name is ever treated as translatable.  */
  const bool quoted = STREQ (muscle_name, "tname");
  bool any_translatable = false;

  struct quoting_options *opts = clone_quoting_options (0);
  set_quoting_style (opts, c_quoting_style);
  set_quoting_flags (opts, QA_SPLIT_TRIGRAPHS);

  /* We assume that the table will be output starting at column 2.  */
  int column = 2;
  for (int i = 0; i < nsyms; i++)
    {
      const char *tag = symbol_tag (symbols[i]);
      const bool translatable = !quoted && symbols[i]->translatable;
      any_translatable = any_translatable || translatable;

      /* A tag that is already a string literal is kept as is, except
         for its trigraphs; anything else is quoted as a C string.  */
      char *text;
      if (tag[0] == '"' && !quoted)
        text = xescape_trigraphs (tag);
      else
        text = quotearg_alloc (tag, -1, opts);

      /* Width of the next token, including the two quotes, the
         comma and the space.  */
      int width
        = mbswidth (text, 0) + 2
        + (translatable ? strlen ("N_()") : 0);

      if (75 < column + width)
        {
          obstack_sgrow (&format_obstack, "\n ");
          column = 1;
        }

      if (i != 0)
        obstack_1grow (&format_obstack, ' ');
      if (translatable)
        obstack_sgrow (&format_obstack, "]b4_symbol_translate""([");
      obstack_escape (&format_obstack, text);
      if (translatable)
        obstack_sgrow (&format_obstack, "])[");
      free (text);
      obstack_1grow (&format_obstack, ',');
      column += width;
    }
  free (opts);
  obstack_sgrow (&format_obstack, " ]b4_null[");

  /* Finish table and store.  */
  muscle_insert (muscle_name, obstack_finish0 (&format_obstack));

  /* Announce whether translation support is needed.  */
  MUSCLE_INSERT_BOOL ("has_translations_flag", any_translatable);
}
/*------------------------------------------------------------------.
| Prepare the muscles related to the symbols: the symbol counts,    |
| translate, tname, symbol_names, translatable (only when at least  |
| one token is translatable), and toknum.                           |
`------------------------------------------------------------------*/

static void
prepare_symbols (void)
{
  MUSCLE_INSERT_INT ("tokens_number", ntokens);
  MUSCLE_INSERT_INT ("nterms_number", nnterms);
  MUSCLE_INSERT_INT ("symbols_number", nsyms);
  MUSCLE_INSERT_INT ("code_max", max_code);

  muscle_insert_symbol_number_table ("translate",
                                     token_translations,
                                     token_translations[0],
                                     1, max_code + 1);

  /* tname -- token names.  */
  prepare_symbol_names ("tname");
  prepare_symbol_names ("symbol_names");

  /* translatable -- whether a token is translatable.  */
  {
    bool translatable = false;
    for (int i = 0; i < ntokens; ++i)
      if (symbols[i]->translatable)
        {
          translatable = true;
          break;
        }
    if (translatable)
      {
        /* Only the tokens are recorded, so NTOKENS entries suffice,
           as for "toknum" below.  */
        int *values = xnmalloc (ntokens, sizeof *values);
        for (int i = 0; i < ntokens; ++i)
          values[i] = symbols[i]->translatable;
        muscle_insert_int_table ("translatable", values,
                                 values[0], 1, ntokens);
        free (values);
      }
  }

  /* Output YYTOKNUM: the code of each token.  */
  {
    int *values = xnmalloc (ntokens, sizeof *values);
    for (int i = 0; i < ntokens; ++i)
      values[i] = symbols[i]->content->code;
    muscle_insert_int_table ("toknum", values,
                             values[0], 1, ntokens);
    free (values);
  }
}
/*-------------------------------------------------------------.
| Prepare the muscles related to the rules: rhs, prhs, r1, r2, |
| rline, dprec, merger, immediate. |
`-------------------------------------------------------------*/
static void
prepare_rules (void)
{
int *prhs = xnmalloc (nrules, sizeof *prhs);
item_number *rhs = xnmalloc (nritems, sizeof *rhs);
int *rline = xnmalloc (nrules, sizeof *rline);
symbol_number *r1 = xnmalloc (nrules, sizeof *r1);
int *r2 = xnmalloc (nrules, sizeof *r2);
int *dprec = xnmalloc (nrules, sizeof *dprec);
int *merger = xnmalloc (nrules, sizeof *merger);
int *immediate = xnmalloc (nrules, sizeof *immediate);
/* Index in RHS. */
int i = 0;
for (rule_number r = 0; r < nrules; ++r)
{
/* Index of rule R in RHS. */
prhs[r] = i;
/* RHS of the rule R. */
for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
rhs[i++] = *rhsp;
/* Separator in RHS. */
rhs[i++] = -1;
/* Line where rule was defined. */
rline[r] = rules[r].location.start.line;
/* LHS of the rule R. */
r1[r] = rules[r].lhs->number;
/* Length of rule R's RHS. */
r2[r] = rule_rhs_length (&rules[r]);
/* Dynamic precedence (GLR). */
dprec[r] = rules[r].dprec;
/* Merger-function index (GLR). */
merger[r] = rules[r].merger;
/* Immediate reduction flags (GLR). */
immediate[r] = rules[r].is_predicate;
}
aver (i == nritems);
muscle_insert_item_number_table ("rhs", rhs, ritem[0], 1, nritems);
muscle_insert_int_table ("prhs", prhs, 0, 0, nrules);
muscle_insert_int_table ("rline", rline, 0, 0, nrules);
muscle_insert_symbol_number_table ("r1", r1, 0, 0, nrules);
muscle_insert_int_table ("r2", r2, 0, 0, nrules);
muscle_insert_int_table ("dprec", dprec, 0, 0, nrules);
muscle_insert_int_table ("merger", merger, 0, 0, nrules);
muscle_insert_int_table ("immediate", immediate, 0, 0, nrules);
MUSCLE_INSERT_INT ("rules_number", nrules);
MUSCLE_INSERT_INT ("max_left_semantic_context", max_left_semantic_context);
free (prhs);
free (rhs);
free (rline);
free (r1);
free (r2);
free (dprec);
free (merger);
free (immediate);
}
/*------------------------------------------------------------.
| Prepare the muscles related to the states: stos, last,      |
| final_state_number and states_number.                       |
`------------------------------------------------------------*/

static void
prepare_states (void)
{
  /* The accessing symbol of each state.  */
  symbol_number *accessing = xnmalloc (nstates, sizeof *accessing);
  for (state_number s = 0; s < nstates; ++s)
    accessing[s] = states[s]->accessing_symbol;
  /* The first entry is emitted as a literal 0; the table itself is
     read from index 1 on.  */
  muscle_insert_symbol_number_table ("stos", accessing, 0, 1, nstates);
  free (accessing);

  MUSCLE_INSERT_INT ("last", high);
  MUSCLE_INSERT_INT ("final_state_number", final_state->number);
  MUSCLE_INSERT_INT ("states_number", nstates);
}
/*-------------------------------------------------------.
| Compare two symbols by type-name, and then by number.  |
|                                                        |
| This is a qsort comparator: LHS and RHS each point to  |
| an element of an array of symbol pointers.  It has the |
| exact prototype qsort expects, since calling it        |
| through a cast, incompatible function pointer type     |
| would be undefined behavior.                           |
`-------------------------------------------------------*/

static int
symbol_type_name_cmp (const void *lhs, const void *rhs)
{
  const symbol *l = *(const symbol *const *) lhs;
  const symbol *r = *(const symbol *const *) rhs;
  int res = uniqstr_cmp (l->content->type_name, r->content->type_name);
  if (!res)
    res = l->content->number - r->content->number;
  return res;
}


/*----------------------------------------------------------------.
| Return a (malloc'ed) table of the symbols sorted by type-name.  |
| The table is a copy of SYMBOLS: the caller must free it, but    |
| not the symbols it points to.                                   |
`----------------------------------------------------------------*/

static symbol **
symbols_by_type_name (void)
{
  symbol **res = xmemdup (symbols, nsyms * sizeof *res);
  qsort (res, nsyms, sizeof *res, symbol_type_name_cmp);
  return res;
}
/*------------------------------------------------------------------.
| Define b4_type_names, which is a list of (lists of the numbers of |
| symbols with same type-name). |
`------------------------------------------------------------------*/
static void
type_names_output (FILE *out)
{
symbol **syms = symbols_by_type_name ();
fputs ("m4_define([b4_type_names],\n[", out);
for (int i = 0; i < nsyms; /* nothing */)
{
/* The index of the first symbol of the current type-name. */
int i0 = i;
fputs (i ? ",\n[" : "[", out);
for (; i < nsyms
&& syms[i]->content->type_name == syms[i0]->content->type_name; ++i)
fprintf (out, "%s%d", i != i0 ? ", " : "", syms[i]->content->number);
fputs ("]", out);
}
fputs ("])\n\n", out);
free (syms);
}
/* Define b4_start_symbols on OUT, but only when there are several
   start symbols.  It is a list of pairs:
   [START-SYMBOL-NUM, SWITCHING-TOKEN-SYMBOL-NUM].  */
static void
start_symbols_output (FILE *out)
{
  /* Nothing to output with zero or one start symbol.  */
  if (!start_symbols || !start_symbols->next)
    return;

  fputs ("m4_define([b4_start_symbols],\n[", out);
  const char *sep = "";
  for (symbol_list *l = start_symbols; l; l = l->next)
    {
      const symbol *start = l->content.sym;
      const symbol *swtok = switching_token (start);
      fprintf (out, "%s[%d, %d]", sep,
               start->content->number, swtok->content->number);
      sep = ", ";
    }
  fputs ("])\n\n", out);
}
/*-----------------------------------------------------------.
| Define b4_symbol_numbers on OUT: the list of all the       |
| symbol numbers, from 0 to NSYMS - 1.                       |
`-----------------------------------------------------------*/

static void
symbol_numbers_output (FILE *out)
{
  fputs ("m4_define([b4_symbol_numbers],\n[", out);
  const char *sep = "";
  for (int num = 0; num < nsyms; ++num)
    {
      fprintf (out, "%s[%d]", sep, num);
      sep = ", ";
    }
  fputs ("])\n\n", out);
}
/*-------------------------------------------.
| Output the user reduction actions to OUT. |
`-------------------------------------------*/
/* Print the rule R to OUT, escaped for M4: its left-hand side, a
   colon, and then either its right-hand side symbols, each preceded
   by a space, or " %empty" when the right-hand side is empty.  */
static void
rule_output (const rule *r, FILE *out)
{
  output_escaped (out, r->lhs->symbol->tag);
  fputc (':', out);

  if (*r->rhs < 0)
    {
      fputs (" %empty", out);
      return;
    }

  for (item_number *item = r->rhs; *item >= 0; ++item)
    {
      fputc (' ', out);
      output_escaped (out, symbols[*item]->tag);
    }
}
/* Define b4_actions on OUT: one b4_case (or b4_predicate_case for a
   predicate) per rule that has an action.  Each case carries the rule
   number counted from 1, an optional b4_syncline, the action code
   indented to its original column, and the text of the rule.  */
static void
user_actions_output (FILE *out)
{
  fputs ("m4_define([b4_actions], \n[", out);
  for (rule_number r = 0; r < nrules; ++r)
    {
      if (!rules[r].action)
        continue;

      const char *case_macro
        = rules[r].is_predicate ? "b4_predicate_case" : "b4_case";
      /* The useless "" is there to pacify syntax-check.  */
      fprintf (out, "%s""(%d, [", case_macro, r + 1);

      if (!no_lines_flag)
        {
          fprintf (out, "b4_syncline(%d, ",
                   rules[r].action_loc.start.line);
          /* NOTE(review): if map_file_name returns freshly allocated
             memory, its result is leaked here -- confirm against its
             declaration.  */
          string_output (out,
                         map_file_name (rules[r].action_loc.start.file));
          fprintf (out, ")dnl\n");
        }

      /* Pad with spaces so that the action starts at its original
         column.  */
      fprintf (out, "[%*s%s]],\n[[",
               rules[r].action_loc.start.column - 1, "",
               rules[r].action);
      rule_output (&rules[r], out);
      fprintf (out, "]])\n\n");
    }
  fputs ("])\n\n", out);
}
/*------------------------------------.
| Output the merge functions to OUT. |
`------------------------------------*/
static void
merger_output (FILE *out)
{
  /* Define b4_mergers: one b4_call_merger invocation per element of
     merge_functions, numbered from 1 in list order.  */
  fputs ("m4_define([b4_mergers], \n[[", out);
  int rank = 1;
  for (merger_list *m = merge_functions; m; m = m->next)
    {
      fprintf (out, "]b4_call_merger""([%d], [%s], [%d])[\n",
               rank, m->name, m->sym->content->number);
      ++rank;
    }
  fputs ("]])\n\n", out);
}
/*---------------------------------------------.
| Prepare the muscles for symbol definitions. |
`---------------------------------------------*/
/* Insert the per-symbol muscles.  Keys have the form "symbol(NUM, FIELD)".
   For every symbol the fields has_id, id, tag, code, is_token, number,
   has_type and type are defined; for every code-props kind KIND (e.g.
   "printer") has_KIND is defined, plus KIND_file, KIND_line, KIND_loc
   and KIND itself when the symbol has such code.  */
static void
prepare_symbol_definitions (void)
{
/* Map "orig NUM" to new numbers. See data/README. */
/* The range also covers the useless nonterminals; without nterm_map
   the number maps to itself.  */
for (symbol_number i = ntokens; i < nsyms + nuseless_nonterminals; ++i)
{
obstack_printf (&format_obstack, "symbol""(orig %d, number)", i);
const char *key = obstack_finish0 (&format_obstack);
MUSCLE_INSERT_INT (key, nterm_map ? nterm_map[i - ntokens] : i);
}
for (int i = 0; i < nsyms; ++i)
{
symbol *sym = symbols[i];
const char *key;
/* SET_KEY (E) sets KEY to "symbol(I, E)"; SET_KEY2 (E, S) sets it to
   "symbol(I, E_S)".  Both build the string on format_obstack.  */
#define SET_KEY(Entry) \
obstack_printf (&format_obstack, "symbol""(%d, %s)", \
i, Entry); \
key = obstack_finish0 (&format_obstack);
#define SET_KEY2(Entry, Suffix) \
obstack_printf (&format_obstack, "symbol""(%d, %s_%s)", \
i, Entry, Suffix); \
key = obstack_finish0 (&format_obstack);
/* Whether the symbol has an identifier. */
const char *id = symbol_id_get (sym);
SET_KEY ("has_id");
MUSCLE_INSERT_INT (key, !!id);
/* Its identifier. */
SET_KEY ("id");
MUSCLE_INSERT_STRING (key, id ? id : "");
/* Its tag. Typically for documentation purpose. */
SET_KEY ("tag");
MUSCLE_INSERT_STRING (key, symbol_tag (sym));
SET_KEY ("code");
MUSCLE_INSERT_INT (key, sym->content->code);
/* Symbols numbered below ntokens are tokens.  */
SET_KEY ("is_token");
MUSCLE_INSERT_INT (key, i < ntokens);
SET_KEY ("number");
MUSCLE_INSERT_INT (key, sym->content->number);
SET_KEY ("has_type");
MUSCLE_INSERT_INT (key, !!sym->content->type_name);
/* The type name, or "" when there is none.  */
SET_KEY ("type");
MUSCLE_INSERT_STRING (key, sym->content->type_name
? sym->content->type_name : "");
for (int j = 0; j < CODE_PROPS_SIZE; ++j)
{
/* "printer", not "%printer". */
char const *pname = code_props_type_string (j) + 1;
code_props const *p = symbol_code_props_get (sym, j);
SET_KEY2 ("has", pname);
MUSCLE_INSERT_INT (key, !!p->code);
if (p->code)
{
SET_KEY2 (pname, "file");
MUSCLE_INSERT_C_STRING (key, map_file_name (p->location.start.file));
SET_KEY2 (pname, "line");
MUSCLE_INSERT_INT (key, p->location.start.line);
SET_KEY2 (pname, "loc");
muscle_location_grow (key, p->location);
/* The code itself, preceded by (start column - 1) spaces.  */
SET_KEY (pname);
obstack_printf (&muscle_obstack,
"%*s%s", p->location.start.column - 1, "", p->code);
muscle_insert (key, obstack_finish0 (&muscle_obstack));
}
}
#undef SET_KEY2
#undef SET_KEY
}
}
/* Insert the muscles for the parser tables: defact, defgoto, pact,
   pact_ninf, pgoto, table, table_ninf, check, conflict_list_heads and
   conflicting_rules.  */
static void
prepare_actions (void)
{
/* Figure out the actions for the specified state. */
muscle_insert_rule_number_table ("defact", yydefact,
yydefact[0], 1, nstates);
/* Figure out what to do after reducing with each rule, depending on
the saved state from before the beginning of parsing the data
that matched this rule. */
muscle_insert_state_number_table ("defgoto", yydefgoto,
yydefgoto[0], 1, nsyms - ntokens);
/* Output PACT. */
muscle_insert_base_table ("pact", base,
base[0], 1, nstates);
MUSCLE_INSERT_INT ("pact_ninf", base_ninf);
/* Output PGOTO. */
/* PGOTO is taken from the same BASE array as PACT, starting at index
   NSTATES.  */
muscle_insert_base_table ("pgoto", base,
base[nstates], nstates + 1, nvectors);
muscle_insert_base_table ("table", table,
table[0], 1, high + 1);
MUSCLE_INSERT_INT ("table_ninf", table_ninf);
muscle_insert_base_table ("check", check,
check[0], 1, high + 1);
/* GLR parsing slightly modifies YYTABLE and YYCHECK (and thus
YYPACT) so that in states with unresolved conflicts, the default
reduction is not used in the conflicted entries, so that there is
a place to put a conflict pointer.
This means that YYCONFLP and YYCONFL are nonsense for a non-GLR
parser, so we could avoid accidents by not writing them out in
that case. Nevertheless, it seems even better to be able to use
the GLR skeletons even without the non-deterministic tables. */
muscle_insert_int_table ("conflict_list_heads", conflict_table,
conflict_table[0], 1, high + 1);
muscle_insert_int_table ("conflicting_rules", conflict_list,
0, 1, conflict_list_cnt);
}
/*--------------------------------------------.
| Output the definitions of all the muscles. |
`--------------------------------------------*/
/* Write to OUT the m4 input that defines the muscles: "m4_init()",
   then the mergers, symbol numbers, type names, start symbols and user
   actions, and finally everything stored in the muscle table.  */
static void
muscles_output (FILE *out)
{
fputs ("m4_init()\n", out);
merger_output (out);
symbol_numbers_output (out);
type_names_output (out);
start_symbols_output (out);
user_actions_output (out);
/* Must be last. */
muscles_m4_output (out);
}
/*---------------------------.
| Call the skeleton parser. |
`---------------------------*/
/* Run m4 on the selected skeleton: create an m4 subprocess connected
   to us through a bidirectional pipe, write all the muscle definitions
   to it, scan its output with scan_skel, then wait for it.  */
static void
output_skeleton (void)
{
/* Compute the names of the package data dir and skeleton files. */
char const *m4 = m4path ();
char const *datadir = pkgdatadir ();
char *skeldir = xpath_join (datadir, "skeletons");
char *m4sugar = xpath_join (datadir, "m4sugar/m4sugar.m4");
char *m4bison = xpath_join (skeldir, "bison.m4");
char *traceon = xpath_join (skeldir, "traceon.m4");
/* A skeleton name that already contains a directory is used as is;
   otherwise it is looked up in SKELDIR.  */
char *skel = (IS_PATH_WITH_DIR (skeleton)
? xstrdup (skeleton)
: xpath_join (skeldir, skeleton));
/* Test whether m4sugar.m4 is readable, to check for proper
installation. A faulty installation can cause deadlock, so a
cheap sanity check is worthwhile. */
xfclose (xfopen (m4sugar, "r"));
/* Create an m4 subprocess connected to us via two pipes. */
int filter_fd[2];
pid_t pid;
{
/* Worst case: m4, M4_GNU_OPTION, -I, datadir, -dV, m4sugar, -,
   m4bison, traceon, skel and the terminating NULL: 11 entries.  */
char const *argv[11];
int i = 0;
argv[i++] = m4;
/* When POSIXLY_CORRECT is set, GNU M4 1.6 and later disable GNU
extensions, which Bison's skeletons depend on. With older M4,
it has no effect. M4 1.4.12 added a -g/--gnu command-line
option to make it explicit that a program wants GNU M4
extensions even when POSIXLY_CORRECT is set.
See the thread starting at
<https://lists.gnu.org/r/bug-bison/2008-07/msg00000.html>
for details. */
if (*M4_GNU_OPTION)
argv[i++] = M4_GNU_OPTION;
argv[i++] = "-I";
argv[i++] = datadir;
/* Some future version of GNU M4 (most likely 1.6) may treat the
-dV in a position-dependent manner. See the thread starting at
<https://lists.gnu.org/r/bug-bison/2008-07/msg00000.html>
for details. */
if (trace_flag & trace_m4_early)
argv[i++] = "-dV";
argv[i++] = m4sugar;
/* "-" is passed between m4sugar and m4bison; the muscles are written
   to the pipe below.  NOTE(review): presumably m4 reads "-" as its
   standard input -- confirm against the m4 manual.  */
argv[i++] = "-";
argv[i++] = m4bison;
if (trace_flag & trace_m4)
argv[i++] = traceon;
argv[i++] = skel;
argv[i++] = NULL;
/* Check that ARGV was large enough for everything stored above.  */
aver (i <= ARRAY_CARDINALITY (argv));
if (trace_flag & trace_tools)
{
fputs ("running:", stderr);
for (int j = 0; argv[j]; ++j)
fprintf (stderr, " %s", argv[j]);
fputc ('\n', stderr);
}
pid = create_pipe_bidi ("m4", m4, argv,
/* directory */ NULL,
/* null_stderr */ false,
/* slave_process */ true,
/* exit_on_error */ true,
filter_fd);
}
/* The path strings are released right after the subprocess has been
   created.  */
free (skeldir);
free (m4sugar);
free (m4bison);
free (traceon);
free (skel);
/* When muscle tracing is enabled, also dump the muscles to stderr.  */
if (trace_flag & trace_muscles)
muscles_output (stderr);
/* Send the muscles through filter_fd[1], then close that end.  */
{
FILE *out = xfdopen (filter_fd[1], "w");
muscles_output (out);
xfclose (out);
}
/* Read and process m4's output. */
timevar_push (tv_m4);
{
FILE *in = xfdopen (filter_fd[0], "r");
scan_skel (in);
/* scan_skel should have read all of M4's output. Otherwise, when we
close the pipe, we risk letting M4 report a broken-pipe to the
Bison user. */
aver (feof (in));
xfclose (in);
}
wait_subprocess (pid, "m4", false, false, true, true, NULL);
timevar_pop (tv_m4);
}
/* Insert the general muscles: versions, flags, file names and
   skeleton information.  Also sets SKELETON to the language's skeleton
   when none has been selected yet.  */
static void
prepare (void)
{
/* BISON_USE_PUSH_FOR_PULL is for the test suite and should not be
documented for the user. */
char const *cp = getenv ("BISON_USE_PUSH_FOR_PULL");
/* True iff the variable is set, non-empty, and strtol reads a nonzero
   value from it.  */
bool use_push_for_pull_flag = cp && *cp && strtol (cp, 0, 10);
/* Versions. */
MUSCLE_INSERT_STRING ("version_string", VERSION);
MUSCLE_INSERT_INT ("version", strversion_to_int (VERSION));
MUSCLE_INSERT_INT ("required_version", required_version);
/* Flags. */
MUSCLE_INSERT_BOOL ("header_flag", header_flag);
MUSCLE_INSERT_BOOL ("glr_flag", glr_parser);
MUSCLE_INSERT_BOOL ("nondeterministic_flag", nondeterministic_parser);
MUSCLE_INSERT_BOOL ("synclines_flag", !no_lines_flag);
MUSCLE_INSERT_BOOL ("tag_seen_flag", tag_seen);
MUSCLE_INSERT_BOOL ("token_table_flag", token_table_flag);
MUSCLE_INSERT_BOOL ("use_push_for_pull_flag", use_push_for_pull_flag);
MUSCLE_INSERT_BOOL ("yacc_flag", !location_empty (yacc_loc));
/* File names. */
if (spec_name_prefix)
MUSCLE_INSERT_STRING ("prefix", spec_name_prefix);
MUSCLE_INSERT_STRING ("file_name_all_but_ext", all_but_ext);
const char *spec_mapped_header_file = map_file_name (spec_header_file);
const char *mapped_dir_prefix = map_file_name (dir_prefix);
/* DEFINE (V) inserts a muscle named "V" whose value is the variable V,
   or "" when V is NULL.  */
#define DEFINE(Name) MUSCLE_INSERT_STRING (#Name, Name ? Name : "")
DEFINE (dir_prefix);
DEFINE (mapped_dir_prefix);
DEFINE (parser_file_name);
DEFINE (spec_header_file);
DEFINE (spec_mapped_header_file);
DEFINE (spec_file_prefix);
DEFINE (spec_graph_file);
DEFINE (spec_name_prefix);
DEFINE (spec_outfile);
DEFINE (spec_verbose_file);
#undef DEFINE
/* Find the right skeleton file, and add muscles about the skeletons. */
/* An already selected skeleton is recorded as a muscle; otherwise the
   language's skeleton is used and no "skeleton" muscle is inserted.  */
if (skeleton)
MUSCLE_INSERT_C_STRING ("skeleton", skeleton);
else
skeleton = language->skeleton;
/* About the skeletons. */
{
/* b4_skeletonsdir is used inside m4_include in the skeletons, so digraphs
would never be expanded. Hopefully no one has M4-special characters in
his Bison installation path. */
char *skeldir = xpath_join (pkgdatadir (), "skeletons");
MUSCLE_INSERT_STRING_RAW ("skeletonsdir", skeldir);
free (skeldir);
}
}
/*----------------------------------------------------------.
| Output the parsing tables and the parser code to ftable. |
`----------------------------------------------------------*/
/* Entry point of this module: run every prepare_* step to insert the
   muscles, process the skeleton, and remove the generated sources if
   complaints were recorded.  FORMAT_OBSTACK lives for the duration of
   the call.  */
void
output (void)
{
obstack_init (&format_obstack);
prepare_symbols ();
prepare_rules ();
prepare_states ();
prepare_actions ();
prepare_symbol_definitions ();
prepare ();
/* Process the selected skeleton file. */
output_skeleton ();
/* If late errors were generated, destroy the generated source
files. */
if (complaint_status)
unlink_generated_sources ();
obstack_free (&format_obstack, NULL);
}
@@ -0,0 +1,27 @@
/* Output the generated parsing program for bison,
Copyright (C) 2000-2003, 2006-2007, 2009-2015, 2018-2021 Free
Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef OUTPUT_H_
# define OUTPUT_H_
/* Output the parsing tables and the parser code to FTABLE. */
void output (void);
#endif /* !OUTPUT_H_ */
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,236 @@
/* A Bison parser, made by GNU Bison 3.8.2. */
/* Bison interface for Yacc-like parsers in C
Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2021 Free Software Foundation,
Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
/* As a special exception, you may create a larger work that contains
part or all of the Bison parser skeleton and distribute that work
under terms of your choice, so long as that work isn't itself a
parser generator using the skeleton or a modified version thereof
as a parser skeleton. Alternatively, if you modify or redistribute
the parser skeleton itself, you may (at your option) remove this
special exception, which will cause the skeleton and the resulting
Bison output files to be licensed under the GNU General Public
License without this special exception.
This special exception was added by the Free Software Foundation in
version 2.2 of Bison. */
/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual,
especially those whose name start with YY_ or yy_. They are
private implementation details that can be changed or removed. */
#ifndef YY_GRAM_SRC_PARSE_GRAM_H_INCLUDED
# define YY_GRAM_SRC_PARSE_GRAM_H_INCLUDED
/* Debug traces. */
#ifndef GRAM_DEBUG
# if defined YYDEBUG
#if YYDEBUG
# define GRAM_DEBUG 1
# else
# define GRAM_DEBUG 0
# endif
# else /* ! defined YYDEBUG */
# define GRAM_DEBUG 1
# endif /* ! defined YYDEBUG */
#endif /* ! defined GRAM_DEBUG */
#if GRAM_DEBUG
extern int gram_debug;
#endif
/* "%code requires" blocks. */
#line 21 "src/parse-gram.y"
#include "symlist.h"
#include "symtab.h"
#line 268 "src/parse-gram.y"
typedef enum
{
param_none = 0,
param_lex = 1 << 0,
param_parse = 1 << 1,
param_both = param_lex | param_parse
} param_type;
#line 737 "src/parse-gram.y"
#include "muscle-tab.h"
typedef struct /* Type of the "value" member of union GRAM_STYPE. */
{
char const *chars; /* NOTE(review): presumably the text of the value; ownership is not visible here. */
muscle_kind kind; /* Kind of muscle, as declared in muscle-tab.h. */
} value_type;
#line 79 "src/parse-gram.h"
/* Token kinds. */
#ifndef GRAM_TOKENTYPE
# define GRAM_TOKENTYPE
enum gram_tokentype
{
GRAM_EMPTY = -2,
GRAM_EOF = 0, /* "end of file" */
GRAM_error = 1, /* error */
GRAM_UNDEF = 2, /* "invalid token" */
STRING = 3, /* "string" */
TSTRING = 4, /* "translatable string" */
PERCENT_TOKEN = 5, /* "%token" */
PERCENT_NTERM = 6, /* "%nterm" */
PERCENT_TYPE = 7, /* "%type" */
PERCENT_DESTRUCTOR = 8, /* "%destructor" */
PERCENT_PRINTER = 9, /* "%printer" */
PERCENT_LEFT = 10, /* "%left" */
PERCENT_RIGHT = 11, /* "%right" */
PERCENT_NONASSOC = 12, /* "%nonassoc" */
PERCENT_PRECEDENCE = 13, /* "%precedence" */
PERCENT_PREC = 14, /* "%prec" */
PERCENT_DPREC = 15, /* "%dprec" */
PERCENT_MERGE = 16, /* "%merge" */
PERCENT_CODE = 17, /* "%code" */
PERCENT_DEFAULT_PREC = 18, /* "%default-prec" */
PERCENT_DEFINE = 19, /* "%define" */
PERCENT_ERROR_VERBOSE = 20, /* "%error-verbose" */
PERCENT_EXPECT = 21, /* "%expect" */
PERCENT_EXPECT_RR = 22, /* "%expect-rr" */
PERCENT_FILE_PREFIX = 23, /* "%file-prefix" */
PERCENT_FLAG = 24, /* "%<flag>" */
PERCENT_GLR_PARSER = 25, /* "%glr-parser" */
PERCENT_HEADER = 26, /* "%header" */
PERCENT_INITIAL_ACTION = 27, /* "%initial-action" */
PERCENT_LANGUAGE = 28, /* "%language" */
PERCENT_NAME_PREFIX = 29, /* "%name-prefix" */
PERCENT_NO_DEFAULT_PREC = 30, /* "%no-default-prec" */
PERCENT_NO_LINES = 31, /* "%no-lines" */
PERCENT_NONDETERMINISTIC_PARSER = 32, /* "%nondeterministic-parser" */
PERCENT_OUTPUT = 33, /* "%output" */
PERCENT_PURE_PARSER = 34, /* "%pure-parser" */
PERCENT_REQUIRE = 35, /* "%require" */
PERCENT_SKELETON = 36, /* "%skeleton" */
PERCENT_START = 37, /* "%start" */
PERCENT_TOKEN_TABLE = 38, /* "%token-table" */
PERCENT_VERBOSE = 39, /* "%verbose" */
PERCENT_YACC = 40, /* "%yacc" */
BRACED_CODE = 41, /* "{...}" */
BRACED_PREDICATE = 42, /* "%?{...}" */
BRACKETED_ID = 43, /* "[identifier]" */
CHAR_LITERAL = 44, /* "character literal" */
COLON = 45, /* ":" */
EPILOGUE = 46, /* "epilogue" */
EQUAL = 47, /* "=" */
ID = 48, /* "identifier" */
ID_COLON = 49, /* "identifier:" */
PERCENT_PERCENT = 50, /* "%%" */
PIPE = 51, /* "|" */
PROLOGUE = 52, /* "%{...%}" */
SEMICOLON = 53, /* ";" */
TAG = 54, /* "<tag>" */
TAG_ANY = 55, /* "<*>" */
TAG_NONE = 56, /* "<>" */
INT_LITERAL = 57, /* "integer literal" */
PERCENT_PARAM = 58, /* "%param" */
PERCENT_UNION = 59, /* "%union" */
PERCENT_EMPTY = 60 /* "%empty" */
};
typedef enum gram_tokentype gram_token_kind_t;
#endif
/* Value type. */
#if ! defined GRAM_STYPE && ! defined GRAM_STYPE_IS_DECLARED
union GRAM_STYPE /* Semantic value type: one member per typed grammar symbol, named in the trailing comment. */
{ /* Members called yykind_N are those whose symbol name in the trailing comment contains a '.'. */
assoc precedence_declarator; /* precedence_declarator */
char* STRING; /* "string" */
char* TSTRING; /* "translatable string" */
char* BRACED_CODE; /* "{...}" */
char* BRACED_PREDICATE; /* "%?{...}" */
char* EPILOGUE; /* "epilogue" */
char* PROLOGUE; /* "%{...%}" */
char* yykind_75; /* string.opt */
code_props_type code_props_type; /* code_props_type */
int INT_LITERAL; /* "integer literal" */
int yykind_84; /* int.opt */
named_ref* yykind_97; /* named_ref.opt */
param_type PERCENT_PARAM; /* "%param" */
symbol* token_decl; /* token_decl */
symbol* alias; /* alias */
symbol* token_decl_for_prec; /* token_decl_for_prec */
symbol* id; /* id */
symbol* id_colon; /* id_colon */
symbol* symbol; /* symbol */
symbol* string_as_id; /* string_as_id */
symbol_list* generic_symlist; /* generic_symlist */
symbol_list* generic_symlist_item; /* generic_symlist_item */
symbol_list* nterm_decls; /* nterm_decls */
symbol_list* token_decls; /* token_decls */
symbol_list* yykind_82; /* token_decl.1 */
symbol_list* token_decls_for_prec; /* token_decls_for_prec */
symbol_list* yykind_87; /* token_decl_for_prec.1 */
symbol_list* symbol_decls; /* symbol_decls */
symbol_list* yykind_90; /* symbols.1 */
uniqstr PERCENT_ERROR_VERBOSE; /* "%error-verbose" */
uniqstr PERCENT_FILE_PREFIX; /* "%file-prefix" */
uniqstr PERCENT_FLAG; /* "%<flag>" */
uniqstr PERCENT_NAME_PREFIX; /* "%name-prefix" */
uniqstr PERCENT_PURE_PARSER; /* "%pure-parser" */
uniqstr BRACKETED_ID; /* "[identifier]" */
uniqstr ID; /* "identifier" */
uniqstr ID_COLON; /* "identifier:" */
uniqstr TAG; /* "<tag>" */
uniqstr yykind_76; /* tag.opt */
uniqstr tag; /* tag */
uniqstr variable; /* variable */
unsigned char CHAR_LITERAL; /* "character literal" */
value_type value; /* value */
#line 200 "src/parse-gram.h"
};
typedef union GRAM_STYPE GRAM_STYPE;
# define GRAM_STYPE_IS_TRIVIAL 1
# define GRAM_STYPE_IS_DECLARED 1
#endif
/* Location type. */
#if ! defined GRAM_LTYPE && ! defined GRAM_LTYPE_IS_DECLARED
typedef struct GRAM_LTYPE GRAM_LTYPE;
struct GRAM_LTYPE /* Location type: the first and last line/column of a span. */
{
int first_line; /* Line of the first position. */
int first_column; /* Column of the first position. */
int last_line; /* Line of the last position. */
int last_column; /* Column of the last position. */
};
# define GRAM_LTYPE_IS_DECLARED 1
# define GRAM_LTYPE_IS_TRIVIAL 1
#endif
int gram_parse (void);
/* "%code provides" blocks. */
#line 27 "src/parse-gram.y"
/* Initialize unquote. */
void parser_init (void);
/* Deallocate storage for unquote. */
void parser_free (void);
#line 235 "src/parse-gram.h"
#endif /* !YY_GRAM_SRC_PARSE_GRAM_H_INCLUDED */
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,600 @@
/* Parser simulator for unifying counterexample search
Copyright (C) 2020-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "parse-simulation.h"
#include <gl_linked_list.h>
#include <gl_xlist.h>
#include <stdlib.h>
#include "lssi.h"
#include "nullable.h"
// One node in a tree of simulated parser states.  A node stores only
// the elements it adds itself (its "chunks"); head_elt, tail_elt and
// total_size describe the whole list, which copy_parse_state inherits
// from PARENT and which parser_pop rebuilds by walking the PARENT
// chain.
struct parse_state
{
// Path of state-items the parser has traversed.
struct si_chunk
{
// Elements newly added in this chunk.
state_item_list contents;
// Properties of the linked list this chunk represents.
const state_item *head_elt;
const state_item *tail_elt;
size_t total_size;
} state_items;
// List of derivations of the symbols.
struct deriv_chunk
{
// Derivations newly added in this chunk.
derivation_list contents;
// Properties of the whole derivation list (same roles as in si_chunk).
const derivation *head_elt;
const derivation *tail_elt;
size_t total_size;
} derivs;
// State this one was copied from (set by copy_parse_state); NULL for
// states made by empty_parse_state.
struct parse_state *parent;
// Incremented by parse_state_retain, decremented by free_parse_state.
int reference_count;
// Incremented during productions, decremented during reductions.
int depth;
// Whether the contents of the chunks should be prepended or
// appended to the list the chunks represent.
bool prepend;
// Causes chunk contents to be freed when the reference count is
// one. Used when only the chunk metadata will be needed.
bool free_contents_early;
};
// Add SI at the front of PS's state-item chunk and update the list
// metadata: SI becomes the head and the total size grows by one.
static void
ps_si_prepend (parse_state *ps, const state_item *si)
{
  gl_list_add_first (ps->state_items.contents, si);
  ps->state_items.head_elt = si;
  ps->state_items.total_size += 1;
  // No tail recorded yet: SI is the tail as well.
  if (ps->state_items.tail_elt == NULL)
    ps->state_items.tail_elt = si;
}
// Add SI at the back of PS's state-item chunk and update the list
// metadata: SI becomes the tail and the total size grows by one.
static void
ps_si_append (parse_state *ps, const state_item *si)
{
  gl_list_add_last (ps->state_items.contents, si);
  ps->state_items.tail_elt = si;
  ps->state_items.total_size += 1;
  // No head recorded yet: SI is the head as well.
  if (ps->state_items.head_elt == NULL)
    ps->state_items.head_elt = si;
}
// Add D at the front of PS's derivation chunk and update the list
// metadata: D becomes the head and the total size grows by one.
static void
ps_derivs_prepend (parse_state *ps, derivation *d)
{
  derivation_list_prepend (ps->derivs.contents, d);
  ps->derivs.head_elt = d;
  ps->derivs.total_size += 1;
  // No tail recorded yet: D is the tail as well.
  if (ps->derivs.tail_elt == NULL)
    ps->derivs.tail_elt = d;
}
// Add D at the back of PS's derivation chunk and update the list
// metadata: D becomes the tail and the total size grows by one.
static void
ps_derivs_append (parse_state *ps, derivation *d)
{
  derivation_list_append (ps->derivs.contents, d);
  ps->derivs.tail_elt = d;
  ps->derivs.total_size += 1;
  // No head recorded yet: D is the head as well.
  if (ps->derivs.head_elt == NULL)
    ps->derivs.head_elt = d;
}
// Number of parse_state structs allocated (empty_parse_state and
// copy_parse_state) and freed (free_parse_state).
// NOTE(review): only incremented in the code visible here, never read
// -- presumably debugging aids.
static int allocs = 0;
static int frees = 0;
// Allocate a parse state with no parent, empty chunks, and every other
// field zeroed (the struct comes from xcalloc).
static parse_state *
empty_parse_state (void)
{
  parse_state *ps = xcalloc (1, sizeof *ps);
  ++allocs;
  ps->derivs.contents = derivation_list_new ();
  ps->state_items.contents
    = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
  return ps;
}
// Create a root parse state whose state-item list is just SI and whose
// derivation list is just derivation_dot ().
parse_state *
new_parse_state (const state_item *si)
{
  parse_state *root = empty_parse_state ();
  ps_derivs_append (root, derivation_dot ());
  ps_si_append (root, si);
  return root;
}
// Create a child of PARENT.  The child inherits PARENT's list metadata
// and depth, starts with empty chunks of its own and a reference count
// of zero, and holds a reference on PARENT.  PREPEND tells whether the
// child's chunks go before or after the lists PARENT represents.
static parse_state *
copy_parse_state (bool prepend, parse_state *parent)
{
  parse_state *child = xmalloc (sizeof *child);
  ++allocs;
  // Shallow copy first; the fields that must differ are reset below.
  *child = *parent;
  parse_state_retain (parent);
  child->parent = parent;
  child->prepend = prepend;
  child->reference_count = 0;
  child->free_contents_early = false;
  child->derivs.contents = derivation_list_new ();
  child->state_items.contents
    = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
  return child;
}
bool
parse_state_derivation_completed (const parse_state *ps)
{
return ps->derivs.total_size == 1;
}
derivation *
parse_state_derivation (const parse_state *ps)
{
return (derivation *) ps->derivs.head_elt;
}
const state_item *
parse_state_head (const parse_state *ps)
{
return ps->state_items.head_elt;
}
const state_item *
parse_state_tail (const parse_state *ps)
{
return ps->state_items.tail_elt;
}
int
parse_state_length (const parse_state *ps)
{
return ps->state_items.total_size;
}
// The depth counter of PS.
int
parse_state_depth (const parse_state *ps)
{
  const int depth = ps->depth;
  return depth;
}
// Take one more reference on PS.
void
parse_state_retain (parse_state *ps)
{
  ps->reference_count += 1;
}
// Flag PS so that free_parse_state releases its chunk contents as soon
// as its reference count drops to one instead of zero.
void
parse_state_free_contents_early (parse_state *ps)
{
ps->free_contents_early = true;
}
// Drop one reference on ORIGINAL_PS and release what becomes unused,
// walking up the parent chain.
//
// For each visited state the reference count is decremented.  Its
// chunk contents are freed when the count reaches 0, or reaches 1 for
// a state flagged by parse_state_free_contents_early; only in that
// case does the walk go on to the parent, dropping the reference this
// state held on it.  The struct itself is freed once its count is <= 0.
void
free_parse_state (parse_state *original_ps)
{
bool free_contents = true;
parse_state *parent_ps = NULL;
for (parse_state *ps = original_ps; ps && free_contents; ps = parent_ps)
{
--ps->reference_count;
free_contents = (ps->reference_count == 1 && ps->free_contents_early)
|| (ps->reference_count == 0 && !ps->free_contents_early);
// need to keep the parse state around for visited hash set,
// but its contents and parent can be freed
if (free_contents)
{
// The lists may be NULL: parse_state_lists detaches them before
// calling this function.
if (ps->state_items.contents)
gl_list_free (ps->state_items.contents);
if (ps->derivs.contents)
derivation_list_free (ps->derivs.contents);
}
// Read the parent now: PS itself may be freed just below.
parent_ps = ps->parent;
if (ps->reference_count <= 0)
{
free (ps);
++frees;
}
}
}
size_t
parse_state_hasher (const parse_state *ps, size_t max)
{
const struct si_chunk *sis = &ps->state_items;
return ((state_item *) sis->head_elt - state_items +
(state_item *) sis->tail_elt - state_items + sis->total_size) % max;
}
// Whether PS1 and PS2 have the same head state-item, the same tail
// state-item, and state-item lists of the same length.
bool
parse_state_comparator (const parse_state *ps1, const parse_state *ps2)
{
  const struct si_chunk *lhs = &ps1->state_items;
  const struct si_chunk *rhs = &ps2->state_items;
  if (lhs->head_elt != rhs->head_elt)
    return false;
  if (lhs->tail_elt != rhs->tail_elt)
    return false;
  return lhs->total_size == rhs->total_size;
}
// Count the steps recorded in the root ancestor of PS.  *PRODUCTIONS
// receives the number of consecutive state-item pairs that share the
// same state; *SHIFTS receives the root's total state-item count minus
// that number.
void
parse_state_completed_steps (const parse_state *ps, int *shifts, int *productions)
{
  // Walk up to the root parse_state, which will have a list of all
  // completed productions.
  const parse_state *root = ps;
  for (; root->parent; root = root->parent)
    continue;

  int nprods = 0;
  state_item *prev = NULL;
  state_item *cur = NULL;
  gl_list_iterator_t it = gl_list_iterator (root->state_items.contents);
  while (state_item_list_next (&it, &cur))
    {
      if (prev && prev->state == cur->state)
        ++nprods;
      prev = cur;
    }
  *productions = nprods;
  *shifts = root->state_items.total_size - nprods;
}
// Signature of the functions list_flatten_and_split uses to append an
// element to one of its result lists.
typedef void (*chunk_append_fn) (gl_list_t, const void *);
// A version of gl_list_add_last which has the chunk_append_fn
// signature.
static void
list_add_last (gl_list_t list, const void *elt)
{
// The value returned by gl_list_add_last is discarded.
gl_list_add_last (list, elt);
}
// Takes an array LIST of N gl_lists, whose elements are themselves
// gl_lists, and flattens them into two lists: the first SPLIT inner
// elements (counted across all N lists) are appended to RETS[0] and
// the following ones to RETS[1], using APPEND_FN.  When the target
// entry of RETS is NULL the element is skipped.  NULL inner lists are
// ignored.
static void
list_flatten_and_split (gl_list_t *list, gl_list_t *rets, int split, int n,
chunk_append_fn append_fn)
{
// Number of inner elements seen so far.
int ret_index = 0;
// Index in RETS of the current target: 0 before the split, 1 after.
int ret_array = 0;
for (int i = 0; i < n; ++i)
{
const void *p = NULL;
gl_list_iterator_t it = gl_list_iterator (list[i]);
while (gl_list_iterator_next (&it, &p, NULL))
if (p)
{
gl_list_t l = (gl_list_t) p;
const void *si = NULL;
gl_list_iterator_t it2 = gl_list_iterator (l);
while (gl_list_iterator_next (&it2, &si, NULL))
{
// ret_index only grows, so the switch to RETS[1] happens at most
// once.
if (ret_index++ == split)
++ret_array;
if (rets[ret_array])
append_fn (rets[ret_array], si);
}
gl_list_iterator_free (&it2);
}
gl_list_iterator_free (&it);
}
}
// Create an empty linked list of parse states whose element dispose
// function is free_parse_state.
static parse_state_list
parse_state_list_new (void)
{
return gl_list_create_empty (GL_LINKED_LIST, NULL, NULL,
(gl_listelement_dispose_fn)free_parse_state,
true);
}
// Append PS to PL.  A reference is taken on PS first; it pairs with
// the free_parse_state dispose function that parse_state_list_new
// installs on the list.
static void
parse_state_list_append (parse_state_list pl, parse_state *ps)
{
parse_state_retain (ps);
gl_list_add_last (pl, ps);
}
// Emulates a reduction on a parse state by popping some amount of
// derivations and state_items off of the parse_state and returning
// the result in ret. Returns the derivation of what's popped.
//
// The lists PS represents are rebuilt by walking its parent chain.
// The first SI_INDEX state-items and the first DERIV_INDEX derivations
// are stored in RET's chunks; the remaining derivations are returned
// in a new list, and the remaining state-items are dropped.  RET's
// head, tail and total size are then recomputed from its contents.
static derivation_list
parser_pop (parse_state *ps, int deriv_index,
int si_index, parse_state *ret)
{
// prepend sis, append sis, prepend derivs, append derivs
gl_list_t chunks[4];
for (int i = 0; i < 4; ++i)
chunks[i] = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
// Going from PS up to the root, prepended chunks are collected in
// visiting order and appended chunks in reverse visiting order.
for (parse_state *pn = ps; pn != NULL; pn = pn->parent)
if (pn->prepend)
{
gl_list_add_last (chunks[0], pn->state_items.contents);
gl_list_add_last (chunks[2], pn->derivs.contents);
}
else
{
gl_list_add_first (chunks[1], pn->state_items.contents);
gl_list_add_first (chunks[3], pn->derivs.contents);
}
derivation_list popped_derivs = derivation_list_new ();
// Targets for the elements before / after each split.  The NULL entry
// makes the state-items after the split be skipped.
gl_list_t ret_chunks[4] = { ret->state_items.contents, NULL,
ret->derivs.contents, popped_derivs
};
list_flatten_and_split (chunks, ret_chunks, si_index, 2,
list_add_last);
list_flatten_and_split (chunks + 2, ret_chunks + 2, deriv_index, 2,
(chunk_append_fn)derivation_list_append);
// Recompute RET's state-item metadata from what it received.
size_t s_size = gl_list_size (ret->state_items.contents);
ret->state_items.total_size = s_size;
if (s_size > 0)
{
ret->state_items.tail_elt = gl_list_get_at (ret->state_items.contents,
s_size - 1);
ret->state_items.head_elt =
gl_list_get_at (ret->state_items.contents, 0);
}
else
{
ret->state_items.tail_elt = NULL;
ret->state_items.head_elt = NULL;
}
// Likewise for the derivations.
size_t d_size = gl_list_size (ret->derivs.contents);
ret->derivs.total_size = d_size;
if (d_size > 0)
{
ret->derivs.tail_elt = gl_list_get_at (ret->derivs.contents,
d_size - 1);
ret->derivs.head_elt = gl_list_get_at (ret->derivs.contents, 0);
}
else
{
ret->derivs.tail_elt = NULL;
ret->derivs.head_elt = NULL;
}
// The temporary lists were created without a dispose function, so
// the chunk contents they point to are left untouched.
for (int i = 0; i < 4; ++i)
gl_list_free (chunks[i]);
return popped_derivs;
}
// Store in *SITEMS and *DERIVS new lists holding every state-item and
// every derivation that PS represents (its own chunks combined with
// those of its ancestors).  The caller owns the two returned lists.
void
parse_state_lists (parse_state *ps, state_item_list *sitems,
                   derivation_list *derivs)
{
  parse_state *temp = empty_parse_state ();
  size_t si_size = ps->state_items.total_size;
  size_t deriv_size = ps->derivs.total_size;
  // Split at the very end of both lists so that nothing is popped.
  // parser_pop takes the derivation index first and the state-item
  // index second; passing them the other way round drops the trailing
  // state-items whenever there are more state-items than derivations.
  derivation_list dl = parser_pop (ps, deriv_size, si_size, temp);
  *sitems = temp->state_items.contents;
  *derivs = temp->derivs.contents;
  // prevent the return lists from being freed
  temp->state_items.contents = NULL;
  temp->derivs.contents = NULL;
  free_parse_state (temp);
  derivation_list_free (dl);
}
/**
* Compute the parse states that result from taking a transition on
* nullable symbols whenever possible from the given state_item.
*
* Starting from SI, follow the chain of `trans' links for as long as
* the symbol read from the current state-item's item is a nullable
* nonterminal.  Each step creates a child of the previous parse state
* (PS for the first step) and appends it to STATE_LIST.
*/
static void
nullable_closure (parse_state *ps, state_item *si, parse_state_list state_list)
{
parse_state *current_ps = ps;
state_item_number prev_sin = si - state_items;
// The chain ends when `trans' is -1.
for (state_item_number sin = si->trans; sin != -1;
prev_sin = sin, sin = state_items[sin].trans)
{
state_item *psi = &state_items[prev_sin];
symbol_number sp = item_number_as_symbol_number (*psi->item);
// Stop at the first token or non-nullable nonterminal.
if (ISTOKEN (sp) || !nullable[sp - ntokens])
break;
state_item *nsi = &state_items[sin];
current_ps = copy_parse_state (false, current_ps);
ps_si_append (current_ps, nsi);
// SP gets a derivation with an empty list of children.
ps_derivs_append (current_ps,
derivation_new (sp, derivation_list_new (),
state_item_rule (nsi)));
parse_state_list_append (state_list, current_ps);
}
}
// Simulate taking the transition out of the tail state-item of PS.
// Returns a list holding the resulting parse state followed by the
// states nullable_closure derives from it; the list is empty when the
// tail state-item has no transition.
parse_state_list
simulate_transition (parse_state *ps)
{
  const state_item *tail = ps->state_items.tail_elt;
  const symbol_number sym = item_number_as_symbol_number (*tail->item);
  const state_item_number target = tail->trans;
  parse_state_list result = parse_state_list_new ();

  // A negative target is a disabled transition.  It shouldn't happen,
  // as any state_items that lead to these should be disabled.
  if (0 <= target)
    {
      // Transition on the same next symbol, taking nullable symbols
      // into account.
      state_item *next_si = &state_items[target];
      parse_state *next_ps = copy_parse_state (false, ps);
      ps_si_append (next_ps, next_si);
      ps_derivs_append (next_ps, derivation_new_leaf (sym));
      parse_state_list_append (result, next_ps);
      nullable_closure (next_ps, next_si, result);
    }
  return result;
}
/**
 * Whether SYM1 and SYM2 are the same symbol or have intersecting
 * first sets.  For a token paired with a nonterminal, the token must
 * belong to FIRSTS of the nonterminal.
 */
static bool
compatible (symbol_number sym1, symbol_number sym2)
{
  if (sym1 == sym2)
    return true;

  // Token against nonterminal, in either order.
  if (ISTOKEN (sym1) && ISVAR (sym2))
    return bitset_test (FIRSTS (sym2), sym1);
  if (ISVAR (sym1) && ISTOKEN (sym2))
    return bitset_test (FIRSTS (sym1), sym2);

  // Two nonterminals: their first sets must share an element.
  if (ISVAR (sym1) && ISVAR (sym2))
    return !bitset_disjoint_p (FIRSTS (sym1), FIRSTS (sym2));

  // Two different tokens.
  return false;
}
// Simulate the production steps available from the tail state-item of
// PS.  A step to a state-item of its `prods' set is taken only when
// the symbol at that state-item's item is compatible with COMPAT_SYM
// and production_allowed accepts the step.  Returns the resulting
// parse states, each followed by its nullable closure.
parse_state_list
simulate_production (parse_state *ps, symbol_number compat_sym)
{
  parse_state_list result = parse_state_list_new ();
  const state_item *si = parse_state_tail (ps);
  if (!si->prods)
    return result;

  bitset_iterator biter;
  state_item_number sin;
  BITSET_FOR_EACH (biter, si->prods, sin, 0)
    {
      state_item *next = &state_items[sin];
      if (compatible (*next->item, compat_sym)
          && production_allowed (si, next))
        {
          parse_state *next_ps = copy_parse_state (false, ps);
          ps_si_append (next_ps, next);
          parse_state_list_append (result, next_ps);
          // Negative depths are left unchanged.
          if (next_ps->depth >= 0)
            ++next_ps->depth;
          nullable_closure (next_ps, next, result);
        }
    }
  return result;
}
// Simulates a reduction on the given parse state PS, by the rule of
// its tail state-item.  RULE_LEN is the number of symbols to pop.
// SYMBOL_SET is a lookahead set this reduction must be compatible
// with; it is passed to lssi_reverse_production.  Returns the list of
// resulting parse states, which may be empty.
parse_state_list
simulate_reduction (parse_state *ps, int rule_len, bitset symbol_set)
{
parse_state_list result = parse_state_list_new ();
int s_size = ps->state_items.total_size;
int d_size = ps->derivs.total_size;
if (ps->depth >= 0)
d_size--; // account for dot
// new_root keeps the leading derivations and state-items; the popped
// derivations become the children of the new derivation below.
parse_state *new_root = empty_parse_state ();
derivation_list popped_derivs =
parser_pop (ps, d_size - rule_len,
s_size - rule_len - 1, new_root);
// update derivation
state_item *si = (state_item *) ps->state_items.tail_elt;
const rule *r = item_rule (si->item);
symbol_number lhs = r->lhs->number;
derivation *deriv = derivation_new (lhs, popped_derivs, state_item_rule (si));
--new_root->depth;
ps_derivs_append (new_root, deriv);
if (s_size != rule_len + 1)
{
// State-items remain in new_root: follow the transition of its tail.
state_item *tail = (state_item *) new_root->state_items.tail_elt;
ps_si_append (new_root, &state_items[tail->trans]);
parse_state_list_append (result, new_root);
}
else
{
// The head state_item is a production item, so we need to prepend
// with possible source state-items.
const state_item *head = ps->state_items.head_elt;
state_item_list prev = lssi_reverse_production (head, symbol_set);
// TODO: better understand what causes this case.
if (gl_list_size (prev) == 0)
{
// new_root needs to have an RC of 1 to be freed correctly here.
parse_state_retain (new_root);
free_parse_state (new_root);
}
else
{
state_item *psis = NULL;
for (gl_list_iterator_t it = gl_list_iterator (prev);
state_item_list_next (&it, &psis);
)
{
// Prepend the result from the reverse production.
parse_state *copy = copy_parse_state (true, new_root);
ps_si_prepend (copy, psis);
// Append the left hand side to the end of the parser state
copy = copy_parse_state (false, copy);
struct si_chunk *sis = &copy->state_items;
const state_item *tail = sis->tail_elt;
ps_si_append (copy, &state_items[tail->trans]);
parse_state_list_append (result, copy);
nullable_closure (copy, (state_item *) sis->tail_elt, result);
}
}
gl_list_free (prev);
}
return result;
}
/* Build a new list with one parse state per state-item recorded in the
   `revs' set of PS's head.  Each of them is a copy of PS with that
   state-item prepended; when SI_TRANSITION holds for the head, a leaf
   derivation for the symbol stored just before the head's item is
   prepended to the derivations as well.  */
parse_state_list
parser_prepend (parse_state *ps)
{
  parse_state_list prepended = parse_state_list_new ();
  const state_item *head = ps->state_items.head_elt;
  // Symbol found immediately before the head's item position.
  symbol_number prepend_sym = item_number_as_symbol_number (head->item[-1]);

  bitset_iterator biter;
  state_item_number src;
  BITSET_FOR_EACH (biter, head->revs, src, 0)
    {
      parse_state *extended = copy_parse_state (true, ps);
      ps_si_prepend (extended, &state_items[src]);
      if (SI_TRANSITION (head))
        {
          derivation *leaf = derivation_new_leaf (prepend_sym);
          ps_derivs_prepend (extended, leaf);
        }
      parse_state_list_append (prepended, extended);
    }
  return prepended;
}
/* Debugging aid: dump PS on stderr.  Prints a summary line (state-item
   count, depth, reference count), then the head and tail state-items,
   then the head derivation if there is at least one derivation, and
   finally an empty line.  */
void
print_parse_state (parse_state *ps)
{
  FILE *out = stderr;
  const struct si_chunk *sis = &ps->state_items;

  fprintf (out, "(size %zu depth %d rc %d)\n",
           sis->total_size, ps->depth, ps->reference_count);

  state_item_print (sis->head_elt, out, "");
  state_item_print (sis->tail_elt, out, "");

  if (0 < ps->derivs.total_size)
    derivation_print (ps->derivs.head_elt, out, "");

  putc ('\n', out);
}
@@ -0,0 +1,146 @@
/* Parser simulator for unifying counterexample search
Copyright (C) 2020-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef PARSE_SIMULATION_H
# define PARSE_SIMULATION_H
# include <stdio.h>
# include <gl_xlist.h>
# include "derivation.h"
# include "state-item.h"
/*
Simulating states of the parser:
Each state is an array of state-items and an array of derivations.
Each consecutive state-item represents a transition/goto or production,
and the derivations are the derivation trees associated with the symbols
transitioned on each step. In more detail:
Parse states are stored as a tree. Each new parse state contains two "chunks,"
one corresponding to its state-items and the other corresponding to its derivations.
Chunks only have new elements which weren't present in its parent.
Each chunk also stores the head, tail, and total_size of the list it represents.
So if a parse state was to be copied it retains the list metadata but its
contents are empty.
A transition gets the state-item which the last state-item of the parse state
transitions to. This is appended to the state-item list, and a derivation with
just the symbol being transitioned on is appended to the derivation list.
A production appends the new state-item, but does not have a derivation
associated with it.
A reduction looks at the rule of the last state-item in the state, and pops
the last few state-items that make up the rhs of the rule along with their
derivations. The derivations become the derivation of the lhs which is then
shifted over.
Effectively, every time a derivation is appended, it represents a shift in
the parser. So a parse state that contains
start: A . B C D
start: A B C D .
and the state-items in between will represent a parser that has BCD on the
parse stack.
However, the above example cannot be reduced, as it's missing A.
Since we start at a state-item that can have a dot in the middle of a rule,
it's necessary to support a prepend operation. Luckily the prepend operations
are very similar to transitions and productions with the difference being that
they operate on the head of the state-item list instead of the tail.
A prepended production puts the state-item that the head was produced
from in front of the state-item list; it has no derivation associated
with it.
A prepended transition puts the state-item that transitions to the head
in front of the state-item list, and a derivation with just the symbol
being transitioned on is prepended to the derivation list.
*/
/* Handle on a simulated parser state.  Only the incomplete type is
   declared in this header.  */
typedef struct parse_state parse_state;
/* A gl_list_t used to hold parse_state pointers; iterate over it with
   parse_state_list_next.  */
typedef gl_list_t parse_state_list;
/* Advance the iterator IT over a parse_state_list.  When an element is
   available, store it in *PS and return true.  Otherwise free IT, leave
   *PS untouched and return false, so a loop driven by this function
   needs no explicit gl_list_iterator_free.  */
static inline bool
parse_state_list_next (gl_list_iterator_t *it, parse_state **ps)
{
  const void *elt = NULL;
  if (!gl_list_iterator_next (it, &elt, NULL))
    {
      gl_list_iterator_free (it);
      return false;
    }
  *ps = (parse_state *) elt;
  return true;
}
/* Create a parse state from the state-item CONFLICT.
   NOTE(review): presumably CONFLICT becomes the only state-item of the
   new state; the definition is not in this header -- confirm.  */
parse_state *new_parse_state (const state_item *conflict);
/* Hash and equality functions for parse states.
   NOTE(review): the signatures suggest hash-table callbacks, MAX being
   the table size -- confirm against the users.  */
size_t parse_state_hasher (const parse_state *ps, size_t max);
bool parse_state_comparator (const parse_state *ps1, const parse_state *ps2);
/* Memory management: parse states are reference counted.  */
void parse_state_retain (parse_state *ps);
/* This allows a parse_state to free its contents list
 * when its reference count reaches 1.  This is used to
 * free memory while the parse state is in a hash set.  */
void parse_state_free_contents_early (parse_state *ps);
void free_parse_state (parse_state *ps);
/* Count the number of shift and production steps in this parse state;
   the counts are presumably stored through SHIFTS and PRODUCTIONS.  */
void parse_state_completed_steps (const parse_state *ps, int *shifts, int *productions);
/* Parse state getters.  */
bool parse_state_derivation_completed (const parse_state *ps);
derivation *parse_state_derivation (const parse_state *ps);
const state_item *parse_state_head (const parse_state *ps);
const state_item *parse_state_tail (const parse_state *ps);
int parse_state_length (const parse_state *ps);
int parse_state_depth (const parse_state *ps);
/* Return, through STATE_ITEMS and DERIVS, the linked lists that the
   parse state is supposed to represent.  */
void parse_state_lists (parse_state *ps, state_item_list *state_items,
derivation_list *derivs);
/* Various functions that return a list of states based on
 * whatever operation is simulated.  After the operation, every possible
 * transition on nullable nonterminals will be added to the returned list.  */
/* Look at the tail state-item of the parse state and transition on the symbol
 * after its dot.  The symbol gets added to derivs, and the resulting state-item
 * is appended to state-items.  */
parse_state_list simulate_transition (parse_state *ps);
/* Look at all of the productions for the nonterminal following the dot in the tail
 * state-item.  Appends to state-items each production state-item which may start with
 * compat_sym.  */
parse_state_list simulate_production (parse_state *ps, symbol_number compat_sym);
/* Removes the last rule_len state-items along with their derivations.  A new state-item is
 * appended representing the goto after the reduction.  A derivation for the nonterminal that
 * was just reduced is appended which consists of the list of derivations that were just removed.
 * symbol_set is a lookahead set the reduction must be compatible with.  */
parse_state_list simulate_reduction (parse_state *ps, int rule_len,
bitset symbol_set);
/* Generate states with a state-item prepended for each state-item that has a
 * transition or production step to ps's head.  */
parse_state_list parser_prepend (parse_state *ps);
/* For debugging traces: print PS on stderr.  */
void print_parse_state (parse_state *ps);
#endif /* PARSE_SIMULATION_H */
@@ -0,0 +1,197 @@
/* Output a graph of the generated parser, for Bison.
Copyright (C) 2001-2007, 2009-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "print-graph.h"
#include "system.h"
#include "closure.h"
#include "complain.h"
#include "conflicts.h"
#include "files.h"
#include "getargs.h"
#include "gram.h"
#include "graphviz.h"
#include "lalr.h"
#include "lr0.h"
#include "reader.h"
#include "state.h"
#include "symtab.h"
/*----------------------------.
| Construct the node labels. |
`----------------------------*/
/* Append to OOUT the label of the graph node for state S: a
   "State N" header followed by one line per item (only the kernel
   items, unless itemsets are requested in report_flag).

   The lhs of a rule is printed in such a manner that there is no
   vertical repetition, like in *.output files: an item whose lhs has
   the same tag as the previous one gets blank padding and a '|'
   instead.  The position in the rule is marked with a dot, and the
   lookahead tokens of reductions are listed when lookaheads are
   requested in report_flag.  */
static void
print_core (struct obstack *oout, state *s)
{
  item_index const *sitems = s->items;
  sym_content *previous_lhs = NULL;
  size_t snritems = s->nitems;

  /* Output all the items of a state, not just its kernel.  */
  if (report_flag & report_itemsets)
    {
      closure (sitems, snritems);
      sitems = itemset;
      snritems = nitemset;
    }

  obstack_printf (oout, _("State %d"), s->number);
  obstack_sgrow (oout, "\\n\\l");
  for (size_t i = 0; i < snritems; ++i)
    {
      item_number const *sp1 = ritem + sitems[i];
      rule const *r = item_rule (sp1);

      obstack_printf (oout, "%3d ", r->number);
      if (previous_lhs && UNIQSTR_EQ (previous_lhs->symbol->tag,
                                      r->lhs->symbol->tag))
        obstack_printf (oout, "%*s| ",
                        (int) strlen (previous_lhs->symbol->tag), "");
      else
        {
          obstack_backslash (oout, r->lhs->symbol->tag);
          obstack_printf (oout, ": ");
        }
      previous_lhs = r->lhs;

      /* The symbols before the dot, each followed by a space.  */
      for (item_number const *sp = r->rhs; sp < sp1; sp++)
        {
          obstack_backslash (oout, symbols[*sp]->tag);
          obstack_1grow (oout, ' ');
        }

      /* The dot itself: U+2022 BULLET, spelled as its UTF-8 bytes so
         that the source stays pure ASCII.  Growing by an empty string
         here would leave the item without any position marker.  */
      obstack_sgrow (oout, "\342\200\242");

      /* The symbols after the dot, each preceded by a space; a rule
         whose rhs starts with a negative item number is empty.  */
      if (0 <= *r->rhs)
        for (item_number const *sp = sp1; 0 <= *sp; ++sp)
          {
            obstack_1grow (oout, ' ');
            obstack_backslash (oout, symbols[*sp]->tag);
          }
      else
        obstack_sgrow (oout, " %empty");

      /* Experimental feature: display the lookahead tokens.  */
      if (report_flag & report_lookaheads
          && item_number_is_rule_number (*sp1))
        {
          /* Find the reduction we are handling.  */
          reductions *reds = s->reductions;
          int redno = state_reduction_find (s, r);

          /* Print them if there are.  */
          if (reds->lookaheads && redno != -1)
            {
              bitset_iterator biter;
              int k;
              char const *sep = "";
              obstack_sgrow (oout, " [");
              BITSET_FOR_EACH (biter, reds->lookaheads[redno], k, 0)
                {
                  obstack_sgrow (oout, sep);
                  obstack_backslash (oout, symbols[k]->tag);
                  sep = ", ";
                }
              obstack_1grow (oout, ']');
            }
        }
      /* "\l" ends the line of the label.  */
      obstack_sgrow (oout, "\\l");
    }
}
/* Output on FGRAPH the edges leaving the node of state S: one edge per
   enabled transition, then the reductions (via output_red).  Nothing
   is output when S has no transition and no reductions.  */
static void
print_actions (state const *s, FILE *fgraph)
{
  transitions const *trans = s->transitions;

  if (!trans->num && !s->reductions)
    return;

  for (int i = 0; i < trans->num; i++)
    {
      if (TRANSITION_IS_DISABLED (trans, i))
        continue;

      const state *target = trans->states[i];
      const symbol_number sym = target->accessing_symbol;
      int const on_error = TRANSITION_IS_ERROR (trans, i);

      /* Shifts are solid, gotos are dashed, and error is dotted.  */
      char const *style;
      if (on_error)
        style = "dotted";
      else if (TRANSITION_IS_SHIFT (trans, i))
        style = "solid";
      else
        style = "dashed";

      /* An error transition must be labelled by the "error" symbol.  */
      if (on_error && STRNEQ (symbols[sym]->tag, "error"))
        abort ();

      /* Error edges carry no label.  */
      char const *label = on_error ? NULL : symbols[sym]->tag;
      output_edge (s->number, target->number, label, style, fgraph);
    }

  /* Display reductions.  */
  output_red (s, s->reductions, fgraph);
}
/* Output on FGRAPH the node of state S, then the edges leaving it.  */
static void
print_state (state *s, FILE *fgraph)
{
  /* The label of the node lists the items of the state; it is built in
     a temporary obstack that is released once the node is output.  */
  struct obstack label;
  obstack_init (&label);
  print_core (&label, s);
  char const *text = obstack_finish0 (&label);
  output_node (s->number, text, fgraph);
  obstack_free (&label, 0);

  /* Output the edges.  */
  print_actions (s, fgraph);
}
/* Write the graph of the automaton into the file named by
   spec_graph_file: the graph prologue, one node with its edges per
   state, and the graph epilogue.  */
void
print_graph (void)
{
  FILE *fgraph = xfopen (spec_graph_file, "w");

  start_graph (fgraph);
  /* Output nodes and edges, in state order.  */
  for (int st = 0; st < nstates; ++st)
    print_state (states[st], fgraph);
  finish_graph (fgraph);

  xfclose (fgraph);
}
@@ -0,0 +1,26 @@
/* Output a graph of the generated parser, for Bison.
Copyright (C) 2000, 2006, 2009-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef PRINT_GRAPH_H_
# define PRINT_GRAPH_H_
/* Output the graph of the generated parser: one node per state and the
   edges leaving it.  */
void print_graph (void);
#endif /* !PRINT_GRAPH_H_ */
@@ -0,0 +1,577 @@
/* Print an xml on generated parser, for Bison,
Copyright (C) 2007, 2009-2015, 2018-2021 Free Software Foundation,
Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "print-xml.h"
#include "system.h"
#include <bitset.h>
#include <stdarg.h>
#include "closure.h"
#include "complain.h"
#include "conflicts.h"
#include "execute.h"
#include "files.h"
#include "getargs.h"
#include "gram.h"
#include "lalr.h"
#include "lr0.h"
#include "muscle-tab.h"
#include "path-join.h"
#include "print.h"
#include "reader.h"
#include "reduce.h"
#include "state.h"
#include "symtab.h"
#include "tables.h"
/* Scratch set of token numbers, reset for each state by
   print_reductions: the tokens on which the state shifts or raises an
   explicit error.  Created and freed by print_xml.  */
static bitset no_reduce_set;
/* A growable buffer holding the result of one XML escaping.  */
struct escape_buf
{
/* The buffer; NULL until first used.  */
char *ptr;
/* Size recorded for PTR by xml_escape_string.  */
size_t size;
};
/* Number of escaped strings that can be alive at the same time: the
   buffers are selected by the N argument of xml_escape_n.  */
enum { num_escape_bufs = 3 };
static struct escape_buf escape_bufs[num_escape_bufs];
/* Report on OUT, at indentation LEVEL, the itemset of state S.

   All the items of the state are output, not only its kernel.  Each
   item is an <item> element giving its rule number and the position of
   its dot in the rhs; the items of reductions that have lookaheads
   also contain these lookaheads.  An empty <itemset/> element is output
   when there is no item.  */
static void
print_core (FILE *out, int level, state *s)
{
  item_index *sitems = s->items;
  size_t snritems = s->nitems;

  /* Output all the items of a state, not only its kernel.  */
  closure (sitems, snritems);
  sitems = itemset;
  snritems = nitemset;

  if (!snritems)
    {
      xml_puts (out, level, "<itemset/>");
      return;
    }

  xml_puts (out, level, "<itemset>");

  for (size_t i = 0; i < snritems; i++)
    {
      bool printed = false;
      item_number *sp1 = ritem + sitems[i];
      rule const *r = item_rule (sp1);
      /* Number of rhs symbols before the dot.  xml_printf is variadic
         and the format uses "%d": the pointer difference, a ptrdiff_t,
         must be converted to int explicitly.  */
      int dot = (int) (sp1 - r->rhs);

      /* Display the lookahead tokens?  */
      if (item_number_is_rule_number (*sp1))
        {
          reductions *reds = s->reductions;
          int red = state_reduction_find (s, r);
          /* Print item with lookaheads if there are.  */
          if (reds->lookaheads && red != -1)
            {
              xml_printf (out, level + 1,
                          "<item rule-number=\"%d\" dot=\"%d\">",
                          r->number, dot);
              state_rule_lookaheads_print_xml (s, r,
                                               out, level + 2);
              xml_puts (out, level + 1, "</item>");
              printed = true;
            }
        }

      if (!printed)
        xml_printf (out, level + 1,
                    "<item rule-number=\"%d\" dot=\"%d\"/>",
                    r->number, dot);
    }
  xml_puts (out, level, "</itemset>");
}
/* Report on OUT, at indentation LEVEL, the enabled transitions of S:
   all the shifts first, then all the gotos.  An empty <transitions/>
   element is output when no transition is enabled.  */
static void
print_transitions (state *s, FILE *out, int level)
{
  transitions *trans = s->transitions;

  /* Is there anything to report?  */
  bool some_enabled = false;
  for (int i = 0; i < trans->num && !some_enabled; i++)
    some_enabled = !TRANSITION_IS_DISABLED (trans, i);

  if (!some_enabled)
    {
      xml_puts (out, level, "<transitions/>");
      return;
    }

  xml_puts (out, level, "<transitions>");

  /* Two passes over the transitions, so that the shifts are listed
     before the gotos.  */
  for (int pass = 0; pass < 2; pass++)
    {
      bool const want_shift = pass == 0;
      for (int i = 0; i < trans->num; i++)
        {
          if (TRANSITION_IS_DISABLED (trans, i))
            continue;
          bool const is_shift = TRANSITION_IS_SHIFT (trans, i);
          if (is_shift != want_shift)
            continue;

          symbol *sym = symbols[TRANSITION_SYMBOL (trans, i)];
          state *target = trans->states[i];
          xml_printf (out, level + 1,
                      want_shift
                      ? "<transition type=\"shift\" symbol=\"%s\" state=\"%d\"/>"
                      : "<transition type=\"goto\" symbol=\"%s\" state=\"%d\"/>",
                      xml_escape (sym->tag), target->number);
        }
    }

  xml_puts (out, level, "</transitions>");
}
/* Report on OUT, at indentation LEVEL, the explicit errors of S raised
   from %nonassoc.  An empty <errors/> element is output when S has no
   such error.  */
static void
print_errs (FILE *out, int level, state *s)
{
  errs *errp = s->errs;

  /* Is there anything to report?  */
  bool some_error = false;
  for (int i = 0; i < errp->num; ++i)
    if (errp->symbols[i])
      {
        some_error = true;
        break;
      }

  if (!some_error)
    {
      xml_puts (out, level, "<errors/>");
      return;
    }

  /* Report lookahead tokens and errors.  */
  xml_puts (out, level, "<errors>");
  for (int i = 0; i < errp->num; ++i)
    {
      /* Empty slots are skipped.  */
      if (!errp->symbols[i])
        continue;
      xml_printf (out, level + 1,
                  "<error symbol=\"%s\">nonassociative</error>",
                  xml_escape (errp->symbols[i]->tag));
    }
  xml_puts (out, level, "</errors>");
}
/* Report on OUT, at indentation LEVEL, a reduction of rule R on
   LOOKAHEAD (which can be '$default').  The initial rule is reported
   as "accept" instead of by number.  If not ENABLED, the rule is masked
   by a shift or a reduce (S/R and R/R conflicts).  */
static void
print_reduction (FILE *out, int level, char const *lookahead,
                 rule *r, bool enabled)
{
  bool const accepting = rule_is_initial (r);
  char const *sym = xml_escape (lookahead);
  char const *enabled_attr = enabled ? "true" : "false";

  if (accepting)
    {
      xml_printf (out, level,
                  "<reduction symbol=\"%s\" rule=\"accept\" enabled=\"%s\"/>",
                  sym, enabled_attr);
      return;
    }

  xml_printf (out, level,
              "<reduction symbol=\"%s\" rule=\"%d\" enabled=\"%s\"/>",
              sym, r->number, enabled_attr);
}
/* Report on OUT, at indentation LEVEL, the reduction actions of S.
   An empty <reductions/> element is output when S has no reduction, or
   when none of them needs to be reported.  */
static void
print_reductions (FILE *out, int level, state *s)
{
  reductions *reds = s->reductions;
  if (reds->num == 0)
    {
      xml_puts (out, level, "<reductions/>");
      return;
    }

  /* yydefact stores the number of the default rule plus one, zero
     meaning that there is no default reduction.  */
  rule *default_reduction
    = yydefact[s->number] != 0 ? &rules[yydefact[s->number] - 1] : NULL;

  /* Collect in no_reduce_set the tokens on which S shifts or raises an
     explicit error.  */
  transitions *trans = s->transitions;
  bitset_zero (no_reduce_set);
  {
    int i;
    FOR_EACH_SHIFT (trans, i)
      bitset_set (no_reduce_set, TRANSITION_SYMBOL (trans, i));
  }
  for (int i = 0; i < s->errs->num; ++i)
    if (s->errs->symbols[i])
      bitset_set (no_reduce_set, s->errs->symbols[i]->content->number);

  /* There is something to report if there is a default reduction, a
     reduction by another rule than the default one, or a reduction on a
     token that already has an action.  */
  bool report = default_reduction != NULL;
  if (reds->lookaheads)
    for (int tok = 0; tok < ntokens; tok++)
      {
        /* Whether an action on TOK was already seen.  */
        bool taken = bitset_test (no_reduce_set, tok);
        for (int j = 0; j < reds->num; ++j)
          {
            if (!bitset_test (reds->lookaheads[j], tok))
              continue;
            if (taken || reds->rules[j] != default_reduction)
              report = true;
            taken = true;
          }
      }

  /* Nothing to report.  */
  if (!report)
    {
      xml_puts (out, level, "<reductions/>");
      return;
    }

  xml_puts (out, level, "<reductions>");

  /* Report lookahead tokens (or $default) and reductions.  */
  if (reds->lookaheads)
    for (int tok = 0; tok < ntokens; tok++)
      {
        char const *tag = symbols[tok]->tag;
        /* Whether the first action on TOK was the default reduction,
           and has not been reported explicitly yet.  */
        bool defaulted = false;
        bool taken = bitset_test (no_reduce_set, tok);

        for (int j = 0; j < reds->num; ++j)
          {
            if (!bitset_test (reds->lookaheads[j], tok))
              continue;

            if (taken)
              {
                /* Conflict: spell out the default reduction if it is
                   the one that wins, then the masked reduction.  */
                if (defaulted)
                  print_reduction (out, level + 1, tag,
                                   default_reduction, true);
                defaulted = false;
                print_reduction (out, level + 1, tag,
                                 reds->rules[j], false);
              }
            else
              {
                /* First action on TOK.  */
                if (reds->rules[j] == default_reduction)
                  defaulted = true;
                else
                  print_reduction (out, level + 1, tag,
                                   reds->rules[j], true);
                taken = true;
              }
          }
      }

  if (default_reduction)
    print_reduction (out, level + 1,
                     "$default", default_reduction, true);

  xml_puts (out, level, "</reductions>");
}
/* Report on OUT, at indentation LEVEL, all the actions of S inside an
   <actions> element: transitions (shifts and gotos), explicit errors
   from %nonassoc, and reductions, in that order.  */
static void
print_actions (FILE *out, int level, state *s)
{
  int const inner = level + 1;

  xml_puts (out, level, "<actions>");
  print_transitions (s, out, inner);
  print_errs (out, inner, s);
  print_reductions (out, inner, s);
  xml_puts (out, level, "</actions>");
}
/* Report on OUT, at indentation LEVEL, all the data on S inside a
   <state> element: its itemset, its actions, and its solved
   conflicts.  */
static void
print_state (FILE *out, int level, state *s)
{
  int const inner = level + 1;

  fputc ('\n', out);
  xml_printf (out, level, "<state number=\"%d\">", s->number);
  print_core (out, inner, s);
  print_actions (out, inner, s);

  if (!s->solved_conflicts_xml)
    xml_puts (out, inner, "<solved-conflicts/>");
  else
    {
      /* The report on solved conflicts is already XML: it is copied
         as is.  */
      xml_puts (out, inner, "<solved-conflicts>");
      fputs (s->solved_conflicts_xml, out);
      xml_puts (out, inner, "</solved-conflicts>");
    }

  xml_puts (out, level, "</state>");
}
/* Print on OUT, at indentation LEVEL, the <grammar> element: the rules
   (via grammar_rules_print_xml), then the terminals, then the
   nonterminals.  */
static void
print_grammar (FILE *out, int level)
{
  int const list_level = level + 1;
  int const entry_level = level + 2;

  fputc ('\n', out);
  xml_puts (out, level, "<grammar>");
  grammar_rules_print_xml (out, level);

  /* Terminals */
  xml_puts (out, list_level, "<terminals>");
  for (int code = 0; code < max_code + 1; code++)
    {
      /* Skip the token codes that are translated to the undefined
         token.  */
      if (token_translations[code] == undeftoken->content->number)
        continue;

      symbol const *sym = symbols[token_translations[code]];
      char const *type = sym->content->type_name;
      int precedence = sym->content->prec;
      assoc associativity = sym->content->assoc;

      xml_indent (out, entry_level);
      /* The name and the type are escaped in distinct buffers: both
         results are used by the same fprintf call.  */
      char const *name_attr = xml_escape_n (0, sym->tag);
      char const *type_attr = type ? xml_escape_n (1, type) : "";
      char const *usefulness
        = (reduce_token_unused_in_grammar (token_translations[code])
           ? "unused-in-grammar" : "useful");
      fprintf (out,
               "<terminal symbol-number=\"%d\" token-number=\"%d\""
               " name=\"%s\" type=\"%s\" usefulness=\"%s\"",
               token_translations[code], code, name_attr,
               type_attr, usefulness);

      /* Optional attributes.  */
      if (precedence)
        fprintf (out, " prec=\"%d\"", precedence);
      /* The first character of the associativity name is skipped.  */
      if (associativity != undef_assoc)
        fprintf (out, " assoc=\"%s\"", assoc_to_string (associativity) + 1);
      fputs ("/>\n", out);
    }
  xml_puts (out, list_level, "</terminals>");

  /* Nonterminals */
  xml_puts (out, list_level, "<nonterminals>");
  for (symbol_number i = ntokens; i < nsyms + nuseless_nonterminals; i++)
    {
      symbol const *sym = symbols[i];
      char const *type = sym->content->type_name;
      char const *name_attr = xml_escape_n (0, sym->tag);
      char const *type_attr = type ? xml_escape_n (1, type) : "";
      char const *usefulness
        = (reduce_nonterminal_useless_in_grammar (sym->content)
           ? "useless-in-grammar" : "useful");
      xml_printf (out, entry_level,
                  "<nonterminal symbol-number=\"%d\" name=\"%s\""
                  " type=\"%s\""
                  " usefulness=\"%s\"/>",
                  i, name_attr, type_attr, usefulness);
    }
  xml_puts (out, list_level, "</nonterminals>");
  xml_puts (out, level, "</grammar>");
}
/* Output on OUT the indentation for nesting LEVEL: one indentation
   unit per level, nothing when LEVEL is not positive.  */
void
xml_indent (FILE *out, int level)
{
  while (0 < level--)
    fputs (" ", out);
}
/* Output on OUT the string S as a line of its own, indented for
   nesting LEVEL.  S is output as is: it is not escaped.  */
void
xml_puts (FILE *out, int level, char const *s)
{
  xml_indent (out, level);
  fprintf (out, "%s\n", s);
}
/* Output on OUT a line indented for nesting LEVEL, whose content is
   the printf-like format FMT applied to the remaining arguments.  A
   newline is appended.  */
void
xml_printf (FILE *out, int level, char const *fmt, ...)
{
  va_list args;

  va_start (args, fmt);
  xml_indent (out, level);
  vfprintf (out, fmt, args);
  va_end (args);

  fputc ('\n', out);
}
/* Store in BUF a copy of STR in which '&', '<', '>' and '"' are
   replaced by the corresponding XML entities, and return the buffer.
   The result is overwritten by the next call using the same BUF.  */
static char const *
xml_escape_string (struct escape_buf *buf, char const *str)
{
  /* "&quot;" is the longest replacement: it bounds the expansion of
     each character.  */
  size_t const worst_case = (sizeof "&quot;" - 1) * strlen (str);
  if (worst_case >= buf->size)
    {
      /* One more byte for the terminating NUL.  */
      buf->size = worst_case + 1;
      buf->ptr = x2realloc (buf->ptr, &buf->size);
    }

  char *dst = buf->ptr;
  for (char const *src = str; *src; ++src)
    {
      char const *entity = NULL;
      switch (*src)
        {
        case '&': entity = "&amp;";  break;
        case '<': entity = "&lt;";   break;
        case '>': entity = "&gt;";   break;
        case '"': entity = "&quot;"; break;
        default: break;
        }
      if (entity)
        dst = stpcpy (dst, entity);
      else
        *dst++ = *src;
    }
  *dst = '\0';
  return buf->ptr;
}
char const *
xml_escape_n (int n, char const *str)
{
return xml_escape_string (escape_bufs + n, str);
}
/* Return STR escaped for XML.  Shorthand for the escape buffer
   number 0: the result is overwritten by the next escaping using that
   buffer.  */
char const *
xml_escape (char const *str)
{
  char const *escaped = xml_escape_n (0, str);
  return escaped;
}
/* Write the XML report into the file named by spec_xml_file: the XML
   declaration, then a <bison-xml-report> element containing the name
   of the grammar file, the grammar, and the automaton (one <state> per
   state).

   The escape buffers are released at the end, and reset so that they
   are in their initial state again: leaving the freed pointers with
   their former sizes would let a later escaping write in freed
   memory.  */
void
print_xml (void)
{
  FILE *out = xfopen (spec_xml_file, "w");

  fputs ("<?xml version=\"1.0\"?>\n\n", out);
  int level = 0;
  /* Three escaped strings are alive at once: one buffer each.  */
  xml_printf (out, level,
              "<bison-xml-report version=\"%s\" bug-report=\"%s\""
              " url=\"%s\">",
              xml_escape_n (0, VERSION),
              xml_escape_n (1, PACKAGE_BUGREPORT),
              xml_escape_n (2, PACKAGE_URL));

  fputc ('\n', out);
  xml_printf (out, level + 1, "<filename>%s</filename>",
              xml_escape (grammar_file));

  /* print grammar */
  print_grammar (out, level + 1);

  /* Scratch set used by print_reductions for each state.  */
  no_reduce_set = bitset_create (ntokens, BITSET_FIXED);

  /* print automaton */
  fputc ('\n', out);
  xml_puts (out, level + 1, "<automaton>");
  for (state_number i = 0; i < nstates; i++)
    print_state (out, level + 2, states[i]);
  xml_puts (out, level + 1, "</automaton>");

  bitset_free (no_reduce_set);

  xml_puts (out, 0, "</bison-xml-report>");

  for (int i = 0; i < num_escape_bufs; ++i)
    {
      free (escape_bufs[i].ptr);
      escape_bufs[i].ptr = NULL;
      escape_bufs[i].size = 0;
    }

  xfclose (out);
}
/* Generate the HTML report from the XML one: run the tool given by the
   "tool.xsltproc" %define variable on spec_xml_file with the
   xml2xhtml.xsl style sheet found under pkgdatadir (), writing into
   spec_html_file.  A complaint is issued when the tool fails.  */
void
print_html (void)
{
  assert (xml_flag);

  char *xml2html = xpath_join (pkgdatadir (), "xslt/xml2xhtml.xsl");
  char *xsltproc = muscle_percent_define_get ("tool.xsltproc");

  /* The command line, NULL terminated.  */
  char const *argv[] =
    {
      xsltproc,
      "-o",
      spec_html_file,
      xml2html,
      spec_xml_file,
      NULL
    };

  if (trace_flag & trace_tools)
    {
      fputs ("running:", stderr);
      for (char const **arg = argv; *arg; ++arg)
        fprintf (stderr, " %s", *arg);
      fputc ('\n', stderr);
    }

  int status
    = execute (argv[0],
               argv[0], argv,
               /* directory */ NULL,
               /* ignore_sigpipe */ false,
               /* null_stdin, null_stdout, null_stderr */ true, true, true,
               /* slave_process */ true, /* exit_on_error */ false,
               /* termsigp */ NULL);
  if (status)
    complain (NULL, complaint, _("%s failed with status %d"), argv[0], status);

  free (xsltproc);
  free (xml2html);
}
@@ -0,0 +1,36 @@
/* Output an xml of the generated parser, for Bison.
Copyright (C) 2007, 2009-2015, 2018-2021 Free Software Foundation,
Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef PRINT_XML_H_
# define PRINT_XML_H_
# include <stdio.h>
/* Output on OUT the indentation for nesting LEVEL.  */
void xml_indent (FILE *out, int level);
/* Output a string, as is, on a line of its own indented for the given
   nesting level.  */
void xml_puts (FILE *, int, char const *);
/* Likewise, but the line is built from a printf-like format and its
   arguments.  */
void xml_printf (FILE *, int, char const *, ...);
/* Return STR with '&', '<', '>' and '"' replaced by XML entities.  The
   result lives in the escape buffer number N, and is overwritten by
   the next call with the same N.  */
char const *xml_escape_n (int n, char const *str);
/* Same as xml_escape_n with N = 0.  */
char const *xml_escape (char const *str);
/* Write the XML report on the grammar and the automaton.  */
void print_xml (void);
/* Use xsltproc to generate HTML from XML output. */
void print_html (void);
#endif /* !PRINT_XML_H_ */
+477
View File
@@ -0,0 +1,477 @@
/* Print information on generated parser, for bison,
Copyright (C) 1984, 1986, 1989, 2000-2005, 2007, 2009-2015, 2018-2021
Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "print.h"
#include "system.h"
#include <bitset.h>
#include <mbswidth.h>
#include "closure.h"
#include "complain.h"
#include "conflicts.h"
#include "counterexample.h"
#include "files.h"
#include "getargs.h"
#include "gram.h"
#include "lalr.h"
#include "lr0.h"
#include "muscle-tab.h"
#include "reader.h"
#include "reduce.h"
#include "state.h"
#include "symtab.h"
#include "tables.h"
/* For a given state, the symbol numbers of the lookahead tokens for
shifts and errors (i.e. not reduce). */
static bitset no_reduce_set;
/* *WIDTH := max (*WIDTH, width of STR), where the width of STR is the
   one computed by mbswidth.  */
static void
max_length (size_t *width, const char *str)
{
  size_t const columns = mbswidth (str, 0);
  if (*width < columns)
    *width = columns;
}
/* Report on OUT the items of state S, one per line, after an empty
   line: only the kernel items, unless itemsets are requested in
   report_flag.  The lookaheads of the reductions are also output when
   they are requested in report_flag.  Nothing is output when there is
   no item.  */
static void
print_core (FILE *out, const state *s)
{
  const item_index *sitems = s->items;
  size_t snritems = s->nitems;

  /* Output all the items of a state, not only its kernel.  */
  if (report_flag & report_itemsets)
    {
      closure (sitems, snritems);
      sitems = itemset;
      snritems = nitemset;
    }

  if (snritems == 0)
    return;

  fputc ('\n', out);

  /* The previous rule is passed to item_print along with each item.  */
  rule const *previous_rule = NULL;
  for (const item_index *ip = sitems; ip < sitems + snritems; ++ip)
    {
      item_number *sp1 = ritem + *ip;
      rule const *r = item_rule (sp1);
      item_print (sp1, previous_rule, out);
      previous_rule = r;

      /* Display the lookahead tokens?  */
      if ((report_flag & report_lookaheads)
          && item_number_is_rule_number (*sp1))
        state_rule_lookaheads_print (s, r, out);

      fputc ('\n', out);
    }
}
/* Report on OUT the enabled shifts of S if DISPLAY_TRANSITIONS_P, its
   enabled gotos otherwise.  The symbols are padded so that the actions
   are aligned in a column.  Nothing is output when there is nothing to
   report.  */
static void
print_transitions (const state *s, FILE *out, bool display_transitions_p)
{
  transitions *trans = s->transitions;

  /* Compute the width of the lookahead token column.  */
  size_t width = 0;
  for (int i = 0; i < trans->num; i++)
    {
      if (TRANSITION_IS_DISABLED (trans, i)
          || TRANSITION_IS_SHIFT (trans, i) != display_transitions_p)
        continue;
      max_length (&width, symbols[TRANSITION_SYMBOL (trans, i)]->tag);
    }

  /* Nothing to report.  */
  if (width == 0)
    return;

  fputc ('\n', out);
  width += 2;

  /* Report lookahead tokens and shifts.  */
  for (int i = 0; i < trans->num; i++)
    {
      if (TRANSITION_IS_DISABLED (trans, i)
          || TRANSITION_IS_SHIFT (trans, i) != display_transitions_p)
        continue;

      const char *tag = symbols[TRANSITION_SYMBOL (trans, i)]->tag;
      const state *target = trans->states[i];

      fprintf (out, " %s", tag);
      /* Pad up to the column of the actions.  */
      int pad = width - mbswidth (tag, 0);
      while (0 < pad--)
        fputc (' ', out);

      if (display_transitions_p)
        fprintf (out, _("shift, and go to state %d\n"), target->number);
      else
        fprintf (out, _("go to state %d\n"), target->number);
    }
}
/* Report on OUT the explicit errors of S raised from %nonassoc, one
   per line, after an empty line.  The symbols are padded so that the
   messages are aligned in a column.  Nothing is output when there is
   nothing to report.  */
static void
print_errs (FILE *out, const state *s)
{
  errs *errp = s->errs;

  /* Compute the width of the lookahead token column.  */
  size_t width = 0;
  for (int i = 0; i < errp->num; ++i)
    {
      if (!errp->symbols[i])
        continue;
      max_length (&width, errp->symbols[i]->tag);
    }

  /* Nothing to report.  */
  if (width == 0)
    return;

  fputc ('\n', out);
  width += 2;

  /* Report lookahead tokens and errors.  */
  for (int i = 0; i < errp->num; ++i)
    {
      /* Empty slots are skipped.  */
      if (!errp->symbols[i])
        continue;

      const char *tag = errp->symbols[i]->tag;
      fprintf (out, " %s", tag);
      /* Pad up to the column of the messages.  */
      int pad = width - mbswidth (tag, 0);
      while (0 < pad--)
        fputc (' ', out);
      fputs (_("error (nonassociative)\n"), out);
    }
}
/*-------------------------------------------------------------------.
| Report on OUT a reduction of rule R on LOOKAHEAD (which can be     |
| '$default'), padding LOOKAHEAD to WIDTH display columns.  The      |
| initial rule is reported as "accept".  If not ENABLED, the rule is |
| masked by a shift or a reduce (S/R and R/R conflicts) and the      |
| action is printed between square brackets.                         |
`-------------------------------------------------------------------*/
static void
print_reduction (FILE *out, size_t width,
                 const char *lookahead,
                 rule *r, bool enabled)
{
  fprintf (out, " %s", lookahead);
  for (int j = width - mbswidth (lookahead, 0); j > 0; --j)
    fputc (' ', out);
  if (!enabled)
    fputc ('[', out);
  if (rule_is_initial (r))
    /* fputs rather than fprintf: a translated message must not be
       used as a format string, since a stray '%' in a message catalog
       would then be interpreted as a conversion.  */
    fputs (_("accept"), out);
  else
    fprintf (out, _("reduce using rule %d (%s)"), r->number,
             r->lhs->symbol->tag);
  if (!enabled)
    fputc (']', out);
  fputc ('\n', out);
}
/*--------------------------------------------------------------------.
| Report on OUT the reduction actions of S. |
| |
| Each lookahead token is listed with the rule(s) reducing on it; |
| reductions masked by an earlier action on the same token are |
| printed as disabled (see print_reduction), and the default |
| reduction, if any, is printed last under '$default'. |
| Uses the file-level bitset no_reduce_set as scratch storage. |
`--------------------------------------------------------------------*/
static void
print_reductions (FILE *out, const state *s)
{
reductions *reds = s->reductions;
if (reds->num == 0)
return;
rule *default_reduction = NULL;
/* A zero yydefact entry means S has no default reduction; otherwise
it is the index of the default rule plus one. */
if (yydefact[s->number] != 0)
default_reduction = &rules[yydefact[s->number] - 1];
transitions *trans = s->transitions;
/* Collect in no_reduce_set the symbols on which S already has a
non-reduce action: its shifts and its explicit errors. */
bitset_zero (no_reduce_set);
{
int i;
FOR_EACH_SHIFT (trans, i)
bitset_set (no_reduce_set, TRANSITION_SYMBOL (trans, i));
}
for (int i = 0; i < s->errs->num; ++i)
if (s->errs->symbols[i])
bitset_set (no_reduce_set, s->errs->symbols[i]->content->number);
/* Compute the width of the lookahead token column. This loop
mirrors the reporting loop below: a token contributes to the width
exactly when that loop may print a line for it. */
size_t width = 0;
if (default_reduction)
width = mbswidth (_("$default"), 0);
if (reds->lookaheads)
for (int i = 0; i < ntokens; i++)
{
/* COUNT is true once token I already has an action (a shift, an
error, or an earlier reduction). */
bool count = bitset_test (no_reduce_set, i);
for (int j = 0; j < reds->num; ++j)
if (bitset_test (reds->lookaheads[j], i))
{
if (! count)
{
/* First action on I: it gets its own line unless it is the
default reduction. */
if (reds->rules[j] != default_reduction)
max_length (&width, symbols[i]->tag);
count = true;
}
else
max_length (&width, symbols[i]->tag);
}
}
/* Nothing to report. */
if (!width)
return;
fputc ('\n', out);
width += 2;
/* Stays true only if no token has a shift/error and every reduction
seen below is an unconflicted use of the default reduction. It
feeds the sanity check at the end. */
bool default_reduction_only = true;
/* Report lookahead tokens (or $default) and reductions. */
if (reds->lookaheads)
for (int i = 0; i < ntokens; i++)
{
/* DEFAULTED: the first reduction on I was the default one and its
line has been withheld so far. */
bool defaulted = false;
bool count = bitset_test (no_reduce_set, i);
if (count)
default_reduction_only = false;
for (int j = 0; j < reds->num; ++j)
if (bitset_test (reds->lookaheads[j], i))
{
if (! count)
{
if (reds->rules[j] != default_reduction)
{
default_reduction_only = false;
print_reduction (out, width,
symbols[i]->tag,
reds->rules[j], true);
}
else
defaulted = true;
count = true;
}
else
{
/* I already has an action: this reduction is masked. If the
withheld default reduction is what masks it, print that one
explicitly (enabled) first, then this one as disabled. */
default_reduction_only = false;
if (defaulted)
print_reduction (out, width,
symbols[i]->tag,
default_reduction, true);
defaulted = false;
print_reduction (out, width,
symbols[i]->tag,
reds->rules[j], false);
}
}
}
if (default_reduction)
{
char *default_reductions =
muscle_percent_define_get ("lr.default-reduction");
print_reduction (out, width, _("$default"), default_reduction, true);
/* Sanity check: a default reduction is only expected when
lr.default-reduction is "most", or is "consistent" and nothing but
the default reduction was found above, or when the only reduction
of S is the initial rule. */
aver (STREQ (default_reductions, "most")
|| (STREQ (default_reductions, "consistent")
&& default_reduction_only)
|| (reds->num == 1 && rule_is_initial (reds->rules[0])));
/* NOTE(review): presumably silences an unused-variable warning when
aver is compiled out -- confirm. */
(void) default_reduction_only;
/* The string returned by muscle_percent_define_get is released
here. */
free (default_reductions);
}
}
/*----------------------------------------------------------------.
| Report on OUT all the actions of S, in this order: shifts,      |
| explicit errors from %nonassoc, reductions, and finally gotos.  |
`----------------------------------------------------------------*/
static void
print_actions (FILE *out, const state *s)
{
  /* Values of print_transitions' selector argument.  */
  const bool shifts = true;
  const bool gotos = false;

  print_transitions (s, out, shifts);
  print_errs (out, s);
  print_reductions (out, s);
  print_transitions (s, out, gotos);
}
/*--------------------------------------------------------------------.
| Report all the data on S on OUT: a "State N" heading, its core,     |
| its actions, then, when requested, its solved conflicts and the     |
| counterexamples for its conflicts.                                  |
`--------------------------------------------------------------------*/
static void
print_state (FILE *out, const state *s)
{
  fputs ("\n\n", out);
  fprintf (out, _("State %d"), s->number);
  fputc ('\n', out);

  print_core (out, s);
  print_actions (out, s);

  /* Solved conflicts, only when asked for and when there are some.  */
  if (report_flag & report_solved_conflicts)
    if (s->solved_conflicts)
      {
        fputc ('\n', out);
        fputs (s->solved_conflicts, out);
      }

  /* Counterexamples, for conflicting states only, when either the
     report or the warning is enabled.  */
  if (has_conflicts (s))
    if (report_flag & report_cex
        || warning_is_enabled (Wcounterexamples))
      {
        fputc ('\n', out);
        counterexample_report_state (s, out, " ");
      }
}
/*-----------------------------------------.
| Print information on the whole grammar.  |
`-----------------------------------------*/

/* List on OUT every token code from 0 to max_code that does not
   translate to the undefined token: its tag, its type name if any,
   its code, and the numbers of the rules whose RHS contains it.  */
static void
print_terminal_symbols (FILE *out)
{
  /* TERMINAL (type #) : rule #s terminal is on RHS */
  fprintf (out, "%s\n\n", _("Terminals, with rules where they appear"));

  for (int i = 0; i < max_code + 1; ++i)
    {
      if (token_translations[i] == undeftoken->content->number)
        continue;

      const symbol *sym = symbols[token_translations[i]];
      fprintf (out, "%4s%s", "", sym->tag);
      if (sym->content->type_name)
        fprintf (out, " <%s>", sym->content->type_name);
      fprintf (out, " (%d)", i);

      for (rule_number r = 0; r < nrules; r++)
        {
          /* Stop on the first occurrence of the token in the RHS, or
             on the end-of-rule marker (a negative item).  */
          item_number *rhsp = rules[r].rhs;
          while (*rhsp >= 0
                 && (item_number_as_symbol_number (*rhsp)
                     != token_translations[i]))
            rhsp++;
          if (*rhsp >= 0)
            fprintf (out, " %d", r);
        }
      fputc ('\n', out);
    }

  fputs ("\n\n", out);
}
/* List on OUT every nonterminal (symbols numbered from ntokens up to
   nsyms - 1): its tag, its type name if any, its number, and the
   numbers of the rules where it appears as LHS ("on left:") and in a
   RHS ("on right:").  */
static void
print_nonterminal_symbols (FILE *out)
{
  fprintf (out, "%s\n\n", _("Nonterminals, with rules where they appear"));
  for (symbol_number i = ntokens; i < nsyms; i++)
    {
      const symbol *sym = symbols[i];
      const char *tag = sym->tag;

      /* Find out which of the two rule lists are needed.  Stop
         scanning as soon as both are known to be non-empty.  */
      bool on_left = false;
      bool on_right = false;
      for (rule_number r = 0; r < nrules; r++)
        {
          on_left |= rules[r].lhs->number == i;
          for (item_number *rhsp = rules[r].rhs; !on_right && 0 <= *rhsp; ++rhsp)
            on_right |= item_number_as_symbol_number (*rhsp) == i;
          if (on_left && on_right)
            break;
        }

      fprintf (out, "%4s%s", "", tag);
      if (sym->content->type_name)
        fprintf (out, " <%s>", sym->content->type_name);
      fprintf (out, " (%d)\n", i);

      if (on_left)
        {
          fprintf (out, "%8s%s", "", _("on left:"));
          for (rule_number r = 0; r < nrules; r++)
            if (rules[r].lhs->number == i)
              fprintf (out, " %d", r);
          fputc ('\n', out);
        }

      if (on_right)
        {
          fprintf (out, "%8s%s", "", _("on right:"));
          for (rule_number r = 0; r < nrules; r++)
            /* Report each rule at most once, however many times the
               symbol occurs in its RHS.  */
            for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
              if (item_number_as_symbol_number (*rhsp) == i)
                {
                  fprintf (out, " %d", r);
                  break;
                }
          fputc ('\n', out);
        }
    }
}
/* Write the whole verbose report to the file named by
   spec_verbose_file: grammar reduction results, rules useless in the
   parser, conflicts, the grammar, the symbols, and every state.  */
void
print_results (void)
{
  /* We used to use just .out if SPEC_NAME_PREFIX (-p) was used, but
     that conflicts with Posix.  */
  FILE *report = xfopen (spec_verbose_file, "w");

  /* Grammar-level sections.  */
  reduce_output (report);
  grammar_rules_partial_print (report,
                               _("Rules useless in parser due to conflicts"),
                               rule_useless_in_parser_p);
  conflicts_output (report);
  grammar_rules_print (report);
  print_terminal_symbols (report);
  print_nonterminal_symbols (report);

  /* Per-state sections.  no_reduce_set is the scratch bitset used by
     print_reductions; it only lives for the duration of this loop.  */
  no_reduce_set = bitset_create (ntokens, BITSET_FIXED);
  state_number sn = 0;
  while (sn < nstates)
    {
      print_state (report, states[sn]);
      ++sn;
    }
  bitset_free (no_reduce_set);

  xfclose (report);
}
@@ -0,0 +1,26 @@
/* Print information on generated parser, for bison,
Copyright (C) 2000, 2009-2015, 2018-2021 Free Software Foundation,
Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef PRINT_H_
# define PRINT_H_
/* Write the verbose report (grammar reduction results, conflicts,
grammar, symbols, and every state) to the file named by
spec_verbose_file. */
void print_results (void);
#endif /* !PRINT_H_ */
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,85 @@
/* Input parser for Bison
Copyright (C) 2000-2003, 2005-2007, 2009-2015, 2018-2021 Free
Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef READER_H_
# define READER_H_
# include "location.h"
# include "symlist.h"
# include "named-ref.h"
# include "parse-gram.h"
/* Node of a singly linked list, chained through NEXT, describing one
merging function. The head of the list is merge_functions. */
typedef struct merger_list
{
/* Next node of the list. NOTE(review): presumably NULL on the last
node -- confirm in reader.c. */
struct merger_list* next;
/* NOTE(review): presumably the name of the merging function, as
passed to grammar_current_rule_merge_set -- confirm. */
uniqstr name;
/* One symbol whose type is the one used by all the symbols on which
this merging function is used. */
symbol *sym;
/* Where SYM was bound to this merging function. */
location type_declaration_loc;
} merger_list;
void free_merger_functions (void);
extern merger_list *merge_functions;
/* List of the start symbols. */
extern symbol_list *start_symbols;
/* Fetch (or create) a token "YY_PARSE_foo" for start symbol "foo".
We don't use the simple "YY_FOO" because (i) we might get clashes
with some of our symbols (e.g., cast => YY_CAST), and (ii) upcasing
introduces possible clashes between terminal FOO and nonterminal
foo. */
symbol *switching_token (const symbol *start);
void grammar_start_symbols_add (symbol_list *syms);
void grammar_current_rule_begin (symbol *lhs, location loc,
named_ref *lhs_named_ref);
void grammar_current_rule_end (location loc);
void grammar_midrule_action (void);
/* Apply %empty to the current rule. */
void grammar_current_rule_empty_set (location loc);
void grammar_current_rule_prec_set (symbol *precsym, location loc);
void grammar_current_rule_dprec_set (int dprec, location loc);
void grammar_current_rule_merge_set (uniqstr name, location loc);
void grammar_current_rule_expect_sr (int count, location loc);
void grammar_current_rule_expect_rr (int count, location loc);
void grammar_current_rule_symbol_append (symbol *sym, location loc,
named_ref *nref);
/* Attach an ACTION to the current rule. */
void grammar_current_rule_action_append (const char *action, location loc,
named_ref *nref, uniqstr tag);
/* Attach a PREDICATE to the current rule. */
void grammar_current_rule_predicate_append (const char *predicate, location loc);
/* Read in the grammar specification. */
void reader (const char *gram);
/* Was %union seen? */
extern bool union_seen;
/* Should rules have a default precedence? */
extern bool default_prec;
#endif /* !READER_H_ */
+445
View File
@@ -0,0 +1,445 @@
/* Grammar reduction for Bison.
Copyright (C) 1988-1989, 2000-2003, 2005-2015, 2018-2021 Free
Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
/* Reduce the grammar: Find and eliminate unreachable terminals,
nonterminals, and productions. David S. Bakin. */
/* Don't eliminate unreachable terminals: They may be used by the
user's parser. */
#include <config.h>
#include "system.h"
#include <bitset.h>
#include "complain.h"
#include "files.h"
#include "getargs.h"
#include "gram.h"
#include "print-xml.h"
#include "reader.h"
#include "reduce.h"
#include "symtab.h"
/* Set of nonterminals whose language is not empty. */
static bitset N;
/* Set of rules that have no useless nonterminals in their RHS. */
static bitset P;
/* Set of accessible symbols. */
static bitset V;
/* Set of symbols used to define rule precedence (so they are
'useless', but no warning should be issued). */
static bitset V1;
int nuseless_productions;
int nuseless_nonterminals;
/* Exchange the values of the bitset variables LHS and RHS by plain
   assignment.  Both arguments must be lvalues, and each is evaluated
   twice.  */
#define bitset_swap(Lhs, Rhs)                   \
  do {                                          \
    bitset saved_rhs__ = Rhs;                   \
    Rhs = Lhs;                                  \
    Lhs = saved_rhs__;                          \
  } while (0)
/*-------------------------------------------------------------------.
| Whether every nonterminal in the RHS of rule R belongs to N0.      |
|                                                                    |
| Another way to do this would be with a set for each production and |
| then do subset tests against N0, but even for the C grammar the    |
| whole reducing process takes only 2 seconds on my 8Mhz AT.         |
`-------------------------------------------------------------------*/
static bool
useful_production (rule_number r, bitset N0)
{
  /* A production is useful if all of the nonterminals in its RHS
     appear in the set of useful nonterminals.  N0 is indexed by
     nonterminal number minus ntokens; the RHS ends at the first
     negative item.  */
  item_number *rhsp = rules[r].rhs;
  while (0 <= *rhsp)
    {
      if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens))
        return false;
      ++rhsp;
    }
  return true;
}
/*-----------------------------------------------------------------.
| Compute N, the set of nonterminals whose language is not empty. |
| As a by-product, P receives the rules whose RHS nonterminals |
| are all in N. |
| |
| Remember that rules are 1-origin, symbols are 0-origin. |
| NOTE(review): the loop below indexes rules from 0, so the |
| remark above may be stale -- confirm. |
`-----------------------------------------------------------------*/
static void
useless_nonterminals (void)
{
/* N is set as built. Np is set being built this iteration. P is
set of all productions which have a RHS all in N. */
bitset Np = bitset_create (nnterms, BITSET_FIXED);
/* The set being computed is a set of nonterminals which can derive
the empty string or strings consisting of all terminals. At each
iteration a nonterminal is added to the set if there is a
production with that nonterminal as its LHS for which all the
nonterminals in its RHS are already in the set. Iterate until
the set being computed remains unchanged. Any nonterminals not
in the set at that point are useless in that they will never be
used in deriving a sentence of the language.
This iteration doesn't use any special traversal over the
productions. A set is kept of all productions for which all the
nonterminals in the RHS are useful. Only productions not in
this set are scanned on each iteration. At the end, this set is
saved to be used when finding useful productions: only
productions in this set will appear in the final grammar. */
while (1)
{
/* Start this round from the result of the previous one. */
bitset_copy (Np, N);
for (rule_number r = 0; r < nrules; ++r)
if (!bitset_test (P, r)
&& useful_production (r, N))
{
/* N and Np are indexed by nonterminal number minus ntokens. */
bitset_set (Np, rules[r].lhs->number - ntokens);
bitset_set (P, r);
}
/* Fixpoint reached: this round added nothing. */
if (bitset_equal_p (N, Np))
break;
/* Np becomes the current set; the old N is reused as the scratch
set of the next round. */
bitset_swap (N, Np);
}
/* On exit N and Np have equal contents: release one of the two
bitsets and keep the other as N. */
bitset_free (N);
N = Np;
}
/* Compute V, the set of symbols reachable from the accept symbol, and
replace P with the set of reachable rules. Also set
nuseless_productions and nuseless_nonterminals, and record in V1 the
symbols used as rule precedence. */
static void
inaccessable_symbols (void)
{
/* Find out which productions are reachable and which symbols are
used. Starting with an empty set of productions and a set of
symbols which only has the start symbol in it, iterate over all
productions until the set of productions remains unchanged for an
iteration. For each production which has a LHS in the set of
reachable symbols, add the production to the set of reachable
productions, and add all of the nonterminals in the RHS of the
production to the set of reachable symbols.
Consider only the (partially) reduced grammar which has only
nonterminals in N and productions in P.
The result is the set P of productions in the reduced grammar,
and the set V of symbols in the reduced grammar.
Although this algorithm also computes the set of terminals which
are reachable, no terminal will be deleted from the grammar. Some
terminals might not be in the grammar but might be generated by
semantic routines, and so the user might want them available with
specified numbers. (Is this true?) However, the nonreachable
terminals are printed (if running in verbose mode) so that the
user can know. */
/* Vp and Pp are the sets being built; they replace V and P below. */
bitset Vp = bitset_create (nsyms, BITSET_FIXED);
bitset Pp = bitset_create (nrules, BITSET_FIXED);
/* If the start symbol isn't useful, then nothing will be useful. */
if (bitset_test (N, acceptsymbol->content->number - ntokens))
{
bitset_set (V, acceptsymbol->content->number);
while (1)
{
bitset_copy (Vp, V);
for (rule_number r = 0; r < nrules; ++r)
/* Consider each rule of P once, as soon as its LHS is reachable. */
if (!bitset_test (Pp, r)
&& bitset_test (P, r)
&& bitset_test (V, rules[r].lhs->number))
{
/* Its RHS symbols become reachable: every token, and the
nonterminals that are in N. */
for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens))
bitset_set (Vp, *rhsp);
bitset_set (Pp, r);
}
/* Fixpoint reached: no new symbol this round. */
if (bitset_equal_p (V, Vp))
break;
bitset_swap (V, Vp);
}
}
/* Keep Vp as V. If the loop above ran, both have equal contents;
otherwise Vp is as bitset_create returned it. */
bitset_free (V);
V = Vp;
/* These tokens (numbered 0, 1, and 2) are internal to Bison.
Consider them useful. */
bitset_set (V, eoftoken->content->number); /* end-of-input token */
bitset_set (V, errtoken->content->number); /* error token */
bitset_set (V, undeftoken->content->number); /* some undefined token */
/* From now on P is the set of reachable rules. */
bitset_free (P);
P = Pp;
int nuseful_productions = bitset_count (P);
nuseless_productions = nrules - nuseful_productions;
/* Count the nonterminals (symbols ntokens .. nsyms - 1) that are in V. */
int nuseful_nonterminals = 0;
for (symbol_number i = ntokens; i < nsyms; ++i)
nuseful_nonterminals += bitset_test (V, i);
nuseless_nonterminals = nnterms - nuseful_nonterminals;
/* A token that was used in %prec should not be warned about. */
for (rule_number r = 0; r < nrules; ++r)
if (rules[r].precsym != 0)
bitset_set (V1, rules[r].precsym->number);
}
/*-------------------------------------------------------------------.
| Put the useless productions at the end of RULES, and adjust NRULES |
| and NRITEMS accordingly. Uses P (the set of useful rules) and |
| nuseless_productions. |
`-------------------------------------------------------------------*/
static void
reduce_grammar_tables (void)
{
/* Report and flag useless productions. */
{
for (rule_number r = 0; r < nrules; ++r)
rules[r].useful = bitset_test (P, r);
grammar_rules_useless_report (_("rule useless in grammar"));
}
/* Sort the rules: useful first, useless afterwards, each group
keeping its relative order. The useless ones stay in the array,
past the new NRULES. */
{
/* Next free slot of each group in RULES_SORTED. */
int useful = 0;
int useless = nrules - nuseless_productions;
rule *rules_sorted = xnmalloc (nrules, sizeof *rules_sorted);
for (rule_number r = 0; r < nrules; ++r)
rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r];
free (rules);
rules = rules_sorted;
/* Renumber the rules markers in RITEMS. */
for (rule_number r = 0; r < nrules; ++r)
{
/* Skip to the negative item that ends the RHS, and overwrite it
with the marker of the rule's new number. */
item_number *rhsp = rules[r].rhs;
for (/* Nothing. */; 0 <= *rhsp; ++rhsp)
continue;
*rhsp = rule_number_as_item_number (r);
rules[r].number = r;
}
nrules -= nuseless_productions;
}
/* Adjust NRITEMS. */
/* The useless rules now sit at indexes NRULES and above; each of
them accounts for its RHS length plus one item. */
for (rule_number r = nrules; r < nrules + nuseless_productions; ++r)
nritems -= rule_rhs_length (&rules[r]) + 1;
}
/*------------------------------------------------------------------.
| Remove useless nonterminals: renumber the nonterminals so that |
| those in V come first, move the others past the new NSYMS, and |
| adjust NSYMS and NNTERMS. |
`------------------------------------------------------------------*/
/* nterm_map[OLD - ntokens] is the new number of the nonterminal
formerly numbered OLD. Allocated by nonterminals_reduce, released
by reduce_free. */
symbol_number *nterm_map = NULL;
static void
nonterminals_reduce (void)
{
nterm_map = xnmalloc (nnterms, sizeof *nterm_map);
/* Map the nonterminals to their new index: useful first, useless
afterwards. Kept for later report. */
{
symbol_number n = ntokens;
for (symbol_number i = ntokens; i < nsyms; ++i)
if (bitset_test (V, i))
nterm_map[i - ntokens] = n++;
for (symbol_number i = ntokens; i < nsyms; ++i)
if (!bitset_test (V, i))
{
nterm_map[i - ntokens] = n++;
/* Warn about the useless nonterminal, except for the accept
symbol and for symbols whose status is 'used'. */
if (symbols[i]->content->status != used
&& symbols[i] != acceptsymbol)
complain (&symbols[i]->location, Wother,
_("nonterminal useless in grammar: %s"),
symbols[i]->tag);
}
}
/* Shuffle elements of tables indexed by symbol number. */
{
symbol **symbols_sorted = xnmalloc (nnterms, sizeof *symbols_sorted);
/* Give each nonterminal its new number... */
for (symbol_number i = ntokens; i < nsyms; ++i)
symbols[i]->content->number = nterm_map[i - ntokens];
/* ... then permute SYMBOLS accordingly, through a temporary. */
for (symbol_number i = ntokens; i < nsyms; ++i)
symbols_sorted[nterm_map[i - ntokens] - ntokens] = symbols[i];
for (symbol_number i = ntokens; i < nsyms; ++i)
symbols[i] = symbols_sorted[i - ntokens];
free (symbols_sorted);
}
/* Update nonterminal numbers in the RHS of the rules. LHS are
pointers to the symbol structure, they don't need renumbering. */
{
for (rule_number r = 0; r < nrules; ++r)
for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
if (ISVAR (*rhsp))
*rhsp = symbol_number_as_item_number (nterm_map[*rhsp - ntokens]);
/* NOTE(review): if acceptsymbol's content is shared with an entry of
SYMBOLS, its number was already rewritten by the loop above and this
applies nterm_map a second time; that is harmless only if the accept
symbol maps to itself -- confirm. */
acceptsymbol->content->number = nterm_map[acceptsymbol->content->number - ntokens];
}
/* The useless nonterminals are still in SYMBOLS, at indexes NSYMS
and above. */
nsyms -= nuseless_nonterminals;
nnterms -= nuseless_nonterminals;
}
/*------------------------------------------------------------------.
| Output the detailed results of the reductions. For FILE.output. |
`------------------------------------------------------------------*/
void
reduce_output (FILE *out)
{
if (nuseless_nonterminals)
{
fprintf (out, "%s\n\n", _("Nonterminals useless in grammar"));
for (int i = 0; i < nuseless_nonterminals; ++i)
fprintf (out, " %s\n", symbols[nsyms + i]->tag);
fputs ("\n\n", out);
}
{
bool b = false;
for (int i = 0; i < ntokens; ++i)
if (reduce_token_unused_in_grammar (i))
{
if (!b)
fprintf (out, "%s\n\n", _("Terminals unused in grammar"));
b = true;
fprintf (out, " %s\n", symbols[i]->tag);
}
if (b)
fputs ("\n\n", out);
}
if (nuseless_productions)
grammar_rules_partial_print (out, _("Rules useless in grammar"),
rule_useless_in_grammar_p);
}
/*------------------------------------------------------------------.
| Issue Wother complaints giving the number of nonterminals and of  |
| rules useless in the grammar.  Zero counts are not reported.      |
`------------------------------------------------------------------*/
static void
reduce_print (void)
{
  if (nuseless_nonterminals != 0)
    {
      complain (NULL, Wother,
                ngettext ("%d nonterminal useless in grammar",
                          "%d nonterminals useless in grammar",
                          nuseless_nonterminals),
                nuseless_nonterminals);
    }

  if (nuseless_productions != 0)
    {
      complain (NULL, Wother,
                ngettext ("%d rule useless in grammar",
                          "%d rules useless in grammar",
                          nuseless_productions),
                nuseless_productions);
    }
}
/* Reduce the grammar: compute the sets N, P, V and V1, report what is
   useless, exit with failure if a start symbol derives no sentence,
   then renumber the nonterminals and reorder the rules so that the
   useless ones come last.  */
void
reduce_grammar (void)
{
  /* Allocate the global sets used to compute the reduced grammar */
  N = bitset_create (nnterms, BITSET_FIXED);
  P = bitset_create (nrules, BITSET_FIXED);
  V = bitset_create (nsyms, BITSET_FIXED);
  V1 = bitset_create (nsyms, BITSET_FIXED);

  useless_nonterminals ();
  inaccessable_symbols ();

  /* Did we reduce something? */
  if (nuseless_nonterminals || nuseless_productions)
    {
      reduce_print ();

      // Check that start symbols have non-empty languages.
      bool failure = false;
      symbol_list *list = start_symbols;
      while (list)
        {
          if (!bitset_test (N, list->content.sym->content->number - ntokens))
            {
              failure = true;
              complain (&list->sym_loc, complaint,
                        _("start symbol %s does not derive any sentence"),
                        list->content.sym->tag);
            }
          list = list->next;
        }
      /* Every faulty start symbol has been reported before giving up.  */
      if (failure)
        exit (EXIT_FAILURE);

      /* First reduce the nonterminals, as they renumber themselves in the
         whole grammar.  If you change the order, nonterms would be
         renumbered only in the reduced grammar.  */
      if (nuseless_nonterminals)
        nonterminals_reduce ();
      if (nuseless_productions)
        reduce_grammar_tables ();
    }

  if (!(trace_flag & trace_grammar))
    return;

  grammar_dump (stderr, "Reduced Grammar");
  fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals"
           ", and %d productions.\n",
           grammar_file, ntokens, nnterms, nrules);
}
/* Whether token I is neither in V (the accessible symbols) nor in V1
   (the symbols used to define rule precedence).  I must be a token
   number, i.e. less than ntokens.  */
bool
reduce_token_unused_in_grammar (symbol_number i)
{
  aver (i < ntokens);
  if (bitset_test (V, i))
    return false;
  if (bitset_test (V1, i))
    return false;
  return true;
}
/* Whether the nonterminal SYM is useless in the grammar, i.e. whether
   its number is at least nsyms.  The number must lie between ntokens
   (included) and nsyms + nuseless_nonterminals (excluded).  */
bool
reduce_nonterminal_useless_in_grammar (const sym_content *sym)
{
  const symbol_number num = sym->number;
  aver (ntokens <= num && num < nsyms + nuseless_nonterminals);
  return num >= nsyms;
}
/*------------------------------------------------------------.
| Free the global sets used to compute the reduced grammar,   |
| and the nonterminal renumbering map.                        |
`------------------------------------------------------------*/
void
reduce_free (void)
{
  /* Reset nterm_map so that no stale pointer is left behind.  */
  free (nterm_map);
  nterm_map = NULL;

  bitset_free (P);
  bitset_free (V1);
  bitset_free (V);
  bitset_free (N);
}
@@ -0,0 +1,42 @@
/* Grammar reduction for Bison.
Copyright (C) 2000-2002, 2007, 2009-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef REDUCE_H_
# define REDUCE_H_
void reduce_grammar (void);
void reduce_output (FILE *out);
bool reduce_token_unused_in_grammar (symbol_number i);
/** Whether symbol \a sym is useless in the grammar.
* \pre reduce_grammar was called before.
*/
bool reduce_nonterminal_useless_in_grammar (const sym_content *sym);
void reduce_free (void);
/** Map initial nterm numbers to the new ones. Built by
* reduce_grammar. Size nnterms + nuseless_nonterminals. */
extern symbol_number *nterm_map;
extern int nuseless_nonterminals;
extern int nuseless_productions;
#endif /* !REDUCE_H_ */
@@ -0,0 +1,182 @@
/* Binary relations.
Copyright (C) 2002, 2004-2005, 2009-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include <bitsetv.h>
#include "getargs.h"
#include "relation.h"
/* Output NODE on OUT with PRINT if it is not null, otherwise as a
   decimal number at least three characters wide.  */
static void
relation_node_output (relation_node node,
                      relation_node_print print, FILE *out)
{
  if (print)
    print (node, out);
  else
    fprintf (out, "%3ld", (long) node);
}

/* Report on OUT the relation R that has SIZE vertices, preceded by
   TITLE if it is not null.  Each vertex that has an adjacency list
   gets one line: the vertex, a colon, then its adjacent nodes.
   Vertices are rendered with PRINT if it is not null, numerically
   otherwise.  */
void
relation_print (const char *title,
                relation r, relation_node size,
                relation_node_print print, FILE *out)
{
  if (title)
    fprintf (out, "%s:\n", title);

  for (relation_node src = 0; src < size; ++src)
    {
      /* Vertices without an adjacency list are not reported.  */
      if (!r[src])
        continue;

      fputs (" ", out);
      relation_node_output (src, print, out);
      fputc (':', out);
      for (relation_node *dst = r[src]; *dst != END_NODE; ++dst)
        {
          fputc (' ', out);
          relation_node_output (*dst, print, out);
        }
      fputc ('\n', out);
    }

  fputc ('\n', out);
}
/*---------------------------------------------------------------.
| digraph & traverse. |
| |
| The following variables are used as common storage between the |
| two. |
`---------------------------------------------------------------*/
/* The relation being traversed; set by relation_digraph. */
static relation R;
/* INDEXES[I] is 0 while vertex I has not been visited, its stack
depth while it is being processed, and INFINITY once it has been
popped. */
static relation_nodes indexes;
/* Stack of the vertices being processed; slots 1 to TOP are in use. */
static relation_nodes vertices;
/* Index of the top of VERTICES; 0 when the stack is empty. */
static relation_node top;
/* Mark stored in INDEXES for completed vertices; relation_digraph
sets it to the number of vertices plus two. */
static relation_node infinity;
/* The function being propagated: one bitset per vertex. */
static bitsetv F;
/* Visit vertex I of R depth-first, extending F[I] with the F sets of
every vertex visited from it. Works on the file-level state set up
by relation_digraph.
NOTE(review): this looks like the classic "digraph" algorithm used for
LALR lookahead computation (strongly connected components share one
result) -- confirm before relying on that description. */
static void
traverse (relation_node i)
{
/* Push I and remember the depth at which it was pushed. */
vertices[++top] = i;
relation_node height = indexes[i] = top;
if (R[i])
for (relation_node j = 0; R[i][j] != END_NODE; ++j)
{
/* Visit the successors that have not been seen yet. */
if (indexes[R[i][j]] == 0)
traverse (R[i][j]);
/* Keep the smallest index found among I and its successors. */
if (indexes[i] > indexes[R[i][j]])
indexes[i] = indexes[R[i][j]];
/* F[I] |= F[successor]. */
bitset_or (F[i], F[i], F[R[i][j]]);
}
/* The index of I was not lowered by any successor: pop the stack
down to I, marking each popped vertex as done and giving the
vertices popped before I a copy of F[I]. */
if (indexes[i] == height)
for (;;)
{
relation_node j = vertices[top--];
indexes[j] = infinity;
if (i == j)
break;
bitset_copy (F[j], F[i]);
}
}
/* Extend FUNCTION along the relation R, which has SIZE vertices:
   every vertex that has an adjacency list and has not been reached
   yet is handed to traverse, which ORs into FUNCTION[NODE1] the sets
   of the vertices visited from NODE1.

   FUNCTION is updated in place through its bitsets; R is only read.
   The file-level variables shared with traverse are set up here and
   cleared again before returning.  */
void
relation_digraph (relation r, relation_node size, bitsetv function)
{
  infinity = size + 2;
  /* Zero-initialized: an index of 0 means "not visited yet".  */
  indexes = xcalloc (size + 1, sizeof *indexes);
  /* Slot 0 is never used, because traverse pre-increments TOP.  */
  vertices = xnmalloc (size + 1, sizeof *vertices);
  top = 0;

  R = r;
  F = function;

  for (relation_node i = 0; i < size; i++)
    if (indexes[i] == 0 && R[i])
      traverse (i);

  free (indexes);
  free (vertices);

  /* Do not leave file-scope pointers to freed storage or to the
     caller's data behind.  */
  indexes = NULL;
  vertices = NULL;
  R = NULL;
  F = NULL;
}
/*------------------------------------------------------------------.
| Destructively transpose *R_ARG, of size SIZE: the input relation  |
| and its adjacency lists are freed, and *R_ARG is set to a newly   |
| allocated relation in which J lists I iff the input had I list J. |
| Vertices without incoming edges get a null list.                  |
`------------------------------------------------------------------*/
void
relation_transpose (relation *R_arg, relation_node size)
{
  relation old_R = *R_arg;

  if (trace_flag & trace_sets)
    relation_print ("relation_transpose", old_R, size, NULL, stderr);

  /* Count.  INDEGREE[N] -- number of edges ending at N, i.e., the
     length of the list of N in the result.  */
  size_t *indegree = xcalloc (size, sizeof *indegree);
  for (relation_node src = 0; src < size; src++)
    {
      if (!old_R[src])
        continue;
      for (relation_node *dst = old_R[src]; *dst != END_NODE; ++dst)
        ++indegree[*dst];
    }

  /* Allocate.  NEW_R is the result; CURSOR[N] is the next free entry
     of NEW_R[N].  */
  relation new_R = xnmalloc (size, sizeof *new_R);
  relation cursor = xnmalloc (size, sizeof *cursor);
  for (relation_node n = 0; n < size; n++)
    {
      relation_node *row = NULL;
      if (indegree[n] > 0)
        {
          /* One extra slot for the END_NODE terminator.  */
          row = xnmalloc (indegree[n] + 1, sizeof *row);
          row[indegree[n]] = END_NODE;
        }
      new_R[n] = row;
      cursor[n] = row;
    }

  /* Store.  Each edge SRC -> DST of the input becomes DST -> SRC.  */
  for (relation_node src = 0; src < size; src++)
    {
      if (!old_R[src])
        continue;
      for (relation_node *dst = old_R[src]; *dst != END_NODE; ++dst)
        {
          *cursor[*dst] = src;
          ++cursor[*dst];
        }
    }

  free (indegree);
  free (cursor);

  /* Free the input: it is replaced with the result.  */
  for (relation_node n = 0; n < size; n++)
    free (old_R[n]);
  free (old_R);

  if (trace_flag & trace_sets)
    relation_print ("relation_transpose: output", new_R, size, NULL, stderr);

  *R_arg = new_R;
}
@@ -0,0 +1,55 @@
/* Binary relations.
Copyright (C) 2002, 2004, 2009-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef RELATION_H_
# define RELATION_H_
/* Performing operations on graphs coded as list of adjacency.
If GRAPH is a relation, then GRAPH[Node] is a list of adjacent
nodes, ended with END_NODE. */
# define END_NODE ((relation_node) -1)
typedef size_t relation_node;
typedef relation_node *relation_nodes;
typedef relation_nodes *relation;
typedef void (relation_node_print) (relation_node node, FILE* out);
/* Report a relation R that has SIZE vertices. */
void relation_print (const char *title,
relation r, size_t size,
relation_node_print print, FILE *out);
/* Compute the transitive closure of the FUNCTION on the relation R
with SIZE vertices.
If R (NODE1, NODE2) then on exit FUNCTION[NODE1] was extended
(unioned) with FUNCTION[NODE2].
FUNCTION is in-out, R is read only. */
void relation_digraph (const relation r, relation_node size, bitsetv function);
/* Destructively transpose *R_ARG, of size SIZE. */
void relation_transpose (relation *R_arg, relation_node size);
#endif /* ! RELATION_H_ */
@@ -0,0 +1,3 @@
#include <config.h>
#include "system.h"
#include "src/scan-code.c"
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,211 @@
/* Bison code properties structure and scanner.
Copyright (C) 2006-2007, 2009-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef SCAN_CODE_H_
# define SCAN_CODE_H_
# include "location.h"
# include "named-ref.h"
# include "uniqstr.h"
struct symbol_list;
/**
* Keeps track of the maximum number of semantic values to the left of a handle
* (those referenced by $0, $-1, etc.) that are required by the semantic
* actions of this grammar.
*/
extern int max_left_semantic_context;
/**
* The obstack used to store the translated actions.
*/
extern struct obstack *obstack_for_actions;
/**
* A code passage captured from the grammar file and possibly translated,
* and/or properties associated with such a code passage. Don't break
* encapsulation by modifying the fields directly. Use the provided interface
* functions.
*/
typedef struct code_props {
/**
* Set by the init functions: \c code_props_none_init yields
* \c CODE_PROPS_NONE, \c code_props_plain_init sets \c CODE_PROPS_PLAIN,
* \c code_props_symbol_action_init sets \c CODE_PROPS_SYMBOL_ACTION, and
* \c code_props_rule_action_init sets \c CODE_PROPS_RULE_ACTION.
*/
enum {
CODE_PROPS_NONE, CODE_PROPS_PLAIN,
CODE_PROPS_SYMBOL_ACTION, CODE_PROPS_RULE_ACTION
} kind;
/**
* \c NULL iff \c code_props::kind is \c CODE_PROPS_NONE.
* Memory is allocated in an obstack freed elsewhere.
*/
char const *code;
/** Undefined iff \c code_props::code is \c NULL. */
location location;
/**
* \c false iff either:
* - \c code_props_translate_code has never previously been invoked for
* the \c code_props that would contain the code passage associated
* with \c self. (That \c code_props is not the same as this one if this
* one is for a RHS \c symbol_list node. Instead, it's the \c code_props
* for the LHS symbol of the same rule.)
* - \c code_props_translate_code has been invoked for that \c code_props,
* but the symbol value associated with this \c code_props was not
* referenced in the code passage.
*/
bool is_value_used;
/**
* \c true iff this code is an action that is not to be deferred in
* a non-deterministic parser.
*/
bool is_predicate;
/**
* Whether this is actually used (i.e., not completely masked by
* other code props). */
bool is_used;
/** \c NULL iff \c code_props::kind is not \c CODE_PROPS_RULE_ACTION. */
struct symbol_list *rule;
/**
* Named reference. Only \c code_props_rule_action_init stores a
* caller-supplied value here; the other init functions leave it \c NULL.
*/
named_ref *named_ref;
/**
* Type, for midrule actions. Only \c code_props_rule_action_init stores
* a caller-supplied value here; the other init functions leave it \c NULL.
*/
uniqstr type;
} code_props;
/**
* \pre
* - <tt>self != NULL</tt>.
* \post
* - \c self has been overwritten to contain no code.
*/
void code_props_none_init (code_props *self);
/** Equivalent to \c code_props_none_init. */
# define CODE_PROPS_NONE_INIT \
{ \
/* .kind = */ CODE_PROPS_NONE, \
/* .code = */ NULL, \
/* .location = */ EMPTY_LOCATION_INIT, \
/* .is_value_used = */ false, \
/* .is_predicate = */ false, \
/* .is_used = */ false, \
/* .rule = */ NULL, \
/* .named_ref = */ NULL, \
/* .type = */ NULL, \
}
/** Initialized by \c CODE_PROPS_NONE_INIT with no further modification. */
extern code_props code_props_none;
/**
* \pre
* - <tt>self != NULL</tt>.
* - <tt>code != NULL</tt>.
* - \c code is an untranslated code passage containing no Bison escapes.
* - \c code was extracted from the grammar file at \c code_loc.
* \post
* - \c self has been overwritten to represent the specified plain code
* passage.
* - \c self will become invalid if the caller frees \c code before invoking
* \c code_props_translate_code on \c self.
*/
void code_props_plain_init (code_props *self, char const *code,
location code_loc);
/**
* \pre
* - <tt>self != NULL</tt>.
* - <tt>code != NULL</tt>.
* - \c code is an untranslated code passage. The only Bison escapes it
* might contain are $$ and \@$, referring to a single symbol.
* - \c code was extracted from the grammar file at \c code_loc.
* \post
* - \c self has been overwritten to represent the specified symbol action.
* - \c self will become invalid if the caller frees \c code before invoking
* \c code_props_translate_code on \c self.
*/
void code_props_symbol_action_init (code_props *self, char const *code,
location code_loc);
/**
* \param type type for midrule actions
* \pre
* - <tt>self != NULL</tt>.
* - <tt>code != NULL</tt>.
* - <tt>rule != NULL</tt>.
* - \c code is the untranslated action of the rule for which \c rule is the
* LHS node. Thus, \c code possibly contains Bison escapes such as $$, $1,
* $2, etc referring to the values of the rule.
* - \c code was extracted from the grammar file at \c code_loc.
* \post
* - \c self has been overwritten to represent the specified rule action.
* - \c self does not claim responsibility for the memory of \c rule.
* - \c self will become invalid if:
* - The caller frees \c code before invoking \c code_props_translate_code
* on \c self.
* - The caller frees \c rule.
*/
void code_props_rule_action_init (code_props *self, char const *code,
location code_loc, struct symbol_list *rule,
named_ref *name, uniqstr type,
bool is_predicate);
/**
* \pre
* - If there's a code passage contained in \c self and it contains Bison
* escapes, all grammar declarations have already been parsed as they may
* affect warnings and complaints issued here.
* \post
* - All M4-special symbols and Bison escapes have been translated in
* \c self->code.
* - <tt>self->code != self->code\@pre</tt> unless
* <tt>self->code\@pre = NULL</tt>.
*/
void code_props_translate_code (code_props *self);
/**
* \pre
* - None.
* \post
* - The dynamic memory allocated by the previous invocation of
* \c code_props_translate_code (if any) was freed. The \c code_props
* instance for which \c code_props_translate_code was invoked is now
* invalid.
*/
void code_scanner_last_string_free (void);
/**
* \pre
* - None.
* \post
* - The string obstack (\c obstack_for_string) has been initialized and
* the scanner's Flex debug flag (\c yy_flex_debug) is cleared.
*/
void code_scanner_init (void);
/**
* \pre
* - None.
* \post
* - All dynamic memory allocated during invocations of
* \c code_props_translate_code (if any) has been freed. All \c code_props
* instances may now be invalid.
*/
void code_scanner_free (void);
#endif /* !SCAN_CODE_H_ */
@@ -0,0 +1,856 @@
/* Bison Action Scanner -*- C -*-
Copyright (C) 2006-2015, 2018-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
%option debug nodefault noinput nounput noyywrap never-interactive
%option prefix="code_" outfile="lex.yy.c"
%{
#include <c-ctype.h>
#include <get-errno.h>
#include <quote.h>
#include "src/complain.h"
#include "src/getargs.h"
#include "src/muscle-tab.h"
#include "src/reader.h"
#include "src/scan-code.h"
#include "src/symlist.h"
#define FLEX_PREFIX(Id) code_ ## Id
#include "src/flex-scanner.h"
/* Work around a bug in flex 2.5.31. See Debian bug 333231
<https://bugs.debian.org/333231>. */
#undef code_wrap
#define code_wrap() 1
struct obstack *obstack_for_actions = &obstack_for_string;
/* The current calling start condition: SC_RULE_ACTION or
SC_SYMBOL_ACTION. */
# define YY_DECL static char *code_lex (code_props *self, int sc_context)
YY_DECL;
#define YY_USER_ACTION location_compute (loc, &loc->end, yytext, yyleng);
static char *fetch_type_name (char *cp, char const **type_name,
const location *dollar_loc);
static void handle_action_dollar (symbol_list *rule, char *cp,
const location *dollar_loc);
static void handle_action_at (symbol_list *rule, char *cp,
const location *at_loc);
/* A string to be pushed to obstack after dollar/at has been handled. */
static char *ref_tail_fields;
static location current_loc;
static location *loc = &current_loc;
/* A string representing the most recent translation. */
static char *last_string;
/* True if an untyped $$ or $n was seen. */
static bool untyped_var_seen;
%}
/* C and C++ comments in code. */
%x SC_COMMENT SC_LINE_COMMENT
/* Strings and characters in code. */
%x SC_STRING SC_CHARACTER
/* Whether in a rule or symbol action. Specifies the translation
of $ and @. */
%x SC_RULE_ACTION SC_SYMBOL_ACTION
/* POSIX says that a tag must be both an id and a C union member, but
historically almost any character is allowed in a tag. We disallow
NUL and newline, as this simplifies our implementation. We allow
"->" as a means to dereference a pointer. */
tag ([^\0\n>]|->)*[^-]
/* Zero or more instances of backslash-newline. Following GCC, allow
white space between the backslash and the newline. */
splice (\\[ \f\t\v]*\n)*
/* C style identifier. Must start with letter. Will be used for
named symbol references. Shall be kept synchronized with
scan-gram.l "letter" and "id". */
letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
id {letter}({letter}|[-0-9])*
ref -?[0-9]+|{id}|"["{id}"]"|"$"
%%
%{
/* This scanner is special: it is invoked only once, henceforth
is expected to return only once. This initialization is
therefore done once per action to translate. */
aver (sc_context == SC_SYMBOL_ACTION
|| sc_context == SC_RULE_ACTION
|| sc_context == INITIAL);
BEGIN sc_context;
%}
/*------------------------------------------------------------.
| Scanning a C comment. The initial '/ *' is already eaten. |
`------------------------------------------------------------*/
<SC_COMMENT>
{
"*"{splice}"/" STRING_GROW (); BEGIN sc_context;
}
/*--------------------------------------------------------------.
| Scanning a line comment. The initial '//' is already eaten. |
`--------------------------------------------------------------*/
<SC_LINE_COMMENT>
{
"\n" STRING_GROW (); BEGIN sc_context;
{splice} STRING_GROW ();
}
/*--------------------------------------------.
| Scanning user-code characters and strings. |
`--------------------------------------------*/
<SC_CHARACTER,SC_STRING>
{
{splice}|\\{splice}. STRING_GROW ();
}
<SC_CHARACTER>
{
"'" STRING_GROW (); BEGIN sc_context;
}
<SC_STRING>
{
"\"" STRING_GROW (); BEGIN sc_context;
}
<SC_RULE_ACTION,SC_SYMBOL_ACTION>
{
"'" STRING_GROW (); BEGIN SC_CHARACTER;
"\"" STRING_GROW (); BEGIN SC_STRING;
"/"{splice}"*" STRING_GROW (); BEGIN SC_COMMENT;
"/"{splice}"/" STRING_GROW (); BEGIN SC_LINE_COMMENT;
[$@] {
complain (loc, Wother, _("stray '%s'"), yytext);
obstack_escape (&obstack_for_string, yytext);
}
}
<SC_RULE_ACTION>
{
"$"("<"{tag}">")?{ref} {
ref_tail_fields = NULL;
handle_action_dollar (self->rule, yytext, loc);
if (ref_tail_fields)
obstack_sgrow (&obstack_for_string, ref_tail_fields);
}
"@"{ref} {
ref_tail_fields = NULL;
handle_action_at (self->rule, yytext, loc);
if (ref_tail_fields)
obstack_sgrow (&obstack_for_string, ref_tail_fields);
}
}
<SC_SYMBOL_ACTION>
{
"$"("<"{tag}">")?"$" {
const char *type_name = NULL;
fetch_type_name (yytext + 1, &type_name, loc)[-1] = 0;
obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar(");
obstack_quote (&obstack_for_string, type_name);
obstack_sgrow (&obstack_for_string, ")[");
self->is_value_used = true;
}
"@$" {
obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
muscle_percent_define_ensure("locations", *loc, true);
}
}
<*>
{
/* Escape M4 quoting characters in C code. */
[$@\[\]] obstack_escape (&obstack_for_string, yytext);
/* By default, grow the string obstack with the input. */
.|\n STRING_GROW ();
/* End of processing. */
<<EOF>> STRING_FINISH (); return last_string;
}
%%
/* Whether CH is one of the two characters ('.' and '-') that may
   appear inside a symbol name but not inside a C identifier.  */
static inline bool
is_dot_or_dash (char ch)
{
  switch (ch)
    {
    case '.':
    case '-':
      return true;
    default:
      return false;
    }
}
/* Whether the NUL-terminated string P contains at least one '.' or
   '-'.  */
static inline bool
contains_dot_or_dash (const char* p)
{
  char const *first_hit = strpbrk (p, ".-");
  return first_hit != NULL;
}
/* Defines a variant of a symbolic name resolution: one symbol of the
rule whose name matches the reference being parsed. */
typedef struct
{
/* Index in symbol list. */
int symbol_index;
/* Matched symbol id and loc. */
uniqstr id;
location loc;
/* Hiding named reference, or NULL (the value variant_add stores)
when parse_ref found no named reference on the symbol. */
named_ref* hidden_by;
/* Error flags. May contain zero (no errors) or
a combination of VARIANT_* values. */
unsigned err;
} variant;
/* Set when the variant refers to a symbol hidden
by an explicit symbol reference. */
#define VARIANT_HIDDEN (1 << 0)
/* Set when the variant refers to a symbol containing
dots or dashes. Will require explicit bracketing. */
#define VARIANT_BAD_BRACKETING (1 << 1)
/* Set when the variant refers to a symbol which is
not visible from current midrule. */
#define VARIANT_NOT_VISIBLE_FROM_MIDRULE (1 << 2)
/* Growable array of variants, reused across references: parse_ref
resets variant_count to zero, variant_table_grow appends. */
static variant *variant_table = NULL;
/* Number of slots allocated in variant_table. */
static int variant_table_size = 0;
/* Number of slots of variant_table currently in use. */
static int variant_count = 0;
/* Reserve one more slot at the end of the variant table, enlarging
   the allocation when it is full, and return that (uninitialized)
   slot.  */
static variant *
variant_table_grow (void)
{
  int const slot = variant_count++;
  if (variant_table_size < variant_count)
    {
      /* Grow geometrically until the new count fits.  */
      do
        variant_table_size = 2 * variant_table_size + 3;
      while (variant_table_size < variant_count);
      variant_table = xnrealloc (variant_table, variant_table_size,
                                 sizeof *variant_table);
    }
  return &variant_table[slot];
}
/* Release the variant table and reset its bookkeeping so that it
   can be grown again from scratch.  */
static void
variant_table_free (void)
{
  free (variant_table);
  variant_table = NULL;
  variant_count = 0;
  variant_table_size = 0;
}
/* If the NUL-terminated string PREFIX is a prefix of the range
   [CP, END), return the position in that range just after the
   prefix; otherwise (mismatch, or range shorter than PREFIX) return
   NULL.  */
static char const *
find_prefix_end (char const *prefix, char const *cp, char const *end)
{
  while (*prefix != '\0' && cp != end)
    {
      if (*prefix != *cp)
        return NULL;
      ++prefix;
      ++cp;
    }
  /* Leftover prefix characters mean the range ended too early.  */
  if (*prefix != '\0')
    return NULL;
  return cp;
}
/* If the symbol name ID matches the reference text [CP, CP_END),
   append a variant for it (at SYMBOL_INDEX, located at ID_LOC) to the
   variant table and return it; otherwise return NULL.

   ID matches when it is the whole reference, or, for an unbracketed
   reference only, when it is a prefix of the reference immediately
   followed by '.' or '-'.  */
static variant *
variant_add (uniqstr id, location id_loc, int symbol_index,
             char const *cp, char const *cp_end, bool explicit_bracketing)
{
  char const *after_id = find_prefix_end (id, cp, cp_end);
  if (!after_id)
    return NULL;

  if (after_id != cp_end
      && (explicit_bracketing || !is_dot_or_dash (*after_id)))
    return NULL;

  variant *res = variant_table_grow ();
  res->err = 0;
  res->hidden_by = NULL;
  res->loc = id_loc;
  res->id = id;
  res->symbol_index = symbol_index;
  return res;
}
/* Return the spelling of the positional reference to the symbol at
   SYMBOL_INDEX: "$$" for index 0 (the left-hand side), "$N"
   otherwise.

   The result lives in a static buffer that the next call
   overwrites.  */
static const char *
get_at_spec (int symbol_index)
{
  /* Large enough for '$', any int in decimal, and the final NUL.  */
  static char at_buf[20];
  if (symbol_index == 0)
    strcpy (at_buf, "$$");
  else
    /* SYMBOL_INDEX is an int: print it with %d, not %u.  */
    snprintf (at_buf, sizeof at_buf, "$%d", symbol_index);
  return at_buf;
}
/* Issue, with severity WARNING, one sub-message about the candidate
VAR for the reference whose name part starts at CP.

If VAR has no error flag, just report which symbol is referred to.
Otherwise build a "possibly meant" hint that spells the reference as
it should be written, and mentions what it hides and/or why it cannot
be accessed from the midrule action at MIDRULE_RHS_INDEX.
DOLLAR_OR_AT is the character ('$' or '@') that introduced the
reference. */
static void
show_sub_message (warnings warning,
const char* cp, bool explicit_bracketing,
int midrule_rhs_index, char dollar_or_at,
const variant *var)
{
const char *at_spec = get_at_spec (var->symbol_index);
if (var->err == 0)
subcomplain (&var->loc, warning,
_("refers to: %c%s at %s"), dollar_or_at,
var->id, at_spec);
else
{
/* Suggest the hiding named reference when there is one, else the
symbol's own name. */
const char *id;
location id_loc;
if (var->hidden_by)
{
id = var->hidden_by->id;
id_loc = var->hidden_by->loc;
}
else
{
id = var->id;
id_loc = var->loc;
}
/* For an unbracketed reference, whatever follows the matched name
in the user's text is repeated after the suggestion. */
const char *tail = explicit_bracketing ? "" : cp + strlen (var->id);
/* Create the explanation message. */
static struct obstack msg_buf;
obstack_init (&msg_buf);
obstack_printf (&msg_buf, _("possibly meant: %c"), dollar_or_at);
/* Names with dots or dashes are shown in brackets. */
if (contains_dot_or_dash (id))
obstack_printf (&msg_buf, "[%s]", id);
else
obstack_sgrow (&msg_buf, id);
obstack_sgrow (&msg_buf, tail);
if (var->err & VARIANT_HIDDEN)
{
obstack_printf (&msg_buf, _(", hiding %c"), dollar_or_at);
if (contains_dot_or_dash (var->id))
obstack_printf (&msg_buf, "[%s]", var->id);
else
obstack_sgrow (&msg_buf, var->id);
obstack_sgrow (&msg_buf, tail);
}
obstack_printf (&msg_buf, _(" at %s"), at_spec);
if (var->err & VARIANT_NOT_VISIBLE_FROM_MIDRULE)
obstack_printf (&msg_buf,
_(", cannot be accessed from midrule action at $%d"),
midrule_rhs_index);
subcomplain (&id_loc, warning, "%s",
obstack_finish0 (&msg_buf));
/* The obstack is initialized anew on each call, so release it
entirely. */
obstack_free (&msg_buf, 0);
}
}
/* Issue one sub-message per entry currently in the variant table.
   WARNING is combined with the `silent' flag before being passed on;
   the remaining arguments are forwarded unchanged to
   show_sub_message.  */
static void
show_sub_messages (warnings warning,
                   const char* cp, bool explicit_bracketing,
                   int midrule_rhs_index, char dollar_or_at)
{
  int i = 0;
  while (i < variant_count)
    {
      const variant *candidate = &variant_table[i];
      show_sub_message (warning | silent,
                        cp, explicit_bracketing,
                        midrule_rhs_index, dollar_or_at,
                        candidate);
      ++i;
    }
}
/* Returned from "parse_ref" when the reference
is inappropriate. */
#define INVALID_REF (INT_MIN)
/* Returned from "parse_ref" when the reference
points to LHS ($$) of the current rule or midrule. */
#define LHS_REF (INT_MIN + 1)
/* Parse the decimal positional reference starting at CP for a rule
   of RULE_LENGTH symbols.  Return its value when it lies in
   [1 - INT_MAX + RULE_LENGTH, RULE_LENGTH]; otherwise complain at
   TEXT_LOC, quoting the whole reference TEXT, and return
   INVALID_REF.  */
static long
parse_positional_ref (char *cp, int rule_length,
                      char *text, const location *text_loc)
{
  long const lowest = 1 - INT_MAX + rule_length;
  long const num = strtol (cp, NULL, 10);
  if (num < lowest || rule_length < num)
    {
      complain (text_loc, complaint, _("integer out of range: %s"),
                quote (text));
      return INVALID_REF;
    }
  return num;
}
/* Parse named or positional reference. In case of positional
references, can return negative values for $-n "deep" stack
accesses.

CP points to the reference proper: the callers pass the text just
after the '$' or '@' and after any <type> tag. TEXT and TEXT_LOC are
the whole reference and its location, used in diagnostics. RULE is
the rule whose symbols may be named, RULE_LENGTH bounds positional
references, and MIDRULE_RHS_INDEX is the caller's
midrule_parent_rhs_index (zero disables the midrule visibility
check). DOLLAR_OR_AT is '$' or '@', for diagnostics.

Return LHS_REF for a reference to the left-hand side, INVALID_REF
once an invalid or ambiguous reference has been reported, and
otherwise the index of the referenced symbol.

Side effects: resets and refills the variant table and, for an
unbracketed named reference, sets ref_tail_fields to the first '.'
or '-' in CP (or NULL). */
static long
parse_ref (char *cp, symbol_list *rule, int rule_length,
int midrule_rhs_index, char *text, const location *text_loc,
char dollar_or_at)
{
/* "$$" and "@$". */
if ('$' == *cp)
return LHS_REF;
/* "$N" and "$-N". */
if (c_isdigit (*cp) || (*cp == '-' && c_isdigit (cp[1])))
return parse_positional_ref (cp, rule_length, text, text_loc);
bool const explicit_bracketing = *cp == '[';
if (explicit_bracketing)
++cp;
else
ref_tail_fields = strpbrk (cp, ".-");
/* End of the name: the closing bracket, or the end of the string. */
char const *cp_end = strchr (cp, explicit_bracketing ? ']' : '\0');
/* Add all relevant variants. */
{
int symbol_index;
symbol_list *l;
variant_count = 0;
/* SYMBOL_INDEX advances for every node, including those skipped
below. */
for (symbol_index = 0, l = rule; !symbol_list_null (l);
++symbol_index, l = l->next)
{
if (l->content_type != SYMLIST_SYMBOL)
continue;
/* First try the symbol's own name... */
variant *var
= variant_add (l->content.sym->tag, l->sym_loc,
symbol_index, cp, cp_end, explicit_bracketing);
if (var && l->named_ref)
var->hidden_by = l->named_ref;
/* ... then the name given by its named reference, if any. */
if (l->named_ref)
variant_add (l->named_ref->id, l->named_ref->loc,
symbol_index, cp, cp_end, explicit_bracketing);
}
}
/* Check errors. */
int valid_variants = 0;
int valid_variant_index = 0;
for (int i = 0; i < variant_count; ++i)
{
variant *var = &variant_table[i];
int symbol_index = var->symbol_index;
/* Check visibility from midrule actions. */
if (midrule_rhs_index != 0
&& (symbol_index == 0 || midrule_rhs_index < symbol_index))
var->err |= VARIANT_NOT_VISIBLE_FROM_MIDRULE;
/* Check correct bracketing. */
if (!explicit_bracketing && contains_dot_or_dash (var->id))
var->err |= VARIANT_BAD_BRACKETING;
/* Check using of hidden symbols. */
if (var->hidden_by)
var->err |= VARIANT_HIDDEN;
/* Remember the error-free variants; exactly one is needed. */
if (!var->err)
{
valid_variant_index = i;
++valid_variants;
}
}
switch (valid_variants)
{
/* No usable candidate: report, then list the flawed candidates, if
any, as hints. */
case 0:
{
/* Length of the name part of the reference, for the messages. */
int len = (explicit_bracketing || !ref_tail_fields) ?
cp_end - cp : ref_tail_fields - cp;
complain (text_loc, complaint,
_("invalid reference: %s"), quote (text));
if (len == 0)
{
/* Empty name: point just after the '$' or '@'. */
location sym_loc = *text_loc;
sym_loc.start.column += 1;
sym_loc.end = sym_loc.start;
subcomplain (&sym_loc, complaint,
_("syntax error after '%c', expecting integer, "
"letter, '_', '[', or '$'"),
dollar_or_at);
}
else if (midrule_rhs_index)
subcomplain (&rule->rhs_loc, complaint,
_("symbol not found in production before $%d: "
"%.*s"),
midrule_rhs_index, len, cp);
else
subcomplain (&rule->rhs_loc, complaint,
_("symbol not found in production: %.*s"),
len, cp);
if (variant_count > 0)
show_sub_messages (complaint,
cp, explicit_bracketing, midrule_rhs_index,
dollar_or_at);
return INVALID_REF;
}
/* A single usable candidate: accept it, but warn when flawed
candidates also matched. */
case 1:
{
if (variant_count > 1)
{
complain (text_loc, Wother,
_("misleading reference: %s"), quote (text));
show_sub_messages (Wother,
cp, explicit_bracketing, midrule_rhs_index,
dollar_or_at);
}
{
int symbol_index =
variant_table[valid_variant_index].symbol_index;
/* A symbol sitting at the midrule's own position is reported as
the left-hand side. */
return (symbol_index == midrule_rhs_index) ? LHS_REF : symbol_index;
}
}
/* Several usable candidates. */
case 2:
default:
{
complain (text_loc, complaint,
_("ambiguous reference: %s"), quote (text));
show_sub_messages (complaint,
cp, explicit_bracketing, midrule_rhs_index,
dollar_or_at);
return INVALID_REF;
}
}
}
/* Keeps track of the maximum number of semantic values to the left of
a handle (those referenced by $0, $-1, etc.) that are required by the
semantic actions of this grammar. */
int max_left_semantic_context = 0;
/* If CP points to a type name (i.e., <.*?>), set *TYPE_NAME to its
   beginning (i.e., just after the opening "<"), and return the
   pointer immediately after the closing ">".  Otherwise return CP
   and leave *TYPE_NAME untouched.

   When a type name is found, tag_seen is set, and a complaint is
   issued at DOLLAR_LOC if an untyped value was seen earlier.  */
static
char *
fetch_type_name (char *cp, char const **type_name,
                 const location *dollar_loc)
{
  if (*cp != '<')
    return cp;

  ++cp;
  *type_name = cp;
  /* Find the closing '>': a '>' that does not end a "->".  */
  for (; !(*cp == '>' && cp[-1] != '-'); ++cp)
    continue;
  /* Step over the '>'.  It is replaced by '\0' only later, because
     the original text is still needed for error messages.  */
  ++cp;
  if (untyped_var_seen)
    complain (dollar_loc, complaint,
              _("explicit type given in untyped grammar"));
  tag_seen = true;
  return cp;
}
/*---------------------------------------------------------------------.
| TEXT is pointing to a would-be semantic value (i.e., a '$').         |
|                                                                      |
| Possible inputs: $[<TYPENAME>]($|INTEGER|ID|[ID])                    |
|                                                                      |
| Output to OBSTACK_FOR_STRING a reference to this semantic value.     |
| RULE is the rule (or midrule) whose action is being translated, and  |
| DOLLAR_LOC the location of TEXT, used in diagnostics.                |
`---------------------------------------------------------------------*/
static void
handle_action_dollar (symbol_list *rule, char *text, const location *dollar_loc)
{
/* In a midrule action, names and positions are resolved against the
parent rule, restricted to the symbols before the midrule. */
symbol_list *effective_rule;
int effective_rule_length;
if (rule->midrule_parent_rule)
{
effective_rule = rule->midrule_parent_rule;
effective_rule_length = rule->midrule_parent_rhs_index - 1;
}
else
{
effective_rule = rule;
effective_rule_length = symbol_list_length (rule->next);
}
/* The type name if explicit, otherwise left null. */
char const *type_name = NULL;
char *cp = fetch_type_name (text + 1, &type_name, dollar_loc);
int n = parse_ref (cp, effective_rule, effective_rule_length,
rule->midrule_parent_rhs_index, text, dollar_loc, '$');
/* End type_name. Don't do it earlier: parse_ref depends on TEXT. */
if (type_name)
cp[-1] = '\0';
switch (n)
{
/* parse_ref has already complained. */
case INVALID_REF:
break;
/* Reference to the LHS ($$) of the rule or midrule. */
case LHS_REF:
{
symbol_list *sym = symbol_list_n_get (rule, 0);
/* Without an explicit or declared type, this is an error in a
typed grammar; otherwise just record that an untyped value was
used. */
if (!type_name
&& !sym->content.sym->content->type_name)
{
if (union_seen || tag_seen)
{
if (rule->midrule_parent_rule)
complain (dollar_loc, complaint,
_("$$ for the midrule at $%d of %s"
" has no declared type"),
rule->midrule_parent_rhs_index,
quote (effective_rule->content.sym->tag));
else
complain (dollar_loc, complaint,
_("$$ of %s has no declared type"),
quote (rule->content.sym->tag));
}
else
untyped_var_seen = true;
}
obstack_printf (&obstack_for_string, "]b4_lhs_value(orig %d, ",
sym->content.sym->content->number);
obstack_quote (&obstack_for_string, type_name);
obstack_sgrow (&obstack_for_string, ")[");
rule->action_props.is_value_used = true;
}
break;
/* Reference to a RHS symbol. */
default:
{
/* For $0, $-1, etc., record how far to the left of the handle
the actions reach. */
if (max_left_semantic_context < 1 - n)
max_left_semantic_context = 1 - n;
/* Only a positive N designates a symbol of the rule. */
symbol_list *sym = 0 < n ? symbol_list_n_get (effective_rule, n) : NULL;
if (!type_name
&& (!sym || !sym->content.sym->content->type_name))
{
if (union_seen || tag_seen)
complain (dollar_loc, complaint,
_("$%s of %s has no declared type"), cp,
quote (effective_rule->content.sym->tag));
else
untyped_var_seen = true;
}
obstack_printf (&obstack_for_string,
"]b4_rhs_value(%d, %d, ",
effective_rule_length, n);
if (sym)
obstack_printf (&obstack_for_string, "%s%d, ",
sym->content.sym->content->class == nterm_sym ? "orig " : "",
sym->content.sym->content->number);
else
obstack_sgrow (&obstack_for_string, "[], ");
obstack_quote (&obstack_for_string, type_name);
obstack_sgrow (&obstack_for_string, ")[");
if (0 < n)
{
/* With api.value.automove defined, warn when the same value is
referenced more than once. */
if (muscle_percent_define_ifdef ("api.value.automove")
&& sym->action_props.is_value_used)
complain (dollar_loc, Wother,
_("multiple occurrences of $%d with api.value.automove"),
n);
sym->action_props.is_value_used = true;
}
}
break;
}
}
/*------------------------------------------------------------------.
| TEXT is a location token (i.e., a '@...') found at AT_LOC in the  |
| action of RULE.  Make sure locations are enabled, and output to   |
| OBSTACK_FOR_STRING a reference to this location.  Nothing is      |
| output for an invalid reference.                                  |
`------------------------------------------------------------------*/
static void
handle_action_at (symbol_list *rule, char *text, const location *at_loc)
{
  /* In a midrule action, resolve against the parent rule, limited to
     the symbols that precede the midrule.  */
  symbol_list *effective_rule
    = rule->midrule_parent_rule ? rule->midrule_parent_rule : rule;
  int effective_rule_length
    = (rule->midrule_parent_rule
       ? rule->midrule_parent_rhs_index - 1
       : symbol_list_length (rule->next));

  muscle_percent_define_ensure ("locations", *at_loc, true);

  int n = parse_ref (text + 1, effective_rule, effective_rule_length,
                     rule->midrule_parent_rhs_index, text, at_loc, '@');
  if (n == INVALID_REF)
    return;

  if (n == LHS_REF)
    {
      obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
    }
  else
    {
      obstack_printf (&obstack_for_string, "]b4_rhs_location(%d, %d)[",
                      effective_rule_length, n);
    }
}
/*-------------------------.
| Initialize the scanner. |
`-------------------------*/
/* Run the code scanner over the code of \a self, starting in the
   start condition \a sc_context (SC_RULE_ACTION, SC_SYMBOL_ACTION,
   or INITIAL), and return the translated string, in which the
   '$...' and '@...' have been replaced.  */
static char const *
translate_action (code_props *self, int sc_context)
{
  /* Location tracking restarts where the code passage begins.  */
  loc->end = self->location.start;
  loc->start = loc->end;

  yy_switch_to_buffer (yy_scan_string (self->code));
  char *translated = code_lex (self, sc_context);
  yy_delete_buffer (YY_CURRENT_BUFFER);

  return translated;
}
/*------------------------------------------------------------------------.
| Implementation of the public interface as documented in "scan-code.h". |
`------------------------------------------------------------------------*/
/* Overwrite *SELF with a copy of code_props_none, i.e., with a
code_props that contains no code. */
void
code_props_none_init (code_props *self)
{
*self = code_props_none;
}
code_props code_props_none = CODE_PROPS_NONE_INIT;
/* Reset *SELF, then make it a plain code passage: CODE, found at
   CODE_LOC.  CODE is referenced, not copied.  */
void
code_props_plain_init (code_props *self, char const *code,
                       location code_loc)
{
  code_props_none_init (self);
  self->location = code_loc;
  self->code = code;
  self->kind = CODE_PROPS_PLAIN;
}
/* Reset *SELF, then make it a symbol action: CODE, found at
   CODE_LOC.  CODE is referenced, not copied.  */
void
code_props_symbol_action_init (code_props *self, char const *code,
                               location code_loc)
{
  code_props_none_init (self);
  self->location = code_loc;
  self->code = code;
  self->kind = CODE_PROPS_SYMBOL_ACTION;
}
/* Reset *SELF, then make it the action CODE (found at CODE_LOC) of
   RULE, recording its named reference NAME, its midrule TYPE, and
   whether it is a predicate.  CODE and RULE are referenced, not
   copied.  */
void
code_props_rule_action_init (code_props *self, char const *code,
                             location code_loc, symbol_list *rule,
                             named_ref *name, uniqstr type,
                             bool is_predicate)
{
  code_props_none_init (self);

  /* The code passage itself.  */
  self->kind = CODE_PROPS_RULE_ACTION;
  self->location = code_loc;
  self->code = code;

  /* What is specific to rule actions.  */
  self->is_predicate = is_predicate;
  self->type = type;
  self->named_ref = name;
  self->rule = rule;
}
/* Translate the code of *SELF in place, using the scanner start
   condition that corresponds to its kind.  A code_props of kind
   CODE_PROPS_NONE is left untouched.  */
void
code_props_translate_code (code_props *self)
{
  int sc_context;
  if (self->kind == CODE_PROPS_PLAIN)
    sc_context = INITIAL;
  else if (self->kind == CODE_PROPS_SYMBOL_ACTION)
    sc_context = SC_SYMBOL_ACTION;
  else if (self->kind == CODE_PROPS_RULE_ACTION)
    sc_context = SC_RULE_ACTION;
  else
    /* Nothing to translate.  */
    return;

  self->code = translate_action (self, sc_context);
}
/* Per its contract in scan-code.h, free the memory allocated by the
previous invocation of code_props_translate_code; this is delegated
to STRING_FREE. */
void
code_scanner_last_string_free (void)
{
STRING_FREE ();
}
/* Prepare the code scanner: clear its Flex debug flag and initialize
   the string obstack.  */
void
code_scanner_init (void)
{
  yy_flex_debug = 0;
  obstack_init (&obstack_for_string);
}
/* Release what the code scanner allocated: the variant table, the
   string obstack, and Flex's own buffers.  */
void
code_scanner_free (void)
{
  variant_table_free ();
  obstack_free (&obstack_for_string, 0);
  /* Reclaim Flex's buffers.  */
  yylex_destroy ();
}
@@ -0,0 +1,3 @@
#include <config.h>
#include "system.h"
#include "src/scan-gram.c"
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,36 @@
/* Bison Grammar Scanner
Copyright (C) 2006-2007, 2009-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef SCAN_GRAM_H_
# define SCAN_GRAM_H_
/* Initialize the scanner to read file GRAM. */
void gram_scanner_open (const char *gram);
/* Close the open files. */
void gram_scanner_close (void);
/* Free all the memory allocated to the scanner. */
void gram_scanner_free (void);
void gram_scanner_last_string_free (void);
# define GRAM_LEX_DECL int gram_lex (GRAM_STYPE *val, location *loc)
GRAM_LEX_DECL;
#endif /* !SCAN_GRAM_H_ */
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,3 @@
#include <config.h>
#include "system.h"
#include "src/scan-skel.c"
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,28 @@
/* Scan Bison Skeletons.
Copyright (C) 2005-2007, 2009-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
void scan_skel (FILE *);
/* Pacify "make syntax-check". */
extern FILE *skel_in;
extern FILE *skel_out;
extern int skel__flex_debug;
extern int skel_lineno;
void skel_scanner_free (void);
@@ -0,0 +1,290 @@
/* Scan Bison Skeletons. -*- C -*-
Copyright (C) 2001-2015, 2018-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
%option nodefault noyywrap noinput nounput never-interactive debug
%option prefix="skel_" outfile="lex.yy.c"
%{
#include <dirname.h>
#include <error.h>
#include <path-join.h>
#include <quotearg.h>
#include "src/complain.h"
#include "src/files.h"
#include "src/getargs.h"
#include "src/scan-skel.h"
#define FLEX_PREFIX(Id) skel_ ## Id
#include "src/flex-scanner.h"
/* Work around a bug in flex 2.5.31. See Debian bug 333231
<https://bugs.debian.org/333231>. */
#undef skel_wrap
#define skel_wrap() 1
#define YY_DECL static int skel_lex (void)
YY_DECL;
typedef void (*at_directive)(int, char**, char **, int*);
static void at_init (int *argc, char *argv[], at_directive *at_ptr, at_directive fun);
static void at_basename (int argc, char *argv[], char**, int*);
static void at_complain (int argc, char *argv[], char**, int*);
static void at_output (int argc, char *argv[], char **name, int *lineno);
static void fail_for_at_directive_too_many_args (char const *at_directive_name);
static void fail_for_at_directive_too_few_args (char const *at_directive_name);
static void fail_for_invalid_at (char const *at);
static void output_mapped_file (char const *name);
%}
/* Identifiers of our M4 macros. */
macro [bm]4_[a-zA-Z_0-9]*
%x SC_AT_DIRECTIVE_ARGS
%x SC_AT_DIRECTIVE_SKIP_WS
%%
%{
int out_lineno PACIFY_CC (= 0);
char *out_name = NULL;
/* Currently, only the @complain directive takes multiple arguments, and
never more than 7, with argv[0] being the directive name and argv[1]
being the type of complaint to dispatch. */
#define ARGC_MAX 9
int argc = 0;
char *argv[ARGC_MAX];
at_directive at_ptr = NULL;
%}
"@@" fputc ('@', yyout);
"@{" fputc ('[', yyout);
"@}" fputc (']', yyout);
"@'" continue; /* Used by b4_cat in ../data/bison.m4. */
@\n continue;
"@oline@" fprintf (yyout, "%d", out_lineno + 1);
"@ofile@" output_mapped_file (out_name);
"@basename(" at_init (&argc, argv, &at_ptr, &at_basename);
"@complain(" at_init (&argc, argv, &at_ptr, &at_complain);
"@output(" at_init (&argc, argv, &at_ptr, &at_output);
/* This pattern must not match more than the previous @ patterns. */
@[^@{}''(\n]* fail_for_invalid_at (yytext);
\n out_lineno++; ECHO;
[^bm@\n]+ ECHO;
/* If there are still identifiers that look like macros, such as
b4_synbol, this is probably an error, say a typo in M4, or
overquotation. */
{macro} {
location loc = empty_loc;
loc.start.file = map_file_name (out_name);
loc.start.line = out_lineno;
loc.end = loc.start;
complain (&loc, Wother,
"suspicious sequence in the output: %s", yytext);
ECHO;
}
. ECHO;
<INITIAL><<EOF>> {
if (out_name)
{
free (out_name);
xfclose (yyout);
}
return EOF;
}
<SC_AT_DIRECTIVE_ARGS>
{
[^@]+ STRING_GROW ();
"@@" STRING_1GROW ('@');
"@{" STRING_1GROW ('[');
"@}" STRING_1GROW (']');
"@'" continue; /* For starting an argument that begins with whitespace. */
@\n continue;
@[,)] {
/* "@," and "@)" both finish the current argument; "@)" also runs the
   directive.  Refuse to write past the end of argv.  */
if (argc >= ARGC_MAX)
fail_for_at_directive_too_many_args (argv[0]);
argv[argc++] = obstack_finish0 (&obstack_for_string);
/* Like M4, skip whitespace after a comma. */
if (yytext[1] == ',')
BEGIN SC_AT_DIRECTIVE_SKIP_WS;
else
{
aver (at_ptr);
at_ptr (argc, argv, &out_name, &out_lineno);
/* Release the arguments, starting from the first one, and get
   ready for the next directive.  */
obstack_free (&obstack_for_string, argv[0]);
argc = 0;
BEGIN INITIAL;
}
}
@.? fail_for_invalid_at (yytext);
}
<SC_AT_DIRECTIVE_SKIP_WS>
{
[ \t\r\n] continue; /* Drop whitespace following "@,". */
. yyless (0); BEGIN SC_AT_DIRECTIVE_ARGS; /* Push the character back and resume argument scanning. */
}
<SC_AT_DIRECTIVE_ARGS,SC_AT_DIRECTIVE_SKIP_WS>
{
<<EOF>> complain (NULL, fatal, _("unclosed %s directive in skeleton"), argv[0]); /* Input ended inside a directive; argv[0] names it. */
}
%%
/* Start collecting the arguments of an @-directive.  Store the
   directive name (the matched text with its last character replaced
   by a NUL) as the next ARGV entry, record FUN in *AT_PTR as the
   handler to run later, and switch to SC_AT_DIRECTIVE_ARGS.  */
static void
at_init (int *argc, char *argv[], at_directive *at_ptr, at_directive fun)
{
  /* Overwriting the last matched character means the YYLENG bytes
     grown below already end with the terminating NUL.  */
  yytext[yyleng-1] = '\0';
  obstack_grow (&obstack_for_string, yytext, yyleng);
  argv[*argc] = obstack_finish (&obstack_for_string);
  *argc += 1;
  *at_ptr = fun;
  BEGIN SC_AT_DIRECTIVE_ARGS;
}
/*------------------------.
| Scan a Bison skeleton.  |
`------------------------*/

void
scan_skel (FILE *in)
{
  /* The string obstack is set up on the first call only.  */
  static bool obstack_ready = false;
  if (!obstack_ready)
    {
      obstack_init (&obstack_for_string);
      obstack_ready = true;
    }
  skel__flex_debug = trace_flag & trace_skeleton;
  skel_in = in;
  skel_lex ();
}
/* Release the memory held by the skeleton scanner.  */
void
skel_scanner_free (void)
{
  /* Free everything allocated on the string obstack.  */
  obstack_free (&obstack_for_string, NULL);

  /* Reclaim Flex's buffers.  */
  yylex_destroy ();
}
/* Convert the complaint type ARG (one of the values issued from
   b4_error) into the matching warnings value.  Abort on any other
   string.  */
static inline warnings
flag (const char *arg)
{
  if (STREQ (arg, "complain"))
    return complaint;
  if (STREQ (arg, "deprecated"))
    return Wdeprecated;
  if (STREQ (arg, "fatal"))
    return fatal;
  if (STREQ (arg, "note"))
    return silent | complaint | no_caret | note;
  if (STREQ (arg, "warn"))
    return Wother;
  abort ();
}
/* Handler for @basename: write the last component of ARGV[1] to
   yyout.  More than one argument after the directive name is
   reported as an error.  OUT_NAMEP and OUT_LINENOP are unused.  */
static void
at_basename (int argc, char *argv[], char **out_namep, int *out_linenop)
{
  (void) out_namep;
  (void) out_linenop;
  if (argc > 2)
    fail_for_at_directive_too_many_args (argv[0]);
  char const *base = last_component (argv[1]);
  fputs (base, yyout);
}
/* Handler for @complain.  ARGV[1] is the complaint type, ARGV[2] and
   ARGV[3] are the start and end boundaries (the location is omitted
   when ARGV[2] is empty), and the remaining arguments are passed on to
   complain_args.  OUT_NAMEP and OUT_LINENOP are unused.  */
static void
at_complain (int argc, char *argv[], char **out_namep, int *out_linenop)
{
  (void) out_namep;
  (void) out_linenop;

  if (argc < 4)
    fail_for_at_directive_too_few_args (argv[0]);

  warnings w = flag (argv[1]);

  location loc;
  bool has_location = argv[2] && argv[2][0];
  if (has_location)
    {
      boundary_set_from_string (&loc.start, argv[2]);
      boundary_set_from_string (&loc.end, argv[3]);
    }
  complain_args (has_location ? &loc : NULL, w, argc - 4, argv + 4);
}
/* Handler for @output: close the previous output file, if any, and
   open the one named by joining ARGV[1] with the optional ARGV[2].
   The new name is stored in *OUT_NAMEP and *OUT_LINENOP restarts
   at 1.  */
static void
at_output (int argc, char *argv[], char **out_namep, int *out_linenop)
{
  if (argc > 3)
    fail_for_at_directive_too_many_args (argv[0]);

  if (*out_namep)
    {
      free (*out_namep);
      xfclose (yyout);
    }

  char const *second = argc > 2 ? argv[2] : NULL;
  *out_namep = xpath_join (argv[1], second);
  output_file_name_check (out_namep, true);

  /* If there were errors, do not generate the output.  */
  char const *target = complaint_status ? "/dev/null" : *out_namep;
  yyout = xfopen (target, "w");
  *out_linenop = 1;
}
/* Issue a fatal complaint: directive AT_DIRECTIVE_NAME was given too
   few arguments.  */
static void
fail_for_at_directive_too_few_args (char const *at_directive_name)
{
  complain (NULL, fatal,
            _("too few arguments for %s directive in skeleton"),
            at_directive_name);
}
/* Issue a fatal complaint: directive AT_DIRECTIVE_NAME was given too
   many arguments.  */
static void
fail_for_at_directive_too_many_args (char const *at_directive_name)
{
  complain (NULL, fatal,
            _("too many arguments for %s directive in skeleton"),
            at_directive_name);
}
/* Issue a fatal complaint about AT, an unrecognized @ sequence found
   in the skeleton.  */
static void
fail_for_invalid_at (char const *at)
{
  complain (NULL, fatal,
            "invalid @ in skeleton: %s",
            at);
}
/* Write NAME to yyout, after passing it through map_file_name and
   quoting the result in C style.  */
static void
output_mapped_file (char const *name)
{
  char const *mapped = map_file_name (name);
  char const *quoted = quotearg_style (c_quoting_style, mapped);
  fputs (quoted, yyout);
}
@@ -0,0 +1,586 @@
/* Counterexample Generation Search Nodes
Copyright (C) 2020-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "state-item.h"
#include <assert.h>
#include <gl_linked_list.h>
#include <gl_xlist.h>
#include <stdlib.h>
#include <time.h>
#include "closure.h"
#include "getargs.h"
#include "nullable.h"
size_t nstate_items;
state_item_number *state_item_map;
state_item *state_items;
// Hash functions for index -> bitset hash maps.
// A hash_pair is one entry of such a map.
typedef struct
{
// Lookup key; the hasher and the comparator only look at this field.
int key;
// Bitset stored under KEY; released by hash_pair_free.
bitset l;
} hash_pair;
/* Hash SL by its key, into the range [0, MAX).  */
static size_t
hash_pair_hasher (const hash_pair *sl, size_t max)
{
  size_t key = sl->key;
  return key % max;
}
/* Two entries are equal iff their keys are equal.  */
static bool
hash_pair_comparator (const hash_pair *l, const hash_pair *r)
{
  bool same_key = l->key == r->key;
  return same_key;
}
/* Release HP together with the bitset it holds.  */
static void
hash_pair_free (hash_pair *hp)
{
  bitset members = hp->l;
  bitset_free (members);
  free (hp);
}
static Hash_table *
hash_pair_table_create (int size)
{
return hash_xinitialize (size,
NULL,
(Hash_hasher) hash_pair_hasher,
(Hash_comparator) hash_pair_comparator,
(Hash_data_freer) hash_pair_free);
}
/* Return the bitset stored under KEY in TAB, or NULL if there is no
   such entry.  */
static bitset
hash_pair_lookup (Hash_table *tab, int key)
{
  /* Only the key of the probe is consulted by the table callbacks.  */
  hash_pair probe;
  probe.key = key;
  hash_pair *found = hash_lookup (tab, &probe);
  if (!found)
    return NULL;
  return found->l;
}
/* Store VAL under KEY in TAB.  KEY must not already be present.  */
static void
hash_pair_insert (Hash_table *tab, int key, bitset val)
{
  hash_pair *entry = xmalloc (sizeof *entry);
  entry->l = val;
  entry->key = key;

  hash_pair *stored = hash_xinsert (tab, entry);
  // This must be the first insertion.
  (void) stored;
  assert (stored == entry);
}
/* The state_item designated by the id S of a state and the offset OFF
   of the item within that state.  */
state_item *
state_item_lookup (state_number s, state_item_number off)
{
  state_item_number index = state_item_index_lookup (s, off);
  return state_items + index;
}
/* Initialize state_items[SIDX] for state S and the item at index OFF
   in ritem: no lookahead, no transition (-1), no production edges,
   and an empty set of reverse edges.  */
static inline void
state_item_set (state_item_number sidx, const state *s, item_number off)
{
  state_item *si = &state_items[sidx];
  si->state = s;
  si->item = &ritem[off];
  si->lookahead = NULL;
  si->trans = -1;
  si->prods = NULL;
  si->revs = bitset_create (nstate_items, BITSET_SPARSE);
}
/**
* Initialize state_items set
*
* Two passes over the states.  The first counts the state-items and
* records, per state, which closure items will be added after the
* state's own items.  The second allocates state_item_map and
* state_items and fills them in, state by state.
*/
static void
init_state_items (void)
{
nstate_items = 0;
// production_items[i]: the closure items of state i that get their
// own state-item.
bitsetv production_items = bitsetv_create (nstates, nritems, BITSET_SPARSE);
for (int i = 0; i < nstates; ++i)
{
const state *s = states[i];
// Every item of the state itself gets a state-item.
nstate_items += s->nitems;
closure (s->items, s->nitems);
// Among the closure, keep the items that are not at index 0 and
// whose preceding ritem entry is a rule number.
for (size_t j = 0; j < nitemset; ++j)
if (0 < itemset[j]
&& item_number_is_rule_number (ritem[itemset[j] - 1]))
{
bitset_set (production_items[i], itemset[j]);
++nstate_items;
}
}
// One extra slot so that state_item_map[i + 1] is valid for the last
// state too.
state_item_map = xnmalloc (nstates + 1, sizeof (state_item_number));
state_items = xnmalloc (nstate_items, sizeof (state_item));
state_item_number sidx = 0;
for (int i = 0; i < nstates; ++i)
{
state_item_map[i] = sidx;
// Index into red->rules of the next reduction to match.
int rule_search_idx = 0;
const state *s = states[i];
const reductions *red = s->reductions;
// First the state's own items, in the state's order.
for (int j = 0; j < s->nitems; ++j)
{
state_item_set (sidx, s, s->items[j]);
state_item *si = &state_items[sidx];
const rule *r = item_rule (si->item);
if (rule_search_idx < red->num && red->rules[rule_search_idx] < r)
++rule_search_idx;
// The item belongs to the reduction under consideration: take
// that reduction's lookahead set, if lookaheads were computed.
if (rule_search_idx < red->num && r == red->rules[rule_search_idx])
{
bitsetv lookahead = red->lookaheads;
if (lookahead)
si->lookahead = lookahead[rule_search_idx];
}
++sidx;
}
// Then the closure items recorded during the first pass.
bitset_iterator biter;
item_number off;
BITSET_FOR_EACH (biter, production_items[i], off, 0)
{
state_item_set (sidx, s, off);
// ritem[off] being a rule number means the item is immediately
// at the end of its rule.
if (item_number_is_rule_number (ritem[off]))
{
bitsetv lookahead = red->lookaheads;
if (lookahead)
state_items[sidx].lookahead = lookahead[rule_search_idx];
++rule_search_idx;
}
++sidx;
}
}
// Sentinel: one past the last state-item.
state_item_map[nstates] = nstate_items;
bitsetv_free (production_items);
}
static size_t
state_sym_hasher (const void *st, size_t max)
{
return ((state *) st)->accessing_symbol % max;
}
static bool
state_sym_comparator (const void *s1, const void *s2)
{
return ((state *) s1)->accessing_symbol == ((state *) s2)->accessing_symbol;
}
/* Return the state of H whose accessing symbol is SYM, or NULL if
   there is none.  */
static state *
state_sym_lookup (symbol_number sym, Hash_table *h)
{
  /* Only the accessing symbol of the probe is consulted.  */
  state probe;
  probe.accessing_symbol = sym;
  state *found = hash_lookup (h, &probe);
  return found;
}
/* Compute the transition edge (and the matching reverse edge) of
   every state-item.  */
static void
init_trans (void)
{
  for (state_number sn = 0; sn < nstates; ++sn)
    {
      state *src_state = states[sn];
      transitions *trans = src_state->transitions;

      // Build a hash set of the enabled destination states of this
      // state, keyed by their accessing symbol.
      Hash_table *by_symbol
        = hash_xinitialize (trans->num, NULL, (Hash_hasher) state_sym_hasher,
                            (Hash_comparator) state_sym_comparator, NULL);
      for (int n = 0; n < trans->num; ++n)
        if (!TRANSITION_IS_DISABLED (trans, n))
          hash_xinsert (by_symbol, trans->states[n]);

      const state_item_number limit = state_item_map[sn + 1];
      for (state_item_number sin = state_item_map[sn]; sin < limit; ++sin)
        {
          item_number *item = state_items[sin].item;
          // Items at the end of their rule have no transition.
          if (item_number_is_rule_number (*item))
            continue;
          state *dst = state_sym_lookup (*item, by_symbol);
          if (!dst)
            continue;
          // Find the item of the destination state that is this item
          // with the dot moved one symbol forward.
          for (int k = 0; k < dst->nitems; ++k)
            {
              if (item + 1 != ritem + dst->items[k])
                continue;
              state_item_number dst_sin
                = state_item_index_lookup (dst->number, k);
              state_items[sin].trans = dst_sin;
              bitset_set (state_items[dst_sin].revs, sin);
              break;
            }
        }
      hash_free (by_symbol);
    }
}
// Compute the production edges (prods) of every state-item, and the
// matching reverse edges (revs).
static void
init_prods (void)
{
for (int i = 0; i < nstates; ++i)
{
state *s = states[i];
// closure_map is a hash map from nonterminals to a set
// of the items that produce those nonterminals
Hash_table *closure_map = hash_pair_table_create (nsyms - ntokens);
// Add the nitems of state to skip to the production portion
// of that state's state_items
for (state_item_number j = state_item_map[i] + s->nitems;
j < state_item_map[i + 1]; ++j)
{
state_item *src = &state_items[j];
item_number *item = src->item;
symbol_number lhs = item_rule (item)->lhs->number;
bitset itms = hash_pair_lookup (closure_map, lhs);
// First production seen for this nonterminal: create its set.
// The map owns the set and frees it in hash_free below.
if (!itms)
{
itms = bitset_create (nstate_items, BITSET_SPARSE);
hash_pair_insert (closure_map, lhs, itms);
}
bitset_set (itms, j);
}
// For each item with a dot followed by a nonterminal,
// try to create a production edge.
for (state_item_number j = state_item_map[i]; j < state_item_map[i + 1]; ++j)
{
state_item *src = &state_items[j];
item_number item = *(src->item);
// Skip reduce items and items with terminals after the dot
if (item_number_is_rule_number (item) || ISTOKEN (item))
continue;
symbol_number sym = item_number_as_symbol_number (item);
bitset lb = hash_pair_lookup (closure_map, sym);
if (lb)
{
// Each state-item gets its own copy, since the set held by the
// map is freed along with the map.
bitset copy = bitset_create (nstate_items, BITSET_SPARSE);
bitset_copy (copy, lb);
// update prods.
state_items[j].prods = copy;
// update revs.
bitset_iterator biter;
state_item_number prod;
BITSET_FOR_EACH (biter, copy, prod, 0)
bitset_set (state_items[prod].revs, j);
}
}
hash_free (closure_map);
}
}
/* Since lookaheads are only generated for reductions, we need to
propagate lookahead sets backwards as the searches require each
state_item to have a lookahead. */
static inline void
gen_lookaheads (void)
{
for (state_item_number i = 0; i < nstate_items; ++i)
{
state_item *si = &state_items[i];
if (item_number_is_symbol_number (*(si->item)) || !si->lookahead)
continue;
bitset lookahead = si->lookahead;
state_item_list queue =
gl_list_create (GL_LINKED_LIST, NULL, NULL, NULL, true, 1,
(const void **) &si);
// For each reduction item, traverse through all state_items
// accessible through reverse transition steps, and set their
// lookaheads to the reduction items lookahead
while (gl_list_size (queue) > 0)
{
state_item *prev = (state_item *) gl_list_get_at (queue, 0);
gl_list_remove_at (queue, 0);
prev->lookahead = lookahead;
if (SI_TRANSITION (prev))
{
bitset rsi = state_items[prev - state_items].revs;
bitset_iterator biter;
state_item_number sin;
BITSET_FOR_EACH (biter, rsi, sin, 0)
gl_list_add_first (queue, &state_items[sin]);
}
}
gl_list_free (queue);
}
}
bitsetv firsts = NULL;
// Compute FIRSTS: for each nonterminal, a set of symbols built from
// the beginnings of its rules (see the two steps below).
static void
init_firsts (void)
{
// One bitset per nonterminal, each ranging over all the symbols.
firsts = bitsetv_create (nnterms, nsyms, BITSET_FIXED);
// Step 1: seed each left-hand side with the first symbol found
// after the leading nullable nonterminals of its rules, when that
// symbol is not a nonterminal.
for (rule_number i = 0; i < nrules; ++i)
{
rule *r = &rules[i];
item_number *n = r->rhs;
// Iterate through nullable nonterminals to try to find a terminal.
while (item_number_is_symbol_number (*n) && ISVAR (*n)
&& nullable[*n - ntokens])
++n;
// Reached the end of the rule or a nonterminal: nothing to seed.
if (item_number_is_rule_number (*n) || ISVAR (*n))
continue;
symbol_number lhs = r->lhs->number;
bitset_set (FIRSTS (lhs), *n);
}
// Step 2: iterate to a fixed point, merging into each left-hand side
// the sets of the nonterminals that start its rules.
bool change = true;
while (change)
{
change = false;
for (rule_number i = 0; i < nrules; ++i)
{
rule *r = &rules[i];
symbol_number lhs = r->lhs->number;
bitset f_lhs = FIRSTS (lhs);
for (item_number *n = r->rhs;
item_number_is_symbol_number (*n) && ISVAR (*n);
++n)
{
bitset f = FIRSTS (*n);
if (!bitset_subset_p (f_lhs, f))
{
change = true;
bitset_union (f_lhs, f_lhs, f);
}
// Symbols after a non-nullable nonterminal cannot contribute.
if (!nullable[*n - ntokens])
break;
}
}
}
}
/* Mark SI as disabled (trans == -2) and release its edge sets.  */
static inline void
disable_state_item (state_item *si)
{
  bitset_free (si->revs);
  if (si->prods)
    bitset_free (si->prods);
  si->trans = -2;
}
/* Disable all state_item paths that lead to/from SI and nowhere
   else.

   This is a work-list traversal seeded with SI.  Each state_item
   taken from the list that is not already disabled is detached from
   its successors (transition and productions) and predecessors
   (revs), neighbours left without any other edge are queued in turn,
   and the state_item is finally disabled.  */
static void
prune_state_item (const state_item *si)
{
  state_item_list queue =
    gl_list_create (GL_LINKED_LIST, NULL, NULL, NULL, true, 1,
                    (const void **) &si);

  while (gl_list_size (queue) > 0)
    {
      state_item *dsi = (state_item *) gl_list_get_at (queue, 0);
      gl_list_remove_at (queue, 0);
      // A state_item may have been queued several times.
      if (SI_DISABLED (dsi - state_items))
        continue;

      // Forward transition edge: the target loses DSI as a
      // predecessor, and is pruned too if it has none left.
      if (dsi->trans >= 0 && !SI_DISABLED (dsi->trans))
        {
          const state_item *trans = &state_items[dsi->trans];
          bitset_reset (trans->revs, dsi - state_items);
          if (bitset_empty_p (trans->revs))
            gl_list_add_last (queue, trans);
        }

      // Forward production edges: same treatment for each target.
      if (dsi->prods)
        {
          bitset_iterator biter;
          state_item_number sin;
          BITSET_FOR_EACH (biter, dsi->prods, sin, 0)
            {
              if (SI_DISABLED (sin))
                continue;
              const state_item *prod = &state_items[sin];
              bitset_reset (prod->revs, dsi - state_items);
              if (bitset_empty_p (prod->revs))
                gl_list_add_last (queue, prod);
            }
        }

      // Reverse edges.
      bitset_iterator biter;
      state_item_number sin;
      BITSET_FOR_EACH (biter, dsi->revs, sin, 0)
        {
          if (SI_DISABLED (sin))
            continue;
          state_item *rev = &state_items[sin];
          // Compare indices rather than addresses: rev->trans may be
          // -1, and &state_items[-1] would be a pointer before the
          // start of the array, which is undefined behavior.
          if (rev->trans == dsi - state_items)
            gl_list_add_last (queue, rev);
          else if (rev->prods)
            {
              // REV reaches DSI through a production edge: drop it, and
              // prune REV if that was its last production.
              bitset_reset (rev->prods, dsi - state_items);
              if (bitset_empty_p (rev->prods))
                gl_list_add_last (queue, rev);
            }
          else
            gl_list_add_last (queue, rev);
        }
      disable_state_item (dsi);
    }
  gl_list_free (queue);
}
/* To make searches more efficient, prune away paths that are caused
   by disabled transitions.  The state_items are visited from the last
   one down to the first.  */
static void
prune_disabled_paths (void)
{
  for (int i = nstate_items - 1; i >= 0; --i)
    {
      state_item *candidate = &state_items[i];
      // Only items followed by a symbol but left without a transition
      // are starting points for the pruning.
      if (candidate->trans != -1)
        continue;
      if (!item_number_is_symbol_number (*candidate->item))
        continue;
      prune_state_item (candidate);
    }
}
/* Print on OUT, as a single line, PREFIX followed by the item
   of SI.  */
void
state_item_print (const state_item *si, FILE *out, const char *prefix)
{
  fputs (prefix, out);
  item_print (si->item, NULL, out);
  fputc ('\n', out);
}
/* The rule that the item of SI belongs to.  */
const rule*
state_item_rule (const state_item *si)
{
  const rule *r = item_rule (si->item);
  return r;
}
/**
* Report the state_item graph
*
* Print on OUT, state by state, every state-item with its transition
* edge, its production edges and its reverse edges (or DISABLED when
* it was disabled), followed by the FIRSTS set of every nonterminal.
*/
static void
state_items_report (FILE *out)
{
fprintf (out, "# state items: %zu\n", nstate_items);
for (state_number i = 0; i < nstates; ++i)
{
fprintf (out, "State %d:\n", i);
for (state_item_number j = state_item_map[i]; j < state_item_map[i + 1]; ++j)
{
const state_item *si = &state_items[j];
item_print (si->item, NULL, out);
// The edge sets of a disabled state-item were freed: do not
// look at them.
if (SI_DISABLED (j))
fputs (" DISABLED\n", out);
else
{
putc ('\n', out);
if (si->trans >= 0)
{
fputs (" -> ", out);
state_item_print (&state_items[si->trans], out, "");
}
// Production edges and reverse edges are printed the same way,
// with a different marker.
bitset sets[2] = { si->prods, si->revs };
const char *txt[2] = { " => ", " <- " };
for (int seti = 0; seti < 2; ++seti)
{
bitset b = sets[seti];
// prods may be NULL.
if (b)
{
bitset_iterator biter;
state_item_number sin;
BITSET_FOR_EACH (biter, b, sin, 0)
{
fputs (txt[seti], out);
state_item_print (&state_items[sin], out, "");
}
}
}
}
putc ('\n', out);
}
}
fprintf (out, "FIRSTS\n");
// FIRSTS is defined for the symbols numbered from ntokens up.
for (symbol_number i = ntokens; i < nsyms; ++i)
{
fprintf (out, " %s firsts\n", symbols[i]->tag);
bitset_iterator iter;
symbol_number j;
BITSET_FOR_EACH (iter, FIRSTS (i), j, 0)
fprintf (out, " %s\n", symbols[j]->tag);
}
fputs ("\n\n", out);
}
/* Build the state-item graph: the state-items, their transition and
   production edges, their lookaheads, and the FIRSTS sets; then prune
   the paths caused by disabled transitions.  When tracing
   counterexamples, also report the elapsed time and the graph on
   stderr.  */
void
state_items_init (void)
{
  const time_t start = time (NULL);

  init_state_items ();
  init_trans ();
  init_prods ();
  gen_lookaheads ();
  init_firsts ();
  prune_disabled_paths ();

  if (!(trace_flag & trace_cex))
    return;

  double elapsed = difftime (time (NULL), start);
  fprintf (stderr, "init: %f\n", elapsed);
  state_items_report (stderr);
}
void
state_items_free (void)
{
for (state_item_number i = 0; i < nstate_items; ++i)
if (!SI_DISABLED (i))
{
state_item *si = &state_items[i];
if (si->prods)
bitset_free (si->prods);
bitset_free (si->revs);
}
free (state_items);
bitsetv_free (firsts);
}
/**
 * Decide, from the precedence and associativity of the left-hand
 * sides of their rules, whether the production step from SI to NEXT
 * is allowed.
 */
bool
production_allowed (const state_item *si, const state_item *next)
{
  const sym_content *from = item_rule (si->item)->lhs;
  const sym_content *to = item_rule (next->item)->lhs;

  // Without a precedence on both sides there is nothing to forbid.
  if (from->prec < 0 || to->prec < 0)
    return true;

  // Do not expand if lower precedence.
  if (to->prec < from->prec)
    return false;

  // Do not expand if same precedence, but left-associative.
  bool same_prec = from->prec == to->prec;
  return !(same_prec && from->assoc == left_assoc);
}
@@ -0,0 +1,114 @@
/* Counterexample Generation Search Nodes
Copyright (C) 2020-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef STATE_ITEM_H
# define STATE_ITEM_H
# include <bitsetv.h>
# include <gl_list.h>
# include <hash.h>
# include "gram.h"
# include "state.h"
/* Initializes a graph connecting (state, production item) pairs to
pairs they can make a transition or production step to. This graph
is used to search for paths that represent counterexamples of some
conflict.
state_items is an array of state-items (state, item pairs) ordered by state.
state_item_map maps state numbers to the first item which
corresponds to it in the array. A state's portion in state_items
begins with its items in the same order as it was in the state.
This is then followed by productions from the closure of the state
in order by rule.
There are two types of edges in this graph: transitions and
productions. Transitions are the same as transitions from the
parser except edges are only between items from the same
rule.
Productions are edges from items with a nonterminal after the dot to
the production of that nonterminal in the same state. These edges are
stored as a bitset in a state-item.
The inverses of these edges are stored in a bitset in the state-item,
"revs." A state-item that begins with a dot will have reverse
production edges, and all others will have reverse transition
edges. */
/* Whether the state-item numbered SIN is disabled, i.e., its trans
   member is -2.  */
# define SI_DISABLED(Sin) (state_items[Sin].trans == -2)
/* Whether SI is the very first state-item, or the ritem entry right
   before its item is negative.  Exact opposite of SI_TRANSITION.  */
# define SI_PRODUCTION(Si) ((Si) == state_items || *((Si)->item - 1) < 0)
/* Whether SI is not the very first state-item and the ritem entry
   right before its item is nonnegative.  */
# define SI_TRANSITION(Si) ((Si) != state_items && *((Si)->item - 1) >= 0)
/* Index of a state-item in the state_items array.  */
typedef int state_item_number;
/* A (state, item) pair, node of the state-item graph.  */
typedef struct
{
/* The state this pair belongs to.  */
const state *state;
/* The item: a position in a rule.  */
item_number *item;
/* Number of the state-item reached by the transition edge.  Negative
   when there is none; -2 marks a disabled state-item (see
   SI_DISABLED).  */
state_item_number trans;
/* Production edges, as a set of state-item numbers.  */
bitset prods;
/* Reverse edges, as a set of state-item numbers.  */
bitset revs;
/* Lookahead set of this state-item.  */
bitset lookahead;
} state_item;
// A path of state-items.
typedef gl_list_t state_item_list;
extern bitsetv firsts;
# define FIRSTS(sym) firsts[(sym) - ntokens]
extern size_t nstate_items;
extern state_item_number *state_item_map;
/** Array mapping state_item_numbers to state_items */
extern state_item *state_items;
state_item *state_item_lookup (state_number s, state_item_number off);
/* The number of the state-item at offset OFF within the portion of
   state_items that belongs to state S.  */
static inline state_item_number
state_item_index_lookup (state_number s, state_item_number off)
{
  state_item_number first = state_item_map[s];
  return first + off;
}
void state_items_init (void);
void state_items_free (void);
void state_item_print (const state_item *si, FILE *out, const char *prefix);
const rule *state_item_rule (const state_item *si);
bool production_allowed (const state_item *si, const state_item *next);
// Iterating on a state_item_list: store the next element in *SI and
// return true, or free the iterator IT and return false when the
// list is exhausted.
static inline bool
state_item_list_next (gl_list_iterator_t *it, state_item **si)
{
  const void *elt = NULL;
  if (!gl_list_iterator_next (it, &elt, NULL))
    {
      gl_list_iterator_free (it);
      return false;
    }
  *si = (state_item *) elt;
  return true;
}
#endif /* STATE_ITEM_H */
+464
View File
@@ -0,0 +1,464 @@
/* Type definitions for the finite state machine for Bison.
Copyright (C) 2001-2007, 2009-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "state.h"
#include "system.h"
#include <hash.h>
#include "closure.h"
#include "complain.h"
#include "getargs.h"
#include "gram.h"
#include "print-xml.h"
/*-------------------.
| Shifts and Gotos. |
`-------------------*/
/*-----------------------------------------.
| Create a new array of NUM shifts/gotos. |
`-----------------------------------------*/
static transitions *
transitions_new (int num, state **dst)
{
  /* The header is immediately followed by the NUM state pointers.  */
  size_t payload_size = num * sizeof *dst;
  transitions *res = xmalloc (offsetof (transitions, states) + payload_size);
  memcpy (res->states, dst, payload_size);
  res->num = num;
  return res;
}
/* The state reached from S by the transition on SYM.  Abort if S has
   no such transition.  */
state *
transitions_to (state *s, symbol_number sym)
{
  transitions *trans = s->transitions;
  int i = 0;
  while (i < trans->num && TRANSITION_SYMBOL (trans, i) != sym)
    ++i;
  if (i == trans->num)
    abort ();
  return trans->states[i];
}
/*--------------------.
| Error transitions. |
`--------------------*/
/*---------------------------------.
| Create a new array of NUM errs. |
`---------------------------------*/
errs *
errs_new (int num, symbol **tokens)
{
  size_t payload_size = num * sizeof *tokens;
  errs *res = xmalloc (offsetof (errs, symbols) + payload_size);
  res->num = num;
  /* Without TOKENS, the symbols are left uninitialized.  */
  if (!tokens)
    return res;
  memcpy (res->symbols, tokens, payload_size);
  return res;
}
/*-------------.
| Reductions. |
`-------------*/
/*---------------------------------------.
| Create a new array of NUM reductions. |
`---------------------------------------*/
static reductions *
reductions_new (int num, rule **reds)
{
  size_t payload_size = num * sizeof *reds;
  reductions *res = xmalloc (offsetof (reductions, rules) + payload_size);
  memcpy (res->rules, reds, payload_size);
  res->num = num;
  /* No lookaheads yet.  */
  res->lookaheads = NULL;
  return res;
}
/*---------.
| States. |
`---------*/
state_number nstates = 0;
/* FINAL_STATE is properly set by new_state when it recognizes its
accessing symbol: $end. */
state *final_state = NULL;
/*------------------------------------------------------------------.
| Create a new state with ACCESSING_SYMBOL, for those items. Store |
| it in the state hash table. |
`------------------------------------------------------------------*/
state *
state_new (symbol_number accessing_symbol,
           size_t nitems, item_index *core)
{
  aver (nstates < STATE_NUMBER_MAXIMUM);

  size_t core_size = nitems * sizeof *core;
  state *res = xmalloc (offsetof (state, items) + core_size);

  /* Identity of the state.  */
  res->number = nstates++;
  res->accessing_symbol = accessing_symbol;
  res->nitems = nitems;
  memcpy (res->items, core, core_size);

  /* Everything else starts empty.  */
  res->transitions = NULL;
  res->reductions = NULL;
  res->errs = NULL;
  res->state_list = NULL;
  res->consistent = false;
  res->solved_conflicts = NULL;
  res->solved_conflicts_xml = NULL;

  state_hash_insert (res);
  return res;
}
/* Create a new state with the same accessing symbol, items,
   consistency flag, and copies of the transitions and reductions
   of S.  Unlike state_new, the result is not inserted in the state
   hash table.  */
state *
state_new_isocore (state const *s)
{
  aver (nstates < STATE_NUMBER_MAXIMUM);

  size_t core_size = s->nitems * sizeof *s->items;
  state *res = xmalloc (offsetof (state, items) + core_size);

  res->number = nstates++;
  res->accessing_symbol = s->accessing_symbol;
  res->nitems = s->nitems;
  memcpy (res->items, s->items, core_size);
  res->consistent = s->consistent;

  /* Fresh copies of the transitions and of the reductions.  */
  res->transitions =
    transitions_new (s->transitions->num, s->transitions->states);
  res->reductions = reductions_new (s->reductions->num, s->reductions->rules);

  res->errs = NULL;
  res->state_list = NULL;
  res->solved_conflicts = NULL;
  res->solved_conflicts_xml = NULL;
  return res;
}
/*---------.
| Free S. |
`---------*/
static void
state_free (state *s)
{
  /* The members first, then the state itself.  */
  free (s->errs);
  free (s->reductions);
  free (s->transitions);
  free (s);
}
void
state_transitions_print (const state *s, FILE *out)
{
const transitions *trans = s->transitions;
fprintf (out, "transitions of %d (%d):\n",
s->number, trans->num);
for (int i = 0; i < trans->num; ++i)
fprintf (out, " %d: (%d, %s, %d)\n",
i,
s->number,
symbols[s->transitions->states[i]->accessing_symbol]->tag,
s->transitions->states[i]->number);
}
/*---------------------------.
| Set the transitions of S. |
`---------------------------*/
void
state_transitions_set (state *s, int num, state **dst)
{
  /* The transitions may be set only once.  */
  aver (!s->transitions);
  s->transitions = transitions_new (num, dst);
  if (!(trace_flag & trace_automaton))
    return;
  state_transitions_print (s, stderr);
}
/*--------------------------.
| Set the reductions of S. |
`--------------------------*/
void
state_reductions_set (state *s, int num, rule **reds)
{
  /* The reductions may be set only once.  */
  aver (!s->reductions);
  reductions *fresh = reductions_new (num, reds);
  s->reductions = fresh;
}
/* The index of rule R among the reductions of S.  Abort if R is not
   one of them.  */
int
state_reduction_find (state const *s, rule const *r)
{
  reductions *reds = s->reductions;
  int i = 0;
  while (i < reds->num && reds->rules[i] != r)
    ++i;
  if (i == reds->num)
    abort ();
  return i;
}
/*--------------------.
| Set the errs of S. |
`--------------------*/
void
state_errs_set (state *s, int num, symbol **tokens)
{
  /* The errs may be set only once.  */
  aver (!s->errs);
  errs *fresh = errs_new (num, tokens);
  s->errs = fresh;
}
/*--------------------------------------------------.
| Print on OUT all the lookahead tokens such that S |
| wants to reduce R. |
`--------------------------------------------------*/
void
state_rule_lookaheads_print (state const *s, rule const *r, FILE *out)
{
  /* Find the reduction we are handling.  */
  reductions *reds = s->reductions;
  int red = state_reduction_find (s, r);

  /* Nothing to print without lookaheads.  */
  if (!reds->lookaheads || red == -1)
    return;

  bitset_iterator biter;
  int k;
  char const *sep = "";
  fprintf (out, " [");
  BITSET_FOR_EACH (biter, reds->lookaheads[red], k, 0)
    {
      fprintf (out, "%s%s", sep, symbols[k]->tag);
      /* Every tag but the first is preceded by a separator.  */
      sep = ", ";
    }
  fprintf (out, "]");
}
/* XML counterpart of state_rule_lookaheads_print: print on OUT, at
   indentation LEVEL, the lookahead tokens such that S wants to
   reduce R.  */
void
state_rule_lookaheads_print_xml (state const *s, rule const *r,
                                 FILE *out, int level)
{
  /* Find the reduction we are handling.  */
  reductions *reds = s->reductions;
  int red = state_reduction_find (s, r);

  /* Nothing to print without lookaheads.  */
  if (!reds->lookaheads || red == -1)
    return;

  bitset_iterator biter;
  int k;
  xml_puts (out, level, "<lookaheads>");
  BITSET_FOR_EACH (biter, reds->lookaheads[red], k, 0)
    xml_printf (out, level + 1, "<symbol>%s</symbol>",
                xml_escape (symbols[k]->tag));
  xml_puts (out, level, "</lookaheads>");
}
/*---------------------.
| A state hash table. |
`---------------------*/
/* Initial capacity of states hash table. */
#define HT_INITIAL_CAPACITY 257
static struct hash_table *state_table = NULL;
/* Two states are equal if they have the same core items: the same
   number of items, and the same item at every position.  */
static inline bool
state_compare (state const *s1, state const *s2)
{
  size_t n = s1->nitems;
  if (n != s2->nitems)
    return false;
  size_t i = 0;
  while (i < n && s1->items[i] == s2->items[i])
    ++i;
  return i == n;
}
static bool
state_comparator (void const *s1, void const *s2)
{
return state_compare (s1, s2);
}
/* Hash S into [0, TABLESIZE): the sum of its item numbers, modulo
   TABLESIZE.  */
static inline size_t
state_hash (state const *s, size_t tablesize)
{
  size_t sum = 0;
  size_t i = 0;
  while (i < s->nitems)
    {
      sum += s->items[i];
      ++i;
    }
  return sum % tablesize;
}
static size_t
state_hasher (void const *s, size_t tablesize)
{
return state_hash (s, tablesize);
}
/*-------------------------------.
| Create the states hash table. |
`-------------------------------*/
void
state_hash_new (void)
{
/* No data freer is installed: the table does not own the states. */
state_table = hash_xinitialize (HT_INITIAL_CAPACITY,
NULL,
state_hasher,
state_comparator,
NULL);
}
/*---------------------------------------------.
| Free the states hash table, not the states. |
`---------------------------------------------*/
void
state_hash_free (void)
{
/* The table was created without a data freer, so the states
   themselves are left alone. */
hash_free (state_table);
}
/*-----------------------------------.
| Insert S in the state hash table. |
`-----------------------------------*/
void
state_hash_insert (state *s)
{
/* The table hashes and compares states by their core items (see
   state_hash and state_compare). */
hash_xinsert (state_table, s);
}
/*------------------------------------------------------------------.
| Find the state associated to the CORE, and return it. If it does |
| not exist yet, return NULL. |
`------------------------------------------------------------------*/
state *
state_hash_lookup (size_t nitems, const item_index *core)
{
  /* Build a temporary state holding just the core, to use as the
     probe.  */
  size_t core_size = nitems * sizeof *core;
  state *probe = xmalloc (offsetof (state, items) + core_size);
  memcpy (probe->items, core, core_size);
  probe->nitems = nitems;

  state *found = hash_lookup (state_table, probe);
  free (probe);
  return found;
}
/*--------------------------------------------------------.
| Record S and all states reachable from S in REACHABLE. |
`--------------------------------------------------------*/
static void
state_record_reachable_states (state *s, bitset reachable)
{
  /* Already visited.  */
  if (bitset_test (reachable, s->number))
    return;
  bitset_set (reachable, s->number);

  /* Recurse through the enabled transitions.  */
  transitions *trans = s->transitions;
  for (int i = 0; i < trans->num; ++i)
    {
      if (TRANSITION_IS_DISABLED (trans, i))
        continue;
      state_record_reachable_states (trans->states[i], reachable);
    }
}
/* Free the states that cannot be reached from state 0, and pack and
   renumber the remaining ones.  OLD_TO_NEW[I] receives the new number
   of the state formerly numbered I, or the former value of NSTATES if
   that state was removed.  */
void
state_remove_unreachable_states (state_number old_to_new[])
{
  bitset reachable = bitset_create (nstates, BITSET_FIXED);
  state_record_reachable_states (states[0], reachable);

  state_number nkept = 0;
  for (state_number i = 0; i < nstates; ++i)
    {
      state *s = states[i];
      if (!bitset_test (reachable, s->number))
        {
          state_free (s);
          old_to_new[i] = nstates;
          continue;
        }
      /* Slide the state down to the next free slot.  */
      states[nkept] = s;
      s->number = nkept;
      old_to_new[i] = nkept++;
    }
  nstates = nkept;
  bitset_free (reachable);
}
/* All the decorated states, indexed by the state number. */
state **states = NULL;
/*----------------------.
| Free all the states. |
`----------------------*/
void
states_free (void)
{
  closure_free ();
  /* Each state first, then the array that holds them.  */
  state_number i = 0;
  while (i < nstates)
    {
      state_free (states[i]);
      ++i;
    }
  free (states);
}
+286
View File
@@ -0,0 +1,286 @@
/* Type definitions for the finite state machine for Bison.
Copyright (C) 1984, 1989, 2000-2004, 2007, 2009-2015, 2018-2021 Free
Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
/* These type definitions are used to represent a nondeterministic
finite state machine that parses the specified grammar. This
information is generated by the function generate_states in the
file LR0.
Each state of the machine is described by a set of items --
particular positions in particular rules -- that are the possible
places where parsing could continue when the machine is in this
state. These symbols at these items are the allowable inputs that
can follow now.
A core represents one state. States are numbered in the NUMBER
field. When generate_states is finished, the starting state is
state 0 and NSTATES is the number of states. (FIXME: This sentence
is no longer true: A transition to a state whose state number is
NSTATES indicates termination.) All the cores are chained together
and FIRST_STATE points to the first one (state 0).
For each state there is a particular symbol which must have been
the last thing accepted to reach that state. It is the
ACCESSING_SYMBOL of the core.
Each core contains a vector of NITEMS items which are the indices
in the RITEM vector of the items that are selected in this state.
The two types of actions are shifts/gotos (push the lookahead token
and read another/goto to the state designated by a nterm) and
reductions (combine the last n things on the stack via a rule,
replace them with the symbol that the rule derives, and leave the
lookahead token alone). When the states are generated, these
actions are represented in two other lists.
Each transition structure describes the possible transitions out of
one state (there are NUM of them). Each contains a vector of
numbers of the states that transitions can go to. The
accessing_symbol fields of those states' cores say what kind of
input leads to them.
A NULL transition should be ignored: conflict resolution disables
transitions by setting them to NULL (see TRANSITION_DISABLE).
Each reductions structure describes the possible reductions at the
state whose number is in the number field. rules is an array of
num rules. lookaheads is an array of bitsets, one per rule.
Conflict resolution can decide that certain tokens in certain
states should explicitly be errors (for implementing %nonassoc).
For each state, the tokens that are errors for this reason are
recorded in an errs structure. The generated parser does not
depend on this errs structure, it is used only in the reports
(*.output, etc.) to describe conflicted actions that have been
discarded.
There is at least one goto transition present in state zero. It
leads to a next-to-final state whose accessing_symbol is the
grammar's start symbol. The next-to-final state has one shift to
the final state, whose accessing_symbol is zero (end of input).
The final state has one shift, which goes to the termination state.
The reason for the extra state at the end is to placate the
parser's strategy of making all decisions one token ahead of its
actions. */
#ifndef STATE_H_
# define STATE_H_
# include <stdbool.h>
# include <bitset.h>
# include "gram.h"
# include "symtab.h"
/*-------------------.
| Numbering states. |
`-------------------*/
/* State numbers are plain ints.  */
typedef int state_number;
# define STATE_NUMBER_MAXIMUM INT_MAX

/* Convert the state number S to an int.  Going through this function
   rather than relying on the typedef keeps callers independent of
   how state_number is represented.  */
static inline int
state_number_as_int (state_number s)
{
  int as_int = s;
  return as_int;
}
typedef struct state state;
/*--------------.
| Transitions. |
`--------------*/
/* The transitions (shifts and gotos) leaving one state.  */
typedef struct
{
int num; /** Size of destination STATES. */
/* The destination states.  Declared with a single element but meant
to hold NUM of them -- presumably the structure is over-allocated
by its creator (TODO confirm).  An entry is NULL once it has been
disabled with TRANSITION_DISABLE.  */
state *states[1];
} transitions;
/* What is the symbol labelling the transition to
   TRANSITIONS->states[Num]?  Can be a token (amongst which the error
   token), or nonterminals in case of gotos.

   In all the macros below TRANSITIONS is parenthesized wherever it is
   dereferenced, so that any pointer expression (e.g., "p + 1") can be
   passed.  */
# define TRANSITION_SYMBOL(Transitions, Num) \
  ((Transitions)->states[Num]->accessing_symbol)

/* Is the TRANSITIONS->states[Num] a shift? (as opposed to gotos).  */
# define TRANSITION_IS_SHIFT(Transitions, Num) \
  (ISTOKEN (TRANSITION_SYMBOL (Transitions, Num)))

/* Is the TRANSITIONS->states[Num] a goto?  */
# define TRANSITION_IS_GOTO(Transitions, Num) \
  (!TRANSITION_IS_SHIFT (Transitions, Num))

/* Is the TRANSITIONS->states[Num] labelled by the error token?  */
# define TRANSITION_IS_ERROR(Transitions, Num) \
  (TRANSITION_SYMBOL (Transitions, Num) == errtoken->content->number)

/* When resolving an S/R conflict, if the reduction wins, the shift is
   disabled: its destination is set to NULL.  */
# define TRANSITION_DISABLE(Transitions, Num) \
  ((Transitions)->states[Num] = NULL)

# define TRANSITION_IS_DISABLED(Transitions, Num) \
  ((Transitions)->states[Num] == NULL)

/* Iterate over each transition over a token (shifts).  ITER must be
   an lvalue; the statement that follows the macro is run once per
   enabled shift.  The loop stops at the first enabled transition
   that is not a shift, so it presumably relies on shifts being
   stored before gotos (TODO confirm).  The arguments are evaluated
   several times.  */
# define FOR_EACH_SHIFT(Transitions, Iter)                      \
  for ((Iter) = 0;                                              \
       (Iter) < (Transitions)->num                              \
         && (TRANSITION_IS_DISABLED (Transitions, Iter)         \
             || TRANSITION_IS_SHIFT (Transitions, Iter));       \
       ++(Iter))                                                \
    if (!TRANSITION_IS_DISABLED (Transitions, Iter))
/* The destination of the transition (shift/goto) from state S on
label SYM (term or nterm). Abort if none found. */
struct state *transitions_to (state *s, symbol_number sym);
/*-------.
| Errs. |
`-------*/
/* The tokens that conflict resolution explicitly turned into errors
in one state (see the file-level comment about %nonassoc).  */
typedef struct
{
/* Number of entries in SYMBOLS.  */
int num;
/* The error tokens.  Declared with a single element but meant to
hold NUM of them -- presumably over-allocated by errs_new (TODO
confirm).  */
symbol *symbols[1];
} errs;
errs *errs_new (int num, symbol **tokens);
/*-------------.
| Reductions. |
`-------------*/
/* The possible reductions of one state.  */
typedef struct
{
/* Number of entries in RULES.  */
int num;
/* Per the file-level comment: an array of bitsets, one per rule.  */
bitset *lookaheads;
/* Sorted ascendingly on rule number. */
rule *rules[1];
} reductions;
/*---------.
| states. |
`---------*/
struct state_list;
/* One state of the automaton: its core (the items), plus the actions
(transitions, reductions, errs) attached to it.  */
struct state
{
/* The state number; also its index in STATES.  */
state_number number;
/* The symbol that must have been the last thing accepted to reach
this state.  */
symbol_number accessing_symbol;
/* The shifts and gotos leaving this state.  */
transitions *transitions;
/* The possible reductions in this state.  */
reductions *reductions;
/* The tokens explicitly made errors in this state.  */
errs *errs;
/* When an includer (such as ielr.c) needs to store states in a list, the
includer can define struct state_list as the list node structure and can
store in this member a reference to the node containing each state. */
struct state_list *state_list;
/* Whether no lookahead sets on reduce actions are needed to decide
what to do in state S. */
bool consistent;
/* If some conflicts were solved thanks to precedence/associativity,
a human readable description of the resolution. */
const char *solved_conflicts;
/* NOTE(review): presumably the same description formatted for the
XML report -- confirm against the code that fills it.  */
const char *solved_conflicts_xml;
/* Its items. Must be last, since ITEMS can be arbitrarily large. Sorted
ascendingly on item index in RITEM, which is sorted on rule number. */
size_t nitems;
item_index items[1];
};
extern state_number nstates;
extern state *final_state;
/* Create a new state with ACCESSING_SYMBOL for those items. */
state *state_new (symbol_number accessing_symbol,
size_t core_size, item_index *core);
/* Create a new state with the same kernel as S (same accessing
symbol, transitions, reductions, consistency and items). */
state *state_new_isocore (state const *s);
/* Record that from S we can reach all the DST states (NUM of them). */
void state_transitions_set (state *s, int num, state **dst);
/* Print the transitions of state s for debug. */
void state_transitions_print (const state *s, FILE *out);
/* Set the reductions of STATE. */
void state_reductions_set (state *s, int num, rule **reds);
/* The index of the reduction of state S that corresponds to rule R.
Aborts if there is no reduction of R in S. */
int state_reduction_find (state const *s, rule const *r);
/* Set the errs of STATE. */
void state_errs_set (state *s, int num, symbol **errors);
/* Print on OUT all the lookahead tokens such that this STATE wants to
reduce R. */
void state_rule_lookaheads_print (state const *s, rule const *r, FILE *out);
void state_rule_lookaheads_print_xml (state const *s, rule const *r,
FILE *out, int level);
/* Create/destroy the states hash table. */
void state_hash_new (void);
void state_hash_free (void);
/* Find the state associated to the CORE, and return it. If it does
not exist yet, return NULL. */
state *state_hash_lookup (size_t core_size, const item_index *core);
/* Insert STATE in the state hash table. */
void state_hash_insert (state *s);
/* Remove unreachable states, renumber remaining states, update NSTATES, and
write to OLD_TO_NEW a mapping of old state numbers to new state numbers such
that the old value of NSTATES is written as the new state number for removed
states. The size of OLD_TO_NEW must be the old value of NSTATES. */
void state_remove_unreachable_states (state_number old_to_new[]);
/* All the states, indexed by the state number. */
extern state **states;
/* Free all the states. */
void states_free (void);
#endif /* !STATE_H_ */
@@ -0,0 +1,67 @@
/* Convert version string to int.
Copyright (C) 2020-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include "strversion.h"
#include <errno.h>
#include <intprops.h>
/* Parse one version component (a run of decimal digits) at S.

   Store its value in *VAL and the address of the first unparsed
   character in *END.  Return nonzero on success, zero if S does not
   start with a digit or if the value does not fit in a long.

   strtol alone is too lenient here: it skips leading white space and
   accepts a sign, which would make "+3.7", " 3.7" or "-0.5" valid
   version strings.  */
static int
strversion_component (char const *s, char **end, long *val)
{
  if (! ('0' <= *s && *s <= '9'))
    return 0;
  errno = 0;
  *val = strtol (s, end, 10);
  return errno == 0;
}

/* Convert VERSION ("MAJOR.MINOR" or "MAJOR.MINOR.MICRO") into
   MAJOR * 10000 + MINOR * 100 + MICRO.

   MINOR and MICRO must be in [0, 100).  Whatever follows a '.' placed
   right after MICRO is ignored (e.g., "3.7.4.133-fbac" => 30704), and
   so is whatever follows a second '.' after MINOR.MICRO is otherwise
   the last thing parsed.

   Return -1 if VERSION is malformed or if the result does not fit in
   an int.  */
int
strversion_to_int (char const *version)
{
  char *cp = NULL;
  long major = 0;
  long minor = 0;
  long micro = 0;

  /* MAJOR, necessarily followed by '.'.  */
  if (!strversion_component (version, &cp, &major)
      || *cp != '.'
      || INT_MAX / 10000 < major)
    return -1;
  int res = (int) (major * 10000);

  /* MINOR, at the end of the string or followed by '.'.  */
  if (!strversion_component (cp + 1, &cp, &minor)
      || (*cp != '\0' && *cp != '.')
      || 100 <= minor
      || INT_MAX - res < minor * 100)
    return -1;
  res += (int) (minor * 100);

  /* Optional MICRO.  */
  if (*cp == '.')
    {
      if (!strversion_component (cp + 1, &cp, &micro)
          || (*cp != '\0' && *cp != '.')
          || 100 <= micro
          || INT_MAX - res < micro)
        return -1;
      res += (int) micro;
    }

  return res;
}
@@ -0,0 +1,28 @@
/* Convert version string to int.
Copyright (C) 2020-2021 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef STRVERSION_H_
# define STRVERSION_H_
/* Convert VERSION into an int (MAJOR * 10000 + MINOR * 100 + MICRO).
E.g., "3.7.4" => 30704, "3.8" => 30800.
Return -1 on errors. */
int strversion_to_int (char const *version);
#endif
@@ -0,0 +1,255 @@
/* Lists of symbols for Bison
Copyright (C) 2002, 2005-2007, 2009-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include "symlist.h"
/*--------------------------------------.
| Create a list containing SYM at LOC. |
`--------------------------------------*/
symbol_list *
symbol_list_sym_new (symbol *sym, location loc)
{
  symbol_list *node = xmalloc (sizeof *node);

  /* A single, unlinked node.  */
  node->next = NULL;

  /* What the node holds, and where it was seen.  */
  node->content_type = SYMLIST_SYMBOL;
  node->content.sym = sym;
  node->sym_loc = loc;
  node->named_ref = NULL;

  /* No midrule relation yet.  */
  node->midrule = NULL;
  node->midrule_parent_rule = NULL;
  node->midrule_parent_rhs_index = 0;

  /* Members used for LHS only: "nothing specified" defaults.  */
  node->rhs_loc = empty_loc;
  node->percent_empty_loc = empty_loc;
  node->dprec_loc = empty_loc;
  node->merger_declaration_loc = empty_loc;
  node->ruleprec = NULL;
  node->dprec = 0;
  node->merger = 0;
  node->expected_sr_conflicts = -1;
  node->expected_rr_conflicts = -1;
  code_props_none_init (&node->action_props);

  return node;
}
/*--------------------------------------------.
| Create a list containing TYPE_NAME at LOC. |
`--------------------------------------------*/
symbol_list *
symbol_list_type_new (uniqstr type_name, location loc)
{
  symbol_list *res = xmalloc (sizeof *res);

  /* The node owns a private semantic_type holder, released by
     symbol_list_free.  */
  res->content_type = SYMLIST_TYPE;
  res->content.sem_type = xmalloc (sizeof *res->content.sem_type);
  res->content.sem_type->tag = type_name;
  res->content.sem_type->location = loc;
  res->content.sem_type->status = undeclared;

  res->sym_loc = loc;
  res->named_ref = NULL;

  /* Give the remaining members the same defaults as
     symbol_list_sym_new does, rather than leaving them indeterminate:
     symbol_list_syms_print, for one, reads action_props whatever the
     content_type of the node.  */
  res->midrule = NULL;
  res->midrule_parent_rule = NULL;
  res->midrule_parent_rhs_index = 0;
  res->rhs_loc = empty_loc;
  res->ruleprec = NULL;
  res->percent_empty_loc = empty_loc;
  code_props_none_init (&res->action_props);
  res->dprec = 0;
  res->dprec_loc = empty_loc;
  res->merger = 0;
  res->merger_declaration_loc = empty_loc;
  res->expected_sr_conflicts = -1;
  res->expected_rr_conflicts = -1;

  res->next = NULL;
  return res;
}
/* Give the type TYPE_NAME to the symbol of every node of SYMS, using
   each node's own location.  Return SYMS.  */
symbol_list *
symbol_list_type_set (symbol_list *syms, uniqstr type_name)
{
  symbol_list *node = syms;
  while (node)
    {
      symbol_type_set (node->content.sym, type_name, node->sym_loc);
      node = node->next;
    }
  return syms;
}
/* Return the first node of L whose symbol is SYM, or NULL if there is
   none.  The search stops at the end of the list or at the first node
   whose content is NULL.  */
symbol_list *
symbol_list_find_symbol (symbol_list *l, const symbol *sym)
{
  while (l && l->content.sym)
    {
      if (l->content.sym == sym)
        return l;
      l = l->next;
    }
  return NULL;
}
/*-----------------------------------------------------------------------.
| Print this list, for which every content_type must be SYMLIST_SYMBOL. |
`-----------------------------------------------------------------------*/
void
symbol_list_syms_print (const symbol_list *l, FILE *out)
{
  bool first = true;

  fputc ('[', out);
  for (/* Nothing. */; l && l->content.sym; l = l->next)
    {
      /* Separate the entries, but do not lead with a separator.  */
      if (!first)
        fputs (", ", out);
      first = false;

      /* Opening tag, according to the kind of node.  */
      char const *open;
      switch (l->content_type)
        {
        case SYMLIST_SYMBOL:
          open = "symbol{";
          break;
        case SYMLIST_TYPE:
          open = "type{";
          break;
        default:
          open = "invalid content_type{";
          break;
        }
      fputs (open, out);

      /* Only genuine symbols have something to print.  */
      if (l->content_type == SYMLIST_SYMBOL)
        symbol_print (l->content.sym, out);

      if (l->action_props.is_value_used)
        fputs (" (used)", out);
      else
        fputs (" (unused)", out);
      fputc ('}', out);
    }
  fputc (']', out);
}
/*---------------------------.
| Prepend NODE to the LIST. |
`---------------------------*/
symbol_list *
symbol_list_prepend (symbol_list *list, symbol_list *node)
{
/* NODE becomes the new head; whatever its NEXT pointed to before is
overwritten.  LIST may be NULL.  */
node->next = list;
return node;
}
/* Return the last node of LIST, or NULL if LIST is empty.  */
symbol_list *
symbol_list_last (symbol_list *list)
{
  symbol_list *last = list;
  if (last)
    for (/* Nothing. */; last->next; last = last->next)
      continue;
  return last;
}
/* Link NODE after the last node of LIST, and return the head of the
   resulting list: LIST itself, or NODE if LIST was empty.  */
symbol_list *
symbol_list_append (symbol_list *list, symbol_list *node)
{
  if (!list)
    return node;
  symbol_list_last (list)->next = node;
  return list;
}
/*-----------------------------------------------.
| Free the LIST, but not the items it contains. |
`-----------------------------------------------*/
void
symbol_list_free (symbol_list *list)
{
  while (list)
    {
      /* Read the link before the node is released.  */
      symbol_list *rest = list->next;

      /* The named reference and, for type nodes, the semantic_type
         holder go away with the node; symbols are left alone.  */
      named_ref_free (list->named_ref);
      if (list->content_type == SYMLIST_TYPE)
        free (list->content.sem_type);
      free (list);

      list = rest;
    }
}
/*--------------------.
| Return its length. |
`--------------------*/
int
symbol_list_length (symbol_list const *l)
{
  int count = 0;
  while (l)
    {
      /* A symbol node without a symbol ends the count.  */
      if (l->content_type == SYMLIST_SYMBOL && !l->content.sym)
        break;
      ++count;
      l = l->next;
    }
  return count;
}
/*------------------------------.
| Get item N in symbol list L. |
`------------------------------*/
symbol_list *
symbol_list_n_get (symbol_list *l, int n)
{
  aver (0 <= n);

  /* Step N times, making sure we do not run off the list.  */
  while (0 < n)
    {
      l = l->next;
      aver (l);
      --n;
    }

  /* The node reached must hold a genuine symbol.  */
  aver (l->content_type == SYMLIST_SYMBOL);
  aver (l->content.sym);
  return l;
}
/*--------------------------------------------------------------.
| Get the data type (alternative in the union) of the value for |
| symbol N in symbol list L. |
`--------------------------------------------------------------*/
uniqstr
symbol_list_n_type_name_get (symbol_list *l, int n)
{
  /* symbol_list_n_get checks that the node holds a symbol.  */
  symbol_list *item = symbol_list_n_get (l, n);
  symbol *sym = item->content.sym;
  return sym->content->type_name;
}
/* Whether NODE is NULL, or is a symbol node that holds no symbol.  */
bool
symbol_list_null (symbol_list *node)
{
  if (!node)
    return true;
  if (node->content_type != SYMLIST_SYMBOL)
    return false;
  return node->content.sym == NULL;
}
/* Attach CPROPS (of kind KIND) to what NODE designates: its symbol,
   or the semantic type registered under its tag.  Whatever was still
   "undeclared" becomes "used".  */
void
symbol_list_code_props_set (symbol_list *node, code_props_type kind,
                            code_props const *cprops)
{
  if (node->content_type == SYMLIST_SYMBOL)
    {
      symbol *sym = node->content.sym;
      symbol_code_props_set (sym, kind, cprops);
      if (sym->content->status == undeclared)
        sym->content->status = used;
    }
  else if (node->content_type == SYMLIST_TYPE)
    {
      /* The node only holds a tag and a location; the code
         properties go to the type returned by semantic_type_get.  */
      semantic_type *holder = node->content.sem_type;
      semantic_type *type
        = semantic_type_get (holder->tag, &holder->location);
      semantic_type_code_props_set (type, kind, cprops);
      if (holder->status == undeclared)
        holder->status = used;
    }
}
@@ -0,0 +1,156 @@
/* Lists of symbols for Bison
Copyright (C) 2002, 2005-2007, 2009-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef SYMLIST_H_
# define SYMLIST_H_
# include "location.h"
# include "scan-code.h"
# include "symtab.h"
# include "named-ref.h"
/* A list of symbols, used during the parsing for many different
purposes: rules, symbol declarations or properties (such as
%destructor, etc.)... */
/* One node of a singly linked list of symbols or semantic types.  */
typedef struct symbol_list
{
/**
* Whether this node contains a symbol or a semantic type.
*/
enum {
SYMLIST_SYMBOL,
SYMLIST_TYPE
} content_type;
union {
/**
* The symbol or \c NULL iff
* <tt>symbol_list::content_type = SYMLIST_SYMBOL</tt>.
*/
symbol *sym;
/**
* The semantic type iff <tt>symbol_list::content_type = SYMLIST_TYPE</tt>.
*/
semantic_type *sem_type;
} content;
/* Named reference. */
named_ref *named_ref;
/* Proper location of the symbol, not all the rule */
location sym_loc;
/* If this symbol is the generated lhs for a midrule but this is the rule in
whose rhs it appears, MIDRULE = a pointer to that midrule. */
struct symbol_list *midrule;
/* If this symbol is the generated lhs for a midrule and this is that
midrule, MIDRULE_PARENT_RULE = a pointer to the rule in whose rhs it
appears, and MIDRULE_PARENT_RHS_INDEX = its rhs index (1-origin) in the
parent rule. */
struct symbol_list *midrule_parent_rule;
int midrule_parent_rhs_index;
/*--------------------------------------------------------------.
| Used for rules only (attached to the "LHS", one per rule even |
| when several RHSs are bound to a single lhs via "|"). |
`--------------------------------------------------------------*/
/* Location of the RHS. */
location rhs_loc;
/* Precedence/associativity. */
symbol *ruleprec;
/* The action is attached to the LHS of a rule, but action properties for
* each RHS are also stored here. */
code_props action_props;
/* The location of the first %empty for this rule, or \a
empty_loc. */
location percent_empty_loc;
/* NOTE(review): presumably the value and location of the rule's
%dprec, with 0 meaning "none" (the default set by
symbol_list_sym_new) -- confirm against the grammar reader.  */
int dprec;
location dprec_loc;
/* NOTE(review): presumably the rule's %merge function and the
location of its declaration, with 0 meaning "none" -- confirm
against the grammar reader.  */
int merger;
location merger_declaration_loc;
/* Counts of the number of expected conflicts for this rule, or -1 if none
given. */
int expected_sr_conflicts;
int expected_rr_conflicts;
/* The list. */
struct symbol_list *next;
} symbol_list;
/** Create a list containing \c sym at \c loc. */
symbol_list *symbol_list_sym_new (symbol *sym, location loc);
/** Create a list containing \c type_name at \c loc. */
symbol_list *symbol_list_type_new (uniqstr type_name, location loc);
/** Assign the type \c type_name to all the members of \c syms.
** \returns \c syms */
symbol_list *symbol_list_type_set (symbol_list *syms, uniqstr type_name);
/** Find a symbol with the same content as \c sym within \c syms. */
symbol_list *symbol_list_find_symbol (symbol_list *syms, const symbol *sym);
/** Print this list.
\pre For every node \c n in the list, <tt>n->content_type =
SYMLIST_SYMBOL</tt>. */
void symbol_list_syms_print (const symbol_list *l, FILE *f);
/** Prepend \c node to \c list. */
symbol_list *symbol_list_prepend (symbol_list *list, symbol_list *node);
/** The last node of this list. */
symbol_list *symbol_list_last (symbol_list *list);
/** Append \c node to \c list. */
symbol_list *symbol_list_append (symbol_list *list, symbol_list *node);
/** Free \c list, but not the items it contains. */
void symbol_list_free (symbol_list *list);
/** Return the length of \c l. */
int symbol_list_length (symbol_list const *l);
/** Get item \c n in symbol list \c l.
** \pre 0 <= n
** \post res != NULL
**/
symbol_list *symbol_list_n_get (symbol_list *l, int n);
/** Get the data type (alternative in the union) of the value for
symbol \c n in symbol list \c l. */
uniqstr symbol_list_n_type_name_get (symbol_list *l, int n);
/** Check whether the node is a border element of a rule. */
bool symbol_list_null (symbol_list *node);
/** Set the \c \%destructor or \c \%printer for \c node as \c cprops. */
void symbol_list_code_props_set (symbol_list *node, code_props_type kind,
code_props const *cprops);
#endif /* !SYMLIST_H_ */
File diff suppressed because it is too large Load Diff
+365
View File
@@ -0,0 +1,365 @@
/* Definitions for symtab.c and callers, part of Bison.
Copyright (C) 1984, 1989, 1992, 2000-2002, 2004-2015, 2018-2021 Free
Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
/**
* \file symtab.h
* \brief Manipulating ::symbol.
*/
#ifndef SYMTAB_H_
# define SYMTAB_H_
# include "assoc.h"
# include "location.h"
# include "scan-code.h"
# include "uniqstr.h"
/*----------.
| Symbols. |
`----------*/
/** Symbol classes: what kind of grammar symbol a ::symbol is.
Assigned through symbol_class_set. */
typedef enum
{
/** Undefined. */
unknown_sym,
/** Declared with %type: same as Undefined, but triggered a Wyacc if
applied to a terminal. */
pct_type_sym,
/** Terminal. */
token_sym,
/** Nonterminal. */
nterm_sym
} symbol_class;
/** Internal token numbers. */
typedef int symbol_number;
# define SYMBOL_NUMBER_MAXIMUM INT_MAX
typedef struct symbol symbol;
typedef struct sym_content sym_content;
/* Declaration status of a symbol.
First, it is "undeclared". Then, if "undeclared" and used in a
%printer/%destructor, it is "used". If not "declared" but used in
a rule, it is "needed". Finally, if declared (via a rule for
nonterminals, or %token), it is "declared".
When status are checked at the end, "declared" symbols are fine,
"used" symbols trigger warnings, otherwise it's an error. */
/** How far a symbol (or a semantic type) got in being declared.
The diagnostic attached to each value, given in parentheses below,
is the one issued when the statuses are checked at the end. */
typedef enum
{
/** Used in the input file for an unknown reason (error). */
undeclared,
/** Used by %destructor/%printer but not defined (warning). */
used,
/** Used in the grammar (rules) but not defined (error). */
needed,
/** Defined with %type or %token (good). */
declared,
} declaration_status;
/** The kinds of per-symbol (or per-type) code: \c \%destructor and
\c \%printer.  The values are explicit, presumably because they
serve as indexes in the \c props arrays (TODO confirm). */
enum code_props_type
{
destructor = 0,
printer = 1,
};
typedef enum code_props_type code_props_type;
/** Number of code_props_type values; the size of the \c props arrays. */
enum { CODE_PROPS_SIZE = 2 };
/** A symbol as named in the grammar.  The properties shared with its
alias, if any, live in the ::sym_content pointed to by \a content. */
struct symbol
{
/** The key, name of the symbol. */
uniqstr tag;
/** The "defining" location. */
location location;
/** Whether this symbol is translatable. */
bool translatable;
/** Whether \a location is about the first uses as left-hand side
symbol of a rule (true), or simply the first occurrence (e.g.,
in a %type, or as a rhs symbol of a rule). The former type of
location is more natural in error messages. This Boolean helps
moving from location of the first occurrence to first use as
lhs. */
bool location_of_lhs;
/** Points to the other in the symbol-string pair for an alias. */
symbol *alias;
/** Whether this symbol is the alias of another or not. */
bool is_alias;
/** All the info about the pointed symbol is there. */
sym_content *content;
};
/** The properties of a symbol, reached through symbol::content. */
struct sym_content
{
/** The main symbol that denotes this content (it contains the
possible alias). */
symbol *symbol;
/** Its \c \%type.
Beware that this is the type_name as was entered by the user,
including silly things such as "]" if she entered "%token <]> t".
Therefore, when outputting type_name to M4, be sure to escape it
into "@}". See quoted_output for instance. */
uniqstr type_name;
/** Its \c \%type's location. */
location type_loc;
/** Any \c \%destructor (resp. \%printer) declared specifically for this
symbol.
Access this field only through <tt>symbol</tt>'s interface functions:
use \c symbol_code_props_get to obtain the computed \c \%destructor
(resp. \%printer) rather than reading this member directly. */
code_props props[CODE_PROPS_SIZE];
/** The internal symbol number. */
symbol_number number;
/** Precedence, associativity, and the location where they were set
(see symbol_precedence_set). */
location prec_loc;
int prec;
assoc assoc;
/** Token code, possibly specified by the user (%token FOO 42). */
int code;
/** The class of the symbol (see symbol_class_set). */
symbol_class class;
/** How far the symbol got in being declared. */
declaration_status status;
};
/** Fetch (or create) the symbol associated to KEY. */
symbol *symbol_from_uniqstr (const uniqstr key, location loc);
/** Fetch (or create) the symbol associated to KEY. */
symbol *symbol_get (const char *key, location loc);
/** Generate a dummy nonterminal.
Its name cannot conflict with the user's names. */
symbol *dummy_symbol_get (location loc);
/*--------------------.
| Methods on symbol. |
`--------------------*/
/** Print a symbol (for debugging). */
void symbol_print (symbol const *s, FILE *f);
/** Is this a dummy nonterminal? */
bool symbol_is_dummy (symbol const *sym);
/** The name of the code_props type: "\%destructor" or "\%printer". */
char const *code_props_type_string (code_props_type kind);
/** The name of the symbol that can be used as an identifier.
** Consider the alias if needed.
** Return 0 if there is none (e.g., the symbol is only defined as
** a string). */
uniqstr symbol_id_get (symbol const *sym);
/**
* Make \c str the literal string alias of \c sym. Copy token number,
* symbol number, and type from \c sym to \c str.
*/
void symbol_make_alias (symbol *sym, symbol *str, location loc);
/**
* This symbol is used as the lhs of a rule. Record this location
* as definition point, if not already done.
*/
void symbol_location_as_lhs_set (symbol *sym, location loc);
/** Set the \c type_name associated with \c sym.
Do nothing if passed 0 as \c type_name. */
void symbol_type_set (symbol *sym, uniqstr type_name, location loc);
/** Set the \c \%destructor or \c \%printer associated with \c sym. */
void symbol_code_props_set (symbol *sym, code_props_type kind,
code_props const *destructor);
/** Get the computed \c \%destructor or \c %printer for \c sym, which was
initialized with \c code_props_none_init if there's no \c \%destructor or
\c %printer. */
code_props *symbol_code_props_get (symbol *sym, code_props_type kind);
/** Set the \c precedence associated with \c sym.
Ensure that \a symbol is a terminal.
Do nothing if invoked with \c undef_assoc as \c assoc. */
void symbol_precedence_set (symbol *sym, int prec, assoc a, location loc);
/** Set the \c class associated with \c sym.
Whether \c declaring means whether this class definition comes
from %nterm or %token (but not %type, prec/assoc, etc.). A symbol
can have "declaring" set only at most once. */
void symbol_class_set (symbol *sym, symbol_class class, location loc,
bool declaring);
/** Set the token \c code of \c sym, specified by the user at \c loc. */
void symbol_code_set (symbol *sym, int code, location loc);
/*------------------.
| Special symbols. |
`------------------*/
/** The error token. */
extern symbol *errtoken;
/** The token for unknown tokens. */
extern symbol *undeftoken;
/** The end of input token. */
extern symbol *eoftoken;
/** The genuine start symbol.
$accept: start-symbol $end */
extern symbol *acceptsymbol;
/** Whether a symbol declared with a type tag. */
extern bool tag_seen;
/*-------------------.
| Symbol Relations. |
`-------------------*/
/* The symbol relations are represented by a directed graph. */
/* The id of a node */
typedef int graphid;
typedef struct symgraphlink symgraphlink;
/** One element of a singly linked list of node ids; used for the
\c succ and \c pred lists of a ::symgraph node. */
struct symgraphlink
{
/** The second \c symbol or group of a precedence relation.
* See \c symgraph. */
graphid id;
/** The next link of the list. */
symgraphlink *next;
};
/* Symbol precedence graph, to store the used precedence relations between
* symbols. */
typedef struct symgraph symgraph;
/** One node of the symbol precedence graph. */
struct symgraph
{
/** Identifier for the node: equal to the number of the symbol. */
graphid id;
/** The list of related symbols that have a smaller precedence. */
symgraphlink *succ;
/** The list of related symbols that have a greater precedence. */
symgraphlink *pred;
};
/** Register a new precedence relation as used. */
void register_precedence (graphid first, graphid snd);
/** Print a warning for each symbol whose precedence and/or associativity
* is useless. */
void print_precedence_warnings (void);
/*----------------------.
| Symbol associativity |
`----------------------*/
void register_assoc (graphid i, graphid j);
/*-----------------.
| Semantic types. |
`-----------------*/
/** A semantic type and its associated \c \%destructor and \c \%printer.
Access the fields of this struct only through the interface functions in
this file. \sa symbol::destructor */
typedef struct {
/** The key, name of the semantic type. */
uniqstr tag;
/** The location of its first occurrence. */
location location;
/** Its status : "undeclared", "used" or "declared".
It cannot be "needed". */
declaration_status status;
/** Any \c %destructor and %printer declared for this
semantic type.  One entry per code_props_type (the array has
CODE_PROPS_SIZE elements). */
code_props props[CODE_PROPS_SIZE];
} semantic_type;
/** Fetch (or create) the semantic type associated to KEY. */
semantic_type *semantic_type_from_uniqstr (const uniqstr key,
const location *loc);
/** Fetch (or create) the semantic type associated to KEY. */
semantic_type *semantic_type_get (const char *key, const location *loc);
/** Set the \c destructor or \c printer associated with \c type. */
void semantic_type_code_props_set (semantic_type *type,
code_props_type kind,
code_props const *code);
/*----------------------------------.
| Symbol and semantic type tables. |
`----------------------------------*/
/** Create the symbol and semantic type tables, and the built-in
symbols. */
void symbols_new (void);
/** Free all the memory allocated for symbols and semantic types. */
void symbols_free (void);
/** Check that all the symbols are defined.
Report any undefined symbols and consider them nonterminals. */
void symbols_check_defined (void);
/** Sanity checks and #token_translations construction.
Perform various sanity checks, assign symbol numbers, and set up
#token_translations. */
void symbols_pack (void);
#endif /* !SYMTAB_H_ */
+282
View File
@@ -0,0 +1,282 @@
/* System-dependent definitions for Bison.
Copyright (C) 2000-2007, 2009-2015, 2018-2021 Free Software
Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef BISON_SYSTEM_H
# define BISON_SYSTEM_H
/* flex 2.5.31 gratuitously defines macros like INT8_MIN. But this
runs afoul of pre-C99 compilers that have <inttypes.h> or
<stdint.h>, which are included below if available. It also runs
afoul of pre-C99 compilers that define these macros in <limits.h>. */
# if ! defined __STDC_VERSION__ || __STDC_VERSION__ < 199901
# undef INT8_MIN
# undef INT16_MIN
# undef INT32_MIN
# undef INT8_MAX
# undef INT16_MAX
# undef UINT8_MAX
# undef INT32_MAX
# undef UINT16_MAX
# undef UINT32_MAX
# endif
# include <limits.h>
# include <stddef.h>
# include <stdlib.h>
# include <string.h>
# define ARRAY_CARDINALITY(Array) (sizeof (Array) / sizeof *(Array))
# define STREQ(L, R) (strcmp(L, R) == 0)
# define STRNEQ(L, R) (!STREQ(L, R))
/* Just like strncmp, but the second argument must be a literal string
and you don't specify the length. */
# define STRNCMP_LIT(S, Literal) \
strncmp (S, "" Literal "", sizeof (Literal) - 1)
/* Whether Literal is a prefix of S. */
# define STRPREFIX_LIT(Literal, S) \
(STRNCMP_LIT (S, Literal) == 0)
# include <unistd.h>
# include <inttypes.h>
# ifndef UINTPTR_MAX
/* This isn't perfect, but it's good enough for Bison, which needs
only to hash pointers. */
typedef size_t uintptr_t;
# endif
/* Version mismatch. */
# define EX_MISMATCH 63
/*---------.
| Gnulib. |
`---------*/
# include <unlocked-io.h>
# include <verify.h>
# include <xalloc.h>
// Clang and ICC like to pretend they are GCC.
# if defined __GNUC__ && !defined __clang__ && !defined __ICC
# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
# endif
// See https://lists.gnu.org/r/bug-bison/2019-10/msg00061.html
// and https://trac.macports.org/ticket/59927.
# if defined GCC_VERSION && 405 <= GCC_VERSION
# define IGNORE_TYPE_LIMITS_BEGIN \
_Pragma ("GCC diagnostic push") \
_Pragma ("GCC diagnostic ignored \"-Wtype-limits\"")
# define IGNORE_TYPE_LIMITS_END \
_Pragma ("GCC diagnostic pop")
# else
# define IGNORE_TYPE_LIMITS_BEGIN
# define IGNORE_TYPE_LIMITS_END
# endif
/*-----------------.
| GCC extensions. |
`-----------------*/
/* Use PACIFY_CC to indicate that Code is unimportant to the logic of Bison
but that it is necessary for suppressing compiler warnings. For example,
Code might be a variable initializer that's always overwritten before the
variable is used.
PACIFY_CC is intended to be useful only as a comment as it does not alter
Code. It is tempting to redefine PACIFY_CC so that it will suppress Code
when configuring without --enable-gcc-warnings. However, that would mean
that, for maintainers, Bison would compile with potentially less warnings
and safer logic than it would for users. Due to the overhead of M4,
suppressing Code is unlikely to offer any significant improvement in
Bison's performance anyway. */
# define PACIFY_CC(Code) Code
# include <attribute.h>
/*------.
| NLS. |
`------*/
# include <locale.h>
# include <gettext.h>
# define _(Msgid) gettext (Msgid)
# define N_(Msgid) (Msgid)
/*-----------.
| Booleans. |
`-----------*/
# include <stdbool.h>
/*-----------.
| Integers. |
`-----------*/
/* Return the lesser of A and B.  */
static inline int
min_int (int a, int b)
{
  if (b <= a)
    return b;
  return a;
}
/* Return the greater of A and B.  */
static inline int
max_int (int a, int b)
{
  if (a < b)
    return b;
  return a;
}
/*-------------.
| Assertions. |
`-------------*/
/* In the past, Bison defined aver to simply invoke abort in the case of
a failed assertion. The rationale was that <assert.h>'s assertions
were too heavyweight and could be disabled too easily. See
discussions at
<https://lists.gnu.org/r/bison-patches/2006-01/msg00080.html>
<https://lists.gnu.org/r/bison-patches/2006-09/msg00111.html>.
However, normal assert output can be helpful during development and
in bug reports from users. Moreover, it's not clear now that
<assert.h>'s assertions are significantly heavyweight. Finally, if
users want to experiment with disabling assertions, it's debatable
whether it's our responsibility to stop them. See discussion
starting at
<https://lists.gnu.org/r/bison-patches/2009-09/msg00013.html>.
For now, we use assert but we call it aver throughout Bison in case
we later wish to try another scheme.
*/
# include <assert.h>
# define aver assert
/*-----------.
| Obstacks. |
`-----------*/
# define obstack_chunk_alloc xmalloc
# define obstack_chunk_free free
# include <obstack.h>
/* String-grow: append Str to Obs. */
# define obstack_sgrow(Obs, Str) \
obstack_grow (Obs, Str, strlen (Str))
/* Append Str to Obs, escaped to be a string: every double quote and
   every backslash is preceded by a backslash, all the other
   characters are copied verbatim.
   For instance "\"foo\"" -> "\\\"foo\\\"".  */
# define obstack_backslash(Obs, Str)                    \
  do {                                                  \
    char const *p__ = Str;                              \
    while (*p__)                                        \
      {                                                 \
        if (*p__ == '"')                                \
          obstack_sgrow (Obs, "\\\"");                  \
        else if (*p__ == '\\')                          \
          obstack_sgrow (Obs, "\\\\");                  \
        else                                            \
          obstack_1grow (Obs, *p__);                    \
        p__++;                                          \
      }                                                 \
  } while (0)
/* Append Str to Obs, escaped for our postprocessing (i.e., with the
   M4 special characters '$', '@', '[' and ']' replaced by their
   escape sequences).
   For instance "[foo]" -> "@{foo@}", "$$" -> "$][$][".  */
# define obstack_escape(Obs, Str)                       \
  do {                                                  \
    char const *p__ = Str;                              \
    while (*p__)                                        \
      {                                                 \
        if (*p__ == '$')                                \
          obstack_sgrow (Obs, "$][");                   \
        else if (*p__ == '@')                           \
          obstack_sgrow (Obs, "@@");                    \
        else if (*p__ == '[')                           \
          obstack_sgrow (Obs, "@{");                    \
        else if (*p__ == ']')                           \
          obstack_sgrow (Obs, "@}");                    \
        else                                            \
          obstack_1grow (Obs, *p__);                    \
        p__++;                                          \
      }                                                 \
  } while (0)
/* Append Str to Obs both quoted for M4 (i.e., embedded in [[...]])
   and escaped for our postprocessing (i.e., with M4 special
   characters escaped).  When Str is NULL or empty, append "[]"
   rather than "[[]]": it makes M4 programming easier (m4_ifval can
   be used).
   For instance "[foo]" -> "[[@{foo@}]]", "$$" -> "[[$][$][]]".  */
# define obstack_quote(Obs, Str)                        \
  do {                                                  \
    char const *obstack_quote_p = Str;                  \
    if (!obstack_quote_p || !obstack_quote_p[0])        \
      obstack_sgrow (Obs, "[]");                        \
    else                                                \
      {                                                 \
        obstack_sgrow (Obs, "[[");                      \
        obstack_escape (Obs, obstack_quote_p);          \
        obstack_sgrow (Obs, "]]");                      \
      }                                                 \
  } while (0)
/* Append the ending 0, finish Obs, and return the string. */
# define obstack_finish0(Obs) \
(obstack_1grow (Obs, '\0'), (char *) obstack_finish (Obs))
/*-----------------------------------------.
| Extensions to use for the output files. |
`-----------------------------------------*/
# ifndef OUTPUT_EXT
# define OUTPUT_EXT ".output"
# endif
# ifndef TAB_EXT
# define TAB_EXT ".tab"
# endif
/*---------------------.
| Free a linked list. |
`---------------------*/
/* Free every node of the singly linked list List, whose nodes are of
   type Type and are chained through their 'next' member.  The
   successor is fetched before the node is released.  */
# define LIST_FREE(Type, List)                  \
  do {                                          \
    Type *_node = List;                         \
    while (_node)                               \
      {                                         \
        Type *_next = _node->next;              \
        free (_node);                           \
        _node = _next;                          \
      }                                         \
  } while (0)
#endif /* ! BISON_SYSTEM_H */
+910
View File
@@ -0,0 +1,910 @@
/* Output the generated parsing program for Bison.
Copyright (C) 1984, 1986, 1989, 1992, 2000-2006, 2009-2015, 2018-2021
Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include <bitset.h>
#include <bitsetv.h>
#include "complain.h"
#include "conflicts.h"
#include "files.h"
#include "getargs.h"
#include "gram.h"
#include "lalr.h"
#include "muscle-tab.h"
#include "reader.h"
#include "symtab.h"
#include "tables.h"
/* Several tables are indexed both by state and nonterminal numbers.
We call such an index a 'vector'; i.e., a vector is either a state
or a nonterminal number.
Of course vector_number ought to be wide enough to contain
state_number and symbol_number. */
typedef int vector_number;
#if 0 /* Not currently used. */
static inline vector_number
state_number_to_vector_number (state_number s)
{
return s;
}
#endif
/* Return the vector number of the nonterminal SYM: the vectors of
   the nonterminals come after those of the NSTATES states.  */
static inline vector_number
symbol_number_to_vector_number (symbol_number sym)
{
  const int first_nterm_vector = state_number_as_int (nstates);
  return first_nterm_vector + sym - ntokens;
}
int nvectors;
/* FROMS and TOS are indexed by vector_number.
If VECTOR is a nonterminal, (FROMS[VECTOR], TOS[VECTOR]) form an
array of state numbers of the non defaulted GOTO on VECTOR.
If VECTOR is a state, TOS[VECTOR] is the array of actions to do on
the (array of) symbols FROMS[VECTOR].
In both cases, TALLY[VECTOR] is the size of the arrays
FROMS[VECTOR], TOS[VECTOR]; and WIDTH[VECTOR] =
(FROMS[VECTOR][SIZE - 1] - FROMS[VECTOR][0] + 1) where SIZE =
TALLY[VECTOR].
FROMS therefore contains symbol_number and action_number,
TOS state_number and action_number,
TALLY sizes,
WIDTH differences of FROMS.
Let base_number be the type of FROMS, TOS, and WIDTH. */
#define BASE_MAXIMUM INT_MAX
#define BASE_MINIMUM INT_MIN
static base_number **froms;
static base_number **tos;
static int **conflict_tos;
static size_t *tally;
static base_number *width;
/* For a given state, N = ACTROW[SYMBOL]:
If N = 0, stands for 'run the default action'.
If N = MIN, stands for 'raise a syntax error'.
If N > 0, stands for 'shift SYMBOL and go to n'.
If N < 0, stands for 'reduce -N'. */
typedef int action_number;
#define ACTION_NUMBER_MINIMUM INT_MIN
static action_number *actrow;
/* FROMS and TOS are reordered to be compressed. ORDER[VECTOR] is the
new vector number of VECTOR. We skip 'empty' vectors (i.e.,
TALLY[VECTOR] = 0), and call these 'entries'. */
static vector_number *order;
static int nentries;
base_number *base = NULL;
/* A distinguished value of BASE, negative infinity. During the
computation it is equal to BASE_MINIMUM; it is later remapped to
BASE_NINF to keep parser tables small. */
base_number base_ninf = 0;
/* Bitset representing an integer set in the range
POS_SET_BASE..(POS_SET_BASE + SIZE). POS_SET_BASE is
nonpositive. */
static bitset pos_set = NULL;
/* The integer denoted by bitno 0 in pos_set. */
static int pos_set_base = 0;
static int *conflrow;
int *conflict_table;
int *conflict_list;
int conflict_list_cnt;
static int conflict_list_free;
/* TABLE_SIZE is the allocated size of both TABLE and CHECK. We start
with more or less the original hard-coded value (which was
SHRT_MAX). */
static int table_size = 32768;
base_number *table;
base_number *check;
/* The value used in TABLE to denote explicit syntax errors
(%nonassoc), a negative infinity. It first defaults to
ACTION_NUMBER_MINIMUM, but in order to keep tables small, it is
renumbered as TABLE_NINF, which is the smallest (non error) value
minus 1. */
base_number table_ninf = 0;
static int lowzero;
int high;
state_number *yydefgoto;
rule_number *yydefact;
/*----------.
| pos_set. |
`----------*/
#if 0
/* Debugging aid, compiled out by the enclosing '#if 0': print on
stderr the size and the base of POS_SET, followed by each position
it contains (bit number + POS_SET_BASE).
NOTE(review): the return type of bitset_size is not visible here;
confirm that it matches '%ld' before re-enabling this function. */
static void
pos_set_dump (void)
{
fprintf (stderr, "pos_set (%ld, %d) =", bitset_size (pos_set), pos_set_base);
bitset_iterator biter;
int i;
BITSET_FOR_EACH (biter, pos_set, i, 0)
fprintf (stderr, " %d", i + pos_set_base);
putc ('\n', stderr);
}
#endif
/* The size and base of POS_SET are not known, we need to be able to
move the base farther "on the left", and grow "on the right".
It would be nice to be able to predict the base accurately, but it
seems difficult (-nstates seems to work most of the time, except
when there are useless tokens).
FIXME: The current approach is correct, but with poor performances.
Bitsets need to support 'assign' and 'shift'. And instead of
extending POS_SET just for the out-of-range new values, we need
something like doubling the size.
*/
/* Record POS in POS_SET. Bit number N of POS_SET stands for the
position N + POS_SET_BASE, so the set is first enlarged on the left
(lowering POS_SET_BASE down to POS) or on the right when POS falls
outside of the range currently covered. */
static void
pos_set_set (int pos)
{
int bitno = pos - pos_set_base;
if (bitno < 0)
{
// Need more room on the left.
// DELTA is positive. Run 'pos_set >> delta'.
const int delta = pos_set_base - pos;
const int old_size = bitset_size (pos_set);
const int new_size = old_size + delta;
bitset_resize (pos_set, new_size);
// Right-shift all the bits by DELTA. Be sure to reset the new
// bits on the left.
//
// Walk from the highest bit down: the source bit I - DELTA is
// below I, so it is read before this loop overwrites it.
//
// FIXME: add bitset_assign, and bitset_shift?
for (int i = new_size - 1; 0 <= i ; --i)
if (delta <= i && bitset_test (pos_set, i - delta))
bitset_set (pos_set, i);
else
bitset_reset (pos_set, i);
// POS is now the lowest position covered, i.e., bit 0.
pos_set_base = pos;
bitno = 0;
}
else if (bitset_size (pos_set) <= bitno)
// Need more room on the right.
bitset_resize (pos_set, bitno + 1);
bitset_set (pos_set, bitno);
}
/* Whether POS was recorded in POS_SET.  POS is first translated into
   a bit number relative to POS_SET_BASE.  */
static bool
pos_set_test (int pos)
{
  return bitset_test (pos_set, pos - pos_set_base);
}
/*-------------------------------------------------------------------.
| If TABLE, CONFLICT_TABLE, and CHECK are too small to be addressed |
| at DESIRED, grow them. TABLE[DESIRED] can be used, so the desired |
| size is at least DESIRED + 1. |
`-------------------------------------------------------------------*/
/* Double TABLE_SIZE until index DESIRED is addressable, then resize
   TABLE, CONFLICT_TABLE and CHECK accordingly.  The new slots of
   TABLE and CONFLICT_TABLE are zeroed, those of CHECK are set
   to -1.  */
static void
table_grow (int desired)
{
  const int old_size = table_size;

  int new_size = old_size;
  while (new_size <= desired)
    new_size *= 2;
  table_size = new_size;

  if (trace_flag & trace_resource)
    fprintf (stderr, "growing tables from %d to %d\n",
             old_size, new_size);

  /* Number of slots appended to each of the three arrays.  */
  const size_t added = new_size - old_size;

  table = xnrealloc (table, new_size, sizeof *table);
  memset (&table[old_size], 0, added * sizeof *table);

  conflict_table = xnrealloc (conflict_table, new_size,
                              sizeof *conflict_table);
  memset (&conflict_table[old_size], 0, added * sizeof *conflict_table);

  check = xnrealloc (check, new_size, sizeof *check);
  for (int slot = old_size; slot < new_size; ++slot)
    check[slot] = -1;
}
/*-------------------------------------------------------------------.
| For GLR parsers, for each conflicted token in S, as indicated |
| by non-zero entries in CONFLROW, create a list of possible |
| reductions that are alternatives to the shift or reduction |
| currently recorded for that token in S. Store the alternative |
| reductions followed by a 0 in CONFLICT_LIST, updating |
| CONFLICT_LIST_CNT, and storing an index to the start of the list |
| back into CONFLROW. |
`-------------------------------------------------------------------*/
/* S is the state whose ACTROW and CONFLROW were just computed.  Does
   nothing unless a GLR parser is being generated.  */
static void
conflict_row (state *s)
{
  if (!nondeterministic_parser)
    return;

  const reductions *reds = s->reductions;
  /* NOTE(review): J ranges over tokens; the other loops over NTOKENS
     in this file declare their index as a symbol_number.  */
  for (state_number j = 0; j < ntokens; j += 1)
    {
      if (!conflrow[j])
        continue;

      /* The alternatives for token J start at the current end of
         CONFLICT_LIST.  */
      conflrow[j] = conflict_list_cnt;

      /* Record each reduction on token J that differs from
         ACTROW[J], stored as its rule number + 1.  */
      for (int i = 0; i < reds->num; i += 1)
        {
          if (!bitset_test (reds->lookaheads[i], j))
            continue;
          if (actrow[j]
              == rule_number_as_item_number (reds->rules[i]->number))
            continue;

          aver (0 < conflict_list_free);
          conflict_list[conflict_list_cnt++] = reds->rules[i]->number + 1;
          conflict_list_free--;
        }

      /* Terminate the list with a 0.  */
      aver (0 < conflict_list_free);
      conflict_list[conflict_list_cnt++] = 0;
      conflict_list_free--;
    }
}
/*------------------------------------------------------------------.
| Decide what to do for each type of token if seen as the |
| lookahead in specified state. The value returned is used as the |
| default action (yydefact) for the state. In addition, ACTROW is |
| filled with what to do for each kind of token, index by symbol |
| number, with zero meaning do the default action. The value |
| ACTION_NUMBER_MINIMUM, a very negative number, means this |
| situation is an error. The parser recognizes this value |
| specially. |
| |
| This is where conflicts are resolved. The loop over lookahead |
| rules considered lower-numbered rules last, and the last rule |
| considered that likes a token gets to handle it. |
| |
| For GLR parsers, also sets CONFLROW[SYM] to an index into |
| CONFLICT_LIST iff there is an unresolved conflict (s/r or r/r) |
| with symbol SYM. The default reduction is not used for a symbol |
| that has any such conflicts. |
`------------------------------------------------------------------*/
/* Fill ACTROW and CONFLROW (both indexed by token number) for state
   S, and return its default reduction, or NULL if it has none.  */
static rule *
action_row (state *s)
{
  for (symbol_number i = 0; i < ntokens; i++)
    actrow[i] = conflrow[i] = 0;

  reductions *reds = s->reductions;
  bool conflicted = false;
  if (reds->lookaheads)
    /* Loop over all the rules available here which require
       lookahead (in reverse order to give precedence to the first
       rule).  */
    for (int i = reds->num - 1; 0 <= i; --i)
      /* And find each token which the rule finds acceptable to come
         next.  */
      {
        bitset_iterator biter;
        int j;
        BITSET_FOR_EACH (biter, reds->lookaheads[i], j, 0)
          {
            /* And record this rule as the rule to use if that token
               follows.  A slot that is already taken denotes a
               conflict.  */
            if (actrow[j] != 0)
              {
                conflicted = true;
                conflrow[j] = 1;
              }
            actrow[j] = rule_number_as_item_number (reds->rules[i]->number);
          }
      }

  /* Now see which tokens are allowed for shifts in this state.  For
     them, record the shift as the thing to do.  So shift is preferred
     to reduce.  */
  transitions *trans = s->transitions;
  /* Set to true to inhibit having any default reduction.  */
  bool nodefault = false;

  {
    int i;
    FOR_EACH_SHIFT (trans, i)
      {
        symbol_number sym = TRANSITION_SYMBOL (trans, i);
        state *shift_state = trans->states[i];

        if (actrow[sym] != 0)
          {
            conflicted = true;
            conflrow[sym] = 1;
          }
        actrow[sym] = state_number_as_int (shift_state->number);

        /* Do not use any default reduction if there is a shift for
           error.  */
        if (sym == errtoken->content->number)
          nodefault = true;
      }
  }

  /* See which tokens are an explicit error in this state (due to
     %nonassoc).  For them, record ACTION_NUMBER_MINIMUM as the
     action.  */
  errs *errp = s->errs;
  for (int i = 0; i < errp->num; i++)
    {
      symbol *sym = errp->symbols[i];
      actrow[sym->content->number] = ACTION_NUMBER_MINIMUM;
    }

  /* Turn off default reductions where requested by the user.  See
     state_lookaheads_count in lalr.c to understand when states are
     labeled as consistent.  */
  {
    char *default_reductions =
      muscle_percent_define_get ("lr.default-reduction");
    if (STRNEQ (default_reductions, "most") && !s->consistent)
      nodefault = true;
    free (default_reductions);
  }

  /* Now find the most common reduction and make it the default action
     for this state.  */
  rule *default_reduction = NULL;
  if (reds->num >= 1 && !nodefault)
    {
      if (s->consistent)
        default_reduction = reds->rules[0];
      else
        {
          /* Pick the rule used by the largest number of tokens; on a
             tie the first such rule wins.  */
          int max = 0;
          for (int i = 0; i < reds->num; i++)
            {
              int count = 0;
              rule *r = reds->rules[i];
              for (symbol_number j = 0; j < ntokens; j++)
                if (actrow[j] == rule_number_as_item_number (r->number))
                  count++;

              if (count > max)
                {
                  max = count;
                  default_reduction = r;
                }
            }

          /* GLR parsers need space for conflict lists, so we can't
             default conflicted entries.  For non-conflicted entries
             or as long as we are not building a GLR parser,
             actions that match the default are replaced with zero,
             which means "use the default".  */
          if (0 < max)
            for (symbol_number j = 0; j < ntokens; j++)
              if (actrow[j]
                  == rule_number_as_item_number (default_reduction->number)
                  && ! (nondeterministic_parser && conflrow[j]))
                actrow[j] = 0;
        }
    }

  /* If there is no default reduction, the default is an error.
     So replace any action which says "error" with "use default".  */
  if (!default_reduction)
    for (symbol_number i = 0; i < ntokens; i++)
      if (actrow[i] == ACTION_NUMBER_MINIMUM)
        actrow[i] = 0;

  if (conflicted)
    conflict_row (s);

  return default_reduction;
}
/*----------------------------------------.
| Set FROMS, TOS, TALLY and WIDTH for S. |
`----------------------------------------*/
/* Copy the non default actions of ACTROW (and, for GLR parsers, the
   matching CONFLROW entries) into freshly allocated FROMS[S], TOS[S]
   and CONFLICT_TOS[S], and set TALLY[S] and WIDTH[S].  Nothing is
   stored when every action of S is the default one.  */
static void
save_row (state_number s)
{
  /* Number of non default actions in S.  */
  size_t count = 0;
  for (symbol_number sym = 0; sym < ntokens; sym++)
    count += actrow[sym] != 0;

  if (count == 0)
    return;

  /* Allocate non defaulted actions.  */
  base_number *from = xnmalloc (count, sizeof *from);
  base_number *to = xnmalloc (count, sizeof *to);
  int *conflict =
    nondeterministic_parser ? xnmalloc (count, sizeof *conflict) : NULL;
  froms[s] = from;
  tos[s] = to;
  conflict_tos[s] = conflict;

  /* Store non defaulted actions, in increasing token order.  */
  size_t n = 0;
  for (symbol_number sym = 0; sym < ntokens; sym++)
    {
      if (actrow[sym] == 0)
        continue;
      from[n] = sym;
      to[n] = actrow[sym];
      if (nondeterministic_parser)
        conflict[n] = conflrow[sym];
      n++;
    }

  tally[s] = count;
  /* Span between the first and the last stored token, inclusive.  */
  width[s] = from[count - 1] - from[0] + 1;
}
/*------------------------------------------------------------------.
| Figure out the actions for the specified state, indexed by |
| lookahead token kind. |
| |
| The YYDEFACT table is output now. The detailed info is saved for |
| putting into YYTABLE later. |
`------------------------------------------------------------------*/
/* Compute YYDEFACT and, for each state, save its non default actions
   (see save_row).  Allocates YYDEFACT and CONFLICT_LIST; ACTROW and
   CONFLROW only live for the duration of this function.  When not
   generating a GLR parser, also recompute the 'useful' flag of each
   rule.  */
static void
token_actions (void)
{
  int nconflict = nondeterministic_parser ? conflicts_total_count () : 0;

  yydefact = xnmalloc (nstates, sizeof *yydefact);

  actrow = xnmalloc (ntokens, sizeof *actrow);
  conflrow = xnmalloc (ntokens, sizeof *conflrow);

  conflict_list = xnmalloc (1 + 2 * nconflict, sizeof *conflict_list);
  /* Entries are appended from index 1 on, because a zero in CONFLROW
     means "no conflict".  Slot 0 is therefore never written by
     conflict_row: give it a defined value rather than leaving it
     indeterminate.  */
  conflict_list[0] = 0;
  conflict_list_free = 2 * nconflict;
  conflict_list_cnt = 1;

  /* Find the rules which are reduced.  */
  if (!nondeterministic_parser)
    for (rule_number r = 0; r < nrules; ++r)
      rules[r].useful = false;

  for (state_number i = 0; i < nstates; ++i)
    {
      rule *default_reduction = action_row (states[i]);
      /* YYDEFACT stores the rule number + 1, so that 0 can stand for
         "no default reduction".  */
      yydefact[i] = default_reduction ? default_reduction->number + 1 : 0;
      save_row (i);

      /* Now that the parser was computed, we can find which rules are
         really reduced, and which are not because of SR or RR
         conflicts.  */
      if (!nondeterministic_parser)
        {
          for (symbol_number j = 0; j < ntokens; ++j)
            if (actrow[j] < 0 && actrow[j] != ACTION_NUMBER_MINIMUM)
              rules[item_number_as_rule_number (actrow[j])].useful = true;
          if (yydefact[i])
            rules[yydefact[i] - 1].useful = true;
        }
    }

  /* The rows are file-scope variables: do not leave them dangling.  */
  free (actrow);
  actrow = NULL;
  free (conflrow);
  conflrow = NULL;
}
/*------------------------------------------------------------------.
| Compute FROMS[VECTOR], TOS[VECTOR], TALLY[VECTOR], WIDTH[VECTOR], |
| i.e., the information related to non defaulted GOTO on the nterm |
| SYM. |
| |
| DEFAULT_STATE is the principal destination on SYM, i.e., the |
| default GOTO destination on SYM. |
`------------------------------------------------------------------*/
/* Save in FROMS/TOS/TALLY/WIDTH the gotos on the nonterminal SYM
   whose destination is not DEFAULT_STATE.  Nothing is stored when
   every goto on SYM leads to DEFAULT_STATE.  */
static void
save_column (symbol_number sym, state_number default_state)
{
  const goto_number begin = goto_map[sym - ntokens];
  const goto_number end = goto_map[sym - ntokens + 1];

  /* Number of non default GOTO.  */
  size_t count = 0;
  for (goto_number g = begin; g < end; g++)
    count += to_state[g] != default_state;

  if (count == 0)
    return;

  /* Allocate room for non defaulted gotos.  */
  const vector_number symno = symbol_number_to_vector_number (sym);
  base_number *from = xnmalloc (count, sizeof *from);
  base_number *to = xnmalloc (count, sizeof *to);
  froms[symno] = from;
  tos[symno] = to;

  /* Store the state numbers of the non defaulted gotos.  */
  size_t n = 0;
  for (goto_number g = begin; g < end; g++)
    if (to_state[g] != default_state)
      {
        from[n] = from_state[g];
        to[n] = to_state[g];
        n++;
      }

  tally[symno] = count;
  /* Span between the first and the last stored source state,
     inclusive.  */
  width[symno] = from[count - 1] - from[0] + 1;
}
/*----------------------------------------------------------------.
| The default state for SYM: the state which is 'the' most common |
| GOTO destination on SYM (an nterm). |
`----------------------------------------------------------------*/
/* STATE_COUNT is scratch space with room for NSTATES counters; it is
   left untouched when there is no goto on SYM.  */
static state_number
default_goto (symbol_number sym, size_t state_count[])
{
  const goto_number begin = goto_map[sym - ntokens];
  const goto_number end = goto_map[sym - ntokens + 1];

  /* A symbol without any goto (the case of $accept) defaults to
     state 0.  -1 used to be returned instead, but that forced the
     yydefgoto table to be signed, which (1) might trigger compiler
     warnings when storing a value from yydefgoto into a
     (nonnegative) state number, and (2) wastes bits, possibly
     requiring an int16 where a uint8 suffices.  */
  if (begin == end)
    return 0;

  /* Tally how many gotos on SYM lead to each state.  */
  for (state_number s = 0; s < nstates; s++)
    state_count[s] = 0;
  for (goto_number g = begin; g < end; g++)
    state_count[to_state[g]] += 1;

  /* Keep the most frequent destination; on a tie the lowest state
     number wins.  */
  state_number best = 0;
  size_t best_count = 0;
  for (state_number s = 0; s < nstates; s++)
    if (state_count[s] > best_count)
      {
        best_count = state_count[s];
        best = s;
      }
  return best;
}
/*-------------------------------------------------------------------.
| Figure out what to do after reducing with each rule, depending on |
| the saved state from before the beginning of parsing the data that |
| matched this rule. |
| |
| The YYDEFGOTO table is output now. The detailed info is saved for |
| putting into YYTABLE later. |
`-------------------------------------------------------------------*/
static void
goto_actions (void)
{
size_t *state_count = xnmalloc (nstates, sizeof *state_count);
yydefgoto = xnmalloc (nnterms, sizeof *yydefgoto);
/* For a given nterm I, STATE_COUNT[S] is the number of times there
is a GOTO to S on I. */
for (symbol_number i = ntokens; i < nsyms; ++i)
{
state_number default_state = default_goto (i, state_count);
save_column (i, default_state);
yydefgoto[i - ntokens] = default_state;
}
free (state_count);
}
/*------------------------------------------------------------------.
| Compute ORDER, a reordering of vectors, in order to decide how to |
| pack the actions and gotos information into yytable. |
`------------------------------------------------------------------*/
/* Insertion sort of the non empty vectors into ORDER, by decreasing
   WIDTH, then by decreasing TALLY; vectors that compare equal keep
   their relative order.  NENTRIES is set to the number of vectors
   stored in ORDER.  */
static void
sort_actions (void)
{
  nentries = 0;

  for (int v = 0; v < nvectors; v++)
    {
      const size_t t = tally[v];
      if (t == 0)
        continue;
      const int w = width[v];

      /* SLOT is where V goes: step backwards over the entries that
         are narrower, then over those as wide but with a smaller
         tally.  */
      int slot = nentries;
      while (0 < slot && width[order[slot - 1]] < w)
        slot--;
      while (0 < slot
             && width[order[slot - 1]] == w
             && tally[order[slot - 1]] < t)
        slot--;

      /* Make room by shifting the tail one step to the right.  */
      for (int k = nentries; slot < k; k--)
        order[k] = order[k - 1];

      order[slot] = v;
      nentries++;
    }
}
/* If VECTOR is a state whose actions (reflected by FROMS, TOS, TALLY
and WIDTH of VECTOR) are common to a previous state, return this
state number.
In any other case, return -1. */
static state_number
matching_state (vector_number vector)
{
  const vector_number i = order[vector];

  /* Vectors numbered NSTATES and above are nterms: no match.  */
  if (nstates <= i)
    return -1;

  const size_t t = tally[i];
  const int w = width[i];

  /* A vector with GLR conflicts is never shared.  */
  if (conflict_tos[i] != NULL)
    for (int j = 0; j < t; j += 1)
      if (conflict_tos[i][j] != 0)
        return -1;

  for (int prev = vector - 1; 0 <= prev; prev--)
    {
      const vector_number cand = order[prev];

      /* Given how ORDER was computed, once the WIDTH or the TALLY
         differs, no earlier entry can match either.  */
      if (width[cand] != w || tally[cand] != t)
        return -1;

      /* CAND matches when it has the very same FROMS and TOS, and
         none of its entries has a conflict.  */
      bool match = true;
      for (int k = 0; match && k < t; k++)
        if (tos[cand][k] != tos[i][k]
            || froms[cand][k] != froms[i][k]
            || (conflict_tos[cand] != NULL && conflict_tos[cand][k] != 0))
          match = false;

      if (match)
        return cand;
    }

  return -1;
}
/* Find the lowest base RES such that all the entries of the vector
ORDER[VECTOR] fall on unused slots of TABLE (i.e., TABLE[RES + FROM[K]]
is 0 for each K) and such that RES is not already in POS_SET. Store
the entries in TABLE and CHECK (and in CONFLICT_TABLE for GLR
parsers), update LOWZERO and HIGH, and return RES. TABLE is grown on
demand. */
static base_number
pack_vector (vector_number vector)
{
vector_number i = order[vector];
size_t t = tally[i];
base_number *from = froms[i];
base_number *to = tos[i];
int *conflict_to = conflict_tos[i];
aver (t != 0);
/* Start with the base that maps the first entry onto LOWZERO, and
try each following base in turn. */
for (base_number res = lowzero - from[0]; ; res++)
{
bool ok = true;
aver (res < table_size);
{
/* Every slot needed by the vector must be unused. */
for (int k = 0; ok && k < t; k++)
{
int loc = res + state_number_as_int (from[k]);
if (table_size <= loc)
table_grow (loc);
if (table[loc] != 0)
ok = false;
}
/* And the base itself must not be used by another vector. */
if (ok && pos_set_test (res))
ok = false;
}
if (ok)
{
/* The initializer only pacifies the compiler: T is nonzero, so the
loop below always assigns LOC. */
int loc PACIFY_CC (= -1);
for (int k = 0; k < t; k++)
{
loc = res + state_number_as_int (from[k]);
table[loc] = to[k];
if (nondeterministic_parser && conflict_to != NULL)
conflict_table[loc] = conflict_to[k];
check[loc] = from[k];
}
/* Advance LOWZERO to the first slot of TABLE that is still 0. */
while (table[lowzero] != 0)
lowzero++;
/* LOC is the slot of the last entry stored; keep HIGH at least
that large. */
if (high < loc)
high = loc;
aver (BASE_MINIMUM <= res && res <= BASE_MAXIMUM);
return res;
}
}
}
/*---------------------------------------------------------------.
| Remap the negative infinity in TAB from NINF to the greatest   |
| value that is still smaller than every other entry (and than   |
| 0). Return it.                                                 |
|                                                                |
| In most cases this allows us to use shorts instead of ints in  |
| parsers.                                                       |
`---------------------------------------------------------------*/
/* TAB has SIZE entries.  Every entry equal to NINF is replaced by
   the returned value, which is one less than the smallest of 0 and
   the entries other than NINF.  */
static base_number
table_ninf_remap (base_number tab[], int size, base_number ninf)
{
  /* Smallest value in use, NINF excluded (never greater than 0).  */
  base_number lowest = 0;
  for (int i = 0; i < size; i++)
    if (tab[i] != ninf && tab[i] < lowest)
      lowest = tab[i];

  const base_number replacement = lowest - 1;

  for (int i = 0; i < size; i++)
    if (tab[i] == ninf)
      tab[i] = replacement;

  return replacement;
}
/* Pack the vectors into TABLE and CHECK (and CONFLICT_TABLE) in the
   order given by ORDER, recording in BASE where each vector was
   placed.  Allocates BASE, TABLE, CONFLICT_TABLE and CHECK, and
   finally computes BASE_NINF and TABLE_NINF.  POS_SET only lives for
   the duration of this function.  */
static void
pack_table (void)
{
  base = xnmalloc (nvectors, sizeof *base);

  pos_set = bitset_create (table_size + nstates, BITSET_FRUGAL);
  pos_set_base = -nstates;

  table = xcalloc (table_size, sizeof *table);
  conflict_table = xcalloc (table_size, sizeof *conflict_table);
  check = xnmalloc (table_size, sizeof *check);

  lowzero = 0;
  high = 0;

  /* BASE_MINIMUM marks the vectors that are not placed.  */
  for (int i = 0; i < nvectors; i++)
    base[i] = BASE_MINIMUM;

  for (int i = 0; i < table_size; i++)
    check[i] = -1;

  for (int i = 0; i < nentries; i++)
    {
      state_number s = matching_state (i);
      base_number place;

      if (s < 0)
        /* A new set of state actions, or a nonterminal.  */
        place = pack_vector (i);
      else
        /* Action of I were already coded for S.  */
        place = base[s];

      pos_set_set (place);
      base[order[i]] = place;
    }

  /* Use the greatest possible negative infinites.  */
  base_ninf = table_ninf_remap (base, nvectors, BASE_MINIMUM);
  table_ninf = table_ninf_remap (table, high + 1, ACTION_NUMBER_MINIMUM);

  /* POS_SET is a file-scope variable initialized to NULL: restore
     that state instead of leaving a dangling handle behind.  */
  bitset_free (pos_set);
  pos_set = NULL;
}
/*-----------------------------------------------------------------.
| Compute and output yydefact, yydefgoto, yypact, yypgoto, yytable |
| and yycheck. |
`-----------------------------------------------------------------*/
void
tables_generate (void)
{
  /* A crude way to check that the sizes are properly correlated.
     Signedness in particular is not taken into account, but this is
     still better than nothing.  */
  verify (sizeof nstates <= sizeof nvectors);
  verify (sizeof nnterms <= sizeof nvectors);

  /* One vector per state, followed by one per nonterminal.  */
  nvectors = state_number_as_int (nstates) + nnterms;

  /* Per-vector working storage.  WIDTH is left uninitialized: it is
     only assigned together with a nonzero TALLY.  */
  tally = xcalloc (nvectors, sizeof *tally);
  width = xnmalloc (nvectors, sizeof *width);
  froms = xcalloc (nvectors, sizeof *froms);
  tos = xcalloc (nvectors, sizeof *tos);
  conflict_tos = xcalloc (nvectors, sizeof *conflict_tos);

  token_actions ();

  goto_actions ();
  free (goto_map);
  free (from_state);
  free (to_state);

  order = xcalloc (nvectors, sizeof *order);
  sort_actions ();
  pack_table ();

  /* Release the working storage.  */
  free (order);
  free (tally);
  free (width);

  for (int v = 0; v < nvectors; v++)
    {
      free (froms[v]);
      free (tos[v]);
      free (conflict_tos[v]);
    }
  free (froms);
  free (tos);
  free (conflict_tos);
}
/*-------------------------.
| Free the parser tables. |
`-------------------------*/
/* Release the tables that remain allocated after tables_generate.
   Each pointer is reset to NULL once freed, so that no dangling
   pointer is left in these file-scope variables and calling this
   function again is harmless.  */
void
tables_free (void)
{
  free (base);
  base = NULL;
  free (conflict_table);
  conflict_table = NULL;
  free (conflict_list);
  conflict_list = NULL;
  free (table);
  table = NULL;
  free (check);
  check = NULL;
  free (yydefgoto);
  yydefgoto = NULL;
  free (yydefact);
  yydefact = NULL;
}
+142
View File
@@ -0,0 +1,142 @@
/* Prepare the LALR and GLR parser tables.
Copyright (C) 2002, 2004, 2009-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef TABLES_H_
# define TABLES_H_
# include "state.h"
/* The parser tables consist of these tables.
YYTRANSLATE = vector mapping yylex's token numbers into bison's
token numbers.
YYTNAME = vector of string-names indexed by bison token number.
YYTOKNUM = vector of yylex token numbers corresponding to entries
in YYTNAME.
YYRLINE = vector of line-numbers of all rules. For yydebug
printouts.
YYRHS = vector of items of all rules. This is exactly what RITEMS
contains. For yydebug and for semantic parser.
YYPRHS[R] = index in YYRHS of first item for rule R.
YYR1[R] = symbol number of symbol that rule R derives.
YYR2[R] = number of symbols composing right hand side of rule R.
YYSTOS[S] = the symbol number of the symbol that leads to state S.
YYFINAL = the state number of the termination state.
YYTABLE = a vector filled with portions for different uses, found
via YYPACT and YYPGOTO, described below.
YYLAST ( = high) the number of the last element of YYTABLE, i.e.,
sizeof (YYTABLE) - 1.
YYCHECK = a vector indexed in parallel with YYTABLE. It indicates,
in a roundabout way, the bounds of the portion you are trying to
examine.
Suppose that the portion of YYTABLE starts at index P and the index
to be examined within the portion is I. Then if YYCHECK[P+I] != I,
I is outside the bounds of what is actually allocated, and the
default (from YYDEFACT or YYDEFGOTO) should be used. Otherwise,
YYTABLE[P+I] should be used.
YYDEFACT[S] = default reduction number in state S. Performed when
YYTABLE doesn't specify something else to do. Zero means the default
is an error.
YYDEFGOTO[I] = default state to go to after a reduction of a rule
that generates variable NTOKENS + I, except when YYTABLE specifies
something else to do.
YYPACT[S] = index in YYTABLE of the portion describing state S.
The lookahead token's number, I, is used to index that portion of
YYTABLE to find out what action to perform.
If YYPACT[S] == YYPACT_NINF, if YYPACT[S] + I is outside the bounds
of YYTABLE (from 0 to YYLAST), or I is outside the bounds for portion
S (that is, YYCHECK[YYPACT[S] + I] != I), then the default action
(that is, YYDEFACT[S]) should be used instead of YYTABLE. Otherwise,
the value YYTABLE[YYPACT[S] + I] should be used even if
YYPACT[S] < 0.
If the value in YYTABLE is positive, we shift the token and go to
that state.
If the value is negative, it is minus a rule number to reduce by.
If the value is YYTABLE_NINF, it's a syntax error.
YYPGOTO[I] = the index in YYTABLE of the portion describing what to
do after reducing a rule that derives variable I + NTOKENS. This
portion is indexed by the parser state number, S, as of before the
text for this nonterminal was read.
If YYPGOTO[I] + S is outside the bounds of YYTABLE (from 0 to YYLAST)
or if S is outside the bounds of the portion for I (that is,
YYCHECK[YYPGOTO[I] + S] != S), then the default state (that is,
YYDEFGOTO[I]) should be used instead of YYTABLE. Otherwise,
YYTABLE[YYPGOTO[I] + S] is the state to go to even if YYPGOTO[I] < 0.
When the above YYPACT, YYPGOTO, and YYCHECK tests determine that a
value from YYTABLE should be used, that value is never zero, so it is
useless to check for zero. When those tests indicate that the value
from YYDEFACT or YYDEFGOTO should be used instead, the value from
YYTABLE *might* be zero, which, as a consequence of the way in which
the tables are constructed, also happens to indicate that YYDEFACT or
YYDEFGOTO should be used. However, the YYTABLE value cannot be
trusted when the YYDEFACT or YYDEFGOTO value should be used. In
summary, forget about zero values in YYTABLE.
*/
/* Number of vectors. tables_generate sets it to the number of states
plus the number of nonterminals. */
extern int nvectors;
/* Integer type of BASE, TABLE and CHECK entries, and of the two
"negative infinity" markers below. */
typedef int base_number;
/* NOTE(review): presumably the source of the YYPACT and YYPGOTO
values described above -- confirm against tables.c. */
extern base_number *base;
/* A distinguished value of BASE, negative infinity. During the
computation it is equal to BASE_MINIMUM; it is later mapped to
BASE_NINF to keep parser tables small. */
extern base_number base_ninf;
/* Conflict data. NOTE(review): the layout of these two arrays and the
exact meaning of CONFLICT_LIST_CNT are not visible from this header;
see tables.c. */
extern int *conflict_table;
extern int *conflict_list;
extern int conflict_list_cnt;
/* NOTE(review): presumably YYTABLE and YYCHECK as described above --
confirm. */
extern base_number *table;
extern base_number *check;
/* The value used in TABLE to denote explicit syntax errors
(%nonassoc), a negative infinity. */
extern base_number table_ninf;
/* See YYDEFGOTO and YYDEFACT above. */
extern state_number *yydefgoto;
extern rule_number *yydefact;
/* See YYLAST above. */
extern int high;
/* Compute the parser tables. */
void tables_generate (void);
/* Free BASE, CONFLICT_TABLE, CONFLICT_LIST, TABLE, CHECK, YYDEFGOTO
and YYDEFACT. */
void tables_free (void);
#endif /* !TABLES_H_ */
@@ -0,0 +1,195 @@
/* Keep a unique copy of strings.
Copyright (C) 2002-2005, 2009-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
#include "system.h"
#include <attribute.h>
#include <error.h>
#include <hash.h>
#include <quotearg.h>
#include <stdarg.h>
#include "uniqstr.h"
/*-----------------------.
| A uniqstr hash table. |
`-----------------------*/
/* Initial capacity of the uniqstr hash table. */
#define HT_INITIAL_CAPACITY 257
/* The table holding every uniqstr. NULL until uniqstrs_new () has
been called. */
static struct hash_table *uniqstrs_table = NULL;
/*---------------------------------------.
| Create the uniqstr for STR if needed.  |
`---------------------------------------*/

uniqstr
uniqstr_new (char const *str)
{
  /* Already registered: hand back the stored pointer.  */
  uniqstr known = hash_lookup (uniqstrs_table, str);
  if (known)
    return known;

  /* First insertion in the hash: the table gets its own copy.  */
  char *copy = xstrdup (str);
  hash_xinsert (uniqstrs_table, copy);
  return copy;
}
/* Return the uniqstr for the concatenation of the NARGS strings that
   follow NARGS.  Each of them is read as a char const *.  */

uniqstr
uniqstr_concat (int nargs, ...)
{
  va_list ap;

  /* First pass: measure the result.  */
  size_t total = 0;
  va_start (ap, nargs);
  for (int remaining = nargs; 0 < remaining; remaining--)
    total += strlen (va_arg (ap, char const *));
  va_end (ap);

  /* Second pass: lay the pieces out one after the other.  */
  char *buf = xmalloc (total + 1);
  char *cursor = buf;
  va_start (ap, nargs);
  for (int remaining = nargs; 0 < remaining; remaining--)
    {
      char const *piece = va_arg (ap, char const *);
      size_t len = strlen (piece);
      memcpy (cursor, piece, len);
      cursor += len;
    }
  va_end (ap);
  *cursor = '\0';

  /* When the entry handed back is not BUF (presumably an equal string
     was already in the table), BUF is redundant.  */
  uniqstr res = hash_xinsert (uniqstrs_table, buf);
  if (res != buf)
    free (buf);
  return res;
}
/*--------------------------------.
| Abort if STR is not a uniqstr.  |
`--------------------------------*/

void
uniqstr_assert (char const *str)
{
  /* STR is a uniqstr only if it is the very pointer kept in the
     table, not merely a string with the same contents.  */
  uniqstr found = hash_lookup (uniqstrs_table, str);
  if (found && found == str)
    return;

  error (0, 0,
         "not a uniqstr: %s", quotearg (str));
  abort ();
}
/*--------------------.
| Print the uniqstr. |
`--------------------*/
/* Print USTR followed by a newline on stderr. Always return true.
NOTE(review): the constant true presumably tells the table traversal
to keep going -- confirm against hash_do_for_each. */
static inline bool
uniqstr_print (uniqstr ustr)
{
fprintf (stderr, "%s\n", ustr);
return true;
}
/* Adapter giving uniqstr_print the two-argument shape that
uniqstrs_print hands to uniqstrs_do. USTR is the entry to print; the
second argument is ignored. */
static bool
uniqstr_print_processor (void *ustr, void *null MAYBE_UNUSED)
{
return uniqstr_print (ustr);
}
/* Compare L and R a la strcmp.  Identical pointers (two NULLs
   included) are equal, a NULL sorts before any non-NULL string, and
   otherwise the contents are compared with strcmp.  */

int
uniqstr_cmp (uniqstr l, uniqstr r)
{
  if (l == r)
    return 0;
  if (!l)
    return -1;
  if (!r)
    return +1;
  return strcmp (l, r);
}
/*-----------------------.
| A uniqstr hash table. |
`-----------------------*/
/* Comparison callback given to the uniqstr table in uniqstrs_new:
compare the entries M1 and M2 with STREQ. */
static bool
hash_compare_uniqstr (void const *m1, void const *m2)
{
return STREQ (m1, m2);
}
/* Hashing callback given to the uniqstr table in uniqstrs_new:
forward the entry M and TABLESIZE to hash_string. */
static size_t
hash_uniqstr (void const *m, size_t tablesize)
{
return hash_string (m, tablesize);
}
/*----------------------------.
| Create the uniqstrs table. |
`----------------------------*/
void
uniqstrs_new (void)
{
/* NOTE(review): going by gnulib's hash_initialize signature, the NULL
is presumably the tuning argument (defaults) and `free' the function
applied to each entry when the table is freed -- confirm. */
uniqstrs_table = hash_xinitialize (HT_INITIAL_CAPACITY,
NULL,
hash_uniqstr,
hash_compare_uniqstr,
free);
}
/*-------------------------------------.
| Perform a task on all the uniqstrs. |
`-------------------------------------*/
/* Run PROCESSOR over the uniqstr table with hash_do_for_each, passing
PROCESSOR_DATA along. */
static void
uniqstrs_do (Hash_processor processor, void *processor_data)
{
hash_do_for_each (uniqstrs_table, processor, processor_data);
}
/*-----------------.
| Print them all. |
`-----------------*/
/* Print the uniqstrs on stderr, one per line, by running
uniqstr_print_processor over the table. */
void
uniqstrs_print (void)
{
uniqstrs_do (uniqstr_print_processor, NULL);
}
/*--------------------.
| Free the uniqstrs. |
`--------------------*/
/* Release the uniqstr table with hash_free. UNIQSTRS_TABLE is not
reset afterwards, so it must not be used again until uniqstrs_new ()
has been called. */
void
uniqstrs_free (void)
{
hash_free (uniqstrs_table);
}
@@ -0,0 +1,69 @@
/* Keeping a unique copy of strings.
Copyright (C) 2002-2003, 2008-2015, 2018-2021 Free Software
Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef UNIQSTR_H_
# define UNIQSTR_H_
# include <stdio.h>
/*-----------------------------------------.
| Pointers to unique copies of C strings. |
`-----------------------------------------*/
typedef char const *uniqstr;
/* Return the uniqstr for STR. A copy of STR is registered the first
time it is seen. */
uniqstr uniqstr_new (char const *str);
/* Two uniqstr values have the same value iff they are the same
pointer. */
# define UNIQSTR_EQ(Ustr1, Ustr2) (!!((Ustr1) == (Ustr2)))
/* Compare two uniqstrs a la strcmp: negative for <, zero for =, and
positive for >. Identical pointers are equal, a NULL uniqstr sorts
before any non-NULL one, and otherwise the contents are compared
with strcmp. */
int uniqstr_cmp (uniqstr u1, uniqstr u2);
/* Die if STR is not a uniqstr, i.e., if it is not the very pointer
stored in the table. */
void uniqstr_assert (char const *str);
/*----------------.
| Concatenation. |
`----------------*/
/* Concatenate strings and return a uniqstr. The goal of
this macro is to make the caller's code a little more succinct:
it counts the arguments and passes that count to uniqstr_concat. */
# define UNIQSTR_CONCAT(...) \
uniqstr_concat (ARRAY_CARDINALITY (((char const *[]) {__VA_ARGS__})), \
__VA_ARGS__)
/* Return the uniqstr for the concatenation of the NARGS strings that
follow NARGS. Each of them must be a char const *. */
uniqstr uniqstr_concat (int nargs, ...);
/*--------------------.
| Table of uniqstrs. |
`--------------------*/
/* Create the string table. Must be called before any other function
of this module. */
void uniqstrs_new (void);
/* Free the string table. NOTE(review): presumably this also frees the
uniqstrs themselves, since the table is created with `free' as its
last argument -- confirm. */
void uniqstrs_free (void);
/* Print them all on stderr, one per line. */
void uniqstrs_print (void);
#endif /* ! defined UNIQSTR_H_ */
@@ -0,0 +1,25 @@
#! /bin/sh
# Wrapper that runs the bison found in the (possibly relocated) bindir
# with -y, forwarding every command-line argument unchanged.
# NOTE(review): the at-sign placeholders in this template are presumably
# filled in at configure time -- confirm.
@relocatable_sh@
# In a relocatable build, relocate () rewrites a path from the original
# install prefix to the current one; otherwise it echoes its argument
# unchanged.
if test "@RELOCATABLE@" = yes; then
prefix="@prefix@"
exec_prefix="@exec_prefix@"
bindir="@bindir@"
orig_installdir="$bindir" # see Makefile.am's *_SCRIPTS variables
func_find_curr_installdir # determine curr_installdir
func_find_prefixes
relocate () {
# A slash is appended before the substitution and stripped afterwards,
# presumably so that a path equal to the prefix itself still matches.
echo "$1/" \
| sed -e "s%^${orig_installprefix}/%${curr_installprefix}/%" \
| sed -e 's,/$,,'
}
else
relocate () {
echo "$1"
}
fi
prefix=@prefix@
exec_prefix=@exec_prefix@
bindir=`relocate "@bindir@"`
# Replace this shell with bison.
exec "$bindir/bison" -y "$@"