Sophie

Sophie

distrib > * > 2009.0 > i586 > by-pkgid > 652f2135c89f730e09dcd4ca2b870fb5 > files > 14

openfst-devel-0.0.beta-1mdv2008.1.i586.rpm

// connect.h
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: riley@google.com (Michael Riley)
//
// \file
// Classes and functions to remove unsuccessful paths from an Fst.

#ifndef FST_LIB_CONNECT_H__
#define FST_LIB_CONNECT_H__

#include "fst/lib/mutable-fst.h"

namespace fst {

// Finds and returns strongly-connected components, accessible and
// coaccessible states and related properties. Uses Tarzan's single
// DFS SCC algorithm (see Aho, et al, "Design and Analysis of Computer
// Algorithms", 189pp).
template <class A>
class SccVisitor {
 public:
  typedef A Arc;
  typedef typename Arc::Weight Weight;
  typedef typename A::StateId StateId;

  // scc[i]: strongly-connected component number for state i.
  //   SCC numbers will be in topological order for acyclic input.
  // access[i]: accessibility of state i.
  // coaccess[i]: coaccessibility of state i.
  // Any of above can be NULL.
  // props: related property bits (cyclicity, initial cyclicity,
  //   accessibiliity, coaccessiblity) set/cleared (o.w. unchanged).
  SccVisitor(vector<StateId> *scc, vector<bool> *access,
             vector<bool> *coaccess, uint64 *props)
      : scc_(scc), access_(access), coaccess_(coaccess), props_(props) {}
  SccVisitor(uint64 *props)
      : scc_(0), access_(0), coaccess_(0), props_(props) {}

  void InitVisit(const Fst<A> &fst) {
    if (scc_)
      scc_->clear();
    if (access_)
      access_->clear();
    if (coaccess_) {
      coaccess_->clear();
      coaccess_internal_ = false;
    } else {
      coaccess_ = new vector<bool>;
      coaccess_internal_ = true;
    }
    *props_ |= kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible;
    *props_ &= ~(kCyclic | kInitialCyclic | kNotAccessible | kNotCoAccessible);
    fst_ = &fst;
    start_ = fst.Start();
    nstates_ = 0;
    nscc_ = 0;
    dfnumber_ = new vector<StateId>;
    lowlink_ = new vector<StateId>;
    onstack_ = new vector<bool>;
    scc_stack_ = new vector<StateId>;
  }

  bool InitState(StateId s, StateId root) {
    scc_stack_->push_back(s);
    while (dfnumber_->size() <= s) {
      if (scc_)
        scc_->push_back(-1);
      if (access_)
        access_->push_back(false);
      coaccess_->push_back(false);
      dfnumber_->push_back(-1);
      lowlink_->push_back(-1);
      onstack_->push_back(false);
    }
    (*dfnumber_)[s] = nstates_;
    (*lowlink_)[s] = nstates_;
    (*onstack_)[s] = true;
    if (root == start_) {
      if (access_)
        (*access_)[s] = true;
    } else {
      if (access_)
        (*access_)[s] = false;
      *props_ |= kNotAccessible;
      *props_ &= ~kAccessible;
    }
    ++nstates_;
    return true;
  }

  bool TreeArc(StateId s, const A &arc) { return true; }

  bool BackArc(StateId s, const A &arc) {
    StateId t = arc.nextstate;
    if ((*dfnumber_)[t] < (*lowlink_)[s])
      (*lowlink_)[s] = (*dfnumber_)[t];
    if ((*coaccess_)[t])
      (*coaccess_)[s] = true;
    *props_ |= kCyclic;
    *props_ &= ~kAcyclic;
    if (arc.nextstate == start_) {
      *props_ |= kInitialCyclic;
      *props_ &= ~kInitialAcyclic;
    }
    return true;
  }

  bool ForwardOrCrossArc(StateId s, const A &arc) {
    StateId t = arc.nextstate;
    if ((*dfnumber_)[t] < (*dfnumber_)[s] /* cross edge */ &&
        (*onstack_)[t] && (*dfnumber_)[t] < (*lowlink_)[s])
      (*lowlink_)[s] = (*dfnumber_)[t];
    if ((*coaccess_)[t])
      (*coaccess_)[s] = true;
    return true;
  }

  void FinishState(StateId s, StateId p, const A *) {
    if (fst_->Final(s) != Weight::Zero())
      (*coaccess_)[s] = true;
    if ((*dfnumber_)[s] == (*lowlink_)[s]) {  // root of new SCC
      bool scc_coaccess = false;
      size_t i = scc_stack_->size();
      StateId t;
      do {
        t = (*scc_stack_)[--i];
        if ((*coaccess_)[t])
          scc_coaccess = true;
      } while (s != t);
      do {
        t = scc_stack_->back();
        if (scc_)
          (*scc_)[t] = nscc_;
        if (scc_coaccess)
          (*coaccess_)[t] = true;
        (*onstack_)[t] = false;
        scc_stack_->pop_back();
      } while (s != t);
      if (!scc_coaccess) {
        *props_ |= kNotCoAccessible;
        *props_ &= ~kCoAccessible;
      }
      ++nscc_;
    }
    if (p != kNoStateId) {
      if ((*coaccess_)[s])
        (*coaccess_)[p] = true;
      if ((*lowlink_)[s] < (*lowlink_)[p])
        (*lowlink_)[p] = (*lowlink_)[s];
    }
  }

  void FinishVisit() {
    // Numbers SCC's in topological order when acyclic.
    if (scc_)
      for (StateId i = 0; i < scc_->size(); ++i)
        (*scc_)[i] = nscc_ - 1 - (*scc_)[i];
    if (coaccess_internal_)
      delete coaccess_;
    delete dfnumber_;
    delete lowlink_;
    delete onstack_;
    delete scc_stack_;
  }

 private:
  vector<StateId> *scc_;        // State's scc number
  vector<bool> *access_;        // State's accessibility
  vector<bool> *coaccess_;      // State's coaccessibility
  uint64 *props_;
  const Fst<A> *fst_;
  StateId start_;
  StateId nstates_;             // State count
  StateId nscc_;                // SCC count
  bool coaccess_internal_;
  vector<StateId> *dfnumber_;   // state discovery times
  vector<StateId> *lowlink_;    // lowlink[s] == dfnumber[s] => SCC root
  vector<bool> *onstack_;       // is a state on the SCC stack
  vector<StateId> *scc_stack_;  // SCC stack (w/ random access)
};


// Trims an FST, removing states and arcs that are not on successful
// paths. This version modifies its input.
//
// Complexity:
// - Time:  O(V + E)
// - Space: O(V + E)
// where V = # of states and E = # of arcs.
template<class Arc>
void Connect(MutableFst<Arc> *fst) {
  typedef typename Arc::StateId StateId;

  vector<bool> access;
  vector<bool> coaccess;
  uint64 props = 0;
  SccVisitor<Arc> scc_visitor(0, &access, &coaccess, &props);
  DfsVisit(*fst, &scc_visitor);
  vector<StateId> dstates;
  for (StateId s = 0; s < access.size(); ++s)
    if (!access[s] || !coaccess[s])
      dstates.push_back(s);
  fst->DeleteStates(dstates);
  fst->SetProperties(kAccessible | kCoAccessible, kAccessible | kCoAccessible);
}

}  // namespace fst

#endif  // FST_LIB_CONNECT_H__