dynare++ / tensor library (TL): move away from CWEB
By the way apply Dynare C++ coding style and extensions (.cc/.hh).time-shift
parent
84255f9e9a
commit
ce1ef47093
|
@ -137,9 +137,6 @@ mex/build/matlab/run_m2html.m
|
||||||
/dynare++/src/dynglob_ll.cc
|
/dynare++/src/dynglob_ll.cc
|
||||||
/dynare++/src/dynglob_tab.cc
|
/dynare++/src/dynglob_tab.cc
|
||||||
/dynare++/src/dynglob_tab.hh
|
/dynare++/src/dynglob_tab.hh
|
||||||
/dynare++/tl/cc/*.cpp
|
|
||||||
/dynare++/tl/cc/*.h
|
|
||||||
/dynare++/tl/cc/main.tex
|
|
||||||
/dynare++/tl/testing/tests
|
/dynare++/tl/testing/tests
|
||||||
/dynare++/tl/testing/tests.exe
|
/dynare++/tl/testing/tests.exe
|
||||||
!/dynare++/extern/R/Makefile
|
!/dynare++/extern/R/Makefile
|
||||||
|
|
|
@ -23,7 +23,7 @@
|
||||||
#include "mex.h"
|
#include "mex.h"
|
||||||
|
|
||||||
#include "decision_rule.hh"
|
#include "decision_rule.hh"
|
||||||
#include "fs_tensor.h"
|
#include "fs_tensor.hh"
|
||||||
#include "SylvException.h"
|
#include "SylvException.h"
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
// Copyright 2005, Ondra Kamenik
|
// Copyright 2005, Ondra Kamenik
|
||||||
|
|
||||||
#include "product.hh"
|
#include "product.hh"
|
||||||
#include "symmetry.h"
|
#include "symmetry.hh"
|
||||||
|
|
||||||
prodpit::prodpit()
|
prodpit::prodpit()
|
||||||
: prodq(NULL), level(0), npoints(0), jseq(NULL),
|
: prodq(NULL), level(0), npoints(0), jseq(NULL),
|
||||||
|
|
|
@ -16,7 +16,7 @@
|
||||||
#ifndef PRODUCT_H
|
#ifndef PRODUCT_H
|
||||||
#define PRODUCT_H
|
#define PRODUCT_H
|
||||||
|
|
||||||
#include "int_sequence.h"
|
#include "int_sequence.hh"
|
||||||
#include "vector_function.hh"
|
#include "vector_function.hh"
|
||||||
#include "quadrature.hh"
|
#include "quadrature.hh"
|
||||||
|
|
||||||
|
|
|
@ -32,8 +32,8 @@
|
||||||
|
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include "vector_function.hh"
|
#include "vector_function.hh"
|
||||||
#include "int_sequence.h"
|
#include "int_sequence.hh"
|
||||||
#include "sthread.h"
|
#include "sthread.hh"
|
||||||
|
|
||||||
/* This pure virtual class represents a concept of one-dimensional
|
/* This pure virtual class represents a concept of one-dimensional
|
||||||
(non-nested) quadrature. So, one dimensional quadrature must return
|
(non-nested) quadrature. So, one dimensional quadrature must return
|
||||||
|
|
|
@ -25,7 +25,7 @@
|
||||||
#ifndef QUASI_MCARLO_H
|
#ifndef QUASI_MCARLO_H
|
||||||
#define QUASI_MCARLO_H
|
#define QUASI_MCARLO_H
|
||||||
|
|
||||||
#include "int_sequence.h"
|
#include "int_sequence.hh"
|
||||||
#include "quadrature.hh"
|
#include "quadrature.hh"
|
||||||
|
|
||||||
#include "Vector.h"
|
#include "Vector.h"
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
// Copyright 2005, Ondra Kamenik
|
// Copyright 2005, Ondra Kamenik
|
||||||
|
|
||||||
#include "smolyak.hh"
|
#include "smolyak.hh"
|
||||||
#include "symmetry.h"
|
#include "symmetry.hh"
|
||||||
|
|
||||||
smolpit::smolpit()
|
smolpit::smolpit()
|
||||||
: smolq(NULL), isummand(0), jseq(NULL), sig(NULL), p(NULL)
|
: smolq(NULL), isummand(0), jseq(NULL), sig(NULL), p(NULL)
|
||||||
|
|
|
@ -17,8 +17,8 @@
|
||||||
#ifndef SMOLYAK_H
|
#ifndef SMOLYAK_H
|
||||||
#define SMOLYAK_H
|
#define SMOLYAK_H
|
||||||
|
|
||||||
#include "int_sequence.h"
|
#include "int_sequence.hh"
|
||||||
#include "tl_static.h"
|
#include "tl_static.hh"
|
||||||
#include "vector_function.hh"
|
#include "vector_function.hh"
|
||||||
#include "quadrature.hh"
|
#include "quadrature.hh"
|
||||||
|
|
||||||
|
|
|
@ -10,8 +10,8 @@
|
||||||
#ifndef DYNAMIC_MODEL_H
|
#ifndef DYNAMIC_MODEL_H
|
||||||
#define DYNAMIC_MODEL_H
|
#define DYNAMIC_MODEL_H
|
||||||
|
|
||||||
#include "t_container.h"
|
#include "t_container.hh"
|
||||||
#include "sparse_tensor.h"
|
#include "sparse_tensor.hh"
|
||||||
|
|
||||||
#include "Vector.h"
|
#include "Vector.h"
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
// Copyright 2005, Ondra Kamenik
|
// Copyright 2005, Ondra Kamenik
|
||||||
|
|
||||||
#include "faa_di_bruno.hh"
|
#include "faa_di_bruno.hh"
|
||||||
#include "fine_container.h"
|
#include "fine_container.hh"
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
|
|
|
@ -12,10 +12,10 @@
|
||||||
#define FAA_DI_BRUNO_H
|
#define FAA_DI_BRUNO_H
|
||||||
|
|
||||||
#include "journal.hh"
|
#include "journal.hh"
|
||||||
#include "stack_container.h"
|
#include "stack_container.hh"
|
||||||
#include "t_container.h"
|
#include "t_container.hh"
|
||||||
#include "sparse_tensor.h"
|
#include "sparse_tensor.hh"
|
||||||
#include "gs_tensor.h"
|
#include "gs_tensor.hh"
|
||||||
|
|
||||||
/* Nothing special here. See |@<|FaaDiBruno::calculate| folded sparse
|
/* Nothing special here. See |@<|FaaDiBruno::calculate| folded sparse
|
||||||
code@>| for reason of having |magic_mult|. */
|
code@>| for reason of having |magic_mult|. */
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
#ifndef JOURNAL_H
|
#ifndef JOURNAL_H
|
||||||
#define JOURNAL_H
|
#define JOURNAL_H
|
||||||
|
|
||||||
#include "int_sequence.h"
|
#include "int_sequence.hh"
|
||||||
|
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
|
|
@ -25,13 +25,13 @@
|
||||||
#ifndef KORDER_H
|
#ifndef KORDER_H
|
||||||
#define KORDER_H
|
#define KORDER_H
|
||||||
|
|
||||||
#include "int_sequence.h"
|
#include "int_sequence.hh"
|
||||||
#include "fs_tensor.h"
|
#include "fs_tensor.hh"
|
||||||
#include "gs_tensor.h"
|
#include "gs_tensor.hh"
|
||||||
#include "t_container.h"
|
#include "t_container.hh"
|
||||||
#include "stack_container.h"
|
#include "stack_container.hh"
|
||||||
#include "normal_moments.h"
|
#include "normal_moments.hh"
|
||||||
#include "t_polynomial.h"
|
#include "t_polynomial.hh"
|
||||||
#include "faa_di_bruno.hh"
|
#include "faa_di_bruno.hh"
|
||||||
#include "journal.hh"
|
#include "journal.hh"
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,7 @@
|
||||||
#ifndef NORMAL_CONJUGATE_H
|
#ifndef NORMAL_CONJUGATE_H
|
||||||
#define NORMAL_CONJUGATE_H
|
#define NORMAL_CONJUGATE_H
|
||||||
|
|
||||||
#include "twod_matrix.h"
|
#include "twod_matrix.hh"
|
||||||
|
|
||||||
/* The class is described by the four parameters: $\mu$, $\kappa$, $\nu$ and
|
/* The class is described by the four parameters: $\mu$, $\kappa$, $\nu$ and
|
||||||
$\Lambda$. */
|
$\Lambda$. */
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
#include "utils/cc/exception.h"
|
#include "utils/cc/exception.h"
|
||||||
#include "parser/cc/parser_exception.h"
|
#include "parser/cc/parser_exception.h"
|
||||||
#include "parser/cc/atom_substitutions.h"
|
#include "parser/cc/atom_substitutions.h"
|
||||||
#include "../tl/cc/tl_exception.h"
|
#include "../tl/cc/tl_exception.hh"
|
||||||
#include "../kord/kord_exception.hh"
|
#include "../kord/kord_exception.hh"
|
||||||
|
|
||||||
#ifndef DYNVERSION
|
#ifndef DYNVERSION
|
||||||
|
|
|
@ -4,8 +4,8 @@
|
||||||
#ifndef DYNARE3_H
|
#ifndef DYNARE3_H
|
||||||
#define DYNARE3_H
|
#define DYNARE3_H
|
||||||
|
|
||||||
#include "../tl/cc/t_container.h"
|
#include "../tl/cc/t_container.hh"
|
||||||
#include "../tl/cc/sparse_tensor.h"
|
#include "../tl/cc/sparse_tensor.hh"
|
||||||
#include "../kord/decision_rule.hh"
|
#include "../kord/decision_rule.hh"
|
||||||
#include "../kord/dynamic_model.hh"
|
#include "../kord/dynamic_model.hh"
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
#include "parser/cc/atom_assignings.h"
|
#include "parser/cc/atom_assignings.h"
|
||||||
|
|
||||||
#include "dynare_atoms.h"
|
#include "dynare_atoms.h"
|
||||||
#include "twod_matrix.h"
|
#include "twod_matrix.hh"
|
||||||
|
|
||||||
#include "Vector.h"
|
#include "Vector.h"
|
||||||
#include "GeneralMatrix.h"
|
#include "GeneralMatrix.h"
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
#ifndef OGU_NLSOLVE_H
|
#ifndef OGU_NLSOLVE_H
|
||||||
#define OGU_NLSOLVE_H
|
#define OGU_NLSOLVE_H
|
||||||
|
|
||||||
#include "twod_matrix.h"
|
#include "twod_matrix.hh"
|
||||||
#include "journal.hh"
|
#include "journal.hh"
|
||||||
|
|
||||||
namespace ogu
|
namespace ogu
|
||||||
|
|
|
@ -1,120 +1,48 @@
|
||||||
CWEBSRC = \
|
|
||||||
normal_moments.cweb \
|
|
||||||
int_sequence.cweb \
|
|
||||||
tensor.cweb \
|
|
||||||
ps_tensor.cweb \
|
|
||||||
pyramid_prod2.cweb \
|
|
||||||
equivalence.cweb \
|
|
||||||
fine_container.cweb \
|
|
||||||
kron_prod.cweb \
|
|
||||||
ps_tensor.hweb \
|
|
||||||
t_polynomial.cweb \
|
|
||||||
symmetry.cweb \
|
|
||||||
stack_container.cweb \
|
|
||||||
sthread.hweb \
|
|
||||||
twod_matrix.hweb \
|
|
||||||
twod_matrix.cweb \
|
|
||||||
symmetry.hweb \
|
|
||||||
sparse_tensor.cweb \
|
|
||||||
fine_container.hweb \
|
|
||||||
sthread.cweb \
|
|
||||||
int_sequence.hweb \
|
|
||||||
tl_exception.hweb \
|
|
||||||
pyramid_prod2.hweb \
|
|
||||||
t_container.hweb \
|
|
||||||
permutation.hweb \
|
|
||||||
tensor.hweb \
|
|
||||||
gs_tensor.cweb \
|
|
||||||
rfs_tensor.hweb \
|
|
||||||
pyramid_prod.hweb \
|
|
||||||
t_polynomial.hweb \
|
|
||||||
pyramid_prod.cweb \
|
|
||||||
fs_tensor.cweb \
|
|
||||||
sparse_tensor.hweb \
|
|
||||||
permutation.cweb \
|
|
||||||
equivalence.hweb \
|
|
||||||
gs_tensor.hweb \
|
|
||||||
normal_moments.hweb \
|
|
||||||
tl_static.hweb \
|
|
||||||
kron_prod.hweb \
|
|
||||||
fs_tensor.hweb \
|
|
||||||
stack_container.hweb \
|
|
||||||
rfs_tensor.cweb \
|
|
||||||
t_container.cweb \
|
|
||||||
tl_static.cweb
|
|
||||||
|
|
||||||
GENERATED_FILES = \
|
|
||||||
normal_moments.cpp \
|
|
||||||
int_sequence.cpp \
|
|
||||||
tensor.cpp \
|
|
||||||
ps_tensor.cpp \
|
|
||||||
pyramid_prod2.cpp \
|
|
||||||
equivalence.cpp \
|
|
||||||
fine_container.cpp \
|
|
||||||
kron_prod.cpp \
|
|
||||||
ps_tensor.h \
|
|
||||||
t_polynomial.cpp \
|
|
||||||
symmetry.cpp \
|
|
||||||
stack_container.cpp \
|
|
||||||
sthread.h \
|
|
||||||
twod_matrix.h \
|
|
||||||
twod_matrix.cpp \
|
|
||||||
symmetry.h \
|
|
||||||
sparse_tensor.cpp \
|
|
||||||
fine_container.h \
|
|
||||||
sthread.cpp \
|
|
||||||
int_sequence.h \
|
|
||||||
tl_exception.h \
|
|
||||||
pyramid_prod2.h \
|
|
||||||
t_container.h \
|
|
||||||
permutation.h \
|
|
||||||
tensor.h \
|
|
||||||
gs_tensor.cpp \
|
|
||||||
rfs_tensor.h \
|
|
||||||
pyramid_prod.h \
|
|
||||||
t_polynomial.h \
|
|
||||||
pyramid_prod.cpp \
|
|
||||||
fs_tensor.cpp \
|
|
||||||
sparse_tensor.h \
|
|
||||||
permutation.cpp \
|
|
||||||
equivalence.h \
|
|
||||||
gs_tensor.h \
|
|
||||||
normal_moments.h \
|
|
||||||
tl_static.h \
|
|
||||||
kron_prod.h \
|
|
||||||
fs_tensor.h \
|
|
||||||
stack_container.h \
|
|
||||||
rfs_tensor.cpp \
|
|
||||||
t_container.cpp \
|
|
||||||
tl_static.cpp
|
|
||||||
|
|
||||||
noinst_LIBRARIES = libtl.a
|
noinst_LIBRARIES = libtl.a
|
||||||
|
|
||||||
libtl_a_SOURCES = $(CWEBSRC) $(GENERATED_FILES)
|
libtl_a_SOURCES = \
|
||||||
|
equivalence.cc \
|
||||||
|
equivalence.hh \
|
||||||
|
fine_container.cc \
|
||||||
|
fine_container.hh \
|
||||||
|
fs_tensor.cc \
|
||||||
|
fs_tensor.hh \
|
||||||
|
gs_tensor.cc \
|
||||||
|
gs_tensor.hh \
|
||||||
|
int_sequence.cc \
|
||||||
|
int_sequence.hh \
|
||||||
|
kron_prod.cc \
|
||||||
|
kron_prod.hh \
|
||||||
|
normal_moments.cc \
|
||||||
|
normal_moments.hh \
|
||||||
|
permutation.cc \
|
||||||
|
permutation.hh \
|
||||||
|
ps_tensor.cc \
|
||||||
|
ps_tensor.hh \
|
||||||
|
pyramid_prod.cc \
|
||||||
|
pyramid_prod.hh \
|
||||||
|
pyramid_prod2.cc \
|
||||||
|
pyramid_prod2.hh \
|
||||||
|
rfs_tensor.cc \
|
||||||
|
rfs_tensor.hh \
|
||||||
|
sparse_tensor.cc \
|
||||||
|
sparse_tensor.hh \
|
||||||
|
stack_container.cc \
|
||||||
|
stack_container.hh \
|
||||||
|
sthread.cc \
|
||||||
|
sthread.hh \
|
||||||
|
symmetry.cc \
|
||||||
|
symmetry.hh \
|
||||||
|
t_container.cc \
|
||||||
|
t_container.hh \
|
||||||
|
t_polynomial.cc \
|
||||||
|
t_polynomial.hh \
|
||||||
|
tensor.cc \
|
||||||
|
tensor.hh \
|
||||||
|
tl_exception.hh \
|
||||||
|
tl_static.cc \
|
||||||
|
tl_static.hh \
|
||||||
|
twod_matrix.cc \
|
||||||
|
twod_matrix.hh
|
||||||
libtl_a_CPPFLAGS = -I../../sylv/cc $(CPPFLAGS_MATIO)
|
libtl_a_CPPFLAGS = -I../../sylv/cc $(CPPFLAGS_MATIO)
|
||||||
libtl_a_CXXFLAGS = $(AM_CXXFLAGS) $(PTHREAD_CFLAGS)
|
libtl_a_CXXFLAGS = $(AM_CXXFLAGS) $(PTHREAD_CFLAGS)
|
||||||
|
|
||||||
BUILT_SOURCES = $(GENERATED_FILES)
|
|
||||||
|
|
||||||
EXTRA_DIST = main.web dummy.ch
|
|
||||||
|
|
||||||
%.cpp: %.cweb dummy.ch
|
|
||||||
$(CTANGLE) -bhp $< dummy.ch $@
|
|
||||||
|
|
||||||
%.h: %.hweb dummy.ch
|
|
||||||
$(CTANGLE) -bhp $< dummy.ch $@
|
|
||||||
|
|
||||||
if HAVE_CWEAVE
|
|
||||||
if HAVE_PDFTEX
|
|
||||||
if HAVE_EPLAIN
|
|
||||||
pdf-local: tl.pdf
|
|
||||||
|
|
||||||
tl.pdf: main.web $(CWEBSRC)
|
|
||||||
$(CWEAVE) -bhp main.web
|
|
||||||
$(PDFTEX) main
|
|
||||||
mv main.pdf tl.pdf
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
CLEANFILES = tl.pdf main.idx main.log main.scn main.tex main.toc
|
|
||||||
|
|
|
@ -0,0 +1,435 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "equivalence.hh"
|
||||||
|
#include "permutation.hh"
|
||||||
|
#include "tl_exception.hh"
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
int
|
||||||
|
OrdSequence::operator[](int i) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF((i < 0 || i >= length()),
|
||||||
|
"Index out of range in OrdSequence::operator[]");
|
||||||
|
return data[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we implement the ordering. It can be changed, or various
|
||||||
|
orderings can be used for different problem sizes. We order them
|
||||||
|
according to the average, and then according to the first item. */
|
||||||
|
|
||||||
|
bool
|
||||||
|
OrdSequence::operator<(const OrdSequence &s) const
|
||||||
|
{
|
||||||
|
double ta = average();
|
||||||
|
double sa = s.average();
|
||||||
|
return (ta < sa || ((ta == sa) && (operator[](0) > s[0])));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
OrdSequence::operator==(const OrdSequence &s) const
|
||||||
|
{
|
||||||
|
if (length() != s.length())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
while (i < length() && operator[](i) == s[i])
|
||||||
|
i++;
|
||||||
|
|
||||||
|
return (i == length());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The first |add| adds a given integer to the class, the second
|
||||||
|
iterates through a given sequence and adds everything found in the
|
||||||
|
given class. */
|
||||||
|
|
||||||
|
void
|
||||||
|
OrdSequence::add(int i)
|
||||||
|
{
|
||||||
|
vector<int>::iterator vit = data.begin();
|
||||||
|
while (vit != data.end() && *vit < i)
|
||||||
|
++vit;
|
||||||
|
if (vit != data.end() && *vit == i)
|
||||||
|
return;
|
||||||
|
data.insert(vit, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
OrdSequence::add(const OrdSequence &s)
|
||||||
|
{
|
||||||
|
vector<int>::const_iterator vit = s.data.begin();
|
||||||
|
while (vit != s.data.end())
|
||||||
|
{
|
||||||
|
add(*vit);
|
||||||
|
++vit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Answers |true| if a given number is in the class. */
|
||||||
|
bool
|
||||||
|
OrdSequence::has(int i) const
|
||||||
|
{
|
||||||
|
vector<int>::const_iterator vit = data.begin();
|
||||||
|
while (vit != data.end())
|
||||||
|
{
|
||||||
|
if (*vit == i)
|
||||||
|
return true;
|
||||||
|
++vit;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return an average of the class. */
|
||||||
|
double
|
||||||
|
OrdSequence::average() const
|
||||||
|
{
|
||||||
|
double res = 0;
|
||||||
|
for (unsigned int i = 0; i < data.size(); i++)
|
||||||
|
res += data[i];
|
||||||
|
TL_RAISE_IF(data.size() == 0,
|
||||||
|
"Attempt to take average of empty class in OrdSequence::average");
|
||||||
|
return res/data.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Debug print. */
|
||||||
|
void
|
||||||
|
OrdSequence::print(const char *prefix) const
|
||||||
|
{
|
||||||
|
printf("%s", prefix);
|
||||||
|
for (unsigned int i = 0; i < data.size(); i++)
|
||||||
|
printf("%d ", data[i]);
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
Equivalence::Equivalence(int num)
|
||||||
|
: n(num)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < num; i++)
|
||||||
|
{
|
||||||
|
OrdSequence s;
|
||||||
|
s.add(i);
|
||||||
|
classes.push_back(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Equivalence::Equivalence(int num, const char *dummy)
|
||||||
|
: n(num)
|
||||||
|
{
|
||||||
|
OrdSequence s;
|
||||||
|
for (int i = 0; i < num; i++)
|
||||||
|
s.add(i);
|
||||||
|
classes.push_back(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy constructors. The second also glues a given couple. */
|
||||||
|
|
||||||
|
Equivalence::Equivalence(const Equivalence &e)
|
||||||
|
: n(e.n),
|
||||||
|
classes(e.classes)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
Equivalence::Equivalence(const Equivalence &e, int i1, int i2)
|
||||||
|
: n(e.n),
|
||||||
|
classes(e.classes)
|
||||||
|
{
|
||||||
|
seqit s1 = find(i1);
|
||||||
|
seqit s2 = find(i2);
|
||||||
|
if (s1 != s2)
|
||||||
|
{
|
||||||
|
OrdSequence ns(*s1);
|
||||||
|
ns.add(*s2);
|
||||||
|
classes.erase(s1);
|
||||||
|
classes.erase(s2);
|
||||||
|
insert(ns);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const Equivalence &
|
||||||
|
Equivalence::operator=(const Equivalence &e)
|
||||||
|
{
|
||||||
|
classes.clear();
|
||||||
|
n = e.n;
|
||||||
|
classes = e.classes;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
Equivalence::operator==(const Equivalence &e) const
|
||||||
|
{
|
||||||
|
if (!std::operator==(classes, e.classes))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (n != e.n)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return an iterator pointing to a class having a given integer. */
|
||||||
|
|
||||||
|
Equivalence::const_seqit
|
||||||
|
Equivalence::findHaving(int i) const
|
||||||
|
{
|
||||||
|
const_seqit si = classes.begin();
|
||||||
|
while (si != classes.end())
|
||||||
|
{
|
||||||
|
if ((*si).has(i))
|
||||||
|
return si;
|
||||||
|
++si;
|
||||||
|
}
|
||||||
|
TL_RAISE_IF(si == classes.end(),
|
||||||
|
"Couldn't find equivalence class in Equivalence::findHaving");
|
||||||
|
return si;
|
||||||
|
}
|
||||||
|
|
||||||
|
Equivalence::seqit
|
||||||
|
Equivalence::findHaving(int i)
|
||||||
|
{
|
||||||
|
seqit si = classes.begin();
|
||||||
|
while (si != classes.end())
|
||||||
|
{
|
||||||
|
if ((*si).has(i))
|
||||||
|
return si;
|
||||||
|
++si;
|
||||||
|
}
|
||||||
|
TL_RAISE_IF(si == classes.end(),
|
||||||
|
"Couldn't find equivalence class in Equivalence::findHaving");
|
||||||
|
return si;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Find $j$-th class for a given $j$. */
|
||||||
|
|
||||||
|
Equivalence::const_seqit
|
||||||
|
Equivalence::find(int j) const
|
||||||
|
{
|
||||||
|
const_seqit si = classes.begin();
|
||||||
|
int i = 0;
|
||||||
|
while (si != classes.end() && i < j)
|
||||||
|
{
|
||||||
|
++si;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
TL_RAISE_IF(si == classes.end(),
|
||||||
|
"Couldn't find equivalence class in Equivalence::find");
|
||||||
|
return si;
|
||||||
|
}
|
||||||
|
|
||||||
|
Equivalence::seqit
|
||||||
|
Equivalence::find(int j)
|
||||||
|
{
|
||||||
|
seqit si = classes.begin();
|
||||||
|
int i = 0;
|
||||||
|
while (si != classes.end() && i < j)
|
||||||
|
{
|
||||||
|
++si;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
TL_RAISE_IF(si == classes.end(),
|
||||||
|
"Couldn't find equivalence class in Equivalence::find");
|
||||||
|
return si;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Insert a new class yielding the ordering. */
|
||||||
|
void
|
||||||
|
Equivalence::insert(const OrdSequence &s)
|
||||||
|
{
|
||||||
|
seqit si = classes.begin();
|
||||||
|
while (si != classes.end() && *si < s)
|
||||||
|
++si;
|
||||||
|
classes.insert(si, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Trace the equivalence into the integer sequence. The classes are in
|
||||||
|
some order (described earlier), and items within classes are ordered,
|
||||||
|
so this implies, that the data can be linearized. This method
|
||||||
|
``prints'' them to the sequence. We allow for tracing only a given
|
||||||
|
number of classes from the beginning. */
|
||||||
|
|
||||||
|
void
|
||||||
|
Equivalence::trace(IntSequence &out, int num) const
|
||||||
|
{
|
||||||
|
int i = 0;
|
||||||
|
int nc = 0;
|
||||||
|
for (const_seqit it = begin(); it != end() && nc < num; ++it, ++nc)
|
||||||
|
for (int j = 0; j < (*it).length(); j++, i++)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(i >= out.size(),
|
||||||
|
"Wrong size of output sequence in Equivalence::trace");
|
||||||
|
out[i] = (*it)[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Equivalence::trace(IntSequence &out, const Permutation &per) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(out.size() != n,
|
||||||
|
"Wrong size of output sequence in Equivalence::trace");
|
||||||
|
TL_RAISE_IF(per.size() != numClasses(),
|
||||||
|
"Wrong permutation for permuted Equivalence::trace");
|
||||||
|
int i = 0;
|
||||||
|
for (int iclass = 0; iclass < numClasses(); iclass++)
|
||||||
|
{
|
||||||
|
const_seqit itper = find(per.getMap()[iclass]);
|
||||||
|
for (int j = 0; j < (*itper).length(); j++, i++)
|
||||||
|
out[i] = (*itper)[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Debug print. */
|
||||||
|
void
|
||||||
|
Equivalence::print(const char *prefix) const
|
||||||
|
{
|
||||||
|
int i = 0;
|
||||||
|
for (const_seqit it = classes.begin();
|
||||||
|
it != classes.end();
|
||||||
|
++it, i++)
|
||||||
|
{
|
||||||
|
printf("%sclass %d: ", prefix, i);
|
||||||
|
(*it).print("");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we construct a set of all equivalences over $n$-element
|
||||||
|
set. The construction proceeds as follows. We maintain a list of added
|
||||||
|
equivalences. At each iteration we pop front of the list, try to add
|
||||||
|
all parents of the popped equivalence. This action adds new
|
||||||
|
equivalences to the object and also to the added list. We finish the
|
||||||
|
iterations when the added list is empty.
|
||||||
|
|
||||||
|
In the beginning we start with
|
||||||
|
$\{\{0\},\{1\},\ldots,\{n-1\}\}$. Adding of parents is an action which
|
||||||
|
for a given equivalence tries to glue all possible couples and checks
|
||||||
|
whether a new equivalence is already in the equivalence set. This is
|
||||||
|
not effective, but we will do the construction only ones.
|
||||||
|
|
||||||
|
In this way we breath-first search a lattice of all equivalences. Note
|
||||||
|
that the lattice is modular, that is why the result of a construction
|
||||||
|
is a list with a property that between two equivalences with the same
|
||||||
|
number of classes there are only equivalences with that number of
|
||||||
|
classes. Obviously, the list is decreasing in a number of classes
|
||||||
|
(since it is constructed by gluing attempts). */
|
||||||
|
|
||||||
|
EquivalenceSet::EquivalenceSet(int num)
|
||||||
|
: n(num),
|
||||||
|
equis()
|
||||||
|
{
|
||||||
|
list<Equivalence> added;
|
||||||
|
Equivalence first(n);
|
||||||
|
equis.push_back(first);
|
||||||
|
addParents(first, added);
|
||||||
|
while (!added.empty())
|
||||||
|
{
|
||||||
|
addParents(added.front(), added);
|
||||||
|
added.pop_front();
|
||||||
|
}
|
||||||
|
if (n > 1)
|
||||||
|
{
|
||||||
|
Equivalence last(n, "");
|
||||||
|
equis.push_back(last);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This method is used in |addParents| and returns |true| if the object
|
||||||
|
already has that equivalence. We trace list of equivalences in reverse
|
||||||
|
order since equivalences are ordered in the list from the most
|
||||||
|
primitive (nothing equivalent) to maximal (all is equivalent). Since
|
||||||
|
we will have much more results of |has| method as |true|, and
|
||||||
|
|operator==| between equivalences is quick if number of classes
|
||||||
|
differ, and in time we will compare with equivalences with less
|
||||||
|
classes, then it is more efficient to trace the equivalences from less
|
||||||
|
classes to more classes. hence the reverse order. */
|
||||||
|
|
||||||
|
bool
|
||||||
|
EquivalenceSet::has(const Equivalence &e) const
|
||||||
|
{
|
||||||
|
list<Equivalence>::const_reverse_iterator rit = equis.rbegin();
|
||||||
|
while (rit != equis.rend() && *rit != e)
|
||||||
|
++rit;
|
||||||
|
if (rit != equis.rend())
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Responsibility of this methods is to try to glue all possible
|
||||||
|
couples within a given equivalence and add those which are not in the
|
||||||
|
list yet. These are added also to the |added| list.
|
||||||
|
|
||||||
|
If number of classes is 2 or 1, we exit, because there is nothing to
|
||||||
|
be added. */
|
||||||
|
|
||||||
|
void
|
||||||
|
EquivalenceSet::addParents(const Equivalence &e,
|
||||||
|
list<Equivalence> &added)
|
||||||
|
{
|
||||||
|
if (e.numClasses() == 2 || e.numClasses() == 1)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (int i1 = 0; i1 < e.numClasses(); i1++)
|
||||||
|
for (int i2 = i1+1; i2 < e.numClasses(); i2++)
|
||||||
|
{
|
||||||
|
Equivalence ns(e, i1, i2);
|
||||||
|
if (!has(ns))
|
||||||
|
{
|
||||||
|
added.push_back(ns);
|
||||||
|
equis.push_back(ns);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Debug print. */
|
||||||
|
void
|
||||||
|
EquivalenceSet::print(const char *prefix) const
|
||||||
|
{
|
||||||
|
char tmp[100];
|
||||||
|
strcpy(tmp, prefix);
|
||||||
|
strcat(tmp, " ");
|
||||||
|
int i = 0;
|
||||||
|
for (list<Equivalence>::const_iterator it = equis.begin();
|
||||||
|
it != equis.end();
|
||||||
|
++it, i++)
|
||||||
|
{
|
||||||
|
printf("%sequivalence %d:(classes %d)\n", prefix, i, (*it).numClasses());
|
||||||
|
(*it).print(tmp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Construct the bundle. |nmax| is a maximum size of underlying set. */
|
||||||
|
EquivalenceBundle::EquivalenceBundle(int nmax)
|
||||||
|
{
|
||||||
|
nmax = max(nmax, 1);
|
||||||
|
generateUpTo(nmax);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Destruct bundle. Just free all pointers. */
|
||||||
|
EquivalenceBundle::~EquivalenceBundle()
|
||||||
|
{
|
||||||
|
for (unsigned int i = 0; i < bundle.size(); i++)
|
||||||
|
delete bundle[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Remember, that the first item is |EquivalenceSet(1)|. */
|
||||||
|
const EquivalenceSet &
|
||||||
|
EquivalenceBundle::get(int n) const
|
||||||
|
{
|
||||||
|
if (n > (int) (bundle.size()) || n < 1)
|
||||||
|
{
|
||||||
|
TL_RAISE("Equivalence set not found in EquivalenceBundle::get");
|
||||||
|
return *(bundle[0]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return *(bundle[n-1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get |curmax| which is a maximum size in the bundle, and generate for
|
||||||
|
all sizes from |curmax+1| up to |nmax|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
EquivalenceBundle::generateUpTo(int nmax)
|
||||||
|
{
|
||||||
|
int curmax = bundle.size();
|
||||||
|
for (int i = curmax+1; i <= nmax; i++)
|
||||||
|
bundle.push_back(new EquivalenceSet(i));
|
||||||
|
}
|
|
@ -1,477 +0,0 @@
|
||||||
@q $Id: equivalence.cweb 148 2005-04-19 15:12:26Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt equivalence.cpp} file.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#include "equivalence.h"
|
|
||||||
#include "permutation.h"
|
|
||||||
#include "tl_exception.h"
|
|
||||||
|
|
||||||
#include <cstring>
|
|
||||||
|
|
||||||
@<|OrdSequence| method codes@>;
|
|
||||||
@<|Equivalence| method codes@>;
|
|
||||||
@<|EquivalenceSet| method codes@>;
|
|
||||||
@<|EquivalenceBundle| method codes@>;
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|OrdSequence| method codes@>=
|
|
||||||
@<|OrdSequence::operator[]| code@>;
|
|
||||||
@<|OrdSequence::operator<| code@>;
|
|
||||||
@<|OrdSequence::operator==| code@>;
|
|
||||||
@<|OrdSequence::add| codes@>;
|
|
||||||
@<|OrdSequence::has| code@>;
|
|
||||||
@<|OrdSequence::average()| code@>;
|
|
||||||
@<|OrdSequence::print| code@>;
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|Equivalence| method codes@>=
|
|
||||||
@<|Equivalence| constructors@>;
|
|
||||||
@<|Equivalence| copy constructors@>;
|
|
||||||
@<|Equivalence::findHaving| codes@>;
|
|
||||||
@<|Equivalence::find| codes@>;
|
|
||||||
@<|Equivalence::insert| code@>;
|
|
||||||
@<|Equivalence::operator=| code@>;
|
|
||||||
@<|Equivalence::operator==| code@>;
|
|
||||||
@<|Equivalence::trace| code@>;
|
|
||||||
@<|Equivalence::trace| permuted code@>;
|
|
||||||
@<|Equivalence::print| code@>;
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|EquivalenceSet| method codes@>=
|
|
||||||
@<|EquivalenceSet| constructor code@>;
|
|
||||||
@<|EquivalenceSet::has| code@>;
|
|
||||||
@<|EquivalenceSet::addParents| code@>;
|
|
||||||
@<|EquivalenceSet::print| code@>;
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|EquivalenceBundle| method codes@>=
|
|
||||||
@<|EquivalenceBundle| constructor code@>;
|
|
||||||
@<|EquivalenceBundle| destructor code@>;
|
|
||||||
@<|EquivalenceBundle::get| code@>;
|
|
||||||
@<|EquivalenceBundle::generateUpTo| code@>;
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|OrdSequence::operator[]| code@>=
|
|
||||||
int OrdSequence::operator[](int i) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF((i<0 || i>=length()),
|
|
||||||
"Index out of range in OrdSequence::operator[]");
|
|
||||||
return data[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we implement the ordering. It can be changed, or various
|
|
||||||
orderings can be used for different problem sizes. We order them
|
|
||||||
according to the average, and then according to the first item.
|
|
||||||
|
|
||||||
@<|OrdSequence::operator<| code@>=
|
|
||||||
bool OrdSequence::operator<(const OrdSequence& s) const
|
|
||||||
{
|
|
||||||
double ta = average();
|
|
||||||
double sa = s.average();
|
|
||||||
return (ta < sa || ((ta == sa) && (operator[](0) > s[0])));
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|OrdSequence::operator==| code@>=
|
|
||||||
bool OrdSequence::operator==(const OrdSequence& s) const
|
|
||||||
{
|
|
||||||
if (length() != s.length())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
int i = 0;
|
|
||||||
while (i < length() && operator[](i) == s[i])
|
|
||||||
i++;
|
|
||||||
|
|
||||||
return (i == length());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ The first |add| adds a given integer to the class, the second
|
|
||||||
iterates through a given sequence and adds everything found in the
|
|
||||||
given class.
|
|
||||||
|
|
||||||
@<|OrdSequence::add| codes@>=
|
|
||||||
void OrdSequence::add(int i)
|
|
||||||
{
|
|
||||||
vector<int>::iterator vit = data.begin();
|
|
||||||
while (vit != data.end() && *vit < i)
|
|
||||||
++vit;
|
|
||||||
if (vit != data.end() && *vit == i)
|
|
||||||
return;
|
|
||||||
data.insert(vit, i);
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
void OrdSequence::add(const OrdSequence& s)
|
|
||||||
{
|
|
||||||
vector<int>::const_iterator vit = s.data.begin();
|
|
||||||
while (vit != s.data.end()) {
|
|
||||||
add(*vit);
|
|
||||||
++vit;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Answers |true| if a given number is in the class.
|
|
||||||
@<|OrdSequence::has| code@>=
|
|
||||||
bool OrdSequence::has(int i) const
|
|
||||||
{
|
|
||||||
vector<int>::const_iterator vit = data.begin();
|
|
||||||
while (vit != data.end()) {
|
|
||||||
if (*vit == i)
|
|
||||||
return true;
|
|
||||||
++vit;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Return an average of the class.
|
|
||||||
@<|OrdSequence::average()| code@>=
|
|
||||||
double OrdSequence::average() const
|
|
||||||
{
|
|
||||||
double res = 0;
|
|
||||||
for (unsigned int i = 0; i < data.size(); i++)
|
|
||||||
res += data[i];
|
|
||||||
TL_RAISE_IF(data.size() == 0,
|
|
||||||
"Attempt to take average of empty class in OrdSequence::average");
|
|
||||||
return res/data.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Debug print.
|
|
||||||
@<|OrdSequence::print| code@>=
|
|
||||||
void OrdSequence::print(const char* prefix) const
|
|
||||||
{
|
|
||||||
printf("%s",prefix);
|
|
||||||
for (unsigned int i = 0; i < data.size(); i++)
|
|
||||||
printf("%d ",data[i]);
|
|
||||||
printf("\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|Equivalence| constructors@>=
|
|
||||||
/* Construct the finest equivalence {{0},{1},...,{num-1}}: each element
   of the underlying set gets its own singleton class. */
Equivalence::Equivalence(int num)
  : n(num)
{
  for (int e = 0; e < num; e++)
    {
      OrdSequence singleton;
      singleton.add(e);
      classes.push_back(singleton);
    }
}
|
|
||||||
@#
|
|
||||||
/* Construct the coarsest equivalence {{0,1,...,num-1}}: one class
   containing the whole set. The |dummy| argument only selects this
   overload. */
Equivalence::Equivalence(int num, const char *dummy)
  : n(num)
{
  OrdSequence whole;
  for (int e = 0; e < num; e++)
    whole.add(e);
  classes.push_back(whole);
}
|
|
||||||
|
|
||||||
@ Copy constructors. The second also glues a given couple.
|
|
||||||
@<|Equivalence| copy constructors@>=
|
|
||||||
/* Plain copy constructor. */
Equivalence::Equivalence(const Equivalence &e)
  : n(e.n), classes(e.classes)
{
}
|
|
||||||
@#
|
|
||||||
/* Copy |e| and glue its |i1|-th and |i2|-th classes into one. When the
   two indices name the same class, the copy is left untouched. */
Equivalence::Equivalence(const Equivalence &e, int i1, int i2)
  : n(e.n), classes(e.classes)
{
  seqit c1 = find(i1);
  seqit c2 = find(i2);
  if (c1 == c2)
    return;
  /* Build the union of the two classes, drop the originals (list
     iterators other than the erased one stay valid), and reinsert the
     union so the class-ordering invariant is preserved. */
  OrdSequence merged(*c1);
  merged.add(*c2);
  classes.erase(c1);
  classes.erase(c2);
  insert(merged);
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|Equivalence::operator=| code@>=
|
|
||||||
/* Assignment operator. The redundant classes.clear() of the previous
   version was removed: list assignment already replaces the contents. */
const Equivalence &
Equivalence::operator=(const Equivalence &e)
{
  n = e.n;
  classes = e.classes;
  return *this;
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|Equivalence::operator==| code@>=
|
|
||||||
bool Equivalence::operator==(const Equivalence& e) const
|
|
||||||
{
|
|
||||||
if (! std::operator==(classes, e.classes))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (n != e.n)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Return an iterator pointing to a class having a given integer.
|
|
||||||
@<|Equivalence::findHaving| codes@>=
|
|
||||||
/* Return an iterator to the class containing the integer |i|; raises
   when no class contains it. */
Equivalence::const_seqit
Equivalence::findHaving(int i) const
{
  const_seqit si = classes.begin();
  for (; si != classes.end(); ++si)
    if ((*si).has(i))
      return si;
  TL_RAISE_IF(si == classes.end(),
              "Couldn't find equivalence class in Equivalence::findHaving");
  return si;
}
|
|
||||||
@#
|
|
||||||
/* Non-const variant of findHaving; see above. */
Equivalence::seqit
Equivalence::findHaving(int i)
{
  seqit si = classes.begin();
  for (; si != classes.end(); ++si)
    if ((*si).has(i))
      return si;
  TL_RAISE_IF(si == classes.end(),
              "Couldn't find equivalence class in Equivalence::findHaving");
  return si;
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Find $j$-th class for a given $j$.
|
|
||||||
@<|Equivalence::find| codes@>=
|
|
||||||
/* Return an iterator to the |j|-th class (0-based); raises when |j| is
   out of range. */
Equivalence::const_seqit
Equivalence::find(int j) const
{
  const_seqit si = classes.begin();
  for (int k = 0; si != classes.end() && k < j; k++)
    ++si;
  TL_RAISE_IF(si == classes.end(),
              "Couldn't find equivalence class in Equivalence::find");
  return si;
}
|
|
||||||
@#
|
|
||||||
/* Non-const variant of find; see above. */
Equivalence::seqit
Equivalence::find(int j)
{
  seqit si = classes.begin();
  for (int k = 0; si != classes.end() && k < j; k++)
    ++si;
  TL_RAISE_IF(si == classes.end(),
              "Couldn't find equivalence class in Equivalence::find");
  return si;
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Insert a new class yielding the ordering.
|
|
||||||
@<|Equivalence::insert| code@>=
|
|
||||||
void Equivalence::insert(const OrdSequence& s)
|
|
||||||
{
|
|
||||||
seqit si = classes.begin();
|
|
||||||
while (si != classes.end() && *si < s)
|
|
||||||
++si;
|
|
||||||
classes.insert(si, s);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Trace the equivalence into the integer sequence. The classes are in
|
|
||||||
some order (described earlier), and items within classes are ordered,
|
|
||||||
so this implies, that the data can be linearized. This method
|
|
||||||
``prints'' them to the sequence. We allow for tracing only a given
|
|
||||||
number of classes from the beginning.
|
|
||||||
|
|
||||||
@<|Equivalence::trace| code@>=
|
|
||||||
/* Linearize the first |num| classes into |out|. Classes are ordered
   and items within a class are ordered, so the result is well
   defined. */
void
Equivalence::trace(IntSequence &out, int num) const
{
  int pos = 0;
  int traced = 0;
  for (const_seqit it = begin(); it != end() && traced < num; ++it, ++traced)
    for (int j = 0; j < (*it).length(); j++, pos++)
      {
        TL_RAISE_IF(pos >= out.size(),
                    "Wrong size of output sequence in Equivalence::trace");
        out[pos] = (*it)[j];
      }
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|Equivalence::trace| permuted code@>=
|
|
||||||
/* Linearize the classes into |out|, visiting them in the order given
   by the permutation |per| of class indices. */
void
Equivalence::trace(IntSequence &out, const Permutation &per) const
{
  TL_RAISE_IF(out.size() != n,
              "Wrong size of output sequence in Equivalence::trace");
  TL_RAISE_IF(per.size() != numClasses(),
              "Wrong permutation for permuted Equivalence::trace");
  int pos = 0;
  for (int c = 0; c < numClasses(); c++)
    {
      const_seqit it = find(per.getMap()[c]);
      for (int j = 0; j < (*it).length(); j++, pos++)
        out[pos] = (*it)[j];
    }
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Debug print.
|
|
||||||
@<|Equivalence::print| code@>=
|
|
||||||
void Equivalence::print(const char* prefix) const
|
|
||||||
{
|
|
||||||
int i = 0;
|
|
||||||
for (const_seqit it = classes.begin();
|
|
||||||
it != classes.end();
|
|
||||||
++it, i++) {
|
|
||||||
printf("%sclass %d: ",prefix,i);
|
|
||||||
(*it).print("");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we construct a set of all equivalences over $n$-element
|
|
||||||
set. The construction proceeds as follows. We maintain a list of added
|
|
||||||
equivalences. At each iteration we pop front of the list, try to add
|
|
||||||
all parents of the popped equivalence. This action adds new
|
|
||||||
equivalences to the object and also to the added list. We finish the
|
|
||||||
iterations when the added list is empty.
|
|
||||||
|
|
||||||
In the beginning we start with
|
|
||||||
$\{\{0\},\{1\},\ldots,\{n-1\}\}$. Adding of parents is an action which
|
|
||||||
for a given equivalence tries to glue all possible couples and checks
|
|
||||||
whether a new equivalence is already in the equivalence set. This is
|
|
||||||
not effective, but we will do the construction only ones.
|
|
||||||
|
|
||||||
In this way we breath-first search a lattice of all equivalences. Note
|
|
||||||
that the lattice is modular, that is why the result of a construction
|
|
||||||
is a list with a property that between two equivalences with the same
|
|
||||||
number of classes there are only equivalences with that number of
|
|
||||||
classes. Obviously, the list is decreasing in a number of classes
|
|
||||||
(since it is constructed by gluing attempts).
|
|
||||||
|
|
||||||
|
|
||||||
@<|EquivalenceSet| constructor code@>=
|
|
||||||
/* Construct the set of all equivalences over a |num|-element set by a
   breadth-first search of the lattice: start from the finest
   equivalence, repeatedly glue couples of classes of the queued
   equivalences (addParents) until nothing new appears, then append the
   coarsest equivalence, which the gluing never produces for n > 1. */
EquivalenceSet::EquivalenceSet(int num)
  : n(num), equis()
{
  list<Equivalence> added;
  Equivalence finest(n);
  equis.push_back(finest);
  addParents(finest, added);
  while (!added.empty())
    {
      // Process the front item before popping it, since addParents
      // takes it by reference.
      addParents(added.front(), added);
      added.pop_front();
    }
  if (n > 1)
    {
      Equivalence coarsest(n, "");
      equis.push_back(coarsest);
    }
}
|
|
||||||
|
|
||||||
@ This method is used in |addParents| and returns |true| if the object
|
|
||||||
already has that equivalence. We trace list of equivalences in reverse
|
|
||||||
order since equivalences are ordered in the list from the most
|
|
||||||
primitive (nothing equivalent) to maximal (all is equivalent). Since
|
|
||||||
we will have much more results of |has| method as |true|, and
|
|
||||||
|operator==| between equivalences is quick if number of classes
|
|
||||||
differ, and in time we will compare with equivalences with less
|
|
||||||
classes, then it is more efficient to trace the equivalences from less
|
|
||||||
classes to more classes. hence the reverse order.
|
|
||||||
|
|
||||||
@<|EquivalenceSet::has| code@>=
|
|
||||||
bool EquivalenceSet::has(const Equivalence& e) const
|
|
||||||
{
|
|
||||||
list<Equivalence>::const_reverse_iterator rit = equis.rbegin();
|
|
||||||
while (rit != equis.rend() && *rit != e)
|
|
||||||
++rit;
|
|
||||||
if (rit != equis.rend())
|
|
||||||
return true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Responsibility of this methods is to try to glue all possible
|
|
||||||
couples within a given equivalence and add those which are not in the
|
|
||||||
list yet. These are added also to the |added| list.
|
|
||||||
|
|
||||||
If number of classes is 2 or 1, we exit, because there is nothing to
|
|
||||||
be added.
|
|
||||||
|
|
||||||
@<|EquivalenceSet::addParents| code@>=
|
|
||||||
void EquivalenceSet::addParents(const Equivalence& e,
|
|
||||||
list<Equivalence>& added)
|
|
||||||
{
|
|
||||||
if (e.numClasses() == 2 || e.numClasses() == 1)
|
|
||||||
return;
|
|
||||||
|
|
||||||
for (int i1 = 0; i1 < e.numClasses(); i1++)
|
|
||||||
for (int i2 = i1+1; i2 < e.numClasses(); i2++) {
|
|
||||||
Equivalence ns(e, i1, i2);
|
|
||||||
if (! has(ns)) {
|
|
||||||
added.push_back(ns);
|
|
||||||
equis.push_back(ns);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Debug print.
|
|
||||||
@<|EquivalenceSet::print| code@>=
|
|
||||||
void EquivalenceSet::print(const char* prefix) const
|
|
||||||
{
|
|
||||||
char tmp[100];
|
|
||||||
strcpy(tmp, prefix);
|
|
||||||
strcat(tmp, " ");
|
|
||||||
int i = 0;
|
|
||||||
for (list<Equivalence>::const_iterator it = equis.begin();
|
|
||||||
it != equis.end();
|
|
||||||
++it, i++) {
|
|
||||||
printf("%sequivalence %d:(classes %d)\n",prefix,i,(*it).numClasses());
|
|
||||||
(*it).print(tmp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Construct the bundle. |nmax| is a maximum size of underlying set.
|
|
||||||
@<|EquivalenceBundle| constructor code@>=
|
|
||||||
/* Construct the bundle holding EquivalenceSet(1)..EquivalenceSet(nmax);
   the set for a one-element underlying set is always generated. */
EquivalenceBundle::EquivalenceBundle(int nmax)
{
  if (nmax < 1)
    nmax = 1;
  generateUpTo(nmax);
}
|
|
||||||
|
|
||||||
@ Destruct bundle. Just free all pointers.
|
|
||||||
@<|EquivalenceBundle| destructor code@>=
|
|
||||||
/* Destructor: the bundle owns its equivalence sets, so free them all. */
EquivalenceBundle::~EquivalenceBundle()
{
  for (unsigned int k = 0; k < bundle.size(); k++)
    delete bundle[k];
}
|
|
||||||
|
|
||||||
@ Remember, that the first item is |EquivalenceSet(1)|.
|
|
||||||
@<|EquivalenceBundle::get| code@>=
|
|
||||||
/* Return the equivalence set over an n-element set; the first bundle
   item corresponds to n = 1. Raises for an out-of-range n. */
const EquivalenceSet &
EquivalenceBundle::get(int n) const
{
  if (n >= 1 && n <= (int) (bundle.size()))
    return *(bundle[n-1]);
  TL_RAISE("Equivalence set not found in EquivalenceBundle::get");
  return *(bundle[0]);
}
|
|
||||||
|
|
||||||
@ Get |curmax| which is a maximum size in the bundle, and generate for
|
|
||||||
all sizes from |curmax+1| up to |nmax|.
|
|
||||||
|
|
||||||
@<|EquivalenceBundle::generateUpTo| code@>=
|
|
||||||
void EquivalenceBundle::generateUpTo(int nmax)
|
|
||||||
{
|
|
||||||
int curmax = bundle.size();
|
|
||||||
for (int i = curmax+1; i <= nmax; i++)
|
|
||||||
bundle.push_back(new EquivalenceSet(i));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt equivalence.cpp} file.
|
|
|
@ -0,0 +1,226 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// Equivalences.
|
||||||
|
|
||||||
|
/* Here we define an equivalence of a set of integers $\{0, 1, \ldots,
|
||||||
|
k-1\}$. The purpose is clear, in the tensor library we often iterate
|
||||||
|
through all equivalences and sum matrices. We need an abstraction for
|
||||||
|
an equivalence class, equivalence and a set of all equivalences.
|
||||||
|
|
||||||
|
The equivalence class (which is basically a set of integers) is here
|
||||||
|
implemented as ordered integer sequence. The ordered sequence is not
|
||||||
|
implemented via |IntSequence|, but via |vector<int>| since we need
|
||||||
|
insertions. The equivalence is implemented as an ordered list of
|
||||||
|
equivalence classes, and equivalence set is a list of equivalences.
|
||||||
|
|
||||||
|
The ordering of the equivalence classes within an equivalence is very
|
||||||
|
important. For instance, if we iterate through equivalences for $k=5$
|
||||||
|
and pickup some equivalence class, say $\{\{0,4\},\{1,2\},\{3\}\}$, we
|
||||||
|
then evaluate something like:
|
||||||
|
$$\left[B_{y^2u^3}\right]_{\alpha_1\alpha_2\beta_1\beta_2\beta_3}=
|
||||||
|
\cdots+\left[g_{y^3}\right]_{\gamma_1\gamma_2\gamma_3}
|
||||||
|
\left[g_{yu}\right]^{\gamma_1}_{\alpha_1\beta_3}
|
||||||
|
\left[g_{yu}\right]^{\gamma_2}_{\alpha_2\beta_1}
|
||||||
|
\left[g_u\right]^{\gamma_3}_{\beta_2}+\cdots
|
||||||
|
$$
|
||||||
|
If the tensors are unfolded, we can evaluate this expression as
|
||||||
|
$$g_{y^3}\cdot\left(g_{yu}\otimes g_{yu}\otimes g_{u}\right)\cdot P,$$
|
||||||
|
where $P$ is a suitable permutation of columns of the expressions,
|
||||||
|
which permutes them so that the index
|
||||||
|
$(\alpha_1,\beta_3,\alpha_2,\beta_1,\beta_2)$ would go to
|
||||||
|
$(\alpha_1,\alpha_2,\beta_1,\beta_2,\beta_3)$.
|
||||||
|
The permutation $P$ can be very ineffective (copying great amount of
|
||||||
|
small chunks of data) if the equivalence class ordering is chosen
|
||||||
|
badly. However, we do not provide any heuristic minimizing a total
|
||||||
|
time spent in all permutations. We choose an ordering which orders the
|
||||||
|
classes according to their averages, and according to the smallest
|
||||||
|
equivalence class element if the averages are the same. */
|
||||||
|
|
||||||
|
#ifndef EQUIVALENCE_H
|
||||||
|
#define EQUIVALENCE_H
|
||||||
|
|
||||||
|
#include "int_sequence.hh"
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <list>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
/* Here is the abstraction for an equivalence class. We implement it as
|
||||||
|
|vector<int>|. We have a constructor for empty class, copy
|
||||||
|
constructor. What is important here is the ordering operator
|
||||||
|
|operator<| and methods for addition of an integer, and addition of
|
||||||
|
another sequence. Also we provide method |has| which returns true if a
|
||||||
|
given integer is contained. */
|
||||||
|
|
||||||
|
class OrdSequence
|
||||||
|
{
|
||||||
|
vector<int> data;
|
||||||
|
public:
|
||||||
|
OrdSequence() : data()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
OrdSequence(const OrdSequence &s) : data(s.data)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
const OrdSequence &
|
||||||
|
operator=(const OrdSequence &s)
|
||||||
|
{
|
||||||
|
data = s.data; return *this;
|
||||||
|
}
|
||||||
|
bool operator==(const OrdSequence &s) const;
|
||||||
|
int operator[](int i) const;
|
||||||
|
bool operator<(const OrdSequence &s) const;
|
||||||
|
const vector<int> &
|
||||||
|
getData() const
|
||||||
|
{
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
int
|
||||||
|
length() const
|
||||||
|
{
|
||||||
|
return data.size();
|
||||||
|
}
|
||||||
|
void add(int i);
|
||||||
|
void add(const OrdSequence &s);
|
||||||
|
bool has(int i) const;
|
||||||
|
void print(const char *prefix) const;
|
||||||
|
private:
|
||||||
|
double average() const;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Here is the abstraction for the equivalence. It is a list of
|
||||||
|
equivalence classes. Also we remember |n|, which is a size of
|
||||||
|
underlying set $\{0, 1, \ldots, n-1\}$.
|
||||||
|
|
||||||
|
Method |trace| ``prints'' the equivalence into the integer sequence. */
|
||||||
|
|
||||||
|
class Permutation;
|
||||||
|
class Equivalence
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
int n;
|
||||||
|
list<OrdSequence> classes;
|
||||||
|
public:
|
||||||
|
typedef list<OrdSequence>::const_iterator const_seqit;
|
||||||
|
typedef list<OrdSequence>::iterator seqit;
|
||||||
|
|
||||||
|
/* The first constructor constructs $\{\{0\},\{1\},\ldots,\{n-1\}\}$.
|
||||||
|
|
||||||
|
The second constructor constructs $\{\{0,1,\ldots,n-1\}\}$.
|
||||||
|
|
||||||
|
The third is the copy constructor. And the fourth is the copy
|
||||||
|
constructor plus gluing |i1| and |i2| in one class. */
|
||||||
|
Equivalence(int num);
|
||||||
|
Equivalence(int num, const char *dummy);
|
||||||
|
Equivalence(const Equivalence &e);
|
||||||
|
Equivalence(const Equivalence &e, int i1, int i2);
|
||||||
|
|
||||||
|
const Equivalence &operator=(const Equivalence &e);
|
||||||
|
bool operator==(const Equivalence &e) const;
|
||||||
|
bool
|
||||||
|
operator!=(const Equivalence &e) const
|
||||||
|
{
|
||||||
|
return !operator==(e);
|
||||||
|
}
|
||||||
|
int
|
||||||
|
getN() const
|
||||||
|
{
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
int
|
||||||
|
numClasses() const
|
||||||
|
{
|
||||||
|
return classes.size();
|
||||||
|
}
|
||||||
|
void trace(IntSequence &out, int n) const;
|
||||||
|
void
|
||||||
|
trace(IntSequence &out) const
|
||||||
|
{
|
||||||
|
trace(out, numClasses());
|
||||||
|
}
|
||||||
|
void trace(IntSequence &out, const Permutation &per) const;
|
||||||
|
void print(const char *prefix) const;
|
||||||
|
seqit
|
||||||
|
begin()
|
||||||
|
{
|
||||||
|
return classes.begin();
|
||||||
|
}
|
||||||
|
const_seqit
|
||||||
|
begin() const
|
||||||
|
{
|
||||||
|
return classes.begin();
|
||||||
|
}
|
||||||
|
seqit
|
||||||
|
end()
|
||||||
|
{
|
||||||
|
return classes.end();
|
||||||
|
}
|
||||||
|
const_seqit
|
||||||
|
end() const
|
||||||
|
{
|
||||||
|
return classes.end();
|
||||||
|
}
|
||||||
|
const_seqit find(int i) const;
|
||||||
|
seqit find(int i);
|
||||||
|
protected:
|
||||||
|
/* Here we have find methods. We can find an equivalence class having a
|
||||||
|
given number or we can find an equivalence class of a given index within
|
||||||
|
the ordering.
|
||||||
|
|
||||||
|
We have also an |insert| method which inserts a given class
|
||||||
|
according to the class ordering. */
|
||||||
|
const_seqit findHaving(int i) const;
|
||||||
|
seqit findHaving(int i);
|
||||||
|
void insert(const OrdSequence &s);
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/* The |EquivalenceSet| is a list of equivalences. The unique
|
||||||
|
constructor constructs a set of all equivalences over $n$-element
|
||||||
|
set. The equivalences are sorted in the list so that equivalences with
|
||||||
|
fewer number of classes are in the end.
|
||||||
|
|
||||||
|
The two methods |has| and |addParents| are useful in the constructor. */
|
||||||
|
|
||||||
|
class EquivalenceSet
|
||||||
|
{
|
||||||
|
int n;
|
||||||
|
list<Equivalence> equis;
|
||||||
|
public:
|
||||||
|
typedef list<Equivalence>::const_iterator const_iterator;
|
||||||
|
EquivalenceSet(int num);
|
||||||
|
void print(const char *prefix) const;
|
||||||
|
const_iterator
|
||||||
|
begin() const
|
||||||
|
{
|
||||||
|
return equis.begin();
|
||||||
|
}
|
||||||
|
const_iterator
|
||||||
|
end() const
|
||||||
|
{
|
||||||
|
return equis.end();
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
bool has(const Equivalence &e) const;
|
||||||
|
void addParents(const Equivalence &e, list<Equivalence> &added);
|
||||||
|
};
|
||||||
|
|
||||||
|
/* The equivalence bundle class only encapsulates |EquivalenceSet|s
|
||||||
|
from 1 up to a given number. It is able to retrieve the equivalence set
|
||||||
|
over $n$-element set for a given $n$, and also it can generate some more
|
||||||
|
sets on request.
|
||||||
|
|
||||||
|
It is fully responsible for storage needed for |EquivalenceSet|s. */
|
||||||
|
|
||||||
|
class EquivalenceBundle
|
||||||
|
{
|
||||||
|
vector<EquivalenceSet *> bundle;
|
||||||
|
public:
|
||||||
|
EquivalenceBundle(int nmax);
|
||||||
|
~EquivalenceBundle();
|
||||||
|
const EquivalenceSet&get(int n) const;
|
||||||
|
void generateUpTo(int nmax);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,203 +0,0 @@
|
||||||
@q $Id: equivalence.hweb 148 2005-04-19 15:12:26Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Equivalences. Start of {\tt equivalence.h} file
|
|
||||||
|
|
||||||
Here we define an equivalence of a set of integers $\{0, 1, \ldots,
|
|
||||||
k-1\}$. The purpose is clear, in the tensor library we often iterate
|
|
||||||
through all equivalences and sum matrices. We need an abstraction for
|
|
||||||
an equivalence class, equivalence and a set of all equivalences.
|
|
||||||
|
|
||||||
The equivalence class (which is basically a set of integers) is here
|
|
||||||
implemented as ordered integer sequence. The ordered sequence is not
|
|
||||||
implemented via |IntSequence|, but via |vector<int>| since we need
|
|
||||||
insertions. The equivalence is implemented as an ordered list of
|
|
||||||
equivalence classes, and equivalence set is a list of equivalences.
|
|
||||||
|
|
||||||
The ordering of the equivalence classes within an equivalence is very
|
|
||||||
important. For instance, if we iterate through equivalences for $k=5$
|
|
||||||
and pickup some equivalence class, say $\{\{0,4\},\{1,2\},\{3\}\}$, we
|
|
||||||
then evaluate something like:
|
|
||||||
$$\left[B_{y^2u^3}\right]_{\alpha_1\alpha_2\beta_1\beta_2\beta_3}=
|
|
||||||
\cdots+\left[g_{y^3}\right]_{\gamma_1\gamma_2\gamma_3}
|
|
||||||
\left[g_{yu}\right]^{\gamma_1}_{\alpha_1\beta_3}
|
|
||||||
\left[g_{yu}\right]^{\gamma_2}_{\alpha_2\beta_1}
|
|
||||||
\left[g_u\right]^{\gamma_3}_{\beta_2}+\cdots
|
|
||||||
$$
|
|
||||||
If the tensors are unfolded, we can evaluate this expression as
|
|
||||||
$$g_{y^3}\cdot\left(g_{yu}\otimes g_{yu}\otimes g_{u}\right)\cdot P,$$
|
|
||||||
where $P$ is a suitable permutation of columns of the expressions,
|
|
||||||
which permutes them so that the index
|
|
||||||
$(\alpha_1,\beta_3,\alpha_2,\beta_1,\beta_2)$ would go to
|
|
||||||
$(\alpha_1,\alpha_2,\beta_1,\beta_2,\beta_3)$.
|
|
||||||
The permutation $P$ can be very ineffective (copying great amount of
|
|
||||||
small chunks of data) if the equivalence class ordering is chosen
|
|
||||||
badly. However, we do not provide any heuristic minimizing a total
|
|
||||||
time spent in all permutations. We choose an ordering which orders the
|
|
||||||
classes according to their averages, and according to the smallest
|
|
||||||
equivalence class element if the averages are the same.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@s OrdSequence int
|
|
||||||
@s Equivalence int
|
|
||||||
@s EquivalenceSet int
|
|
||||||
|
|
||||||
@c
|
|
||||||
#ifndef EQUIVALENCE_H
|
|
||||||
#define EQUIVALENCE_H
|
|
||||||
|
|
||||||
#include "int_sequence.h"
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
#include <list>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
@<|OrdSequence| class declaration@>;
|
|
||||||
@<|Equivalence| class declaration@>;
|
|
||||||
@<|EquivalenceSet| class declaration@>;
|
|
||||||
@<|EquivalenceBundle| class declaration@>;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
@ Here is the abstraction for an equivalence class. We implement it as
|
|
||||||
|vector<int>|. We have a constructor for empty class, copy
|
|
||||||
constructor. What is important here is the ordering operator
|
|
||||||
|operator<| and methods for addition of an integer, and addition of
|
|
||||||
another sequence. Also we provide method |has| which returns true if a
|
|
||||||
given integer is contained.
|
|
||||||
|
|
||||||
@<|OrdSequence| class declaration@>=
|
|
||||||
class OrdSequence {
|
|
||||||
vector<int> data;
|
|
||||||
public:@/
|
|
||||||
OrdSequence() : data()@+ {}
|
|
||||||
OrdSequence(const OrdSequence& s) : data(s.data)@+ {}
|
|
||||||
const OrdSequence& operator=(const OrdSequence& s)
|
|
||||||
{@+ data = s.data;@+ return *this;@+}
|
|
||||||
bool operator==(const OrdSequence& s) const;
|
|
||||||
int operator[](int i) const;
|
|
||||||
bool operator<(const OrdSequence& s) const;
|
|
||||||
const vector<int>& getData() const
|
|
||||||
{@+ return data;@+}
|
|
||||||
int length() const {@+ return data.size();@+}
|
|
||||||
void add(int i);
|
|
||||||
void add(const OrdSequence& s);
|
|
||||||
bool has(int i) const;
|
|
||||||
void print(const char* prefix) const;
|
|
||||||
private:@/
|
|
||||||
double average() const;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ Here is the abstraction for the equivalence. It is a list of
|
|
||||||
equivalence classes. Also we remember |n|, which is a size of
|
|
||||||
underlying set $\{0, 1, \ldots, n-1\}$.
|
|
||||||
|
|
||||||
Method |trace| ``prints'' the equivalence into the integer sequence.
|
|
||||||
|
|
||||||
@<|Equivalence| class declaration@>=
|
|
||||||
class Permutation;
|
|
||||||
class Equivalence {
|
|
||||||
private:
|
|
||||||
int n;
|
|
||||||
list<OrdSequence> classes;
|
|
||||||
public:@;
|
|
||||||
typedef list<OrdSequence>::const_iterator const_seqit;
|
|
||||||
typedef list<OrdSequence>::iterator seqit;
|
|
||||||
|
|
||||||
@<|Equivalence| constructors@>;
|
|
||||||
const Equivalence& operator=(const Equivalence& e);
|
|
||||||
bool operator==(const Equivalence& e) const;
|
|
||||||
bool operator!=(const Equivalence& e) const
|
|
||||||
{@+ return ! operator==(e);@+}
|
|
||||||
int getN() const {@+ return n;@+}
|
|
||||||
int numClasses() const {@+ return classes.size();@+}
|
|
||||||
void trace(IntSequence& out, int n) const;
|
|
||||||
void trace(IntSequence& out) const
|
|
||||||
{@+ trace(out, numClasses()); @+}
|
|
||||||
void trace(IntSequence& out, const Permutation& per) const;
|
|
||||||
void print(const char* prefix) const;
|
|
||||||
@<|Equivalence| begin and end methods@>;
|
|
||||||
const_seqit find(int i) const;
|
|
||||||
seqit find(int i);
|
|
||||||
protected:@;
|
|
||||||
@<|Equivalence| protected methods@>;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ The |EquivalenceSet| is a list of equivalences. The unique
|
|
||||||
constructor constructs a set of all equivalences over $n$-element
|
|
||||||
set. The equivalences are sorted in the list so that equivalences with
|
|
||||||
fewer number of classes are in the end.
|
|
||||||
|
|
||||||
The two methods |has| and |addParents| are useful in the constructor.
|
|
||||||
|
|
||||||
@<|EquivalenceSet| class declaration@>=
|
|
||||||
class EquivalenceSet {
|
|
||||||
int n;
|
|
||||||
list<Equivalence> equis;
|
|
||||||
public:@;
|
|
||||||
typedef list<Equivalence>::const_iterator const_iterator;
|
|
||||||
EquivalenceSet(int num);
|
|
||||||
void print(const char* prefix) const;
|
|
||||||
const_iterator begin() const
|
|
||||||
{@+ return equis.begin();@+}
|
|
||||||
const_iterator end() const
|
|
||||||
{@+ return equis.end();@+}
|
|
||||||
private:@;
|
|
||||||
bool has(const Equivalence& e) const;
|
|
||||||
void addParents(const Equivalence& e, list<Equivalence>& added);
|
|
||||||
};
|
|
||||||
|
|
||||||
@ The equivalence bundle class only encapsulates |EquivalenceSet|s
|
|
||||||
from 1 up to a given number. It is able to retrieve the equivalence set
|
|
||||||
over $n$-element set for a given $n$, and also it can generate some more
|
|
||||||
sets on request.
|
|
||||||
|
|
||||||
It is fully responsible for storage needed for |EquivalenceSet|s.
|
|
||||||
|
|
||||||
@<|EquivalenceBundle| class declaration@>=
|
|
||||||
class EquivalenceBundle {
|
|
||||||
vector<EquivalenceSet*> bundle;
|
|
||||||
public:@;
|
|
||||||
EquivalenceBundle(int nmax);
|
|
||||||
~EquivalenceBundle();
|
|
||||||
const EquivalenceSet& get(int n) const;
|
|
||||||
void generateUpTo(int nmax);
|
|
||||||
};
|
|
||||||
|
|
||||||
@ The first constructor constructs $\{\{0\},\{1\},\ldots,\{n-1\}\}$.
|
|
||||||
|
|
||||||
The second constructor constructs $\{\{0,1,\ldots,n-1\}\}$.
|
|
||||||
|
|
||||||
The third is the copy constructor. And the fourth is the copy
|
|
||||||
constructor plus gluing |i1| and |i2| in one class.
|
|
||||||
|
|
||||||
@<|Equivalence| constructors@>=
|
|
||||||
Equivalence(int num);
|
|
||||||
Equivalence(int num, const char* dummy);
|
|
||||||
Equivalence(const Equivalence& e);
|
|
||||||
Equivalence(const Equivalence& e, int i1, int i2);
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|Equivalence| begin and end methods@>=
|
|
||||||
seqit begin() {@+ return classes.begin();@+}
|
|
||||||
const_seqit begin() const {@+ return classes.begin();@+}
|
|
||||||
seqit end() {@+ return classes.end();@+}
|
|
||||||
const_seqit end() const {@+ return classes.end();@+}
|
|
||||||
|
|
||||||
@ Here we have find methods. We can find an equivalence class having a
|
|
||||||
given number or we can find an equivalence class of a given index within
|
|
||||||
the ordering.
|
|
||||||
|
|
||||||
We have also an |insert| method which inserts a given class
|
|
||||||
according to the class ordering.
|
|
||||||
|
|
||||||
@<|Equivalence| protected methods@>=
|
|
||||||
const_seqit findHaving(int i) const;
|
|
||||||
seqit findHaving(int i);
|
|
||||||
void insert(const OrdSequence& s);
|
|
||||||
|
|
||||||
@ End of {\tt equivalence.h} file.
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
// Copyright 2005, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "fine_container.hh"
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
/* Here we construct the vector of new sizes of containers (before
|
||||||
|
|nc|) and copy all remaining sizes behind |nc|. */
|
||||||
|
|
||||||
|
/* Split each of the first |nc| sizes of |s| into roughly equal chunks
   not exceeding |max|; the sizes behind |nc| are copied unchanged.
   ind_map records, for every refined index, the original index it came
   from. */
SizeRefinement::SizeRefinement(const IntSequence &s, int nc, int max)
{
  new_nc = 0;
  for (int orig = 0; orig < nc; orig++)
    {
      // Number of chunks for s[orig], rounded up.
      int nchunk = s[orig]/max;
      if (s[orig] % max != 0)
        nchunk++;
      // Common chunk size; the last chunk absorbs the remainder.
      int chunk = (nchunk > 0) ? (int) round(((double) s[orig])/nchunk) : 0;
      for (int j = 0; j < nchunk - 1; j++)
        {
          rsizes.push_back(chunk);
          ind_map.push_back(orig);
          new_nc++;
        }
      rsizes.push_back(s[orig]-(nchunk-1)*chunk);
      ind_map.push_back(orig);
      new_nc++;
    }

  // Sizes past nc are not refined, only copied with their mapping.
  for (int orig = nc; orig < s.size(); orig++)
    {
      rsizes.push_back(s[orig]);
      ind_map.push_back(orig);
    }
}
|
|
@ -1,41 +0,0 @@
|
||||||
@q $Id: fine_container.cweb 1833 2008-05-18 20:22:39Z kamenik $ @>
|
|
||||||
@q Copyright 2005, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt stack\_container.cpp} file.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#include "fine_container.h"
|
|
||||||
|
|
||||||
#include <cmath>
|
|
||||||
|
|
||||||
@<|SizeRefinement| constructor code@>;
|
|
||||||
|
|
||||||
@ Here we construct the vector of new sizes of containers (before
|
|
||||||
|nc|) and copy all remaining sizes behind |nc|.
|
|
||||||
|
|
||||||
@<|SizeRefinement| constructor code@>=
|
|
||||||
SizeRefinement::SizeRefinement(const IntSequence& s, int nc, int max)
|
|
||||||
{
|
|
||||||
new_nc = 0;
|
|
||||||
for (int i = 0; i < nc; i++) {
|
|
||||||
int nr = s[i]/max;
|
|
||||||
if (s[i] % max != 0)
|
|
||||||
nr++;
|
|
||||||
int ss = (nr>0) ? (int)round(((double)s[i])/nr) : 0;
|
|
||||||
for (int j = 0; j < nr - 1; j++) {
|
|
||||||
rsizes.push_back(ss);
|
|
||||||
ind_map.push_back(i);
|
|
||||||
new_nc++;
|
|
||||||
}
|
|
||||||
rsizes.push_back(s[i]-(nr-1)*ss);
|
|
||||||
ind_map.push_back(i);
|
|
||||||
new_nc++;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = nc; i < s.size(); i++) {
|
|
||||||
rsizes.push_back(s[i]);
|
|
||||||
ind_map.push_back(i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ End of {\tt stack\_container.cpp} file.
|
|
|
@ -0,0 +1,162 @@
|
||||||
|
// Copyright 2005, Ondra Kamenik
|
||||||
|
|
||||||
|
// Refined stack of containers.
|
||||||
|
|
||||||
|
/* This file defines a refinement of the stack container. It makes a
|
||||||
|
vertical refinement of a given stack container, it refines only matrix
|
||||||
|
items, the items which are always zero, or can be identity matrices
|
||||||
|
are not refined.
|
||||||
|
|
||||||
|
The refinement is done by a simple construction from the stack
|
||||||
|
container being refined. A parameter is passed meaning a maximum size
|
||||||
|
of each stack in the refined container. The resulting object is stack
|
||||||
|
container, so everything works seamlessly.
|
||||||
|
|
||||||
|
We define here a class for refinement of sizes |SizeRefinement|, this
|
||||||
|
is purely an auxiliary class allowing us to write a code more
|
||||||
|
concisely. The main class of this file is |FineContainer|, which
|
||||||
|
corresponds to refining. The two more classes |FoldedFineContainer|
|
||||||
|
and |UnfoldedFineContainer| are its specializations.
|
||||||
|
|
||||||
|
NOTE: This code was implemented with a hope that it will help to cut
|
||||||
|
down memory allocations during the Faa Di Bruno formula
|
||||||
|
evaluation. However, it seems that this needs to be accompanied with a
|
||||||
|
similar thing for tensor multidimensional index. Thus, the abstraction
|
||||||
|
is not currently used, but it might be useful in future. */
|
||||||
|
|
||||||
|
#ifndef FINE_CONTAINER_H
|
||||||
|
#define FINE_CONTAINER_H
|
||||||
|
|
||||||
|
#include "stack_container.hh"
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
/* This class splits the first |nc| elements of the given sequence |s|
|
||||||
|
to a sequence not having items greater than given |max|. The remaining
|
||||||
|
elements (those behind |nc|) are left untouched. It also remembers the
|
||||||
|
mapping, i.e. for a given index in a new sequence, it is able to
|
||||||
|
return a corresponding index in old sequence. */
|
||||||
|
|
||||||
|
class SizeRefinement
|
||||||
|
{
|
||||||
|
vector<int> rsizes;
|
||||||
|
vector<int> ind_map;
|
||||||
|
int new_nc;
|
||||||
|
public:
|
||||||
|
SizeRefinement(const IntSequence &s, int nc, int max);
|
||||||
|
int
|
||||||
|
getRefSize(int i) const
|
||||||
|
{
|
||||||
|
return rsizes[i];
|
||||||
|
}
|
||||||
|
int
|
||||||
|
numRefinements() const
|
||||||
|
{
|
||||||
|
return rsizes.size();
|
||||||
|
}
|
||||||
|
int
|
||||||
|
getOldIndex(int i) const
|
||||||
|
{
|
||||||
|
return ind_map[i];
|
||||||
|
}
|
||||||
|
int
|
||||||
|
getNC() const
|
||||||
|
{
|
||||||
|
return new_nc;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This main class of this class refines a given stack container, and
|
||||||
|
inherits from the stack container. It also defines the |getType|
|
||||||
|
method, which returns a type for a given stack as the type of the
|
||||||
|
corresponding (old) stack of the former stack container. */
|
||||||
|
|
||||||
|
template <class _Ttype>
|
||||||
|
class FineContainer : public SizeRefinement, public StackContainer<_Ttype>
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
typedef StackContainer<_Ttype> _Stype;
|
||||||
|
typedef typename StackContainerInterface<_Ttype>::_Ctype _Ctype;
|
||||||
|
typedef typename StackContainerInterface<_Ttype>::itype itype;
|
||||||
|
_Ctype **const ref_conts;
|
||||||
|
const _Stype &stack_cont;
|
||||||
|
public:
|
||||||
|
/* Here we construct the |SizeRefinement| and allocate space for the
|
||||||
|
refined containers. Then, the containers are created and put to
|
||||||
|
|conts| array. Note that the containers do not claim any further
|
||||||
|
space, since all the tensors of the created containers are in-place
|
||||||
|
submatrices.
|
||||||
|
|
||||||
|
Here we use a dirty trick of converting |const| pointer to non-|const|
|
||||||
|
pointer and passing it to a subtensor container constructor. The
|
||||||
|
containers are stored in |ref_conts| and then in |conts| from
|
||||||
|
|StackContainer|. However, this is safe since neither |ref_conts| nor
|
||||||
|
|conts| are used in non-|const| contexts. For example,
|
||||||
|
|StackContainer| has only a |const| method to return a member of
|
||||||
|
|conts|. */
|
||||||
|
|
||||||
|
FineContainer(const _Stype &sc, int max)
|
||||||
|
: SizeRefinement(sc.getStackSizes(), sc.numConts(), max),
|
||||||
|
StackContainer<_Ttype>(numRefinements(), getNC()),
|
||||||
|
ref_conts(new _Ctype *[getNC()]),
|
||||||
|
stack_cont(sc)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < numRefinements(); i++)
|
||||||
|
_Stype::stack_sizes[i] = getRefSize(i);
|
||||||
|
_Stype::calculateOffsets();
|
||||||
|
|
||||||
|
int last_cont = -1;
|
||||||
|
int last_row = 0;
|
||||||
|
for (int i = 0; i < getNC(); i++)
|
||||||
|
{
|
||||||
|
if (getOldIndex(i) != last_cont)
|
||||||
|
{
|
||||||
|
last_cont = getOldIndex(i);
|
||||||
|
last_row = 0;
|
||||||
|
}
|
||||||
|
union {const _Ctype *c; _Ctype *n;} convert;
|
||||||
|
convert.c = stack_cont.getCont(last_cont);
|
||||||
|
ref_conts[i] = new _Ctype(last_row, _Stype::stack_sizes[i],
|
||||||
|
*(convert.n));
|
||||||
|
_Stype::conts[i] = ref_conts[i];
|
||||||
|
last_row += _Stype::stack_sizes[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we deallocate the refined containers, and deallocate the array of
|
||||||
|
refined containers. */
|
||||||
|
virtual ~FineContainer()
|
||||||
|
{
|
||||||
|
for (int i = 0; i < _Stype::numConts(); i++)
|
||||||
|
delete ref_conts[i];
|
||||||
|
delete [] ref_conts;
|
||||||
|
}
|
||||||
|
itype
|
||||||
|
getType(int i, const Symmetry &s) const
|
||||||
|
{
|
||||||
|
return stack_cont.getType(getOldIndex(i), s);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Here is |FineContainer| specialization for folded tensors. */
|
||||||
|
class FoldedFineContainer : public FineContainer<FGSTensor>, public FoldedStackContainer
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
FoldedFineContainer(const StackContainer<FGSTensor> &sc, int max)
|
||||||
|
: FineContainer<FGSTensor>(sc, max)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Here is |FineContainer| specialization for unfolded tensors. */
|
||||||
|
class UnfoldedFineContainer : public FineContainer<UGSTensor>, public UnfoldedStackContainer
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
UnfoldedFineContainer(const StackContainer<UGSTensor> &sc, int max)
|
||||||
|
: FineContainer<UGSTensor>(sc, max)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,164 +0,0 @@
|
||||||
@q $Id: fine_container.hweb 332 2005-07-15 13:41:48Z kamenik $ @>
|
|
||||||
@q Copyright 2005, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Refined stack of containers. Start of {\tt fine\_container.h} file.
|
|
||||||
|
|
||||||
This file defines a refinement of the stack container. It makes a
|
|
||||||
vertical refinement of a given stack container, it refines only matrix
|
|
||||||
items, the items which are always zero, or can be identity matrices
|
|
||||||
are not refined.
|
|
||||||
|
|
||||||
The refinement is done by a simple construction from the stack
|
|
||||||
container being refined. A parameter is passed meaning a maximum size
|
|
||||||
of each stack in the refined container. The resulting object is stack
|
|
||||||
container, so everything works seamlessly.
|
|
||||||
|
|
||||||
We define here a class for refinement of sizes |SizeRefinement|, this
|
|
||||||
is purely an auxiliary class allowing us to write a code more
|
|
||||||
concisely. The main class of this file is |FineContainer|, which
|
|
||||||
corresponds to refining. The two more classes |FoldedFineContainer|
|
|
||||||
and |UnfoldedFineContainer| are its specializations.
|
|
||||||
|
|
||||||
NOTE: This code was implemented with a hope that it will help to cut
|
|
||||||
down memory allocations during the Faa Di Bruno formula
|
|
||||||
evaluation. However, it seems that this needs to be accompanied with a
|
|
||||||
similar thing for tensor multidimensional index. Thus, the abstraction
|
|
||||||
is not currently used, but it might be useful in future.
|
|
||||||
|
|
||||||
@s SizeRefinement int
|
|
||||||
@s FineContainer int
|
|
||||||
@s FoldedFineContainer int
|
|
||||||
@s UnfoldedFineContainer int
|
|
||||||
|
|
||||||
@c
|
|
||||||
#ifndef FINE_CONTAINER_H
|
|
||||||
#define FINE_CONTAINER_H
|
|
||||||
|
|
||||||
#include "stack_container.h"
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
@<|SizeRefinement| class declaration@>;
|
|
||||||
@<|FineContainer| class declaration@>;
|
|
||||||
@<|FoldedFineContainer| class declaration@>;
|
|
||||||
@<|UnfoldedFineContainer| class declaration@>;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
@ This class splits the first |nc| elements of the given sequence |s|
|
|
||||||
to a sequence not having items greater than given |max|. The remaining
|
|
||||||
elements (those behind |nc|) are left untouched. It also remembers the
|
|
||||||
mapping, i.e. for a given index in a new sequence, it is able to
|
|
||||||
return a corresponding index in old sequence.
|
|
||||||
|
|
||||||
@<|SizeRefinement| class declaration@>=
|
|
||||||
class SizeRefinement {
|
|
||||||
vector<int> rsizes;
|
|
||||||
vector<int> ind_map;
|
|
||||||
int new_nc;
|
|
||||||
public:@;
|
|
||||||
SizeRefinement(const IntSequence& s, int nc, int max);
|
|
||||||
int getRefSize(int i) const
|
|
||||||
{@+ return rsizes[i];@+}
|
|
||||||
int numRefinements() const
|
|
||||||
{@+ return rsizes.size();@+}
|
|
||||||
int getOldIndex(int i) const
|
|
||||||
{@+ return ind_map[i];@+}
|
|
||||||
int getNC() const
|
|
||||||
{@+ return new_nc;@+}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ This main class of this class refines a given stack container, and
|
|
||||||
inherits from the stack container. It also defines the |getType|
|
|
||||||
method, which returns a type for a given stack as the type of the
|
|
||||||
corresponding (old) stack of the former stack container.
|
|
||||||
|
|
||||||
@<|FineContainer| class declaration@>=
|
|
||||||
template <class _Ttype>@;
|
|
||||||
class FineContainer : public SizeRefinement, public StackContainer<_Ttype> {
|
|
||||||
protected:@;
|
|
||||||
typedef StackContainer<_Ttype> _Stype;
|
|
||||||
typedef typename StackContainerInterface<_Ttype>::_Ctype _Ctype;
|
|
||||||
typedef typename StackContainerInterface<_Ttype>::itype itype;
|
|
||||||
_Ctype** const ref_conts;
|
|
||||||
const _Stype& stack_cont;
|
|
||||||
public:@;
|
|
||||||
@<|FineContainer| constructor@>;
|
|
||||||
@<|FineContainer| destructor@>;
|
|
||||||
itype getType(int i, const Symmetry& s) const
|
|
||||||
{@+ return stack_cont.getType(getOldIndex(i), s);@+}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we construct the |SizeRefinement| and allocate space for the
|
|
||||||
refined containers. Then, the containers are created and put to
|
|
||||||
|conts| array. Note that the containers do not claim any further
|
|
||||||
space, since all the tensors of the created containers are in-place
|
|
||||||
submatrices.
|
|
||||||
|
|
||||||
Here we use a dirty trick of converting |const| pointer to non-|const|
|
|
||||||
pointer and passing it to a subtensor container constructor. The
|
|
||||||
containers are stored in |ref_conts| and then in |conts| from
|
|
||||||
|StackContainer|. However, this is safe since neither |ref_conts| nor
|
|
||||||
|conts| are used in non-|const| contexts. For example,
|
|
||||||
|StackContainer| has only a |const| method to return a member of
|
|
||||||
|conts|.
|
|
||||||
|
|
||||||
@<|FineContainer| constructor@>=
|
|
||||||
FineContainer(const _Stype& sc, int max)
|
|
||||||
: SizeRefinement(sc.getStackSizes(), sc.numConts(), max),
|
|
||||||
StackContainer<_Ttype>(numRefinements(), getNC()),
|
|
||||||
ref_conts(new _Ctype*[getNC()]),
|
|
||||||
stack_cont(sc)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < numRefinements(); i++)
|
|
||||||
_Stype::stack_sizes[i] = getRefSize(i);
|
|
||||||
_Stype::calculateOffsets();
|
|
||||||
|
|
||||||
int last_cont = -1;
|
|
||||||
int last_row = 0;
|
|
||||||
for (int i = 0; i < getNC(); i++) {
|
|
||||||
if (getOldIndex(i) != last_cont) {
|
|
||||||
last_cont = getOldIndex(i);
|
|
||||||
last_row = 0;
|
|
||||||
}
|
|
||||||
union {const _Ctype* c; _Ctype* n;} convert;
|
|
||||||
convert.c = stack_cont.getCont(last_cont);
|
|
||||||
ref_conts[i] = new _Ctype(last_row, _Stype::stack_sizes[i],
|
|
||||||
*(convert.n));
|
|
||||||
_Stype::conts[i] = ref_conts[i];
|
|
||||||
last_row += _Stype::stack_sizes[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we deallocate the refined containers, and deallocate the array of refined containers.
|
|
||||||
@<|FineContainer| destructor@>=
|
|
||||||
virtual ~FineContainer()
|
|
||||||
{
|
|
||||||
for (int i = 0; i < _Stype::numConts(); i++)
|
|
||||||
delete ref_conts[i];
|
|
||||||
delete [] ref_conts;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ Here is |FineContainer| specialization for folded tensors.
|
|
||||||
@<|FoldedFineContainer| class declaration@>=
|
|
||||||
class FoldedFineContainer : public FineContainer<FGSTensor>, public FoldedStackContainer {
|
|
||||||
public:@;
|
|
||||||
FoldedFineContainer(const StackContainer<FGSTensor>& sc, int max)
|
|
||||||
: FineContainer<FGSTensor>(sc, max) @+ {}
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Here is |FineContainer| specialization for unfolded tensors.
|
|
||||||
@<|UnfoldedFineContainer| class declaration@>=
|
|
||||||
class UnfoldedFineContainer : public FineContainer<UGSTensor>, public UnfoldedStackContainer {
|
|
||||||
public:@;
|
|
||||||
UnfoldedFineContainer(const StackContainer<UGSTensor>& sc, int max)
|
|
||||||
: FineContainer<UGSTensor>(sc, max) @+ {}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt fine\_container.h} file.
|
|
|
@ -0,0 +1,290 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "fs_tensor.hh"
|
||||||
|
#include "gs_tensor.hh"
|
||||||
|
#include "sparse_tensor.hh"
|
||||||
|
#include "rfs_tensor.hh"
|
||||||
|
#include "tl_exception.hh"
|
||||||
|
|
||||||
|
/* This constructs a fully symmetric tensor as given by the contraction:
|
||||||
|
$$\left[g_{y^n}\right]_{\alpha_1\ldots\alpha_n}=
|
||||||
|
\left[t_{y^{n+1}}\right]_{\alpha_1\ldots\alpha_n\beta}[x]^\beta$$
|
||||||
|
|
||||||
|
We go through all columns of output tensor $[g]$ and for each column
|
||||||
|
we cycle through all variables, insert a variable to the column
|
||||||
|
coordinates obtaining a column of tensor $[t]$. the column is multiplied
|
||||||
|
by an appropriate item of |x| and added to the column of $[g]$ tensor. */
|
||||||
|
|
||||||
|
FFSTensor::FFSTensor(const FFSTensor &t, const ConstVector &x)
|
||||||
|
: FTensor(along_col, IntSequence(t.dimen()-1, t.nvar()),
|
||||||
|
t.nrows(), calcMaxOffset(t.nvar(), t.dimen()-1), t.dimen()-1),
|
||||||
|
nv(t.nvar())
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(t.dimen() < 1,
|
||||||
|
"Wrong dimension for tensor contraction of FFSTensor");
|
||||||
|
TL_RAISE_IF(t.nvar() != x.length(),
|
||||||
|
"Wrong number of variables for tensor contraction of FFSTensor");
|
||||||
|
|
||||||
|
zeros();
|
||||||
|
|
||||||
|
for (Tensor::index to = begin(); to != end(); ++to)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < nvar(); i++)
|
||||||
|
{
|
||||||
|
IntSequence from_ind(i, to.getCoor());
|
||||||
|
Tensor::index from(&t, from_ind);
|
||||||
|
addColumn(x[i], t, *from, *to);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This returns number of indices for folded tensor with full
|
||||||
|
symmetry. Let $n$ be a number of variables |nvar| and $d$ the
|
||||||
|
dimension |dim|. Then the number of indices is $\pmatrix{n+d-1\cr d}$. */
|
||||||
|
|
||||||
|
int
|
||||||
|
FFSTensor::calcMaxOffset(int nvar, int d)
|
||||||
|
{
|
||||||
|
if (nvar == 0 && d == 0)
|
||||||
|
return 1;
|
||||||
|
if (nvar == 0 && d > 0)
|
||||||
|
return 0;
|
||||||
|
return noverk(nvar + d - 1, d);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The conversion from sparse tensor is clear. We go through all the
|
||||||
|
tensor and write to the dense what is found. */
|
||||||
|
FFSTensor::FFSTensor(const FSSparseTensor &t)
|
||||||
|
: FTensor(along_col, IntSequence(t.dimen(), t.nvar()),
|
||||||
|
t.nrows(), calcMaxOffset(t.nvar(), t.dimen()), t.dimen()),
|
||||||
|
nv(t.nvar())
|
||||||
|
{
|
||||||
|
zeros();
|
||||||
|
for (FSSparseTensor::const_iterator it = t.getMap().begin();
|
||||||
|
it != t.getMap().end(); ++it)
|
||||||
|
{
|
||||||
|
index ind(this, (*it).first);
|
||||||
|
get((*it).second.first, *ind) = (*it).second.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The conversion from unfolded copies only columns of respective
|
||||||
|
coordinates. So we go through all the columns in the folded tensor
|
||||||
|
(this), make an index of the unfolded vector from coordinates, and
|
||||||
|
copy the column. */
|
||||||
|
|
||||||
|
FFSTensor::FFSTensor(const UFSTensor &ut)
|
||||||
|
: FTensor(along_col, IntSequence(ut.dimen(), ut.nvar()),
|
||||||
|
ut.nrows(), calcMaxOffset(ut.nvar(), ut.dimen()), ut.dimen()),
|
||||||
|
nv(ut.nvar())
|
||||||
|
{
|
||||||
|
for (index in = begin(); in != end(); ++in)
|
||||||
|
{
|
||||||
|
index src(&ut, in.getCoor());
|
||||||
|
copyColumn(ut, *src, *in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here just make a new instance and return the reference. */
|
||||||
|
UTensor &
|
||||||
|
FFSTensor::unfold() const
|
||||||
|
{
|
||||||
|
return *(new UFSTensor(*this));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Incrementing is easy. We have to increment by calling static method
|
||||||
|
|UTensor::increment| first. In this way, we have coordinates of
|
||||||
|
unfolded tensor. Then we have to skip to the closest folded index
|
||||||
|
which corresponds to monotonizeing the integer sequence. */
|
||||||
|
|
||||||
|
void
|
||||||
|
FFSTensor::increment(IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input/output vector size in FFSTensor::increment");
|
||||||
|
|
||||||
|
UTensor::increment(v, nv);
|
||||||
|
v.monotone();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Decrement calls static |FTensor::decrement|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
FFSTensor::decrement(IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input/output vector size in FFSTensor::decrement");
|
||||||
|
|
||||||
|
FTensor::decrement(v, nv);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
FFSTensor::getOffset(const IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input vector size in FFSTensor::getOffset");
|
||||||
|
|
||||||
|
return FTensor::getOffset(v, nv);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we add a general symmetry tensor to the (part of) full symmetry
|
||||||
|
tensor provided that the unique variable of the full symmetry tensor
|
||||||
|
is a stack of variables from the general symmetry tensor.
|
||||||
|
|
||||||
|
We check for the dimensions and number of variables. Then we calculate
|
||||||
|
a shift of coordinates when going from the general symmetry tensor to
|
||||||
|
full symmetry (it corresponds to shift of coordinates induces by
|
||||||
|
stacking the variables). Then we add the appropriate columns by going
|
||||||
|
through the columns in general symmetry, adding the shift and sorting. */
|
||||||
|
|
||||||
|
void
|
||||||
|
FFSTensor::addSubTensor(const FGSTensor &t)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(dimen() != t.getDims().dimen(),
|
||||||
|
"Wrong dimensions for FFSTensor::addSubTensor");
|
||||||
|
TL_RAISE_IF(nvar() != t.getDims().getNVS().sum(),
|
||||||
|
"Wrong nvs for FFSTensor::addSubTensor");
|
||||||
|
|
||||||
|
// set shift for |addSubTensor|
|
||||||
|
/* Code shared with UFSTensor::addSubTensor() */
|
||||||
|
IntSequence shift_pre(t.getSym().num(), 0);
|
||||||
|
for (int i = 1; i < t.getSym().num(); i++)
|
||||||
|
shift_pre[i] = shift_pre[i-1]+t.getDims().getNVS()[i-1];
|
||||||
|
IntSequence shift(t.getSym(), shift_pre);
|
||||||
|
|
||||||
|
for (Tensor::index ind = t.begin(); ind != t.end(); ++ind)
|
||||||
|
{
|
||||||
|
IntSequence c(ind.getCoor());
|
||||||
|
c.add(1, shift);
|
||||||
|
c.sort();
|
||||||
|
Tensor::index tar(this, c);
|
||||||
|
addColumn(t, *ind, *tar);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// |UFSTensor| contraction constructor
|
||||||
|
/* This is a bit more straightforward than |@<|FFSTensor| contraction constructor@>|.
|
||||||
|
We do not add column by column but we do it by submatrices due to
|
||||||
|
regularity of the unfolded tensor. */
|
||||||
|
|
||||||
|
UFSTensor::UFSTensor(const UFSTensor &t, const ConstVector &x)
|
||||||
|
: UTensor(along_col, IntSequence(t.dimen()-1, t.nvar()),
|
||||||
|
t.nrows(), calcMaxOffset(t.nvar(), t.dimen()-1), t.dimen()-1),
|
||||||
|
nv(t.nvar())
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(t.dimen() < 1,
|
||||||
|
"Wrong dimension for tensor contraction of UFSTensor");
|
||||||
|
TL_RAISE_IF(t.nvar() != x.length(),
|
||||||
|
"Wrong number of variables for tensor contraction of UFSTensor");
|
||||||
|
|
||||||
|
zeros();
|
||||||
|
|
||||||
|
for (int i = 0; i < ncols(); i++)
|
||||||
|
{
|
||||||
|
ConstTwoDMatrix tpart(t, i *nvar(), nvar());
|
||||||
|
Vector outcol(*this, i);
|
||||||
|
tpart.multaVec(outcol, x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we convert folded full symmetry tensor to unfolded. We copy all
|
||||||
|
columns of folded tensor, and then call |unfoldData()|. */
|
||||||
|
|
||||||
|
UFSTensor::UFSTensor(const FFSTensor &ft)
|
||||||
|
: UTensor(along_col, IntSequence(ft.dimen(), ft.nvar()),
|
||||||
|
ft.nrows(), calcMaxOffset(ft.nvar(), ft.dimen()), ft.dimen()),
|
||||||
|
nv(ft.nvar())
|
||||||
|
{
|
||||||
|
for (index src = ft.begin(); src != ft.end(); ++src)
|
||||||
|
{
|
||||||
|
index in(this, src.getCoor());
|
||||||
|
copyColumn(ft, *src, *in);
|
||||||
|
}
|
||||||
|
unfoldData();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we just return a reference to new instance of folded tensor. */
|
||||||
|
FTensor &
|
||||||
|
UFSTensor::fold() const
|
||||||
|
{
|
||||||
|
return *(new FFSTensor(*this));
|
||||||
|
}
|
||||||
|
|
||||||
|
// |UFSTensor| increment and decrement
|
||||||
|
/* Here we just call |UTensor| respective static methods. */
|
||||||
|
void
|
||||||
|
UFSTensor::increment(IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input/output vector size in UFSTensor::increment");
|
||||||
|
|
||||||
|
UTensor::increment(v, nv);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
UFSTensor::decrement(IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input/output vector size in UFSTensor::decrement");
|
||||||
|
|
||||||
|
UTensor::decrement(v, nv);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
UFSTensor::getOffset(const IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input vector size in UFSTensor::getOffset");
|
||||||
|
|
||||||
|
return UTensor::getOffset(v, nv);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This is very similar to |@<|FFSTensor::addSubTensor| code@>|. The
|
||||||
|
only difference is the addition. We go through all columns in the full
|
||||||
|
symmetry tensor and cancel the shift. If the coordinates after the
|
||||||
|
cancellation are positive, we find the column in the general symmetry
|
||||||
|
tensor, and add it. */
|
||||||
|
|
||||||
|
void
|
||||||
|
UFSTensor::addSubTensor(const UGSTensor &t)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(dimen() != t.getDims().dimen(),
|
||||||
|
"Wrong dimensions for UFSTensor::addSubTensor");
|
||||||
|
TL_RAISE_IF(nvar() != t.getDims().getNVS().sum(),
|
||||||
|
"Wrong nvs for UFSTensor::addSubTensor");
|
||||||
|
|
||||||
|
// set shift for |addSubTensor|
|
||||||
|
/* Code shared with FFSTensor::addSubTensor() */
|
||||||
|
IntSequence shift_pre(t.getSym().num(), 0);
|
||||||
|
for (int i = 1; i < t.getSym().num(); i++)
|
||||||
|
shift_pre[i] = shift_pre[i-1]+t.getDims().getNVS()[i-1];
|
||||||
|
IntSequence shift(t.getSym(), shift_pre);
|
||||||
|
|
||||||
|
for (Tensor::index tar = begin(); tar != end(); ++tar)
|
||||||
|
{
|
||||||
|
IntSequence c(tar.getCoor());
|
||||||
|
c.sort();
|
||||||
|
c.add(-1, shift);
|
||||||
|
if (c.isPositive() && c.less(t.getDims().getNVX()))
|
||||||
|
{
|
||||||
|
Tensor::index from(&t, c);
|
||||||
|
addColumn(t, *from, *tar);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we go through all columns, find a column of folded index, and
|
||||||
|
then copy the column data. Finding the index is done by sorting the
|
||||||
|
integer sequence. */
|
||||||
|
|
||||||
|
void
|
||||||
|
UFSTensor::unfoldData()
|
||||||
|
{
|
||||||
|
for (index in = begin(); in != end(); ++in)
|
||||||
|
{
|
||||||
|
IntSequence v(in.getCoor());
|
||||||
|
v.sort();
|
||||||
|
index tmp(this, v);
|
||||||
|
copyColumn(*tmp, *in);
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,306 +0,0 @@
|
||||||
@q $Id: fs_tensor.cweb 280 2005-06-13 09:40:02Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt fs\_tensor.cpp} file.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#include "fs_tensor.h"
|
|
||||||
#include "gs_tensor.h"
|
|
||||||
#include "sparse_tensor.h"
|
|
||||||
#include "rfs_tensor.h"
|
|
||||||
#include "tl_exception.h"
|
|
||||||
|
|
||||||
@<|FFSTensor| contraction constructor@>;
|
|
||||||
@<|FFSTensor::calcMaxOffset| code@>;
|
|
||||||
@<|FFSTensor| conversion from sparse@>;
|
|
||||||
@<|FFSTensor| conversion from unfolded@>;
|
|
||||||
@<|FFSTensor::unfold| code@>;
|
|
||||||
@<|FFSTensor::increment| code@>;
|
|
||||||
@<|FFSTensor::decrement| code@>;
|
|
||||||
@<|FFSTensor::getOffset| code@>;
|
|
||||||
@<|FFSTensor::addSubTensor| code@>;
|
|
||||||
@<|UFSTensor| contraction constructor@>;
|
|
||||||
@<|UFSTensor| conversion from folded@>;
|
|
||||||
@<|UFSTensor::fold| code@>;
|
|
||||||
@<|UFSTensor| increment and decrement@>;
|
|
||||||
@<|UFSTensor::getOffset| code@>;
|
|
||||||
@<|UFSTensor::addSubTensor| code@>;
|
|
||||||
@<|UFSTensor::unfoldData| code@>;
|
|
||||||
|
|
||||||
@ This constructs a fully symmetric tensor as given by the contraction:
|
|
||||||
$$\left[g_{y^n}\right]_{\alpha_1\ldots\alpha_n}=
|
|
||||||
\left[t_{y^{n+1}}\right]_{\alpha_1\ldots\alpha_n\beta}[x]^\beta$$
|
|
||||||
|
|
||||||
We go through all columns of output tensor $[g]$ and for each column
|
|
||||||
we cycle through all variables, insert a variable to the column
|
|
||||||
coordinates obtaining a column of tensor $[t]$. the column is multiplied
|
|
||||||
by an appropriate item of |x| and added to the column of $[g]$ tensor.
|
|
||||||
|
|
||||||
@<|FFSTensor| contraction constructor@>=
|
|
||||||
FFSTensor::FFSTensor(const FFSTensor& t, const ConstVector& x)
|
|
||||||
: FTensor(along_col, IntSequence(t.dimen()-1, t.nvar()),
|
|
||||||
t.nrows(), calcMaxOffset(t.nvar(), t.dimen()-1), t.dimen()-1),
|
|
||||||
nv(t.nvar())
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(t.dimen() < 1,
|
|
||||||
"Wrong dimension for tensor contraction of FFSTensor");
|
|
||||||
TL_RAISE_IF(t.nvar() != x.length(),
|
|
||||||
"Wrong number of variables for tensor contraction of FFSTensor");
|
|
||||||
|
|
||||||
zeros();
|
|
||||||
|
|
||||||
for (Tensor::index to = begin(); to != end(); ++to) {
|
|
||||||
for (int i = 0; i < nvar(); i++) {
|
|
||||||
IntSequence from_ind(i, to.getCoor());
|
|
||||||
Tensor::index from(&t, from_ind);
|
|
||||||
addColumn(x[i], t, *from, *to);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ This returns number of indices for folded tensor with full
|
|
||||||
symmetry. Let $n$ be a number of variables |nvar| and $d$ the
|
|
||||||
dimension |dim|. Then the number of indices is $\pmatrix{n+d-1\cr d}$.
|
|
||||||
|
|
||||||
@<|FFSTensor::calcMaxOffset| code@>=
|
|
||||||
int FFSTensor::calcMaxOffset(int nvar, int d)
|
|
||||||
{
|
|
||||||
if (nvar == 0 && d == 0)
|
|
||||||
return 1;
|
|
||||||
if (nvar == 0 && d > 0)
|
|
||||||
return 0;
|
|
||||||
return noverk(nvar + d - 1, d);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ The conversion from sparse tensor is clear. We go through all the
|
|
||||||
tensor and write to the dense what is found.
|
|
||||||
@<|FFSTensor| conversion from sparse@>=
|
|
||||||
FFSTensor::FFSTensor(const FSSparseTensor& t)
|
|
||||||
: FTensor(along_col, IntSequence(t.dimen(), t.nvar()),
|
|
||||||
t.nrows(), calcMaxOffset(t.nvar(), t.dimen()), t.dimen()),
|
|
||||||
nv(t.nvar())
|
|
||||||
{
|
|
||||||
zeros();
|
|
||||||
for (FSSparseTensor::const_iterator it = t.getMap().begin();
|
|
||||||
it != t.getMap().end(); ++it) {
|
|
||||||
index ind(this, (*it).first);
|
|
||||||
get((*it).second.first, *ind) = (*it).second.second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ The conversion from unfolded copies only columns of respective
|
|
||||||
coordinates. So we go through all the columns in the folded tensor
|
|
||||||
(this), make an index of the unfolded vector from coordinates, and
|
|
||||||
copy the column.
|
|
||||||
|
|
||||||
@<|FFSTensor| conversion from unfolded@>=
|
|
||||||
FFSTensor::FFSTensor(const UFSTensor& ut)
|
|
||||||
: FTensor(along_col, IntSequence(ut.dimen(), ut.nvar()),
|
|
||||||
ut.nrows(), calcMaxOffset(ut.nvar(), ut.dimen()), ut.dimen()),
|
|
||||||
nv(ut.nvar())
|
|
||||||
{
|
|
||||||
for (index in = begin(); in != end(); ++in) {
|
|
||||||
index src(&ut, in.getCoor());
|
|
||||||
copyColumn(ut, *src, *in);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here just make a new instance and return the reference.
|
|
||||||
@<|FFSTensor::unfold| code@>=
|
|
||||||
UTensor& FFSTensor::unfold() const
|
|
||||||
{
|
|
||||||
return *(new UFSTensor(*this));
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Incrementing is easy. We have to increment by calling static method
|
|
||||||
|UTensor::increment| first. In this way, we have coordinates of
|
|
||||||
unfolded tensor. Then we have to skip to the closest folded index
|
|
||||||
which corresponds to monotonizeing the integer sequence.
|
|
||||||
|
|
||||||
@<|FFSTensor::increment| code@>=
|
|
||||||
void FFSTensor::increment(IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input/output vector size in FFSTensor::increment");
|
|
||||||
|
|
||||||
UTensor::increment(v, nv);
|
|
||||||
v.monotone();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Decrement calls static |FTensor::decrement|.
|
|
||||||
|
|
||||||
@<|FFSTensor::decrement| code@>=
|
|
||||||
void FFSTensor::decrement(IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input/output vector size in FFSTensor::decrement");
|
|
||||||
|
|
||||||
FTensor::decrement(v, nv);
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|FFSTensor::getOffset| code@>=
|
|
||||||
int FFSTensor::getOffset(const IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input vector size in FFSTensor::getOffset");
|
|
||||||
|
|
||||||
return FTensor::getOffset(v, nv);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we add a general symmetry tensor to the (part of) full symmetry
|
|
||||||
tensor provided that the unique variable of the full symmetry tensor
|
|
||||||
is a stack of variables from the general symmetry tensor.
|
|
||||||
|
|
||||||
We check for the dimensions and number of variables. Then we calculate
|
|
||||||
a shift of coordinates when going from the general symmetry tensor to
|
|
||||||
full symmetry (it corresponds to shift of coordinates induces by
|
|
||||||
stacking the variables). Then we add the appropriate columns by going
|
|
||||||
through the columns in general symmetry, adding the shift and sorting.
|
|
||||||
|
|
||||||
@<|FFSTensor::addSubTensor| code@>=
|
|
||||||
void FFSTensor::addSubTensor(const FGSTensor& t)
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(dimen() != t.getDims().dimen(),
|
|
||||||
"Wrong dimensions for FFSTensor::addSubTensor");
|
|
||||||
TL_RAISE_IF(nvar() != t.getDims().getNVS().sum(),
|
|
||||||
"Wrong nvs for FFSTensor::addSubTensor");
|
|
||||||
|
|
||||||
@<set shift for |addSubTensor|@>;
|
|
||||||
for (Tensor::index ind = t.begin(); ind != t.end(); ++ind) {
|
|
||||||
IntSequence c(ind.getCoor());
|
|
||||||
c.add(1, shift);
|
|
||||||
c.sort();
|
|
||||||
Tensor::index tar(this, c);
|
|
||||||
addColumn(t, *ind, *tar);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<set shift for |addSubTensor|@>=
|
|
||||||
IntSequence shift_pre(t.getSym().num(), 0);
|
|
||||||
for (int i = 1; i < t.getSym().num(); i++)
|
|
||||||
shift_pre[i] = shift_pre[i-1]+t.getDims().getNVS()[i-1];
|
|
||||||
IntSequence shift(t.getSym(), shift_pre);
|
|
||||||
|
|
||||||
@ This is a bit more straightforward than |@<|FFSTensor| contraction constructor@>|.
|
|
||||||
We do not add column by column but we do it by submatrices due to
|
|
||||||
regularity of the unfolded tensor.
|
|
||||||
|
|
||||||
@<|UFSTensor| contraction constructor@>=
|
|
||||||
UFSTensor::UFSTensor(const UFSTensor& t, const ConstVector& x)
|
|
||||||
: UTensor(along_col, IntSequence(t.dimen()-1, t.nvar()),
|
|
||||||
t.nrows(), calcMaxOffset(t.nvar(), t.dimen()-1), t.dimen()-1),
|
|
||||||
nv(t.nvar())
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(t.dimen() < 1,
|
|
||||||
"Wrong dimension for tensor contraction of UFSTensor");
|
|
||||||
TL_RAISE_IF(t.nvar() != x.length(),
|
|
||||||
"Wrong number of variables for tensor contraction of UFSTensor");
|
|
||||||
|
|
||||||
zeros();
|
|
||||||
|
|
||||||
for (int i = 0; i < ncols(); i++) {
|
|
||||||
ConstTwoDMatrix tpart(t, i*nvar(), nvar());
|
|
||||||
Vector outcol(*this, i);
|
|
||||||
tpart.multaVec(outcol, x);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we convert folded full symmetry tensor to unfolded. We copy all
|
|
||||||
columns of folded tensor, and then call |unfoldData()|.
|
|
||||||
|
|
||||||
@<|UFSTensor| conversion from folded@>=
|
|
||||||
UFSTensor::UFSTensor(const FFSTensor& ft)
|
|
||||||
: UTensor(along_col, IntSequence(ft.dimen(), ft.nvar()),
|
|
||||||
ft.nrows(), calcMaxOffset(ft.nvar(), ft.dimen()), ft.dimen()),
|
|
||||||
nv(ft.nvar())
|
|
||||||
{
|
|
||||||
for (index src = ft.begin(); src != ft.end(); ++src) {
|
|
||||||
index in(this, src.getCoor());
|
|
||||||
copyColumn(ft, *src, *in);
|
|
||||||
}
|
|
||||||
unfoldData();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we just return a reference to new instance of folded tensor.
|
|
||||||
@<|UFSTensor::fold| code@>=
|
|
||||||
FTensor& UFSTensor::fold() const
|
|
||||||
{
|
|
||||||
return *(new FFSTensor(*this));
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we just call |UTensor| respective static methods.
|
|
||||||
@<|UFSTensor| increment and decrement@>=
|
|
||||||
void UFSTensor::increment(IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input/output vector size in UFSTensor::increment");
|
|
||||||
|
|
||||||
UTensor::increment(v, nv);
|
|
||||||
}
|
|
||||||
|
|
||||||
void UFSTensor::decrement(IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input/output vector size in UFSTensor::decrement");
|
|
||||||
|
|
||||||
UTensor::decrement(v, nv);
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|UFSTensor::getOffset| code@>=
|
|
||||||
int UFSTensor::getOffset(const IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input vector size in UFSTensor::getOffset");
|
|
||||||
|
|
||||||
return UTensor::getOffset(v, nv);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This is very similar to |@<|FFSTensor::addSubTensor| code@>|. The
|
|
||||||
only difference is the addition. We go through all columns in the full
|
|
||||||
symmetry tensor and cancel the shift. If the coordinates after the
|
|
||||||
cancellation are positive, we find the column in the general symmetry
|
|
||||||
tensor, and add it.
|
|
||||||
|
|
||||||
@<|UFSTensor::addSubTensor| code@>=
|
|
||||||
void UFSTensor::addSubTensor(const UGSTensor& t)
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(dimen() != t.getDims().dimen(),
|
|
||||||
"Wrong dimensions for UFSTensor::addSubTensor");
|
|
||||||
TL_RAISE_IF(nvar() != t.getDims().getNVS().sum(),
|
|
||||||
"Wrong nvs for UFSTensor::addSubTensor");
|
|
||||||
|
|
||||||
@<set shift for |addSubTensor|@>;
|
|
||||||
for (Tensor::index tar = begin(); tar != end(); ++tar) {
|
|
||||||
IntSequence c(tar.getCoor());
|
|
||||||
c.sort();
|
|
||||||
c.add(-1, shift);
|
|
||||||
if (c.isPositive() && c.less(t.getDims().getNVX())) {
|
|
||||||
Tensor::index from(&t, c);
|
|
||||||
addColumn(t, *from, *tar);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we go through all columns, find a column of folded index, and
|
|
||||||
then copy the column data. Finding the index is done by sorting the
|
|
||||||
integer sequence.
|
|
||||||
|
|
||||||
@<|UFSTensor::unfoldData| code@>=
|
|
||||||
void UFSTensor::unfoldData()
|
|
||||||
{
|
|
||||||
for (index in = begin(); in != end(); ++in) {
|
|
||||||
IntSequence v(in.getCoor());
|
|
||||||
v.sort();
|
|
||||||
index tmp(this, v);
|
|
||||||
copyColumn(*tmp, *in);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt fs\_tensor.cpp} file.
|
|
|
@ -0,0 +1,141 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// Full symmetry tensor.
|
||||||
|
|
||||||
|
/* Here we define folded and unfolded tensors for full symmetry. All
|
||||||
|
tensors from here are identifying the multidimensional index with
|
||||||
|
columns. */
|
||||||
|
|
||||||
|
#ifndef FS_TENSOR_H
|
||||||
|
#define FS_TENSOR_H
|
||||||
|
|
||||||
|
#include "tensor.hh"
|
||||||
|
#include "symmetry.hh"
|
||||||
|
|
||||||
|
class FGSTensor;
|
||||||
|
class UGSTensor;
|
||||||
|
class FRSingleTensor;
|
||||||
|
class FSSparseTensor;
|
||||||
|
|
||||||
|
/* Folded tensor with full symmetry maintains only information about
|
||||||
|
number of symmetrical variables |nv|. Further, we implement what is
|
||||||
|
left from the super class |FTensor|.
|
||||||
|
|
||||||
|
We implement |getOffset| which should be used with care since
|
||||||
|
its complexity.
|
||||||
|
|
||||||
|
We implement a method adding a given general symmetry tensor to the
|
||||||
|
full symmetry tensor supposing the variables of the general symmetry
|
||||||
|
tensor are stacked giving only one variable of the full symmetry
|
||||||
|
tensor. For instance, if $x=[y^T, u^T]^T$, then we can add tensor
|
||||||
|
$\left[g_{y^2u}\right]$ to tensor $g_{x^3}$. This is done in method
|
||||||
|
|addSubTensor|. Consult |@<|FGSTensor| class declaration@>| to know
|
||||||
|
what is general symmetry tensor. */
|
||||||
|
|
||||||
|
class UFSTensor;
|
||||||
|
class FFSTensor : public FTensor
|
||||||
|
{
|
||||||
|
int nv;
|
||||||
|
public:
|
||||||
|
/* Here are the constructors. The second constructor constructs a
|
||||||
|
tensor by one-dimensional contraction from the higher dimensional
|
||||||
|
tensor |t|. This is, it constructs a tensor
|
||||||
|
$$\left[g_{y^n}\right]_{\alpha_1\ldots\alpha_n}=
|
||||||
|
\left[t_{y^{n+1}}\right]_{\alpha_1\ldots\alpha_n\beta}[x]^\beta$$
|
||||||
|
See implementation |@<|FFSTensor| contraction constructor@>| for details.
|
||||||
|
|
||||||
|
The next constructor converts from sparse tensor (which is fully
|
||||||
|
symmetric and folded by nature).
|
||||||
|
|
||||||
|
The fourth constructs object from unfolded fully symmetric.
|
||||||
|
|
||||||
|
The fifth constructs a subtensor of selected rows. */
|
||||||
|
|
||||||
|
FFSTensor(int r, int nvar, int d)
|
||||||
|
: FTensor(along_col, IntSequence(d, nvar),
|
||||||
|
r, calcMaxOffset(nvar, d), d), nv(nvar)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
FFSTensor(const FFSTensor &t, const ConstVector &x);
|
||||||
|
FFSTensor(const FSSparseTensor &t);
|
||||||
|
FFSTensor(const FFSTensor &ft)
|
||||||
|
: FTensor(ft), nv(ft.nv)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
FFSTensor(const UFSTensor &ut);
|
||||||
|
FFSTensor(int first_row, int num, FFSTensor &t)
|
||||||
|
: FTensor(first_row, num, t), nv(t.nv)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void increment(IntSequence &v) const;
|
||||||
|
void decrement(IntSequence &v) const;
|
||||||
|
UTensor&unfold() const;
|
||||||
|
Symmetry
|
||||||
|
getSym() const
|
||||||
|
{
|
||||||
|
return Symmetry(dimen());
|
||||||
|
}
|
||||||
|
|
||||||
|
int getOffset(const IntSequence &v) const;
|
||||||
|
void addSubTensor(const FGSTensor &t);
|
||||||
|
int
|
||||||
|
nvar() const
|
||||||
|
{
|
||||||
|
return nv;
|
||||||
|
}
|
||||||
|
static int calcMaxOffset(int nvar, int d);
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Unfolded fully symmetric tensor is almost the same in structure as
|
||||||
|
|FFSTensor|, but the method |unfoldData|. It takes columns which also
|
||||||
|
exist in folded version and copies them to all their symmetrical
|
||||||
|
locations. This is useful when constructing unfolded tensor from
|
||||||
|
folded one. */
|
||||||
|
|
||||||
|
class UFSTensor : public UTensor
|
||||||
|
{
|
||||||
|
int nv;
|
||||||
|
public:
|
||||||
|
UFSTensor(int r, int nvar, int d)
|
||||||
|
: UTensor(along_col, IntSequence(d, nvar),
|
||||||
|
r, calcMaxOffset(nvar, d), d), nv(nvar)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
UFSTensor(const UFSTensor &t, const ConstVector &x);
|
||||||
|
UFSTensor(const UFSTensor &ut)
|
||||||
|
: UTensor(ut), nv(ut.nv)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
UFSTensor(const FFSTensor &ft);
|
||||||
|
UFSTensor(int first_row, int num, UFSTensor &t)
|
||||||
|
: UTensor(first_row, num, t), nv(t.nv)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void increment(IntSequence &v) const;
|
||||||
|
void decrement(IntSequence &v) const;
|
||||||
|
FTensor&fold() const;
|
||||||
|
Symmetry
|
||||||
|
getSym() const
|
||||||
|
{
|
||||||
|
return Symmetry(dimen());
|
||||||
|
}
|
||||||
|
|
||||||
|
int getOffset(const IntSequence &v) const;
|
||||||
|
void addSubTensor(const UGSTensor &t);
|
||||||
|
int
|
||||||
|
nvar() const
|
||||||
|
{
|
||||||
|
return nv;
|
||||||
|
}
|
||||||
|
static int
|
||||||
|
calcMaxOffset(int nvar, int d)
|
||||||
|
{
|
||||||
|
return power(nvar, d);
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
void unfoldData();
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,129 +0,0 @@
|
||||||
@q $Id: fs_tensor.hweb 741 2006-05-09 11:12:46Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Full symmetry tensor. Start of {\tt fs\_tensor.h} file.
|
|
||||||
|
|
||||||
Here we define folded and unfolded tensors for full symmetry. All
|
|
||||||
tensors from here are identifying the multidimensional index with
|
|
||||||
columns.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#ifndef FS_TENSOR_H
|
|
||||||
#define FS_TENSOR_H
|
|
||||||
|
|
||||||
#include "tensor.h"
|
|
||||||
#include "symmetry.h"
|
|
||||||
|
|
||||||
class FGSTensor;
|
|
||||||
class UGSTensor;
|
|
||||||
class FRSingleTensor;
|
|
||||||
class FSSparseTensor;
|
|
||||||
@<|FFSTensor| class declaration@>;
|
|
||||||
@<|UFSTensor| class declaration@>;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
@ Folded tensor with full symmetry maintains only information about
|
|
||||||
number of symmetrical variables |nv|. Further, we implement what is
|
|
||||||
left from the super class |FTensor|.
|
|
||||||
|
|
||||||
We implement |getOffset| which should be used with care since
|
|
||||||
its complexity.
|
|
||||||
|
|
||||||
We implement a method adding a given general symmetry tensor to the
|
|
||||||
full symmetry tensor supposing the variables of the general symmetry
|
|
||||||
tensor are stacked giving only one variable of the full symmetry
|
|
||||||
tensor. For instance, if $x=[y^T, u^T]^T$, then we can add tensor
|
|
||||||
$\left[g_{y^2u}\right]$ to tensor $g_{x^3}$. This is done in method
|
|
||||||
|addSubTensor|. Consult |@<|FGSTensor| class declaration@>| to know
|
|
||||||
what is general symmetry tensor.
|
|
||||||
|
|
||||||
@<|FFSTensor| class declaration@>=
|
|
||||||
class UFSTensor;
|
|
||||||
class FFSTensor : public FTensor {
|
|
||||||
int nv;
|
|
||||||
public:@;
|
|
||||||
@<|FFSTensor| constructor declaration@>;
|
|
||||||
|
|
||||||
void increment(IntSequence& v) const;
|
|
||||||
void decrement(IntSequence& v) const;
|
|
||||||
UTensor& unfold() const;
|
|
||||||
Symmetry getSym() const
|
|
||||||
{@+ return Symmetry(dimen());@+}
|
|
||||||
|
|
||||||
int getOffset(const IntSequence& v) const;
|
|
||||||
void addSubTensor(const FGSTensor& t);
|
|
||||||
int nvar() const
|
|
||||||
{@+return nv;@+}
|
|
||||||
static int calcMaxOffset(int nvar, int d);
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Here are the constructors. The second constructor constructs a
|
|
||||||
tensor by one-dimensional contraction from the higher dimensional
|
|
||||||
tensor |t|. This is, it constructs a tensor
|
|
||||||
$$\left[g_{y^n}\right]_{\alpha_1\ldots\alpha_n}=
|
|
||||||
\left[t_{y^{n+1}}\right]_{\alpha_1\ldots\alpha_n\beta}[x]^\beta$$
|
|
||||||
See implementation |@<|FFSTensor| contraction constructor@>| for details.
|
|
||||||
|
|
||||||
The next constructor converts from sparse tensor (which is fully
|
|
||||||
symmetric and folded by nature).
|
|
||||||
|
|
||||||
The fourth constructs object from unfolded fully symmetric.
|
|
||||||
|
|
||||||
The fifth constructs a subtensor of selected rows.
|
|
||||||
|
|
||||||
@<|FFSTensor| constructor declaration@>=
|
|
||||||
FFSTensor(int r, int nvar, int d)
|
|
||||||
: FTensor(along_col, IntSequence(d, nvar),
|
|
||||||
r, calcMaxOffset(nvar, d), d), nv(nvar)@+ {}
|
|
||||||
FFSTensor(const FFSTensor& t, const ConstVector& x);
|
|
||||||
FFSTensor(const FSSparseTensor& t);
|
|
||||||
FFSTensor(const FFSTensor& ft)
|
|
||||||
: FTensor(ft), nv(ft.nv)@+ {}
|
|
||||||
FFSTensor(const UFSTensor& ut);
|
|
||||||
FFSTensor(int first_row, int num, FFSTensor& t)
|
|
||||||
: FTensor(first_row, num, t), nv(t.nv)@+ {}
|
|
||||||
|
|
||||||
|
|
||||||
@ Unfolded fully symmetric tensor is almost the same in structure as
|
|
||||||
|FFSTensor|, but the method |unfoldData|. It takes columns which also
|
|
||||||
exist in folded version and copies them to all their symmetrical
|
|
||||||
locations. This is useful when constructing unfolded tensor from
|
|
||||||
folded one.
|
|
||||||
|
|
||||||
@<|UFSTensor| class declaration@>=
|
|
||||||
class UFSTensor : public UTensor {
|
|
||||||
int nv;
|
|
||||||
public:@;
|
|
||||||
@<|UFSTensor| constructor declaration@>;
|
|
||||||
|
|
||||||
void increment(IntSequence& v) const;
|
|
||||||
void decrement(IntSequence& v) const;
|
|
||||||
FTensor& fold() const;
|
|
||||||
Symmetry getSym() const
|
|
||||||
{@+ return Symmetry(dimen());@+}
|
|
||||||
|
|
||||||
int getOffset(const IntSequence& v) const;
|
|
||||||
void addSubTensor(const UGSTensor& t);
|
|
||||||
int nvar() const
|
|
||||||
{@+ return nv;@+}
|
|
||||||
static int calcMaxOffset(int nvar, int d)
|
|
||||||
{@+ return power(nvar, d);@+}
|
|
||||||
private:@;
|
|
||||||
void unfoldData();
|
|
||||||
};
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|UFSTensor| constructor declaration@>=
|
|
||||||
UFSTensor(int r, int nvar, int d)
|
|
||||||
: UTensor(along_col, IntSequence(d, nvar),
|
|
||||||
r, calcMaxOffset(nvar, d), d), nv(nvar)@+ {}
|
|
||||||
UFSTensor(const UFSTensor& t, const ConstVector& x);
|
|
||||||
UFSTensor(const UFSTensor& ut)
|
|
||||||
: UTensor(ut), nv(ut.nv)@+ {}
|
|
||||||
UFSTensor(const FFSTensor& ft);
|
|
||||||
UFSTensor(int first_row, int num, UFSTensor& t)
|
|
||||||
: UTensor(first_row, num, t), nv(t.nv)@+ {}
|
|
||||||
|
|
||||||
@ End of {\tt fs\_tensor.h} file.
|
|
|
@ -0,0 +1,490 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "gs_tensor.hh"
|
||||||
|
#include "sparse_tensor.hh"
|
||||||
|
#include "tl_exception.hh"
|
||||||
|
#include "kron_prod.hh"
|
||||||
|
|
||||||
|
/* This constructs the tensor dimensions for slicing. See
|
||||||
|
|@<|TensorDimens| class declaration@>| for details. */
|
||||||
|
TensorDimens::TensorDimens(const IntSequence &ss, const IntSequence &coor)
|
||||||
|
: nvs(ss),
|
||||||
|
sym(ss.size(), ""),
|
||||||
|
nvmax(coor.size(), 0)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(!coor.isSorted(),
|
||||||
|
"Coordinates not sorted in TensorDimens slicing constructor");
|
||||||
|
TL_RAISE_IF(coor[0] < 0 || coor[coor.size()-1] >= ss.size(),
|
||||||
|
"A coordinate out of stack range in TensorDimens slicing constructor");
|
||||||
|
|
||||||
|
for (int i = 0; i < coor.size(); i++)
|
||||||
|
{
|
||||||
|
sym[coor[i]]++;
|
||||||
|
nvmax[i] = ss[coor[i]];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Number of unfold offsets is a product of all members of |nvmax|. */
|
||||||
|
int
|
||||||
|
TensorDimens::calcUnfoldMaxOffset() const
|
||||||
|
{
|
||||||
|
return nvmax.mult();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Number of folded offsets is a product of all unfold offsets within
|
||||||
|
each equivalence class of the symmetry. */
|
||||||
|
|
||||||
|
int
|
||||||
|
TensorDimens::calcFoldMaxOffset() const
|
||||||
|
{
|
||||||
|
int res = 1;
|
||||||
|
for (int i = 0; i < nvs.size(); i++)
|
||||||
|
{
|
||||||
|
if (nvs[i] == 0 && sym[i] > 0)
|
||||||
|
return 0;
|
||||||
|
if (sym[i] > 0)
|
||||||
|
res *= Tensor::noverk(nvs[i]+sym[i]-1, sym[i]);
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we implement offset calculation for folded general symmetry
|
||||||
|
tensor. The offset of a given sequence is calculated by breaking the
|
||||||
|
sequence to subsequences according to the symmetry. The offset is
|
||||||
|
orthogonal with respect to the blocks, this means that indexing within
|
||||||
|
the blocks is independent. If there are two blocks, for instance, then
|
||||||
|
the offset will be an offset within the outer block (the first)
|
||||||
|
multiplied with all offsets of the inner block (last) plus an offset
|
||||||
|
within the second block.
|
||||||
|
|
||||||
|
Generally, the resulting offset $r$ will be
|
||||||
|
$$\sum_{i=1}^s r_i\cdot\left(\prod_{j=i+1}^sn_j\right),$$
|
||||||
|
where $s$ is a number of blocks (|getSym().num()|), $r_i$ is an offset
|
||||||
|
within $i$-th block, and $n_j$ is a number of all offsets in $j$-th
|
||||||
|
block.
|
||||||
|
|
||||||
|
In the code, we go from the innermost to the outermost, maintaining the
|
||||||
|
product in |pow|. */
|
||||||
|
|
||||||
|
int
|
||||||
|
TensorDimens::calcFoldOffset(const IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input vector size in TensorDimens::getFoldOffset");
|
||||||
|
|
||||||
|
int res = 0;
|
||||||
|
int pow = 1;
|
||||||
|
int blstart = v.size();
|
||||||
|
for (int ibl = getSym().num()-1; ibl >= 0; ibl--)
|
||||||
|
{
|
||||||
|
int bldim = getSym()[ibl];
|
||||||
|
if (bldim > 0)
|
||||||
|
{
|
||||||
|
blstart -= bldim;
|
||||||
|
int blnvar = getNVX()[blstart];
|
||||||
|
IntSequence subv(v, blstart, blstart+bldim);
|
||||||
|
res += FTensor::getOffset(subv, blnvar)*pow;
|
||||||
|
pow *= FFSTensor::calcMaxOffset(blnvar, bldim);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TL_RAISE_IF(blstart != 0,
|
||||||
|
"Error in tracing symmetry in TensorDimens::getFoldOffset");
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* In order to find the predecessor of index within folded generally
|
||||||
|
symmetric tensor, note, that a decrease action in $i$-th partition of
|
||||||
|
symmetric indices can happen only if all indices in all subsequent
|
||||||
|
partitions are zero. Then the decrease action of whole the index
|
||||||
|
consists of decrease action of the first nonzero partition from the
|
||||||
|
right, and setting these trailing zero partitions to their maximum
|
||||||
|
indices.
|
||||||
|
|
||||||
|
So we set |iblock| to the number of last partitions. During the
|
||||||
|
execution, |block_first|, and |block_last| will point to the first
|
||||||
|
element of |iblock| and, first element of following block.
|
||||||
|
|
||||||
|
Then we check for all trailing zero partitions, set them to their
|
||||||
|
maximums and return |iblock| to point to the first non-zero partition
|
||||||
|
(or the first partition). Then for this partition, we decrease the
|
||||||
|
index (fully symmetric within that partition). */
|
||||||
|
|
||||||
|
void
|
||||||
|
TensorDimens::decrement(IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(getNVX().size() != v.size(),
|
||||||
|
"Wrong size of input/output sequence in TensorDimens::decrement");
|
||||||
|
|
||||||
|
int iblock = getSym().num()-1;
|
||||||
|
int block_last = v.size();
|
||||||
|
int block_first = block_last-getSym()[iblock];
|
||||||
|
|
||||||
|
// check for zero trailing blocks
|
||||||
|
while (iblock > 0 && v[block_last-1] == 0)
|
||||||
|
{
|
||||||
|
for (int i = block_first; i < block_last; i++)
|
||||||
|
v[i] = getNVX(i); // equivalent to |nvs[iblock]|
|
||||||
|
iblock--;
|
||||||
|
block_last = block_first;
|
||||||
|
block_first -= getSym()[iblock];
|
||||||
|
}
|
||||||
|
|
||||||
|
// decrease the non-zero block
|
||||||
|
IntSequence vtmp(v, block_first, block_last);
|
||||||
|
FTensor::decrement(vtmp, getNVX(block_first));
|
||||||
|
}
|
||||||
|
|
||||||
|
// |FGSTensor| conversion from |UGSTensor|
|
||||||
|
/* Here we go through columns of folded, calculate column of unfolded,
|
||||||
|
and copy data. */
|
||||||
|
FGSTensor::FGSTensor(const UGSTensor &ut)
|
||||||
|
: FTensor(along_col, ut.tdims.getNVX(), ut.nrows(),
|
||||||
|
ut.tdims.calcFoldMaxOffset(), ut.dimen()),
|
||||||
|
tdims(ut.tdims)
|
||||||
|
{
|
||||||
|
for (index ti = begin(); ti != end(); ++ti)
|
||||||
|
{
|
||||||
|
index ui(&ut, ti.getCoor());
|
||||||
|
copyColumn(ut, *ui, *ti);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// |FGSTensor| slicing from |FSSparseTensor|
|
||||||
|
/* Here is the code of slicing constructor from the sparse tensor. We
|
||||||
|
first calculate coordinates of first and last index of the slice
|
||||||
|
within the sparse tensor (these are |lb| and |ub|), and then we
|
||||||
|
iterate through all items between them (in lexicographical ordering of
|
||||||
|
sparse tensor), and check whether an item is between the |lb| and |ub|
|
||||||
|
in Cartesian ordering (this corresponds to belonging to the
|
||||||
|
slices). If it belongs, then we subtract the lower bound |lb| to
|
||||||
|
obtain coordinates in the |this| tensor and we copy the item. */
|
||||||
|
FGSTensor::FGSTensor(const FSSparseTensor &t, const IntSequence &ss,
|
||||||
|
const IntSequence &coor, const TensorDimens &td)
|
||||||
|
: FTensor(along_col, td.getNVX(), t.nrows(),
|
||||||
|
td.calcFoldMaxOffset(), td.dimen()),
|
||||||
|
tdims(td)
|
||||||
|
{
|
||||||
|
// set |lb| and |ub| to lower and upper bounds of indices
|
||||||
|
/* Here we first set |s_offsets| to offsets of partitions whose lengths
|
||||||
|
are given by |ss|. So |s_offsets| is a cumulative sum of |ss|.
|
||||||
|
|
||||||
|
Then we create |lb| to be coordinates of the possibly first index from
|
||||||
|
the slice, and |ub| to be coordinates of possibly last index of the
|
||||||
|
slice. */
|
||||||
|
IntSequence s_offsets(ss.size(), 0);
|
||||||
|
for (int i = 1; i < ss.size(); i++)
|
||||||
|
s_offsets[i] = s_offsets[i-1] + ss[i-1];
|
||||||
|
|
||||||
|
IntSequence lb(coor.size());
|
||||||
|
IntSequence ub(coor.size());
|
||||||
|
for (int i = 0; i < coor.size(); i++)
|
||||||
|
{
|
||||||
|
lb[i] = s_offsets[coor[i]];
|
||||||
|
ub[i] = s_offsets[coor[i]] + ss[coor[i]] - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
zeros();
|
||||||
|
FSSparseTensor::const_iterator lbi = t.getMap().lower_bound(lb);
|
||||||
|
FSSparseTensor::const_iterator ubi = t.getMap().upper_bound(ub);
|
||||||
|
for (FSSparseTensor::const_iterator run = lbi; run != ubi; ++run)
|
||||||
|
{
|
||||||
|
if (lb.lessEq((*run).first) && (*run).first.lessEq(ub))
|
||||||
|
{
|
||||||
|
IntSequence c((*run).first);
|
||||||
|
c.add(-1, lb);
|
||||||
|
Tensor::index ind(this, c);
|
||||||
|
TL_RAISE_IF(*ind < 0 || *ind >= ncols(),
|
||||||
|
"Internal error in slicing constructor of FGSTensor");
|
||||||
|
get((*run).second.first, *ind) = (*run).second.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// |FGSTensor| slicing from |FFSTensor|
|
||||||
|
/* The code is similar to |@<|FGSTensor| slicing from |FSSparseTensor|@>|. */
|
||||||
|
FGSTensor::FGSTensor(const FFSTensor &t, const IntSequence &ss,
|
||||||
|
const IntSequence &coor, const TensorDimens &td)
|
||||||
|
: FTensor(along_col, td.getNVX(), t.nrows(),
|
||||||
|
td.calcFoldMaxOffset(), td.dimen()),
|
||||||
|
tdims(td)
|
||||||
|
{
|
||||||
|
if (ncols() == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
// set |lb| and |ub| to lower and upper bounds of indices
|
||||||
|
/* Same code as in the previous converting constructor */
|
||||||
|
IntSequence s_offsets(ss.size(), 0);
|
||||||
|
for (int i = 1; i < ss.size(); i++)
|
||||||
|
s_offsets[i] = s_offsets[i-1] + ss[i-1];
|
||||||
|
|
||||||
|
IntSequence lb(coor.size());
|
||||||
|
IntSequence ub(coor.size());
|
||||||
|
for (int i = 0; i < coor.size(); i++)
|
||||||
|
{
|
||||||
|
lb[i] = s_offsets[coor[i]];
|
||||||
|
ub[i] = s_offsets[coor[i]] + ss[coor[i]] - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
zeros();
|
||||||
|
Tensor::index lbi(&t, lb);
|
||||||
|
Tensor::index ubi(&t, ub);
|
||||||
|
++ubi;
|
||||||
|
for (Tensor::index run = lbi; run != ubi; ++run)
|
||||||
|
{
|
||||||
|
if (lb.lessEq(run.getCoor()) && run.getCoor().lessEq(ub))
|
||||||
|
{
|
||||||
|
IntSequence c(run.getCoor());
|
||||||
|
c.add(-1, lb);
|
||||||
|
Tensor::index ind(this, c);
|
||||||
|
TL_RAISE_IF(*ind < 0 || *ind >= ncols(),
|
||||||
|
"Internal error in slicing constructor of FGSTensor");
|
||||||
|
copyColumn(t, *run, *ind);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// |FGSTensor| conversion from |GSSparseTensor|
|
||||||
|
FGSTensor::FGSTensor(const GSSparseTensor &t)
|
||||||
|
: FTensor(along_col, t.getDims().getNVX(), t.nrows(),
|
||||||
|
t.getDims().calcFoldMaxOffset(), t.dimen()), tdims(t.getDims())
|
||||||
|
{
|
||||||
|
zeros();
|
||||||
|
for (FSSparseTensor::const_iterator it = t.getMap().begin();
|
||||||
|
it != t.getMap().end(); ++it)
|
||||||
|
{
|
||||||
|
index ind(this, (*it).first);
|
||||||
|
get((*it).second.first, *ind) = (*it).second.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* First we increment as unfolded, then we must monotonize within
|
||||||
|
partitions defined by the symmetry. This is done by
|
||||||
|
|IntSequence::pmonotone|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
FGSTensor::increment(IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input/output vector size in FGSTensor::increment");
|
||||||
|
|
||||||
|
UTensor::increment(v, tdims.getNVX());
|
||||||
|
v.pmonotone(tdims.getSym());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return unfolded version of the tensor. */
|
||||||
|
UTensor &
|
||||||
|
FGSTensor::unfold() const
|
||||||
|
{
|
||||||
|
return *(new UGSTensor(*this));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we implement the contraction
|
||||||
|
$$\left[r_{x^iz^k}\right]_{\alpha_1\ldots\alpha_i\gamma_1\ldots\gamma_k}=
|
||||||
|
\left[t_{x^iy^jz^k}\right]_{\alpha_1\ldots\alpha_i\beta_1\ldots\beta_j\gamma_1\ldots\gamma_k}
|
||||||
|
\left[c\right]^{\beta_1\ldots\beta_j}
|
||||||
|
$$
|
||||||
|
More generally, $x^i$ and $z^k$ can represent also general symmetries.
|
||||||
|
|
||||||
|
The operation can be rewritten as a matrix product
|
||||||
|
$$\left[t_{x^iy^jz^k}\right]\cdot\left(I_l\otimes c\otimes I_r\right)$$
|
||||||
|
where $l$ is a number of columns in tensor with symmetry on the left
|
||||||
|
(i.e. $x^i$), and $r$ is a number of columns in tensor with a symmetry
|
||||||
|
on the right (i.e. $z^k$). The code proceeds accordingly. We first
|
||||||
|
form two symmetries |sym_left| and |sym_right|, then calculate the
|
||||||
|
number of columns |dleft|$=l$ and |dright|$=r$, form the Kronecker
|
||||||
|
product and multiply and add.
|
||||||
|
|
||||||
|
The input parameter |i| is the order of a variable being contracted
|
||||||
|
starting from 0. */
|
||||||
|
|
||||||
|
void
|
||||||
|
FGSTensor::contractAndAdd(int i, FGSTensor &out,
|
||||||
|
const FRSingleTensor &col) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(i < 0 || i >= getSym().num(),
|
||||||
|
"Wrong index for FGSTensor::contractAndAdd");
|
||||||
|
|
||||||
|
TL_RAISE_IF(getSym()[i] != col.dimen() || tdims.getNVS()[i] != col.nvar(),
|
||||||
|
"Wrong dimensions for FGSTensor::contractAndAdd");
|
||||||
|
|
||||||
|
// set |sym_left| and |sym_right| to symmetries around |i|
|
||||||
|
/* Here we have a symmetry of |this| tensor and we have to set
|
||||||
|
|sym_left| to the subsymmetry left from the |i|-th variable and
|
||||||
|
|sym_right| to the subsymmetry right from the |i|-th variable. So we
|
||||||
|
copy first all the symmetry and then put zeros to the left for
|
||||||
|
|sym_right| and to the right for |sym_left|. */
|
||||||
|
Symmetry sym_left(getSym());
|
||||||
|
Symmetry sym_right(getSym());
|
||||||
|
for (int j = 0; j < getSym().num(); j++)
|
||||||
|
{
|
||||||
|
if (j <= i)
|
||||||
|
sym_right[j] = 0;
|
||||||
|
if (j >= i)
|
||||||
|
sym_left[j] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int dleft = TensorDimens(sym_left, tdims.getNVS()).calcFoldMaxOffset();
|
||||||
|
int dright = TensorDimens(sym_right, tdims.getNVS()).calcFoldMaxOffset();
|
||||||
|
KronProdAll kp(3);
|
||||||
|
kp.setUnit(0, dleft);
|
||||||
|
kp.setMat(1, col);
|
||||||
|
kp.setUnit(2, dright);
|
||||||
|
FGSTensor tmp(out.nrows(), out.getDims());
|
||||||
|
kp.mult(*this, tmp);
|
||||||
|
out.add(1.0, tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we go through folded tensor, and each index we convert to index
|
||||||
|
of the unfolded tensor and copy the data to the unfolded. Then we
|
||||||
|
unfold data within the unfolded tensor. */
|
||||||
|
|
||||||
|
UGSTensor::UGSTensor(const FGSTensor &ft)
|
||||||
|
: UTensor(along_col, ft.tdims.getNVX(), ft.nrows(),
|
||||||
|
ft.tdims.calcUnfoldMaxOffset(), ft.dimen()),
|
||||||
|
tdims(ft.tdims)
|
||||||
|
{
|
||||||
|
for (index fi = ft.begin(); fi != ft.end(); ++fi)
|
||||||
|
{
|
||||||
|
index ui(this, fi.getCoor());
|
||||||
|
copyColumn(ft, *fi, *ui);
|
||||||
|
}
|
||||||
|
unfoldData();
|
||||||
|
}
|
||||||
|
|
||||||
|
// |UGSTensor| slicing from |FSSparseTensor|
|
||||||
|
/* This makes a folded slice from the sparse tensor and unfolds it. */
|
||||||
|
UGSTensor::UGSTensor(const FSSparseTensor &t, const IntSequence &ss,
|
||||||
|
const IntSequence &coor, const TensorDimens &td)
|
||||||
|
: UTensor(along_col, td.getNVX(), t.nrows(),
|
||||||
|
td.calcUnfoldMaxOffset(), td.dimen()),
|
||||||
|
tdims(td)
|
||||||
|
{
|
||||||
|
if (ncols() == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
FGSTensor ft(t, ss, coor, td);
|
||||||
|
for (index fi = ft.begin(); fi != ft.end(); ++fi)
|
||||||
|
{
|
||||||
|
index ui(this, fi.getCoor());
|
||||||
|
copyColumn(ft, *fi, *ui);
|
||||||
|
}
|
||||||
|
unfoldData();
|
||||||
|
}
|
||||||
|
|
||||||
|
// |UGSTensor| slicing from |UFSTensor|
|
||||||
|
/* This makes a folded slice from dense and unfolds it. */
|
||||||
|
UGSTensor::UGSTensor(const UFSTensor &t, const IntSequence &ss,
|
||||||
|
const IntSequence &coor, const TensorDimens &td)
|
||||||
|
: UTensor(along_col, td.getNVX(), t.nrows(),
|
||||||
|
td.calcUnfoldMaxOffset(), td.dimen()),
|
||||||
|
tdims(td)
|
||||||
|
{
|
||||||
|
FFSTensor folded(t);
|
||||||
|
FGSTensor ft(folded, ss, coor, td);
|
||||||
|
for (index fi = ft.begin(); fi != ft.end(); ++fi)
|
||||||
|
{
|
||||||
|
index ui(this, fi.getCoor());
|
||||||
|
copyColumn(ft, *fi, *ui);
|
||||||
|
}
|
||||||
|
unfoldData();
|
||||||
|
}
|
||||||
|
|
||||||
|
// |UGSTensor| increment and decrement codes
|
||||||
|
/* Clear, just call |UTensor| static methods. */
|
||||||
|
void
|
||||||
|
UGSTensor::increment(IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input/output vector size in UGSTensor::increment");
|
||||||
|
|
||||||
|
UTensor::increment(v, tdims.getNVX());
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
UGSTensor::decrement(IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input/output vector size in UGSTensor::decrement");
|
||||||
|
|
||||||
|
UTensor::decrement(v, tdims.getNVX());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return a new instance of folded version. */
|
||||||
|
FTensor &
|
||||||
|
UGSTensor::fold() const
|
||||||
|
{
|
||||||
|
return *(new FGSTensor(*this));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return an offset of a given index. */
|
||||||
|
int
|
||||||
|
UGSTensor::getOffset(const IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input vector size in UGSTensor::getOffset");
|
||||||
|
|
||||||
|
return UTensor::getOffset(v, tdims.getNVX());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Unfold all data. We go through all the columns and for each we
|
||||||
|
obtain an index of the first equivalent, and copy the data. */
|
||||||
|
|
||||||
|
void
|
||||||
|
UGSTensor::unfoldData()
|
||||||
|
{
|
||||||
|
for (index in = begin(); in != end(); ++in)
|
||||||
|
copyColumn(*(getFirstIndexOf(in)), *in);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we return the first index which is equivalent in the symmetry
|
||||||
|
to the given index. It is a matter of sorting all the symmetry
|
||||||
|
partitions of the index. */
|
||||||
|
|
||||||
|
Tensor::index
|
||||||
|
UGSTensor::getFirstIndexOf(const index &in) const
|
||||||
|
{
|
||||||
|
IntSequence v(in.getCoor());
|
||||||
|
int last = 0;
|
||||||
|
for (int i = 0; i < tdims.getSym().num(); i++)
|
||||||
|
{
|
||||||
|
IntSequence vtmp(v, last, last+tdims.getSym()[i]);
|
||||||
|
vtmp.sort();
|
||||||
|
last += tdims.getSym()[i];
|
||||||
|
}
|
||||||
|
return index(this, v);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here is perfectly same code with the same semantics as in
|
||||||
|
|@<|FGSTensor::contractAndAdd| code@>|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
UGSTensor::contractAndAdd(int i, UGSTensor &out,
|
||||||
|
const URSingleTensor &col) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(i < 0 || i >= getSym().num(),
|
||||||
|
"Wrong index for UGSTensor::contractAndAdd");
|
||||||
|
TL_RAISE_IF(getSym()[i] != col.dimen() || tdims.getNVS()[i] != col.nvar(),
|
||||||
|
"Wrong dimensions for UGSTensor::contractAndAdd");
|
||||||
|
|
||||||
|
// set |sym_left| and |sym_right| to symmetries around |i|
|
||||||
|
/* Same code as in FGSTensor::contractAndAdd */
|
||||||
|
Symmetry sym_left(getSym());
|
||||||
|
Symmetry sym_right(getSym());
|
||||||
|
for (int j = 0; j < getSym().num(); j++)
|
||||||
|
{
|
||||||
|
if (j <= i)
|
||||||
|
sym_right[j] = 0;
|
||||||
|
if (j >= i)
|
||||||
|
sym_left[j] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int dleft = TensorDimens(sym_left, tdims.getNVS()).calcUnfoldMaxOffset();
|
||||||
|
int dright = TensorDimens(sym_right, tdims.getNVS()).calcUnfoldMaxOffset();
|
||||||
|
KronProdAll kp(3);
|
||||||
|
kp.setUnit(0, dleft);
|
||||||
|
kp.setMat(1, col);
|
||||||
|
kp.setUnit(2, dright);
|
||||||
|
UGSTensor tmp(out.nrows(), out.getDims());
|
||||||
|
kp.mult(*this, tmp);
|
||||||
|
out.add(1.0, tmp);
|
||||||
|
}
|
|
@ -1,501 +0,0 @@
|
||||||
@q $Id: gs_tensor.cweb 425 2005-08-16 15:18:01Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt gs\_tensor.cpp} file.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#include "gs_tensor.h"
|
|
||||||
#include "sparse_tensor.h"
|
|
||||||
#include "tl_exception.h"
|
|
||||||
#include "kron_prod.h"
|
|
||||||
|
|
||||||
@<|TensorDimens| constructor code@>;
|
|
||||||
@<|TensorDimens::calcUnfoldMaxOffset| code@>;
|
|
||||||
@<|TensorDimens::calcFoldMaxOffset| code@>;
|
|
||||||
@<|TensorDimens::calcFoldOffset| code@>;
|
|
||||||
@<|TensorDimens::decrement| code@>;
|
|
||||||
@<|FGSTensor| conversion from |UGSTensor|@>;
|
|
||||||
@<|FGSTensor| slicing from |FSSparseTensor|@>;
|
|
||||||
@<|FGSTensor| slicing from |FFSTensor|@>;
|
|
||||||
@<|FGSTensor| conversion from |GSSparseTensor|@>;
|
|
||||||
@<|FGSTensor::increment| code@>;
|
|
||||||
@<|FGSTensor::unfold| code@>;
|
|
||||||
@<|FGSTensor::contractAndAdd| code@>;
|
|
||||||
@<|UGSTensor| conversion from |FGSTensor|@>;
|
|
||||||
@<|UGSTensor| slicing from |FSSparseTensor|@>;
|
|
||||||
@<|UGSTensor| slicing from |UFSTensor|@>;
|
|
||||||
@<|UGSTensor| increment and decrement codes@>;
|
|
||||||
@<|UGSTensor::fold| code@>;
|
|
||||||
@<|UGSTensor::getOffset| code@>;
|
|
||||||
@<|UGSTensor::unfoldData| code@>;
|
|
||||||
@<|UGSTensor::getFirstIndexOf| code@>;
|
|
||||||
@<|UGSTensor::contractAndAdd| code@>;
|
|
||||||
|
|
||||||
@ This constructs the tensor dimensions for slicing. See
|
|
||||||
|@<|TensorDimens| class declaration@>| for details.
|
|
||||||
@<|TensorDimens| constructor code@>=
|
|
||||||
TensorDimens::TensorDimens(const IntSequence& ss, const IntSequence& coor)
|
|
||||||
: nvs(ss),
|
|
||||||
sym(ss.size(), ""),
|
|
||||||
nvmax(coor.size(), 0)
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(! coor.isSorted(),
|
|
||||||
"Coordinates not sorted in TensorDimens slicing constructor");
|
|
||||||
TL_RAISE_IF(coor[0] < 0 || coor[coor.size()-1] >= ss.size(),
|
|
||||||
"A coordinate out of stack range in TensorDimens slicing constructor");
|
|
||||||
|
|
||||||
for (int i = 0; i < coor.size(); i++) {
|
|
||||||
sym[coor[i]]++;
|
|
||||||
nvmax[i] = ss[coor[i]];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Number of unfold offsets is a product of all members of |nvmax|.
|
|
||||||
@<|TensorDimens::calcUnfoldMaxOffset| code@>=
|
|
||||||
int TensorDimens::calcUnfoldMaxOffset() const
|
|
||||||
{
|
|
||||||
return nvmax.mult();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Number of folded offsets is a product of all unfold offsets within
|
|
||||||
each equivalence class of the symmetry.
|
|
||||||
|
|
||||||
@<|TensorDimens::calcFoldMaxOffset| code@>=
|
|
||||||
int TensorDimens::calcFoldMaxOffset() const
|
|
||||||
{
|
|
||||||
int res = 1;
|
|
||||||
for (int i = 0; i < nvs.size(); i++) {
|
|
||||||
if (nvs[i] == 0 && sym[i] > 0)
|
|
||||||
return 0;
|
|
||||||
if (sym[i] > 0)
|
|
||||||
res *= Tensor::noverk(nvs[i]+sym[i]-1, sym[i]);
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we implement offset calculation for folded general symmetry
|
|
||||||
tensor. The offset of a given sequence is calculated by breaking the
|
|
||||||
sequence to subsequences according to the symmetry. The offset is
|
|
||||||
orthogonal with respect to the blocks, this means that indexing within
|
|
||||||
the blocks is independent. If there are two blocks, for instance, then
|
|
||||||
the offset will be an offset within the outer block (the first)
|
|
||||||
multiplied with all offsets of the inner block (last) plus an offset
|
|
||||||
within the second block.
|
|
||||||
|
|
||||||
Generally, the resulting offset $r$ will be
|
|
||||||
$$\sum_{i=1}^s r_i\cdot\left(\prod_{j=i+1}^sn_j\right),$$
|
|
||||||
where $s$ is a number of blocks (|getSym().num()|), $r_i$ is an offset
|
|
||||||
within $i$-th block, and $n_j$ is a number of all offsets in $j$-th
|
|
||||||
block.
|
|
||||||
|
|
||||||
In the code, we go from the innermost to the outermost, maintaining the
|
|
||||||
product in |pow|.
|
|
||||||
|
|
||||||
@<|TensorDimens::calcFoldOffset| code@>=
|
|
||||||
int TensorDimens::calcFoldOffset(const IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input vector size in TensorDimens::getFoldOffset");
|
|
||||||
|
|
||||||
int res = 0;
|
|
||||||
int pow = 1;
|
|
||||||
int blstart = v.size();
|
|
||||||
for (int ibl = getSym().num()-1; ibl >= 0; ibl--) {
|
|
||||||
int bldim = getSym()[ibl];
|
|
||||||
if (bldim > 0) {
|
|
||||||
blstart -= bldim;
|
|
||||||
int blnvar = getNVX()[blstart];
|
|
||||||
IntSequence subv(v, blstart, blstart+bldim);
|
|
||||||
res += FTensor::getOffset(subv, blnvar)*pow;
|
|
||||||
pow *= FFSTensor::calcMaxOffset(blnvar, bldim);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
TL_RAISE_IF(blstart != 0,
|
|
||||||
"Error in tracing symmetry in TensorDimens::getFoldOffset");
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ In order to find the predecessor of index within folded generally
|
|
||||||
symmetric tensor, note, that a decrease action in $i$-th partition of
|
|
||||||
symmetric indices can happen only if all indices in all subsequent
|
|
||||||
partitions are zero. Then the decrease action of whole the index
|
|
||||||
consists of decrease action of the first nonzero partition from the
|
|
||||||
right, and setting these trailing zero partitions to their maximum
|
|
||||||
indices.
|
|
||||||
|
|
||||||
So we set |iblock| to the number of last partitions. During the
|
|
||||||
execution, |block_first|, and |block_last| will point to the first
|
|
||||||
element of |iblock| and, first element of following block.
|
|
||||||
|
|
||||||
Then we check for all trailing zero partitions, set them to their
|
|
||||||
maximums and return |iblock| to point to the first non-zero partition
|
|
||||||
(or the first partition). Then for this partition, we decrease the
|
|
||||||
index (fully symmetric within that partition).
|
|
||||||
|
|
||||||
@<|TensorDimens::decrement| code@>=
|
|
||||||
void TensorDimens::decrement(IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(getNVX().size() != v.size(),
|
|
||||||
"Wrong size of input/output sequence in TensorDimens::decrement");
|
|
||||||
|
|
||||||
int iblock = getSym().num()-1;
|
|
||||||
int block_last = v.size();
|
|
||||||
int block_first = block_last-getSym()[iblock];
|
|
||||||
@<check for zero trailing blocks@>;
|
|
||||||
@<decrease the non-zero block@>;
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<check for zero trailing blocks@>=
|
|
||||||
while (iblock > 0 && v[block_last-1] == 0) {
|
|
||||||
for (int i = block_first; i < block_last; i++)
|
|
||||||
v[i] = getNVX(i); // equivalent to |nvs[iblock]|
|
|
||||||
iblock--;
|
|
||||||
block_last = block_first;
|
|
||||||
block_first -= getSym()[iblock];
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<decrease the non-zero block@>=
|
|
||||||
IntSequence vtmp(v, block_first, block_last);
|
|
||||||
FTensor::decrement(vtmp, getNVX(block_first));
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we go through columns of folded, calculate column of unfolded,
|
|
||||||
and copy data.
|
|
||||||
|
|
||||||
@<|FGSTensor| conversion from |UGSTensor|@>=
|
|
||||||
FGSTensor::FGSTensor(const UGSTensor& ut)
|
|
||||||
: FTensor(along_col, ut.tdims.getNVX(), ut.nrows(),
|
|
||||||
ut.tdims.calcFoldMaxOffset(), ut.dimen()),
|
|
||||||
tdims(ut.tdims)
|
|
||||||
{
|
|
||||||
for (index ti = begin(); ti != end(); ++ti) {
|
|
||||||
index ui(&ut, ti.getCoor());
|
|
||||||
copyColumn(ut, *ui, *ti);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here is the code of slicing constructor from the sparse tensor. We
|
|
||||||
first calculate coordinates of first and last index of the slice
|
|
||||||
within the sparse tensor (these are |lb| and |ub|), and then we
|
|
||||||
iterate through all items between them (in lexicographical ordering of
|
|
||||||
sparse tensor), and check whether an item is between the |lb| and |ub|
|
|
||||||
in Cartesian ordering (this corresponds to belonging to the
|
|
||||||
slices). If it belongs, then we subtract the lower bound |lb| to
|
|
||||||
obtain coordinates in the |this| tensor and we copy the item.
|
|
||||||
|
|
||||||
@<|FGSTensor| slicing from |FSSparseTensor|@>=
|
|
||||||
FGSTensor::FGSTensor(const FSSparseTensor& t, const IntSequence& ss,
|
|
||||||
const IntSequence& coor, const TensorDimens& td)
|
|
||||||
: FTensor(along_col, td.getNVX(), t.nrows(),
|
|
||||||
td.calcFoldMaxOffset(), td.dimen()),
|
|
||||||
tdims(td)
|
|
||||||
{
|
|
||||||
@<set |lb| and |ub| to lower and upper bounds of indices@>;
|
|
||||||
|
|
||||||
zeros();
|
|
||||||
FSSparseTensor::const_iterator lbi = t.getMap().lower_bound(lb);
|
|
||||||
FSSparseTensor::const_iterator ubi = t.getMap().upper_bound(ub);
|
|
||||||
for (FSSparseTensor::const_iterator run = lbi; run != ubi; ++run) {
|
|
||||||
if (lb.lessEq((*run).first) && (*run).first.lessEq(ub)) {
|
|
||||||
IntSequence c((*run).first);
|
|
||||||
c.add(-1, lb);
|
|
||||||
Tensor::index ind(this, c);
|
|
||||||
TL_RAISE_IF(*ind < 0 || *ind >= ncols(),
|
|
||||||
"Internal error in slicing constructor of FGSTensor");
|
|
||||||
get((*run).second.first, *ind) = (*run).second.second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we first set |s_offsets| to offsets of partitions whose lengths
|
|
||||||
are given by |ss|. So |s_offsets| is a cumulative sum of |ss|.
|
|
||||||
|
|
||||||
Then we create |lb| to be coordinates of the possibly first index from
|
|
||||||
the slice, and |ub| to be coordinates of possibly last index of the
|
|
||||||
slice.
|
|
||||||
|
|
||||||
@<set |lb| and |ub| to lower and upper bounds of indices@>=
|
|
||||||
IntSequence s_offsets(ss.size(), 0);
|
|
||||||
for (int i = 1; i < ss.size(); i++)
|
|
||||||
s_offsets[i] = s_offsets[i-1] + ss[i-1];
|
|
||||||
|
|
||||||
IntSequence lb(coor.size());
|
|
||||||
IntSequence ub(coor.size());
|
|
||||||
for (int i = 0; i < coor.size(); i++) {
|
|
||||||
lb[i] = s_offsets[coor[i]];
|
|
||||||
ub[i] = s_offsets[coor[i]] + ss[coor[i]] - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ The code is similar to |@<|FGSTensor| slicing from |FSSparseTensor|@>|.
|
|
||||||
@<|FGSTensor| slicing from |FFSTensor|@>=
|
|
||||||
FGSTensor::FGSTensor(const FFSTensor& t, const IntSequence& ss,
|
|
||||||
const IntSequence& coor, const TensorDimens& td)
|
|
||||||
: FTensor(along_col, td.getNVX(), t.nrows(),
|
|
||||||
td.calcFoldMaxOffset(), td.dimen()),
|
|
||||||
tdims(td)
|
|
||||||
{
|
|
||||||
if (ncols() == 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
@<set |lb| and |ub| to lower and upper bounds of indices@>;
|
|
||||||
|
|
||||||
zeros();
|
|
||||||
Tensor::index lbi(&t, lb);
|
|
||||||
Tensor::index ubi(&t, ub);
|
|
||||||
++ubi;
|
|
||||||
for (Tensor::index run = lbi; run != ubi; ++run) {
|
|
||||||
if (lb.lessEq(run.getCoor()) && run.getCoor().lessEq(ub)) {
|
|
||||||
IntSequence c(run.getCoor());
|
|
||||||
c.add(-1, lb);
|
|
||||||
Tensor::index ind(this, c);
|
|
||||||
TL_RAISE_IF(*ind < 0 || *ind >= ncols(),
|
|
||||||
"Internal error in slicing constructor of FGSTensor");
|
|
||||||
copyColumn(t, *run, *ind);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|FGSTensor| conversion from |GSSparseTensor|@>=
|
|
||||||
FGSTensor::FGSTensor(const GSSparseTensor& t)
|
|
||||||
: FTensor(along_col, t.getDims().getNVX(), t.nrows(),
|
|
||||||
t.getDims().calcFoldMaxOffset(), t.dimen()), tdims(t.getDims())
|
|
||||||
{
|
|
||||||
zeros();
|
|
||||||
for (FSSparseTensor::const_iterator it = t.getMap().begin();
|
|
||||||
it != t.getMap().end(); ++it) {
|
|
||||||
index ind(this, (*it).first);
|
|
||||||
get((*it).second.first, *ind) = (*it).second.second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ First we increment as unfolded, then we must monotonize within
|
|
||||||
partitions defined by the symmetry. This is done by
|
|
||||||
|IntSequence::pmonotone|.
|
|
||||||
|
|
||||||
@<|FGSTensor::increment| code@>=
|
|
||||||
void FGSTensor::increment(IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input/output vector size in FGSTensor::increment");
|
|
||||||
|
|
||||||
UTensor::increment(v, tdims.getNVX());
|
|
||||||
v.pmonotone(tdims.getSym());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ Return unfolded version of the tensor.
|
|
||||||
@<|FGSTensor::unfold| code@>=
|
|
||||||
UTensor& FGSTensor::unfold() const
|
|
||||||
{
|
|
||||||
return *(new UGSTensor(*this));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we implement the contraction
|
|
||||||
$$\left[r_{x^iz^k}\right]_{\alpha_1\ldots\alpha_i\gamma_1\ldots\gamma_k}=
|
|
||||||
\left[t_{x^iy^jz^k}\right]_{\alpha_1\ldots\alpha_i\beta_1\ldots\beta_j\gamma_1\ldots\gamma_k}
|
|
||||||
\left[c\right]^{\beta_1\ldots\beta_j}
|
|
||||||
$$
|
|
||||||
More generally, $x^i$ and $z^k$ can represent also general symmetries.
|
|
||||||
|
|
||||||
The operation can be rewritten as a matrix product
|
|
||||||
$$\left[t_{x^iy^jz^k}\right]\cdot\left(I_l\otimes c\otimes I_r\right)$$
|
|
||||||
where $l$ is a number of columns in tensor with symmetry on the left
|
|
||||||
(i.e. $x^i$), and $r$ is a number of columns in tensor with a symmetry
|
|
||||||
on the right (i.e. $z^k$). The code proceeds accordingly. We first
|
|
||||||
form two symmetries |sym_left| and |sym_right|, then calculate the
|
|
||||||
number of columns |dleft|$=l$ and |dright|$=r$, form the Kronecker
|
|
||||||
product and multiply and add.
|
|
||||||
|
|
||||||
The input parameter |i| is the order of a variable being contracted
|
|
||||||
starting from 0.
|
|
||||||
|
|
||||||
@<|FGSTensor::contractAndAdd| code@>=
|
|
||||||
void FGSTensor::contractAndAdd(int i, FGSTensor& out,
|
|
||||||
const FRSingleTensor& col) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(i < 0 || i >= getSym().num(),
|
|
||||||
"Wrong index for FGSTensor::contractAndAdd");
|
|
||||||
|
|
||||||
TL_RAISE_IF(getSym()[i] != col.dimen() || tdims.getNVS()[i] != col.nvar(),
|
|
||||||
"Wrong dimensions for FGSTensor::contractAndAdd");
|
|
||||||
|
|
||||||
@<set |sym_left| and |sym_right| to symmetries around |i|@>;
|
|
||||||
int dleft = TensorDimens(sym_left, tdims.getNVS()).calcFoldMaxOffset();
|
|
||||||
int dright = TensorDimens(sym_right, tdims.getNVS()).calcFoldMaxOffset();
|
|
||||||
KronProdAll kp(3);
|
|
||||||
kp.setUnit(0, dleft);
|
|
||||||
kp.setMat(1, col);
|
|
||||||
kp.setUnit(2, dright);
|
|
||||||
FGSTensor tmp(out.nrows(), out.getDims());
|
|
||||||
kp.mult(*this, tmp);
|
|
||||||
out.add(1.0, tmp);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we have a symmetry of |this| tensor and we have to set
|
|
||||||
|sym_left| to the subsymmetry left from the |i|-th variable and
|
|
||||||
|sym_right| to the subsymmetry right from the |i|-th variable. So we
|
|
||||||
copy first all the symmetry and then put zeros to the left for
|
|
||||||
|sym_right| and to the right for |sym_left|.
|
|
||||||
|
|
||||||
@<set |sym_left| and |sym_right| to symmetries around |i|@>=
|
|
||||||
Symmetry sym_left(getSym());
|
|
||||||
Symmetry sym_right(getSym());
|
|
||||||
for (int j = 0; j < getSym().num(); j++) {
|
|
||||||
if (j <= i)
|
|
||||||
sym_right[j] = 0;
|
|
||||||
if (j >= i)
|
|
||||||
sym_left[j] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we go through folded tensor, and each index we convert to index
|
|
||||||
of the unfolded tensor and copy the data to the unfolded. Then we
|
|
||||||
unfold data within the unfolded tensor.
|
|
||||||
|
|
||||||
@<|UGSTensor| conversion from |FGSTensor|@>=
|
|
||||||
UGSTensor::UGSTensor(const FGSTensor& ft)
|
|
||||||
: UTensor(along_col, ft.tdims.getNVX(), ft.nrows(),
|
|
||||||
ft.tdims.calcUnfoldMaxOffset(), ft.dimen()),
|
|
||||||
tdims(ft.tdims)
|
|
||||||
{
|
|
||||||
for (index fi = ft.begin(); fi != ft.end(); ++fi) {
|
|
||||||
index ui(this, fi.getCoor());
|
|
||||||
copyColumn(ft, *fi, *ui);
|
|
||||||
}
|
|
||||||
unfoldData();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This makes a folded slice from the sparse tensor and unfolds it.
|
|
||||||
@<|UGSTensor| slicing from |FSSparseTensor|@>=
|
|
||||||
UGSTensor::UGSTensor(const FSSparseTensor& t, const IntSequence& ss,
|
|
||||||
const IntSequence& coor, const TensorDimens& td)
|
|
||||||
: UTensor(along_col, td.getNVX(), t.nrows(),
|
|
||||||
td.calcUnfoldMaxOffset(), td.dimen()),
|
|
||||||
tdims(td)
|
|
||||||
{
|
|
||||||
if (ncols() == 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
FGSTensor ft(t, ss, coor, td);
|
|
||||||
for (index fi = ft.begin(); fi != ft.end(); ++fi) {
|
|
||||||
index ui(this, fi.getCoor());
|
|
||||||
copyColumn(ft, *fi, *ui);
|
|
||||||
}
|
|
||||||
unfoldData();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This makes a folded slice from dense and unfolds it.
|
|
||||||
@<|UGSTensor| slicing from |UFSTensor|@>=
|
|
||||||
UGSTensor::UGSTensor(const UFSTensor& t, const IntSequence& ss,
|
|
||||||
const IntSequence& coor, const TensorDimens& td)
|
|
||||||
: UTensor(along_col, td.getNVX(), t.nrows(),
|
|
||||||
td.calcUnfoldMaxOffset(), td.dimen()),
|
|
||||||
tdims(td)
|
|
||||||
{
|
|
||||||
FFSTensor folded(t);
|
|
||||||
FGSTensor ft(folded, ss, coor, td);
|
|
||||||
for (index fi = ft.begin(); fi != ft.end(); ++fi) {
|
|
||||||
index ui(this, fi.getCoor());
|
|
||||||
copyColumn(ft, *fi, *ui);
|
|
||||||
}
|
|
||||||
unfoldData();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Clear, just call |UTensor| static methods.
|
|
||||||
@<|UGSTensor| increment and decrement codes@>=
|
|
||||||
void UGSTensor::increment(IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input/output vector size in UGSTensor::increment");
|
|
||||||
|
|
||||||
UTensor::increment(v, tdims.getNVX());
|
|
||||||
}
|
|
||||||
|
|
||||||
void UGSTensor::decrement(IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input/output vector size in UGSTensor::decrement");
|
|
||||||
|
|
||||||
UTensor::decrement(v, tdims.getNVX());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Return a new instance of folded version.
|
|
||||||
@<|UGSTensor::fold| code@>=
|
|
||||||
FTensor& UGSTensor::fold() const
|
|
||||||
{
|
|
||||||
return *(new FGSTensor(*this));
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Return an offset of a given index.
|
|
||||||
@<|UGSTensor::getOffset| code@>=
|
|
||||||
int UGSTensor::getOffset(const IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input vector size in UGSTensor::getOffset");
|
|
||||||
|
|
||||||
return UTensor::getOffset(v, tdims.getNVX());
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Unfold all data. We go through all the columns and for each we
|
|
||||||
obtain an index of the first equivalent, and copy the data.
|
|
||||||
|
|
||||||
@<|UGSTensor::unfoldData| code@>=
|
|
||||||
void UGSTensor::unfoldData()
|
|
||||||
{
|
|
||||||
for (index in = begin(); in != end(); ++in)
|
|
||||||
copyColumn(*(getFirstIndexOf(in)), *in);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we return the first index which is equivalent in the symmetry
|
|
||||||
to the given index. It is a matter of sorting all the symmetry
|
|
||||||
partitions of the index.
|
|
||||||
|
|
||||||
@<|UGSTensor::getFirstIndexOf| code@>=
|
|
||||||
Tensor::index UGSTensor::getFirstIndexOf(const index& in) const
|
|
||||||
{
|
|
||||||
IntSequence v(in.getCoor());
|
|
||||||
int last = 0;
|
|
||||||
for (int i = 0; i < tdims.getSym().num(); i++) {
|
|
||||||
IntSequence vtmp(v, last, last+tdims.getSym()[i]);
|
|
||||||
vtmp.sort();
|
|
||||||
last += tdims.getSym()[i];
|
|
||||||
}
|
|
||||||
return index(this, v);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here is perfectly same code with the same semantics as in
|
|
||||||
|@<|FGSTensor::contractAndAdd| code@>|.
|
|
||||||
|
|
||||||
@<|UGSTensor::contractAndAdd| code@>=
|
|
||||||
void UGSTensor::contractAndAdd(int i, UGSTensor& out,
|
|
||||||
const URSingleTensor& col) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(i < 0 || i >= getSym().num(),
|
|
||||||
"Wrong index for UGSTensor::contractAndAdd");
|
|
||||||
TL_RAISE_IF(getSym()[i] != col.dimen() || tdims.getNVS()[i] != col.nvar(),
|
|
||||||
"Wrong dimensions for UGSTensor::contractAndAdd");
|
|
||||||
|
|
||||||
@<set |sym_left| and |sym_right| to symmetries around |i|@>;
|
|
||||||
int dleft = TensorDimens(sym_left, tdims.getNVS()).calcUnfoldMaxOffset();
|
|
||||||
int dright = TensorDimens(sym_right, tdims.getNVS()).calcUnfoldMaxOffset();
|
|
||||||
KronProdAll kp(3);
|
|
||||||
kp.setUnit(0, dleft);
|
|
||||||
kp.setMat(1, col);
|
|
||||||
kp.setUnit(2, dright);
|
|
||||||
UGSTensor tmp(out.nrows(), out.getDims());
|
|
||||||
kp.mult(*this, tmp);
|
|
||||||
out.add(1.0, tmp);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ End of {\tt gs\_tensor.cpp} file.
|
|
|
@ -0,0 +1,274 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// General symmetry tensor.
|
||||||
|
|
||||||
|
/* Here we define tensors for general symmetry. All tensors from here are
|
||||||
|
identifying the multidimensional index with columns. Thus all
|
||||||
|
symmetries regard to columns. The general symmetry here is not the most
|
||||||
|
general. It captures all symmetries of indices which are given by
|
||||||
|
continuous partitioning of indices. Two items are symmetric if they
|
||||||
|
belong to the same group. The continuity implies that if two items
|
||||||
|
belong to one group, then all items between them belong to that
|
||||||
|
group. This continuous partitioning of indices is described by
|
||||||
|
|Symmetry| class.
|
||||||
|
|
||||||
|
The dimension of the tensors here are described (besides the symmetry)
|
||||||
|
also by number of variables for each group. This is dealt in the class
|
||||||
|
for tensor dimensions defined also here. */
|
||||||
|
|
||||||
|
#ifndef GS_TENSOR_H
|
||||||
|
#define GS_TENSOR_H
|
||||||
|
|
||||||
|
#include "tensor.hh"
|
||||||
|
#include "fs_tensor.hh"
|
||||||
|
#include "symmetry.hh"
|
||||||
|
#include "rfs_tensor.hh"
|
||||||
|
|
||||||
|
class FGSTensor;
|
||||||
|
class UGSTensor;
|
||||||
|
class FSSparseTensor;
|
||||||
|
|
||||||
|
/* This class encapsulates symmetry information for the general
|
||||||
|
symmetry tensor. It maintains a vector of variable numbers |nvs|, and
|
||||||
|
symmetry |sym|. For example, let the symmetry be $y^2u^3$, and
|
||||||
|
variable numbers be 10 for $y$, and 5 for $u$. Then the |nvs| is
|
||||||
|
$(10,5)$, and |sym| is $(2,3)$. Also it maintains |nvmax| unfolded |nvs| with
|
||||||
|
respect to the symmetry, this is $(10,10,5,5,5)$.
|
||||||
|
|
||||||
|
The constructors of |TensorDimens| are clear and pretty intuitive but
|
||||||
|
the constructor which is used for slicing fully symmetric tensor. It
|
||||||
|
constructs the dimensions from the partitioning of variables of fully
|
||||||
|
symmetric tensor. Let the partitioning be, for instance, $(a,b,c,d)$,
|
||||||
|
where $(n_a,n_b,n_c,n_d)$ are lengths of the partitions. Let one want
|
||||||
|
to get a slice only of the part of the fully symmetric tensor
|
||||||
|
corresponding to indices of the form $b^2d^3$. This corresponds to the
|
||||||
|
symmetry $a^0b^2c^0d^3$. So, the dimension of the slice would be also
|
||||||
|
$(n_a,n_b,n_c,n_d)$ for number of variables and $(0,2,0,3)$ for the
|
||||||
|
symmetry. So we provide the constructor which takes sizes of
|
||||||
|
partitions $(n_a,n_b,n_c,n_d)$ as |IntSequence|, and indices of picked
|
||||||
|
partitions, in our case $(1,1,3,3,3)$, as |IntSequence|.
|
||||||
|
|
||||||
|
The class is able to calculate number of offsets (columns or rows depending
|
||||||
|
what matrix coordinate we describe) in unfolded and folded tensors
|
||||||
|
with the given symmetry. */
|
||||||
|
|
||||||
|
class TensorDimens
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
IntSequence nvs;
|
||||||
|
Symmetry sym;
|
||||||
|
IntSequence nvmax;
|
||||||
|
public:
|
||||||
|
TensorDimens(const Symmetry &s, const IntSequence &nvars)
|
||||||
|
: nvs(nvars), sym(s), nvmax(sym, nvs)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
TensorDimens(int nvar, int dimen)
|
||||||
|
: nvs(1), sym(dimen), nvmax(dimen, nvar)
|
||||||
|
{
|
||||||
|
nvs[0] = nvar;
|
||||||
|
}
|
||||||
|
TensorDimens(const TensorDimens &td)
|
||||||
|
: nvs(td.nvs), sym(td.sym), nvmax(td.nvmax)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual ~TensorDimens()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
TensorDimens(const IntSequence &ss, const IntSequence &coor);
|
||||||
|
const TensorDimens &
|
||||||
|
operator=(const TensorDimens &td)
|
||||||
|
{
|
||||||
|
nvs = td.nvs; sym = td.sym; nvmax = td.nvmax; return *this;
|
||||||
|
}
|
||||||
|
bool
|
||||||
|
operator==(const TensorDimens &td) const
|
||||||
|
{
|
||||||
|
return nvs == td.nvs && sym == td.sym;
|
||||||
|
}
|
||||||
|
bool
|
||||||
|
operator!=(const TensorDimens &td) const
|
||||||
|
{
|
||||||
|
return !operator==(td);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
dimen() const
|
||||||
|
{
|
||||||
|
return sym.dimen();
|
||||||
|
}
|
||||||
|
int
|
||||||
|
getNVX(int i) const
|
||||||
|
{
|
||||||
|
return nvmax[i];
|
||||||
|
}
|
||||||
|
const IntSequence &
|
||||||
|
getNVS() const
|
||||||
|
{
|
||||||
|
return nvs;
|
||||||
|
}
|
||||||
|
const IntSequence &
|
||||||
|
getNVX() const
|
||||||
|
{
|
||||||
|
return nvmax;
|
||||||
|
}
|
||||||
|
const Symmetry &
|
||||||
|
getSym() const
|
||||||
|
{
|
||||||
|
return sym;
|
||||||
|
}
|
||||||
|
|
||||||
|
int calcUnfoldMaxOffset() const;
|
||||||
|
int calcFoldMaxOffset() const;
|
||||||
|
int calcFoldOffset(const IntSequence &v) const;
|
||||||
|
void decrement(IntSequence &v) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Here is a class for folded general symmetry tensor. It only contains
|
||||||
|
tensor dimensions, it defines types for indices, implement virtual
|
||||||
|
methods of super class |FTensor|.
|
||||||
|
|
||||||
|
We add a method |contractAndAdd| which performs a contraction of one
|
||||||
|
variable in the tensor. This is, for instance
|
||||||
|
$$\left[r_{x^iz^k}\right]_{\alpha_1\ldots\alpha_i\gamma_1\ldots\gamma_k}=
|
||||||
|
\left[t_{x^iy^jz^k}\right]_{\alpha_1\ldots\alpha_i\beta_1\ldots\beta_j\gamma_1\ldots\gamma_k}
|
||||||
|
\left[c\right]^{\beta_1\ldots\beta_j}
|
||||||
|
$$
|
||||||
|
|
||||||
|
Also we add |getOffset| which should be used with care. */
|
||||||
|
|
||||||
|
class GSSparseTensor;
|
||||||
|
class FGSTensor : public FTensor
|
||||||
|
{
|
||||||
|
friend class UGSTensor;
|
||||||
|
|
||||||
|
const TensorDimens tdims;
|
||||||
|
public:
|
||||||
|
/* These are standard constructors followed by two slicing. The first
|
||||||
|
constructs a slice from the sparse, the second from the dense (both
|
||||||
|
fully symmetric). Next constructor is just a conversion from
|
||||||
|
|GSSParseTensor|. The last constructor allows for in-place conversion
|
||||||
|
from |FFSTensor| to |FGSTensor|. */
|
||||||
|
|
||||||
|
FGSTensor(int r, const TensorDimens &td)
|
||||||
|
: FTensor(along_col, td.getNVX(), r,
|
||||||
|
td.calcFoldMaxOffset(), td.dimen()), tdims(td)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
FGSTensor(const FGSTensor &ft)
|
||||||
|
: FTensor(ft), tdims(ft.tdims)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
FGSTensor(const UGSTensor &ut);
|
||||||
|
FGSTensor(int first_row, int num, FGSTensor &t)
|
||||||
|
: FTensor(first_row, num, t), tdims(t.tdims)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
FGSTensor(const FSSparseTensor &t, const IntSequence &ss,
|
||||||
|
const IntSequence &coor, const TensorDimens &td);
|
||||||
|
FGSTensor(const FFSTensor &t, const IntSequence &ss,
|
||||||
|
const IntSequence &coor, const TensorDimens &td);
|
||||||
|
FGSTensor(const GSSparseTensor &sp);
|
||||||
|
FGSTensor(FFSTensor &t)
|
||||||
|
: FTensor(0, t.nrows(), t), tdims(t.nvar(), t.dimen())
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual ~FGSTensor()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void increment(IntSequence &v) const;
|
||||||
|
void
|
||||||
|
decrement(IntSequence &v) const
|
||||||
|
{
|
||||||
|
tdims.decrement(v);
|
||||||
|
}
|
||||||
|
UTensor&unfold() const;
|
||||||
|
const TensorDimens &
|
||||||
|
getDims() const
|
||||||
|
{
|
||||||
|
return tdims;
|
||||||
|
}
|
||||||
|
const Symmetry &
|
||||||
|
getSym() const
|
||||||
|
{
|
||||||
|
return getDims().getSym();
|
||||||
|
}
|
||||||
|
|
||||||
|
void contractAndAdd(int i, FGSTensor &out,
|
||||||
|
const FRSingleTensor &col) const;
|
||||||
|
int
|
||||||
|
getOffset(const IntSequence &v) const
|
||||||
|
{
|
||||||
|
return tdims.calcFoldOffset(v);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Besides similar things that has |FGSTensor|, we have here also
|
||||||
|
method |unfoldData|, and helper method |getFirstIndexOf|
|
||||||
|
which corresponds to sorting coordinates in fully symmetric case (here
|
||||||
|
the action is more complicated, so we put it to the method). */
|
||||||
|
|
||||||
|
class UGSTensor : public UTensor
|
||||||
|
{
|
||||||
|
friend class FGSTensor;
|
||||||
|
|
||||||
|
const TensorDimens tdims;
|
||||||
|
public:
|
||||||
|
/* These are standard constructors. The last two constructors are
|
||||||
|
slicing. The first makes a slice from fully symmetric sparse, the
|
||||||
|
second from fully symmetric dense unfolded tensor. The last
|
||||||
|
constructor allows for in-place conversion from |UFSTensor| to
|
||||||
|
|UGSTensor|. */
|
||||||
|
UGSTensor(int r, const TensorDimens &td)
|
||||||
|
: UTensor(along_col, td.getNVX(), r,
|
||||||
|
td.calcUnfoldMaxOffset(), td.dimen()), tdims(td)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
UGSTensor(const UGSTensor &ut)
|
||||||
|
: UTensor(ut), tdims(ut.tdims)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
UGSTensor(const FGSTensor &ft);
|
||||||
|
|
||||||
|
UGSTensor(int first_row, int num, UGSTensor &t)
|
||||||
|
: UTensor(first_row, num, t), tdims(t.tdims)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
UGSTensor(const FSSparseTensor &t, const IntSequence &ss,
|
||||||
|
const IntSequence &coor, const TensorDimens &td);
|
||||||
|
UGSTensor(const UFSTensor &t, const IntSequence &ss,
|
||||||
|
const IntSequence &coor, const TensorDimens &td);
|
||||||
|
UGSTensor(UFSTensor &t)
|
||||||
|
: UTensor(0, t.nrows(), t), tdims(t.nvar(), t.dimen())
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual ~UGSTensor()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void increment(IntSequence &v) const;
|
||||||
|
void decrement(IntSequence &v) const;
|
||||||
|
FTensor&fold() const;
|
||||||
|
const TensorDimens &
|
||||||
|
getDims() const
|
||||||
|
{
|
||||||
|
return tdims;
|
||||||
|
}
|
||||||
|
const Symmetry &
|
||||||
|
getSym() const
|
||||||
|
{
|
||||||
|
return getDims().getSym();
|
||||||
|
}
|
||||||
|
|
||||||
|
void contractAndAdd(int i, UGSTensor &out,
|
||||||
|
const URSingleTensor &col) const;
|
||||||
|
int getOffset(const IntSequence &v) const;
|
||||||
|
private:
|
||||||
|
void unfoldData();
|
||||||
|
public:
|
||||||
|
index getFirstIndexOf(const index &in) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,222 +0,0 @@
|
||||||
@q $Id: gs_tensor.hweb 741 2006-05-09 11:12:46Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 General symmetry tensor. Start of {\tt gs\_tensor.h} file.
|
|
||||||
|
|
||||||
Here we define tensors for general symmetry. All tensors from here are
|
|
||||||
identifying the multidimensional index with columns. Thus all
|
|
||||||
symmetries regard to columns. The general symmetry here is not the most
|
|
||||||
general. It captures all symmetries of indices which are given by
|
|
||||||
continuous partitioning of indices. Two items are symmetric if they
|
|
||||||
belong to the same group. The continuity implies that if two items
|
|
||||||
belong to one group, then all items between them belong to that
|
|
||||||
group. This continuous partitioning of indices is described by
|
|
||||||
|Symmetry| class.
|
|
||||||
|
|
||||||
The dimension of the tensors here are described (besides the symmetry)
|
|
||||||
also by number of variables for each group. This is dealt in the class
|
|
||||||
for tensor dimensions defined also here.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#ifndef GS_TENSOR_H
|
|
||||||
#define GS_TENSOR_H
|
|
||||||
|
|
||||||
#include "tensor.h"
|
|
||||||
#include "fs_tensor.h"
|
|
||||||
#include "symmetry.h"
|
|
||||||
#include "rfs_tensor.h"
|
|
||||||
|
|
||||||
class FGSTensor;
|
|
||||||
class UGSTensor;
|
|
||||||
class FSSparseTensor;
|
|
||||||
|
|
||||||
@<|TensorDimens| class declaration@>;
|
|
||||||
@<|FGSTensor| class declaration@>;
|
|
||||||
@<|UGSTensor| class declaration@>;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
@ This class encapsulates symmetry information for the general
|
|
||||||
symmetry tensor. It maintains a vector of variable numbers |nvs|, and
|
|
||||||
symmetry |sym|. For example, let the symmetry be $y^2u^3$, and
|
|
||||||
variable numbers be 10 for $y$, and 5 for $u$. Then the |nvs| is
|
|
||||||
$(10,5)$, and |sym| is $(2,3)$. Also it maintains |nvmax| unfolded |nvs| with
|
|
||||||
respect to the symmetry, this is $(10,10,5,5,5)$.
|
|
||||||
|
|
||||||
The constructors of |TensorDimens| are clear and pretty intuitive but
|
|
||||||
the constructor which is used for slicing fully symmetric tensor. It
|
|
||||||
constructs the dimensions from the partitioning of variables of fully
|
|
||||||
symmetric tensor. Let the partitioning be, for instance, $(a,b,c,d)$,
|
|
||||||
where $(n_a,n_b,n_c,n_d)$ are lengths of the partitions. Let one want
|
|
||||||
to get a slice only of the part of the fully symmetric tensor
|
|
||||||
corresponding to indices of the form $b^2d^3$. This corresponds to the
|
|
||||||
symmetry $a^0b^2c^0d^3$. So, the dimension of the slice would be also
|
|
||||||
$(n_a,n_b,n_c,n_d)$ for number of variables and $(0,2,0,3)$ for the
|
|
||||||
symmetry. So we provide the constructor which takes sizes of
|
|
||||||
partitions $(n_a,n_b,n_c,n_d)$ as |IntSequence|, and indices of picked
|
|
||||||
partitions, in our case $(1,1,3,3,3)$, as |IntSequence|.
|
|
||||||
|
|
||||||
The class is able to calculate number of offsets (columns or rows depending
|
|
||||||
what matrix coordinate we describe) in unfolded and folded tensors
|
|
||||||
with the given symmetry.
|
|
||||||
|
|
||||||
@s TensorDimens int
|
|
||||||
|
|
||||||
@<|TensorDimens| class declaration@>=
|
|
||||||
class TensorDimens {
|
|
||||||
protected:@;
|
|
||||||
IntSequence nvs;
|
|
||||||
Symmetry sym;
|
|
||||||
IntSequence nvmax;
|
|
||||||
public:@;
|
|
||||||
TensorDimens(const Symmetry& s, const IntSequence& nvars)
|
|
||||||
: nvs(nvars), sym(s), nvmax(sym, nvs)@+ {}
|
|
||||||
TensorDimens(int nvar, int dimen)
|
|
||||||
: nvs(1), sym(dimen), nvmax(dimen, nvar)
|
|
||||||
{@+ nvs[0] = nvar;@+}
|
|
||||||
TensorDimens(const TensorDimens& td)
|
|
||||||
: nvs(td.nvs), sym(td.sym), nvmax(td.nvmax)@+ {}
|
|
||||||
virtual ~TensorDimens()@+ {}
|
|
||||||
TensorDimens(const IntSequence& ss, const IntSequence& coor);
|
|
||||||
const TensorDimens& operator=(const TensorDimens& td)
|
|
||||||
{@+ nvs = td.nvs;@+ sym = td.sym;@+ nvmax = td.nvmax;@+ return *this;@+}
|
|
||||||
bool operator==(const TensorDimens& td) const
|
|
||||||
{@+ return nvs == td.nvs && sym == td.sym;@+}
|
|
||||||
bool operator!=(const TensorDimens& td) const
|
|
||||||
{@+ return !operator==(td);@+}
|
|
||||||
|
|
||||||
int dimen() const
|
|
||||||
{@+ return sym.dimen();@+}
|
|
||||||
int getNVX(int i) const
|
|
||||||
{@+ return nvmax[i];@+}
|
|
||||||
const IntSequence& getNVS() const
|
|
||||||
{ @+ return nvs;@+}
|
|
||||||
const IntSequence& getNVX() const
|
|
||||||
{@+ return nvmax;@+}
|
|
||||||
const Symmetry& getSym() const
|
|
||||||
{@+ return sym;@+}
|
|
||||||
|
|
||||||
int calcUnfoldMaxOffset() const;
|
|
||||||
int calcFoldMaxOffset() const;
|
|
||||||
int calcFoldOffset(const IntSequence& v) const;
|
|
||||||
void decrement(IntSequence& v) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Here is a class for folded general symmetry tensor. It only contains
|
|
||||||
tensor dimensions, it defines types for indices, implement virtual
|
|
||||||
methods of super class |FTensor|.
|
|
||||||
|
|
||||||
We add a method |contractAndAdd| which performs a contraction of one
|
|
||||||
variable in the tensor. This is, for instance
|
|
||||||
$$\left[r_{x^iz^k}\right]_{\alpha_1\ldots\alpha_i\gamma_1\ldots\gamma_k}=
|
|
||||||
\left[t_{x^iy^jz^k}\right]_{\alpha_1\ldots\alpha_i\beta_1\ldots\beta_j\gamma_1\ldots\gamma_k}
|
|
||||||
\left[c\right]^{\beta_1\ldots\beta_j}
|
|
||||||
$$
|
|
||||||
|
|
||||||
Also we add |getOffset| which should be used with care.
|
|
||||||
|
|
||||||
@<|FGSTensor| class declaration@>=
|
|
||||||
class GSSparseTensor;
|
|
||||||
class FGSTensor : public FTensor {
|
|
||||||
friend class UGSTensor;
|
|
||||||
|
|
||||||
const TensorDimens tdims;
|
|
||||||
public:@;
|
|
||||||
@<|FGSTensor| constructor declarations@>;
|
|
||||||
virtual ~FGSTensor()@+ {}
|
|
||||||
|
|
||||||
void increment(IntSequence& v) const;
|
|
||||||
void decrement(IntSequence& v) const
|
|
||||||
{@+ tdims.decrement(v);@+}
|
|
||||||
UTensor& unfold() const;
|
|
||||||
const TensorDimens& getDims() const
|
|
||||||
{@+ return tdims;@+}
|
|
||||||
const Symmetry& getSym() const
|
|
||||||
{@+ return getDims().getSym();@+}
|
|
||||||
|
|
||||||
void contractAndAdd(int i, FGSTensor& out,
|
|
||||||
const FRSingleTensor& col) const;
|
|
||||||
int getOffset(const IntSequence& v) const
|
|
||||||
{@+ return tdims.calcFoldOffset(v);@+}
|
|
||||||
};
|
|
||||||
|
|
||||||
@ These are standard constructors followed by two slicing. The first
|
|
||||||
constructs a slice from the sparse, the second from the dense (both
|
|
||||||
fully symmetric). Next constructor is just a conversion from
|
|
||||||
|GSSParseTensor|. The last constructor allows for in-place conversion
|
|
||||||
from |FFSTensor| to |FGSTensor|.
|
|
||||||
|
|
||||||
@<|FGSTensor| constructor declarations@>=
|
|
||||||
FGSTensor(int r, const TensorDimens& td)
|
|
||||||
: FTensor(along_col, td.getNVX(), r,
|
|
||||||
td.calcFoldMaxOffset(), td.dimen()), tdims(td)@+ {}
|
|
||||||
FGSTensor(const FGSTensor& ft)
|
|
||||||
: FTensor(ft), tdims(ft.tdims)@+ {}
|
|
||||||
FGSTensor(const UGSTensor& ut);
|
|
||||||
FGSTensor(int first_row, int num, FGSTensor& t)
|
|
||||||
: FTensor(first_row, num, t), tdims(t.tdims)@+ {}
|
|
||||||
FGSTensor(const FSSparseTensor& t, const IntSequence& ss,
|
|
||||||
const IntSequence& coor, const TensorDimens& td);
|
|
||||||
FGSTensor(const FFSTensor& t, const IntSequence& ss,
|
|
||||||
const IntSequence& coor, const TensorDimens& td);
|
|
||||||
FGSTensor(const GSSparseTensor& sp);
|
|
||||||
FGSTensor(FFSTensor& t)
|
|
||||||
: FTensor(0, t.nrows(), t), tdims(t.nvar(), t.dimen())@+ {}
|
|
||||||
|
|
||||||
|
|
||||||
@ Besides similar things that has |FGSTensor|, we have here also
|
|
||||||
method |unfoldData|, and helper method |getFirstIndexOf|
|
|
||||||
which corresponds to sorting coordinates in fully symmetric case (here
|
|
||||||
the action is more complicated, so we put it to the method).
|
|
||||||
|
|
||||||
@<|UGSTensor| class declaration@>=
|
|
||||||
class UGSTensor : public UTensor {
|
|
||||||
friend class FGSTensor;
|
|
||||||
|
|
||||||
const TensorDimens tdims;
|
|
||||||
public:@;
|
|
||||||
@<|UGSTensor| constructor declarations@>;
|
|
||||||
virtual ~UGSTensor()@+ {}
|
|
||||||
|
|
||||||
void increment(IntSequence& v) const;
|
|
||||||
void decrement(IntSequence& v) const;
|
|
||||||
FTensor& fold() const;
|
|
||||||
const TensorDimens& getDims() const
|
|
||||||
{@+ return tdims;@+}
|
|
||||||
const Symmetry& getSym() const
|
|
||||||
{@+ return getDims().getSym();@+}
|
|
||||||
|
|
||||||
void contractAndAdd(int i, UGSTensor& out,
|
|
||||||
const URSingleTensor& col) const;
|
|
||||||
int getOffset(const IntSequence& v) const;
|
|
||||||
private:@;
|
|
||||||
void unfoldData();
|
|
||||||
public:@;
|
|
||||||
index getFirstIndexOf(const index& in) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ These are standard constructors. The last two constructors are
|
|
||||||
slicing. The first makes a slice from fully symmetric sparse, the
|
|
||||||
second from fully symmetric dense unfolded tensor. The last
|
|
||||||
constructor allows for in-place conversion from |UFSTensor| to
|
|
||||||
|UGSTensor|.
|
|
||||||
|
|
||||||
@<|UGSTensor| constructor declarations@>=
|
|
||||||
UGSTensor(int r, const TensorDimens& td)
|
|
||||||
: UTensor(along_col, td.getNVX(), r,
|
|
||||||
td.calcUnfoldMaxOffset(), td.dimen()), tdims(td)@+ {}
|
|
||||||
UGSTensor(const UGSTensor& ut)
|
|
||||||
: UTensor(ut), tdims(ut.tdims)@+ {}
|
|
||||||
UGSTensor(const FGSTensor& ft);
|
|
||||||
UGSTensor(int first_row, int num, UGSTensor& t)
|
|
||||||
: UTensor(first_row, num, t), tdims(t.tdims)@+ {}
|
|
||||||
UGSTensor(const FSSparseTensor& t, const IntSequence& ss,
|
|
||||||
const IntSequence& coor, const TensorDimens& td);
|
|
||||||
UGSTensor(const UFSTensor& t, const IntSequence& ss,
|
|
||||||
const IntSequence& coor, const TensorDimens& td);
|
|
||||||
UGSTensor(UFSTensor& t)
|
|
||||||
: UTensor(0, t.nrows(), t), tdims(t.nvar(), t.dimen())@+ {}
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt gs\_tensor.h} file.
|
|
|
@ -0,0 +1,312 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "int_sequence.hh"
|
||||||
|
#include "symmetry.hh"
|
||||||
|
#include "tl_exception.hh"
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
#include <climits>
|
||||||
|
|
||||||
|
/* This unfolds a given integer sequence with respect to the given
|
||||||
|
symmetry. If for example the symmetry is $(2,3)$, and the sequence is
|
||||||
|
$(a,b)$, then the result is $(a,a,b,b,b)$. */
|
||||||
|
|
||||||
|
IntSequence::IntSequence(const Symmetry &sy, const IntSequence &se)
|
||||||
|
: data(new int[sy.dimen()]), length(sy.dimen()), destroy(true)
|
||||||
|
{
|
||||||
|
int k = 0;
|
||||||
|
for (int i = 0; i < sy.num(); i++)
|
||||||
|
for (int j = 0; j < sy[i]; j++, k++)
|
||||||
|
operator[](k) = se[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This constructs an implied symmetry (implemented as |IntSequence|
|
||||||
|
from a more general symmetry and equivalence class (implemented as
|
||||||
|
|vector<int>|). For example, let the general symmetry be $y^3u^2$ and
|
||||||
|
the equivalence class is $\{0,4\}$ picking up first and fifth
|
||||||
|
variable, we calculate symmetry (at this point only |IntSequence|)
|
||||||
|
corresponding to the picked variables. These are $yu$. Thus the
|
||||||
|
constructed sequence must be $(1,1)$, meaning that we picked one $y$
|
||||||
|
and one $u$. */
|
||||||
|
|
||||||
|
IntSequence::IntSequence(const Symmetry &sy, const vector<int> &se)
|
||||||
|
: data(new int[sy.num()]), length(sy.num()), destroy(true)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(sy.dimen() <= se[se.size()-1],
|
||||||
|
"Sequence is not reachable by symmetry in IntSequence()");
|
||||||
|
for (int i = 0; i < length; i++)
|
||||||
|
operator[](i) = 0;
|
||||||
|
|
||||||
|
for (unsigned int i = 0; i < se.size(); i++)
|
||||||
|
operator[](sy.findClass(se[i]))++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This constructs an ordered integer sequence from the given ordered
|
||||||
|
sequence inserting the given number to the sequence. */
|
||||||
|
|
||||||
|
IntSequence::IntSequence(int i, const IntSequence &s)
|
||||||
|
: data(new int[s.size()+1]), length(s.size()+1), destroy(true)
|
||||||
|
{
|
||||||
|
int j = 0;
|
||||||
|
while (j < s.size() && s[j] < i)
|
||||||
|
j++;
|
||||||
|
for (int jj = 0; jj < j; jj++)
|
||||||
|
operator[](jj) = s[jj];
|
||||||
|
operator[](j) = i;
|
||||||
|
for (int jj = j; jj < s.size(); jj++)
|
||||||
|
operator[](jj+1) = s[jj];
|
||||||
|
}
|
||||||
|
|
||||||
|
IntSequence::IntSequence(int i, const IntSequence &s, int pos)
|
||||||
|
: data(new int[s.size()+1]), length(s.size()+1), destroy(true)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(pos < 0 || pos > s.size(),
|
||||||
|
"Wrong position for insertion IntSequence constructor");
|
||||||
|
for (int jj = 0; jj < pos; jj++)
|
||||||
|
operator[](jj) = s[jj];
|
||||||
|
operator[](pos) = i;
|
||||||
|
for (int jj = pos; jj < s.size(); jj++)
|
||||||
|
operator[](jj+1) = s[jj];
|
||||||
|
}
|
||||||
|
|
||||||
|
const IntSequence &
|
||||||
|
IntSequence::operator=(const IntSequence &s)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(!destroy && length != s.length,
|
||||||
|
"Wrong length for in-place IntSequence::operator=");
|
||||||
|
if (destroy && length != s.length)
|
||||||
|
{
|
||||||
|
delete [] data;
|
||||||
|
data = new int[s.length];
|
||||||
|
destroy = true;
|
||||||
|
length = s.length;
|
||||||
|
}
|
||||||
|
memcpy(data, s.data, sizeof(int)*length);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IntSequence::operator==(const IntSequence &s) const
|
||||||
|
{
|
||||||
|
if (size() != s.size())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
while (i < size() && operator[](i) == s[i])
|
||||||
|
i++;
|
||||||
|
return i == size();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We need some linear irreflexive ordering, we implement it as
|
||||||
|
lexicographic ordering without identity. */
|
||||||
|
bool
|
||||||
|
IntSequence::operator<(const IntSequence &s) const
|
||||||
|
{
|
||||||
|
int len = min(size(), s.size());
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
while (i < len && operator[](i) == s[i])
|
||||||
|
i++;
|
||||||
|
return (i < s.size() && (i == size() || operator[](i) < s[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IntSequence::lessEq(const IntSequence &s) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(size() != s.size(),
|
||||||
|
"Sequence with different lengths in IntSequence::lessEq");
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
while (i < size() && operator[](i) <= s[i])
|
||||||
|
i++;
|
||||||
|
return (i == size());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IntSequence::less(const IntSequence &s) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(size() != s.size(),
|
||||||
|
"Sequence with different lengths in IntSequence::less");
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
while (i < size() && operator[](i) < s[i])
|
||||||
|
i++;
|
||||||
|
return (i == size());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This is a bubble sort, all sequences are usually very short, so this
|
||||||
|
sin might be forgiven. */
|
||||||
|
|
||||||
|
void
|
||||||
|
IntSequence::sort()
|
||||||
|
{
|
||||||
|
for (int i = 0; i < length; i++)
|
||||||
|
{
|
||||||
|
int swaps = 0;
|
||||||
|
for (int j = 0; j < length-1; j++)
|
||||||
|
{
|
||||||
|
if (data[j] > data[j+1])
|
||||||
|
{
|
||||||
|
int s = data[j+1];
|
||||||
|
data[j+1] = data[j];
|
||||||
|
data[j] = s;
|
||||||
|
swaps++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (swaps == 0)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we monotonize the sequence. If an item is less then its
|
||||||
|
predecessor, it is equalized. */
|
||||||
|
|
||||||
|
void
|
||||||
|
IntSequence::monotone()
|
||||||
|
{
|
||||||
|
for (int i = 1; i < length; i++)
|
||||||
|
if (data[i-1] > data[i])
|
||||||
|
data[i] = data[i-1];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This partially monotones the sequence. The partitioning is done by a
|
||||||
|
symmetry. So the subsequence given by the symmetry classes are
|
||||||
|
monotonized. For example, if the symmetry is $y^2u^3$, and the
|
||||||
|
|IntSequence| is $(5,3,1,6,4)$, the result is $(5,5,1,6,6)$. */
|
||||||
|
|
||||||
|
void
|
||||||
|
IntSequence::pmonotone(const Symmetry &s)
|
||||||
|
{
|
||||||
|
int cum = 0;
|
||||||
|
for (int i = 0; i < s.num(); i++)
|
||||||
|
{
|
||||||
|
for (int j = cum + 1; j < cum + s[i]; j++)
|
||||||
|
if (data[j-1] > data[j])
|
||||||
|
data[j] = data[j-1];
|
||||||
|
cum += s[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This returns sum of all elements. Useful for symmetries. */
|
||||||
|
|
||||||
|
int
|
||||||
|
IntSequence::sum() const
|
||||||
|
{
|
||||||
|
int res = 0;
|
||||||
|
for (int i = 0; i < length; i++)
|
||||||
|
res += operator[](i);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This returns product of subsequent items. Useful for Kronecker product
|
||||||
|
dimensions. */
|
||||||
|
|
||||||
|
int
|
||||||
|
IntSequence::mult(int i1, int i2) const
|
||||||
|
{
|
||||||
|
int res = 1;
|
||||||
|
for (int i = i1; i < i2; i++)
|
||||||
|
res *= operator[](i);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return a number of the same items in the beginning of the sequence. */
|
||||||
|
|
||||||
|
int
|
||||||
|
IntSequence::getPrefixLength() const
|
||||||
|
{
|
||||||
|
int i = 0;
|
||||||
|
while (i+1 < size() && operator[](i+1) == operator[](0))
|
||||||
|
i++;
|
||||||
|
return i+1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This returns a number of distinct items in the sequence. It supposes
|
||||||
|
that the sequence is ordered. For the empty sequence it returns zero. */
|
||||||
|
|
||||||
|
int
|
||||||
|
IntSequence::getNumDistinct() const
|
||||||
|
{
|
||||||
|
int res = 0;
|
||||||
|
if (size() > 0)
|
||||||
|
res++;
|
||||||
|
for (int i = 1; i < size(); i++)
|
||||||
|
if (operator[](i) != operator[](i-1))
|
||||||
|
res++;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This returns a maximum of the sequence. If the sequence is empty, it
|
||||||
|
returns the least possible |int| value. */
|
||||||
|
|
||||||
|
int
|
||||||
|
IntSequence::getMax() const
|
||||||
|
{
|
||||||
|
int res = INT_MIN;
|
||||||
|
for (int i = 0; i < size(); i++)
|
||||||
|
if (operator[](i) > res)
|
||||||
|
res = operator[](i);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
IntSequence::add(int i)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < size(); j++)
|
||||||
|
operator[](j) += i;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
IntSequence::add(int f, const IntSequence &s)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(size() != s.size(),
|
||||||
|
"Wrong sequence length in IntSequence::add");
|
||||||
|
for (int j = 0; j < size(); j++)
|
||||||
|
operator[](j) += f*s[j];
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IntSequence::isPositive() const
|
||||||
|
{
|
||||||
|
int i = 0;
|
||||||
|
while (i < size() && operator[](i) >= 0)
|
||||||
|
i++;
|
||||||
|
return (i == size());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IntSequence::isConstant() const
|
||||||
|
{
|
||||||
|
bool res = true;
|
||||||
|
int i = 1;
|
||||||
|
while (res && i < size())
|
||||||
|
{
|
||||||
|
res = res && operator[](0) == operator[](i);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
IntSequence::isSorted() const
|
||||||
|
{
|
||||||
|
bool res = true;
|
||||||
|
int i = 1;
|
||||||
|
while (res && i < size())
|
||||||
|
{
|
||||||
|
res = res && operator[](i-1) <= operator[](i);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Debug print. */
|
||||||
|
|
||||||
|
void
|
||||||
|
IntSequence::print() const
|
||||||
|
{
|
||||||
|
printf("[");
|
||||||
|
for (int i = 0; i < size(); i++)
|
||||||
|
printf("%2d ", operator[](i));
|
||||||
|
printf("]\n");
|
||||||
|
}
|
|
@ -1,351 +0,0 @@
|
||||||
@q $Id: int_sequence.cweb 148 2005-04-19 15:12:26Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt int\_sequence.cpp} file.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#include "int_sequence.h"
|
|
||||||
#include "symmetry.h"
|
|
||||||
#include "tl_exception.h"
|
|
||||||
|
|
||||||
#include <cstdio>
|
|
||||||
#include <climits>
|
|
||||||
|
|
||||||
@<|IntSequence| constructor code 1@>;
|
|
||||||
@<|IntSequence| constructor code 2@>;
|
|
||||||
@<|IntSequence| constructor code 3@>;
|
|
||||||
@<|IntSequence| constructor code 4@>;
|
|
||||||
@<|IntSequence::operator=| code@>;
|
|
||||||
@<|IntSequence::operator==| code@>;
|
|
||||||
@<|IntSequence::operator<| code@>;
|
|
||||||
@<|IntSequence::lessEq| code@>;
|
|
||||||
@<|IntSequence::less| code@>;
|
|
||||||
@<|IntSequence::sort| code@>;
|
|
||||||
@<|IntSequence::monotone| code@>;
|
|
||||||
@<|IntSequence::pmonotone| code@>;
|
|
||||||
@<|IntSequence::sum| code@>;
|
|
||||||
@<|IntSequence::mult| code@>;
|
|
||||||
@<|IntSequence::getPrefixLength| code@>;
|
|
||||||
@<|IntSequence::getNumDistinct| code@>;
|
|
||||||
@<|IntSequence::getMax| code@>;
|
|
||||||
@<|IntSequence::add| code 1@>;
|
|
||||||
@<|IntSequence::add| code 2@>;
|
|
||||||
@<|IntSequence::isPositive| code@>;
|
|
||||||
@<|IntSequence::isConstant| code@>;
|
|
||||||
@<|IntSequence::isSorted| code@>;
|
|
||||||
@<|IntSequence::print| code@>;
|
|
||||||
|
|
||||||
@ This unfolds a given integer sequence with respect to the given
|
|
||||||
symmetry. If for example the symmetry is $(2,3)$, and the sequence is
|
|
||||||
$(a,b)$, then the result is $(a,a,b,b,b)$.
|
|
||||||
|
|
||||||
@<|IntSequence| constructor code 1@>=
|
|
||||||
IntSequence::IntSequence(const Symmetry& sy, const IntSequence& se)
|
|
||||||
: data(new int[sy.dimen()]), length(sy.dimen()), destroy(true)
|
|
||||||
{
|
|
||||||
int k = 0;
|
|
||||||
for (int i = 0; i < sy.num(); i++)
|
|
||||||
for (int j = 0; j < sy[i]; j++, k++)
|
|
||||||
operator[](k) = se[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ This constructs an implied symmetry (implemented as |IntSequence|
|
|
||||||
from a more general symmetry and equivalence class (implemented as
|
|
||||||
|vector<int>|). For example, let the general symmetry be $y^3u^2$ and
|
|
||||||
the equivalence class is $\{0,4\}$ picking up first and fifth
|
|
||||||
variable, we calculate symmetry (at this point only |IntSequence|)
|
|
||||||
corresponding to the picked variables. These are $yu$. Thus the
|
|
||||||
constructed sequence must be $(1,1)$, meaning that we picked one $y$
|
|
||||||
and one $u$.
|
|
||||||
|
|
||||||
|
|
||||||
@<|IntSequence| constructor code 2@>=
|
|
||||||
IntSequence::IntSequence(const Symmetry& sy, const vector<int>& se)
|
|
||||||
: data(new int[sy.num()]), length(sy.num()), destroy(true)
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(sy.dimen() <= se[se.size()-1],
|
|
||||||
"Sequence is not reachable by symmetry in IntSequence()");
|
|
||||||
for (int i = 0; i < length; i++) @/
|
|
||||||
operator[](i) = 0;
|
|
||||||
|
|
||||||
for (unsigned int i = 0; i < se.size(); i++) @/
|
|
||||||
operator[](sy.findClass(se[i]))++;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This constructs an ordered integer sequence from the given ordered
|
|
||||||
sequence inserting the given number to the sequence.
|
|
||||||
|
|
||||||
@<|IntSequence| constructor code 3@>=
|
|
||||||
IntSequence::IntSequence(int i, const IntSequence& s)
|
|
||||||
: data(new int[s.size()+1]), length(s.size()+1), destroy(true)
|
|
||||||
{
|
|
||||||
int j = 0;
|
|
||||||
while (j < s.size() && s[j] < i)
|
|
||||||
j++;
|
|
||||||
for (int jj = 0; jj < j; jj++)
|
|
||||||
operator[](jj) = s[jj];
|
|
||||||
operator[](j) = i;
|
|
||||||
for (int jj = j; jj < s.size(); jj++)
|
|
||||||
operator[](jj+1) = s[jj];
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|IntSequence| constructor code 4@>=
|
|
||||||
IntSequence::IntSequence(int i, const IntSequence& s, int pos)
|
|
||||||
: data(new int[s.size()+1]), length(s.size()+1), destroy(true)
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(pos < 0 || pos > s.size(),
|
|
||||||
"Wrong position for insertion IntSequence constructor");
|
|
||||||
for (int jj = 0; jj < pos; jj++)
|
|
||||||
operator[](jj) = s[jj];
|
|
||||||
operator[](pos) = i;
|
|
||||||
for (int jj = pos; jj < s.size(); jj++)
|
|
||||||
operator[](jj+1) = s[jj];
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|IntSequence::operator=| code@>=
|
|
||||||
const IntSequence& IntSequence::operator=(const IntSequence& s)
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(!destroy && length != s.length,
|
|
||||||
"Wrong length for in-place IntSequence::operator=");
|
|
||||||
if (destroy && length != s.length) {
|
|
||||||
delete [] data;
|
|
||||||
data = new int[s.length];
|
|
||||||
destroy = true;
|
|
||||||
length = s.length;
|
|
||||||
}
|
|
||||||
memcpy(data, s.data, sizeof(int)*length);
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|IntSequence::operator==| code@>=
|
|
||||||
bool IntSequence::operator==(const IntSequence& s) const
|
|
||||||
{
|
|
||||||
if (size() != s.size())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
int i = 0;
|
|
||||||
while (i < size() && operator[](i) == s[i])
|
|
||||||
i++;
|
|
||||||
return i == size();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ We need some linear irreflexive ordering, we implement it as
|
|
||||||
lexicographic ordering without identity.
|
|
||||||
@<|IntSequence::operator<| code@>=
|
|
||||||
bool IntSequence::operator<(const IntSequence& s) const
|
|
||||||
{
|
|
||||||
int len = min(size(), s.size());
|
|
||||||
|
|
||||||
int i = 0;
|
|
||||||
while (i < len && operator[](i) == s[i])
|
|
||||||
i++;
|
|
||||||
return (i < s.size() && (i == size() || operator[](i) < s[i]));
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|IntSequence::lessEq| code@>=
|
|
||||||
bool IntSequence::lessEq(const IntSequence& s) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(size() != s.size(),
|
|
||||||
"Sequence with different lengths in IntSequence::lessEq");
|
|
||||||
|
|
||||||
int i = 0;
|
|
||||||
while (i < size() && operator[](i) <= s[i])
|
|
||||||
i++;
|
|
||||||
return (i == size());
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|IntSequence::less| code@>=
|
|
||||||
bool IntSequence::less(const IntSequence& s) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(size() != s.size(),
|
|
||||||
"Sequence with different lengths in IntSequence::less");
|
|
||||||
|
|
||||||
int i = 0;
|
|
||||||
while (i < size() && operator[](i) < s[i])
|
|
||||||
i++;
|
|
||||||
return (i == size());
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This is a bubble sort, all sequences are usually very short, so this
|
|
||||||
sin might be forgiven.
|
|
||||||
|
|
||||||
@<|IntSequence::sort| code@>=
|
|
||||||
void IntSequence::sort()
|
|
||||||
{
|
|
||||||
for (int i = 0; i < length; i++) {
|
|
||||||
int swaps = 0;
|
|
||||||
for (int j = 0; j < length-1; j++) {
|
|
||||||
if (data[j] > data[j+1]) {
|
|
||||||
int s = data[j+1];
|
|
||||||
data[j+1] = data[j];
|
|
||||||
data[j] = s;
|
|
||||||
swaps++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (swaps == 0)
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we monotonize the sequence. If an item is less then its
|
|
||||||
predecessor, it is equalized.
|
|
||||||
|
|
||||||
@<|IntSequence::monotone| code@>=
|
|
||||||
void IntSequence::monotone()
|
|
||||||
{
|
|
||||||
for (int i = 1; i < length; i++)
|
|
||||||
if (data[i-1] > data[i])@/
|
|
||||||
data[i] = data[i-1];
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This partially monotones the sequence. The partitioning is done by a
|
|
||||||
symmetry. So the subsequence given by the symmetry classes are
|
|
||||||
monotonized. For example, if the symmetry is $y^2u^3$, and the
|
|
||||||
|IntSequence| is $(5,3,1,6,4)$, the result is $(5,5,1,6,6)$.
|
|
||||||
|
|
||||||
@<|IntSequence::pmonotone| code@>=
|
|
||||||
void IntSequence::pmonotone(const Symmetry& s)
|
|
||||||
{
|
|
||||||
int cum = 0;
|
|
||||||
for (int i = 0; i < s.num(); i++) {
|
|
||||||
for (int j = cum + 1; j < cum + s[i]; j++)
|
|
||||||
if (data[j-1] > data[j])@/
|
|
||||||
data[j] = data[j-1];
|
|
||||||
cum += s[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This returns the sum of all elements. Useful for symmetries.
@<|IntSequence::sum| code@>=
int IntSequence::sum() const
{
	int total = 0;
	for (int k = 0; k < length; k++)
		total += operator[](k);
	return total;
}
@ This returns the product of the items in the half-open range
$[i1,i2)$. Useful for Kronecker product dimensions.

@<|IntSequence::mult| code@>=
int IntSequence::mult(int i1, int i2) const
{
	int prod = 1;
	for (int k = i1; k < i2; k++)
		prod *= operator[](k);
	return prod;
}
@ Return the number of identical items at the beginning of the sequence.
@<|IntSequence::getPrefixLength| code@>=
int IntSequence::getPrefixLength() const
{
	// count how many leading items equal the first one
	int n = 1;
	while (n < size() && operator[](n) == operator[](0))
		n++;
	return n;
}
@ This returns the number of distinct items in the sequence. It
supposes the sequence is ordered, so a new distinct value shows up
exactly where an item differs from its predecessor. For the empty
sequence it returns zero.

@<|IntSequence::getNumDistinct| code@>=
int IntSequence::getNumDistinct() const
{
	if (size() == 0)
		return 0;
	int cnt = 1;
	for (int k = 1; k < size(); k++)
		if (operator[](k) != operator[](k-1))
			cnt++;
	return cnt;
}
@ This returns the maximum of the sequence. If the sequence is empty,
it returns the least possible |int| value.

@<|IntSequence::getMax| code@>=
int IntSequence::getMax() const
{
	int best = INT_MIN;
	for (int k = 0; k < size(); k++)
		best = (operator[](k) > best) ? operator[](k) : best;
	return best;
}
@
|
|
||||||
@<|IntSequence::add| code 1@>=
|
|
||||||
void IntSequence::add(int i)
|
|
||||||
{
|
|
||||||
for (int j = 0; j < size(); j++)
|
|
||||||
operator[](j) += i;
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|IntSequence::add| code 2@>=
|
|
||||||
void IntSequence::add(int f, const IntSequence& s)
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(size() != s.size(),
|
|
||||||
"Wrong sequence length in IntSequence::add");
|
|
||||||
for (int j = 0; j < size(); j++)
|
|
||||||
operator[](j) += f*s[j];
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|IntSequence::isPositive| code@>=
|
|
||||||
bool IntSequence::isPositive() const
|
|
||||||
{
|
|
||||||
int i = 0;
|
|
||||||
while (i < size() && operator[](i) >= 0)
|
|
||||||
i++;
|
|
||||||
return (i == size());
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|IntSequence::isConstant| code@>=
|
|
||||||
bool IntSequence::isConstant() const
|
|
||||||
{
|
|
||||||
bool res = true;
|
|
||||||
int i = 1;
|
|
||||||
while (res && i < size()) {
|
|
||||||
res = res && operator[](0) == operator[](i);
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|IntSequence::isSorted| code@>=
|
|
||||||
bool IntSequence::isSorted() const
|
|
||||||
{
|
|
||||||
bool res = true;
|
|
||||||
int i = 1;
|
|
||||||
while (res && i < size()) {
|
|
||||||
res = res && operator[](i-1) <= operator[](i);
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ Debug print of the sequence to standard output, e.g. |[ 1  2  3 ]|.
@<|IntSequence::print| code@>=
void IntSequence::print() const
{
	printf("[");
	for (int k = 0; k < size(); k++)
		printf("%2d ", operator[](k));
	printf("]\n");
}
@ End of {\tt int\_sequence.cpp} file.
|
|
|
@ -0,0 +1,148 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// Integer sequence.
|
||||||
|
|
||||||
|
/* Here we define an auxiliary abstraction for a sequence of integers. The
|
||||||
|
basic functionality is to hold an ordered sequence of integers with
|
||||||
|
constant length. We prefer using this simple class before STL
|
||||||
|
|vector<int>| since it is more efficient for our purposes.
|
||||||
|
|
||||||
|
The class is used in index of a tensor, in symmetry definition, in
|
||||||
|
Kronecker product dimensions, or as a class of an equivalence. The
|
||||||
|
latter case is not ordered, but we always order equivalence classes in
|
||||||
|
order to ensure unique representativeness. For almost all cases we
|
||||||
|
need the integer sequence to be ordered (sort), or monotonize (indices
|
||||||
|
of folded tensors), or partially monotonize (indices of folded tensors
|
||||||
|
not fully symmetric), or calculate a product of all members or only of
|
||||||
|
a part (used in Kronecker product dimensions). When we calculate
|
||||||
|
offsets in folded tensors, we need to obtain a number of the same
|
||||||
|
items in the front (|getPrefixLength|), and also to add some integer
|
||||||
|
number to all items.
|
||||||
|
|
||||||
|
Also, we need to construct a subsequence of a sequence, so
|
||||||
|
some instances do destroy the underlying data, and some not. */
|
||||||
|
|
||||||
|
#ifndef INT_SEQUENCE_H
|
||||||
|
#define INT_SEQUENCE_H
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
/* The implementation of |IntSequence| is straightforward. It has a
|
||||||
|
pointer |data|, a |length| of the data, and a flag |destroy|, whether
|
||||||
|
the instance must destroy the underlying data. */
|
||||||
|
|
||||||
|
class Symmetry;

/* The implementation of |IntSequence| is straightforward. It has a
   pointer |data|, a |length| of the data, and a flag |destroy| telling
   whether the instance owns the underlying data and must free it. */
class IntSequence
{
  int *data;    // pointer to the items; owned iff |destroy| is true
  int length;   // number of items
  bool destroy; // true iff the destructor must delete |data|
public:
  /* We have a constructor allocating a given length of data (left
     uninitialized), a constructor allocating and then initializing all
     members to a given number, a copy constructor, a conversion from
     |vector<int>|, a subsequence constructor, a constructor used for
     calculating implied symmetry from a more general symmetry and one
     equivalence class (see |Symmetry| class). Finally we have a
     constructor which unfolds a sequence with respect to a given
     symmetry and a constructor which inserts a given number to the
     ordered sequence or a given number to a given position. */

  IntSequence(int l)
    : data(new int[l]), length(l), destroy(true)
  {
  }
  IntSequence(int l, int n)
    : data(new int[l]), length(l), destroy(true)
  {
    for (int i = 0; i < length; i++)
      data[i] = n;
  }
  IntSequence(const IntSequence &s)
    : data(new int[s.length]), length(s.length), destroy(true)
  {
    memcpy(data, s.data, length*sizeof(int));
  }
  // Non-owning view of s[i1..i2): shares storage with |s|, does not free it.
  IntSequence(IntSequence &s, int i1, int i2)
    : data(s.data+i1), length(i2-i1), destroy(false)
  {
  }
  // Owning copy of s[i1..i2).
  IntSequence(const IntSequence &s, int i1, int i2)
    : data(new int[i2-i1]), length(i2-i1), destroy(true)
  {
    memcpy(data, s.data+i1, sizeof(int)*length);
  }
  IntSequence(const Symmetry &sy, const vector<int> &se);
  IntSequence(const Symmetry &sy, const IntSequence &se);
  IntSequence(int i, const IntSequence &s);
  IntSequence(int i, const IntSequence &s, int pos);
  // Owning copy of the raw array |d| of length |l|.
  IntSequence(int l, const int *d)
    : data(new int[l]), length(l), destroy(true)
  {
    memcpy(data, d, sizeof(int)*length);
  }

  const IntSequence &operator=(const IntSequence &s);
  virtual ~IntSequence()
  {
    if (destroy)
      delete [] data;
  }
  bool operator==(const IntSequence &s) const;
  bool
  operator!=(const IntSequence &s) const
  {
    return !operator==(s);
  }
  int &
  operator[](int i)
  {
    return data[i];
  }
  int
  operator[](int i) const
  {
    return data[i];
  }
  int
  size() const
  {
    return length;
  }

  /* We provide two orderings. The first |operator<| is the linear
     lexicographic ordering, the second |less| is the non-linear Cartesian
     ordering (every element strictly smaller). */
  bool operator<(const IntSequence &s) const;
  bool
  operator<=(const IntSequence &s) const
  {
    return (operator==(s) || operator<(s));
  }
  bool lessEq(const IntSequence &s) const;
  bool less(const IntSequence &s) const;

  void sort();          // sort ascending in place
  void monotone();      // raise each item to at least its predecessor
  void pmonotone(const Symmetry &s); // monotone within each symmetry class
  int sum() const;      // sum of all items
  int mult(int i1, int i2) const; // product of items in [i1,i2)
  int
  mult() const
  {
    return mult(0, length);
  }
  void add(int i);                       // add scalar to every item
  void add(int f, const IntSequence &s); // this += f*s (same length)
  int getPrefixLength() const;  // number of equal leading items
  int getNumDistinct() const;   // distinct items; assumes sorted
  int getMax() const;           // maximum, INT_MIN if empty
  bool isPositive() const;      // all items >= 0
  bool isConstant() const;      // all items equal
  bool isSorted() const;        // weakly increasing
  void print() const;           // debug print to stdout
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,132 +0,0 @@
|
||||||
@q $Id: int_sequence.hweb 758 2006-05-22 08:31:18Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Integer sequence. Start of {\tt int\_sequence.h} file.
|
|
||||||
|
|
||||||
Here we define an auxiliary abstraction for a sequence of integers. The
|
|
||||||
basic functionality is to hold an ordered sequence of integers with
|
|
||||||
constant length. We prefer using this simple class before STL
|
|
||||||
|vector<int>| since it is more efficient for our purposes.
|
|
||||||
|
|
||||||
The class is used in index of a tensor, in symmetry definition, in
|
|
||||||
Kronecker product dimensions, or as a class of an equivalence. The
|
|
||||||
latter case is not ordered, but we always order equivalence classes in
|
|
||||||
order to ensure unique representativeness. For almost all cases we
|
|
||||||
need the integer sequence to be ordered (sort), or monotonize (indices
|
|
||||||
of folded tensors), or partially monotonize (indices of folded tensors
|
|
||||||
not fully symmetric), or calculate a product of all members or only of
|
|
||||||
a part (used in Kronecker product dimensions). When we calculate
|
|
||||||
offsets in folded tensors, we need to obtain a number of the same
|
|
||||||
items in the front (|getPrefixLength|), and also to add some integer
|
|
||||||
number to all items.
|
|
||||||
|
|
||||||
Also, we need to construct a subsequence of a sequence, so
|
|
||||||
some instances do destroy the underlying data, and some not.
|
|
||||||
|
|
||||||
@s IntSequence int
|
|
||||||
@s Symmetry int
|
|
||||||
@c
|
|
||||||
#ifndef INT_SEQUENCE_H
|
|
||||||
#define INT_SEQUENCE_H
|
|
||||||
|
|
||||||
|
|
||||||
#include <cstring>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
@<|IntSequence| class declaration@>;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
@ The implementation of |IntSequence| is straightforward. It has a
|
|
||||||
pointer |data|, a |length| of the data, and a flag |destroy|, whether
|
|
||||||
the instance must destroy the underlying data.
|
|
||||||
|
|
||||||
@<|IntSequence| class declaration@>=
|
|
||||||
class Symmetry;
|
|
||||||
class IntSequence {
|
|
||||||
int* data;
|
|
||||||
int length;
|
|
||||||
bool destroy;
|
|
||||||
public:@/
|
|
||||||
@<|IntSequence| constructors@>;
|
|
||||||
@<|IntSequence| inlines and operators@>;
|
|
||||||
@<|IntSequence| orderings@>;
|
|
||||||
void sort();
|
|
||||||
void monotone();
|
|
||||||
void pmonotone(const Symmetry& s);
|
|
||||||
int sum() const;
|
|
||||||
int mult(int i1, int i2) const;
|
|
||||||
int mult() const
|
|
||||||
{@+return mult(0, length);@+}
|
|
||||||
void add(int i);
|
|
||||||
void add(int f, const IntSequence& s);
|
|
||||||
int getPrefixLength() const;
|
|
||||||
int getNumDistinct() const;
|
|
||||||
int getMax() const;
|
|
||||||
bool isPositive() const;
|
|
||||||
bool isConstant() const;
|
|
||||||
bool isSorted() const;
|
|
||||||
void print() const;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ We have a constructor allocating a given length of data, constructor
|
|
||||||
allocating and then initializing all members to a given number, a copy
|
|
||||||
constructor, a conversion from |vector<int>|, a subsequence
|
|
||||||
constructor, a constructor used for calculating implied symmetry from
|
|
||||||
a more general symmetry and one equivalence class (see |Symmetry|
|
|
||||||
class). Finally we have a constructor which unfolds a sequence with
|
|
||||||
respect to a given symmetry and constructor which inserts a given
|
|
||||||
number to the ordered sequence or given number to a given position.
|
|
||||||
|
|
||||||
@<|IntSequence| constructors@>=
|
|
||||||
IntSequence(int l)
|
|
||||||
: data(new int[l]), length(l), destroy(true)@+ {}
|
|
||||||
IntSequence(int l, int n)
|
|
||||||
: data(new int[l]), length(l), destroy(true)
|
|
||||||
{@+ for (int i = 0; i < length; i++) data[i] = n;@+}
|
|
||||||
IntSequence(const IntSequence& s)
|
|
||||||
: data(new int[s.length]), length(s.length), destroy(true)
|
|
||||||
{@+ memcpy(data, s.data, length*sizeof(int));@+}
|
|
||||||
IntSequence(IntSequence& s, int i1, int i2)
|
|
||||||
: data(s.data+i1), length(i2-i1), destroy(false)@+ {}
|
|
||||||
IntSequence(const IntSequence& s, int i1, int i2)
|
|
||||||
: data(new int[i2-i1]), length(i2-i1), destroy(true)
|
|
||||||
{@+ memcpy(data, s.data+i1, sizeof(int)*length);@+}
|
|
||||||
IntSequence(const Symmetry& sy, const vector<int>& se);
|
|
||||||
IntSequence(const Symmetry& sy, const IntSequence& se);
|
|
||||||
IntSequence(int i, const IntSequence& s);
|
|
||||||
IntSequence(int i, const IntSequence& s, int pos);
|
|
||||||
IntSequence(int l, const int* d)
|
|
||||||
: data(new int[l]), length(l), destroy(true)
|
|
||||||
{@+ memcpy(data, d, sizeof(int)*length);@+}
|
|
||||||
|
|
||||||
|
|
||||||
@ These are clear inlines and operators.
|
|
||||||
@<|IntSequence| inlines and operators@>=
|
|
||||||
const IntSequence& operator=(const IntSequence& s);
|
|
||||||
virtual ~IntSequence()
|
|
||||||
{@+ if (destroy) delete [] data;@+}
|
|
||||||
bool operator==(const IntSequence& s) const;
|
|
||||||
bool operator!=(const IntSequence& s) const
|
|
||||||
{@+ return ! operator==(s);@+}
|
|
||||||
int& operator[](int i)
|
|
||||||
{@+ return data[i];@+}
|
|
||||||
int operator[](int i) const
|
|
||||||
{@+ return data[i];@+}
|
|
||||||
int size() const
|
|
||||||
{@+ return length;@+}
|
|
||||||
|
|
||||||
@ We provide two orderings. The first |operator<| is the linear
|
|
||||||
lexicographic ordering, the second |less| is the non-linear Cartesian
|
|
||||||
ordering.
|
|
||||||
@<|IntSequence| orderings@>=
|
|
||||||
bool operator<(const IntSequence& s) const;
|
|
||||||
bool operator<=(const IntSequence& s) const
|
|
||||||
{@+ return (operator==(s) || operator<(s));@+}
|
|
||||||
bool lessEq(const IntSequence& s) const;
|
|
||||||
bool less(const IntSequence& s) const;
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt int\_sequence.h} file.
|
|
|
@ -0,0 +1,430 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "kron_prod.hh"
|
||||||
|
#include "tl_exception.hh"
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
/* Here we construct Kronecker product dimensions from Kronecker
   product dimensions by picking a given matrix and all other set to
   identity. The constructor takes dimensions of $A_1\otimes
   A_2\otimes\ldots\otimes A_n$, and makes dimensions of $I\otimes
   A_i\otimes I$, or $I\otimes A_n$, or $A_1\otimes I$ for a given
   $i$. The identity matrices must fit into the described order. See
   header file.

   We first decide what is a length of the resulting dimensions. Possible
   length is three for $I\otimes A\otimes I$, and two for $I\otimes A$,
   or $A\otimes I$.

   Then we fork according to |i|. */

KronProdDimens::KronProdDimens(const KronProdDimens &kd, int i)
  : rows((i == 0 || i == kd.dimen()-1) ? (2) : (3)),
    cols((i == 0 || i == kd.dimen()-1) ? (2) : (3))
{
  TL_RAISE_IF(i < 0 || i >= kd.dimen(),
              "Wrong index for pickup in KronProdDimens constructor");

  int kdim = kd.dimen();
  if (i == 0)
    {
      // set AI dimensions
      /* The first rows and cols are taken from |kd|. The dimension of the
         identity matrix is the number of rows in $A_2\otimes\ldots\otimes A_n$
         since the matrix $A_1\otimes I$ is the first. */
      rows[0] = kd.rows[0];
      rows[1] = kd.rows.mult(1, kdim); // identity dimension (square)
      cols[0] = kd.cols[0];
      cols[1] = rows[1];
    }
  else if (i == kdim-1)
    {
      // set IA dimensions
      /* The second dimension is taken from |kd|. The dimension of the identity
         matrix is the number of columns of $A_1\otimes\ldots A_{n-1}$, since the
         matrix $I\otimes A_n$ is the last. */
      rows[0] = kd.cols.mult(0, kdim-1); // identity dimension (square)
      rows[1] = kd.rows[kdim-1];
      cols[0] = rows[0];
      cols[1] = kd.cols[kdim-1];
    }
  else
    {
      // set IAI dimensions
      /* The dimensions of the middle matrix are taken from |kd|. The
         dimensions of the first identity matrix are a number of columns of
         $A_1\otimes\ldots\otimes A_{i-1}$, and the dimensions of the last
         identity matrix are a number of rows of $A_{i+1}\otimes\ldots\otimes
         A_n$. */
      rows[0] = kd.cols.mult(0, i); // leading identity (square)
      cols[0] = rows[0];
      rows[1] = kd.rows[i];
      cols[1] = kd.cols[i];
      cols[2] = kd.rows.mult(i+1, kdim); // trailing identity (square)
      rows[2] = cols[2];
    }
}
/* This raises an exception if dimensions are bad for the multiplication
   |out = in*this|: the number of rows of |in| and |out| must agree, and
   the number of columns of |in| must equal the number of rows of the
   Kronecker product.

   NOTE(review): |out.ncols()| is not compared against |my_cols| here —
   presumably callers always allocate |out| with the exact width; confirm
   before relying on this check to catch a mis-sized output. */

void
KronProd::checkDimForMult(const ConstTwoDMatrix &in, const TwoDMatrix &out) const
{
  int my_rows;
  int my_cols;
  kpd.getRC(my_rows, my_cols);
  TL_RAISE_IF(in.nrows() != out.nrows() || in.ncols() != my_rows,
              "Wrong dimensions for KronProd in KronProd::checkDimForMult");
}
/* Here we Kronecker multiply two given vectors |v1| and |v2| and
|
||||||
|
store the result in preallocated |res|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
KronProd::kronMult(const ConstVector &v1, const ConstVector &v2,
|
||||||
|
Vector &res)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(res.length() != v1.length()*v2.length(),
|
||||||
|
"Wrong vector lengths in KronProd::kronMult");
|
||||||
|
res.zeros();
|
||||||
|
for (int i = 0; i < v1.length(); i++)
|
||||||
|
{
|
||||||
|
Vector sub(res, i *v2.length(), v2.length());
|
||||||
|
sub.add(v1[i], v2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
KronProdAll::setMat(int i, const TwoDMatrix &m)
|
||||||
|
{
|
||||||
|
matlist[i] = &m;
|
||||||
|
kpd.setRC(i, m.nrows(), m.ncols());
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
KronProdAll::setUnit(int i, int n)
|
||||||
|
{
|
||||||
|
matlist[i] = NULL;
|
||||||
|
kpd.setRC(i, n, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
KronProdAll::isUnit() const
|
||||||
|
{
|
||||||
|
int i = 0;
|
||||||
|
while (i < dimen() && matlist[i] == NULL)
|
||||||
|
i++;
|
||||||
|
return i == dimen();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we multiply $B\cdot(I\otimes A)$. If $m$ is a dimension of the
|
||||||
|
identity matrix, then the product is equal to
|
||||||
|
$B\cdot\hbox{diag}_m(A)$. If $B$ is partitioned accordingly, then the
|
||||||
|
result is $[B_1A, B_2A,\ldots B_mA]$.
|
||||||
|
|
||||||
|
Here, |outi| are partitions of |out|, |ini| are const partitions of
|
||||||
|
|in|, and |id_cols| is $m$. We employ level-2 BLAS. */
|
||||||
|
|
||||||
|
void
|
||||||
|
KronProdIA::mult(const ConstTwoDMatrix &in, TwoDMatrix &out) const
|
||||||
|
{
|
||||||
|
checkDimForMult(in, out);
|
||||||
|
|
||||||
|
int id_cols = kpd.cols[0];
|
||||||
|
ConstTwoDMatrix a(mat);
|
||||||
|
|
||||||
|
for (int i = 0; i < id_cols; i++)
|
||||||
|
{
|
||||||
|
TwoDMatrix outi(out, i *a.ncols(), a.ncols());
|
||||||
|
ConstTwoDMatrix ini(in, i *a.nrows(), a.nrows());
|
||||||
|
outi.mult(ini, a);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we construct |KronProdAI| ($A\otimes I$) from |KronProdIAI|
   ($I\otimes A\otimes I$) by dropping the leading identity: the matrix
   part is shared, and the dimensions of $A$ and of the trailing identity
   are copied over from positions 1 and 2 of the |IAI| dimensions. */
KronProdAI::KronProdAI(const KronProdIAI &kpiai)
  : KronProd(KronProdDimens(2)), mat(kpiai.mat)
{
  kpd.rows[0] = mat.nrows();
  kpd.cols[0] = mat.ncols();
  kpd.rows[1] = kpiai.kpd.rows[2]; // trailing identity dimension
  kpd.cols[1] = kpiai.kpd.cols[2];
}
/* Here we multiply $B\cdot(A\otimes I)$. Let the dimension of the
   matrix $A$ be $m\times n$, the dimension of $I$ be $p$, and the number
   of rows of $B$ be $q$. We use the fact that $B\cdot(A\otimes
   I)=\hbox{reshape}(\hbox{reshape}(B, q, mp)\cdot A, q, np)$. This works
   only for a matrix $B$ whose storage has leading dimension equal to its
   number of rows (i.e. contiguous column-major storage).

   For cases where the leading dimension is not equal to the number of
   rows, we partition the matrix $A\otimes I$ to $m\times n$ square
   partitions $a_{ij}I$. Therefore, we partition $B$ to $m$ partitions
   $[B_1, B_2,\ldots,B_m]$. Each partition of $B$ has the same number of
   columns as the identity matrix. If $R$ denotes the resulting matrix,
   then it can be partitioned to $n$ partitions
   $[R_1,R_2,\ldots,R_n]$. Each partition of $R$ has the same number of
   columns as the identity matrix. Then we have $R_i=\sum a_{ji}B_j$.

   In code, |outi| is $R_i$, |ini| is $B_j$, and |id_cols| is the dimension
   of the identity matrix. */

void
KronProdAI::mult(const ConstTwoDMatrix &in, TwoDMatrix &out) const
{
  checkDimForMult(in, out);

  int id_cols = kpd.cols[1];
  ConstTwoDMatrix a(mat);

  if (in.getLD() == in.nrows())
    {
      // fast path: reinterpret the contiguous storage of |in| and |out|
      // with reshaped dimensions and do a single matrix multiply
      ConstTwoDMatrix in_resh(in.nrows()*id_cols, a.nrows(), in.getData().base());
      TwoDMatrix out_resh(in.nrows()*id_cols, a.ncols(), out.getData().base());
      out_resh.mult(in_resh, a);
    }
  else
    {
      // general path: accumulate R_i = sum_j a(j,i) * B_j block by block
      out.zeros();
      for (int i = 0; i < a.ncols(); i++)
        {
          TwoDMatrix outi(out, i *id_cols, id_cols);
          for (int j = 0; j < a.nrows(); j++)
            {
              ConstTwoDMatrix ini(in, j *id_cols, id_cols);
              outi.add(a.get(j, i), ini);
            }
        }
    }
}
/* Here we multiply $B\cdot(I\otimes A\otimes I)$. If $n$ is a
|
||||||
|
dimension of the first identity matrix, then we multiply
|
||||||
|
$B\cdot\hbox{diag}_n(A\otimes I)$. So we partition $B$ and result $R$
|
||||||
|
accordingly, and multiply $B_i\cdot(A\otimes I)$, which is in fact
|
||||||
|
|KronProdAI::mult|. Note that number of columns of partitions of $B$
|
||||||
|
are number of rows of $A\otimes I$, and number of columns of $R$ are
|
||||||
|
number of columns of $A\otimes I$.
|
||||||
|
|
||||||
|
In code, |id_cols| is $n$, |akronid| is a Kronecker product object of
|
||||||
|
$A\otimes I$, and |in_bl_width|, and |out_bl_width| are rows and cols of
|
||||||
|
$A\otimes I$. */
|
||||||
|
|
||||||
|
void
|
||||||
|
KronProdIAI::mult(const ConstTwoDMatrix &in, TwoDMatrix &out) const
|
||||||
|
{
|
||||||
|
checkDimForMult(in, out);
|
||||||
|
|
||||||
|
int id_cols = kpd.cols[0];
|
||||||
|
|
||||||
|
KronProdAI akronid(*this);
|
||||||
|
int in_bl_width;
|
||||||
|
int out_bl_width;
|
||||||
|
akronid.kpd.getRC(in_bl_width, out_bl_width);
|
||||||
|
|
||||||
|
for (int i = 0; i < id_cols; i++)
|
||||||
|
{
|
||||||
|
TwoDMatrix outi(out, i *out_bl_width, out_bl_width);
|
||||||
|
ConstTwoDMatrix ini(in, i *in_bl_width, in_bl_width);
|
||||||
|
akronid.mult(ini, outi);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we multiply $B\cdot(A_1\otimes\ldots\otimes A_n)$. First we
   multiply $B\cdot(A_1\otimes I)$, then this is multiplied by all
   $I\otimes A_i\otimes I$, and finally by $I\otimes A_n$.

   If the dimension of the Kronecker product is only 1, then we multiply
   the two matrices directly and return.

   The intermediate results are stored on the heap pointed to by |last|. A
   new result is allocated, and then the former storage is deallocated.

   We have to be careful in cases when the last or first matrix is unit and
   no calculations are performed in the corresponding codes. The codes
   should handle |last| safely also if no calculations are done. */

void
KronProdAll::mult(const ConstTwoDMatrix &in, TwoDMatrix &out) const
{
  // quick copy if product is unit
  if (isUnit())
    {
      out.zeros();
      out.add(1.0, in);
      return;
    }

  // quick zero if one of the matrices is zero
  /* If one of the matrices is exactly zero or the |in| matrix is zero,
     set out to zero and return */
  bool is_zero = false;
  for (int i = 0; i < dimen() && !is_zero; i++)
    is_zero = matlist[i] && matlist[i]->isZero();
  if (is_zero || in.isZero())
    {
      out.zeros();
      return;
    }

  // quick multiplication if dimension is 1
  if (dimen() == 1)
    {
      if (matlist[0]) // always true, since the unit case returned above
        out.mult(in, ConstTwoDMatrix(*(matlist[0])));
      return;
    }

  int c;
  TwoDMatrix *last = NULL;

  // perform first multiplication AI
  /* Here we have to construct $A_1\otimes I$, allocate the intermediate
     result |last|, and perform the multiplication. If $A_1$ is unit, we
     instead wrap the storage of |in| without multiplying.
     NOTE(review): the else branch constructs a TwoDMatrix over
     |in.getData().base()| and later deletes it — presumably this
     TwoDMatrix constructor copies (or the delete does not free the
     borrowed storage); confirm against TwoDMatrix's ownership rules. */
  if (matlist[0])
    {
      KronProdAI akronid(*this);
      c = akronid.kpd.ncols();
      last = new TwoDMatrix(in.nrows(), c);
      akronid.mult(in, *last);
    }
  else
    {
      last = new TwoDMatrix(in.nrows(), in.ncols(), in.getData().base());
    }

  // perform intermediate multiplications IAI
  /* Here we go through all $I\otimes A_i\otimes I$, construct the
     product, allocate new storage for result |newlast|, perform the
     multiplication, deallocate old |last|, and set |last| to |newlast|.
     Unit factors are skipped entirely. */
  for (int i = 1; i < dimen()-1; i++)
    {
      if (matlist[i])
        {
          KronProdIAI interkron(*this, i);
          c = interkron.kpd.ncols();
          TwoDMatrix *newlast = new TwoDMatrix(in.nrows(), c);
          interkron.mult(*last, *newlast);
          delete last;
          last = newlast;
        }
    }

  // perform last multiplication IA
  /* Here we just construct $I\otimes A_n$, perform the multiplication into
     |out|, and deallocate |last|. If $A_n$ is unit, |last| already holds
     the final result and is copied to |out|. */
  if (matlist[dimen()-1])
    {
      KronProdIA idkrona(*this);
      idkrona.mult(*last, out);
    }
  else
    {
      out = *last;
    }
  delete last;
}
/* This calculates a Kronecker product of rows of the matrices; the row
   indices are given by the integer sequence. The result is allocated and
   returned; the caller is responsible for its deallocation.

   The product is accumulated from the last factor to the first, so that
   the final |last| is $row_{i_1}(A_1)\otimes\ldots\otimes row_{i_n}(A_n)$. */

Vector *
KronProdAll::multRows(const IntSequence &irows) const
{
  TL_RAISE_IF(irows.size() != dimen(),
              "Wrong length of row indices in KronProdAll::multRows");

  Vector *last = NULL;           // accumulated Kronecker product so far (owned)
  ConstVector *row;              // current factor row (owned, freed each iteration)
  vector<Vector *> to_delete;    // unit-row scratch vectors, freed at the end
  for (int i = 0; i < dimen(); i++)
    {
      int j = dimen()-1-i;       // walk the factors from last to first

      // set |row| to the row of |j|-th matrix
      /* If the |j|-th matrix is a real matrix, then the row is constructed
         from the matrix. If the matrix is unit, we construct a new vector,
         fill it with zeros, then set the unit at the appropriate place, and
         make |row| a ConstVector view of this vector, which is scheduled for
         deallocation. */
      if (matlist[j])
        row = new ConstVector(irows[j], *(matlist[j]));
      else
        {
          Vector *aux = new Vector(ncols(j));
          aux->zeros();
          (*aux)[irows[j]] = 1.0;
          to_delete.push_back(aux);
          row = new ConstVector(*aux);
        }

      // set |last| to product of |row| and |last|
      /* If |last| exists, we allocate new storage, Kronecker multiply, and
         deallocate the old storage. If |last| does not exist yet (first
         iteration), we only make |last| equal to |row|. */
      if (last)
        {
          Vector *newlast;
          newlast = new Vector(last->length()*row->length());
          kronMult(*row, ConstVector(*last), *newlast);
          delete last;
          last = newlast;
        }
      else
        {
          last = new Vector(*row);
        }

      delete row;
    }

  // free the scratch vectors backing unit-factor rows
  for (unsigned int i = 0; i < to_delete.size(); i++)
    delete to_delete[i];

  return last;
}
/* This permutes the matrices so that the new ordering minimizes memory
   consumption. As shown in |@<|KronProdAllOptim| class declaration@>|,
   we want ${m_k\over n_k}\leq{m_{k-1}\over n_{k-1}}\ldots\leq{m_1\over n_1}$,
   where $(m_i,n_i)$ is the dimension of $A_i$. So we implement a bubble
   sort on the row/column ratios, swapping dimensions, matrix pointers,
   and the permutation map |oper| in lockstep. The early return fires
   when a full pass makes no swap, i.e. the order is already optimal. */

void
KronProdAllOptim::optimizeOrder()
{
  for (int i = 0; i < dimen(); i++)
    {
      int swaps = 0;
      for (int j = 0; j < dimen()-1; j++)
        {
          if (((double) kpd.rows[j])/kpd.cols[j] < ((double) kpd.rows[j+1])/kpd.cols[j+1])
            {
              // swap dimensions and matrices at |j| and |j+1|
              int s = kpd.rows[j+1];
              kpd.rows[j+1] = kpd.rows[j];
              kpd.rows[j] = s;
              s = kpd.cols[j+1];
              kpd.cols[j+1] = kpd.cols[j];
              kpd.cols[j] = s;
              const TwoDMatrix *m = matlist[j+1];
              matlist[j+1] = matlist[j];
              matlist[j] = m;

              // project the swap to the permutation |oper|
              s = oper.getMap()[j+1];
              oper.getMap()[j+1] = oper.getMap()[j];
              oper.getMap()[j] = s;
              swaps++;
            }
        }
      if (swaps == 0)
        {
          return;
        }
    }
}
|
|
@ -1,457 +0,0 @@
|
||||||
@q $Id: kron_prod.cweb 1834 2008-05-18 20:23:54Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt kron\_prod.cpp} file.
|
|
||||||
@c
|
|
||||||
#include "kron_prod.h"
|
|
||||||
#include "tl_exception.h"
|
|
||||||
|
|
||||||
#include <cstdio>
|
|
||||||
|
|
||||||
@<|KronProdDimens| constructor code@>;
|
|
||||||
@<|KronProd::checkDimForMult| code@>;
|
|
||||||
@<|KronProd::kronMult| code@>;
|
|
||||||
@<|KronProdAll::setMat| code@>;
|
|
||||||
@<|KronProdAll::setUnit| code@>;
|
|
||||||
@<|KronProdAll::isUnit| code@>;
|
|
||||||
@<|KronProdAll::multRows| code@>;
|
|
||||||
@<|KronProdIA::mult| code@>;
|
|
||||||
@<|KronProdAI| constructor code@>;
|
|
||||||
@<|KronProdAI::mult| code@>;
|
|
||||||
@<|KronProdIAI::mult| code@>;
|
|
||||||
@<|KronProdAll::mult| code@>;
|
|
||||||
@<|KronProdAllOptim::optimizeOrder| code@>;
|
|
||||||
|
|
||||||
@ Here we construct Kronecker product dimensions from Kronecker
|
|
||||||
product dimensions by picking a given matrix and all other set to
|
|
||||||
identity. The constructor takes dimensions of $A_1\otimes
|
|
||||||
A_2\otimes\ldots\otimes A_n$, and makes dimensions of $I\otimes
|
|
||||||
A_i\otimes I$, or $I\otimes A_n$, or $A_1\otimes I$ for a given
|
|
||||||
$i$. The identity matrices must fit into the described order. See
|
|
||||||
header file.
|
|
||||||
|
|
||||||
We first decide what is a length of the resulting dimensions. Possible
|
|
||||||
length is three for $I\otimes A\otimes I$, and two for $I\otimes A$,
|
|
||||||
or $A\otimes I$.
|
|
||||||
|
|
||||||
Then we fork according to |i|.
|
|
||||||
|
|
||||||
@<|KronProdDimens| constructor code@>=
|
|
||||||
KronProdDimens::KronProdDimens(const KronProdDimens& kd, int i)
|
|
||||||
: rows((i==0 || i==kd.dimen()-1)? (2):(3)),
|
|
||||||
cols((i==0 || i==kd.dimen()-1)? (2):(3))
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(i < 0 || i >= kd.dimen(),
|
|
||||||
"Wrong index for pickup in KronProdDimens constructor");
|
|
||||||
|
|
||||||
int kdim = kd.dimen();
|
|
||||||
if (i == 0) {
|
|
||||||
@<set AI dimensions@>;
|
|
||||||
} else if (i == kdim-1){
|
|
||||||
@<set IA dimensions@>;
|
|
||||||
} else {
|
|
||||||
@<set IAI dimensions@>;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ The first rows and cols are taken from |kd|. The dimensions of
|
|
||||||
identity matrix is a number of rows in $A_2\otimes\ldots\otimes A_n$
|
|
||||||
since the matrix $A_1\otimes I$ is the first.
|
|
||||||
|
|
||||||
@<set AI dimensions@>=
|
|
||||||
rows[0] = kd.rows[0];
|
|
||||||
rows[1] = kd.rows.mult(1, kdim);
|
|
||||||
cols[0] = kd.cols[0];
|
|
||||||
cols[1] = rows[1];
|
|
||||||
|
|
||||||
@ The second dimension is taken from |kd|. The dimensions of identity
|
|
||||||
matrix is a number of columns of $A_1\otimes\ldots A_{n-1}$, since the
|
|
||||||
matrix $I\otimes A_n$ is the last.
|
|
||||||
|
|
||||||
@<set IA dimensions@>=
|
|
||||||
rows[0] = kd.cols.mult(0, kdim-1);
|
|
||||||
rows[1] = kd.rows[kdim-1];
|
|
||||||
cols[0] = rows[0];
|
|
||||||
cols[1] = kd.cols[kdim-1];
|
|
||||||
|
|
||||||
@ The dimensions of the middle matrix are taken from |kd|. The
|
|
||||||
dimensions of the first identity matrix are a number of columns of
|
|
||||||
$A_1\otimes\ldots\otimes A_{i-1}$, and the dimensions of the last
|
|
||||||
identity matrix are a number of rows of $A_{i+1}\otimes\ldots\otimes
|
|
||||||
A_n$.
|
|
||||||
|
|
||||||
@<set IAI dimensions@>=
|
|
||||||
rows[0] = kd.cols.mult(0, i);
|
|
||||||
cols[0] = rows[0];
|
|
||||||
rows[1] = kd.rows[i];
|
|
||||||
cols[1] = kd.cols[i];
|
|
||||||
cols[2] = kd.rows.mult(i+1, kdim);
|
|
||||||
rows[2] = cols[2];
|
|
||||||
|
|
||||||
|
|
||||||
@ This raises an exception if dimensions are bad for multiplication
|
|
||||||
|out = in*this|.
|
|
||||||
|
|
||||||
@<|KronProd::checkDimForMult| code@>=
|
|
||||||
void KronProd::checkDimForMult(const ConstTwoDMatrix& in, const TwoDMatrix& out) const
|
|
||||||
{
|
|
||||||
int my_rows;
|
|
||||||
int my_cols;
|
|
||||||
kpd.getRC(my_rows, my_cols);
|
|
||||||
TL_RAISE_IF(in.nrows() != out.nrows() || in.ncols() != my_rows,
|
|
||||||
"Wrong dimensions for KronProd in KronProd::checkDimForMult");
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we Kronecker multiply two given vectors |v1| and |v2| and
|
|
||||||
store the result in preallocated |res|.
|
|
||||||
|
|
||||||
@<|KronProd::kronMult| code@>=
|
|
||||||
void KronProd::kronMult(const ConstVector& v1, const ConstVector& v2,
|
|
||||||
Vector& res)
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(res.length() != v1.length()*v2.length(),
|
|
||||||
"Wrong vector lengths in KronProd::kronMult");
|
|
||||||
res.zeros();
|
|
||||||
for (int i = 0; i < v1.length(); i++) {
|
|
||||||
Vector sub(res, i*v2.length(), v2.length());
|
|
||||||
sub.add(v1[i], v2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|KronProdAll::setMat| code@>=
|
|
||||||
void KronProdAll::setMat(int i, const TwoDMatrix& m)
|
|
||||||
{
|
|
||||||
matlist[i] = &m;
|
|
||||||
kpd.setRC(i, m.nrows(), m.ncols());
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|KronProdAll::setUnit| code@>=
|
|
||||||
void KronProdAll::setUnit(int i, int n)
|
|
||||||
{
|
|
||||||
matlist[i] = NULL;
|
|
||||||
kpd.setRC(i, n, n);
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|KronProdAll::isUnit| code@>=
|
|
||||||
bool KronProdAll::isUnit() const
|
|
||||||
{
|
|
||||||
int i = 0;
|
|
||||||
while (i < dimen() && matlist[i] == NULL)
|
|
||||||
i++;
|
|
||||||
return i == dimen();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we multiply $B\cdot(I\otimes A)$. If $m$ is a dimension of the
|
|
||||||
identity matrix, then the product is equal to
|
|
||||||
$B\cdot\hbox{diag}_m(A)$. If $B$ is partitioned accordingly, then the
|
|
||||||
result is $[B_1A, B_2A,\ldots B_mA]$.
|
|
||||||
|
|
||||||
Here, |outi| are partitions of |out|, |ini| are const partitions of
|
|
||||||
|in|, and |id_cols| is $m$. We employ level-2 BLAS.
|
|
||||||
|
|
||||||
@<|KronProdIA::mult| code@>=
|
|
||||||
void KronProdIA::mult(const ConstTwoDMatrix& in, TwoDMatrix& out) const
|
|
||||||
{
|
|
||||||
checkDimForMult(in, out);
|
|
||||||
|
|
||||||
int id_cols = kpd.cols[0];
|
|
||||||
ConstTwoDMatrix a(mat);
|
|
||||||
|
|
||||||
for (int i = 0; i < id_cols; i++) {
|
|
||||||
TwoDMatrix outi(out, i*a.ncols(), a.ncols());
|
|
||||||
ConstTwoDMatrix ini(in, i*a.nrows(), a.nrows());
|
|
||||||
outi.mult(ini, a);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we construct |KronProdAI| from |KronProdIAI|. It is clear.
|
|
||||||
@<|KronProdAI| constructor code@>=
|
|
||||||
KronProdAI::KronProdAI(const KronProdIAI& kpiai)
|
|
||||||
: KronProd(KronProdDimens(2)), mat(kpiai.mat)
|
|
||||||
{
|
|
||||||
kpd.rows[0] = mat.nrows();
|
|
||||||
kpd.cols[0] = mat.ncols();
|
|
||||||
kpd.rows[1] = kpiai.kpd.rows[2];
|
|
||||||
kpd.cols[1] = kpiai.kpd.cols[2];
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we multiply $B\cdot(A\otimes I)$. Let the dimension of the
|
|
||||||
matrix $A$ be $m\times n$, the dimension of $I$ be $p$, and a number
|
|
||||||
of rows of $B$ be $q$. We use the fact that $B\cdot(A\otimes
|
|
||||||
I)=\hbox{reshape}(\hbox{reshape}(B, q, mp)\cdot A, q, np)$. This works
|
|
||||||
only for matrix $B$, whose storage has leading dimension equal to
|
|
||||||
number of rows.
|
|
||||||
|
|
||||||
For cases where the leading dimension is not equal to the number of
|
|
||||||
rows, we partition the matrix $A\otimes I$ to $m\times n$ square
|
|
||||||
partitions $a_{ij}I$. Therefore, we partition $B$ to $m$ partitions
|
|
||||||
$[B_1, B_2,\ldots,B_m]$. Each partition of $B$ has the same number of
|
|
||||||
columns as the identity matrix. If $R$ denotes the resulting matrix,
|
|
||||||
then it can be partitioned to $n$ partitions
|
|
||||||
$[R_1,R_2,\ldots,R_n]$. Each partition of $R$ has the same number of
|
|
||||||
columns as the identity matrix. Then we have $R_i=\sum a_{ji}B_j$.
|
|
||||||
|
|
||||||
In code, |outi| is $R_i$, |ini| is $B_j$, and |id_cols| is a dimension
|
|
||||||
of the identity matrix
|
|
||||||
|
|
||||||
@<|KronProdAI::mult| code@>=
|
|
||||||
void KronProdAI::mult(const ConstTwoDMatrix& in, TwoDMatrix& out) const
|
|
||||||
{
|
|
||||||
checkDimForMult(in, out);
|
|
||||||
|
|
||||||
int id_cols = kpd.cols[1];
|
|
||||||
ConstTwoDMatrix a(mat);
|
|
||||||
|
|
||||||
if (in.getLD() == in.nrows()) {
|
|
||||||
ConstTwoDMatrix in_resh(in.nrows()*id_cols, a.nrows(), in.getData().base());
|
|
||||||
TwoDMatrix out_resh(in.nrows()*id_cols, a.ncols(), out.getData().base());
|
|
||||||
out_resh.mult(in_resh, a);
|
|
||||||
} else {
|
|
||||||
out.zeros();
|
|
||||||
for (int i = 0; i < a.ncols(); i++) {
|
|
||||||
TwoDMatrix outi(out, i*id_cols, id_cols);
|
|
||||||
for (int j = 0; j < a.nrows(); j++) {
|
|
||||||
ConstTwoDMatrix ini(in, j*id_cols, id_cols);
|
|
||||||
outi.add(a.get(j,i), ini);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we multiply $B\cdot(I\otimes A\otimes I)$. If $n$ is a
|
|
||||||
dimension of the first identity matrix, then we multiply
|
|
||||||
$B\cdot\hbox{diag}_n(A\otimes I)$. So we partition $B$ and result $R$
|
|
||||||
accordingly, and multiply $B_i\cdot(A\otimes I)$, which is in fact
|
|
||||||
|KronProdAI::mult|. Note that number of columns of partitions of $B$
|
|
||||||
are number of rows of $A\otimes I$, and number of columns of $R$ are
|
|
||||||
number of columns of $A\otimes I$.
|
|
||||||
|
|
||||||
In code, |id_cols| is $n$, |akronid| is a Kronecker product object of
|
|
||||||
$A\otimes I$, and |in_bl_width|, and |out_bl_width| are rows and cols of
|
|
||||||
$A\otimes I$.
|
|
||||||
|
|
||||||
|
|
||||||
@<|KronProdIAI::mult| code@>=
|
|
||||||
void KronProdIAI::mult(const ConstTwoDMatrix& in, TwoDMatrix& out) const
|
|
||||||
{
|
|
||||||
checkDimForMult(in, out);
|
|
||||||
|
|
||||||
int id_cols = kpd.cols[0];
|
|
||||||
|
|
||||||
KronProdAI akronid(*this);
|
|
||||||
int in_bl_width;
|
|
||||||
int out_bl_width;
|
|
||||||
akronid.kpd.getRC(in_bl_width, out_bl_width);
|
|
||||||
|
|
||||||
for (int i = 0; i < id_cols; i++) {
|
|
||||||
TwoDMatrix outi(out, i*out_bl_width, out_bl_width);
|
|
||||||
ConstTwoDMatrix ini(in, i*in_bl_width, in_bl_width);
|
|
||||||
akronid.mult(ini, outi);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we multiply $B\cdot(A_1\otimes\ldots\otimes A_n)$. First we
|
|
||||||
multiply $B\cdot(A_1\otimes)$, then this is multiplied by all
|
|
||||||
$I\otimes A_i\otimes I$, and finally by $I\otimes A_n$.
|
|
||||||
|
|
||||||
If the dimension of the Kronecker product is only 1, then we multiply
|
|
||||||
two matrices in straight way and return.
|
|
||||||
|
|
||||||
The intermediate results are stored on heap pointed by |last|. A new
|
|
||||||
result is allocated, and then the former storage is deallocated.
|
|
||||||
|
|
||||||
We have to be careful in cases when last or first matrix is unit and
|
|
||||||
no calculations are performed in corresponding codes. The codes should
|
|
||||||
handle |last| safely also if no calcs are done.
|
|
||||||
|
|
||||||
@<|KronProdAll::mult| code@>=
|
|
||||||
void KronProdAll::mult(const ConstTwoDMatrix& in, TwoDMatrix& out) const
|
|
||||||
{
|
|
||||||
@<quick copy if product is unit@>;
|
|
||||||
@<quick zero if one of the matrices is zero@>;
|
|
||||||
@<quick multiplication if dimension is 1@>;
|
|
||||||
int c;
|
|
||||||
TwoDMatrix* last = NULL;
|
|
||||||
@<perform first multiplication AI@>;
|
|
||||||
@<perform intermediate multiplications IAI@>;
|
|
||||||
@<perform last multiplication IA@>;
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<quick copy if product is unit@>=
|
|
||||||
if (isUnit()) {
|
|
||||||
out.zeros();
|
|
||||||
out.add(1.0, in);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ If one of the matrices is exactly zero or the |in| matrix is zero,
|
|
||||||
set out to zero and return
|
|
||||||
|
|
||||||
@<quick zero if one of the matrices is zero@>=
|
|
||||||
bool is_zero = false;
|
|
||||||
for (int i = 0; i < dimen() && ! is_zero; i++)
|
|
||||||
is_zero = matlist[i] && matlist[i]->isZero();
|
|
||||||
if (is_zero || in.isZero()) {
|
|
||||||
out.zeros();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<quick multiplication if dimension is 1@>=
|
|
||||||
if (dimen() == 1) {
|
|
||||||
if (matlist[0]) // always true
|
|
||||||
out.mult(in, ConstTwoDMatrix(*(matlist[0])));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we have to construct $A_1\otimes I$, allocate intermediate
|
|
||||||
result |last|, and perform the multiplication.
|
|
||||||
|
|
||||||
@<perform first multiplication AI@>=
|
|
||||||
if (matlist[0]) {
|
|
||||||
KronProdAI akronid(*this);
|
|
||||||
c = akronid.kpd.ncols();
|
|
||||||
last = new TwoDMatrix(in.nrows(), c);
|
|
||||||
akronid.mult(in, *last);
|
|
||||||
} else {
|
|
||||||
last = new TwoDMatrix(in.nrows(), in.ncols(), in.getData().base());
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we go through all $I\otimes A_i\otimes I$, construct the
|
|
||||||
product, allocate new storage for result |newlast|, perform the
|
|
||||||
multiplication, deallocate old |last|, and set |last| to |newlast|.
|
|
||||||
|
|
||||||
@<perform intermediate multiplications IAI@>=
|
|
||||||
for (int i = 1; i < dimen()-1; i++) {
|
|
||||||
if (matlist[i]) {
|
|
||||||
KronProdIAI interkron(*this, i);
|
|
||||||
c = interkron.kpd.ncols();
|
|
||||||
TwoDMatrix* newlast = new TwoDMatrix(in.nrows(), c);
|
|
||||||
interkron.mult(*last, *newlast);
|
|
||||||
delete last;
|
|
||||||
last = newlast;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here just construct $I\otimes A_n$ and perform multiplication and
|
|
||||||
deallocate |last|.
|
|
||||||
|
|
||||||
@<perform last multiplication IA@>=
|
|
||||||
if (matlist[dimen()-1]) {
|
|
||||||
KronProdIA idkrona(*this);
|
|
||||||
idkrona.mult(*last, out);
|
|
||||||
} else {
|
|
||||||
out = *last;
|
|
||||||
}
|
|
||||||
delete last;
|
|
||||||
|
|
||||||
@ This calculates a Kornecker product of rows of matrices, the row
|
|
||||||
indices are given by the integer sequence. The result is allocated and
|
|
||||||
returned. The caller is repsonsible for its deallocation.
|
|
||||||
|
|
||||||
@<|KronProdAll::multRows| code@>=
|
|
||||||
Vector* KronProdAll::multRows(const IntSequence& irows) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(irows.size() != dimen(),
|
|
||||||
"Wrong length of row indices in KronProdAll::multRows");
|
|
||||||
|
|
||||||
Vector* last = NULL;
|
|
||||||
ConstVector* row;
|
|
||||||
vector<Vector*> to_delete;
|
|
||||||
for (int i = 0; i < dimen(); i++) {
|
|
||||||
int j = dimen()-1-i;
|
|
||||||
@<set |row| to the row of |j|-th matrix@>;
|
|
||||||
@<set |last| to product of |row| and |last|@>;
|
|
||||||
delete row;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned int i = 0; i < to_delete.size(); i++)
|
|
||||||
delete to_delete[i];
|
|
||||||
|
|
||||||
return last;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ If the |j|-th matrix is real matrix, then the row is constructed
|
|
||||||
from the matrix. It the matrix is unit, we construct a new vector,
|
|
||||||
fill it with zeros, than set the unit to appropriate place, and make
|
|
||||||
the |row| as ConstVector of this vector, which sheduled for
|
|
||||||
deallocation.
|
|
||||||
|
|
||||||
@<set |row| to the row of |j|-th matrix@>=
|
|
||||||
if (matlist[j])
|
|
||||||
row = new ConstVector(irows[j], *(matlist[j]));
|
|
||||||
else {
|
|
||||||
Vector* aux = new Vector(ncols(j));
|
|
||||||
aux->zeros();
|
|
||||||
(*aux)[irows[j]] = 1.0;
|
|
||||||
to_delete.push_back(aux);
|
|
||||||
row = new ConstVector(*aux);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ If the |last| is exists, we allocate new storage, Kronecker
|
|
||||||
multiply, deallocate the old storage. If the |last| does not exist,
|
|
||||||
then we only make |last| equal to |row|.
|
|
||||||
|
|
||||||
@<set |last| to product of |row| and |last|@>=
|
|
||||||
if (last) {
|
|
||||||
Vector* newlast;
|
|
||||||
newlast = new Vector(last->length()*row->length());
|
|
||||||
kronMult(*row, ConstVector(*last), *newlast);
|
|
||||||
delete last;
|
|
||||||
last = newlast;
|
|
||||||
} else {
|
|
||||||
last = new Vector(*row);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ This permutes the matrices so that the new ordering would minimize
|
|
||||||
memory consumption. As shown in |@<|KronProdAllOptim| class declaration@>|,
|
|
||||||
we want ${m_k\over n_k}\leq{m_{k-1}\over n_{k-1}}\ldots\leq{m_1\over n_1}$,
|
|
||||||
where $(m_i,n_i)$ is the dimension of $A_i$. So we implement the bubble
|
|
||||||
sort.
|
|
||||||
|
|
||||||
@<|KronProdAllOptim::optimizeOrder| code@>=
|
|
||||||
void KronProdAllOptim::optimizeOrder()
|
|
||||||
{
|
|
||||||
for (int i = 0; i < dimen(); i++) {
|
|
||||||
int swaps = 0;
|
|
||||||
for (int j = 0; j < dimen()-1; j++) {
|
|
||||||
if (((double)kpd.rows[j])/kpd.cols[j] < ((double)kpd.rows[j+1])/kpd.cols[j+1]) {
|
|
||||||
@<swap dimensions and matrices at |j| and |j+1|@>;
|
|
||||||
@<project the swap to the permutation |oper|@>;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (swaps == 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<swap dimensions and matrices at |j| and |j+1|@>=
|
|
||||||
int s = kpd.rows[j+1];
|
|
||||||
kpd.rows[j+1] = kpd.rows[j];
|
|
||||||
kpd.rows[j] = s;
|
|
||||||
s = kpd.cols[j+1];
|
|
||||||
kpd.cols[j+1] = kpd.cols[j];
|
|
||||||
kpd.cols[j] = s;
|
|
||||||
const TwoDMatrix* m = matlist[j+1];
|
|
||||||
matlist[j+1] = matlist[j];
|
|
||||||
matlist[j] = m;
|
|
||||||
|
|
||||||
@
|
|
||||||
@<project the swap to the permutation |oper|@>=
|
|
||||||
s = oper.getMap()[j+1];
|
|
||||||
oper.getMap()[j+1] = oper.getMap()[j];
|
|
||||||
oper.getMap()[j] = s;
|
|
||||||
swaps++;
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt kron\_prod.cpp} file.
|
|
|
@ -0,0 +1,348 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// Kronecker product.
|
||||||
|
|
||||||
|
/* Here we define an abstraction for a Kronecker product of a sequence of
|
||||||
|
matrices. This is $A_1\otimes\ldots\otimes A_n$. Obviously we do not
|
||||||
|
store the product in memory. First we need to represent a dimension
|
||||||
|
of the Kronecker product. Then we represent the Kronecker product,
|
||||||
|
simply it is the Kronecker product dimension with a vector of
|
||||||
|
references to the matrices $A_1,\ldots, A_n$.
|
||||||
|
|
||||||
|
The main task of this class is to calculate a matrix product
|
||||||
|
$B\cdot(A_1\otimes A_2\otimes\ldots\otimes A_n)$ which in
|
||||||
|
our application has much more moderate dimensions than $A_1\otimes
|
||||||
|
A_2\otimes\ldots\otimes A_n$. We calculate it as
|
||||||
|
$$B\cdot(A_1\otimes I)\cdot\ldots\cdot(I\otimes A_i\otimes
|
||||||
|
I)\cdot\ldots\cdot (I\otimes A_n)$$
|
||||||
|
where dimensions of identity matrices differ and are given by the
|
||||||
|
chosen order. One can naturally ask, whether there is some optimal
|
||||||
|
order minimizing maximum storage needed for intermediate
|
||||||
|
results. The optimal ordering is implemented by class |KronProdAllOptim|.
|
||||||
|
|
||||||
|
For this multiplication, we also need to represent products of type
|
||||||
|
$A\otimes I$, $I\otimes A\otimes I$, and $I\otimes A$. */
|
||||||
|
|
||||||
|
#ifndef KRON_PROD_H
|
||||||
|
#define KRON_PROD_H
|
||||||
|
|
||||||
|
#include "twod_matrix.hh"
|
||||||
|
#include "permutation.hh"
|
||||||
|
#include "int_sequence.hh"
|
||||||
|
|
||||||
|
class KronProdAll;
|
||||||
|
class KronProdAllOptim;
|
||||||
|
class KronProdIA;
|
||||||
|
class KronProdIAI;
|
||||||
|
class KronProdAI;
|
||||||
|
|
||||||
|
/* |KronProdDimens| maintains a dimension of the Kronecker product. So,
|
||||||
|
it maintains two sequences, one for rows, and one for columns. */
|
||||||
|
|
||||||
|
class KronProdDimens
|
||||||
|
{
|
||||||
|
friend class KronProdAll;
|
||||||
|
friend class KronProdAllOptim;
|
||||||
|
friend class KronProdIA;
|
||||||
|
friend class KronProdIAI;
|
||||||
|
friend class KronProdAI;
|
||||||
|
private:
|
||||||
|
IntSequence rows;
|
||||||
|
IntSequence cols;
|
||||||
|
public:
|
||||||
|
/* We define three constructors. First initializes to a given
|
||||||
|
dimension, and all rows and cols are set to zeros. Second is a copy
|
||||||
|
constructor. The third constructor takes dimensions of $A_1\otimes
|
||||||
|
A_2\otimes\ldots\otimes A_n$, and makes dimensions of $I\otimes
|
||||||
|
A_i\otimes I$, or $I\otimes A_n$, or $A_1\otimes I$ for a given
|
||||||
|
$i$. The dimensions of identity matrices are such that
|
||||||
|
$$A_1\otimes A_2\otimes\ldots\otimes A_n=
|
||||||
|
(A_1\otimes I)\cdot\ldots\cdot(I\otimes A_i\otimes I)
|
||||||
|
\cdot\ldots\cdot(I\otimes A_n)$$
|
||||||
|
Note that the matrices on the right do not commute only because sizes
|
||||||
|
of identity matrices which are then given by this ordering. */
|
||||||
|
KronProdDimens(int dim)
|
||||||
|
: rows(dim, 0), cols(dim, 0)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
KronProdDimens(const KronProdDimens &kd)
|
||||||
|
: rows(kd.rows), cols(kd.cols)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
KronProdDimens(const KronProdDimens &kd, int i);
|
||||||
|
|
||||||
|
const KronProdDimens &
|
||||||
|
operator=(const KronProdDimens &kd)
|
||||||
|
{
|
||||||
|
rows = kd.rows; cols = kd.cols; return *this;
|
||||||
|
}
|
||||||
|
bool
|
||||||
|
operator==(const KronProdDimens &kd) const
|
||||||
|
{
|
||||||
|
return rows == kd.rows && cols == kd.cols;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
dimen() const
|
||||||
|
{
|
||||||
|
return rows.size();
|
||||||
|
}
|
||||||
|
void
|
||||||
|
setRC(int i, int r, int c)
|
||||||
|
{
|
||||||
|
rows[i] = r; cols[i] = c;
|
||||||
|
}
|
||||||
|
void
|
||||||
|
getRC(int i, int &r, int &c) const
|
||||||
|
{
|
||||||
|
r = rows[i]; c = cols[i];
|
||||||
|
}
|
||||||
|
void
|
||||||
|
getRC(int &r, int &c) const
|
||||||
|
{
|
||||||
|
r = rows.mult(); c = cols.mult();
|
||||||
|
}
|
||||||
|
int
|
||||||
|
nrows() const
|
||||||
|
{
|
||||||
|
return rows.mult();
|
||||||
|
}
|
||||||
|
int
|
||||||
|
ncols() const
|
||||||
|
{
|
||||||
|
return cols.mult();
|
||||||
|
}
|
||||||
|
int
|
||||||
|
nrows(int i) const
|
||||||
|
{
|
||||||
|
return rows[i];
|
||||||
|
}
|
||||||
|
int
|
||||||
|
ncols(int i) const
|
||||||
|
{
|
||||||
|
return cols[i];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Here we define an abstract class for all Kronecker product classes,
|
||||||
|
which are |KronProdAll| (the most general), |KronProdIA| (for
|
||||||
|
$I\otimes A$), |KronProdAI| (for $A\otimes I$), and |KronProdIAI| (for
|
||||||
|
$I\otimes A\otimes I$). The purpose of the super class is to only
|
||||||
|
define some common methods and common member |kpd| for dimensions and
|
||||||
|
declare pure virtual |mult| which is implemented by the subclasses.
|
||||||
|
|
||||||
|
The class also contains a static method |kronMult|, which calculates a
|
||||||
|
Kronecker product of two vectors and stores it in the provided
|
||||||
|
vector. It is useful at a few points of the library. */
|
||||||
|
|
||||||
|
class KronProd
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
KronProdDimens kpd;
|
||||||
|
public:
|
||||||
|
KronProd(int dim)
|
||||||
|
: kpd(dim)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
KronProd(const KronProdDimens &kd)
|
||||||
|
: kpd(kd)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
KronProd(const KronProd &kp)
|
||||||
|
: kpd(kp.kpd)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual ~KronProd()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
dimen() const
|
||||||
|
{
|
||||||
|
return kpd.dimen();
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void mult(const ConstTwoDMatrix &in, TwoDMatrix &out) const = 0;
|
||||||
|
void
|
||||||
|
mult(const TwoDMatrix &in, TwoDMatrix &out) const
|
||||||
|
{
|
||||||
|
mult(ConstTwoDMatrix(in), out);
|
||||||
|
}
|
||||||
|
|
||||||
|
void checkDimForMult(const ConstTwoDMatrix &in, const TwoDMatrix &out) const;
|
||||||
|
void
|
||||||
|
checkDimForMult(const TwoDMatrix &in, const TwoDMatrix &out) const
|
||||||
|
{
|
||||||
|
checkDimForMult(ConstTwoDMatrix(in), out);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void kronMult(const ConstVector &v1, const ConstVector &v2,
|
||||||
|
Vector &res);
|
||||||
|
|
||||||
|
int
|
||||||
|
nrows() const
|
||||||
|
{
|
||||||
|
return kpd.nrows();
|
||||||
|
}
|
||||||
|
int
|
||||||
|
ncols() const
|
||||||
|
{
|
||||||
|
return kpd.ncols();
|
||||||
|
}
|
||||||
|
int
|
||||||
|
nrows(int i) const
|
||||||
|
{
|
||||||
|
return kpd.nrows(i);
|
||||||
|
}
|
||||||
|
int
|
||||||
|
ncols(int i) const
|
||||||
|
{
|
||||||
|
return kpd.ncols(i);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* |KronProdAll| is a main class of this file. It represents the
|
||||||
|
Kronecker product $A_1\otimes A_2\otimes\ldots\otimes A_n$. Besides
|
||||||
|
dimensions, it stores pointers to matrices in |matlist| array. If a
|
||||||
|
pointer is null, then the matrix is considered to be unit. The array
|
||||||
|
is set by calls to |setMat| method (for real matrices) or |setUnit|
|
||||||
|
method (for unit matrices).
|
||||||
|
|
||||||
|
The object is constructed by a constructor, which allocates the
|
||||||
|
|matlist| and initializes dimensions to zeros. Then a caller must feed
|
||||||
|
the object with matrices by calling |setMat| and |setUnit| repeatedly
|
||||||
|
for different indices.
|
||||||
|
|
||||||
|
We implement the |mult| method of |KronProd|, and a new method
|
||||||
|
|multRows|, which creates a vector of kronecker product of all rows of
|
||||||
|
matrices in the object. The rows are given by the |IntSequence|. */
|
||||||
|
|
||||||
|
class KronProdAll : public KronProd
|
||||||
|
{
|
||||||
|
friend class KronProdIA;
|
||||||
|
friend class KronProdIAI;
|
||||||
|
friend class KronProdAI;
|
||||||
|
protected:
|
||||||
|
const TwoDMatrix **const matlist;
|
||||||
|
public:
|
||||||
|
KronProdAll(int dim)
|
||||||
|
: KronProd(dim), matlist(new const TwoDMatrix *[dim])
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual ~KronProdAll()
|
||||||
|
{
|
||||||
|
delete [] matlist;
|
||||||
|
}
|
||||||
|
void setMat(int i, const TwoDMatrix &m);
|
||||||
|
void setUnit(int i, int n);
|
||||||
|
const TwoDMatrix &
|
||||||
|
getMat(int i) const
|
||||||
|
{
|
||||||
|
return *(matlist[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
void mult(const ConstTwoDMatrix &in, TwoDMatrix &out) const;
|
||||||
|
Vector *multRows(const IntSequence &irows) const;
|
||||||
|
private:
|
||||||
|
bool isUnit() const;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* The class |KronProdAllOptim| minimizes memory consumption of the
|
||||||
|
product $B\cdot(A_1\otimes A_2\otimes\ldots\otimes A_k)$. The
|
||||||
|
optimization is done by reordering of the matrices $A_1,\ldots,A_k$,
|
||||||
|
in order to minimize a sum of all storages needed for intermediate
|
||||||
|
results. The optimal ordering is also nearly optimal with respect to
|
||||||
|
number of flops.
|
||||||
|
|
||||||
|
Let $(m_i,n_i)$ be dimensions of $A_i$. It is easy to observe, that
|
||||||
|
for $i$-th step we need storage of $r\cdot n_1\cdot\ldots\cdot
|
||||||
|
n_i\cdot m_{i+1}\cdot\ldots\cdot m_k$, where $r$ is a number of rows
|
||||||
|
of $B$. To minimize the sum through all $i$ over all permutations of
|
||||||
|
matrices, it is equivalent to minimize the sum
|
||||||
|
$\sum_{i=1}^k{m_{i+1}\cdot\ldots\cdot m_k\over n_{i+1}\cdot\ldots\cdot
|
||||||
|
n_k}$. The optimal ordering will yield ${m_k\over
|
||||||
|
n_k}\leq{m_{k-1}\over n_{k-1}}\ldots\leq{m_1\over n_1}$.
|
||||||
|
|
||||||
|
Now observe, that the number of flops for $i$-th step is $r\cdot
|
||||||
|
n_1\cdot\ldots\cdot n_i\cdot m_i\cdot\ldots\cdot m_k$. In order to
|
||||||
|
minimize a number of flops, it is equivalent to minimize
|
||||||
|
$\sum_{i=1}^km_i{m_{i+1}\cdot\ldots\cdot m_k\over
|
||||||
|
n_{i+1}\cdot\ldots\cdot n_k}$. Note that, normally, the $m_i$ does not
|
||||||
|
change as much as $n_{j+1},\ldots,n_k$, so the ordering minimizing the
|
||||||
|
memory will be nearly optimal with respect to number of flops.
|
||||||
|
|
||||||
|
The class |KronProdAllOptim| inherits from |KronProdAll|. A public
|
||||||
|
method |optimizeOrder| does the reordering. The permutation is stored
|
||||||
|
in |oper|. So, as long as |optimizeOrder| is not called, the class is
|
||||||
|
equivalent to |KronProdAll|. */
|
||||||
|
|
||||||
|
class KronProdAllOptim : public KronProdAll
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
Permutation oper;
|
||||||
|
public:
|
||||||
|
KronProdAllOptim(int dim)
|
||||||
|
: KronProdAll(dim), oper(dim)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
void optimizeOrder();
|
||||||
|
const Permutation &
|
||||||
|
getPer() const
|
||||||
|
{
|
||||||
|
return oper;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This class represents $I\otimes A$. We have only one reference to
|
||||||
|
the matrix, which is set by constructor. */
|
||||||
|
|
||||||
|
class KronProdIA : public KronProd
|
||||||
|
{
|
||||||
|
friend class KronProdAll;
|
||||||
|
const TwoDMatrix &mat;
|
||||||
|
public:
|
||||||
|
KronProdIA(const KronProdAll &kpa)
|
||||||
|
: KronProd(KronProdDimens(kpa.kpd, kpa.dimen()-1)),
|
||||||
|
mat(kpa.getMat(kpa.dimen()-1))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
void mult(const ConstTwoDMatrix &in, TwoDMatrix &out) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This class represents $A\otimes I$. We have only one reference to
|
||||||
|
the matrix, which is set by constructor. */
|
||||||
|
|
||||||
|
class KronProdAI : public KronProd
|
||||||
|
{
|
||||||
|
friend class KronProdIAI;
|
||||||
|
friend class KronProdAll;
|
||||||
|
const TwoDMatrix &mat;
|
||||||
|
public:
|
||||||
|
KronProdAI(const KronProdAll &kpa)
|
||||||
|
: KronProd(KronProdDimens(kpa.kpd, 0)),
|
||||||
|
mat(kpa.getMat(0))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
KronProdAI(const KronProdIAI &kpiai);
|
||||||
|
|
||||||
|
void mult(const ConstTwoDMatrix &in, TwoDMatrix &out) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This class represents $I\otimes A\otimes I$. We have only one reference to
|
||||||
|
the matrix, which is set by constructor. */
|
||||||
|
|
||||||
|
class KronProdIAI : public KronProd
|
||||||
|
{
|
||||||
|
friend class KronProdAI;
|
||||||
|
friend class KronProdAll;
|
||||||
|
const TwoDMatrix &mat;
|
||||||
|
public:
|
||||||
|
KronProdIAI(const KronProdAll &kpa, int i)
|
||||||
|
: KronProd(KronProdDimens(kpa.kpd, i)),
|
||||||
|
mat(kpa.getMat(i))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
void mult(const ConstTwoDMatrix &in, TwoDMatrix &out) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,296 +0,0 @@
|
||||||
@q $Id: kron_prod.hweb 2269 2008-11-23 14:33:22Z michel $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Kronecker product. Start of {\tt kron\_prod.h} file.
|
|
||||||
|
|
||||||
Here we define an abstraction for a Kronecker product of a sequence of
|
|
||||||
matrices. This is $A_1\otimes\ldots\otimes A_n$. Obviously we do not
|
|
||||||
store the product in memory. First we need to represent a dimension
|
|
||||||
of the Kronecker product. Then we represent the Kronecker product,
|
|
||||||
simply it is the Kronecker product dimension with a vector of
|
|
||||||
references to the matrices $A_1,\ldots, A_n$.
|
|
||||||
|
|
||||||
The main task of this class is to calculate a matrix product
|
|
||||||
$B\cdot(A_1\otimes A_2\otimes\ldots\otimes A_n)$ which in
|
|
||||||
our application has much more moderate dimensions than $A_1\otimes
|
|
||||||
A_2\otimes\ldots\otimes A_n$. We calculate it as
|
|
||||||
$$B\cdot(A_1\otimes I)\cdot\ldots\cdot(I\otimes A_i\otimes
|
|
||||||
I)\cdot\ldots\cdot (I\otimes A_n)$$
|
|
||||||
where dimensions of identity matrices differ and are given by the
|
|
||||||
chosen order. One can naturally ask, whether there is some optimal
|
|
||||||
order minimizing maximum storage needed for intermediate
|
|
||||||
results. The optimal ordering is implemented by class |KronProdAllOptim|.
|
|
||||||
|
|
||||||
For this multiplication, we also need to represent products of type
|
|
||||||
$A\otimes I$, $I\otimes A\otimes I$, and $I\otimes A$.
|
|
||||||
|
|
||||||
@s KronProdDimens int
|
|
||||||
@s KronProd int
|
|
||||||
|
|
||||||
@c
|
|
||||||
|
|
||||||
#ifndef KRON_PROD_H
|
|
||||||
#define KRON_PROD_H
|
|
||||||
|
|
||||||
#include "twod_matrix.h"
|
|
||||||
#include "permutation.h"
|
|
||||||
#include "int_sequence.h"
|
|
||||||
|
|
||||||
class KronProdAll;
|
|
||||||
class KronProdAllOptim;
|
|
||||||
class KronProdIA;
|
|
||||||
class KronProdIAI;
|
|
||||||
class KronProdAI;
|
|
||||||
|
|
||||||
@<|KronProdDimens| class declaration@>;
|
|
||||||
@<|KronProd| class declaration@>;
|
|
||||||
@<|KronProdAll| class declaration@>;
|
|
||||||
@<|KronProdAllOptim| class declaration@>;
|
|
||||||
@<|KronProdIA| class declaration@>;
|
|
||||||
@<|KronProdAI| class declaration@>;
|
|
||||||
@<|KronProdIAI| class declaration@>;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
@ |KronProdDimens| maintains a dimension of the Kronecker product. So,
|
|
||||||
it maintains two sequences, one for rows, and one for columns.
|
|
||||||
|
|
||||||
@<|KronProdDimens| class declaration@>=
|
|
||||||
class KronProdDimens {
|
|
||||||
friend class KronProdAll;
|
|
||||||
friend class KronProdAllOptim;
|
|
||||||
friend class KronProdIA;
|
|
||||||
friend class KronProdIAI;
|
|
||||||
friend class KronProdAI;
|
|
||||||
private:@;
|
|
||||||
IntSequence rows;
|
|
||||||
IntSequence cols;
|
|
||||||
public:@;
|
|
||||||
@<|KronProdDimens| constructors@>;
|
|
||||||
@<|KronProdDimens| inline operators@>;
|
|
||||||
@<|KronProdDimens| inline methods@>;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ We define three constructors. First initializes to a given
|
|
||||||
dimension, and all rows and cols are set to zeros. Second is a copy
|
|
||||||
constructor. The third constructor takes dimensions of $A_1\otimes
|
|
||||||
A_2\otimes\ldots\otimes A_n$, and makes dimensions of $I\otimes
|
|
||||||
A_i\otimes I$, or $I\otimes A_n$, or $A_1\otimes I$ for a given
|
|
||||||
$i$. The dimensions of identity matrices are such that
|
|
||||||
$$A_1\otimes A_2\otimes\ldots\otimes A_n=
|
|
||||||
(A_1\otimes I)\cdot\ldots\cdot(I\otimes A_i\otimes I)
|
|
||||||
\cdot\ldots\cdot(I\otimes A_n)$$
|
|
||||||
Note that the matrices on the right do not commute only because sizes
|
|
||||||
of identity matrices which are then given by this ordering.
|
|
||||||
|
|
||||||
@<|KronProdDimens| constructors@>=
|
|
||||||
KronProdDimens(int dim)
|
|
||||||
: rows(dim,0), cols(dim, 0)@+ {}
|
|
||||||
KronProdDimens(const KronProdDimens& kd)
|
|
||||||
: rows(kd.rows), cols(kd.cols)@+ {}
|
|
||||||
KronProdDimens(const KronProdDimens& kd, int i);
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|KronProdDimens| inline operators@>=
|
|
||||||
const KronProdDimens& operator=(const KronProdDimens& kd)
|
|
||||||
{@+ rows = kd.rows;@+ cols = kd.cols;@+ return *this;@+}
|
|
||||||
bool operator==(const KronProdDimens& kd) const
|
|
||||||
{@+ return rows == kd.rows && cols == kd.cols;@+}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|KronProdDimens| inline methods@>=
|
|
||||||
int dimen() const
|
|
||||||
{@+ return rows.size();@+}
|
|
||||||
void setRC(int i, int r, int c)
|
|
||||||
{@+ rows[i] = r;@+ cols[i] = c;@+}
|
|
||||||
void getRC(int i, int& r, int& c) const
|
|
||||||
{@+ r = rows[i];@+ c = cols[i];@+}
|
|
||||||
void getRC(int& r, int& c) const
|
|
||||||
{@+ r = rows.mult();@+ c = cols.mult();@+}
|
|
||||||
int nrows() const
|
|
||||||
{@+ return rows.mult();@+}
|
|
||||||
int ncols() const
|
|
||||||
{@+ return cols.mult();@+}
|
|
||||||
int nrows(int i) const
|
|
||||||
{@+ return rows[i];@+}
|
|
||||||
int ncols(int i) const
|
|
||||||
{@+ return cols[i];@+}
|
|
||||||
|
|
||||||
@ Here we define an abstract class for all Kronecker product classes,
|
|
||||||
which are |KronProdAll| (the most general), |KronProdIA| (for
|
|
||||||
$I\otimes A$), |KronProdAI| (for $A\otimes I$), and |KronProdIAI| (for
|
|
||||||
$I\otimes A\otimes I$). The purpose of the super class is to only
|
|
||||||
define some common methods and common member |kpd| for dimensions and
|
|
||||||
declare pure virtual |mult| which is implemented by the subclasses.
|
|
||||||
|
|
||||||
The class also contains a static method |kronMult|, which calculates a
|
|
||||||
Kronecker product of two vectors and stores it in the provided
|
|
||||||
vector. It is useful at a few points of the library.
|
|
||||||
|
|
||||||
@<|KronProd| class declaration@>=
|
|
||||||
class KronProd {
|
|
||||||
protected:@/
|
|
||||||
KronProdDimens kpd;
|
|
||||||
public:@/
|
|
||||||
KronProd(int dim)
|
|
||||||
: kpd(dim)@+ {}
|
|
||||||
KronProd(const KronProdDimens& kd)
|
|
||||||
: kpd(kd)@+ {}
|
|
||||||
KronProd(const KronProd& kp)
|
|
||||||
: kpd(kp.kpd)@+ {}
|
|
||||||
virtual ~KronProd()@+ {}
|
|
||||||
|
|
||||||
int dimen() const
|
|
||||||
{@+ return kpd.dimen();@+}
|
|
||||||
|
|
||||||
virtual void mult(const ConstTwoDMatrix& in, TwoDMatrix& out) const =0;
|
|
||||||
void mult(const TwoDMatrix& in, TwoDMatrix& out) const
|
|
||||||
{@+ mult(ConstTwoDMatrix(in), out);@+}
|
|
||||||
|
|
||||||
void checkDimForMult(const ConstTwoDMatrix& in, const TwoDMatrix& out) const;
|
|
||||||
void checkDimForMult(const TwoDMatrix& in, const TwoDMatrix& out) const
|
|
||||||
{@+ checkDimForMult(ConstTwoDMatrix(in), out);@+}
|
|
||||||
|
|
||||||
static void kronMult(const ConstVector& v1, const ConstVector& v2,
|
|
||||||
Vector& res);
|
|
||||||
|
|
||||||
int nrows() const
|
|
||||||
{@+ return kpd.nrows();@+}
|
|
||||||
int ncols() const
|
|
||||||
{@+ return kpd.ncols();@+}
|
|
||||||
int nrows(int i) const
|
|
||||||
{@+ return kpd.nrows(i);@+}
|
|
||||||
int ncols(int i) const
|
|
||||||
{@+ return kpd.ncols(i);@+}
|
|
||||||
};
|
|
||||||
|
|
||||||
@ |KronProdAll| is a main class of this file. It represents the
|
|
||||||
Kronecker product $A_1\otimes A_2\otimes\ldots\otimes A_n$. Besides
|
|
||||||
dimensions, it stores pointers to matrices in |matlist| array. If a
|
|
||||||
pointer is null, then the matrix is considered to be unit. The array
|
|
||||||
is set by calls to |setMat| method (for real matrices) or |setUnit|
|
|
||||||
method (for unit matrices).
|
|
||||||
|
|
||||||
The object is constructed by a constructor, which allocates the
|
|
||||||
|matlist| and initializes dimensions to zeros. Then a caller must feed
|
|
||||||
the object with matrices by calling |setMat| and |setUnit| repeatedly
|
|
||||||
for different indices.
|
|
||||||
|
|
||||||
We implement the |mult| method of |KronProd|, and a new method
|
|
||||||
|multRows|, which creates a vector of kronecker product of all rows of
|
|
||||||
matrices in the object. The rows are given by the |IntSequence|.
|
|
||||||
|
|
||||||
@<|KronProdAll| class declaration@>=
|
|
||||||
class KronProdAll : public KronProd {
|
|
||||||
friend class KronProdIA;
|
|
||||||
friend class KronProdIAI;
|
|
||||||
friend class KronProdAI;
|
|
||||||
protected:@;
|
|
||||||
const TwoDMatrix** const matlist;
|
|
||||||
public:@;
|
|
||||||
KronProdAll(int dim)
|
|
||||||
: KronProd(dim), matlist(new const TwoDMatrix*[dim])@+ {}
|
|
||||||
virtual ~KronProdAll()
|
|
||||||
{@+ delete [] matlist;@+}
|
|
||||||
void setMat(int i, const TwoDMatrix& m);
|
|
||||||
void setUnit(int i, int n);
|
|
||||||
const TwoDMatrix& getMat(int i) const
|
|
||||||
{@+ return *(matlist[i]);@+}
|
|
||||||
|
|
||||||
void mult(const ConstTwoDMatrix& in, TwoDMatrix& out) const;
|
|
||||||
Vector* multRows(const IntSequence& irows) const;
|
|
||||||
private:@;
|
|
||||||
bool isUnit() const;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ The class |KronProdAllOptim| minimizes memory consumption of the
|
|
||||||
product $B\cdot(A_1\otimes A_2\otimes\ldots\otimes A_k)$. The
|
|
||||||
optimization is done by reordering of the matrices $A_1,\ldots,A_k$,
|
|
||||||
in order to minimize a sum of all storages needed for intermediate
|
|
||||||
results. The optimal ordering is also nearly optimal with respect to
|
|
||||||
number of flops.
|
|
||||||
|
|
||||||
Let $(m_i,n_i)$ be dimensions of $A_i$. It is easy to observe, that
|
|
||||||
for $i$-th step we need storage of $r\cdot n_1\cdot\ldots\cdot
|
|
||||||
n_i\cdot m_{i+1}\cdot\ldots\cdot m_k$, where $r$ is a number of rows
|
|
||||||
of $B$. To minimize the sum through all $i$ over all permutations of
|
|
||||||
matrices, it is equivalent to minimize the sum
|
|
||||||
$\sum_{i=1}^k{m_{i+1}\cdot\ldots\cdot m_k\over n_{i+1}\cdot\ldots\cdot
|
|
||||||
n_k}$. The optimal ordering will yield ${m_k\over
|
|
||||||
n_k}\leq{m_{k-1}\over n_{k-1}}\ldots\leq{m_1\over n_1}$.
|
|
||||||
|
|
||||||
Now observe, that the number of flops for $i$-th step is $r\cdot
|
|
||||||
n_1\cdot\ldots\cdot n_i\cdot m_i\cdot\ldots\cdot m_k$. In order to
|
|
||||||
minimize a number of flops, it is equivalent to minimize
|
|
||||||
$\sum_{i=1}^km_i{m_{i+1}\cdot\ldots\cdot m_k\over
|
|
||||||
n_{i+1}\cdot\ldots\cdot n_k}$. Note that, normally, the $m_i$ does not
|
|
||||||
change as much as $n_{j+1},\ldots,n_k$, so the ordering minimizing the
|
|
||||||
memory will be nearly optimal with respect to number of flops.
|
|
||||||
|
|
||||||
The class |KronProdAllOptim| inherits from |KronProdAll|. A public
|
|
||||||
method |optimizeOrder| does the reordering. The permutation is stored
|
|
||||||
in |oper|. So, as long as |optimizeOrder| is not called, the class is
|
|
||||||
equivalent to |KronProdAll|.
|
|
||||||
|
|
||||||
@<|KronProdAllOptim| class declaration@>=
|
|
||||||
class KronProdAllOptim : public KronProdAll {
|
|
||||||
protected:@;
|
|
||||||
Permutation oper;
|
|
||||||
public:@;
|
|
||||||
KronProdAllOptim(int dim)
|
|
||||||
: KronProdAll(dim), oper(dim) @+ {}
|
|
||||||
void optimizeOrder();
|
|
||||||
const Permutation& getPer() const
|
|
||||||
{@+ return oper; @+}
|
|
||||||
};
|
|
||||||
|
|
||||||
@ This class represents $I\otimes A$. We have only one reference to
|
|
||||||
the matrix, which is set by constructor.
|
|
||||||
|
|
||||||
@<|KronProdIA| class declaration@>=
|
|
||||||
class KronProdIA : public KronProd {
|
|
||||||
friend class KronProdAll;
|
|
||||||
const TwoDMatrix& mat;
|
|
||||||
public:@/
|
|
||||||
KronProdIA(const KronProdAll& kpa)
|
|
||||||
: KronProd(KronProdDimens(kpa.kpd, kpa.dimen()-1)),
|
|
||||||
mat(kpa.getMat(kpa.dimen()-1))
|
|
||||||
{}
|
|
||||||
void mult(const ConstTwoDMatrix& in, TwoDMatrix& out) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ This class represents $A\otimes I$. We have only one reference to
|
|
||||||
the matrix, which is set by constructor.
|
|
||||||
|
|
||||||
@<|KronProdAI| class declaration@>=
|
|
||||||
class KronProdAI : public KronProd {
|
|
||||||
friend class KronProdIAI;
|
|
||||||
friend class KronProdAll;
|
|
||||||
const TwoDMatrix& mat;
|
|
||||||
public:@/
|
|
||||||
KronProdAI(const KronProdAll& kpa)
|
|
||||||
: KronProd(KronProdDimens(kpa.kpd, 0)),
|
|
||||||
mat(kpa.getMat(0))
|
|
||||||
{}
|
|
||||||
KronProdAI(const KronProdIAI& kpiai);
|
|
||||||
|
|
||||||
void mult(const ConstTwoDMatrix& in, TwoDMatrix& out) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ This class represents $I\otimes A\otimes I$. We have only one reference to
|
|
||||||
the matrix, which is set by constructor.
|
|
||||||
@<|KronProdIAI| class declaration@>=
|
|
||||||
class KronProdIAI : public KronProd {
|
|
||||||
friend class KronProdAI;
|
|
||||||
friend class KronProdAll;
|
|
||||||
const TwoDMatrix& mat;
|
|
||||||
public:@/
|
|
||||||
KronProdIAI(const KronProdAll& kpa, int i)
|
|
||||||
: KronProd(KronProdDimens(kpa.kpd, i)),
|
|
||||||
mat(kpa.getMat(i))
|
|
||||||
{}
|
|
||||||
void mult(const ConstTwoDMatrix& in, TwoDMatrix& out) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt kron\_prod.h} file.
|
|
|
@ -1,387 +0,0 @@
|
||||||
@q $Id: main.web 2338 2009-01-14 10:40:30Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@q cwebmac.tex defines its own \ifpdf, which is incompatible with the @>
|
|
||||||
@q \ifpdf defined by eplain, so undefine it @>
|
|
||||||
\let\ifpdf\relax
|
|
||||||
\input eplain
|
|
||||||
|
|
||||||
@q now define \ifpdf to be always false: PDF macros of cwebmac are buggy @>
|
|
||||||
\newif\ifpdf
|
|
||||||
\iffalse\fi
|
|
||||||
|
|
||||||
\def\title{{\mainfont Tensor Library}}
|
|
||||||
|
|
||||||
|
|
||||||
@i ../../c++lib.w
|
|
||||||
@s const_reverse_iterator int
|
|
||||||
@s value_type int
|
|
||||||
|
|
||||||
\titletrue
|
|
||||||
\null\vfill
|
|
||||||
\centerline{\titlefont Multidimensional Tensor Library}
|
|
||||||
\vskip\baselineskip
|
|
||||||
\centerline{\vtop{\hsize=10cm\leftskip=0pt plus 1fil
|
|
||||||
\rightskip=0pt plus 1fil\noindent
|
|
||||||
primary use in perturbation methods for Stochastic
|
|
||||||
Dynamic General Equilibrium (SDGE) models}}
|
|
||||||
\vfill\vfill
|
|
||||||
Copyright \copyright\ 2004 by Ondra Kamenik
|
|
||||||
|
|
||||||
@*1 Library overview.
|
|
||||||
|
|
||||||
The design of the library was driven by the needs of perturbation
|
|
||||||
methods for solving Stochastic Dynamic General Equilibrium models. The
|
|
||||||
aim of the library is not to provide an exhaustive interface to
|
|
||||||
multidimensional linear algebra. The tensor library's main purposes
|
|
||||||
include:
|
|
||||||
\unorderedlist
|
|
||||||
|
|
||||||
\li Define types for tensors, for a multidimensional index of a
|
|
||||||
tensor, and types for folded and unfolded tensors. The tensors defined
|
|
||||||
here have only one multidimensional index and one reserved
|
|
||||||
one-dimensional index. The tensors should allow modelling of higher
|
|
||||||
order derivatives with respect to a few vectors with different sizes
|
|
||||||
(for example $\left[g_{y^2u^3}\right]$). The tensors should allow
|
|
||||||
folded and unfolded storage modes and conversion between them. A
|
|
||||||
folded tensor stores symmetric elements only once, while an unfolded
|
|
||||||
stores data as a whole multidimensional cube.
|
|
||||||
|
|
||||||
\li Define both sparse and dense tensors. We need only one particular
|
|
||||||
type of sparse tensor. This in contrast to dense tensors, where we
|
|
||||||
need much wider family of types.
|
|
||||||
|
|
||||||
\li Implement the Faa Di Bruno multidimensional formula. So, the main
|
|
||||||
purpose of the library is to implement the following step of Faa Di Bruno:
|
|
||||||
$$\left[B_{s^k}\right]_{\alpha_1\ldots\alpha_k}
|
|
||||||
=\left[h_{y^l}\right]_{\gamma_1\ldots\gamma_l}
|
|
||||||
\left(\sum_{c\in M_{l,k}}
|
|
||||||
\prod_{m=1}^l\left[g_{c_m}\right]^{\gamma_m}_{c_m(\alpha)}\right)$$
|
|
||||||
where $s$ can be a compound vector of variables, $M_{l,k}$ is a set of
|
|
||||||
all equivalences of $k$ element set having $l$ classes, $c_m$ is
|
|
||||||
$m$-th class of equivalence $c$, and $c_m(\alpha)$ is a tuple of
|
|
||||||
picked indices from $\alpha$ by class $c_m$.
|
|
||||||
|
|
||||||
Note that the sparse tensors play a role of $h$ in the Faa Di Bruno, not
|
|
||||||
of $B$ nor $g$.
|
|
||||||
|
|
||||||
\endunorderedlist
|
|
||||||
|
|
||||||
The following table is a road-map to various abstractions in the library.
|
|
||||||
|
|
||||||
\def\defloc#1#2{#1\hfill\break{\tt #2}}
|
|
||||||
|
|
||||||
\noindent
|
|
||||||
\halign to\hsize{%
|
|
||||||
\vtop{\hsize=6.6cm\rightskip=0pt plus 1fil\noindent #}&
|
|
||||||
\vtop{\advance\hsize by-6.6cm%
|
|
||||||
\raggedright\noindent\vrule width 0pt height 14pt #}\cr
|
|
||||||
Class defined in & Purpose\cr
|
|
||||||
\noalign{\hrule}\cr
|
|
||||||
\defloc{|@<|Tensor| class declaration@>|}{tensor.hweb}&
|
|
||||||
Virtual base class for all dense tensors, defines |index| as the
|
|
||||||
multidimensonal iterator
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|FTensor| class declaration@>|}{tensor.hweb}&
|
|
||||||
Virtual base class for all folded tensors
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|UTensor| class declaration@>|}{tensor.hweb}&
|
|
||||||
Virtual base class for all unfolded tensors
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|FFSTensor| class declaration@>|}{fs\_tensor.hweb}&
|
|
||||||
Class representing folded full symmetry dense tensor,
|
|
||||||
for instance $\left[g_{y^3}\right]$
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|FGSTensor| class declaration@>|}{gs\_tensor.hweb}&
|
|
||||||
Class representing folded general symmetry dense tensor,
|
|
||||||
for instance $\left[g_{y^2u^3}\right]$
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|UFSTensor| class declaration@>|}{fs\_tensor.hweb}&
|
|
||||||
Class representing unfolded full symmetry dense tensor,
|
|
||||||
for instance $\left[g_{y^3}\right]$
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|UGSTensor| class declaration@>|}{gs\_tensor.hweb}&
|
|
||||||
Class representing unfolded general symmetry dense tensor,
|
|
||||||
for instance $\left[g_{y^2u^3}\right]$
|
|
||||||
\cr
|
|
||||||
|@<|URTensor| class declaration@>|\hfill\break
|
|
||||||
\defloc{|@<|FRTensor| class declaration@>|}{rfs\_tensor.hweb}&
|
|
||||||
Class representing unfolded/folded full symmetry, row-orient\-ed,
|
|
||||||
dense tensor. Row-oriented tensors are used in the Faa Di Bruno
|
|
||||||
above as some part (few or one column) of a product of $g$'s. Their
|
|
||||||
fold/unfold conversions are special in such a way, that they must
|
|
||||||
yield equivalent results if multiplied with folded/unfolded
|
|
||||||
column-oriented counterparts.
|
|
||||||
\cr
|
|
||||||
|@<|URSingleTensor| class declaration@>|\hfill\break
|
|
||||||
\defloc{|@<|FRSingleTensor| class declaration@>|}{rfs\_tensor.hweb}&
|
|
||||||
Class representing unfolded/folded full symmetry, row-orient\-ed,
|
|
||||||
single column, dense tensor. Besides use in the Faa Di Bruno, the
|
|
||||||
single column row oriented tensor models also higher moments of normal
|
|
||||||
distribution.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|UPSTensor| class declaration@>|}{ps\_tensor.hweb}&
|
|
||||||
Class representing unfolded, column-orient\-ed tensor whose symmetry
|
|
||||||
is not that of the $\left[B_{y^2u^3}\right]$ but rather of something
|
|
||||||
as $\left[B_{yuuyu}\right]$. This tensor evolves during the product
|
|
||||||
operation for unfolded tensors and its basic operation is to add
|
|
||||||
itself to a tensor with nicer symmetry, here $\left[B_{y^2u^3}\right]$.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|FPSTensor| class declaration@>|}{ps\_tensor.hweb}&
|
|
||||||
Class representing partially folded, column-orient\-ed tensor who\-se
|
|
||||||
symmetry is not that of the $\left[B_{y^3u^4}\right]$ but rather
|
|
||||||
something as $\left[B_{yu\vert y^3u\vert u^4}\right]$, where the
|
|
||||||
portions of symmetries represent folded dimensions which are combined
|
|
||||||
in unfolded manner. This tensor evolves during the Faa Di Bruno
|
|
||||||
for folded tensors and its basic operation is to add itself to a
|
|
||||||
tensor with nicer symmetry, here folded $\left[B_{y^3u^4}\right]$.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|USubTensor| class declaration@>|}{pyramid\_prod.hweb}&
|
|
||||||
Class representing unfolded full symmetry, row-orient\-ed tensor which
|
|
||||||
contains a few columns of huge product
|
|
||||||
$\prod_{m=1}^l\left[g_{c_m}\right]^{\gamma_m}_{c_m(\alpha)}$. This is
|
|
||||||
needed during the Faa Di Bruno for folded matrices.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|IrregTensor| class declaration@>|}{pyramid2\_prod.hweb}&
|
|
||||||
Class representing a product of columns of derivatives
|
|
||||||
$\left[z_{y^ku^l}\right]$, where $z=[y^T,v^T,w^T]^T$. Since the first
|
|
||||||
part of $z$ is $y$, the derivatives contain many zeros, which are not
|
|
||||||
stored, hence the tensor's irregularity. The tensor is used when
|
|
||||||
calculating one step of Faa Di Bruno formula, i.e.
|
|
||||||
$\left[f_{z^l}\right]\sum\prod_{m=1}^l\left[z_{c_m}\right]^{\gamma_m}_{c_m(\alpha)}$.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|FSSparseTensor| class declaration@>|}{sparse\_tensor.hweb}&
|
|
||||||
Class representing full symmetry, column-oriented, sparse tensor. It
|
|
||||||
is able to store elements keyed by the multidimensional index, and
|
|
||||||
multiply itself with one column of row-oriented tensor.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|FGSContainer| class declaration@>|}{t\_container.hweb}&
|
|
||||||
Container of |FGSTensor|s. It implements the Faa Di Bruno with
|
|
||||||
unfolded or folded tensor $h$ yielding folded $B$. The methods are
|
|
||||||
|FGSContainer::multAndAdd|.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|UGSContainer| class declaration@>|}{t\_container.hweb}&
|
|
||||||
Container of |FGSTensor|s. It implements the Faa Di Bruno with
|
|
||||||
unfolded tensor $h$ yielding unfolded $B$. The method is
|
|
||||||
|UGSContainer::multAndAdd|.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|StackContainerInterface| class declaration@>|}
|
|
||||||
{stack\_container.hweb}&Virtual pure interface describing all logic
|
|
||||||
of stacked containers for which we will do the Faa Di Bruno operation.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|UnfoldedStackContainer| class declaration@>|}
|
|
||||||
{stack\_container.hweb}&Implements the Faa Di Bruno operation for stack of
|
|
||||||
containers of unfolded tensors.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|FoldedStackContainer| class declaration@>|}{stack\_container.hweb}
|
|
||||||
&Implements the Faa Di Bruno for stack of
|
|
||||||
containers of fold\-ed tensors.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|ZContainer| class declaration@>|}{stack\_container.hweb}&
|
|
||||||
The class implements the interface |StackContainerInterface| according
|
|
||||||
to $z$ appearing in context of SDGE models. By a simple inheritance,
|
|
||||||
we obtain |@<|UnfoldedZContainer| class declaration@>| and also
|
|
||||||
|@<|FoldedZContainer| class declaration@>|.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|GContainer| class declaration@>|}{stack\_container.hweb}&
|
|
||||||
The class implements the interface |StackContainerInterface| according
|
|
||||||
to $G$ appearing in context of SDGE models. By a simple inheritance,
|
|
||||||
we obtain |@<|UnfoldedGContainer| class declaration@>| and also
|
|
||||||
|@<|FoldedGContainer| class declaration@>|.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|Equivalence| class declaration@>|}{equivalence.hweb}&
|
|
||||||
The class represents an equivalence on $n$-element set. Useful in the
|
|
||||||
Faa Di Bruno.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|EquivalenceSet| class declaration@>|}{equivalence.hweb}&
|
|
||||||
The class representing all equivalences on $n$-element set. Useful in the
|
|
||||||
Faa Di Bruno.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|Symmetry| class declaration@>|}{symmetry.hweb}&
|
|
||||||
The class defines a symmetry of general symmetry tensor. This is it
|
|
||||||
defines a basic shape of the tensor. For $\left[B_{y^2u^3}\right]$,
|
|
||||||
the symmetry is $y^2u^3$.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|Permutation| class declaration@>|}{permutation.hweb}&
|
|
||||||
The class represents a permutation of $n$ indices. Useful in the
|
|
||||||
Faa Di Bruno.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|IntSequence| class declaration@>|}{int\_sequence.hweb}&
|
|
||||||
The class represents a sequence of integers. Useful everywhere.
|
|
||||||
\cr
|
|
||||||
|@<|TwoDMatrix| class declaration@>|\hfill\break
|
|
||||||
\defloc{|@<|ConstTwoDMatrix| class declaration@>|}{twod\_matrix.hweb}&
|
|
||||||
The class provides an interface to a code handling two-di\-men\-si\-onal
|
|
||||||
matrices. The code resides in Sylvester module, in directory {\tt
|
|
||||||
sylv/cc}. The object files from that directory need to be linked: {\tt
|
|
||||||
GeneralMatrix.o}, {\tt Vector.o} and {\tt SylvException.o}. There is
|
|
||||||
no similar interface to |Vector| and |ConstVector| classes from the
|
|
||||||
Sylvester module and they are used directly.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|KronProdAll| class declaration@>|}{kron\_prod.hweb}&
|
|
||||||
The class represents a Kronecker product of a sequence of arbitrary
|
|
||||||
matrices and is able to multiply a matrix from the right without
|
|
||||||
storing the Kronecker product in memory.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|KronProdAllOptim| class declaration@>|}{kron\_prod.hweb}&
|
|
||||||
The same as |KronProdAll| but it optimizes the order of matrices in
|
|
||||||
the product to minimize the used memory during the Faa Di Bruno
|
|
||||||
operation. Note that it is close to optimal flops.
|
|
||||||
\cr
|
|
||||||
|@<|FTensorPolynomial| class declaration@>|\hfill\break
|
|
||||||
\defloc{|@<|UTensorPolynomial| class declaration@>|}{t\_polynomial.hweb}&
|
|
||||||
Abstractions representing a polynomial whose coefficients are
|
|
||||||
folded/unfolded tensors and variable is a column vector. The classes
|
|
||||||
provide methods for traditional and horner-like polynomial
|
|
||||||
evaluation. This is useful in simulation code.
|
|
||||||
\cr
|
|
||||||
|@<|FNormalMoments| class declaration@>|\hfill\break
|
|
||||||
\defloc{|@<|UNormalMoments| class declaration@>|}{normal\_moments.hweb}&
|
|
||||||
These are containers for folded/unfolded single column tensors for
|
|
||||||
higher moments of normal distribution. The code contains an algorithm
|
|
||||||
for generating the moments for arbitrary covariance matrix.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|TLStatic| class declaration@>|}{tl\_static.hweb}&
|
|
||||||
The class encapsulates all static information needed for the
|
|
||||||
library. It includes a Pascal triangle (for quick computation of
|
|
||||||
binomial coefficients), and precalculated equivalence sets.
|
|
||||||
\cr
|
|
||||||
\defloc{|@<|TLException| class definition@>|}{tl\_exception.hweb}&
|
|
||||||
Simple class thrown as an exception.
|
|
||||||
\cr
|
|
||||||
}
|
|
||||||
|
|
||||||
@s Tensor int
|
|
||||||
@s FTensor int
|
|
||||||
@s UTensor int
|
|
||||||
@s FFSTensor int
|
|
||||||
@s UFSTensor int
|
|
||||||
@s FGSTensor int
|
|
||||||
@s UGSTensor int
|
|
||||||
@s FRTensor int
|
|
||||||
@s URTensor int
|
|
||||||
@s FRSingleTensor int
|
|
||||||
@s URSingleTensor int
|
|
||||||
@s UPSTensor int
|
|
||||||
@s UGSContainer int
|
|
||||||
@s ZContainer int
|
|
||||||
@s GContainer int
|
|
||||||
@s StackContainerInterface int
|
|
||||||
@s FoldedStackContainer int
|
|
||||||
@s UnfoldedStackContainer int
|
|
||||||
@s FoldedZContainer int
|
|
||||||
@s UnfoldedZContainer int
|
|
||||||
@s FoldedGContainer int
|
|
||||||
@s UnfoldedGContainer int
|
|
||||||
@s Permutation int
|
|
||||||
@s KronProdAll int
|
|
||||||
@s KronProdAllOptim int
|
|
||||||
@s FTensorPolynomial int
|
|
||||||
@s UTensorPolynomial int
|
|
||||||
@s FNormalMoments int
|
|
||||||
@s UNormalMoments int
|
|
||||||
@s TLStatic int
|
|
||||||
@s FSSparseTensor int
|
|
||||||
@ The tensor library is multi-threaded. This means, if appropriate
|
|
||||||
compilation options were set, some codes are launched
|
|
||||||
concurrently. This boosts the performance on SMP machines or single
|
|
||||||
processors with hyper-threading support. The basic property of the
|
|
||||||
thread implementation in the library is that we do not allow running
|
|
||||||
more concurrent threads than the preset limit. This prevents threads
|
|
||||||
from competing for memory in such a way that the OS constantly switches
|
|
||||||
among threads with frequent I/O for swaps. This may occur since one
|
|
||||||
thread might need much own memory. The threading support allows for
|
|
||||||
detached threads, the synchronization points during the Faa Di Bruno
|
|
||||||
operation are relatively short, so the resulting load is close to the
|
|
||||||
preset maximum number parallel threads.
|
|
||||||
|
|
||||||
@ A few words to the library's test suite. The suite resides in
|
|
||||||
directory {\tt tl/testing}. There is a file {\tt tests.cpp} which
|
|
||||||
contains all tests and {\tt main()} function. Also there are files
|
|
||||||
{\tt factory.h} and {\tt factory.cpp} implementing random generation
|
|
||||||
of various objects. The important property of these random objects is
|
|
||||||
that they are the same for all object's invocations. This is very
|
|
||||||
important in testing and debugging. Further, one can find files {\tt
|
|
||||||
monoms.h} and {\tt monoms.cpp}. See below for their explanation.
|
|
||||||
|
|
||||||
There are a few types of tests:
|
|
||||||
\orderedlist
|
|
||||||
\li We test for tensor indices. We go through various tensors with
|
|
||||||
various symmetries, convert indices from folded to unfolded and
|
|
||||||
vice-versa. We test whether their coordinates are as expected.
|
|
||||||
\li We test the Faa Di Bruno by comparison of the results of
|
|
||||||
|FGSContainer::multAndAdd| against the results of |UGSContainer::multAndAdd|. The two
|
|
||||||
implementations are pretty different, so this is a good test.
|
|
||||||
\li We use a code in {\tt monoms.h} and {\tt monoms.cpp} to generate a
|
|
||||||
random vector function $f(x(y,u))$ along with derivatives of
|
|
||||||
$\left[f_x\right]$, $\left[x_{y^ku^l}\right]$, and
|
|
||||||
$\left[f_{y^ku^l}\right]$. Then we calculate the resulting derivatives
|
|
||||||
$\left[f_{y^ku^l}\right]$ using |multAndAdd| method of |UGSContainer|
|
|
||||||
or |FGSContainer| and compare the derivatives provided by {\tt
|
|
||||||
monoms}. The functions generated in {\tt monoms} are monomials with
|
|
||||||
integer exponents, so the implementation of {\tt monoms} is quite
|
|
||||||
easy.
|
|
||||||
\li We do a similar thing for sparse tensors. In this case the {\tt monoms}
|
|
||||||
generate a function $f(y,v(y,u),w(y,u))$, provide all the derivatives
|
|
||||||
and the result $\left[f_{y^ku^l}\right]$. Then we calculate the
|
|
||||||
derivatives with |multAndAdd| of |ZContainer| and compare.
|
|
||||||
\li We test the polynomial evaluation by evaluating a folded and
|
|
||||||
unfolded polynomial in traditional and horner-like fashion. This gives
|
|
||||||
four methods in total. The four results are compared.
|
|
||||||
\endorderedlist
|
|
||||||
|
|
||||||
|
|
||||||
@*1 Utilities.
|
|
||||||
@i sthread.hweb
|
|
||||||
@i sthread.cweb
|
|
||||||
@i tl_exception.hweb
|
|
||||||
@i int_sequence.hweb
|
|
||||||
@i int_sequence.cweb
|
|
||||||
@i twod_matrix.hweb
|
|
||||||
@i twod_matrix.cweb
|
|
||||||
@i kron_prod.hweb
|
|
||||||
@i kron_prod.cweb
|
|
||||||
|
|
||||||
@*1 Combinatorics.
|
|
||||||
@i symmetry.hweb
|
|
||||||
@i symmetry.cweb
|
|
||||||
@i equivalence.hweb
|
|
||||||
@i equivalence.cweb
|
|
||||||
@i permutation.hweb
|
|
||||||
@i permutation.cweb
|
|
||||||
|
|
||||||
@*1 Tensors.
|
|
||||||
@i tensor.hweb
|
|
||||||
@i tensor.cweb
|
|
||||||
@i fs_tensor.hweb
|
|
||||||
@i fs_tensor.cweb
|
|
||||||
@i gs_tensor.hweb
|
|
||||||
@i gs_tensor.cweb
|
|
||||||
@i rfs_tensor.hweb
|
|
||||||
@i rfs_tensor.cweb
|
|
||||||
@i ps_tensor.hweb
|
|
||||||
@i ps_tensor.cweb
|
|
||||||
@i sparse_tensor.hweb
|
|
||||||
@i sparse_tensor.cweb
|
|
||||||
|
|
||||||
@*1 The Faa Di Bruno formula.
|
|
||||||
@i t_container.hweb
|
|
||||||
@i t_container.cweb
|
|
||||||
@i stack_container.hweb
|
|
||||||
@i stack_container.cweb
|
|
||||||
@i fine_container.hweb
|
|
||||||
@i fine_container.cweb
|
|
||||||
@i pyramid_prod.hweb
|
|
||||||
@i pyramid_prod.cweb
|
|
||||||
@i pyramid_prod2.hweb
|
|
||||||
@i pyramid_prod2.cweb
|
|
||||||
|
|
||||||
@*1 Miscellany.
|
|
||||||
@i t_polynomial.hweb
|
|
||||||
@i t_polynomial.cweb
|
|
||||||
@i normal_moments.hweb
|
|
||||||
@i normal_moments.cweb
|
|
||||||
@i tl_static.hweb
|
|
||||||
@i tl_static.cweb
|
|
||||||
|
|
||||||
@*1 Index.
|
|
|
@ -0,0 +1,103 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "normal_moments.hh"
|
||||||
|
#include "permutation.hh"
|
||||||
|
#include "kron_prod.hh"
|
||||||
|
#include "tl_static.hh"
|
||||||
|
|
||||||
|
/* Construct the container and, when at least second-order moments are
   requested, fill it with the moment tensors of N(0,v). */
UNormalMoments::UNormalMoments(int maxdim, const TwoDMatrix &v)
  : TensorContainer<URSingleTensor>(1)
{
  if (maxdim > 1)
    generateMoments(maxdim, v);
}
|
||||||
|
|
||||||
|
/* Here we fill up the container with the tensors for $d=2,4,6,\ldots$
|
||||||
|
up to the given dimension. Each tensor of moments is equal to
|
||||||
|
$F_n\left(\otimes^nv\right).$ This has a dimension equal to
|
||||||
|
$2n$. See the header file for proof and details.
|
||||||
|
|
||||||
|
Here we sequentially construct the Kronecker power
|
||||||
|
$\otimes^nv$, and apply $F_n$. */
|
||||||
|
|
||||||
|
void
|
||||||
|
UNormalMoments::generateMoments(int maxdim, const TwoDMatrix &v)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.nrows() != v.ncols(),
|
||||||
|
"Variance-covariance matrix is not square in UNormalMoments constructor");
|
||||||
|
|
||||||
|
int nv = v.nrows();
|
||||||
|
URSingleTensor *mom2 = new URSingleTensor(nv, 2);
|
||||||
|
mom2->getData() = v.getData();
|
||||||
|
insert(mom2);
|
||||||
|
URSingleTensor *kronv = new URSingleTensor(nv, 2);
|
||||||
|
kronv->getData() = v.getData();
|
||||||
|
for (int d = 4; d <= maxdim; d += 2)
|
||||||
|
{
|
||||||
|
URSingleTensor *newkronv = new URSingleTensor(nv, d);
|
||||||
|
KronProd::kronMult(ConstVector(v.getData()),
|
||||||
|
ConstVector(kronv->getData()),
|
||||||
|
newkronv->getData());
|
||||||
|
delete kronv;
|
||||||
|
kronv = newkronv;
|
||||||
|
URSingleTensor *mom = new URSingleTensor(nv, d);
|
||||||
|
// apply $F_n$ to |kronv|
|
||||||
|
/* Here we go through all equivalences, select only those having 2
|
||||||
|
elements in each class, then go through all elements in |kronv| and
|
||||||
|
add to permuted location of |mom|.
|
||||||
|
|
||||||
|
The permutation must be taken as inverse of the permutation implied by
|
||||||
|
the equivalence, since we need a permutation which after application
|
||||||
|
to identity of indices yileds indices in the equivalence classes. Note
|
||||||
|
how the |Equivalence::apply| method works. */
|
||||||
|
mom->zeros();
|
||||||
|
const EquivalenceSet eset = ebundle.get(d);
|
||||||
|
for (EquivalenceSet::const_iterator cit = eset.begin();
|
||||||
|
cit != eset.end(); cit++)
|
||||||
|
{
|
||||||
|
if (selectEquiv(*cit))
|
||||||
|
{
|
||||||
|
Permutation per(*cit);
|
||||||
|
per.inverse();
|
||||||
|
for (Tensor::index it = kronv->begin(); it != kronv->end(); ++it)
|
||||||
|
{
|
||||||
|
IntSequence ind(kronv->dimen());
|
||||||
|
per.apply(it.getCoor(), ind);
|
||||||
|
Tensor::index it2(mom, ind);
|
||||||
|
mom->get(*it2, 0) += kronv->get(*it, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
insert(mom);
|
||||||
|
}
|
||||||
|
delete kronv;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We return |true| for an equivalence whose each class has 2 elements. */
|
||||||
|
|
||||||
|
bool
|
||||||
|
UNormalMoments::selectEquiv(const Equivalence &e)
|
||||||
|
{
|
||||||
|
if (2*e.numClasses() != e.getN())
|
||||||
|
return false;
|
||||||
|
for (Equivalence::const_seqit si = e.begin();
|
||||||
|
si != e.end(); ++si)
|
||||||
|
{
|
||||||
|
if ((*si).length() != 2)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we go through all the unfolded container, fold each tensor and
|
||||||
|
insert it. */
|
||||||
|
FNormalMoments::FNormalMoments(const UNormalMoments &moms)
|
||||||
|
: TensorContainer<FRSingleTensor>(1)
|
||||||
|
{
|
||||||
|
for (UNormalMoments::const_iterator it = moms.begin();
|
||||||
|
it != moms.end(); ++it)
|
||||||
|
{
|
||||||
|
FRSingleTensor *fm = new FRSingleTensor(*((*it).second));
|
||||||
|
insert(fm);
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,115 +0,0 @@
|
||||||
@q $Id: normal_moments.cweb 281 2005-06-13 09:41:16Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt normal\_moments.cpp} file.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#include "normal_moments.h"
|
|
||||||
#include "permutation.h"
|
|
||||||
#include "kron_prod.h"
|
|
||||||
#include "tl_static.h"
|
|
||||||
|
|
||||||
@<|UNormalMoments| constructor code@>;
|
|
||||||
@<|UNormalMoments::generateMoments| code@>;
|
|
||||||
@<|UNormalMoments::selectEquiv| code@>;
|
|
||||||
@<|FNormalMoments| constructor code@>;
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|UNormalMoments| constructor code@>=
|
|
||||||
UNormalMoments::UNormalMoments(int maxdim, const TwoDMatrix& v)
|
|
||||||
: TensorContainer<URSingleTensor>(1)
|
|
||||||
{
|
|
||||||
if (maxdim >= 2)
|
|
||||||
generateMoments(maxdim, v);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we fill up the container with the tensors for $d=2,4,6,\ldots$
|
|
||||||
up to the given dimension. Each tensor of moments is equal to
|
|
||||||
$F_n\left(\otimes^nv\right).$ This has a dimension equal to
|
|
||||||
$2n$. See the header file for proof and details.
|
|
||||||
|
|
||||||
Here we sequentially construct the Kronecker power
|
|
||||||
$\otimes^nv$, and apply $F_n$.
|
|
||||||
|
|
||||||
@<|UNormalMoments::generateMoments| code@>=
|
|
||||||
void UNormalMoments::generateMoments(int maxdim, const TwoDMatrix& v)
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.nrows() != v.ncols(),
|
|
||||||
"Variance-covariance matrix is not square in UNormalMoments constructor");
|
|
||||||
|
|
||||||
int nv = v.nrows();
|
|
||||||
URSingleTensor* mom2 = new URSingleTensor(nv, 2);
|
|
||||||
mom2->getData() = v.getData();
|
|
||||||
insert(mom2);
|
|
||||||
URSingleTensor* kronv = new URSingleTensor(nv, 2);
|
|
||||||
kronv->getData() = v.getData();
|
|
||||||
for (int d = 4; d <= maxdim; d+=2) {
|
|
||||||
URSingleTensor* newkronv = new URSingleTensor(nv, d);
|
|
||||||
KronProd::kronMult(ConstVector(v.getData()),
|
|
||||||
ConstVector(kronv->getData()),
|
|
||||||
newkronv->getData());
|
|
||||||
delete kronv;
|
|
||||||
kronv = newkronv;
|
|
||||||
URSingleTensor* mom = new URSingleTensor(nv, d);
|
|
||||||
@<apply $F_n$ to |kronv|@>;
|
|
||||||
insert(mom);
|
|
||||||
}
|
|
||||||
delete kronv;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we go through all equivalences, select only those having 2
|
|
||||||
elements in each class, then go through all elements in |kronv| and
|
|
||||||
add to permuted location of |mom|.
|
|
||||||
|
|
||||||
The permutation must be taken as inverse of the permutation implied by
|
|
||||||
the equivalence, since we need a permutation which after application
|
|
||||||
to identity of indices yields indices in the equivalence classes. Note
|
|
||||||
how the |Equivalence::apply| method works.
|
|
||||||
|
|
||||||
@<apply $F_n$ to |kronv|@>=
|
|
||||||
mom->zeros();
|
|
||||||
const EquivalenceSet eset = ebundle.get(d);
|
|
||||||
for (EquivalenceSet::const_iterator cit = eset.begin();
|
|
||||||
cit != eset.end(); cit++) {
|
|
||||||
if (selectEquiv(*cit)) {
|
|
||||||
Permutation per(*cit);
|
|
||||||
per.inverse();
|
|
||||||
for (Tensor::index it = kronv->begin(); it != kronv->end(); ++it) {
|
|
||||||
IntSequence ind(kronv->dimen());
|
|
||||||
per.apply(it.getCoor(), ind);
|
|
||||||
Tensor::index it2(mom, ind);
|
|
||||||
mom->get(*it2, 0) += kronv->get(*it, 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ We return |true| for an equivalence whose each class has 2 elements.
|
|
||||||
@<|UNormalMoments::selectEquiv| code@>=
|
|
||||||
bool UNormalMoments::selectEquiv(const Equivalence& e)
|
|
||||||
{
|
|
||||||
if (2*e.numClasses() != e.getN())
|
|
||||||
return false;
|
|
||||||
for (Equivalence::const_seqit si = e.begin();
|
|
||||||
si != e.end(); ++si) {
|
|
||||||
if ((*si).length() != 2)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we go through all the unfolded container, fold each tensor and
|
|
||||||
insert it.
|
|
||||||
@<|FNormalMoments| constructor code@>=
|
|
||||||
FNormalMoments::FNormalMoments(const UNormalMoments& moms)
|
|
||||||
: TensorContainer<FRSingleTensor>(1)
|
|
||||||
{
|
|
||||||
for (UNormalMoments::const_iterator it = moms.begin();
|
|
||||||
it != moms.end(); ++it) {
|
|
||||||
FRSingleTensor* fm = new FRSingleTensor(*((*it).second));
|
|
||||||
insert(fm);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt normal\_moments.cpp} file.
|
|
|
@ -0,0 +1,129 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// Moments of normal distribution.
|
||||||
|
|
||||||
|
/* Here we calculate the higher order moments of normally distributed
|
||||||
|
random vector $u$ with means equal to zero and given
|
||||||
|
variance--covariance matrix $V$, this is $u\sim N(0,V)$. The moment
|
||||||
|
generating function for such distribution is $f(t)=e^{{1\over 2}t^TVt}$. If
|
||||||
|
we derivate it wrt $t$ and unfold the higher dimensional tensors
|
||||||
|
row-wise, we obtain terms like
|
||||||
|
$$\eqalign{
|
||||||
|
{\partial\over\partial t}f(t)=&f(t)\cdot Vt\cr
|
||||||
|
{\partial^2\over\partial t^2}f(t)=&f(t)\cdot(Vt\otimes Vt+v)\cr
|
||||||
|
{\partial^3\over\partial t^3}f(t)=&f(t)\cdot
|
||||||
|
(Vt\otimes Vt\otimes Vt+P_?(v\otimes Vt)+P_?(Vt\otimes v)+v\otimes Vt)\cr
|
||||||
|
{\partial^4\over\partial t^4}f(t)=&f(t)\cdot
|
||||||
|
(Vt\otimes Vt\otimes Vt\otimes Vt+S_?(v\otimes Vt\otimes Vt)+
|
||||||
|
S_?(Vt\otimes v\otimes Vt)+S_?(Vt\otimes Vt\otimes v)+S_?(v\otimes v))}
|
||||||
|
$$
|
||||||
|
where $v$ is vectorized $V$ ($v=\hbox{vec}(V)$), and $P_?$ is a
|
||||||
|
suitable row permutation (corresponds to permutation of
|
||||||
|
multidimensional indices) which permutes the tensor data, so that the
|
||||||
|
index of a variable being derived would be the last. This ensures that
|
||||||
|
all (permuted) tensors can be summed yielding a tensor whose indices
|
||||||
|
have some order (in here we chose the order that more recent
|
||||||
|
derivating variables are to the right). Finally, $S_?$ is a suitable
|
||||||
|
sum of various $P_?$.
|
||||||
|
|
||||||
|
We are interested in $S_?$ multiplying the Kronecker powers
|
||||||
|
$\otimes^nv$. The $S_?$ is a (possibly) multi-set of permutations of
|
||||||
|
even order. Note that we know a number of permutations in $S_?$. The
|
||||||
|
above formulas for $F(t)$ derivatives are valid also for monomial
|
||||||
|
$u$, and from literature we know that $2n$-th moment is ${(2n!)\over
|
||||||
|
n!2^n}\sigma^2$. So there are ${(2n!)\over n!2^n}$ permutations in
|
||||||
|
$S_?$.
|
||||||
|
|
||||||
|
In order to find the $S_?$ we need to define a couple of
|
||||||
|
things. First we define a sort of equivalence between the permutations
|
||||||
|
applicable to even number of indices. We write $P_1\equiv P_2$
|
||||||
|
whenever $P_1^{-1}\circ P_2$ permutes only whole pairs, or items
|
||||||
|
within pairs, but not indices across the pairs. For instance the
|
||||||
|
permutations $(0,1,2,3)$ and $(3,2,0,1)$ are equivalent, but
|
||||||
|
$(0,2,1,3)$ is not equivalent with the two. Clearly, the $\equiv$ is
|
||||||
|
an equivalence.
|
||||||
|
|
||||||
|
This allows to define a relation $\sqsubseteq$ between the permutation
|
||||||
|
multi-sets $S$, which is basically the subset relation $\subseteq$ but
|
||||||
|
with respect to the equivalence $\equiv$, more formally:
|
||||||
|
$$S_1\sqsubseteq S_2\quad\hbox{iff}\quad P\in S_1
|
||||||
|
\Rightarrow\exists Q\in S_2:P\equiv Q$$
|
||||||
|
This induces an equivalence $S_1\equiv S_2$.
|
||||||
|
|
||||||
|
Now let $F_n$ denote a set of permutations on $2n$ indices which is
|
||||||
|
maximal with respect to $\sqsubseteq$, and minimal with respect to
|
||||||
|
$\equiv$. (In other words, it contains everything up to the
|
||||||
|
equivalence $\equiv$.) It is straightforward to calculate a number of
|
||||||
|
permutations in $F_n$. This is a total number of all permutations of
|
||||||
|
$2n$ divided by permutations of pairs divided by permutations within
|
||||||
|
the pairs. This is ${(2n!)\over n!2^n}$.
|
||||||
|
|
||||||
|
We prove that $S_?\equiv F_n$. Clearly $S_?\sqsubseteq F_n$, since
|
||||||
|
$F_n$ is maximal. In order to prove that $F_n\sqsubseteq S_?$, let us
|
||||||
|
assert that for any permutation $P$ and for any (semi)positive
|
||||||
|
definite matrix $V$ we have $PS_?\otimes^nv=S_?\otimes^nv$. Below we
|
||||||
|
show that there is a positive definite matrix $V$ of some dimension
|
||||||
|
that for any two permutation multi-sets $S_1$, $S_2$, we have
|
||||||
|
$$S_1\not\equiv S_2\Rightarrow S_1(\otimes^nv)\neq S_2(\otimes^nv)$$
|
||||||
|
So it follows that for any permutation $P$, we have $PS_?\equiv
|
||||||
|
S_?$. For a purpose of contradiction let $P\in F_n$ be a permutation
|
||||||
|
which is not equivalent to any permutation from $S_?$. Since $S_?$ is
|
||||||
|
non-empty, let us pick $P_0\in S_?$. Now assert that
|
||||||
|
$P_0^{-1}S_?\not\equiv P^{-1}S_?$ since the first contains an identity
|
||||||
|
and the second does not contain a permutation equivalent to
|
||||||
|
identity. Thus we have $(P\circ P_0^{-1})S_?\not\equiv S_?$ which
|
||||||
|
gives the contradiction and we have proved that $F_n\sqsubseteq
|
||||||
|
S_?$. Thus $F_n\equiv S_?$. Moreover, we know that $S_?$ and $F_n$
|
||||||
|
have the same number of permutations, hence the minimality of $S_?$
|
||||||
|
with respect to $\equiv$.
|
||||||
|
|
||||||
|
Now it suffices to prove that there exists a positive definite $V$
|
||||||
|
such that for any two permutation multi-sets $S_1$, and $S_2$ holds
|
||||||
|
$S_1\not\equiv S_2\Rightarrow S_1(\otimes^nv)\neq S_2(\otimes^nv)$. If
|
||||||
|
$V$ is $n\times n$ matrix, then $S_1\not\equiv S_2$ implies that there
|
||||||
|
is identically nonzero polynomial of elements from $V$ of order $n$
|
||||||
|
over integers. If $V=A^TA$ then there is identically non-zero
|
||||||
|
polynomial of elements from $A$ of order $2n$. This means, that we
|
||||||
|
have to find $n(n+1)/2$ tuple $x$ of real numbers such that all
|
||||||
|
identically non-zero polynomials $p$ of order $2n$ over integers yield
|
||||||
|
$p(x)\neq 0$.
|
||||||
|
|
||||||
|
The $x$ is constructed as follows: $x_i = \pi^{\log{r_i}}$, where $r_i$
|
||||||
|
is $i$-th prime. Let us consider monom $x_1^{j_1}\cdot\ldots\cdot
|
||||||
|
x_k^{j_k}$. When the monom is evaluated, we get
|
||||||
|
$$\pi^{\log{r_1^{j_1}}+\ldots+\log{r_k^{j_k}}}=
|
||||||
|
\pi^{\log{\left(r_1^{j_1}\cdot\ldots\cdot r_k^{j_k}\right)}}$$
|
||||||
|
Now it is easy to see that if an integer combination of such terms is
|
||||||
|
zero, then the combination must be either trivial or sum to $0$ and
|
||||||
|
all monoms must be equal. Both cases imply a polynomial identically
|
||||||
|
equal to zero. So, any non-trivial integer polynomial evaluated at $x$
|
||||||
|
must be non-zero.
|
||||||
|
|
||||||
|
So, having this result in hand, now it is straightforward to calculate
|
||||||
|
higher moments of normal distribution. Here we define a container,
|
||||||
|
which does the job. In its constructor, we simply calculate Kronecker
|
||||||
|
powers of $v$ and apply $F_n$ to $\otimes^nv$. $F_n$ is, in fact, a
|
||||||
|
set of all equivalences in sense of class |Equivalence| over $2n$
|
||||||
|
elements, having $n$ classes each of them having exactly 2 elements. */
|
||||||
|
|
||||||
|
#ifndef NORMAL_MOMENTS_H
|
||||||
|
#define NORMAL_MOMENTS_H
|
||||||
|
|
||||||
|
#include "t_container.hh"
|
||||||
|
|
||||||
|
/* Container of unfolded single-symmetry tensors holding the moments of
   the normal distribution N(0,v), one tensor per even dimension
   2,4,...,maxdim (see the long comment above for the derivation). */
class UNormalMoments : public TensorContainer<URSingleTensor>
{
public:
  // Fills the container with moment tensors up to |maxdim| for the
  // variance-covariance matrix |v| (nothing is stored if maxdim < 2).
  UNormalMoments(int maxdim, const TwoDMatrix &v);
private:
  // Computes and inserts the moment tensors for d = 2,4,...,maxdim.
  void generateMoments(int maxdim, const TwoDMatrix &v);
  // Returns true iff every class of the equivalence has exactly 2 elements.
  static bool selectEquiv(const Equivalence &e);
};
|
||||||
|
|
||||||
|
/* Folded counterpart of |UNormalMoments|: each tensor of the given
   unfolded container is folded and stored here. */
class FNormalMoments : public TensorContainer<FRSingleTensor>
{
public:
  FNormalMoments(const UNormalMoments &moms);
};
||||||
|
|
||||||
|
#endif
|
|
@ -1,139 +0,0 @@
|
||||||
@q $Id: normal_moments.hweb 148 2005-04-19 15:12:26Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Moments of normal distribution. Start of {\tt normal\_moments.h} file.
|
|
||||||
|
|
||||||
Here we calculate the higher order moments of normally distributed
|
|
||||||
random vector $u$ with means equal to zero and given
|
|
||||||
variance--covariance matrix $V$, this is $u\sim N(0,V)$. The moment
|
|
||||||
generating function for such distribution is $f(t)=e^{{1\over 2}t^TVt}$. If
|
|
||||||
we derivate it wrt $t$ and unfold the higher dimensional tensors
|
|
||||||
row-wise, we obtain terms like
|
|
||||||
$$\eqalign{
|
|
||||||
{\partial\over\partial t}f(t)=&f(t)\cdot Vt\cr
|
|
||||||
{\partial^2\over\partial t^2}f(t)=&f(t)\cdot(Vt\otimes Vt+v)\cr
|
|
||||||
{\partial^3\over\partial t^3}f(t)=&f(t)\cdot
|
|
||||||
(Vt\otimes Vt\otimes Vt+P_?(v\otimes Vt)+P_?(Vt\otimes v)+v\otimes Vt)\cr
|
|
||||||
{\partial^4\over\partial t^4}f(t)=&f(t)\cdot
|
|
||||||
(Vt\otimes Vt\otimes Vt\otimes Vt+S_?(v\otimes Vt\otimes Vt)+
|
|
||||||
S_?(Vt\otimes v\otimes Vt)+S_?(Vt\otimes Vt\otimes v)+S_?(v\otimes v))}
|
|
||||||
$$
|
|
||||||
where $v$ is vectorized $V$ ($v=\hbox{vec}(V)$), and $P_?$ is a
|
|
||||||
suitable row permutation (corresponds to permutation of
|
|
||||||
multidimensional indices) which permutes the tensor data, so that the
|
|
||||||
index of a variable being derived would be the last. This ensures that
|
|
||||||
all (permuted) tensors can be summed yielding a tensor whose indices
|
|
||||||
have some order (in here we chose the order that more recent
|
|
||||||
derivating variables are to the right). Finally, $S_?$ is a suitable
|
|
||||||
sum of various $P_?$.
|
|
||||||
|
|
||||||
We are interested in $S_?$ multiplying the Kronecker powers
|
|
||||||
$\otimes^nv$. The $S_?$ is a (possibly) multi-set of permutations of
|
|
||||||
even order. Note that we know a number of permutations in $S_?$. The
|
|
||||||
above formulas for $F(t)$ derivatives are valid also for monomial
|
|
||||||
$u$, and from literature we know that $2n$-th moment is ${(2n!)\over
|
|
||||||
n!2^n}\sigma^2$. So there are ${(2n!)\over n!2^n}$ permutations in
|
|
||||||
$S_?$.
|
|
||||||
|
|
||||||
In order to find the $S_?$ we need to define a couple of
|
|
||||||
things. First we define a sort of equivalence between the permutations
|
|
||||||
applicable to even number of indices. We write $P_1\equiv P_2$
|
|
||||||
whenever $P_1^{-1}\circ P_2$ permutes only whole pairs, or items
|
|
||||||
within pairs, but not indices across the pairs. For instance the
|
|
||||||
permutations $(0,1,2,3)$ and $(3,2,0,1)$ are equivalent, but
|
|
||||||
$(0,2,1,3)$ is not equivalent with the two. Clearly, the $\equiv$ is
|
|
||||||
an equivalence.
|
|
||||||
|
|
||||||
This allows to define a relation $\sqsubseteq$ between the permutation
|
|
||||||
multi-sets $S$, which is basically the subset relation $\subseteq$ but
|
|
||||||
with respect to the equivalence $\equiv$, more formally:
|
|
||||||
$$S_1\sqsubseteq S_2\quad\hbox{iff}\quad P\in S_1
|
|
||||||
\Rightarrow\exists Q\in S_2:P\equiv Q$$
|
|
||||||
This induces an equivalence $S_1\equiv S_2$.
|
|
||||||
|
|
||||||
Now let $F_n$ denote a set of permutations on $2n$ indices which is
|
|
||||||
maximal with respect to $\sqsubseteq$, and minimal with respect to
|
|
||||||
$\equiv$. (In other words, it contains everything up to the
|
|
||||||
equivalence $\equiv$.) It is straightforward to calculate a number of
|
|
||||||
permutations in $F_n$. This is a total number of all permutations of
|
|
||||||
$2n$ divided by permutations of pairs divided by permutations within
|
|
||||||
the pairs. This is ${(2n!)\over n!2^n}$.
|
|
||||||
|
|
||||||
We prove that $S_?\equiv F_n$. Clearly $S_?\sqsubseteq F_n$, since
|
|
||||||
$F_n$ is maximal. In order to prove that $F_n\sqsubseteq S_?$, let us
|
|
||||||
assert that for any permutation $P$ and for any (semi)positive
|
|
||||||
definite matrix $V$ we have $PS_?\otimes^nv=S_?\otimes^nv$. Below we
|
|
||||||
show that there is a positive definite matrix $V$ of some dimension
|
|
||||||
that for any two permutation multi-sets $S_1$, $S_2$, we have
|
|
||||||
$$S_1\not\equiv S_2\Rightarrow S_1(\otimes^nv)\neq S_2(\otimes^nv)$$
|
|
||||||
So it follows that for any permutation $P$, we have $PS_?\equiv
|
|
||||||
S_?$. For a purpose of contradiction let $P\in F_n$ be a permutation
|
|
||||||
which is not equivalent to any permutation from $S_?$. Since $S_?$ is
|
|
||||||
non-empty, let us pick $P_0\in S_?$. Now assert that
|
|
||||||
$P_0^{-1}S_?\not\equiv P^{-1}S_?$ since the first contains an identity
|
|
||||||
and the second does not contain a permutation equivalent to
|
|
||||||
identity. Thus we have $(P\circ P_0^{-1})S_?\not\equiv S_?$ which
|
|
||||||
gives the contradiction and we have proved that $F_n\sqsubseteq
|
|
||||||
S_?$. Thus $F_n\equiv S_?$. Moreover, we know that $S_?$ and $F_n$
|
|
||||||
have the same number of permutations, hence the minimality of $S_?$
|
|
||||||
with respect to $\equiv$.
|
|
||||||
|
|
||||||
Now it suffices to prove that there exists a positive definite $V$
|
|
||||||
such that for any two permutation multi-sets $S_1$, and $S_2$ holds
|
|
||||||
$S_1\not\equiv S_2\Rightarrow S_1(\otimes^nv)\neq S_2(\otimes^nv)$. If
|
|
||||||
$V$ is $n\times n$ matrix, then $S_1\not\equiv S_2$ implies that there
|
|
||||||
is identically nonzero polynomial of elements from $V$ of order $n$
|
|
||||||
over integers. If $V=A^TA$ then there is identically non-zero
|
|
||||||
polynomial of elements from $A$ of order $2n$. This means, that we
|
|
||||||
have to find $n(n+1)/2$ tuple $x$ of real numbers such that all
|
|
||||||
identically non-zero polynomials $p$ of order $2n$ over integers yield
|
|
||||||
$p(x)\neq 0$.
|
|
||||||
|
|
||||||
The $x$ is constructed as follows: $x_i = \pi^{\log{r_i}}$, where $r_i$
|
|
||||||
is $i$-th prime. Let us consider monom $x_1^{j_1}\cdot\ldots\cdot
|
|
||||||
x_k^{j_k}$. When the monom is evaluated, we get
|
|
||||||
$$\pi^{\log{r_1^{j_1}}+\ldots+\log{r_k^{j_k}}}=
|
|
||||||
\pi^{\log{\left(r_1^{j_1}\cdot\ldots\cdot r_k^{j_k}\right)}}$$
|
|
||||||
Now it is easy to see that if an integer combination of such terms is
|
|
||||||
zero, then the combination must be either trivial or sum to $0$ and
|
|
||||||
all monoms must be equal. Both cases imply a polynomial identically
|
|
||||||
equal to zero. So, any non-trivial integer polynomial evaluated at $x$
|
|
||||||
must be non-zero.
|
|
||||||
|
|
||||||
So, having this result in hand, now it is straightforward to calculate
|
|
||||||
higher moments of normal distribution. Here we define a container,
|
|
||||||
which does the job. In its constructor, we simply calculate Kronecker
|
|
||||||
powers of $v$ and apply $F_n$ to $\otimes^nv$. $F_n$ is, in fact, a
|
|
||||||
set of all equivalences in sense of class |Equivalence| over $2n$
|
|
||||||
elements, having $n$ classes each of them having exactly 2 elements.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#ifndef NORMAL_MOMENTS_H
|
|
||||||
#define NORMAL_MOMENTS_H
|
|
||||||
|
|
||||||
#include "t_container.h"
|
|
||||||
|
|
||||||
@<|UNormalMoments| class declaration@>;
|
|
||||||
@<|FNormalMoments| class declaration@>;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|UNormalMoments| class declaration@>=
|
|
||||||
class UNormalMoments : public TensorContainer<URSingleTensor> {
|
|
||||||
public:@;
|
|
||||||
UNormalMoments(int maxdim, const TwoDMatrix& v);
|
|
||||||
private:@;
|
|
||||||
void generateMoments(int maxdim, const TwoDMatrix& v);
|
|
||||||
static bool selectEquiv( const Equivalence& e);
|
|
||||||
};
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|FNormalMoments| class declaration@>=
|
|
||||||
class FNormalMoments : public TensorContainer<FRSingleTensor> {
|
|
||||||
public:@;
|
|
||||||
FNormalMoments(const UNormalMoments& moms);
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt normal\_moments.h} file.
|
|
|
@ -0,0 +1,163 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "permutation.hh"
|
||||||
|
#include "tl_exception.hh"
|
||||||
|
|
||||||
|
/* This is easy, we simply apply the map in the fashion $s\circ m$.. */
|
||||||
|
|
||||||
|
void
|
||||||
|
Permutation::apply(const IntSequence &src, IntSequence &tar) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(src.size() != permap.size() || tar.size() != permap.size(),
|
||||||
|
"Wrong sizes of input or output in Permutation::apply");
|
||||||
|
for (int i = 0; i < permap.size(); i++)
|
||||||
|
tar[i] = src[permap[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Permutation::apply(IntSequence &tar) const
|
||||||
|
{
|
||||||
|
IntSequence tmp(tar);
|
||||||
|
apply(tmp, tar);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Permutation::inverse()
|
||||||
|
{
|
||||||
|
IntSequence former(permap);
|
||||||
|
for (int i = 0; i < size(); i++)
|
||||||
|
permap[former[i]] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we find a number of trailing indices which are identical with
|
||||||
|
the permutation. */
|
||||||
|
|
||||||
|
int
|
||||||
|
Permutation::tailIdentity() const
|
||||||
|
{
|
||||||
|
int i = permap.size();
|
||||||
|
while (i > 0 && permap[i-1] == i-1)
|
||||||
|
i--;
|
||||||
|
return permap.size() - i;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This calculates a map which corresponds to sorting in the following
|
||||||
|
sense: $(\hbox{sorted }s)\circ m = s$, where $s$ is a given sequence.
|
||||||
|
|
||||||
|
We go through |s| and find an the same item in sorted |s|. We
|
||||||
|
construct the |permap| from the found pair of indices. We have to be
|
||||||
|
careful, to not assign to two positions in |s| the same position in
|
||||||
|
sorted |s|, so we maintain a bitmap |flag|, in which we remember
|
||||||
|
indices from the sorted |s| already assigned. */
|
||||||
|
|
||||||
|
void
|
||||||
|
Permutation::computeSortingMap(const IntSequence &s)
|
||||||
|
{
|
||||||
|
IntSequence srt(s);
|
||||||
|
srt.sort();
|
||||||
|
IntSequence flags(s.size(), 0);
|
||||||
|
|
||||||
|
for (int i = 0; i < s.size(); i++)
|
||||||
|
{
|
||||||
|
int j = 0;
|
||||||
|
while (j < s.size() && (flags[j] || srt[j] != s[i]))
|
||||||
|
j++;
|
||||||
|
TL_RAISE_IF(j == s.size(),
|
||||||
|
"Internal algorithm error in Permutation::computeSortingMap");
|
||||||
|
flags[j] = 1;
|
||||||
|
permap[i] = j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The trivial set of order 1: it contains just the identity permutation
   of a single element. */
PermutationSet::PermutationSet()
  : order(1), size(1), pers(new const Permutation *[size])
{
  pers[0] = new Permutation(1);
}
|
||||||
|
|
||||||
|
/* Build the set of permutations of order n from the set of order n-1:
   each permutation of the smaller set is extended in each of the n
   possible ways (second constructor argument j of |Permutation|), so the
   new set has n times as many members. */
PermutationSet::PermutationSet(const PermutationSet &sp, int n)
  : order(n), size(n*sp.size),
    pers(new const Permutation *[size])
{
  for (int idx = 0; idx < size; idx++)
    pers[idx] = NULL;

  TL_RAISE_IF(n != sp.order+1,
              "Wrong new order in PermutationSet constructor");

  for (int i = 0; i < sp.size; i++)
    for (int j = 0; j < order; j++)
      pers[i*order+j] = new Permutation(*(sp.pers[i]), j);
}
|
||||||
|
|
||||||
|
/* Release the owned permutations and the pointer array itself. The
   explicit null check of the original was redundant: a delete-expression
   on a null pointer is a no-op by the C++ standard. */
PermutationSet::~PermutationSet()
{
  for (int i = 0; i < size; i++)
    delete pers[i];
  delete [] pers;
}
|
||||||
|
|
||||||
|
/* Collect all permutations of the set which map the given sequence onto
   itself. */
vector<const Permutation *>
PermutationSet::getPreserving(const IntSequence &s) const
{
  TL_RAISE_IF(s.size() != order,
              "Wrong sequence length in PermutationSet::getPreserving");

  vector<const Permutation *> found;
  IntSequence image(s.size());
  for (int i = 0; i < size; i++)
    {
      pers[i]->apply(s, image);
      if (s == image)
        found.push_back(pers[i]);
    }

  return found;
}
|
||||||
|
|
||||||
|
/* Create the bundle holding permutation sets of all orders up to the
   given maximum (at least up to order 1). */
PermutationBundle::PermutationBundle(int nmax)
{
  if (nmax < 1)
    nmax = 1;
  generateUpTo(nmax);
}
|
||||||
|
|
||||||
|
/* Dispose of all the owned permutation sets. */
PermutationBundle::~PermutationBundle()
{
  unsigned int pos = 0;
  while (pos < bundle.size())
    delete bundle[pos++];
}
|
||||||
|
|
||||||
|
/* Return the permutation set of the given order; raises if the order is
   out of the generated range (and then falls back to the first set so
   that a reference can still be returned). Uses static_cast instead of
   the original C-style cast, and names the correct class in the error
   message (the original said PermutationSet::get). */
const PermutationSet &
PermutationBundle::get(int n) const
{
  if (n > static_cast<int>(bundle.size()) || n < 1)
    {
      TL_RAISE("Permutation set not found in PermutationBundle::get");
      return *(bundle[0]);
    }
  else
    {
      return *(bundle[n-1]);
    }
}
|
||||||
|
|
||||||
|
void
|
||||||
|
PermutationBundle::generateUpTo(int nmax)
|
||||||
|
{
|
||||||
|
if (bundle.size() == 0)
|
||||||
|
bundle.push_back(new PermutationSet());
|
||||||
|
|
||||||
|
int curmax = bundle.size();
|
||||||
|
for (int n = curmax+1; n <= nmax; n++)
|
||||||
|
{
|
||||||
|
bundle.push_back(new PermutationSet(*(bundle.back()), n));
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,188 +0,0 @@
|
||||||
@q $Id: permutation.cweb 332 2005-07-15 13:41:48Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt permutation.cweb} file.
|
|
||||||
@c
|
|
||||||
|
|
||||||
#include "permutation.h"
|
|
||||||
#include "tl_exception.h"
|
|
||||||
|
|
||||||
@<|Permutation::apply| code@>;
|
|
||||||
@<|Permutation::inverse| code@>;
|
|
||||||
@<|Permutation::tailIdentity| code@>;
|
|
||||||
@<|Permutation::computeSortingMap| code@>;
|
|
||||||
@<|PermutationSet| constructor code 1@>;
|
|
||||||
@<|PermutationSet| constructor code 2@>;
|
|
||||||
@<|PermutationSet| destructor code@>;
|
|
||||||
@<|PermutationSet::getPreserving| code@>;
|
|
||||||
@<|PermutationBundle| constructor code@>;
|
|
||||||
@<|PermutationBundle| destructor code@>;
|
|
||||||
@<|PermutationBundle::get| code@>;
|
|
||||||
@<|PermutationBundle::generateUpTo| code@>;
|
|
||||||
|
|
||||||
|
|
||||||
@ This is easy, we simply apply the map in the fashion $s\circ m$..
|
|
||||||
@<|Permutation::apply| code@>=
|
|
||||||
void Permutation::apply(const IntSequence& src, IntSequence& tar) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(src.size() != permap.size() || tar.size() != permap.size(),
|
|
||||||
"Wrong sizes of input or output in Permutation::apply");
|
|
||||||
for (int i = 0; i < permap.size(); i++)
|
|
||||||
tar[i] = src[permap[i]];
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void Permutation::apply(IntSequence& tar) const
|
|
||||||
{
|
|
||||||
IntSequence tmp(tar);
|
|
||||||
apply(tmp, tar);
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|Permutation::inverse| code@>=
|
|
||||||
void Permutation::inverse()
|
|
||||||
{
|
|
||||||
IntSequence former(permap);
|
|
||||||
for (int i = 0; i < size(); i++)
|
|
||||||
permap[former[i]] = i;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we find a number of trailing indices which are identical with
|
|
||||||
the permutation.
|
|
||||||
|
|
||||||
@<|Permutation::tailIdentity| code@>=
|
|
||||||
int Permutation::tailIdentity() const
|
|
||||||
{
|
|
||||||
int i = permap.size();
|
|
||||||
while (i > 0 && permap[i-1] == i-1)
|
|
||||||
i--;
|
|
||||||
return permap.size() - i;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This calculates a map which corresponds to sorting in the following
|
|
||||||
sense: $(\hbox{sorted }s)\circ m = s$, where $s$ is a given sequence.
|
|
||||||
|
|
||||||
We go through |s| and find the same item in sorted |s|. We
|
|
||||||
construct the |permap| from the found pair of indices. We have to be
|
|
||||||
careful, to not assign to two positions in |s| the same position in
|
|
||||||
sorted |s|, so we maintain a bitmap |flag|, in which we remember
|
|
||||||
indices from the sorted |s| already assigned.
|
|
||||||
|
|
||||||
@<|Permutation::computeSortingMap| code@>=
|
|
||||||
void Permutation::computeSortingMap(const IntSequence& s)
|
|
||||||
{
|
|
||||||
IntSequence srt(s);
|
|
||||||
srt.sort();
|
|
||||||
IntSequence flags(s.size(),0);
|
|
||||||
|
|
||||||
for (int i = 0; i < s.size(); i++) {
|
|
||||||
int j = 0;
|
|
||||||
while (j < s.size() && (flags[j] || srt[j] != s[i]))
|
|
||||||
j++;
|
|
||||||
TL_RAISE_IF(j == s.size(),
|
|
||||||
"Internal algorithm error in Permutation::computeSortingMap");
|
|
||||||
flags[j] = 1;
|
|
||||||
permap[i] = j;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|PermutationSet| constructor code 1@>=
|
|
||||||
PermutationSet::PermutationSet()
|
|
||||||
: order(1), size(1), pers(new const Permutation*[size])
|
|
||||||
{
|
|
||||||
pers[0] = new Permutation(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|PermutationSet| constructor code 2@>=
|
|
||||||
PermutationSet::PermutationSet(const PermutationSet& sp, int n)
|
|
||||||
: order(n), size(n*sp.size),
|
|
||||||
pers(new const Permutation*[size])
|
|
||||||
{
|
|
||||||
for (int i = 0; i < size; i++)
|
|
||||||
pers[i] = NULL;
|
|
||||||
|
|
||||||
TL_RAISE_IF(n != sp.order+1,
|
|
||||||
"Wrong new order in PermutationSet constructor");
|
|
||||||
|
|
||||||
int k = 0;
|
|
||||||
for (int i = 0; i < sp.size; i++) {
|
|
||||||
for (int j = 0; j < order; j++,k++) {
|
|
||||||
pers[k] = new Permutation(*(sp.pers[i]), j);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|PermutationSet| destructor code@>=
|
|
||||||
PermutationSet::~PermutationSet()
|
|
||||||
{
|
|
||||||
for (int i = 0; i < size; i++)
|
|
||||||
if (pers[i])
|
|
||||||
delete pers[i];
|
|
||||||
delete [] pers;
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|PermutationSet::getPreserving| code@>=
|
|
||||||
vector<const Permutation*> PermutationSet::getPreserving(const IntSequence& s) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(s.size() != order,
|
|
||||||
"Wrong sequence length in PermutationSet::getPreserving");
|
|
||||||
|
|
||||||
vector<const Permutation*> res;
|
|
||||||
IntSequence tmp(s.size());
|
|
||||||
for (int i = 0; i < size; i++) {
|
|
||||||
pers[i]->apply(s, tmp);
|
|
||||||
if (s == tmp) {
|
|
||||||
res.push_back(pers[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|PermutationBundle| constructor code@>=
|
|
||||||
PermutationBundle::PermutationBundle(int nmax)
|
|
||||||
{
|
|
||||||
nmax = max(nmax, 1);
|
|
||||||
generateUpTo(nmax);
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|PermutationBundle| destructor code@>=
|
|
||||||
PermutationBundle::~PermutationBundle()
|
|
||||||
{
|
|
||||||
for (unsigned int i = 0; i < bundle.size(); i++)
|
|
||||||
delete bundle[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|PermutationBundle::get| code@>=
|
|
||||||
const PermutationSet& PermutationBundle::get(int n) const
|
|
||||||
{
|
|
||||||
if (n > (int)(bundle.size()) || n < 1) {
|
|
||||||
TL_RAISE("Permutation set not found in PermutationSet::get");
|
|
||||||
return *(bundle[0]);
|
|
||||||
} else {
|
|
||||||
return *(bundle[n-1]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|PermutationBundle::generateUpTo| code@>=
|
|
||||||
void PermutationBundle::generateUpTo(int nmax)
|
|
||||||
{
|
|
||||||
if (bundle.size() == 0)
|
|
||||||
bundle.push_back(new PermutationSet());
|
|
||||||
|
|
||||||
int curmax = bundle.size();
|
|
||||||
for (int n = curmax+1; n <= nmax; n++) {
|
|
||||||
bundle.push_back(new PermutationSet(*(bundle.back()), n));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ End of {\tt permutation.cweb} file.
|
|
|
@ -0,0 +1,177 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// Permutations.
|
||||||
|
|
||||||
|
/* The permutation class is useful when describing a permutation of
|
||||||
|
indices in permuted symmetry tensor. This tensor comes to existence,
|
||||||
|
for instance, as a result of the following tensor multiplication:
|
||||||
|
$$\left[g_{y^3}\right]_{\gamma_1\gamma_2\gamma_3}
|
||||||
|
\left[g_{yu}\right]^{\gamma_1}_{\alpha_1\beta_3}
|
||||||
|
\left[g_{yu}\right]^{\gamma_2}_{\alpha_2\beta_1}
|
||||||
|
\left[g_u\right]^{\gamma_3}_{\beta_2}
|
||||||
|
$$
|
||||||
|
If this operation is done by a Kronecker product of unfolded tensors,
|
||||||
|
the resulting tensor has permuted indices. So, in this case the
|
||||||
|
permutation is implied by the equivalence:
|
||||||
|
$\{\{0,4\},\{1,3\},\{2\}\}$. This results in a permutation which maps
|
||||||
|
indices $(0,1,2,3,4)\mapsto(0,2,4,3,1)$.
|
||||||
|
|
||||||
|
The other application of |Permutation| class is to permute indices
|
||||||
|
with the same permutation as done during sorting.
|
||||||
|
|
||||||
|
Here we only define an abstraction for the permutation defined by an
|
||||||
|
equivalence. Its basic operation is to apply the permutation to the
|
||||||
|
integer sequence. The application is right (or inner), in sense that
|
||||||
|
it works on indices of the sequence not items of the sequence. More
|
||||||
|
formally $s\circ m \not=m\circ s$. In here, the application of the
|
||||||
|
permutation defined by map $m$ is $s\circ m$.
|
||||||
|
|
||||||
|
Also, we need |PermutationSet| class which contains all permutations
|
||||||
|
of $n$ element set, and a bundle of permutations |PermutationBundle|
|
||||||
|
which contains all permutation sets up to a given number. */
|
||||||
|
|
||||||
|
#ifndef PERMUTATION_H
|
||||||
|
#define PERMUTATION_H
|
||||||
|
|
||||||
|
#include "int_sequence.hh"
|
||||||
|
#include "equivalence.hh"
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
/* The permutation object will have a map, which defines mapping of
|
||||||
|
indices $(0,1,\ldots,n-1)\mapsto(m_0,m_1,\ldots, m_{n-1})$. The map is
|
||||||
|
the sequence $(m_0,m_1,\ldots, m_{n-1}$. When the permutation with the
|
||||||
|
map $m$ is applied on sequence $s$, it permutes its indices:
|
||||||
|
$s\circ\hbox{id}\mapsto s\circ m$.
|
||||||
|
|
||||||
|
So we have one constructor from equivalence, then a method |apply|,
|
||||||
|
and finally a method |tailIdentity| which returns a number of trailing
|
||||||
|
indices which yield identity. Also we have a constructor calculating
|
||||||
|
map, which corresponds to permutation in sort. This is, we want
|
||||||
|
$(\hbox{sorted }s)\circ m = s$. */
|
||||||
|
|
||||||
|
class Permutation
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
IntSequence permap;
|
||||||
|
public:
|
||||||
|
Permutation(int len)
|
||||||
|
: permap(len)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < len; i++)
|
||||||
|
permap[i] = i;
|
||||||
|
}
|
||||||
|
Permutation(const Equivalence &e)
|
||||||
|
: permap(e.getN())
|
||||||
|
{
|
||||||
|
e.trace(permap);
|
||||||
|
}
|
||||||
|
Permutation(const Equivalence &e, const Permutation &per)
|
||||||
|
: permap(e.getN())
|
||||||
|
{
|
||||||
|
e.trace(permap, per);
|
||||||
|
}
|
||||||
|
Permutation(const IntSequence &s)
|
||||||
|
: permap(s.size())
|
||||||
|
{
|
||||||
|
computeSortingMap(s);
|
||||||
|
};
|
||||||
|
Permutation(const Permutation &p)
|
||||||
|
: permap(p.permap)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
Permutation(const Permutation &p1, const Permutation &p2)
|
||||||
|
: permap(p2.permap)
|
||||||
|
{
|
||||||
|
p1.apply(permap);
|
||||||
|
}
|
||||||
|
Permutation(const Permutation &p, int i)
|
||||||
|
: permap(p.size(), p.permap, i)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
const Permutation &
|
||||||
|
operator=(const Permutation &p)
|
||||||
|
{
|
||||||
|
permap = p.permap; return *this;
|
||||||
|
}
|
||||||
|
bool
|
||||||
|
operator==(const Permutation &p)
|
||||||
|
{
|
||||||
|
return permap == p.permap;
|
||||||
|
}
|
||||||
|
int
|
||||||
|
size() const
|
||||||
|
{
|
||||||
|
return permap.size();
|
||||||
|
}
|
||||||
|
void
|
||||||
|
print() const
|
||||||
|
{
|
||||||
|
permap.print();
|
||||||
|
}
|
||||||
|
void apply(const IntSequence &src, IntSequence &tar) const;
|
||||||
|
void apply(IntSequence &tar) const;
|
||||||
|
void inverse();
|
||||||
|
int tailIdentity() const;
|
||||||
|
const IntSequence &
|
||||||
|
getMap() const
|
||||||
|
{
|
||||||
|
return permap;
|
||||||
|
}
|
||||||
|
IntSequence &
|
||||||
|
getMap()
|
||||||
|
{
|
||||||
|
return permap;
|
||||||
|
}
|
||||||
|
protected:
|
||||||
|
void computeSortingMap(const IntSequence &s);
|
||||||
|
};
|
||||||
|
|
||||||
|
/* The |PermutationSet| maintains an array of of all permutations. The
|
||||||
|
default constructor constructs one element permutation set of one
|
||||||
|
element sets. The second constructor constructs a new permutation set
|
||||||
|
over $n$ from all permutations over $n-1$. The parameter $n$ need not
|
||||||
|
to be provided, but it serves to distinguish the constructor from copy
|
||||||
|
constructor, which is not provided.
|
||||||
|
|
||||||
|
The method |getPreserving| returns a factor subgroup of permutations,
|
||||||
|
which are invariants with respect to the given sequence. This are all
|
||||||
|
permutations $p$ yielding $p\circ s = s$, where $s$ is the given
|
||||||
|
sequence. */
|
||||||
|
|
||||||
|
class PermutationSet
|
||||||
|
{
|
||||||
|
int order;
|
||||||
|
int size;
|
||||||
|
const Permutation **const pers;
|
||||||
|
public:
|
||||||
|
PermutationSet();
|
||||||
|
PermutationSet(const PermutationSet &ps, int n);
|
||||||
|
~PermutationSet();
|
||||||
|
int
|
||||||
|
getNum() const
|
||||||
|
{
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
const Permutation &
|
||||||
|
get(int i) const
|
||||||
|
{
|
||||||
|
return *(pers[i]);
|
||||||
|
}
|
||||||
|
vector<const Permutation *> getPreserving(const IntSequence &s) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* The permutation bundle encapsulates all permutations sets up to some
|
||||||
|
given dimension. */
|
||||||
|
|
||||||
|
class PermutationBundle
|
||||||
|
{
|
||||||
|
vector<PermutationSet *> bundle;
|
||||||
|
public:
|
||||||
|
PermutationBundle(int nmax);
|
||||||
|
~PermutationBundle();
|
||||||
|
const PermutationSet&get(int n) const;
|
||||||
|
void generateUpTo(int nmax);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,147 +0,0 @@
|
||||||
@q $Id: permutation.hweb 148 2005-04-19 15:12:26Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Permutations. Start of {\tt permutation.h} file.
|
|
||||||
|
|
||||||
The permutation class is useful when describing a permutation of
|
|
||||||
indices in permuted symmetry tensor. This tensor comes to existence,
|
|
||||||
for instance, as a result of the following tensor multiplication:
|
|
||||||
$$\left[g_{y^3}\right]_{\gamma_1\gamma_2\gamma_3}
|
|
||||||
\left[g_{yu}\right]^{\gamma_1}_{\alpha_1\beta_3}
|
|
||||||
\left[g_{yu}\right]^{\gamma_2}_{\alpha_2\beta_1}
|
|
||||||
\left[g_u\right]^{\gamma_3}_{\beta_2}
|
|
||||||
$$
|
|
||||||
If this operation is done by a Kronecker product of unfolded tensors,
|
|
||||||
the resulting tensor has permuted indices. So, in this case the
|
|
||||||
permutation is implied by the equivalence:
|
|
||||||
$\{\{0,4\},\{1,3\},\{2\}\}$. This results in a permutation which maps
|
|
||||||
indices $(0,1,2,3,4)\mapsto(0,2,4,3,1)$.
|
|
||||||
|
|
||||||
The other application of |Permutation| class is to permute indices
|
|
||||||
with the same permutation as done during sorting.
|
|
||||||
|
|
||||||
Here we only define an abstraction for the permutation defined by an
|
|
||||||
equivalence. Its basic operation is to apply the permutation to the
|
|
||||||
integer sequence. The application is right (or inner), in sense that
|
|
||||||
it works on indices of the sequence not items of the sequence. More
|
|
||||||
formally $s\circ m \not=m\circ s$. In here, the application of the
|
|
||||||
permutation defined by map $m$ is $s\circ m$.
|
|
||||||
|
|
||||||
Also, we need |PermutationSet| class which contains all permutations
|
|
||||||
of $n$ element set, and a bundle of permutations |PermutationBundle|
|
|
||||||
which contains all permutation sets up to a given number.
|
|
||||||
|
|
||||||
@s Permutation int
|
|
||||||
@s PermutationSet int
|
|
||||||
@s PermutationBundle int
|
|
||||||
|
|
||||||
@c
|
|
||||||
#ifndef PERMUTATION_H
|
|
||||||
#define PERMUTATION_H
|
|
||||||
|
|
||||||
#include "int_sequence.h"
|
|
||||||
#include "equivalence.h"
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
@<|Permutation| class declaration@>;
|
|
||||||
@<|PermutationSet| class declaration@>;
|
|
||||||
@<|PermutationBundle| class declaration@>;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
@ The permutation object will have a map, which defines mapping of
|
|
||||||
indices $(0,1,\ldots,n-1)\mapsto(m_0,m_1,\ldots, m_{n-1})$. The map is
|
|
||||||
the sequence $(m_0,m_1,\ldots, m_{n-1}$. When the permutation with the
|
|
||||||
map $m$ is applied on sequence $s$, it permutes its indices:
|
|
||||||
$s\circ\hbox{id}\mapsto s\circ m$.
|
|
||||||
|
|
||||||
So we have one constructor from equivalence, then a method |apply|,
|
|
||||||
and finally a method |tailIdentity| which returns a number of trailing
|
|
||||||
indices which yield identity. Also we have a constructor calculating
|
|
||||||
map, which corresponds to permutation in sort. This is, we want
|
|
||||||
$(\hbox{sorted }s)\circ m = s$.
|
|
||||||
|
|
||||||
@<|Permutation| class declaration@>=
|
|
||||||
class Permutation {
|
|
||||||
protected:@;
|
|
||||||
IntSequence permap;
|
|
||||||
public:@;
|
|
||||||
Permutation(int len)
|
|
||||||
: permap(len) {@+ for (int i = 0; i < len; i++) permap[i] = i;@+}
|
|
||||||
Permutation(const Equivalence& e)
|
|
||||||
: permap(e.getN()) {@+ e.trace(permap);@+}
|
|
||||||
Permutation(const Equivalence& e, const Permutation& per)
|
|
||||||
: permap(e.getN()) {@+ e.trace(permap, per);@+}
|
|
||||||
Permutation(const IntSequence& s)
|
|
||||||
: permap(s.size()) {@+ computeSortingMap(s);@+};
|
|
||||||
Permutation(const Permutation& p)
|
|
||||||
: permap(p.permap)@+ {}
|
|
||||||
Permutation(const Permutation& p1, const Permutation& p2)
|
|
||||||
: permap(p2.permap) {@+ p1.apply(permap);@+}
|
|
||||||
Permutation(const Permutation& p, int i)
|
|
||||||
: permap(p.size(), p.permap, i)@+ {}
|
|
||||||
const Permutation& operator=(const Permutation& p)
|
|
||||||
{@+ permap = p.permap;@+ return *this;@+}
|
|
||||||
bool operator==(const Permutation& p)
|
|
||||||
{@+ return permap == p.permap;@+}
|
|
||||||
int size() const
|
|
||||||
{@+ return permap.size();@+}
|
|
||||||
void print() const
|
|
||||||
{@+ permap.print();@+}
|
|
||||||
void apply(const IntSequence& src, IntSequence& tar) const;
|
|
||||||
void apply(IntSequence& tar) const;
|
|
||||||
void inverse();
|
|
||||||
int tailIdentity() const;
|
|
||||||
const IntSequence& getMap() const
|
|
||||||
{@+ return permap;@+}
|
|
||||||
IntSequence& getMap()
|
|
||||||
{@+ return permap;@+}
|
|
||||||
protected:@;
|
|
||||||
void computeSortingMap(const IntSequence& s);
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ The |PermutationSet| maintains an array of of all permutations. The
|
|
||||||
default constructor constructs one element permutation set of one
|
|
||||||
element sets. The second constructor constructs a new permutation set
|
|
||||||
over $n$ from all permutations over $n-1$. The parameter $n$ need not
|
|
||||||
to be provided, but it serves to distinguish the constructor from copy
|
|
||||||
constructor, which is not provided.
|
|
||||||
|
|
||||||
The method |getPreserving| returns a factor subgroup of permutations,
|
|
||||||
which are invariants with respect to the given sequence. This are all
|
|
||||||
permutations $p$ yielding $p\circ s = s$, where $s$ is the given
|
|
||||||
sequence.
|
|
||||||
|
|
||||||
@<|PermutationSet| class declaration@>=
|
|
||||||
class PermutationSet {
|
|
||||||
int order;
|
|
||||||
int size;
|
|
||||||
const Permutation** const pers;
|
|
||||||
public:@;
|
|
||||||
PermutationSet();
|
|
||||||
PermutationSet(const PermutationSet& ps, int n);
|
|
||||||
~PermutationSet();
|
|
||||||
int getNum() const
|
|
||||||
{@+ return size;@+}
|
|
||||||
const Permutation& get(int i) const
|
|
||||||
{@+ return *(pers[i]);@+}
|
|
||||||
vector<const Permutation*> getPreserving(const IntSequence& s) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ The permutation bundle encapsulates all permutations sets up to some
|
|
||||||
given dimension.
|
|
||||||
|
|
||||||
@<|PermutationBundle| class declaration@>=
|
|
||||||
class PermutationBundle {
|
|
||||||
vector<PermutationSet*> bundle;
|
|
||||||
public:@;
|
|
||||||
PermutationBundle(int nmax);
|
|
||||||
~PermutationBundle();
|
|
||||||
const PermutationSet& get(int n) const;
|
|
||||||
void generateUpTo(int nmax);
|
|
||||||
};
|
|
||||||
|
|
||||||
@ End of {\tt permutation.h} file.
|
|
|
@ -0,0 +1,399 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "ps_tensor.hh"
|
||||||
|
#include "fs_tensor.hh"
|
||||||
|
#include "tl_exception.hh"
|
||||||
|
#include "tl_static.hh"
|
||||||
|
#include "stack_container.hh"
|
||||||
|
|
||||||
|
/* Here we decide, what method for filling a slice in slicing
|
||||||
|
constructor to use. A few experiments suggest, that if the tensor is
|
||||||
|
more than 8\% filled, the first method (|fillFromSparseOne|) is
|
||||||
|
better. For fill factors less than 1\%, the second can be 3 times
|
||||||
|
quicker. */
|
||||||
|
|
||||||
|
UPSTensor::fill_method
|
||||||
|
UPSTensor::decideFillMethod(const FSSparseTensor &t)
|
||||||
|
{
|
||||||
|
if (t.getFillFactor() > 0.08)
|
||||||
|
return first;
|
||||||
|
else
|
||||||
|
return second;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we make a slice. We decide what fill method to use and set it. */
|
||||||
|
|
||||||
|
UPSTensor::UPSTensor(const FSSparseTensor &t, const IntSequence &ss,
|
||||||
|
const IntSequence &coor, const PerTensorDimens &ptd)
|
||||||
|
: UTensor(along_col, ptd.getNVX(),
|
||||||
|
t.nrows(), ptd.calcUnfoldMaxOffset(), ptd.dimen()),
|
||||||
|
tdims(ptd)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(coor.size() != t.dimen(),
|
||||||
|
"Wrong coordinates length of stacks for UPSTensor slicing constructor");
|
||||||
|
TL_RAISE_IF(ss.sum() != t.nvar(),
|
||||||
|
"Wrong length of stacks for UPSTensor slicing constructor");
|
||||||
|
|
||||||
|
if (first == decideFillMethod(t))
|
||||||
|
fillFromSparseOne(t, ss, coor);
|
||||||
|
else
|
||||||
|
fillFromSparseTwo(t, ss, coor);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
UPSTensor::increment(IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input/output vector size in UPSTensor::increment");
|
||||||
|
|
||||||
|
UTensor::increment(v, tdims.getNVX());
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
UPSTensor::decrement(IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input/output vector size in UPSTensor::decrement");
|
||||||
|
|
||||||
|
UTensor::decrement(v, tdims.getNVX());
|
||||||
|
}
|
||||||
|
|
||||||
|
FTensor &
|
||||||
|
UPSTensor::fold() const
|
||||||
|
{
|
||||||
|
TL_RAISE("Never should come to this place in UPSTensor::fold");
|
||||||
|
FFSTensor *nothing = new FFSTensor(0, 0, 0);
|
||||||
|
return *nothing;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
UPSTensor::getOffset(const IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input vector size in UPSTensor::getOffset");
|
||||||
|
|
||||||
|
return UTensor::getOffset(v, tdims.getNVX());
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
UPSTensor::addTo(FGSTensor &out) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(out.getDims() != tdims,
|
||||||
|
"Tensors have incompatible dimens in UPSTensor::addTo");
|
||||||
|
for (index in = out.begin(); in != out.end(); ++in)
|
||||||
|
{
|
||||||
|
IntSequence vtmp(dimen());
|
||||||
|
tdims.getPer().apply(in.getCoor(), vtmp);
|
||||||
|
index tin(this, vtmp);
|
||||||
|
out.addColumn(*this, *tin, *in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* In here, we have to add this permuted symmetry unfolded tensor to an
|
||||||
|
unfolded not permuted tensor. One easy way would be to go through the
|
||||||
|
target tensor, permute each index, and add the column.
|
||||||
|
|
||||||
|
However, it may happen, that the permutation has some non-empty
|
||||||
|
identity tail. In this case, we can add not only individual columns,
|
||||||
|
but much bigger data chunks, which is usually more
|
||||||
|
efficient. Therefore, the code is quite dirty, because we have not an
|
||||||
|
iterator, which iterates over tensor at some higher levels. So we
|
||||||
|
simulate it by the following code.
|
||||||
|
|
||||||
|
First we set |cols| to the length of the data chunk and |off| to its
|
||||||
|
dimension. Then we need a front part of |nvmax| of |out|, which is
|
||||||
|
|nvmax_part|. Our iterator here is an integer sequence |outrun| with
|
||||||
|
full length, and |outrun_part| its front part. The |outrun| is
|
||||||
|
initialized to zeros. In each step we need to increment |outrun|
|
||||||
|
|cols|-times, this is done by incrementing its prefix |outrun_part|.
|
||||||
|
|
||||||
|
So we loop over all |cols|wide partitions of |out|, permute |outrun|
|
||||||
|
to obtain |perrun| to obtain column of this matrix. (note that the
|
||||||
|
trailing part of |perrun| is the same as of |outrun|. Then we
|
||||||
|
construct submatrices, add them, and increment |outrun|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
UPSTensor::addTo(UGSTensor &out) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(out.getDims() != tdims,
|
||||||
|
"Tensors have incompatible dimens in UPSTensor::addTo");
|
||||||
|
int cols = tailIdentitySize();
|
||||||
|
int off = tdims.tailIdentity();
|
||||||
|
IntSequence outrun(out.dimen(), 0);
|
||||||
|
IntSequence outrun_part(outrun, 0, out.dimen()-off);
|
||||||
|
IntSequence nvmax_part(out.getDims().getNVX(), 0, out.dimen()-off);
|
||||||
|
for (int out_col = 0; out_col < out.ncols(); out_col += cols)
|
||||||
|
{
|
||||||
|
// permute |outrun|
|
||||||
|
IntSequence perrun(out.dimen());
|
||||||
|
tdims.getPer().apply(outrun, perrun);
|
||||||
|
index from(this, perrun);
|
||||||
|
// construct submatrices
|
||||||
|
ConstTwoDMatrix subfrom(*this, *from, cols);
|
||||||
|
TwoDMatrix subout(out, out_col, cols);
|
||||||
|
// add
|
||||||
|
subout.add(1, subfrom);
|
||||||
|
// increment |outrun| by cols
|
||||||
|
UTensor::increment(outrun_part, nvmax_part);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This returns a product of all items in |nvmax| which make up the
|
||||||
|
trailing identity part. */
|
||||||
|
|
||||||
|
int
|
||||||
|
UPSTensor::tailIdentitySize() const
|
||||||
|
{
|
||||||
|
return tdims.getNVX().mult(dimen()-tdims.tailIdentity(), dimen());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This fill method is pretty dumb. We go through all columns in |this|
|
||||||
|
tensor, translate coordinates to sparse tensor, sort them and find an
|
||||||
|
item in the sparse tensor. There are many not successful lookups for
|
||||||
|
really sparse tensor, that is why the second method works better for
|
||||||
|
really sparse tensors. */
|
||||||
|
|
||||||
|
void
|
||||||
|
UPSTensor::fillFromSparseOne(const FSSparseTensor &t, const IntSequence &ss,
|
||||||
|
const IntSequence &coor)
|
||||||
|
{
|
||||||
|
IntSequence cumtmp(ss.size());
|
||||||
|
cumtmp[0] = 0;
|
||||||
|
for (int i = 1; i < ss.size(); i++)
|
||||||
|
cumtmp[i] = cumtmp[i-1] + ss[i-1];
|
||||||
|
IntSequence cum(coor.size());
|
||||||
|
for (int i = 0; i < coor.size(); i++)
|
||||||
|
cum[i] = cumtmp[coor[i]];
|
||||||
|
|
||||||
|
zeros();
|
||||||
|
for (Tensor::index run = begin(); run != end(); ++run)
|
||||||
|
{
|
||||||
|
IntSequence c(run.getCoor());
|
||||||
|
c.add(1, cum);
|
||||||
|
c.sort();
|
||||||
|
FSSparseTensor::const_iterator sl = t.getMap().lower_bound(c);
|
||||||
|
if (sl != t.getMap().end())
|
||||||
|
{
|
||||||
|
FSSparseTensor::const_iterator su = t.getMap().upper_bound(c);
|
||||||
|
for (FSSparseTensor::const_iterator srun = sl; srun != su; ++srun)
|
||||||
|
get((*srun).second.first, *run) = (*srun).second.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This is the second way of filling the slice. For instance, let the
|
||||||
|
slice correspond to partitions $abac$. In here we first calculate
|
||||||
|
lower and upper bounds for index of the sparse tensor for the
|
||||||
|
slice. These are |lb_srt| and |ub_srt| respectively. They corresponds
|
||||||
|
to ordering $aabc$. Then we go through that interval, and select items
|
||||||
|
which are really between the bounds. Then we take the index, subtract
|
||||||
|
the lower bound to get it to coordinates of the slice. We get
|
||||||
|
something like $(i_a,j_a,k_b,l_c)$. Then we apply the inverse
|
||||||
|
permutation as of the sorting form $abac\mapsto aabc$ to get index
|
||||||
|
$(i_a,k_b,j_a,l_c)$. Recall that the slice is unfolded, so we have to
|
||||||
|
apply all permutations preserving the stack coordinates $abac$. In our
|
||||||
|
case we get list of indices $(i_a,k_b,j_a,l_c)$ and
|
||||||
|
$(j_a,k_b,i_a,l_c)$. For all we copy the item of the sparse tensor to
|
||||||
|
the appropriate column. */
|
||||||
|
|
||||||
|
void
|
||||||
|
UPSTensor::fillFromSparseTwo(const FSSparseTensor &t, const IntSequence &ss,
|
||||||
|
const IntSequence &coor)
|
||||||
|
{
|
||||||
|
IntSequence coor_srt(coor);
|
||||||
|
coor_srt.sort();
|
||||||
|
IntSequence cum(ss.size());
|
||||||
|
cum[0] = 0;
|
||||||
|
for (int i = 1; i < ss.size(); i++)
|
||||||
|
cum[i] = cum[i-1] + ss[i-1];
|
||||||
|
IntSequence lb_srt(coor.size());
|
||||||
|
IntSequence ub_srt(coor.size());
|
||||||
|
for (int i = 0; i < coor.size(); i++)
|
||||||
|
{
|
||||||
|
lb_srt[i] = cum[coor_srt[i]];
|
||||||
|
ub_srt[i] = cum[coor_srt[i]] + ss[coor_srt[i]] - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
const PermutationSet &pset = tls.pbundle->get(coor.size());
|
||||||
|
vector<const Permutation *> pp = pset.getPreserving(coor);
|
||||||
|
|
||||||
|
Permutation unsort(coor);
|
||||||
|
zeros();
|
||||||
|
FSSparseTensor::const_iterator lbi = t.getMap().lower_bound(lb_srt);
|
||||||
|
FSSparseTensor::const_iterator ubi = t.getMap().upper_bound(ub_srt);
|
||||||
|
for (FSSparseTensor::const_iterator run = lbi; run != ubi; ++run)
|
||||||
|
{
|
||||||
|
if (lb_srt.lessEq((*run).first) && (*run).first.lessEq(ub_srt))
|
||||||
|
{
|
||||||
|
IntSequence c((*run).first);
|
||||||
|
c.add(-1, lb_srt);
|
||||||
|
unsort.apply(c);
|
||||||
|
for (unsigned int i = 0; i < pp.size(); i++)
|
||||||
|
{
|
||||||
|
IntSequence cp(coor.size());
|
||||||
|
pp[i]->apply(c, cp);
|
||||||
|
Tensor::index ind(this, cp);
|
||||||
|
TL_RAISE_IF(*ind < 0 || *ind >= ncols(),
|
||||||
|
"Internal error in slicing constructor of UPSTensor");
|
||||||
|
get((*run).second.first, *ind) = (*run).second.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we calculate the maximum offsets in each folded dimension
|
||||||
|
(dimension sizes, hence |ds|). */
|
||||||
|
|
||||||
|
void
|
||||||
|
PerTensorDimens2::setDimensionSizes()
|
||||||
|
{
|
||||||
|
const IntSequence &nvs = getNVS();
|
||||||
|
for (int i = 0; i < numSyms(); i++)
|
||||||
|
{
|
||||||
|
TensorDimens td(syms[i], nvs);
|
||||||
|
ds[i] = td.calcFoldMaxOffset();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If there are two folded dimensions, the offset in such a dimension
|
||||||
|
is offset of the second plus offset of the first times the maximum
|
||||||
|
offset of the second. If there are $n+1$ dimensions, the offset is a
|
||||||
|
sum of offsets of the last dimension plus the offset in the first $n$
|
||||||
|
dimensions multiplied by the maximum offset of the last
|
||||||
|
dimension. This is exactly what the following code does. */
|
||||||
|
|
||||||
|
int
|
||||||
|
PerTensorDimens2::calcOffset(const IntSequence &coor) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(coor.size() != dimen(),
|
||||||
|
"Wrong length of coordinates in PerTensorDimens2::calcOffset");
|
||||||
|
IntSequence cc(coor);
|
||||||
|
int ret = 0;
|
||||||
|
int off = 0;
|
||||||
|
for (int i = 0; i < numSyms(); i++)
|
||||||
|
{
|
||||||
|
TensorDimens td(syms[i], getNVS());
|
||||||
|
IntSequence c(cc, off, off+syms[i].dimen());
|
||||||
|
int a = td.calcFoldOffset(c);
|
||||||
|
ret = ret*ds[i] + a;
|
||||||
|
off += syms[i].dimen();
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
PerTensorDimens2::print() const
|
||||||
|
{
|
||||||
|
printf("nvmax: "); nvmax.print();
|
||||||
|
printf("per: "); per.print();
|
||||||
|
printf("syms: "); syms.print();
|
||||||
|
printf("dims: "); ds.print();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we increment the given integer sequence. It corresponds to
|
||||||
|
|UTensor::increment| of the whole sequence, and then partial
|
||||||
|
monotonizing of the subsequences with respect to the
|
||||||
|
symmetries of each dimension. */
|
||||||
|
|
||||||
|
void
|
||||||
|
FPSTensor::increment(IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong length of coordinates in FPSTensor::increment");
|
||||||
|
UTensor::increment(v, tdims.getNVX());
|
||||||
|
int off = 0;
|
||||||
|
for (int i = 0; i < tdims.numSyms(); i++)
|
||||||
|
{
|
||||||
|
IntSequence c(v, off, off+tdims.getSym(i).dimen());
|
||||||
|
c.pmonotone(tdims.getSym(i));
|
||||||
|
off += tdims.getSym(i).dimen();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
FPSTensor::decrement(IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE("FPSTensor::decrement not implemented");
|
||||||
|
}
|
||||||
|
|
||||||
|
UTensor &
|
||||||
|
FPSTensor::unfold() const
|
||||||
|
{
|
||||||
|
TL_RAISE("Unfolding of FPSTensor not implemented");
|
||||||
|
UFSTensor *nothing = new UFSTensor(0, 0, 0);
|
||||||
|
return *nothing;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We only call |calcOffset| of the |PerTensorDimens2|. */
|
||||||
|
|
||||||
|
int
|
||||||
|
FPSTensor::getOffset(const IntSequence &v) const
|
||||||
|
{
|
||||||
|
return tdims.calcOffset(v);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we add the tensor to |out|. We go through all columns of the
|
||||||
|
|out|, apply the permutation to get index in the tensor, and add the
|
||||||
|
column. Note that if the permutation is identity, then the dimensions
|
||||||
|
of the tensors might not be the same (since this tensor is partially
|
||||||
|
folded). */
|
||||||
|
|
||||||
|
void
|
||||||
|
FPSTensor::addTo(FGSTensor &out) const
|
||||||
|
{
|
||||||
|
for (index tar = out.begin(); tar != out.end(); ++tar)
|
||||||
|
{
|
||||||
|
IntSequence coor(dimen());
|
||||||
|
tdims.getPer().apply(tar.getCoor(), coor);
|
||||||
|
index src(this, coor);
|
||||||
|
out.addColumn(*this, *src, *tar);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here is the constructor which multiplies the Kronecker product with
|
||||||
|
the general symmetry sparse tensor |GSSparseTensor|. The main idea is
|
||||||
|
to go through items in the sparse tensor (each item selects rows in
|
||||||
|
the matrices form the Kornecker product), then to Kronecker multiply
|
||||||
|
the rows and multiply with the item, and to add the resulting row to
|
||||||
|
the appropriate row of the resulting |FPSTensor|.
|
||||||
|
|
||||||
|
The realization of this idea is a bit more complicated since we have
|
||||||
|
to go through all items, and each item must be added as many times as
|
||||||
|
it has its symmetric elements. Moreover, the permutations shuffle
|
||||||
|
order of rows in their Kronecker product.
|
||||||
|
|
||||||
|
So, we through all unfolded indices in a tensor with the same
|
||||||
|
dimensions as the |GSSparseTensor| (sparse slice). For each such index
|
||||||
|
we calculate its folded version (corresponds to ordering of
|
||||||
|
subsequences within symmetries), we test if there is an item in the
|
||||||
|
sparse slice with such coordinates, and if there is, we construct the
|
||||||
|
Kronecker product of the rows, and go through all of items with the
|
||||||
|
coordinates, and add to appropriate rows of |this| tensor. */
|
||||||
|
|
||||||
|
FPSTensor::FPSTensor(const TensorDimens &td, const Equivalence &e, const Permutation &p,
|
||||||
|
const GSSparseTensor &a, const KronProdAll &kp)
|
||||||
|
: FTensor(along_col, PerTensorDimens(td, Permutation(e, p)).getNVX(),
|
||||||
|
a.nrows(), kp.ncols(), td.dimen()),
|
||||||
|
tdims(td, e, p)
|
||||||
|
{
|
||||||
|
zeros();
|
||||||
|
|
||||||
|
UGSTensor dummy(0, a.getDims());
|
||||||
|
for (Tensor::index run = dummy.begin(); run != dummy.end(); ++run)
|
||||||
|
{
|
||||||
|
Tensor::index fold_ind = dummy.getFirstIndexOf(run);
|
||||||
|
const IntSequence &c = fold_ind.getCoor();
|
||||||
|
GSSparseTensor::const_iterator sl = a.getMap().lower_bound(c);
|
||||||
|
if (sl != a.getMap().end())
|
||||||
|
{
|
||||||
|
Vector *row_prod = kp.multRows(run.getCoor());
|
||||||
|
GSSparseTensor::const_iterator su = a.getMap().upper_bound(c);
|
||||||
|
for (GSSparseTensor::const_iterator srun = sl; srun != su; ++srun)
|
||||||
|
{
|
||||||
|
Vector out_row((*srun).second.first, *this);
|
||||||
|
out_row.add((*srun).second.second, *row_prod);
|
||||||
|
}
|
||||||
|
delete row_prod;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,422 +0,0 @@
|
||||||
@q $Id: ps_tensor.cweb 148 2005-04-19 15:12:26Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt ps\_tensor.cpp} file.
|
|
||||||
@c
|
|
||||||
#include "ps_tensor.h"
|
|
||||||
#include "fs_tensor.h"
|
|
||||||
#include "tl_exception.h"
|
|
||||||
#include "tl_static.h"
|
|
||||||
#include "stack_container.h"
|
|
||||||
|
|
||||||
@<|UPSTensor::decideFillMethod| code@>;
|
|
||||||
@<|UPSTensor| slicing constructor code@>;
|
|
||||||
@<|UPSTensor| increment and decrement@>;
|
|
||||||
@<|UPSTensor::fold| code@>;
|
|
||||||
@<|UPSTensor::getOffset| code@>;
|
|
||||||
@<|UPSTensor::addTo| folded code@>;
|
|
||||||
@<|UPSTensor::addTo| unfolded code@>;
|
|
||||||
@<|UPSTensor::tailIdentitySize| code@>;
|
|
||||||
@<|UPSTensor::fillFromSparseOne| code@>;
|
|
||||||
@<|UPSTensor::fillFromSparseTwo| code@>;
|
|
||||||
@<|PerTensorDimens2::setDimensionSizes| code@>;
|
|
||||||
@<|PerTensorDimens2::calcOffset| code@>;
|
|
||||||
@<|PerTensorDimens2::print| code@>;
|
|
||||||
@<|FPSTensor::increment| code@>;
|
|
||||||
@<|FPSTensor::decrement| code@>;
|
|
||||||
@<|FPSTensor::unfold| code@>;
|
|
||||||
@<|FPSTensor::getOffset| code@>;
|
|
||||||
@<|FPSTensor::addTo| code@>;
|
|
||||||
@<|FPSTensor| sparse constructor@>;
|
|
||||||
|
|
||||||
@ Here we decide, what method for filling a slice in slicing
|
|
||||||
constructor to use. A few experiments suggest, that if the tensor is
|
|
||||||
more than 8\% filled, the first method (|fillFromSparseOne|) is
|
|
||||||
better. For fill factors less than 1\%, the second can be 3 times
|
|
||||||
quicker.
|
|
||||||
|
|
||||||
@<|UPSTensor::decideFillMethod| code@>=
|
|
||||||
UPSTensor::fill_method UPSTensor::decideFillMethod(const FSSparseTensor& t)
|
|
||||||
{
|
|
||||||
if (t.getFillFactor() > 0.08)
|
|
||||||
return first;
|
|
||||||
else
|
|
||||||
return second;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we make a slice. We decide what fill method to use and set it.
|
|
||||||
|
|
||||||
@<|UPSTensor| slicing constructor code@>=
|
|
||||||
UPSTensor::UPSTensor(const FSSparseTensor& t, const IntSequence& ss,
|
|
||||||
const IntSequence& coor, const PerTensorDimens& ptd)
|
|
||||||
: UTensor(along_col, ptd.getNVX(),
|
|
||||||
t.nrows(), ptd.calcUnfoldMaxOffset(), ptd.dimen()),
|
|
||||||
tdims(ptd)
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(coor.size() != t.dimen(),
|
|
||||||
"Wrong coordinates length of stacks for UPSTensor slicing constructor");
|
|
||||||
TL_RAISE_IF(ss.sum() != t.nvar(),
|
|
||||||
"Wrong length of stacks for UPSTensor slicing constructor");
|
|
||||||
|
|
||||||
if (first == decideFillMethod(t))
|
|
||||||
fillFromSparseOne(t, ss, coor);
|
|
||||||
else
|
|
||||||
fillFromSparseTwo(t, ss, coor);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|UPSTensor| increment and decrement@>=
|
|
||||||
void UPSTensor::increment(IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input/output vector size in UPSTensor::increment");
|
|
||||||
|
|
||||||
UTensor::increment(v, tdims.getNVX());
|
|
||||||
}
|
|
||||||
|
|
||||||
void UPSTensor::decrement(IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input/output vector size in UPSTensor::decrement");
|
|
||||||
|
|
||||||
UTensor::decrement(v, tdims.getNVX());
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|UPSTensor::fold| code@>=
|
|
||||||
FTensor& UPSTensor::fold() const
|
|
||||||
{
|
|
||||||
TL_RAISE("Never should come to this place in UPSTensor::fold");
|
|
||||||
FFSTensor* nothing = new FFSTensor(0,0,0);
|
|
||||||
return *nothing;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|UPSTensor::getOffset| code@>=
|
|
||||||
int UPSTensor::getOffset(const IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input vector size in UPSTensor::getOffset");
|
|
||||||
|
|
||||||
return UTensor::getOffset(v, tdims.getNVX());
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|UPSTensor::addTo| folded code@>=
|
|
||||||
void UPSTensor::addTo(FGSTensor& out) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(out.getDims() != tdims,
|
|
||||||
"Tensors have incompatible dimens in UPSTensor::addTo");
|
|
||||||
for (index in = out.begin(); in != out.end(); ++in) {
|
|
||||||
IntSequence vtmp(dimen());
|
|
||||||
tdims.getPer().apply(in.getCoor(), vtmp);
|
|
||||||
index tin(this, vtmp);
|
|
||||||
out.addColumn(*this, *tin, *in);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ In here, we have to add this permuted symmetry unfolded tensor to an
|
|
||||||
unfolded not permuted tensor. One easy way would be to go through the
|
|
||||||
target tensor, permute each index, and add the column.
|
|
||||||
|
|
||||||
However, it may happen, that the permutation has some non-empty
|
|
||||||
identity tail. In this case, we can add not only individual columns,
|
|
||||||
but much bigger data chunks, which is usually more
|
|
||||||
efficient. Therefore, the code is quite dirty, because we have not an
|
|
||||||
iterator, which iterates over tensor at some higher levels. So we
|
|
||||||
simulate it by the following code.
|
|
||||||
|
|
||||||
First we set |cols| to the length of the data chunk and |off| to its
|
|
||||||
dimension. Then we need a front part of |nvmax| of |out|, which is
|
|
||||||
|nvmax_part|. Our iterator here is an integer sequence |outrun| with
|
|
||||||
full length, and |outrun_part| its front part. The |outrun| is
|
|
||||||
initialized to zeros. In each step we need to increment |outrun|
|
|
||||||
|cols|-times, this is done by incrementing its prefix |outrun_part|.
|
|
||||||
|
|
||||||
So we loop over all |cols|wide partitions of |out|, permute |outrun|
|
|
||||||
to obtain |perrun| to obtain column of this matrix. (note that the
|
|
||||||
trailing part of |perrun| is the same as of |outrun|. Then we
|
|
||||||
construct submatrices, add them, and increment |outrun|.
|
|
||||||
|
|
||||||
@<|UPSTensor::addTo| unfolded code@>=
|
|
||||||
void UPSTensor::addTo(UGSTensor& out) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(out.getDims() != tdims,
|
|
||||||
"Tensors have incompatible dimens in UPSTensor::addTo");
|
|
||||||
int cols = tailIdentitySize();
|
|
||||||
int off = tdims.tailIdentity();
|
|
||||||
IntSequence outrun(out.dimen(), 0);
|
|
||||||
IntSequence outrun_part(outrun, 0, out.dimen()-off);
|
|
||||||
IntSequence nvmax_part(out.getDims().getNVX(), 0, out.dimen()-off);
|
|
||||||
for (int out_col = 0; out_col < out.ncols(); out_col+=cols) {
|
|
||||||
// permute |outrun|
|
|
||||||
IntSequence perrun(out.dimen());
|
|
||||||
tdims.getPer().apply(outrun, perrun);
|
|
||||||
index from(this, perrun);
|
|
||||||
// construct submatrices
|
|
||||||
ConstTwoDMatrix subfrom(*this, *from, cols);
|
|
||||||
TwoDMatrix subout(out, out_col, cols);
|
|
||||||
// add
|
|
||||||
subout.add(1, subfrom);
|
|
||||||
// increment |outrun| by cols
|
|
||||||
UTensor::increment(outrun_part, nvmax_part);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ This returns a product of all items in |nvmax| which make up the
|
|
||||||
trailing identity part.
|
|
||||||
|
|
||||||
@<|UPSTensor::tailIdentitySize| code@>=
|
|
||||||
int UPSTensor::tailIdentitySize() const
|
|
||||||
{
|
|
||||||
return tdims.getNVX().mult(dimen()-tdims.tailIdentity(), dimen());
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This fill method is pretty dumb. We go through all columns in |this|
|
|
||||||
tensor, translate coordinates to sparse tensor, sort them and find an
|
|
||||||
item in the sparse tensor. There are many not successful lookups for
|
|
||||||
really sparse tensor, that is why the second method works better for
|
|
||||||
really sparse tensors.
|
|
||||||
|
|
||||||
@<|UPSTensor::fillFromSparseOne| code@>=
|
|
||||||
void UPSTensor::fillFromSparseOne(const FSSparseTensor& t, const IntSequence& ss,
|
|
||||||
const IntSequence& coor)
|
|
||||||
{
|
|
||||||
IntSequence cumtmp(ss.size());
|
|
||||||
cumtmp[0] = 0;
|
|
||||||
for (int i = 1; i < ss.size(); i++)
|
|
||||||
cumtmp[i] = cumtmp[i-1] + ss[i-1];
|
|
||||||
IntSequence cum(coor.size());
|
|
||||||
for (int i = 0; i < coor.size(); i++)
|
|
||||||
cum[i] = cumtmp[coor[i]];
|
|
||||||
|
|
||||||
zeros();
|
|
||||||
for (Tensor::index run = begin(); run != end(); ++run) {
|
|
||||||
IntSequence c(run.getCoor());
|
|
||||||
c.add(1, cum);
|
|
||||||
c.sort();
|
|
||||||
FSSparseTensor::const_iterator sl = t.getMap().lower_bound(c);
|
|
||||||
if (sl != t.getMap().end()) {
|
|
||||||
FSSparseTensor::const_iterator su = t.getMap().upper_bound(c);
|
|
||||||
for (FSSparseTensor::const_iterator srun = sl; srun != su; ++srun)
|
|
||||||
get((*srun).second.first, *run) = (*srun).second.second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This is the second way of filling the slice. For instance, let the
|
|
||||||
slice correspond to partitions $abac$. In here we first calculate
|
|
||||||
lower and upper bounds for index of the sparse tensor for the
|
|
||||||
slice. These are |lb_srt| and |ub_srt| respectively. They corresponds
|
|
||||||
to ordering $aabc$. Then we go through that interval, and select items
|
|
||||||
which are really between the bounds. Then we take the index, subtract
|
|
||||||
the lower bound to get it to coordinates of the slice. We get
|
|
||||||
something like $(i_a,j_a,k_b,l_c)$. Then we apply the inverse
|
|
||||||
permutation as of the sorting form $abac\mapsto aabc$ to get index
|
|
||||||
$(i_a,k_b,j_a,l_c)$. Recall that the slice is unfolded, so we have to
|
|
||||||
apply all permutations preserving the stack coordinates $abac$. In our
|
|
||||||
case we get list of indices $(i_a,k_b,j_a,l_c)$ and
|
|
||||||
$(j_a,k_b,i_a,l_c)$. For all we copy the item of the sparse tensor to
|
|
||||||
the appropriate column.
|
|
||||||
|
|
||||||
@<|UPSTensor::fillFromSparseTwo| code@>=
|
|
||||||
void UPSTensor::fillFromSparseTwo(const FSSparseTensor& t, const IntSequence& ss,
|
|
||||||
const IntSequence& coor)
|
|
||||||
{
|
|
||||||
IntSequence coor_srt(coor);
|
|
||||||
coor_srt.sort();
|
|
||||||
IntSequence cum(ss.size());
|
|
||||||
cum[0] = 0;
|
|
||||||
for (int i = 1; i < ss.size(); i++)
|
|
||||||
cum[i] = cum[i-1] + ss[i-1];
|
|
||||||
IntSequence lb_srt(coor.size());
|
|
||||||
IntSequence ub_srt(coor.size());
|
|
||||||
for (int i = 0; i < coor.size(); i++) {
|
|
||||||
lb_srt[i] = cum[coor_srt[i]];
|
|
||||||
ub_srt[i] = cum[coor_srt[i]] + ss[coor_srt[i]] - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
const PermutationSet& pset = tls.pbundle->get(coor.size());
|
|
||||||
vector<const Permutation*> pp = pset.getPreserving(coor);
|
|
||||||
|
|
||||||
Permutation unsort(coor);
|
|
||||||
zeros();
|
|
||||||
FSSparseTensor::const_iterator lbi = t.getMap().lower_bound(lb_srt);
|
|
||||||
FSSparseTensor::const_iterator ubi = t.getMap().upper_bound(ub_srt);
|
|
||||||
for (FSSparseTensor::const_iterator run = lbi; run != ubi; ++run) {
|
|
||||||
if (lb_srt.lessEq((*run).first) && (*run).first.lessEq(ub_srt)) {
|
|
||||||
IntSequence c((*run).first);
|
|
||||||
c.add(-1, lb_srt);
|
|
||||||
unsort.apply(c);
|
|
||||||
for (unsigned int i = 0; i < pp.size(); i++) {
|
|
||||||
IntSequence cp(coor.size());
|
|
||||||
pp[i]->apply(c, cp);
|
|
||||||
Tensor::index ind(this, cp);
|
|
||||||
TL_RAISE_IF(*ind < 0 || *ind >= ncols(),
|
|
||||||
"Internal error in slicing constructor of UPSTensor");
|
|
||||||
get((*run).second.first, *ind) = (*run).second.second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we calculate the maximum offsets in each folded dimension
|
|
||||||
(dimension sizes, hence |ds|).
|
|
||||||
|
|
||||||
@<|PerTensorDimens2::setDimensionSizes| code@>=
|
|
||||||
void PerTensorDimens2::setDimensionSizes()
|
|
||||||
{
|
|
||||||
const IntSequence& nvs = getNVS();
|
|
||||||
for (int i = 0; i < numSyms(); i++) {
|
|
||||||
TensorDimens td(syms[i], nvs);
|
|
||||||
ds[i] = td.calcFoldMaxOffset();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ If there are two folded dimensions, the offset in such a dimension
|
|
||||||
is offset of the second plus offset of the first times the maximum
|
|
||||||
offset of the second. If there are $n+1$ dimensions, the offset is a
|
|
||||||
sum of offsets of the last dimension plus the offset in the first $n$
|
|
||||||
dimensions multiplied by the maximum offset of the last
|
|
||||||
dimension. This is exactly what the following code does.
|
|
||||||
|
|
||||||
@<|PerTensorDimens2::calcOffset| code@>=
|
|
||||||
int PerTensorDimens2::calcOffset(const IntSequence& coor) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(coor.size() != dimen(),
|
|
||||||
"Wrong length of coordinates in PerTensorDimens2::calcOffset");
|
|
||||||
IntSequence cc(coor);
|
|
||||||
int ret = 0;
|
|
||||||
int off = 0;
|
|
||||||
for (int i = 0; i < numSyms(); i++) {
|
|
||||||
TensorDimens td(syms[i], getNVS());
|
|
||||||
IntSequence c(cc, off, off+syms[i].dimen());
|
|
||||||
int a = td.calcFoldOffset(c);
|
|
||||||
ret = ret*ds[i] + a;
|
|
||||||
off += syms[i].dimen();
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|PerTensorDimens2::print| code@>=
|
|
||||||
void PerTensorDimens2::print() const
|
|
||||||
{
|
|
||||||
printf("nvmax: "); nvmax.print();
|
|
||||||
printf("per: "); per.print();
|
|
||||||
printf("syms: "); syms.print();
|
|
||||||
printf("dims: "); ds.print();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we increment the given integer sequence. It corresponds to
|
|
||||||
|UTensor::increment| of the whole sequence, and then partial
|
|
||||||
monotonizing of the subsequences with respect to the
|
|
||||||
symmetries of each dimension.
|
|
||||||
|
|
||||||
@<|FPSTensor::increment| code@>=
|
|
||||||
void FPSTensor::increment(IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong length of coordinates in FPSTensor::increment");
|
|
||||||
UTensor::increment(v, tdims.getNVX());
|
|
||||||
int off = 0;
|
|
||||||
for (int i = 0; i < tdims.numSyms(); i++) {
|
|
||||||
IntSequence c(v, off, off+tdims.getSym(i).dimen());
|
|
||||||
c.pmonotone(tdims.getSym(i));
|
|
||||||
off += tdims.getSym(i).dimen();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|FPSTensor::decrement| code@>=
|
|
||||||
void FPSTensor::decrement(IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE("FPSTensor::decrement not implemented");
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|FPSTensor::unfold| code@>=
|
|
||||||
UTensor& FPSTensor::unfold() const
|
|
||||||
{
|
|
||||||
TL_RAISE("Unfolding of FPSTensor not implemented");
|
|
||||||
UFSTensor* nothing = new UFSTensor(0,0,0);
|
|
||||||
return *nothing;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ We only call |calcOffset| of the |PerTensorDimens2|.
|
|
||||||
@<|FPSTensor::getOffset| code@>=
|
|
||||||
int FPSTensor::getOffset(const IntSequence& v) const
|
|
||||||
{
|
|
||||||
return tdims.calcOffset(v);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we add the tensor to |out|. We go through all columns of the
|
|
||||||
|out|, apply the permutation to get index in the tensor, and add the
|
|
||||||
column. Note that if the permutation is identity, then the dimensions
|
|
||||||
of the tensors might not be the same (since this tensor is partially
|
|
||||||
folded).
|
|
||||||
|
|
||||||
@<|FPSTensor::addTo| code@>=
|
|
||||||
void FPSTensor::addTo(FGSTensor& out) const
|
|
||||||
{
|
|
||||||
for (index tar = out.begin(); tar != out.end(); ++tar) {
|
|
||||||
IntSequence coor(dimen());
|
|
||||||
tdims.getPer().apply(tar.getCoor(), coor);
|
|
||||||
index src(this, coor);
|
|
||||||
out.addColumn(*this, *src, *tar);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here is the constructor which multiplies the Kronecker product with
|
|
||||||
the general symmetry sparse tensor |GSSparseTensor|. The main idea is
|
|
||||||
to go through items in the sparse tensor (each item selects rows in
|
|
||||||
the matrices form the Kornecker product), then to Kronecker multiply
|
|
||||||
the rows and multiply with the item, and to add the resulting row to
|
|
||||||
the appropriate row of the resulting |FPSTensor|.
|
|
||||||
|
|
||||||
The realization of this idea is a bit more complicated since we have
|
|
||||||
to go through all items, and each item must be added as many times as
|
|
||||||
it has its symmetric elements. Moreover, the permutations shuffle
|
|
||||||
order of rows in their Kronecker product.
|
|
||||||
|
|
||||||
So, we through all unfolded indices in a tensor with the same
|
|
||||||
dimensions as the |GSSparseTensor| (sparse slice). For each such index
|
|
||||||
we calculate its folded version (corresponds to ordering of
|
|
||||||
subsequences within symmetries), we test if there is an item in the
|
|
||||||
sparse slice with such coordinates, and if there is, we construct the
|
|
||||||
Kronecker product of the rows, and go through all of items with the
|
|
||||||
coordinates, and add to appropriate rows of |this| tensor.
|
|
||||||
|
|
||||||
@<|FPSTensor| sparse constructor@>=
|
|
||||||
FPSTensor::FPSTensor(const TensorDimens& td, const Equivalence& e, const Permutation& p,
|
|
||||||
const GSSparseTensor& a, const KronProdAll& kp)
|
|
||||||
: FTensor(along_col, PerTensorDimens(td, Permutation(e, p)).getNVX(),
|
|
||||||
a.nrows(), kp.ncols(), td.dimen()),
|
|
||||||
tdims(td, e, p)
|
|
||||||
{
|
|
||||||
zeros();
|
|
||||||
|
|
||||||
UGSTensor dummy(0, a.getDims());
|
|
||||||
for (Tensor::index run = dummy.begin(); run != dummy.end(); ++run) {
|
|
||||||
Tensor::index fold_ind = dummy.getFirstIndexOf(run);
|
|
||||||
const IntSequence& c = fold_ind.getCoor();
|
|
||||||
GSSparseTensor::const_iterator sl = a.getMap().lower_bound(c);
|
|
||||||
if (sl != a.getMap().end()) {
|
|
||||||
Vector* row_prod = kp.multRows(run.getCoor());
|
|
||||||
GSSparseTensor::const_iterator su = a.getMap().upper_bound(c);
|
|
||||||
for (GSSparseTensor::const_iterator srun = sl; srun != su; ++srun) {
|
|
||||||
Vector out_row((*srun).second.first, *this);
|
|
||||||
out_row.add((*srun).second.second, *row_prod);
|
|
||||||
}
|
|
||||||
delete row_prod;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt ps\_tensor.cpp} file.
|
|
|
@ -0,0 +1,384 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// Even more general symmetry tensor.
|
||||||
|
|
||||||
|
/* Here we define an abstraction for a tensor, which has a general
|
||||||
|
symmetry, but the symmetry is not of what is modelled by
|
||||||
|
|Symmetry|. This kind of tensor comes to existence when we evaluate
|
||||||
|
something like:
|
||||||
|
$$\left[B_{y^2u^3}\right]_{\alpha_1\alpha_2\beta_1\beta_2\beta_3}=
|
||||||
|
\cdots+\left[g_{y^3}\right]_{\gamma_1\gamma_2\gamma_3}
|
||||||
|
\left[g_{yu}\right]^{\gamma_1}_{\alpha_1\beta_3}
|
||||||
|
\left[g_{yu}\right]^{\gamma_2}_{\alpha_2\beta_1}
|
||||||
|
\left[g_u\right]^{\gamma_3}_{\beta_2}+\cdots
|
||||||
|
$$
|
||||||
|
If the tensors are unfolded, we obtain a tensor
|
||||||
|
$$g_{y^3}\cdot\left(g_{yu}\otimes g_{yu}\otimes g_{u}\right)$$
|
||||||
|
|
||||||
|
Obviously, this tensor can have a symmetry not compatible with
|
||||||
|
ordering $\alpha_1\alpha_2\beta_1\beta_2\beta_3$, (in other words, not
|
||||||
|
compatible with symmetry $y^2u^3$). In fact, the indices are permuted.
|
||||||
|
|
||||||
|
This kind of tensor must be added to $\left[B_{y^2u^3}\right]$. Its
|
||||||
|
dimensions are the same as of $\left[B_{y^2u^3}\right]$, but some
|
||||||
|
coordinates are permuted. The addition is the only action we need to
|
||||||
|
do with the tensor.
|
||||||
|
|
||||||
|
Another application where this permuted symmetry tensor appears is a
|
||||||
|
slice of a fully symmetric tensor. If the symmetric dimension of the
|
||||||
|
tensor is partitioned to continuous parts, and we are interested only
|
||||||
|
in data with a given symmetry (permuted) of the partitions, then we
|
||||||
|
have the permuted symmetry tensor. For instance, if $x$ is partitioned
|
||||||
|
$x=[a,b,c,d]$, and having tensor $\left[f_{x^3}\right]$, one can d a
|
||||||
|
slice (subtensor) $\left[f_{aca}\right]$. The data of this tensor are
|
||||||
|
permuted of $\left[f_{a^c}\right]$.
|
||||||
|
|
||||||
|
Here we also define the folded version of permuted symmetry tensor. It
|
||||||
|
has permuted symmetry and is partially folded. One can imagine it as a
|
||||||
|
product of a few dimensions, each of them is folded and having a few
|
||||||
|
variables. The underlying variables are permuted. The product of such
|
||||||
|
dimensions is described by |PerTensorDimens2|. The tensor holding the
|
||||||
|
underlying data is |FPSTensor|. */
|
||||||
|
|
||||||
|
#ifndef PS_TENSOR_H
|
||||||
|
#define PS_TENSOR_H
|
||||||
|
|
||||||
|
#include "tensor.hh"
|
||||||
|
#include "gs_tensor.hh"
|
||||||
|
#include "equivalence.hh"
|
||||||
|
#include "permutation.hh"
|
||||||
|
#include "kron_prod.hh"
|
||||||
|
#include "sparse_tensor.hh"
|
||||||
|
|
||||||
|
/* This is just a helper class for ordering a sequence on call stack. */
|
||||||
|
|
||||||
|
class SortIntSequence : public IntSequence
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SortIntSequence(const IntSequence &s)
|
||||||
|
: IntSequence(s)
|
||||||
|
{
|
||||||
|
sort();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Here we declare a class describing dimensions of permuted symmetry
|
||||||
|
tensor. It inherits from |TensorDimens| and adds a permutation which
|
||||||
|
permutes |nvmax|. It has two constructors, each corresponds to a
|
||||||
|
context where the tensor appears.
|
||||||
|
|
||||||
|
The first constructor calculates the permutation from a given equivalence.
|
||||||
|
|
||||||
|
The second constructor corresponds to dimensions of a slice. Let us
|
||||||
|
take $\left[f_{aca}\right]$ as an example. First it calculates
|
||||||
|
|TensorDimens| of $\left[f_{a^c}\right]$, then it calculates a
|
||||||
|
permutation corresponding to ordering of $aca$ to $a^2c$, and applies
|
||||||
|
this permutation on the dimensions as the first constructor. The
|
||||||
|
constructor takes only stack sizes (lengths of $a$, $b$, $c$, and
|
||||||
|
$d$), and coordinates of picked partitions.
|
||||||
|
|
||||||
|
Note that inherited methods |calcUnfoldColumns| and |calcFoldColumns|
|
||||||
|
work, since number of columns is independent on the permutation, and
|
||||||
|
|calcFoldColumns| does not use changed |nvmax|, it uses |nvs|, so it
|
||||||
|
is OK. */
|
||||||
|
|
||||||
|
class PerTensorDimens : public TensorDimens
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
Permutation per;
|
||||||
|
public:
|
||||||
|
PerTensorDimens(const Symmetry &s, const IntSequence &nvars,
|
||||||
|
const Equivalence &e)
|
||||||
|
: TensorDimens(s, nvars), per(e)
|
||||||
|
{
|
||||||
|
per.apply(nvmax);
|
||||||
|
}
|
||||||
|
PerTensorDimens(const TensorDimens &td, const Equivalence &e)
|
||||||
|
: TensorDimens(td), per(e)
|
||||||
|
{
|
||||||
|
per.apply(nvmax);
|
||||||
|
}
|
||||||
|
PerTensorDimens(const TensorDimens &td, const Permutation &p)
|
||||||
|
: TensorDimens(td), per(p)
|
||||||
|
{
|
||||||
|
per.apply(nvmax);
|
||||||
|
}
|
||||||
|
PerTensorDimens(const IntSequence &ss, const IntSequence &coor)
|
||||||
|
: TensorDimens(ss, SortIntSequence(coor)), per(coor)
|
||||||
|
{
|
||||||
|
per.apply(nvmax);
|
||||||
|
}
|
||||||
|
PerTensorDimens(const PerTensorDimens &td)
|
||||||
|
: TensorDimens(td), per(td.per)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
const PerTensorDimens &
|
||||||
|
operator=(const PerTensorDimens &td)
|
||||||
|
{
|
||||||
|
TensorDimens::operator=(td); per = td.per; return *this;
|
||||||
|
}
|
||||||
|
bool
|
||||||
|
operator==(const PerTensorDimens &td)
|
||||||
|
{
|
||||||
|
return TensorDimens::operator==(td) && per == td.per;
|
||||||
|
}
|
||||||
|
int
|
||||||
|
tailIdentity() const
|
||||||
|
{
|
||||||
|
return per.tailIdentity();
|
||||||
|
}
|
||||||
|
const Permutation &
|
||||||
|
getPer() const
|
||||||
|
{
|
||||||
|
return per;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Here we declare the permuted symmetry unfolded tensor. It has
|
||||||
|
|PerTensorDimens| as a member. It inherits from |UTensor| which
|
||||||
|
requires to implement |fold| method. There is no folded counterpart,
|
||||||
|
so in our implementation we raise unconditional exception, and return
|
||||||
|
some dummy object (just to make it compilable without warnings).
|
||||||
|
|
||||||
|
The class has two sorts of constructors corresponding to a context where it
|
||||||
|
appears. The first constructs object from a given matrix, and
|
||||||
|
Kronecker product. Within the constructor, all the calculations are
|
||||||
|
performed. Also we need to define dimensions, these are the same of
|
||||||
|
the resulting matrix (in our example $\left[B_{y^2u^3}\right]$) but
|
||||||
|
permuted. The permutation is done in |PerTensorDimens| constructor.
|
||||||
|
|
||||||
|
The second type of constructor is slicing. It makes a slice from
|
||||||
|
|FSSparseTensor|. The slice is given by stack sizes, and coordinates of
|
||||||
|
picked stacks.
|
||||||
|
|
||||||
|
There are two algorithms for filling a slice of a sparse tensor. The
|
||||||
|
first |fillFromSparseOne| works well for more dense tensors, the
|
||||||
|
second |fillFromSparseTwo| is better for very sparse tensors. We
|
||||||
|
provide a static method, which decides what of the two algorithms is
|
||||||
|
better. */
|
||||||
|
|
||||||
|
class UPSTensor : public UTensor
|
||||||
|
{
|
||||||
|
const PerTensorDimens tdims;
|
||||||
|
public:
|
||||||
|
// |UPSTensor| constructors from Kronecker product
|
||||||
|
/* Here we have four constructors making an |UPSTensor| from a product
|
||||||
|
of matrix and Kronecker product. The first constructs the tensor from
|
||||||
|
equivalence classes of the given equivalence in an order given by the
|
||||||
|
equivalence. The second does the same but with optimized
|
||||||
|
|KronProdAllOptim|, which has a different order of matrices than given
|
||||||
|
by the classes in the equivalence. This permutation is projected to
|
||||||
|
the permutation of the |UPSTensor|. The third, is the same as the
|
||||||
|
first, but the classes of the equivalence are permuted by the given
|
||||||
|
permutation. Finally, the fourth is the most general combination. It
|
||||||
|
allows for a permutation of equivalence classes, and for optimized
|
||||||
|
|KronProdAllOptim|, which permutes the permuted equivalence classes. */
|
||||||
|
UPSTensor(const TensorDimens &td, const Equivalence &e,
|
||||||
|
const ConstTwoDMatrix &a, const KronProdAll &kp)
|
||||||
|
: UTensor(along_col, PerTensorDimens(td, e).getNVX(),
|
||||||
|
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e)
|
||||||
|
{
|
||||||
|
kp.mult(a, *this);
|
||||||
|
}
|
||||||
|
UPSTensor(const TensorDimens &td, const Equivalence &e,
|
||||||
|
const ConstTwoDMatrix &a, const KronProdAllOptim &kp)
|
||||||
|
: UTensor(along_col, PerTensorDimens(td, Permutation(e, kp.getPer())).getNVX(),
|
||||||
|
a.nrows(), kp.ncols(), td.dimen()), tdims(td, Permutation(e, kp.getPer()))
|
||||||
|
{
|
||||||
|
kp.mult(a, *this);
|
||||||
|
}
|
||||||
|
UPSTensor(const TensorDimens &td, const Equivalence &e, const Permutation &p,
|
||||||
|
const ConstTwoDMatrix &a, const KronProdAll &kp)
|
||||||
|
: UTensor(along_col, PerTensorDimens(td, Permutation(e, p)).getNVX(),
|
||||||
|
a.nrows(), kp.ncols(), td.dimen()), tdims(td, Permutation(e, p))
|
||||||
|
{
|
||||||
|
kp.mult(a, *this);
|
||||||
|
}
|
||||||
|
UPSTensor(const TensorDimens &td, const Equivalence &e, const Permutation &p,
|
||||||
|
const ConstTwoDMatrix &a, const KronProdAllOptim &kp)
|
||||||
|
: UTensor(along_col, PerTensorDimens(td, Permutation(e, Permutation(p, kp.getPer()))).getNVX(),
|
||||||
|
a.nrows(), kp.ncols(), td.dimen()), tdims(td, Permutation(e, Permutation(p, kp.getPer())))
|
||||||
|
{
|
||||||
|
kp.mult(a, *this);
|
||||||
|
}
|
||||||
|
UPSTensor(const FSSparseTensor &t, const IntSequence &ss,
|
||||||
|
const IntSequence &coor, const PerTensorDimens &ptd);
|
||||||
|
UPSTensor(const UPSTensor &ut)
|
||||||
|
: UTensor(ut), tdims(ut.tdims)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void increment(IntSequence &v) const;
|
||||||
|
void decrement(IntSequence &v) const;
|
||||||
|
FTensor&fold() const;
|
||||||
|
|
||||||
|
int getOffset(const IntSequence &v) const;
|
||||||
|
void addTo(FGSTensor &out) const;
|
||||||
|
void addTo(UGSTensor &out) const;
|
||||||
|
|
||||||
|
enum fill_method {first, second};
|
||||||
|
static fill_method decideFillMethod(const FSSparseTensor &t);
|
||||||
|
private:
|
||||||
|
int tailIdentitySize() const;
|
||||||
|
void fillFromSparseOne(const FSSparseTensor &t, const IntSequence &ss,
|
||||||
|
const IntSequence &coor);
|
||||||
|
void fillFromSparseTwo(const FSSparseTensor &t, const IntSequence &ss,
|
||||||
|
const IntSequence &coor);
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Here we define an abstraction for the tensor dimension with the
|
||||||
|
symmetry like $xuv\vert uv\vert xu\vert y\vert y\vert x\vert x\vert
|
||||||
|
y$. These symmetries come as induces symmetries of equivalence and
|
||||||
|
some outer symmetry. Thus the underlying variables are permuted. One
|
||||||
|
can imagine the dimensions as an unfolded product of dimensions which
|
||||||
|
consist of folded products of variables.
|
||||||
|
|
||||||
|
We inherit from |PerTensorDimens| since we need the permutation
|
||||||
|
implied by the equivalence. The new member are the induced symmetries
|
||||||
|
(symmetries of each folded dimensions) and |ds| which are sizes of the
|
||||||
|
dimensions. The number of folded dimensions is return by |numSyms|.
|
||||||
|
|
||||||
|
The object is constructed from outer tensor dimensions and from
|
||||||
|
equivalence with optionally permuted classes. */
|
||||||
|
|
||||||
|
class PerTensorDimens2 : public PerTensorDimens
|
||||||
|
{
|
||||||
|
InducedSymmetries syms;
|
||||||
|
IntSequence ds;
|
||||||
|
public:
|
||||||
|
PerTensorDimens2(const TensorDimens &td, const Equivalence &e,
|
||||||
|
const Permutation &p)
|
||||||
|
: PerTensorDimens(td, Permutation(e, p)),
|
||||||
|
syms(e, p, td.getSym()),
|
||||||
|
ds(syms.size())
|
||||||
|
{
|
||||||
|
setDimensionSizes();
|
||||||
|
}
|
||||||
|
PerTensorDimens2(const TensorDimens &td, const Equivalence &e)
|
||||||
|
: PerTensorDimens(td, e),
|
||||||
|
syms(e, td.getSym()),
|
||||||
|
ds(syms.size())
|
||||||
|
{
|
||||||
|
setDimensionSizes();
|
||||||
|
}
|
||||||
|
int
|
||||||
|
numSyms() const
|
||||||
|
{
|
||||||
|
return (int) syms.size();
|
||||||
|
}
|
||||||
|
const Symmetry &
|
||||||
|
getSym(int i) const
|
||||||
|
{
|
||||||
|
return syms[i];
|
||||||
|
}
|
||||||
|
int
|
||||||
|
calcMaxOffset() const
|
||||||
|
{
|
||||||
|
return ds.mult();
|
||||||
|
}
|
||||||
|
int calcOffset(const IntSequence &coor) const;
|
||||||
|
void print() const;
|
||||||
|
protected:
|
||||||
|
void setDimensionSizes();
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Here we define an abstraction of the permuted symmetry folded
|
||||||
|
tensor. It is needed in context of the Faa Di Bruno formula for folded
|
||||||
|
stack container multiplied with container of dense folded tensors, or
|
||||||
|
multiplied by one full symmetry sparse tensor.
|
||||||
|
|
||||||
|
For example, if we perform the Faa Di Bruno for $F=f(z)$, where
|
||||||
|
$z=[g(x,y,u,v), h(x,y,u), x, y]^T$, we get for one concrete
|
||||||
|
equivalence:
|
||||||
|
$$
|
||||||
|
\left[F_{x^4y^3u^3v^2}\right]=\ldots+
|
||||||
|
\left[f_{g^2h^2x^2y}\right]\left(
|
||||||
|
[g]_{xv}\otimes[g]_{u^2v}\otimes
|
||||||
|
[h]_{xu}\otimes[h]_{y^2}\otimes
|
||||||
|
\left[\vphantom{\sum}[I]_x\otimes[I]_x\right]\otimes
|
||||||
|
\left[\vphantom{\sum}[I]_y\right]
|
||||||
|
\right)
|
||||||
|
+\ldots
|
||||||
|
$$
|
||||||
|
|
||||||
|
The class |FPSTensor| represents the tensor at the right. Its
|
||||||
|
dimension corresponds to a product of 7 dimensions with the following
|
||||||
|
symmetries: $xv\vert u^v\vert xu\vert y^2\vert x\vert x\vert y$. Such
|
||||||
|
the dimension is described by |PerTensorDimens2|.
|
||||||
|
|
||||||
|
The tensor is constructed in a context of stack container
|
||||||
|
multiplication, so, it is constructed from dimensions |td| (dimensions
|
||||||
|
of the output tensor), stack product |sp| (implied symmetries picking
|
||||||
|
tensors from a stack container, here it is $z$), then a sorted integer
|
||||||
|
sequence of the picked stacks of the stack product (it is always
|
||||||
|
sorted, here it is $(0,0,1,1,2,2,3)$), then the tensor
|
||||||
|
$\left[f_{g^2h^2x^2y}\right]$ (its symmetry must be the same as
|
||||||
|
symmetry given by the |istacks|), and finally from the equivalence
|
||||||
|
with permuted classes.
|
||||||
|
|
||||||
|
We implement |increment| and |getOffset| methods, |decrement| and
|
||||||
|
|unfold| raise an exception. Also, we implement |addTo| method, which
|
||||||
|
adds the tensor data (partially unfolded) to folded general symmetry
|
||||||
|
tensor. */
|
||||||
|
|
||||||
|
template<typename _Ttype>
|
||||||
|
class StackProduct;
|
||||||
|
|
||||||
|
class FPSTensor : public FTensor
|
||||||
|
{
|
||||||
|
const PerTensorDimens2 tdims;
|
||||||
|
public:
|
||||||
|
/* As for |UPSTensor|, we provide four constructors allowing for
|
||||||
|
combinations of permuting equivalence classes, and optimization of
|
||||||
|
|KronProdAllOptim|. These constructors multiply with dense general
|
||||||
|
symmetry tensor (coming from the dense container, or as a dense slice
|
||||||
|
of the full symmetry sparse tensor). In addition to these 4
|
||||||
|
constructors, we have one constructor multiplying with general
|
||||||
|
symmetry sparse tensor (coming as a sparse slice of the full symmetry
|
||||||
|
sparse tensor). */
|
||||||
|
FPSTensor(const TensorDimens &td, const Equivalence &e,
|
||||||
|
const ConstTwoDMatrix &a, const KronProdAll &kp)
|
||||||
|
: FTensor(along_col, PerTensorDimens(td, e).getNVX(),
|
||||||
|
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e)
|
||||||
|
{
|
||||||
|
kp.mult(a, *this);
|
||||||
|
}
|
||||||
|
FPSTensor(const TensorDimens &td, const Equivalence &e,
|
||||||
|
const ConstTwoDMatrix &a, const KronProdAllOptim &kp)
|
||||||
|
: FTensor(along_col, PerTensorDimens(td, Permutation(e, kp.getPer())).getNVX(),
|
||||||
|
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e, kp.getPer())
|
||||||
|
{
|
||||||
|
kp.mult(a, *this);
|
||||||
|
}
|
||||||
|
FPSTensor(const TensorDimens &td, const Equivalence &e, const Permutation &p,
|
||||||
|
const ConstTwoDMatrix &a, const KronProdAll &kp)
|
||||||
|
: FTensor(along_col, PerTensorDimens(td, Permutation(e, p)).getNVX(),
|
||||||
|
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e, p)
|
||||||
|
{
|
||||||
|
kp.mult(a, *this);
|
||||||
|
}
|
||||||
|
FPSTensor(const TensorDimens &td, const Equivalence &e, const Permutation &p,
|
||||||
|
const ConstTwoDMatrix &a, const KronProdAllOptim &kp)
|
||||||
|
: FTensor(along_col, PerTensorDimens(td, Permutation(e, Permutation(p, kp.getPer()))).getNVX(),
|
||||||
|
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e, Permutation(p, kp.getPer()))
|
||||||
|
{
|
||||||
|
kp.mult(a, *this);
|
||||||
|
}
|
||||||
|
|
||||||
|
FPSTensor(const TensorDimens &td, const Equivalence &e, const Permutation &p,
|
||||||
|
const GSSparseTensor &t, const KronProdAll &kp);
|
||||||
|
|
||||||
|
FPSTensor(const FPSTensor &ft)
|
||||||
|
: FTensor(ft), tdims(ft.tdims)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void increment(IntSequence &v) const;
|
||||||
|
void decrement(IntSequence &v) const;
|
||||||
|
UTensor&unfold() const;
|
||||||
|
|
||||||
|
int getOffset(const IntSequence &v) const;
|
||||||
|
void addTo(FGSTensor &out) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,351 +0,0 @@
|
||||||
@q $Id: ps_tensor.hweb 741 2006-05-09 11:12:46Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Even more general symmetry tensor. Start of {\tt ps\_tensor.h} file.
|
|
||||||
|
|
||||||
Here we define an abstraction for a tensor, which has a general
|
|
||||||
symmetry, but the symmetry is not of what is modelled by
|
|
||||||
|Symmetry|. This kind of tensor comes to existence when we evaluate
|
|
||||||
something like:
|
|
||||||
$$\left[B_{y^2u^3}\right]_{\alpha_1\alpha_2\beta_1\beta_2\beta_3}=
|
|
||||||
\cdots+\left[g_{y^3}\right]_{\gamma_1\gamma_2\gamma_3}
|
|
||||||
\left[g_{yu}\right]^{\gamma_1}_{\alpha_1\beta_3}
|
|
||||||
\left[g_{yu}\right]^{\gamma_2}_{\alpha_2\beta_1}
|
|
||||||
\left[g_u\right]^{\gamma_3}_{\beta_2}+\cdots
|
|
||||||
$$
|
|
||||||
If the tensors are unfolded, we obtain a tensor
|
|
||||||
$$g_{y^3}\cdot\left(g_{yu}\otimes g_{yu}\otimes g_{u}\right)$$
|
|
||||||
|
|
||||||
Obviously, this tensor can have a symmetry not compatible with
|
|
||||||
ordering $\alpha_1\alpha_2\beta_1\beta_2\beta_3$, (in other words, not
|
|
||||||
compatible with symmetry $y^2u^3$). In fact, the indices are permuted.
|
|
||||||
|
|
||||||
This kind of tensor must be added to $\left[B_{y^2u^3}\right]$. Its
|
|
||||||
dimensions are the same as of $\left[B_{y^2u^3}\right]$, but some
|
|
||||||
coordinates are permuted. The addition is the only action we need to
|
|
||||||
do with the tensor.
|
|
||||||
|
|
||||||
Another application where this permuted symmetry tensor appears is a
|
|
||||||
slice of a fully symmetric tensor. If the symmetric dimension of the
|
|
||||||
tensor is partitioned to continuous parts, and we are interested only
|
|
||||||
in data with a given symmetry (permuted) of the partitions, then we
|
|
||||||
have the permuted symmetry tensor. For instance, if $x$ is partitioned
|
|
||||||
$x=[a,b,c,d]$, and having tensor $\left[f_{x^3}\right]$, one can d a
|
|
||||||
slice (subtensor) $\left[f_{aca}\right]$. The data of this tensor are
|
|
||||||
permuted of $\left[f_{a^c}\right]$.
|
|
||||||
|
|
||||||
Here we also define the folded version of permuted symmetry tensor. It
|
|
||||||
has permuted symmetry and is partially folded. One can imagine it as a
|
|
||||||
product of a few dimensions, each of them is folded and having a few
|
|
||||||
variables. The underlying variables are permuted. The product of such
|
|
||||||
dimensions is described by |PerTensorDimens2|. The tensor holding the
|
|
||||||
underlying data is |FPSTensor|.
|
|
||||||
|
|
||||||
@s SortIntSequence int
|
|
||||||
@s PerTensorDimens int
|
|
||||||
@s UPSTensor int
|
|
||||||
@s PerTensorDimens2 int
|
|
||||||
@s FPSTensor int
|
|
||||||
@s KronProdFoldStacks int
|
|
||||||
|
|
||||||
@c
|
|
||||||
|
|
||||||
#ifndef PS_TENSOR_H
|
|
||||||
#define PS_TENSOR_H
|
|
||||||
|
|
||||||
#include "tensor.h"
|
|
||||||
#include "gs_tensor.h"
|
|
||||||
#include "equivalence.h"
|
|
||||||
#include "permutation.h"
|
|
||||||
#include "kron_prod.h"
|
|
||||||
#include "sparse_tensor.h"
|
|
||||||
|
|
||||||
@<|SortIntSequence| class declaration@>;
|
|
||||||
@<|PerTensorDimens| class declaration@>;
|
|
||||||
@<|UPSTensor| class declaration@>;
|
|
||||||
@<|PerTensorDimens2| class declaration@>;
|
|
||||||
@<|FPSTensor| class declaration@>;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
@ This is just a helper class for ordering a sequence on call stack.
|
|
||||||
|
|
||||||
@<|SortIntSequence| class declaration@>=
|
|
||||||
class SortIntSequence : public IntSequence {
|
|
||||||
public:@;
|
|
||||||
SortIntSequence(const IntSequence& s)
|
|
||||||
: IntSequence(s) {@+ sort();@+}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we declare a class describing dimensions of permuted symmetry
|
|
||||||
tensor. It inherits from |TensorDimens| and adds a permutation which
|
|
||||||
permutes |nvmax|. It has two constructors, each corresponds to a
|
|
||||||
context where the tensor appears.
|
|
||||||
|
|
||||||
The first constructor calculates the permutation from a given equivalence.
|
|
||||||
|
|
||||||
The second constructor corresponds to dimensions of a slice. Let us
|
|
||||||
take $\left[f_{aca}\right]$ as an example. First it calculates
|
|
||||||
|TensorDimens| of $\left[f_{a^c}\right]$, then it calculates a
|
|
||||||
permutation corresponding to ordering of $aca$ to $a^2c$, and applies
|
|
||||||
this permutation on the dimensions as the first constructor. The
|
|
||||||
constructor takes only stack sizes (lengths of $a$, $b$, $c$, and
|
|
||||||
$d$), and coordinates of picked partitions.
|
|
||||||
|
|
||||||
Note that inherited methods |calcUnfoldColumns| and |calcFoldColumns|
|
|
||||||
work, since number of columns is independent on the permutation, and
|
|
||||||
|calcFoldColumns| does not use changed |nvmax|, it uses |nvs|, so it
|
|
||||||
is OK.
|
|
||||||
|
|
||||||
@<|PerTensorDimens| class declaration@>=
|
|
||||||
class PerTensorDimens : public TensorDimens {
|
|
||||||
protected:@;
|
|
||||||
Permutation per;
|
|
||||||
public:@;
|
|
||||||
PerTensorDimens(const Symmetry& s, const IntSequence& nvars,
|
|
||||||
const Equivalence& e)
|
|
||||||
: TensorDimens(s, nvars), per(e)
|
|
||||||
{@+ per.apply(nvmax);@+}
|
|
||||||
PerTensorDimens(const TensorDimens& td, const Equivalence& e)
|
|
||||||
: TensorDimens(td), per(e)
|
|
||||||
{@+ per.apply(nvmax);@+}
|
|
||||||
PerTensorDimens(const TensorDimens& td, const Permutation& p)
|
|
||||||
: TensorDimens(td), per(p)
|
|
||||||
{@+ per.apply(nvmax);@+}
|
|
||||||
PerTensorDimens(const IntSequence& ss, const IntSequence& coor)
|
|
||||||
: TensorDimens(ss, SortIntSequence(coor)), per(coor)
|
|
||||||
{@+ per.apply(nvmax);@+}
|
|
||||||
PerTensorDimens(const PerTensorDimens& td)
|
|
||||||
: TensorDimens(td), per(td.per)@+ {}
|
|
||||||
const PerTensorDimens& operator=(const PerTensorDimens& td)
|
|
||||||
{@+ TensorDimens::operator=(td);@+ per = td.per;@+ return *this;@+}
|
|
||||||
bool operator==(const PerTensorDimens& td)
|
|
||||||
{@+ return TensorDimens::operator==(td) && per == td.per;@+}
|
|
||||||
int tailIdentity() const
|
|
||||||
{@+ return per.tailIdentity();@+}
|
|
||||||
const Permutation& getPer() const
|
|
||||||
{@+ return per;@+}
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Here we declare the permuted symmetry unfolded tensor. It has
|
|
||||||
|PerTensorDimens| as a member. It inherits from |UTensor| which
|
|
||||||
requires to implement |fold| method. There is no folded counterpart,
|
|
||||||
so in our implementation we raise unconditional exception, and return
|
|
||||||
some dummy object (just to make it compilable without warnings).
|
|
||||||
|
|
||||||
The class has two sorts of constructors corresponding to a context where it
|
|
||||||
appears. The first constructs object from a given matrix, and
|
|
||||||
Kronecker product. Within the constructor, all the calculations are
|
|
||||||
performed. Also we need to define dimensions, these are the same of
|
|
||||||
the resulting matrix (in our example $\left[B_{y^2u^3}\right]$) but
|
|
||||||
permuted. The permutation is done in |PerTensorDimens| constructor.
|
|
||||||
|
|
||||||
The second type of constructor is slicing. It makes a slice from
|
|
||||||
|FSSparseTensor|. The slice is given by stack sizes, and coordinates of
|
|
||||||
picked stacks.
|
|
||||||
|
|
||||||
There are two algorithms for filling a slice of a sparse tensor. The
|
|
||||||
first |fillFromSparseOne| works well for more dense tensors, the
|
|
||||||
second |fillFromSparseTwo| is better for very sparse tensors. We
|
|
||||||
provide a static method, which decides what of the two algorithms is
|
|
||||||
better.
|
|
||||||
|
|
||||||
@<|UPSTensor| class declaration@>=
|
|
||||||
class UPSTensor : public UTensor {
|
|
||||||
const PerTensorDimens tdims;
|
|
||||||
public:@;
|
|
||||||
@<|UPSTensor| constructors from Kronecker product@>;
|
|
||||||
UPSTensor(const FSSparseTensor& t, const IntSequence& ss,
|
|
||||||
const IntSequence& coor, const PerTensorDimens& ptd);
|
|
||||||
UPSTensor(const UPSTensor& ut)
|
|
||||||
: UTensor(ut), tdims(ut.tdims)@+ {}
|
|
||||||
|
|
||||||
void increment(IntSequence& v) const;
|
|
||||||
void decrement(IntSequence& v) const;
|
|
||||||
FTensor& fold() const;
|
|
||||||
|
|
||||||
int getOffset(const IntSequence& v) const;
|
|
||||||
void addTo(FGSTensor& out) const;
|
|
||||||
void addTo(UGSTensor& out) const;
|
|
||||||
|
|
||||||
enum fill_method {first, second};
|
|
||||||
static fill_method decideFillMethod(const FSSparseTensor& t);
|
|
||||||
private:@;
|
|
||||||
int tailIdentitySize() const;
|
|
||||||
void fillFromSparseOne(const FSSparseTensor& t, const IntSequence& ss,
|
|
||||||
const IntSequence& coor);
|
|
||||||
void fillFromSparseTwo(const FSSparseTensor& t, const IntSequence& ss,
|
|
||||||
const IntSequence& coor);
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Here we have four constructors making an |UPSTensor| from a product
|
|
||||||
of matrix and Kronecker product. The first constructs the tensor from
|
|
||||||
equivalence classes of the given equivalence in an order given by the
|
|
||||||
equivalence. The second does the same but with optimized
|
|
||||||
|KronProdAllOptim|, which has a different order of matrices than given
|
|
||||||
by the classes in the equivalence. This permutation is projected to
|
|
||||||
the permutation of the |UPSTensor|. The third, is the same as the
|
|
||||||
first, but the classes of the equivalence are permuted by the given
|
|
||||||
permutation. Finally, the fourth is the most general combination. It
|
|
||||||
allows for a permutation of equivalence classes, and for optimized
|
|
||||||
|KronProdAllOptim|, which permutes the permuted equivalence classes.
|
|
||||||
|
|
||||||
@<|UPSTensor| constructors from Kronecker product@>=
|
|
||||||
UPSTensor(const TensorDimens& td, const Equivalence& e,
|
|
||||||
const ConstTwoDMatrix& a, const KronProdAll& kp)
|
|
||||||
: UTensor(along_col, PerTensorDimens(td, e).getNVX(),
|
|
||||||
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e)
|
|
||||||
{@+ kp.mult(a, *this);@+}
|
|
||||||
UPSTensor(const TensorDimens& td, const Equivalence& e,
|
|
||||||
const ConstTwoDMatrix& a, const KronProdAllOptim& kp)
|
|
||||||
: UTensor(along_col, PerTensorDimens(td, Permutation(e, kp.getPer())).getNVX(),
|
|
||||||
a.nrows(), kp.ncols(), td.dimen()), tdims(td, Permutation(e, kp.getPer()))
|
|
||||||
{@+ kp.mult(a, *this);@+}
|
|
||||||
UPSTensor(const TensorDimens& td, const Equivalence& e, const Permutation& p,
|
|
||||||
const ConstTwoDMatrix& a, const KronProdAll& kp)
|
|
||||||
: UTensor(along_col, PerTensorDimens(td, Permutation(e, p)).getNVX(),
|
|
||||||
a.nrows(), kp.ncols(), td.dimen()), tdims(td, Permutation(e, p))
|
|
||||||
{@+ kp.mult(a, *this);@+}
|
|
||||||
UPSTensor(const TensorDimens& td, const Equivalence& e, const Permutation& p,
|
|
||||||
const ConstTwoDMatrix& a, const KronProdAllOptim& kp)
|
|
||||||
: UTensor(along_col, PerTensorDimens(td, Permutation(e, Permutation(p, kp.getPer()))).getNVX(),
|
|
||||||
a.nrows(), kp.ncols(), td.dimen()), tdims(td, Permutation(e, Permutation(p, kp.getPer())))
|
|
||||||
{@+ kp.mult(a, *this);@+}
|
|
||||||
|
|
||||||
@ Here we define an abstraction for the tensor dimension with the
|
|
||||||
symmetry like $xuv\vert uv\vert xu\vert y\vert y\vert x\vert x\vert
|
|
||||||
y$. These symmetries come as induces symmetries of equivalence and
|
|
||||||
some outer symmetry. Thus the underlying variables are permuted. One
|
|
||||||
can imagine the dimensions as an unfolded product of dimensions which
|
|
||||||
consist of folded products of variables.
|
|
||||||
|
|
||||||
We inherit from |PerTensorDimens| since we need the permutation
|
|
||||||
implied by the equivalence. The new member are the induced symmetries
|
|
||||||
(symmetries of each folded dimensions) and |ds| which are sizes of the
|
|
||||||
dimensions. The number of folded dimensions is return by |numSyms|.
|
|
||||||
|
|
||||||
The object is constructed from outer tensor dimensions and from
|
|
||||||
equivalence with optionally permuted classes.
|
|
||||||
|
|
||||||
@<|PerTensorDimens2| class declaration@>=
|
|
||||||
class PerTensorDimens2 : public PerTensorDimens {
|
|
||||||
InducedSymmetries syms;
|
|
||||||
IntSequence ds;
|
|
||||||
public:@;
|
|
||||||
PerTensorDimens2(const TensorDimens& td, const Equivalence& e,
|
|
||||||
const Permutation& p)
|
|
||||||
: PerTensorDimens(td, Permutation(e, p)),
|
|
||||||
syms(e, p, td.getSym()),
|
|
||||||
ds(syms.size())
|
|
||||||
{@+ setDimensionSizes();@+}
|
|
||||||
PerTensorDimens2(const TensorDimens& td, const Equivalence& e)
|
|
||||||
: PerTensorDimens(td, e),
|
|
||||||
syms(e, td.getSym()),
|
|
||||||
ds(syms.size())
|
|
||||||
{@+ setDimensionSizes();@+}
|
|
||||||
int numSyms() const
|
|
||||||
{@+ return (int)syms.size();@+}
|
|
||||||
const Symmetry& getSym(int i) const
|
|
||||||
{@+ return syms[i];@+}
|
|
||||||
int calcMaxOffset() const
|
|
||||||
{@+ return ds.mult(); @+}
|
|
||||||
int calcOffset(const IntSequence& coor) const;
|
|
||||||
void print() const;
|
|
||||||
protected:@;
|
|
||||||
void setDimensionSizes();
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Here we define an abstraction of the permuted symmetry folded
|
|
||||||
tensor. It is needed in context of the Faa Di Bruno formula for folded
|
|
||||||
stack container multiplied with container of dense folded tensors, or
|
|
||||||
multiplied by one full symmetry sparse tensor.
|
|
||||||
|
|
||||||
For example, if we perform the Faa Di Bruno for $F=f(z)$, where
|
|
||||||
$z=[g(x,y,u,v), h(x,y,u), x, y]^T$, we get for one concrete
|
|
||||||
equivalence:
|
|
||||||
$$
|
|
||||||
\left[F_{x^4y^3u^3v^2}\right]=\ldots+
|
|
||||||
\left[f_{g^2h^2x^2y}\right]\left(
|
|
||||||
[g]_{xv}\otimes[g]_{u^2v}\otimes
|
|
||||||
[h]_{xu}\otimes[h]_{y^2}\otimes
|
|
||||||
\left[\vphantom{\sum}[I]_x\otimes[I]_x\right]\otimes
|
|
||||||
\left[\vphantom{\sum}[I]_y\right]
|
|
||||||
\right)
|
|
||||||
+\ldots
|
|
||||||
$$
|
|
||||||
|
|
||||||
The class |FPSTensor| represents the tensor at the right. Its
|
|
||||||
dimension corresponds to a product of 7 dimensions with the following
|
|
||||||
symmetries: $xv\vert u^v\vert xu\vert y^2\vert x\vert x\vert y$. Such
|
|
||||||
the dimension is described by |PerTensorDimens2|.
|
|
||||||
|
|
||||||
The tensor is constructed in a context of stack container
|
|
||||||
multiplication, so, it is constructed from dimensions |td| (dimensions
|
|
||||||
of the output tensor), stack product |sp| (implied symmetries picking
|
|
||||||
tensors from a stack container, here it is $z$), then a sorted integer
|
|
||||||
sequence of the picked stacks of the stack product (it is always
|
|
||||||
sorted, here it is $(0,0,1,1,2,2,3)$), then the tensor
|
|
||||||
$\left[f_{g^2h^2x^2y}\right]$ (its symmetry must be the same as
|
|
||||||
symmetry given by the |istacks|), and finally from the equivalence
|
|
||||||
with permuted classes.
|
|
||||||
|
|
||||||
We implement |increment| and |getOffset| methods, |decrement| and
|
|
||||||
|unfold| raise an exception. Also, we implement |addTo| method, which
|
|
||||||
adds the tensor data (partially unfolded) to folded general symmetry
|
|
||||||
tensor.
|
|
||||||
|
|
||||||
@<|FPSTensor| class declaration@>=
|
|
||||||
template<typename _Ttype> class StackProduct;
|
|
||||||
|
|
||||||
class FPSTensor : public FTensor {
|
|
||||||
const PerTensorDimens2 tdims;
|
|
||||||
public:@;
|
|
||||||
@<|FPSTensor| constructors@>;
|
|
||||||
|
|
||||||
void increment(IntSequence& v) const;
|
|
||||||
void decrement(IntSequence& v) const;
|
|
||||||
UTensor& unfold() const;
|
|
||||||
|
|
||||||
int getOffset(const IntSequence& v) const;
|
|
||||||
void addTo(FGSTensor& out) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ As for |UPSTensor|, we provide four constructors allowing for
|
|
||||||
combinations of permuting equivalence classes, and optimization of
|
|
||||||
|KronProdAllOptim|. These constructors multiply with dense general
|
|
||||||
symmetry tensor (coming from the dense container, or as a dense slice
|
|
||||||
of the full symmetry sparse tensor). In addition to these 4
|
|
||||||
constructors, we have one constructor multiplying with general
|
|
||||||
symmetry sparse tensor (coming as a sparse slice of the full symmetry
|
|
||||||
sparse tensor).
|
|
||||||
|
|
||||||
@<|FPSTensor| constructors@>=
|
|
||||||
FPSTensor(const TensorDimens& td, const Equivalence& e,
|
|
||||||
const ConstTwoDMatrix& a, const KronProdAll& kp)
|
|
||||||
: FTensor(along_col, PerTensorDimens(td, e).getNVX(),
|
|
||||||
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e)
|
|
||||||
{@+ kp.mult(a, *this);@+}
|
|
||||||
FPSTensor(const TensorDimens& td, const Equivalence& e,
|
|
||||||
const ConstTwoDMatrix& a, const KronProdAllOptim& kp)
|
|
||||||
: FTensor(along_col, PerTensorDimens(td, Permutation(e, kp.getPer())).getNVX(),
|
|
||||||
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e, kp.getPer())
|
|
||||||
{@+ kp.mult(a, *this);@+}
|
|
||||||
FPSTensor(const TensorDimens& td, const Equivalence& e, const Permutation& p,
|
|
||||||
const ConstTwoDMatrix& a, const KronProdAll& kp)
|
|
||||||
: FTensor(along_col, PerTensorDimens(td, Permutation(e, p)).getNVX(),
|
|
||||||
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e, p)
|
|
||||||
{@+ kp.mult(a, *this);@+}
|
|
||||||
FPSTensor(const TensorDimens& td, const Equivalence& e, const Permutation& p,
|
|
||||||
const ConstTwoDMatrix& a, const KronProdAllOptim& kp)
|
|
||||||
: FTensor(along_col, PerTensorDimens(td, Permutation(e, Permutation(p, kp.getPer()))).getNVX(),
|
|
||||||
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e, Permutation(p, kp.getPer()))
|
|
||||||
{@+ kp.mult(a, *this);@+}
|
|
||||||
|
|
||||||
FPSTensor(const TensorDimens& td, const Equivalence& e, const Permutation& p,
|
|
||||||
const GSSparseTensor& t, const KronProdAll& kp);
|
|
||||||
|
|
||||||
FPSTensor(const FPSTensor& ft)
|
|
||||||
: FTensor(ft), tdims(ft.tdims)@+ {}
|
|
||||||
|
|
||||||
@ End of {\tt ps\_tensor.h} file.
|
|
|
@ -0,0 +1,78 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "pyramid_prod.hh"
|
||||||
|
#include "permutation.hh"
|
||||||
|
#include "tl_exception.hh"
|
||||||
|
|
||||||
|
/* Here we construct the |USubTensor| object. We allocate space via the
|
||||||
|
parent |URTensor|. Number of columns is a length of the list of
|
||||||
|
indices |lst|, number of variables and dimensions are of the tensor
|
||||||
|
$h$, this is given by |hdims|.
|
||||||
|
|
||||||
|
We go through all equivalences with number of classes equal to
|
||||||
|
dimension of $B$. For each equivalence we make a permutation
|
||||||
|
|per|. Then we fetch all the necessary tensors $g$ with symmetries
|
||||||
|
implied by symmetry of $B$ and the equivalence. Then we go through the
|
||||||
|
list of indices, permute them by the permutation and add the Kronecker
|
||||||
|
product of the selected columns. This is done by |addKronColumn|. */
|
||||||
|
|
||||||
|
USubTensor::USubTensor(const TensorDimens &bdims,
|
||||||
|
const TensorDimens &hdims,
|
||||||
|
const FGSContainer &cont,
|
||||||
|
const vector<IntSequence> &lst)
|
||||||
|
: URTensor(lst.size(), hdims.getNVX()[0], hdims.dimen())
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(!hdims.getNVX().isConstant(),
|
||||||
|
"Tensor has not full symmetry in USubTensor()");
|
||||||
|
const EquivalenceSet &eset = cont.getEqBundle().get(bdims.dimen());
|
||||||
|
zeros();
|
||||||
|
for (EquivalenceSet::const_iterator it = eset.begin();
|
||||||
|
it != eset.end(); ++it)
|
||||||
|
{
|
||||||
|
if ((*it).numClasses() == hdims.dimen())
|
||||||
|
{
|
||||||
|
Permutation per(*it);
|
||||||
|
vector<const FGSTensor *> ts
|
||||||
|
= cont.fetchTensors(bdims.getSym(), *it);
|
||||||
|
for (int i = 0; i < (int) lst.size(); i++)
|
||||||
|
{
|
||||||
|
IntSequence perindex(lst[i].size());
|
||||||
|
per.apply(lst[i], perindex);
|
||||||
|
addKronColumn(i, ts, perindex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This makes a Kronecker product of appropriate columns from tensors
|
||||||
|
in |fs| and adds such data to |i|-th column of this matrix. The
|
||||||
|
appropriate columns are defined by |pindex| sequence. A column of a
|
||||||
|
tensor has index created from a corresponding part of |pindex|. The
|
||||||
|
sizes of these parts are given by dimensions of the tensors in |ts|.
|
||||||
|
|
||||||
|
Here we break the given index |pindex| according to the dimensions of
|
||||||
|
the tensors in |ts|, and for each subsequence of the |pindex| we find
|
||||||
|
an index of the folded tensor, which involves calling |getOffset| for
|
||||||
|
folded tensor, which might be costly. We gather all columns to a
|
||||||
|
vector |tmpcols| which are Kronecker multiplied in constructor of
|
||||||
|
|URSingleTensor|. Finally we add data of |URSingleTensor| to the
|
||||||
|
|i|-th column. */
|
||||||
|
|
||||||
|
void
|
||||||
|
USubTensor::addKronColumn(int i, const vector<const FGSTensor *> &ts,
|
||||||
|
const IntSequence &pindex)
|
||||||
|
{
|
||||||
|
vector<ConstVector> tmpcols;
|
||||||
|
int lastdim = 0;
|
||||||
|
for (unsigned int j = 0; j < ts.size(); j++)
|
||||||
|
{
|
||||||
|
IntSequence ind(pindex, lastdim, lastdim+ts[j]->dimen());
|
||||||
|
lastdim += ts[j]->dimen();
|
||||||
|
index in(ts[j], ind);
|
||||||
|
tmpcols.push_back(ConstVector(*(ts[j]), *in));
|
||||||
|
}
|
||||||
|
|
||||||
|
URSingleTensor kronmult(tmpcols);
|
||||||
|
Vector coli(*this, i);
|
||||||
|
coli.add(1.0, kronmult.getData());
|
||||||
|
}
|
|
@ -1,86 +0,0 @@
|
||||||
@q $Id: pyramid_prod.cweb 148 2005-04-19 15:12:26Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt pyramid\_prod.cpp} file.
|
|
||||||
@c
|
|
||||||
|
|
||||||
#include "pyramid_prod.h"
|
|
||||||
#include "permutation.h"
|
|
||||||
#include "tl_exception.h"
|
|
||||||
|
|
||||||
@<|USubTensor| constructor code@>;
|
|
||||||
@<|USubTensor::addKronColumn| code@>;
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we construct the |USubTensor| object. We allocate space via the
|
|
||||||
parent |URTensor|. Number of columns is a length of the list of
|
|
||||||
indices |lst|, number of variables and dimensions are of the tensor
|
|
||||||
$h$, this is given by |hdims|.
|
|
||||||
|
|
||||||
We go through all equivalences with number of classes equal to
|
|
||||||
dimension of $B$. For each equivalence we make a permutation
|
|
||||||
|per|. Then we fetch all the necessary tensors $g$ with symmetries
|
|
||||||
implied by symmetry of $B$ and the equivalence. Then we go through the
|
|
||||||
list of indices, permute them by the permutation and add the Kronecker
|
|
||||||
product of the selected columns. This is done by |addKronColumn|.
|
|
||||||
|
|
||||||
@<|USubTensor| constructor code@>=
|
|
||||||
USubTensor::USubTensor(const TensorDimens& bdims,
|
|
||||||
const TensorDimens& hdims,
|
|
||||||
const FGSContainer& cont,
|
|
||||||
const vector<IntSequence>& lst)
|
|
||||||
: URTensor(lst.size(), hdims.getNVX()[0], hdims.dimen())
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(! hdims.getNVX().isConstant(),
|
|
||||||
"Tensor has not full symmetry in USubTensor()");
|
|
||||||
const EquivalenceSet& eset = cont.getEqBundle().get(bdims.dimen());
|
|
||||||
zeros();
|
|
||||||
for (EquivalenceSet::const_iterator it = eset.begin();
|
|
||||||
it != eset.end(); ++it) {
|
|
||||||
if ((*it).numClasses() == hdims.dimen()) {
|
|
||||||
Permutation per(*it);
|
|
||||||
vector<const FGSTensor*> ts =
|
|
||||||
cont.fetchTensors(bdims.getSym(), *it);
|
|
||||||
for (int i = 0; i < (int)lst.size(); i++) {
|
|
||||||
IntSequence perindex(lst[i].size());
|
|
||||||
per.apply(lst[i], perindex);
|
|
||||||
addKronColumn(i, ts, perindex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This makes a Kronecker product of appropriate columns from tensors
|
|
||||||
in |fs| and adds such data to |i|-th column of this matrix. The
|
|
||||||
appropriate columns are defined by |pindex| sequence. A column of a
|
|
||||||
tensor has index created from a corresponding part of |pindex|. The
|
|
||||||
sizes of these parts are given by dimensions of the tensors in |ts|.
|
|
||||||
|
|
||||||
Here we break the given index |pindex| according to the dimensions of
|
|
||||||
the tensors in |ts|, and for each subsequence of the |pindex| we find
|
|
||||||
an index of the folded tensor, which involves calling |getOffset| for
|
|
||||||
folded tensor, which might be costly. We gather all columns to a
|
|
||||||
vector |tmpcols| which are Kronecker multiplied in constructor of
|
|
||||||
|URSingleTensor|. Finally we add data of |URSingleTensor| to the
|
|
||||||
|i|-th column.
|
|
||||||
|
|
||||||
@<|USubTensor::addKronColumn| code@>=
|
|
||||||
void USubTensor::addKronColumn(int i, const vector<const FGSTensor*>& ts,
|
|
||||||
const IntSequence& pindex)
|
|
||||||
{
|
|
||||||
vector<ConstVector> tmpcols;
|
|
||||||
int lastdim = 0;
|
|
||||||
for (unsigned int j = 0; j < ts.size(); j++) {
|
|
||||||
IntSequence ind(pindex, lastdim, lastdim+ts[j]->dimen());
|
|
||||||
lastdim += ts[j]->dimen();
|
|
||||||
index in(ts[j], ind);
|
|
||||||
tmpcols.push_back(ConstVector(*(ts[j]), *in));
|
|
||||||
}
|
|
||||||
|
|
||||||
URSingleTensor kronmult(tmpcols);
|
|
||||||
Vector coli(*this, i);
|
|
||||||
coli.add(1.0, kronmult.getData());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt pyramid\_prod.cpp} file.
|
|
|
@ -0,0 +1,74 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// Multiplying tensor columns.
|
||||||
|
|
||||||
|
/* In here, we implement the Faa Di Bruno for folded
|
||||||
|
tensors. Recall, that one step of the Faa Di Bruno is a formula:
|
||||||
|
$$\left[B_{s^k}\right]_{\alpha_1\ldots\alpha_k}=
|
||||||
|
[h_{y^l}]_{\gamma_1\ldots\gamma_l}
|
||||||
|
\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}
|
||||||
|
$$
|
||||||
|
|
||||||
|
In contrast to unfolded implementation of |UGSContainer::multAndAdd|
|
||||||
|
with help of |KronProdAll| and |UPSTensor|, we take a completely
|
||||||
|
different strategy. We cannot afford full instantiation of
|
||||||
|
$$\sum_{c\in M_{l,k}}
|
||||||
|
\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}$$
|
||||||
|
and therefore we do it per partes. We select some number of columns,
|
||||||
|
for instance 10, calculate 10 continuous iterators of tensor $B$. Then we
|
||||||
|
form unfolded tensor
|
||||||
|
$$[G]_S^{\gamma_1\ldots\gamma_l}=\left[\sum_{c\in M_{l,k}}
|
||||||
|
\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}
|
||||||
|
\right]_S$$
|
||||||
|
where $S$ is the selected set of 10 indices. This is done as Kronecker
|
||||||
|
product of vectors corresponding to selected columns. Note that, in
|
||||||
|
general, there is no symmetry in $G$, its type is special class for
|
||||||
|
this purpose.
|
||||||
|
|
||||||
|
If $g$ is folded, then we have to form folded version of $G$. There is
|
||||||
|
no symmetry in $G$ data, so we sum all unfolded indices corresponding
|
||||||
|
to folded index together. This is perfectly OK, since we multiply
|
||||||
|
these groups of (equivalent) items with the same number in fully
|
||||||
|
symmetric $g$.
|
||||||
|
|
||||||
|
After this, we perform ordinary matrix multiplication to obtain a
|
||||||
|
selected set of columns of $B$.
|
||||||
|
|
||||||
|
In here, we define a class for forming and representing
|
||||||
|
$[G]_S^{\gamma_1\ldots\gamma_l}$. Basically, this tensor is
|
||||||
|
row-oriented (multidimensional index is along rows), and it is fully
|
||||||
|
symmetric. So we inherit from |URTensor|. If we need its folded
|
||||||
|
version, we simply use a suitable conversion. The new abstraction will
|
||||||
|
have only a new constructor allowing a construction from the given set
|
||||||
|
of indices $S$, and given set of tensors $g$. The rest of the process
|
||||||
|
is implemented in |@<|FGSContainer::multAndAdd| unfolded code@>| or
|
||||||
|
|@<|FGSContainer::multAndAdd| folded code@>|. */
|
||||||
|
|
||||||
|
#ifndef PYRAMID_PROD_H
|
||||||
|
#define PYRAMID_PROD_H
|
||||||
|
|
||||||
|
#include "int_sequence.hh"
|
||||||
|
#include "rfs_tensor.hh"
|
||||||
|
#include "gs_tensor.hh"
|
||||||
|
#include "t_container.hh"
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
/* Here we define the new tensor for representing
|
||||||
|
$[G]_S^{\gamma_1\ldots\gamma_l}$. It allows a construction from
|
||||||
|
container of folded general symmetry tensors |cont|, and set of
|
||||||
|
indices |ts|. Also we have to supply dimensions of resulting tensor
|
||||||
|
$B$, and dimensions of tensor $h$. */
|
||||||
|
|
||||||
|
class USubTensor : public URTensor
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
USubTensor(const TensorDimens &bdims, const TensorDimens &hdims,
|
||||||
|
const FGSContainer &cont, const vector<IntSequence> &lst);
|
||||||
|
void addKronColumn(int i, const vector<const FGSTensor *> &ts,
|
||||||
|
const IntSequence &pindex);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,80 +0,0 @@
|
||||||
@q $Id: pyramid_prod.hweb 148 2005-04-19 15:12:26Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Multiplying tensor columns. Start of {\tt pyramid\_prod.h} file.
|
|
||||||
|
|
||||||
In here, we implement the Faa Di Bruno for folded
|
|
||||||
tensors. Recall, that one step of the Faa Di Bruno is a formula:
|
|
||||||
$$\left[B_{s^k}\right]_{\alpha_1\ldots\alpha_k}=
|
|
||||||
[h_{y^l}]_{\gamma_1\ldots\gamma_l}
|
|
||||||
\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}
|
|
||||||
$$
|
|
||||||
|
|
||||||
In contrast to unfolded implementation of |UGSContainer::multAndAdd|
|
|
||||||
with help of |KronProdAll| and |UPSTensor|, we take a completely
|
|
||||||
different strategy. We cannot afford full instantiation of
|
|
||||||
$$\sum_{c\in M_{l,k}}
|
|
||||||
\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}$$
|
|
||||||
and therefore we do it per partes. We select some number of columns,
|
|
||||||
for instance 10, calculate 10 continuous iterators of tensor $B$. Then we
|
|
||||||
form unfolded tensor
|
|
||||||
$$[G]_S^{\gamma_1\ldots\gamma_l}=\left[\sum_{c\in M_{l,k}}
|
|
||||||
\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}
|
|
||||||
\right]_S$$
|
|
||||||
where $S$ is the selected set of 10 indices. This is done as Kronecker
|
|
||||||
product of vectors corresponding to selected columns. Note that, in
|
|
||||||
general, there is no symmetry in $G$, its type is special class for
|
|
||||||
this purpose.
|
|
||||||
|
|
||||||
If $g$ is folded, then we have to form folded version of $G$. There is
|
|
||||||
no symmetry in $G$ data, so we sum all unfolded indices corresponding
|
|
||||||
to folded index together. This is perfectly OK, since we multiply
|
|
||||||
these groups of (equivalent) items with the same number in fully
|
|
||||||
symmetric $g$.
|
|
||||||
|
|
||||||
After this, we perform ordinary matrix multiplication to obtain a
|
|
||||||
selected set of columns of $B$.
|
|
||||||
|
|
||||||
In here, we define a class for forming and representing
|
|
||||||
$[G]_S^{\gamma_1\ldots\gamma_l}$. Basically, this tensor is
|
|
||||||
row-oriented (multidimensional index is along rows), and it is fully
|
|
||||||
symmetric. So we inherit from |URTensor|. If we need its folded
|
|
||||||
version, we simply use a suitable conversion. The new abstraction will
|
|
||||||
have only a new constructor allowing a construction from the given set
|
|
||||||
of indices $S$, and given set of tensors $g$. The rest of the process
|
|
||||||
is implemented in |@<|FGSContainer::multAndAdd| unfolded code@>| or
|
|
||||||
|@<|FGSContainer::multAndAdd| folded code@>|.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#ifndef PYRAMID_PROD_H
|
|
||||||
#define PYRAMID_PROD_H
|
|
||||||
|
|
||||||
#include "int_sequence.h"
|
|
||||||
#include "rfs_tensor.h"
|
|
||||||
#include "gs_tensor.h"
|
|
||||||
#include "t_container.h"
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
@<|USubTensor| class declaration@>;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
@ Here we define the new tensor for representing
|
|
||||||
$[G]_S^{\gamma_1\ldots\gamma_l}$. It allows a construction from
|
|
||||||
container of folded general symmetry tensors |cont|, and set of
|
|
||||||
indices |ts|. Also we have to supply dimensions of resulting tensor
|
|
||||||
$B$, and dimensions of tensor $h$.
|
|
||||||
|
|
||||||
@<|USubTensor| class declaration@>=
|
|
||||||
class USubTensor : public URTensor {
|
|
||||||
public:@;
|
|
||||||
USubTensor(const TensorDimens& bdims, const TensorDimens& hdims,
|
|
||||||
const FGSContainer& cont, const vector<IntSequence>& lst);
|
|
||||||
void addKronColumn(int i, const vector<const FGSTensor*>& ts,
|
|
||||||
const IntSequence& pindex);
|
|
||||||
};
|
|
||||||
|
|
||||||
@ End of {\tt pyramid\_prod.h} file.
|
|
|
@ -0,0 +1,116 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "pyramid_prod2.hh"
|
||||||
|
#include "rfs_tensor.hh"
|
||||||
|
|
||||||
|
/* Here we only call |sp.createPackedColumns(c, cols, unit_flag)| which
|
||||||
|
fills |cols| and |unit_flag| for the given column |c|. Then we set
|
||||||
|
|end_seq| according to |unit_flag| and columns lengths. */
|
||||||
|
|
||||||
|
IrregTensorHeader::IrregTensorHeader(const StackProduct<FGSTensor> &sp,
|
||||||
|
const IntSequence &c)
|
||||||
|
: nv(sp.getAllSize()),
|
||||||
|
unit_flag(sp.dimen()),
|
||||||
|
cols(new Vector *[sp.dimen()]),
|
||||||
|
end_seq(sp.dimen())
|
||||||
|
{
|
||||||
|
sp.createPackedColumns(c, cols, unit_flag);
|
||||||
|
for (int i = 0; i < sp.dimen(); i++)
|
||||||
|
{
|
||||||
|
end_seq[i] = cols[i]->length();
|
||||||
|
if (unit_flag[i] != -1)
|
||||||
|
end_seq[i] = unit_flag[i]+1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we have to increment the given integer sequence. We do it by
|
||||||
|
the following code, whose pattern is valid for all tensor. The only
|
||||||
|
difference is how we increment item of coordinates. */
|
||||||
|
|
||||||
|
void
|
||||||
|
IrregTensorHeader::increment(IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong size of coordinates in IrregTensorHeader::increment");
|
||||||
|
|
||||||
|
if (v.size() == 0)
|
||||||
|
return;
|
||||||
|
int i = v.size()-1;
|
||||||
|
|
||||||
|
// increment |i|-th item in coordinate |v|
|
||||||
|
/* Here we increment item of coordinates. Whenever we reached end of
|
||||||
|
column coming from matrices, and |unit_flag| is not $-1$, we have to
|
||||||
|
jump to that |unit_flag|. */
|
||||||
|
v[i]++;
|
||||||
|
if (unit_flag[i] != -1 && v[i] == cols[i]->length()-1)
|
||||||
|
v[i] = unit_flag[i];
|
||||||
|
|
||||||
|
while (i > 0 && v[i] == end_seq[i])
|
||||||
|
{
|
||||||
|
v[i] = 0;
|
||||||
|
i--;
|
||||||
|
// increment |i|-th item in coordinate |v|
|
||||||
|
/* Same code as above */
|
||||||
|
v[i]++;
|
||||||
|
if (unit_flag[i] != -1 && v[i] == cols[i]->length()-1)
|
||||||
|
v[i] = unit_flag[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
IrregTensorHeader::~IrregTensorHeader()
|
||||||
|
{
|
||||||
|
for (int i = 0; i < dimen(); i++)
|
||||||
|
delete cols[i];
|
||||||
|
delete [] cols;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* It is a product of all column lengths. */
|
||||||
|
|
||||||
|
int
|
||||||
|
IrregTensorHeader::calcMaxOffset() const
|
||||||
|
{
|
||||||
|
int res = 1;
|
||||||
|
for (int i = 0; i < dimen(); i++)
|
||||||
|
res *= cols[i]->length();
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Everything is done in |IrregTensorHeader|, only we have to Kronecker
|
||||||
|
multiply all columns of the header. */
|
||||||
|
|
||||||
|
IrregTensor::IrregTensor(const IrregTensorHeader &h)
|
||||||
|
: Tensor(along_row, IntSequence(h.dimen(), 0), h.end_seq,
|
||||||
|
h.calcMaxOffset(), 1, h.dimen()),
|
||||||
|
header(h)
|
||||||
|
{
|
||||||
|
if (header.dimen() == 1)
|
||||||
|
{
|
||||||
|
getData() = *(header.cols[0]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Vector *last = new Vector(*(header.cols[header.dimen()-1]));
|
||||||
|
for (int i = header.dimen()-2; i > 0; i--)
|
||||||
|
{
|
||||||
|
Vector *newlast = new Vector(last->length()*header.cols[i]->length());
|
||||||
|
KronProd::kronMult(ConstVector(*(header.cols[i])),
|
||||||
|
ConstVector(*last), *newlast);
|
||||||
|
delete last;
|
||||||
|
last = newlast;
|
||||||
|
}
|
||||||
|
KronProd::kronMult(ConstVector(*(header.cols[0])),
|
||||||
|
ConstVector(*last), getData());
|
||||||
|
delete last;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
IrregTensor::addTo(FRSingleTensor &out) const
|
||||||
|
{
|
||||||
|
for (index it = begin(); it != end(); ++it)
|
||||||
|
{
|
||||||
|
IntSequence tmp(it.getCoor());
|
||||||
|
tmp.sort();
|
||||||
|
Tensor::index ind(&out, tmp);
|
||||||
|
out.get(*ind, 0) += get(*it, 0);
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,129 +0,0 @@
|
||||||
@q $Id: pyramid_prod2.cweb 332 2005-07-15 13:41:48Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt pyramid\_prod2.cpp} file.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#include "pyramid_prod2.h"
|
|
||||||
#include "rfs_tensor.h"
|
|
||||||
|
|
||||||
@<|IrregTensorHeader| constructor code@>;
|
|
||||||
@<|IrregTensorHeader::increment| code@>;
|
|
||||||
@<|IrregTensorHeader| destructor code@>;
|
|
||||||
@<|IrregTensorHeader::calcMaxOffset| code@>;
|
|
||||||
@<|IrregTensor| constructor code@>;
|
|
||||||
@<|IrregTensor::addTo| code@>;
|
|
||||||
|
|
||||||
@ Here we only call |sp.createPackedColumns(c, cols, unit_flag)| which
|
|
||||||
fills |cols| and |unit_flag| for the given column |c|. Then we set
|
|
||||||
|end_seq| according to |unit_flag| and columns lengths.
|
|
||||||
|
|
||||||
@<|IrregTensorHeader| constructor code@>=
|
|
||||||
IrregTensorHeader::IrregTensorHeader(const StackProduct<FGSTensor>& sp,
|
|
||||||
const IntSequence& c)
|
|
||||||
: nv(sp.getAllSize()),
|
|
||||||
unit_flag(sp.dimen()),
|
|
||||||
cols(new Vector*[sp.dimen()]),
|
|
||||||
end_seq(sp.dimen())
|
|
||||||
{
|
|
||||||
sp.createPackedColumns(c, cols, unit_flag);
|
|
||||||
for (int i = 0; i < sp.dimen(); i++) {
|
|
||||||
end_seq[i] = cols[i]->length();
|
|
||||||
if (unit_flag[i] != -1)
|
|
||||||
end_seq[i] = unit_flag[i]+1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we have to increment the given integer sequence. We do it by
|
|
||||||
the following code, whose pattern is valid for all tensor. The only
|
|
||||||
difference is how we increment item of coordinates.
|
|
||||||
|
|
||||||
@<|IrregTensorHeader::increment| code@>=
|
|
||||||
void IrregTensorHeader::increment(IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong size of coordinates in IrregTensorHeader::increment");
|
|
||||||
|
|
||||||
if (v.size() == 0)
|
|
||||||
return;
|
|
||||||
int i = v.size()-1;
|
|
||||||
@<increment |i|-th item in coordinate |v|@>;
|
|
||||||
while (i > 0 && v[i] == end_seq[i]) {
|
|
||||||
v[i] = 0;
|
|
||||||
i--;
|
|
||||||
@<increment |i|-th item in coordinate |v|@>;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we increment item of coordinates. Whenever we reached end of
|
|
||||||
column coming from matrices, and |unit_flag| is not $-1$, we have to
|
|
||||||
jump to that |unit_flag|.
|
|
||||||
|
|
||||||
@<increment |i|-th item in coordinate |v|@>=
|
|
||||||
v[i]++;
|
|
||||||
if (unit_flag[i] != -1 && v[i] == cols[i]->length()-1)
|
|
||||||
v[i] = unit_flag[i];
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|IrregTensorHeader| destructor code@>=
|
|
||||||
IrregTensorHeader::~IrregTensorHeader()
|
|
||||||
{
|
|
||||||
for (int i = 0; i < dimen(); i++)
|
|
||||||
delete cols[i];
|
|
||||||
delete [] cols;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ It is a product of all column lengths.
|
|
||||||
@<|IrregTensorHeader::calcMaxOffset| code@>=
|
|
||||||
int IrregTensorHeader::calcMaxOffset() const
|
|
||||||
{
|
|
||||||
int res = 1;
|
|
||||||
for (int i = 0; i < dimen(); i++)
|
|
||||||
res *= cols[i]->length();
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Everything is done in |IrregTensorHeader|, only we have to Kronecker
|
|
||||||
multiply all columns of the header.
|
|
||||||
|
|
||||||
@<|IrregTensor| constructor code@>=
|
|
||||||
IrregTensor::IrregTensor(const IrregTensorHeader& h)
|
|
||||||
: Tensor(along_row, IntSequence(h.dimen(), 0), h.end_seq,
|
|
||||||
h.calcMaxOffset(), 1, h.dimen()),
|
|
||||||
header(h)
|
|
||||||
{
|
|
||||||
if (header.dimen() == 1) {
|
|
||||||
getData() = *(header.cols[0]);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
Vector* last = new Vector(*(header.cols[header.dimen()-1]));
|
|
||||||
for (int i = header.dimen()-2; i > 0; i--) {
|
|
||||||
Vector* newlast = new Vector(last->length()*header.cols[i]->length());
|
|
||||||
KronProd::kronMult(ConstVector(*(header.cols[i])),
|
|
||||||
ConstVector(*last), *newlast);
|
|
||||||
delete last;
|
|
||||||
last = newlast;
|
|
||||||
}
|
|
||||||
KronProd::kronMult(ConstVector(*(header.cols[0])),
|
|
||||||
ConstVector(*last), getData());
|
|
||||||
delete last;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Clear.
|
|
||||||
@<|IrregTensor::addTo| code@>=
|
|
||||||
void IrregTensor::addTo(FRSingleTensor& out) const
|
|
||||||
{
|
|
||||||
for (index it = begin(); it != end(); ++it) {
|
|
||||||
IntSequence tmp(it.getCoor());
|
|
||||||
tmp.sort();
|
|
||||||
Tensor::index ind(&out, tmp);
|
|
||||||
out.get(*ind, 0) += get(*it, 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt pyramid\_prod2.cpp} file.
|
|
|
@ -0,0 +1,155 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// Multiplying stacked tensor columns.
|
||||||
|
|
||||||
|
/* We need to calculate the following tensor product:
|
||||||
|
$$\left[f_{s^j}\right]_{\alpha_1\ldots\alpha_j}=
|
||||||
|
\sum_{l=1}^j\left[f_{z^l}\right]_{\beta_1\ldots\beta_l}
|
||||||
|
\sum_{c\in M_{l,j}}\prod_{m=1}^l\left[z_{c_m}\right]^{\beta_m}_{c_m(\alpha)}
|
||||||
|
$$
|
||||||
|
where $s=[y,u,u',\sigma]$, and $z$ is a composition of four variables,
|
||||||
|
say $[v,w,y,u]$. Note that $z$ ends with $y$ and $u$, and the only
|
||||||
|
non-zero derivative of the trailing part of $z$ involving $y$ or $u$
|
||||||
|
is the first derivative and is the unit matrix $y_y=[1]$ or
|
||||||
|
$u_u=[1]$. Also, we suppose that the dependence of $v$, and $w$ on $s$
|
||||||
|
is such that whenever derivative of $w$ is nonzero, then also of
|
||||||
|
$v$. This means that there for any derivative and any index there is a
|
||||||
|
continuous part of derivatives of $v$ and optionally of $w$ followed by
|
||||||
|
column of zeros containing at most one $1$.
|
||||||
|
|
||||||
|
This structure can be modelled and exploited with some costs at
|
||||||
|
programming. For example, let us consider the following product:
|
||||||
|
$$\left[B_{y^2u^3}\right]_{\alpha_1\alpha_2\beta_1\beta_2\beta_3}=
|
||||||
|
\ldots
|
||||||
|
\left[f_{z^3}\right]_{\gamma_1\gamma_2\gamma_3}
|
||||||
|
\left[z_{yu}\right]^{\gamma_1}_{\alpha_1\beta_1}
|
||||||
|
\left[z_{y}\right]^{\gamma_2}_{\alpha_2}
|
||||||
|
\left[z_{uu}\right]^{\gamma_3}_{\beta_2\beta_3}
|
||||||
|
\ldots$$
|
||||||
|
The term corresponds to equivalence $\{\{0,2\},\{1\},\{3,4\}\}$. For
|
||||||
|
the fixed index $\alpha_1\alpha_2\beta_1\beta_2\beta_3$ we have to
|
||||||
|
make a Kronecker product of the columns
|
||||||
|
$$
|
||||||
|
\left[z_{yu}\right]_{\alpha_1\beta_1}\otimes
|
||||||
|
\left[z_{y}\right]_{\alpha_2}\otimes
|
||||||
|
\left[z_{uu}\right]_{\beta_2\beta_3}
|
||||||
|
$$
|
||||||
|
which can be written as
|
||||||
|
$$
|
||||||
|
\left[\matrix{\left[v_{yu}\right]_{\alpha_1\beta_1}\cr
|
||||||
|
\left[w_{yu}\right]_{\alpha_1\beta_1}\cr 0\cr 0}\right]\otimes
|
||||||
|
\left[\matrix{\left[v_y\right]_{\alpha_2\vphantom{(}}\cr
|
||||||
|
\left[w_y\right]_{\alpha_2}\cr 1_{\alpha_2}\cr 0}\right]\otimes
|
||||||
|
\left[\matrix{\left[v_{uu}\right]_{\beta_2\beta_3\vphantom{(}}\cr
|
||||||
|
\left[w_{uu}\right]_{\beta_2\beta_3}\cr 0\cr 0}\right]
|
||||||
|
$$
|
||||||
|
where $1_{\alpha_2}$ is a column of zeros having the only $1$ at
|
||||||
|
$\alpha_2$ index.
|
||||||
|
|
||||||
|
This file develops the abstraction for this Kronecker product column
|
||||||
|
without multiplication of the zeros at the top. Basically, it will be
|
||||||
|
a column which is a Kronecker product of the columns without the
|
||||||
|
zeros:
|
||||||
|
$$
|
||||||
|
\left[\matrix{\left[v_{yu}\right]_{\alpha_1\beta_1}\cr
|
||||||
|
\left[w_{yu}\right]_{\alpha_1\beta_1}}\right]\otimes
|
||||||
|
\left[\matrix{\left[v_y\right]_{\alpha_2}\cr
|
||||||
|
\left[w_y\right]_{\alpha_2}\cr 1}\right]\otimes
|
||||||
|
\left[\matrix{\left[v_{uu}\right]_{\beta_2\beta_3}\cr
|
||||||
|
\left[w_{uu}\right]_{\beta_2\beta_3}}\right]
|
||||||
|
$$
|
||||||
|
The class will have a tensor infrastructure introducing |index| which
|
||||||
|
iterates over all items in the column with $\gamma_1\gamma_2\gamma_3$
|
||||||
|
as coordinates in $\left[f_{z^3}\right]$. The data of such a tensor is
|
||||||
|
not suitable for any matrix operation and will have to be accessed
|
||||||
|
only through the |index|. Note that this does not matter, since
|
||||||
|
$\left[f_{z^l}\right]$ are sparse. */
|
||||||
|
|
||||||
|
#ifndef PYRAMID_PROD2_H
|
||||||
|
#define PYRAMID_PROD2_H
|
||||||
|
|
||||||
|
#include "permutation.hh"
|
||||||
|
#include "tensor.hh"
|
||||||
|
#include "tl_exception.hh"
|
||||||
|
#include "rfs_tensor.hh"
|
||||||
|
#include "stack_container.hh"
|
||||||
|
|
||||||
|
#include "Vector.h"
|
||||||
|
|
||||||
|
/* First we declare a helper class for the tensor. Its purpose is to
|
||||||
|
gather the columns which are going to be Kronecker multiplied. The
|
||||||
|
input of this helper class is |StackProduct<FGSTensor>| and coordinate
|
||||||
|
|c| of the column.
|
||||||
|
|
||||||
|
It maintains |unit_flag| array which says for what columns we must
|
||||||
|
stack 1 below $v$ and $w$. In this case, the value of |unit_flag| is
|
||||||
|
an index of the $1$, otherwise the value of |unit_flag| is -1.
|
||||||
|
|
||||||
|
Also we have storage for the stacked columns |cols|. The object is
|
||||||
|
responsible for memory management associated to this storage. That is
|
||||||
|
why we do not allow any copy constructor, since we need to be sure
|
||||||
|
that no accidental copies take place. We declare the copy constructor
|
||||||
|
as private and not implement it. */
|
||||||
|
|
||||||
|
class IrregTensor;
|
||||||
|
class IrregTensorHeader
|
||||||
|
{
|
||||||
|
friend class IrregTensor;
|
||||||
|
int nv;
|
||||||
|
IntSequence unit_flag;
|
||||||
|
Vector **const cols;
|
||||||
|
IntSequence end_seq;
|
||||||
|
public:
|
||||||
|
IrregTensorHeader(const StackProduct<FGSTensor> &sp, const IntSequence &c);
|
||||||
|
~IrregTensorHeader();
|
||||||
|
int
|
||||||
|
dimen() const
|
||||||
|
{
|
||||||
|
return unit_flag.size();
|
||||||
|
}
|
||||||
|
void increment(IntSequence &v) const;
|
||||||
|
int calcMaxOffset() const;
|
||||||
|
private:
|
||||||
|
IrregTensorHeader(const IrregTensorHeader &);
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Here we declare the irregular tensor. There is no special logic
|
||||||
|
here. We inherit from |Tensor| and we must implement three methods,
|
||||||
|
|increment|, |decrement| and |getOffset|. The last two are not
|
||||||
|
implemented now, since they are not needed, and they raise an
|
||||||
|
exception. The first just calls |increment| of the header. Also we
|
||||||
|
declare a method |addTo| which adds this unfolded irregular single
|
||||||
|
column tensor to folded (regular) single column tensor.
|
||||||
|
|
||||||
|
The header |IrregTensorHeader| lives with an object by a
|
||||||
|
reference. This is dangerous. However, we will use this class only in
|
||||||
|
a simple loop and both |IrregTensor| and |IrregTensorHeader| will be
|
||||||
|
destructed at the end of a block. Since the super class |Tensor| must
|
||||||
|
be initialized before any member, we could do either a save copy of
|
||||||
|
|IrregTensorHeader|, or relatively dangerous the reference member. For
|
||||||
|
the reason above we chose the latter. */
|
||||||
|
|
||||||
|
class IrregTensor : public Tensor
|
||||||
|
{
|
||||||
|
const IrregTensorHeader &header;
|
||||||
|
public:
|
||||||
|
IrregTensor(const IrregTensorHeader &h);
|
||||||
|
void addTo(FRSingleTensor &out) const;
|
||||||
|
void
|
||||||
|
increment(IntSequence &v) const
|
||||||
|
{
|
||||||
|
header.increment(v);
|
||||||
|
}
|
||||||
|
void
|
||||||
|
decrement(IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE("Not implemented error in IrregTensor::decrement");
|
||||||
|
}
|
||||||
|
int
|
||||||
|
getOffset(const IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE("Not implemented error in IrregTensor::getOffset"); return 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,151 +0,0 @@
|
||||||
@q $Id: pyramid_prod2.hweb 148 2005-04-19 15:12:26Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Multiplying stacked tensor columns. Start of {\tt pyramid\_prod2.h} file.
|
|
||||||
|
|
||||||
We need to calculate the following tensor product:
|
|
||||||
$$\left[f_{s^j}\right]_{\alpha_1\ldots\alpha_j}=
|
|
||||||
\sum_{l=1}^j\left[f_{z^l}\right]_{\beta_1\ldots\beta_l}
|
|
||||||
\sum_{c\in M_{l,j}}\prod_{m=1}^l\left[z_{c_m}\right]^{\beta_m}_{c_m(\alpha)}
|
|
||||||
$$
|
|
||||||
where $s=[y,u,u',\sigma]$, and $z$ is a composition of four variables,
|
|
||||||
say $[v,w,y,u]$. Note that $z$ ends with $y$ and $u$, and the only
|
|
||||||
non-zero derivative of the trailing part of $z$ involving $y$ or $u$
|
|
||||||
is the first derivative and is the unit matrix $y_y=[1]$ or
|
|
||||||
$u_u=[1]$. Also, we suppose that the dependence of $v$, and $w$ on $s$
|
|
||||||
is such that whenever derivative of $w$ is nonzero, then also of
|
|
||||||
$v$. This means that there for any derivative and any index there is a
|
|
||||||
continuous part of derivatives of $v$ and optionally of $w$ followed by
|
|
||||||
column of zeros containing at most one $1$.
|
|
||||||
|
|
||||||
This structure can be modelled and exploited with some costs at
|
|
||||||
programming. For example, let us consider the following product:
|
|
||||||
$$\left[B_{y^2u^3}\right]_{\alpha_1\alpha_2\beta_1\beta_2\beta_3}=
|
|
||||||
\ldots
|
|
||||||
\left[f_{z^3}\right]_{\gamma_1\gamma_2\gamma_3}
|
|
||||||
\left[z_{yu}\right]^{\gamma_1}_{\alpha_1\beta_1}
|
|
||||||
\left[z_{y}\right]^{\gamma_2}_{\alpha_2}
|
|
||||||
\left[z_{uu}\right]^{\gamma_3}_{\beta_2\beta_3}
|
|
||||||
\ldots$$
|
|
||||||
The term corresponds to equivalence $\{\{0,2\},\{1\},\{3,4\}\}$. For
|
|
||||||
the fixed index $\alpha_1\alpha_2\beta_1\beta_2\beta_3$ we have to
|
|
||||||
make a Kronecker product of the columns
|
|
||||||
$$
|
|
||||||
\left[z_{yu}\right]_{\alpha_1\beta_1}\otimes
|
|
||||||
\left[z_{y}\right]_{\alpha_2}\otimes
|
|
||||||
\left[z_{uu}\right]_{\beta_2\beta_3}
|
|
||||||
$$
|
|
||||||
which can be written as
|
|
||||||
$$
|
|
||||||
\left[\matrix{\left[v_{yu}\right]_{\alpha_1\beta_1}\cr
|
|
||||||
\left[w_{yu}\right]_{\alpha_1\beta_1}\cr 0\cr 0}\right]\otimes
|
|
||||||
\left[\matrix{\left[v_y\right]_{\alpha_2\vphantom{(}}\cr
|
|
||||||
\left[w_y\right]_{\alpha_2}\cr 1_{\alpha_2}\cr 0}\right]\otimes
|
|
||||||
\left[\matrix{\left[v_{uu}\right]_{\beta_2\beta_3\vphantom{(}}\cr
|
|
||||||
\left[w_{uu}\right]_{\beta_2\beta_3}\cr 0\cr 0}\right]
|
|
||||||
$$
|
|
||||||
where $1_{\alpha_2}$ is a column of zeros having the only $1$ at
|
|
||||||
$\alpha_2$ index.
|
|
||||||
|
|
||||||
This file develops the abstraction for this Kronecker product column
|
|
||||||
without multiplication of the zeros at the top. Basically, it will be
|
|
||||||
a column which is a Kronecker product of the columns without the
|
|
||||||
zeros:
|
|
||||||
$$
|
|
||||||
\left[\matrix{\left[v_{yu}\right]_{\alpha_1\beta_1}\cr
|
|
||||||
\left[w_{yu}\right]_{\alpha_1\beta_1}}\right]\otimes
|
|
||||||
\left[\matrix{\left[v_y\right]_{\alpha_2}\cr
|
|
||||||
\left[w_y\right]_{\alpha_2}\cr 1}\right]\otimes
|
|
||||||
\left[\matrix{\left[v_{uu}\right]_{\beta_2\beta_3}\cr
|
|
||||||
\left[w_{uu}\right]_{\beta_2\beta_3}}\right]
|
|
||||||
$$
|
|
||||||
The class will have a tensor infrastructure introducing |index| which
|
|
||||||
iterates over all items in the column with $\gamma_1\gamma_2\gamma_3$
|
|
||||||
as coordinates in $\left[f_{z^3}\right]$. The data of such a tensor is
|
|
||||||
not suitable for any matrix operation and will have to be accessed
|
|
||||||
only through the |index|. Note that this does not matter, since
|
|
||||||
$\left[f_{z^l}\right]$ are sparse.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#ifndef PYRAMID_PROD2_H
|
|
||||||
#define PYRAMID_PROD2_H
|
|
||||||
|
|
||||||
#include "permutation.h"
|
|
||||||
#include "tensor.h"
|
|
||||||
#include "tl_exception.h"
|
|
||||||
#include "rfs_tensor.h"
|
|
||||||
#include "stack_container.h"
|
|
||||||
|
|
||||||
#include "Vector.h"
|
|
||||||
|
|
||||||
@<|IrregTensorHeader| class declaration@>;
|
|
||||||
@<|IrregTensor| class declaration@>;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
@ First we declare a helper class for the tensor. Its purpose is to
|
|
||||||
gather the columns which are going to be Kronecker multiplied. The
|
|
||||||
input of this helper class is |StackProduct<FGSTensor>| and coordinate
|
|
||||||
|c| of the column.
|
|
||||||
|
|
||||||
It maintains |unit_flag| array which says for what columns we must
|
|
||||||
stack 1 below $v$ and $w$. In this case, the value of |unit_flag| is
|
|
||||||
an index of the $1$, otherwise the value of |unit_flag| is -1.
|
|
||||||
|
|
||||||
Also we have storage for the stacked columns |cols|. The object is
|
|
||||||
responsible for memory management associated to this storage. That is
|
|
||||||
why we do not allow any copy constructor, since we need to be sure
|
|
||||||
that no accidental copies take place. We declare the copy constructor
|
|
||||||
as private and not implement it.
|
|
||||||
|
|
||||||
@<|IrregTensorHeader| class declaration@>=
|
|
||||||
class IrregTensor;
|
|
||||||
class IrregTensorHeader {
|
|
||||||
friend class IrregTensor;
|
|
||||||
int nv;
|
|
||||||
IntSequence unit_flag;
|
|
||||||
Vector** const cols;
|
|
||||||
IntSequence end_seq;
|
|
||||||
public:@;
|
|
||||||
IrregTensorHeader(const StackProduct<FGSTensor>& sp, const IntSequence& c);
|
|
||||||
~IrregTensorHeader();
|
|
||||||
int dimen() const
|
|
||||||
{@+ return unit_flag.size();@+}
|
|
||||||
void increment(IntSequence& v) const;
|
|
||||||
int calcMaxOffset() const;
|
|
||||||
private:@;
|
|
||||||
IrregTensorHeader(const IrregTensorHeader&);
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we declare the irregular tensor. There is no special logic
|
|
||||||
here. We inherit from |Tensor| and we must implement three methods,
|
|
||||||
|increment|, |decrement| and |getOffset|. The last two are not
|
|
||||||
implemented now, since they are not needed, and they raise an
|
|
||||||
exception. The first just calls |increment| of the header. Also we
|
|
||||||
declare a method |addTo| which adds this unfolded irregular single
|
|
||||||
column tensor to folded (regular) single column tensor.
|
|
||||||
|
|
||||||
The header |IrregTensorHeader| lives with an object by a
|
|
||||||
reference. This is dangerous. However, we will use this class only in
|
|
||||||
a simple loop and both |IrregTensor| and |IrregTensorHeader| will be
|
|
||||||
destructed at the end of a block. Since the super class |Tensor| must
|
|
||||||
be initialized before any member, we could do either a save copy of
|
|
||||||
|IrregTensorHeader|, or relatively dangerous the reference member. For
|
|
||||||
the reason above we chose the latter.
|
|
||||||
|
|
||||||
@<|IrregTensor| class declaration@>=
|
|
||||||
class IrregTensor : public Tensor {
|
|
||||||
const IrregTensorHeader& header;
|
|
||||||
public:@;
|
|
||||||
IrregTensor(const IrregTensorHeader& h);
|
|
||||||
void addTo(FRSingleTensor& out) const;
|
|
||||||
void increment(IntSequence& v) const
|
|
||||||
{@+ header.increment(v);@+}
|
|
||||||
void decrement(IntSequence& v) const
|
|
||||||
{@+ TL_RAISE("Not implemented error in IrregTensor::decrement");@+}
|
|
||||||
int getOffset(const IntSequence& v) const
|
|
||||||
{@+ TL_RAISE("Not implemented error in IrregTensor::getOffset");@+return 0;@+}
|
|
||||||
};
|
|
||||||
|
|
||||||
@ End of {\tt pyramid\_prod2.h} file.
|
|
|
@ -0,0 +1,187 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "rfs_tensor.hh"
|
||||||
|
#include "kron_prod.hh"
|
||||||
|
#include "tl_exception.hh"
|
||||||
|
|
||||||
|
// |FRTensor| conversion from unfolded
|
||||||
|
/* The conversion from unfolded to folded sums up all data from
|
||||||
|
unfolded corresponding to one folded index. So we go through all the
|
||||||
|
rows in the unfolded tensor |ut|, make an index of the folded tensor
|
||||||
|
by sorting the coordinates, and add the row. */
|
||||||
|
FRTensor::FRTensor(const URTensor &ut)
|
||||||
|
: FTensor(along_row, IntSequence(ut.dimen(), ut.nvar()),
|
||||||
|
FFSTensor::calcMaxOffset(ut.nvar(), ut.dimen()), ut.ncols(),
|
||||||
|
ut.dimen()),
|
||||||
|
nv(ut.nvar())
|
||||||
|
{
|
||||||
|
zeros();
|
||||||
|
for (index in = ut.begin(); in != ut.end(); ++in)
|
||||||
|
{
|
||||||
|
IntSequence vtmp(in.getCoor());
|
||||||
|
vtmp.sort();
|
||||||
|
index tar(this, vtmp);
|
||||||
|
addRow(ut, *in, *tar);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here just make a new instance and return the reference. */
|
||||||
|
|
||||||
|
UTensor &
|
||||||
|
FRTensor::unfold() const
|
||||||
|
{
|
||||||
|
return *(new URTensor(*this));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Incrementing is easy. The same as for |FFSTensor|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
FRTensor::increment(IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input/output vector size in FRTensor::increment");
|
||||||
|
|
||||||
|
UTensor::increment(v, nv);
|
||||||
|
v.monotone();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Decrement calls static |FTensor::decrement|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
FRTensor::decrement(IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input/output vector size in FRTensor::decrement");
|
||||||
|
|
||||||
|
FTensor::decrement(v, nv);
|
||||||
|
}
|
||||||
|
|
||||||
|
// |URTensor| conversion from folded
|
||||||
|
/* Here we convert folded full symmetry tensor to unfolded. We copy all
|
||||||
|
columns of folded tensor to unfolded and leave other columns
|
||||||
|
(duplicates) zero. In this way, if the unfolded tensor is folded back,
|
||||||
|
we should get the same data. */
|
||||||
|
URTensor::URTensor(const FRTensor &ft)
|
||||||
|
: UTensor(along_row, IntSequence(ft.dimen(), ft.nvar()),
|
||||||
|
UFSTensor::calcMaxOffset(ft.nvar(), ft.dimen()), ft.ncols(),
|
||||||
|
ft.dimen()),
|
||||||
|
nv(ft.nvar())
|
||||||
|
{
|
||||||
|
zeros();
|
||||||
|
for (index src = ft.begin(); src != ft.end(); ++src)
|
||||||
|
{
|
||||||
|
index in(this, src.getCoor());
|
||||||
|
copyRow(ft, *src, *in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we just return a reference to new instance of folded tensor. */
|
||||||
|
|
||||||
|
FTensor &
|
||||||
|
URTensor::fold() const
|
||||||
|
{
|
||||||
|
return *(new FRTensor(*this));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we just call |UTensor| respective static methods. */
|
||||||
|
|
||||||
|
void
|
||||||
|
URTensor::increment(IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input/output vector size in URTensor::increment");
|
||||||
|
|
||||||
|
UTensor::increment(v, nv);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
URTensor::decrement(IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input/output vector size in URTensor::decrement");
|
||||||
|
|
||||||
|
UTensor::decrement(v, nv);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
URTensor::getOffset(const IntSequence &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.size() != dimen(),
|
||||||
|
"Wrong input vector size in URTensor::getOffset");
|
||||||
|
|
||||||
|
return UTensor::getOffset(v, nv);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we construct $v_1\otimes v_2\otimes\ldots\otimes v_n$, where
|
||||||
|
$v_1,v_2,\ldots,v_n$ are stored in |vector<ConstVector>|. */
|
||||||
|
|
||||||
|
URSingleTensor::URSingleTensor(const vector<ConstVector> &cols)
|
||||||
|
: URTensor(1, cols[0].length(), cols.size())
|
||||||
|
{
|
||||||
|
if (dimen() == 1)
|
||||||
|
{
|
||||||
|
getData() = cols[0];
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Vector *last = new Vector(cols[cols.size()-1]);
|
||||||
|
for (int i = cols.size()-2; i > 0; i--)
|
||||||
|
{
|
||||||
|
Vector *newlast = new Vector(Tensor::power(nvar(), cols.size()-i));
|
||||||
|
KronProd::kronMult(cols[i], ConstVector(*last), *newlast);
|
||||||
|
delete last;
|
||||||
|
last = newlast;
|
||||||
|
}
|
||||||
|
KronProd::kronMult(cols[0], ConstVector(*last), getData());
|
||||||
|
delete last;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we construct $v\otimes\ldots\otimes v$, where the number of $v$
|
||||||
|
copies is |d|. */
|
||||||
|
|
||||||
|
URSingleTensor::URSingleTensor(const ConstVector &v, int d)
|
||||||
|
: URTensor(1, v.length(), d)
|
||||||
|
{
|
||||||
|
if (d == 1)
|
||||||
|
{
|
||||||
|
getData() = v;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Vector *last = new Vector(v);
|
||||||
|
for (int i = d-2; i > 0; i--)
|
||||||
|
{
|
||||||
|
Vector *newlast = new Vector(last->length()*v.length());
|
||||||
|
KronProd::kronMult(v, ConstVector(*last), *newlast);
|
||||||
|
delete last;
|
||||||
|
last = newlast;
|
||||||
|
}
|
||||||
|
KronProd::kronMult(v, ConstVector(*last), getData());
|
||||||
|
delete last;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we construct |FRSingleTensor| from |URSingleTensor| and return
|
||||||
|
its reference. */
|
||||||
|
|
||||||
|
FTensor &
|
||||||
|
URSingleTensor::fold() const
|
||||||
|
{
|
||||||
|
return *(new FRSingleTensor(*this));
|
||||||
|
}
|
||||||
|
|
||||||
|
// |FRSingleTensor| conversion from unfolded
|
||||||
|
/* The conversion from unfolded |URSingleTensor| to folded
|
||||||
|
|FRSingleTensor| is completely the same as conversion from |URTensor|
|
||||||
|
to |FRTensor|, only we do not copy rows but elements. */
|
||||||
|
FRSingleTensor::FRSingleTensor(const URSingleTensor &ut)
|
||||||
|
: FRTensor(1, ut.nvar(), ut.dimen())
|
||||||
|
{
|
||||||
|
zeros();
|
||||||
|
for (index in = ut.begin(); in != ut.end(); ++in)
|
||||||
|
{
|
||||||
|
IntSequence vtmp(in.getCoor());
|
||||||
|
vtmp.sort();
|
||||||
|
index tar(this, vtmp);
|
||||||
|
get(*tar, 0) += ut.get(*in, 0);
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,205 +0,0 @@
|
||||||
@q $Id: rfs_tensor.cweb 148 2005-04-19 15:12:26Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt rfs\_tensor.cpp} file.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#include "rfs_tensor.h"
|
|
||||||
#include "kron_prod.h"
|
|
||||||
#include "tl_exception.h"
|
|
||||||
|
|
||||||
@<|FRTensor| conversion from unfolded@>;
|
|
||||||
@<|FRTensor::unfold| code@>;
|
|
||||||
@<|FRTensor::increment| code@>;
|
|
||||||
@<|FRTensor::decrement| code@>;
|
|
||||||
@<|URTensor| conversion from folded@>;
|
|
||||||
@<|URTensor::fold| code@>;
|
|
||||||
@<|URTensor| increment and decrement@>;
|
|
||||||
@<|URTensor::getOffset| code@>;
|
|
||||||
@<|URSingleTensor| constructor 1 code@>;
|
|
||||||
@<|URSingleTensor| constructor 2 code@>;
|
|
||||||
@<|URSingleTensor::fold| code@>;
|
|
||||||
@<|FRSingleTensor| conversion from unfolded@>;
|
|
||||||
|
|
||||||
@ The conversion from unfolded to folded sums up all data from
|
|
||||||
unfolded corresponding to one folded index. So we go through all the
|
|
||||||
rows in the unfolded tensor |ut|, make an index of the folded tensor
|
|
||||||
by sorting the coordinates, and add the row.
|
|
||||||
|
|
||||||
@<|FRTensor| conversion from unfolded@>=
|
|
||||||
FRTensor::FRTensor(const URTensor& ut)
|
|
||||||
: FTensor(along_row, IntSequence(ut.dimen(), ut.nvar()),
|
|
||||||
FFSTensor::calcMaxOffset(ut.nvar(), ut.dimen()), ut.ncols(),
|
|
||||||
ut.dimen()),
|
|
||||||
nv(ut.nvar())
|
|
||||||
{
|
|
||||||
zeros();
|
|
||||||
for (index in = ut.begin(); in != ut.end(); ++in) {
|
|
||||||
IntSequence vtmp(in.getCoor());
|
|
||||||
vtmp.sort();
|
|
||||||
index tar(this, vtmp);
|
|
||||||
addRow(ut, *in, *tar);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here just make a new instance and return the reference.
|
|
||||||
@<|FRTensor::unfold| code@>=
|
|
||||||
UTensor& FRTensor::unfold() const
|
|
||||||
{
|
|
||||||
return *(new URTensor(*this));
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Incrementing is easy. The same as for |FFSTensor|.
|
|
||||||
|
|
||||||
@<|FRTensor::increment| code@>=
|
|
||||||
void FRTensor::increment(IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input/output vector size in FRTensor::increment");
|
|
||||||
|
|
||||||
UTensor::increment(v, nv);
|
|
||||||
v.monotone();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Decrement calls static |FTensor::decrement|.
|
|
||||||
|
|
||||||
@<|FRTensor::decrement| code@>=
|
|
||||||
void FRTensor::decrement(IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input/output vector size in FRTensor::decrement");
|
|
||||||
|
|
||||||
FTensor::decrement(v, nv);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we convert folded full symmetry tensor to unfolded. We copy all
|
|
||||||
columns of folded tensor to unfolded and leave other columns
|
|
||||||
(duplicates) zero. In this way, if the unfolded tensor is folded back,
|
|
||||||
we should get the same data.
|
|
||||||
|
|
||||||
@<|URTensor| conversion from folded@>=
|
|
||||||
URTensor::URTensor(const FRTensor& ft)
|
|
||||||
: UTensor(along_row, IntSequence(ft.dimen(), ft.nvar()),
|
|
||||||
UFSTensor::calcMaxOffset(ft.nvar(), ft.dimen()), ft.ncols(),
|
|
||||||
ft.dimen()),
|
|
||||||
nv(ft.nvar())
|
|
||||||
{
|
|
||||||
zeros();
|
|
||||||
for (index src = ft.begin(); src != ft.end(); ++src) {
|
|
||||||
index in(this, src.getCoor());
|
|
||||||
copyRow(ft, *src, *in);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we just return a reference to new instance of folded tensor.
|
|
||||||
@<|URTensor::fold| code@>=
|
|
||||||
FTensor& URTensor::fold() const
|
|
||||||
{
|
|
||||||
return *(new FRTensor(*this));
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we just call |UTensor| respective static methods.
|
|
||||||
@<|URTensor| increment and decrement@>=
|
|
||||||
void URTensor::increment(IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input/output vector size in URTensor::increment");
|
|
||||||
|
|
||||||
UTensor::increment(v, nv);
|
|
||||||
}
|
|
||||||
|
|
||||||
void URTensor::decrement(IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input/output vector size in URTensor::decrement");
|
|
||||||
|
|
||||||
UTensor::decrement(v, nv);
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|URTensor::getOffset| code@>=
|
|
||||||
int URTensor::getOffset(const IntSequence& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.size() != dimen(),
|
|
||||||
"Wrong input vector size in URTensor::getOffset");
|
|
||||||
|
|
||||||
return UTensor::getOffset(v, nv);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we construct $v_1\otimes v_2\otimes\ldots\otimes v_n$, where
|
|
||||||
$v_1,v_2,\ldots,v_n$ are stored in |vector<ConstVector>|.
|
|
||||||
|
|
||||||
@<|URSingleTensor| constructor 1 code@>=
|
|
||||||
URSingleTensor::URSingleTensor(const vector<ConstVector>& cols)
|
|
||||||
: URTensor(1, cols[0].length(), cols.size())
|
|
||||||
{
|
|
||||||
if (dimen() == 1) {
|
|
||||||
getData() = cols[0];
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
Vector* last = new Vector(cols[cols.size()-1]);
|
|
||||||
for (int i = cols.size()-2; i > 0; i--) {
|
|
||||||
Vector* newlast = new Vector(Tensor::power(nvar(), cols.size()-i));
|
|
||||||
KronProd::kronMult(cols[i], ConstVector(*last), *newlast);
|
|
||||||
delete last;
|
|
||||||
last = newlast;
|
|
||||||
}
|
|
||||||
KronProd::kronMult(cols[0], ConstVector(*last), getData());
|
|
||||||
delete last;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we construct $v\otimes\ldots\otimes v$, where the number of $v$
|
|
||||||
copies is |d|.
|
|
||||||
|
|
||||||
@<|URSingleTensor| constructor 2 code@>=
|
|
||||||
URSingleTensor::URSingleTensor(const ConstVector& v, int d)
|
|
||||||
: URTensor(1, v.length(), d)
|
|
||||||
{
|
|
||||||
if (d == 1) {
|
|
||||||
getData() = v;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
Vector* last = new Vector(v);
|
|
||||||
for (int i = d-2; i > 0; i--) {
|
|
||||||
Vector* newlast = new Vector(last->length()*v.length());
|
|
||||||
KronProd::kronMult(v, ConstVector(*last), *newlast);
|
|
||||||
delete last;
|
|
||||||
last = newlast;
|
|
||||||
}
|
|
||||||
KronProd::kronMult(v, ConstVector(*last), getData());
|
|
||||||
delete last;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we construct |FRSingleTensor| from |URSingleTensor| and return
|
|
||||||
its reference.
|
|
||||||
|
|
||||||
@<|URSingleTensor::fold| code@>=
|
|
||||||
FTensor& URSingleTensor::fold() const
|
|
||||||
{
|
|
||||||
return *(new FRSingleTensor(*this));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ The conversion from unfolded |URSingleTensor| to folded
|
|
||||||
|FRSingleTensor| is completely the same as conversion from |URTensor|
|
|
||||||
to |FRTensor|, only we do not copy rows but elements.
|
|
||||||
|
|
||||||
@<|FRSingleTensor| conversion from unfolded@>=
|
|
||||||
FRSingleTensor::FRSingleTensor(const URSingleTensor& ut)
|
|
||||||
: FRTensor(1, ut.nvar(), ut.dimen())
|
|
||||||
{
|
|
||||||
zeros();
|
|
||||||
for (index in = ut.begin(); in != ut.end(); ++in) {
|
|
||||||
IntSequence vtmp(in.getCoor());
|
|
||||||
vtmp.sort();
|
|
||||||
index tar(this, vtmp);
|
|
||||||
get(*tar, 0) += ut.get(*in, 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt rfs\_tensor.cpp} file.
|
|
|
@ -0,0 +1,173 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// Row-wise full symmetry tensor.
|
||||||
|
|
||||||
|
/* Here we define classes for full symmetry tensors with the
|
||||||
|
multidimensional index identified with rows. The primary usage is for
|
||||||
|
storage of data coming from (or from a sum of)
|
||||||
|
$$\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}$$
|
||||||
|
where $\alpha$ coming from a multidimensional index go through some
|
||||||
|
set $S$ and $c$ is some equivalence. So we model a tensor of the form:
|
||||||
|
$$\left[\prod_{m=1}^l
|
||||||
|
\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}
|
||||||
|
\right]_S^{\gamma_1\ldots\gamma_l}$$
|
||||||
|
Since all $\gamma_1,\ldots,\gamma_l$ correspond to the same variable,
|
||||||
|
the tensor is fully symmetric. The set of indices $S$ cannot be very
|
||||||
|
large and sometimes it is only one element. This case is handled in a
|
||||||
|
special subclass.
|
||||||
|
|
||||||
|
We provide both folded and unfolded versions. Their logic is perfectly
|
||||||
|
the same as in |UFSTensor| and |FFSTensor| with two exceptions. One
|
||||||
|
has been already mentioned, the multidimensional index is along the
|
||||||
|
rows. The second are conversions between the two types. Since this
|
||||||
|
kind of tensor is used to multiply (from the right) a tensor whose
|
||||||
|
multidimensional index is identified with columns, we will need a
|
||||||
|
different way of a conversion. If the multiplication of two folded
|
||||||
|
tensors is to be equivalent with multiplication of two unfolded, the
|
||||||
|
folding of the right tensor must sum all equivalent elements since
|
||||||
|
they are multiplied with the same number from the folded
|
||||||
|
tensor. (Equivalent here means all elements of unfolded tensor
|
||||||
|
corresponding to one element in folded tensor.) For this reason, it is
|
||||||
|
necessary to calculate a column number from the given sequence, so we
|
||||||
|
implement |getOffset|. Process of unfolding is not used, so we
|
||||||
|
implemented it so that unfolding and then folding a tensor would yield
|
||||||
|
the same data. */
|
||||||
|
|
||||||
|
#ifndef RFS_TENSOR_H
|
||||||
|
#define RFS_TENSOR_H
|
||||||
|
|
||||||
|
#include "tensor.hh"
|
||||||
|
#include "fs_tensor.hh"
|
||||||
|
#include "symmetry.hh"
|
||||||
|
|
||||||
|
/* This is straightforward and very similar to |UFSTensor|. */
|
||||||
|
|
||||||
|
class FRTensor;
|
||||||
|
class URTensor : public UTensor
|
||||||
|
{
|
||||||
|
int nv;
|
||||||
|
public:
|
||||||
|
URTensor(int c, int nvar, int d)
|
||||||
|
: UTensor(along_row, IntSequence(d, nvar),
|
||||||
|
UFSTensor::calcMaxOffset(nvar, d), c, d), nv(nvar)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
URTensor(const URTensor &ut)
|
||||||
|
: UTensor(ut), nv(ut.nv)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
URTensor(const FRTensor &ft);
|
||||||
|
|
||||||
|
virtual ~URTensor()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void increment(IntSequence &v) const;
|
||||||
|
void decrement(IntSequence &v) const;
|
||||||
|
FTensor&fold() const;
|
||||||
|
|
||||||
|
int getOffset(const IntSequence &v) const;
|
||||||
|
int
|
||||||
|
nvar() const
|
||||||
|
{
|
||||||
|
return nv;
|
||||||
|
}
|
||||||
|
Symmetry
|
||||||
|
getSym() const
|
||||||
|
{
|
||||||
|
return Symmetry(dimen());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This is straightforward and very similar to |FFSTensor|. */
|
||||||
|
|
||||||
|
class FRTensor : public FTensor
|
||||||
|
{
|
||||||
|
int nv;
|
||||||
|
public:
|
||||||
|
FRTensor(int c, int nvar, int d)
|
||||||
|
: FTensor(along_row, IntSequence(d, nvar),
|
||||||
|
FFSTensor::calcMaxOffset(nvar, d), c, d), nv(nvar)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
FRTensor(const FRTensor &ft)
|
||||||
|
: FTensor(ft), nv(ft.nv)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
FRTensor(const URTensor &ut);
|
||||||
|
|
||||||
|
virtual ~FRTensor()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void increment(IntSequence &v) const;
|
||||||
|
void decrement(IntSequence &v) const;
|
||||||
|
UTensor&unfold() const;
|
||||||
|
|
||||||
|
int
|
||||||
|
nvar() const
|
||||||
|
{
|
||||||
|
return nv;
|
||||||
|
}
|
||||||
|
int
|
||||||
|
getOffset(const IntSequence &v) const
|
||||||
|
{
|
||||||
|
return FTensor::getOffset(v, nv);
|
||||||
|
}
|
||||||
|
Symmetry
|
||||||
|
getSym() const
|
||||||
|
{
|
||||||
|
return Symmetry(dimen());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* The following class represents specialization of |URTensor| coming
|
||||||
|
from Kronecker multiplication of a few vectors. So the resulting
|
||||||
|
row-oriented tensor has one column. We provide two constructors,
|
||||||
|
one constructs the tensor from a few vectors stored as
|
||||||
|
|vector<ConstVector>|. The second makes the Kronecker power of one
|
||||||
|
given vector. */
|
||||||
|
|
||||||
|
class URSingleTensor : public URTensor
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
URSingleTensor(int nvar, int d)
|
||||||
|
: URTensor(1, nvar, d)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
URSingleTensor(const vector<ConstVector> &cols);
|
||||||
|
URSingleTensor(const ConstVector &v, int d);
|
||||||
|
URSingleTensor(const URSingleTensor &ut)
|
||||||
|
: URTensor(ut)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual ~URSingleTensor()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
FTensor&fold() const;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This class represents one column row-oriented tensor. The only way
|
||||||
|
how to construct it is from the |URSingleTensor| or from the
|
||||||
|
scratch. The folding algorithm is the same as folding of general
|
||||||
|
|URTensor|. Only its implementation is different, since we do not copy
|
||||||
|
rows, but only elements. */
|
||||||
|
|
||||||
|
class FRSingleTensor : public FRTensor
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
FRSingleTensor(int nvar, int d)
|
||||||
|
: FRTensor(1, nvar, d)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
FRSingleTensor(const URSingleTensor &ut);
|
||||||
|
FRSingleTensor(const FRSingleTensor &ft)
|
||||||
|
: FRTensor(ft)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual ~FRSingleTensor()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,148 +0,0 @@
|
||||||
@q $Id: rfs_tensor.hweb 741 2006-05-09 11:12:46Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Row-wise full symmetry tensor. Start of {\tt rfs\_tensor.h} file.
|
|
||||||
|
|
||||||
Here we define classes for full symmetry tensors with the
|
|
||||||
multidimensional index identified with rows. The primary usage is for
|
|
||||||
storage of data coming from (or from a sum of)
|
|
||||||
$$\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}$$
|
|
||||||
where $\alpha$ coming from a multidimensional index go through some
|
|
||||||
set $S$ and $c$ is some equivalence. So we model a tensor of the form:
|
|
||||||
$$\left[\prod_{m=1}^l
|
|
||||||
\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}
|
|
||||||
\right]_S^{\gamma_1\ldots\gamma_l}$$
|
|
||||||
Since all $\gamma_1,\ldots,\gamma_l$ correspond to the same variable,
|
|
||||||
the tensor is fully symmetric. The set of indices $S$ cannot be very
|
|
||||||
large and sometimes it is only one element. This case is handled in a
|
|
||||||
special subclass.
|
|
||||||
|
|
||||||
We provide both folded and unfolded versions. Their logic is perfectly
|
|
||||||
the same as in |UFSTensor| and |FFSTensor| with two exceptions. One
|
|
||||||
has been already mentioned, the multidimensional index is along the
|
|
||||||
rows. The second are conversions between the two types. Since this
|
|
||||||
kind of tensor is used to multiply (from the right) a tensor whose
|
|
||||||
multidimensional index is identified with columns, we will need a
|
|
||||||
different way of a conversion. If the multiplication of two folded
|
|
||||||
tensors is to be equivalent with multiplication of two unfolded, the
|
|
||||||
folding of the right tensor must sum all equivalent elements since
|
|
||||||
they are multiplied with the same number from the folded
|
|
||||||
tensor. (Equivalent here means all elements of unfolded tensor
|
|
||||||
corresponding to one element in folded tensor.) For this reason, it is
|
|
||||||
necessary to calculate a column number from the given sequence, so we
|
|
||||||
implement |getOffset|. Process of unfolding is not used, so we
|
|
||||||
implemented it so that unfolding and then folding a tensor would yield
|
|
||||||
the same data.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#ifndef RFS_TENSOR_H
|
|
||||||
#define RFS_TENSOR_H
|
|
||||||
|
|
||||||
#include "tensor.h"
|
|
||||||
#include "fs_tensor.h"
|
|
||||||
#include "symmetry.h"
|
|
||||||
|
|
||||||
@<|URTensor| class declaration@>;
|
|
||||||
@<|FRTensor| class declaration@>;
|
|
||||||
@<|URSingleTensor| class declaration@>;
|
|
||||||
@<|FRSingleTensor| class declaration@>;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
@ This is straightforward and very similar to |UFSTensor|.
|
|
||||||
@<|URTensor| class declaration@>=
|
|
||||||
class FRTensor;
|
|
||||||
class URTensor : public UTensor {
|
|
||||||
int nv;
|
|
||||||
public:@;
|
|
||||||
@<|URTensor| constructor declaration@>;
|
|
||||||
virtual ~URTensor()@+ {}
|
|
||||||
|
|
||||||
void increment(IntSequence& v) const;
|
|
||||||
void decrement(IntSequence& v) const;
|
|
||||||
FTensor& fold() const;
|
|
||||||
|
|
||||||
int getOffset(const IntSequence& v) const;
|
|
||||||
int nvar() const
|
|
||||||
{@+ return nv;@+}
|
|
||||||
Symmetry getSym() const
|
|
||||||
{@+ return Symmetry(dimen());@+}
|
|
||||||
};
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|URTensor| constructor declaration@>=
|
|
||||||
URTensor(int c, int nvar, int d)
|
|
||||||
: UTensor(along_row, IntSequence(d, nvar),
|
|
||||||
UFSTensor::calcMaxOffset(nvar, d), c, d), nv(nvar)@+ {}
|
|
||||||
URTensor(const URTensor& ut)
|
|
||||||
: UTensor(ut), nv(ut.nv)@+ {}
|
|
||||||
URTensor(const FRTensor& ft);
|
|
||||||
|
|
||||||
@ This is straightforward and very similar to |FFSTensor|.
|
|
||||||
@<|FRTensor| class declaration@>=
|
|
||||||
class FRTensor : public FTensor {
|
|
||||||
int nv;
|
|
||||||
public:@;
|
|
||||||
@<|FRTensor| constructor declaration@>;
|
|
||||||
virtual ~FRTensor()@+ {}
|
|
||||||
|
|
||||||
void increment(IntSequence& v) const;
|
|
||||||
void decrement(IntSequence& v) const;
|
|
||||||
UTensor& unfold() const;
|
|
||||||
|
|
||||||
int nvar() const
|
|
||||||
{@+ return nv;@+}
|
|
||||||
int getOffset(const IntSequence& v) const
|
|
||||||
{@+ return FTensor::getOffset(v, nv);@+}
|
|
||||||
Symmetry getSym() const
|
|
||||||
{@+ return Symmetry(dimen());@+}
|
|
||||||
};
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|FRTensor| constructor declaration@>=
|
|
||||||
FRTensor(int c, int nvar, int d)
|
|
||||||
: FTensor(along_row, IntSequence(d, nvar),
|
|
||||||
FFSTensor::calcMaxOffset(nvar, d), c, d), nv(nvar)@+ {}
|
|
||||||
FRTensor(const FRTensor& ft)
|
|
||||||
: FTensor(ft), nv(ft.nv)@+ {}
|
|
||||||
FRTensor(const URTensor& ut);
|
|
||||||
|
|
||||||
@ The following class represents specialization of |URTensor| coming
|
|
||||||
from Kronecker multiplication of a few vectors. So the resulting
|
|
||||||
row-oriented tensor has one column. We provide two constructors,
|
|
||||||
one constructs the tensor from a few vectors stored as
|
|
||||||
|vector<ConstVector>|. The second makes the Kronecker power of one
|
|
||||||
given vector.
|
|
||||||
|
|
||||||
@<|URSingleTensor| class declaration@>=
|
|
||||||
class URSingleTensor : public URTensor {
|
|
||||||
public:@;
|
|
||||||
URSingleTensor(int nvar, int d)
|
|
||||||
: URTensor(1, nvar, d)@+ {}
|
|
||||||
URSingleTensor(const vector<ConstVector>& cols);
|
|
||||||
URSingleTensor(const ConstVector& v, int d);
|
|
||||||
URSingleTensor(const URSingleTensor& ut)
|
|
||||||
: URTensor(ut)@+ {}
|
|
||||||
virtual ~URSingleTensor()@+ {}
|
|
||||||
FTensor& fold() const;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ This class represents one column row-oriented tensor. The only way
|
|
||||||
how to construct it is from the |URSingleTensor| or from the
|
|
||||||
scratch. The folding algorithm is the same as folding of general
|
|
||||||
|URTensor|. Only its implementation is different, since we do not copy
|
|
||||||
rows, but only elements.
|
|
||||||
|
|
||||||
@<|FRSingleTensor| class declaration@>=
|
|
||||||
class FRSingleTensor : public FRTensor {
|
|
||||||
public:@;
|
|
||||||
FRSingleTensor(int nvar, int d)
|
|
||||||
: FRTensor(1, nvar, d)@+ {}
|
|
||||||
FRSingleTensor(const URSingleTensor& ut);
|
|
||||||
FRSingleTensor(const FRSingleTensor& ft)
|
|
||||||
: FRTensor(ft)@+ {}
|
|
||||||
virtual ~FRSingleTensor()@+ {}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt rfs\_tensor.h} file.
|
|
|
@ -0,0 +1,248 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "sparse_tensor.hh"
|
||||||
|
#include "fs_tensor.hh"
|
||||||
|
#include "tl_exception.hh"
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
/* This is straightforward. Before we insert anything, we do a few
|
||||||
|
checks. Then we reset |first_nz_row| and |last_nz_row| if necessary. */
|
||||||
|
|
||||||
|
void
|
||||||
|
SparseTensor::insert(const IntSequence &key, int r, double c)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(r < 0 || r >= nr,
|
||||||
|
"Row number out of dimension of tensor in SparseTensor::insert");
|
||||||
|
TL_RAISE_IF(key.size() != dimen(),
|
||||||
|
"Wrong length of key in SparseTensor::insert");
|
||||||
|
TL_RAISE_IF(!std::isfinite(c),
|
||||||
|
"Insertion of non-finite value in SparseTensor::insert");
|
||||||
|
|
||||||
|
iterator first_pos = m.lower_bound(key);
|
||||||
|
|
||||||
|
// check that pair |key| and |r| is unique
|
||||||
|
iterator last_pos = m.upper_bound(key);
|
||||||
|
for (iterator it = first_pos; it != last_pos; ++it)
|
||||||
|
if ((*it).second.first == r)
|
||||||
|
{
|
||||||
|
TL_RAISE("Duplicate <key, r> insertion in SparseTensor::insert");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
m.insert(first_pos, Map::value_type(key, Item(r, c)));
|
||||||
|
if (first_nz_row > r)
|
||||||
|
first_nz_row = r;
|
||||||
|
if (last_nz_row < r)
|
||||||
|
last_nz_row = r;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This returns true if all items are finite (not Nan nor Inf). */
|
||||||
|
|
||||||
|
bool
|
||||||
|
SparseTensor::isFinite() const
|
||||||
|
{
|
||||||
|
bool res = true;
|
||||||
|
const_iterator run = m.begin();
|
||||||
|
while (res && run != m.end())
|
||||||
|
{
|
||||||
|
if (!std::isfinite((*run).second.second))
|
||||||
|
res = false;
|
||||||
|
++run;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This returns a ratio of a number of non-zero columns in folded
|
||||||
|
tensor to the total number of columns. */
|
||||||
|
|
||||||
|
double
|
||||||
|
SparseTensor::getFoldIndexFillFactor() const
|
||||||
|
{
|
||||||
|
int cnt = 0;
|
||||||
|
const_iterator start_col = m.begin();
|
||||||
|
while (start_col != m.end())
|
||||||
|
{
|
||||||
|
cnt++;
|
||||||
|
const IntSequence &key = (*start_col).first;
|
||||||
|
start_col = m.upper_bound(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ((double) cnt)/ncols();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This returns a ratio of a number of non-zero columns in unfolded
|
||||||
|
tensor to the total number of columns. */
|
||||||
|
|
||||||
|
double
|
||||||
|
SparseTensor::getUnfoldIndexFillFactor() const
|
||||||
|
{
|
||||||
|
int cnt = 0;
|
||||||
|
const_iterator start_col = m.begin();
|
||||||
|
while (start_col != m.end())
|
||||||
|
{
|
||||||
|
const IntSequence &key = (*start_col).first;
|
||||||
|
Symmetry s(key);
|
||||||
|
cnt += Tensor::noverseq(s);
|
||||||
|
start_col = m.upper_bound(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ((double) cnt)/ncols();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This prints the fill factor and all items. */
|
||||||
|
|
||||||
|
void
|
||||||
|
SparseTensor::print() const
|
||||||
|
{
|
||||||
|
printf("Fill: %3.2f %%\n", 100*getFillFactor());
|
||||||
|
const_iterator start_col = m.begin();
|
||||||
|
while (start_col != m.end())
|
||||||
|
{
|
||||||
|
const IntSequence &key = (*start_col).first;
|
||||||
|
printf("Column: "); key.print();
|
||||||
|
const_iterator end_col = m.upper_bound(key);
|
||||||
|
int cnt = 1;
|
||||||
|
for (const_iterator run = start_col; run != end_col; ++run, cnt++)
|
||||||
|
{
|
||||||
|
if ((cnt/7)*7 == cnt)
|
||||||
|
printf("\n");
|
||||||
|
printf("%d(%6.2g) ", (*run).second.first, (*run).second.second);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
start_col = end_col;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
FSSparseTensor::FSSparseTensor(int d, int nvar, int r)
|
||||||
|
: SparseTensor(d, r, FFSTensor::calcMaxOffset(nvar, d)),
|
||||||
|
nv(nvar), sym(d)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
FSSparseTensor::FSSparseTensor(const FSSparseTensor &t)
|
||||||
|
: SparseTensor(t),
|
||||||
|
nv(t.nvar()), sym(t.sym)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
FSSparseTensor::insert(const IntSequence &key, int r, double c)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(!key.isSorted(),
|
||||||
|
"Key is not sorted in FSSparseTensor::insert");
|
||||||
|
TL_RAISE_IF(key[key.size()-1] >= nv || key[0] < 0,
|
||||||
|
"Wrong value of the key in FSSparseTensor::insert");
|
||||||
|
SparseTensor::insert(key, r, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We go through the tensor |t| which is supposed to have single
|
||||||
|
column. If the item of |t| is nonzero, we make a key by sorting the
|
||||||
|
index, and then we go through all items having the same key (it is its
|
||||||
|
column), obtain the row number and the element, and do the
|
||||||
|
multiplication.
|
||||||
|
|
||||||
|
The test for non-zero is |a != 0.0|, since there will be items which
|
||||||
|
are exact zeros.
|
||||||
|
|
||||||
|
I have also tried to make the loop through the sparse tensor outer, and
|
||||||
|
find index of tensor |t| within the loop. Surprisingly, it is little
|
||||||
|
slower (for monomial tests with probability of zeros equal 0.3). But
|
||||||
|
everything depends how filled is the sparse tensor. */
|
||||||
|
|
||||||
|
void
|
||||||
|
FSSparseTensor::multColumnAndAdd(const Tensor &t, Vector &v) const
|
||||||
|
{
|
||||||
|
// check compatibility of input parameters
|
||||||
|
TL_RAISE_IF(v.length() != nrows(),
|
||||||
|
"Wrong size of output vector in FSSparseTensor::multColumnAndAdd");
|
||||||
|
TL_RAISE_IF(t.dimen() != dimen(),
|
||||||
|
"Wrong dimension of tensor in FSSparseTensor::multColumnAndAdd");
|
||||||
|
TL_RAISE_IF(t.ncols() != 1,
|
||||||
|
"The input tensor is not single-column in FSSparseTensor::multColumnAndAdd");
|
||||||
|
|
||||||
|
for (Tensor::index it = t.begin(); it != t.end(); ++it)
|
||||||
|
{
|
||||||
|
int ind = *it;
|
||||||
|
double a = t.get(ind, 0);
|
||||||
|
if (a != 0.0)
|
||||||
|
{
|
||||||
|
IntSequence key(it.getCoor());
|
||||||
|
key.sort();
|
||||||
|
|
||||||
|
// check that |key| is within the range
|
||||||
|
TL_RAISE_IF(key[0] < 0 || key[key.size()-1] >= nv,
|
||||||
|
"Wrong coordinates of index in FSSparseTensor::multColumnAndAdd");
|
||||||
|
|
||||||
|
const_iterator first_pos = m.lower_bound(key);
|
||||||
|
const_iterator last_pos = m.upper_bound(key);
|
||||||
|
for (const_iterator cit = first_pos; cit != last_pos; ++cit)
|
||||||
|
{
|
||||||
|
int r = (*cit).second.first;
|
||||||
|
double c = (*cit).second.second;
|
||||||
|
v[r] += c * a;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
FSSparseTensor::print() const
|
||||||
|
{
|
||||||
|
printf("FS Sparse tensor: dim=%d, nv=%d, (%dx%d)\n", dim, nv, nr, nc);
|
||||||
|
SparseTensor::print();
|
||||||
|
}
|
||||||
|
|
||||||
|
// |GSSparseTensor| slicing constructor
|
||||||
|
/* This is the same as |@<|FGSTensor| slicing from |FSSparseTensor|@>|. */
|
||||||
|
GSSparseTensor::GSSparseTensor(const FSSparseTensor &t, const IntSequence &ss,
|
||||||
|
const IntSequence &coor, const TensorDimens &td)
|
||||||
|
: SparseTensor(td.dimen(), t.nrows(), td.calcFoldMaxOffset()),
|
||||||
|
tdims(td)
|
||||||
|
{
|
||||||
|
// set |lb| and |ub| to lower and upper bounds of slice indices
|
||||||
|
/* This is the same as |@<set |lb| and |ub| to lower and upper bounds
|
||||||
|
of indices@>| in {\tt gs\_tensor.cpp}, see that file for details. */
|
||||||
|
IntSequence s_offsets(ss.size(), 0);
|
||||||
|
for (int i = 1; i < ss.size(); i++)
|
||||||
|
s_offsets[i] = s_offsets[i-1] + ss[i-1];
|
||||||
|
|
||||||
|
IntSequence lb(coor.size());
|
||||||
|
IntSequence ub(coor.size());
|
||||||
|
for (int i = 0; i < coor.size(); i++)
|
||||||
|
{
|
||||||
|
lb[i] = s_offsets[coor[i]];
|
||||||
|
ub[i] = s_offsets[coor[i]] + ss[coor[i]] - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
FSSparseTensor::const_iterator lbi = t.getMap().lower_bound(lb);
|
||||||
|
FSSparseTensor::const_iterator ubi = t.getMap().upper_bound(ub);
|
||||||
|
for (FSSparseTensor::const_iterator run = lbi; run != ubi; ++run)
|
||||||
|
{
|
||||||
|
if (lb.lessEq((*run).first) && (*run).first.lessEq(ub))
|
||||||
|
{
|
||||||
|
IntSequence c((*run).first);
|
||||||
|
c.add(-1, lb);
|
||||||
|
insert(c, (*run).second.first, (*run).second.second);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GSSparseTensor::insert(const IntSequence &s, int r, double c)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(!s.less(tdims.getNVX()),
|
||||||
|
"Wrong coordinates of index in GSSparseTensor::insert");
|
||||||
|
SparseTensor::insert(s, r, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
GSSparseTensor::print() const
|
||||||
|
{
|
||||||
|
printf("GS Sparse tensor: (%dx%d)\nSymmetry: ", nr, nc);
|
||||||
|
tdims.getSym().print();
|
||||||
|
printf("NVS: ");
|
||||||
|
tdims.getNVS().print();
|
||||||
|
SparseTensor::print();
|
||||||
|
}
|
|
@ -1,274 +0,0 @@
|
||||||
@q $Id: sparse_tensor.cweb 1258 2007-05-11 13:59:10Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt sparse\_tensor.cpp} file.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#include "sparse_tensor.h"
|
|
||||||
#include "fs_tensor.h"
|
|
||||||
#include "tl_exception.h"
|
|
||||||
|
|
||||||
#include <cmath>
|
|
||||||
|
|
||||||
@<|SparseTensor::insert| code@>;
|
|
||||||
@<|SparseTensor::isFinite| code@>;
|
|
||||||
@<|SparseTensor::getFoldIndexFillFactor| code@>;
|
|
||||||
@<|SparseTensor::getUnfoldIndexFillFactor| code@>;
|
|
||||||
@<|SparseTensor::print| code@>;
|
|
||||||
@<|FSSparseTensor| constructor code@>;
|
|
||||||
@<|FSSparseTensor| copy constructor code@>;
|
|
||||||
@<|FSSparseTensor::insert| code@>;
|
|
||||||
@<|FSSparseTensor::multColumnAndAdd| code@>;
|
|
||||||
@<|FSSparseTensor::print| code@>;
|
|
||||||
@<|GSSparseTensor| slicing constructor@>;
|
|
||||||
@<|GSSparseTensor::insert| code@>;
|
|
||||||
@<|GSSparseTensor::print| code@>;
|
|
||||||
|
|
||||||
@ This is straightforward. Before we insert anything, we do a few
|
|
||||||
checks. Then we reset |first_nz_row| and |last_nz_row| if necessary.
|
|
||||||
|
|
||||||
@<|SparseTensor::insert| code@>=
|
|
||||||
void SparseTensor::insert(const IntSequence& key, int r, double c)
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(r < 0 || r >= nr,
|
|
||||||
"Row number out of dimension of tensor in SparseTensor::insert");
|
|
||||||
TL_RAISE_IF(key.size() != dimen(),
|
|
||||||
"Wrong length of key in SparseTensor::insert");
|
|
||||||
TL_RAISE_IF(! std::isfinite(c),
|
|
||||||
"Insertion of non-finite value in SparseTensor::insert");
|
|
||||||
|
|
||||||
iterator first_pos = m.lower_bound(key);
|
|
||||||
@<check that pair |key| and |r| is unique@>;
|
|
||||||
m.insert(first_pos, Map::value_type(key, Item(r,c)));
|
|
||||||
if (first_nz_row > r)
|
|
||||||
first_nz_row = r;
|
|
||||||
if (last_nz_row < r)
|
|
||||||
last_nz_row = r;
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<check that pair |key| and |r| is unique@>=
|
|
||||||
iterator last_pos = m.upper_bound(key);
|
|
||||||
for (iterator it = first_pos; it != last_pos; ++it)
|
|
||||||
if ((*it).second.first == r) {
|
|
||||||
TL_RAISE("Duplicate <key, r> insertion in SparseTensor::insert");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This returns true if all items are finite (not Nan nor Inf).
|
|
||||||
@<|SparseTensor::isFinite| code@>=
|
|
||||||
bool SparseTensor::isFinite() const
|
|
||||||
{
|
|
||||||
bool res = true;
|
|
||||||
const_iterator run = m.begin();
|
|
||||||
while (res && run != m.end()) {
|
|
||||||
if (! std::isfinite((*run).second.second))
|
|
||||||
res = false;
|
|
||||||
++run;
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This returns a ratio of a number of non-zero columns in folded
|
|
||||||
tensor to the total number of columns.
|
|
||||||
|
|
||||||
@<|SparseTensor::getFoldIndexFillFactor| code@>=
|
|
||||||
double SparseTensor::getFoldIndexFillFactor() const
|
|
||||||
{
|
|
||||||
int cnt = 0;
|
|
||||||
const_iterator start_col = m.begin();
|
|
||||||
while (start_col != m.end()) {
|
|
||||||
cnt++;
|
|
||||||
const IntSequence& key = (*start_col).first;
|
|
||||||
start_col = m.upper_bound(key);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ((double)cnt)/ncols();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This returns a ratio of a number of non-zero columns in unfolded
|
|
||||||
tensor to the total number of columns.
|
|
||||||
|
|
||||||
@<|SparseTensor::getUnfoldIndexFillFactor| code@>=
|
|
||||||
double SparseTensor::getUnfoldIndexFillFactor() const
|
|
||||||
{
|
|
||||||
int cnt = 0;
|
|
||||||
const_iterator start_col = m.begin();
|
|
||||||
while (start_col != m.end()) {
|
|
||||||
const IntSequence& key = (*start_col).first;
|
|
||||||
Symmetry s(key);
|
|
||||||
cnt += Tensor::noverseq(s);
|
|
||||||
start_col = m.upper_bound(key);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ((double)cnt)/ncols();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ This prints the fill factor and all items.
|
|
||||||
@<|SparseTensor::print| code@>=
|
|
||||||
void SparseTensor::print() const
|
|
||||||
{
|
|
||||||
printf("Fill: %3.2f %%\n", 100*getFillFactor());
|
|
||||||
const_iterator start_col = m.begin();
|
|
||||||
while (start_col != m.end()) {
|
|
||||||
const IntSequence& key = (*start_col).first;
|
|
||||||
printf("Column: ");key.print();
|
|
||||||
const_iterator end_col = m.upper_bound(key);
|
|
||||||
int cnt = 1;
|
|
||||||
for (const_iterator run = start_col; run != end_col; ++run, cnt++) {
|
|
||||||
if ((cnt/7)*7 == cnt)
|
|
||||||
printf("\n");
|
|
||||||
printf("%d(%6.2g) ", (*run).second.first, (*run).second.second);
|
|
||||||
}
|
|
||||||
printf("\n");
|
|
||||||
start_col = end_col;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|FSSparseTensor| constructor code@>=
|
|
||||||
FSSparseTensor::FSSparseTensor(int d, int nvar, int r)
|
|
||||||
: SparseTensor(d, r, FFSTensor::calcMaxOffset(nvar, d)),
|
|
||||||
nv(nvar), sym(d)
|
|
||||||
{}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|FSSparseTensor| copy constructor code@>=
|
|
||||||
FSSparseTensor::FSSparseTensor(const FSSparseTensor& t)
|
|
||||||
: SparseTensor(t),
|
|
||||||
nv(t.nvar()), sym(t.sym)
|
|
||||||
{}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|FSSparseTensor::insert| code@>=
|
|
||||||
void FSSparseTensor::insert(const IntSequence& key, int r, double c)
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(!key.isSorted(),
|
|
||||||
"Key is not sorted in FSSparseTensor::insert");
|
|
||||||
TL_RAISE_IF(key[key.size()-1] >= nv || key[0] < 0,
|
|
||||||
"Wrong value of the key in FSSparseTensor::insert");
|
|
||||||
SparseTensor::insert(key, r, c);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ We go through the tensor |t| which is supposed to have single
|
|
||||||
column. If the item of |t| is nonzero, we make a key by sorting the
|
|
||||||
index, and then we go through all items having the same key (it is its
|
|
||||||
column), obtain the row number and the element, and do the
|
|
||||||
multiplication.
|
|
||||||
|
|
||||||
The test for non-zero is |a != 0.0|, since there will be items which
|
|
||||||
are exact zeros.
|
|
||||||
|
|
||||||
I have also tried to make the loop through the sparse tensor outer, and
|
|
||||||
find index of tensor |t| within the loop. Surprisingly, it is little
|
|
||||||
slower (for monomial tests with probability of zeros equal 0.3). But
|
|
||||||
everything depends how filled is the sparse tensor.
|
|
||||||
|
|
||||||
@<|FSSparseTensor::multColumnAndAdd| code@>=
|
|
||||||
void FSSparseTensor::multColumnAndAdd(const Tensor& t, Vector& v) const
|
|
||||||
{
|
|
||||||
@<check compatibility of input parameters@>;
|
|
||||||
for (Tensor::index it = t.begin(); it != t.end(); ++it) {
|
|
||||||
int ind = *it;
|
|
||||||
double a = t.get(ind, 0);
|
|
||||||
if (a != 0.0) {
|
|
||||||
IntSequence key(it.getCoor());
|
|
||||||
key.sort();
|
|
||||||
@<check that |key| is within the range@>;
|
|
||||||
const_iterator first_pos = m.lower_bound(key);
|
|
||||||
const_iterator last_pos = m.upper_bound(key);
|
|
||||||
for (const_iterator cit = first_pos; cit != last_pos; ++cit) {
|
|
||||||
int r = (*cit).second.first;
|
|
||||||
double c = (*cit).second.second;
|
|
||||||
v[r] += c * a;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<check compatibility of input parameters@>=
|
|
||||||
TL_RAISE_IF(v.length() != nrows(),
|
|
||||||
"Wrong size of output vector in FSSparseTensor::multColumnAndAdd");
|
|
||||||
TL_RAISE_IF(t.dimen() != dimen(),
|
|
||||||
"Wrong dimension of tensor in FSSparseTensor::multColumnAndAdd");
|
|
||||||
TL_RAISE_IF(t.ncols() != 1,
|
|
||||||
"The input tensor is not single-column in FSSparseTensor::multColumnAndAdd");
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<check that |key| is within the range@>=
|
|
||||||
TL_RAISE_IF(key[0] < 0 || key[key.size()-1] >= nv,
|
|
||||||
"Wrong coordinates of index in FSSparseTensor::multColumnAndAdd");
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|FSSparseTensor::print| code@>=
|
|
||||||
void FSSparseTensor::print() const
|
|
||||||
{
|
|
||||||
printf("FS Sparse tensor: dim=%d, nv=%d, (%dx%d)\n", dim, nv, nr, nc);
|
|
||||||
SparseTensor::print();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This is the same as |@<|FGSTensor| slicing from |FSSparseTensor|@>|.
|
|
||||||
@<|GSSparseTensor| slicing constructor@>=
|
|
||||||
GSSparseTensor::GSSparseTensor(const FSSparseTensor& t, const IntSequence& ss,
|
|
||||||
const IntSequence& coor, const TensorDimens& td)
|
|
||||||
: SparseTensor(td.dimen(), t.nrows(), td.calcFoldMaxOffset()),
|
|
||||||
tdims(td)
|
|
||||||
{
|
|
||||||
@<set |lb| and |ub| to lower and upper bounds of slice indices@>;
|
|
||||||
|
|
||||||
FSSparseTensor::const_iterator lbi = t.getMap().lower_bound(lb);
|
|
||||||
FSSparseTensor::const_iterator ubi = t.getMap().upper_bound(ub);
|
|
||||||
for (FSSparseTensor::const_iterator run = lbi; run != ubi; ++run) {
|
|
||||||
if (lb.lessEq((*run).first) && (*run).first.lessEq(ub)) {
|
|
||||||
IntSequence c((*run).first);
|
|
||||||
c.add(-1, lb);
|
|
||||||
insert(c, (*run).second.first, (*run).second.second);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This is the same as |@<set |lb| and |ub| to lower and upper bounds
|
|
||||||
of indices@>| in {\tt gs\_tensor.cpp}, see that file for details.
|
|
||||||
|
|
||||||
@<set |lb| and |ub| to lower and upper bounds of slice indices@>=
|
|
||||||
IntSequence s_offsets(ss.size(), 0);
|
|
||||||
for (int i = 1; i < ss.size(); i++)
|
|
||||||
s_offsets[i] = s_offsets[i-1] + ss[i-1];
|
|
||||||
|
|
||||||
IntSequence lb(coor.size());
|
|
||||||
IntSequence ub(coor.size());
|
|
||||||
for (int i = 0; i < coor.size(); i++) {
|
|
||||||
lb[i] = s_offsets[coor[i]];
|
|
||||||
ub[i] = s_offsets[coor[i]] + ss[coor[i]] - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|GSSparseTensor::insert| code@>=
|
|
||||||
void GSSparseTensor::insert(const IntSequence& s, int r, double c)
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(! s.less(tdims.getNVX()),
|
|
||||||
"Wrong coordinates of index in GSSparseTensor::insert");
|
|
||||||
SparseTensor::insert(s, r, c);
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|GSSparseTensor::print| code@>=
|
|
||||||
void GSSparseTensor::print() const
|
|
||||||
{
|
|
||||||
printf("GS Sparse tensor: (%dx%d)\nSymmetry: ", nr, nc);
|
|
||||||
tdims.getSym().print();
|
|
||||||
printf("NVS: ");
|
|
||||||
tdims.getNVS().print();
|
|
||||||
SparseTensor::print();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ End of {\tt sparse\_tensor.cpp} file.
|
|
|
@ -0,0 +1,187 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// Sparse tensor.
|
||||||
|
|
||||||
|
/* Here we declare a sparse full and general symmetry tensors with the
|
||||||
|
multidimensional index along columns. We implement them as a |multimap|
|
||||||
|
associating to each sequence of coordinates |IntSequence| a set of
|
||||||
|
pairs (row, number). This is very convenient but not optimal in terms
|
||||||
|
of memory consumption. So the implementation can be changed.
|
||||||
|
|
||||||
|
The current |multimap| implementation allows insertions. Another
|
||||||
|
advantage of this approach is that we do not need to calculate column
|
||||||
|
numbers from the |IntSequence|, since the column is accessed directly
|
||||||
|
via the key which is |IntSequence|.
|
||||||
|
|
||||||
|
The only operation we need to do with the full symmetry sparse tensor
|
||||||
|
is a left multiplication of a row oriented single column tensor. The
|
||||||
|
result of such operation is a column of the same size as the sparse
|
||||||
|
tensor. Other important operations are slicing operations. We need to
|
||||||
|
do sparse and dense slices of full symmetry sparse tensors. In fact,
|
||||||
|
the only constructor of general symmetry sparse tensor is slicing from
|
||||||
|
the full symmetry sparse. */
|
||||||
|
|
||||||
|
#ifndef SPARSE_TENSOR_H
|
||||||
|
#define SPARSE_TENSOR_H
|
||||||
|
|
||||||
|
#include "symmetry.hh"
|
||||||
|
#include "tensor.hh"
|
||||||
|
#include "gs_tensor.hh"
|
||||||
|
#include "Vector.h"
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
// |ltseq| predicate
|
||||||
|
struct ltseq
|
||||||
|
{
|
||||||
|
bool
|
||||||
|
operator()(const IntSequence &s1, const IntSequence &s2) const
|
||||||
|
{
|
||||||
|
return s1 < s2;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This is a super class of both full symmetry and general symmetry
|
||||||
|
sparse tensors. It contains a |multimap| and implements insertions. It
|
||||||
|
tracks maximum and minimum row, for which there is an item. */
|
||||||
|
|
||||||
|
class SparseTensor
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef pair<int, double> Item;
|
||||||
|
typedef multimap<IntSequence, Item, ltseq> Map;
|
||||||
|
typedef Map::const_iterator const_iterator;
|
||||||
|
protected:
|
||||||
|
typedef Map::iterator iterator;
|
||||||
|
|
||||||
|
Map m;
|
||||||
|
const int dim;
|
||||||
|
const int nr;
|
||||||
|
const int nc;
|
||||||
|
int first_nz_row;
|
||||||
|
int last_nz_row;
|
||||||
|
public:
|
||||||
|
SparseTensor(int d, int nnr, int nnc)
|
||||||
|
: dim(d), nr(nnr), nc(nnc), first_nz_row(nr), last_nz_row(-1)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
SparseTensor(const SparseTensor &t)
|
||||||
|
: m(t.m), dim(t.dim), nr(t.nr), nc(t.nc)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual ~SparseTensor()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
void insert(const IntSequence &s, int r, double c);
|
||||||
|
const Map &
|
||||||
|
getMap() const
|
||||||
|
{
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
int
|
||||||
|
dimen() const
|
||||||
|
{
|
||||||
|
return dim;
|
||||||
|
}
|
||||||
|
int
|
||||||
|
nrows() const
|
||||||
|
{
|
||||||
|
return nr;
|
||||||
|
}
|
||||||
|
int
|
||||||
|
ncols() const
|
||||||
|
{
|
||||||
|
return nc;
|
||||||
|
}
|
||||||
|
double
|
||||||
|
getFillFactor() const
|
||||||
|
{
|
||||||
|
return ((double) m.size())/(nrows()*ncols());
|
||||||
|
}
|
||||||
|
double getFoldIndexFillFactor() const;
|
||||||
|
double getUnfoldIndexFillFactor() const;
|
||||||
|
int
|
||||||
|
getNumNonZero() const
|
||||||
|
{
|
||||||
|
return m.size();
|
||||||
|
}
|
||||||
|
int
|
||||||
|
getFirstNonZeroRow() const
|
||||||
|
{
|
||||||
|
return first_nz_row;
|
||||||
|
}
|
||||||
|
int
|
||||||
|
getLastNonZeroRow() const
|
||||||
|
{
|
||||||
|
return last_nz_row;
|
||||||
|
}
|
||||||
|
virtual const Symmetry&getSym() const = 0;
|
||||||
|
void print() const;
|
||||||
|
bool isFinite() const;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This is a full symmetry sparse tensor. It implements
|
||||||
|
|multColumnAndAdd| and in addition to |sparseTensor|, it has |nv|
|
||||||
|
(number of variables), and symmetry (basically it is a dimension). */
|
||||||
|
|
||||||
|
class FSSparseTensor : public SparseTensor
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef SparseTensor::const_iterator const_iterator;
|
||||||
|
private:
|
||||||
|
const int nv;
|
||||||
|
const Symmetry sym;
|
||||||
|
public:
|
||||||
|
FSSparseTensor(int d, int nvar, int r);
|
||||||
|
FSSparseTensor(const FSSparseTensor &t);
|
||||||
|
void insert(const IntSequence &s, int r, double c);
|
||||||
|
void multColumnAndAdd(const Tensor &t, Vector &v) const;
|
||||||
|
const Symmetry &
|
||||||
|
getSym() const
|
||||||
|
{
|
||||||
|
return sym;
|
||||||
|
}
|
||||||
|
int
|
||||||
|
nvar() const
|
||||||
|
{
|
||||||
|
return nv;
|
||||||
|
}
|
||||||
|
void print() const;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This is a general symmetry sparse tensor. It has |TensorDimens| and
|
||||||
|
can be constructed as a slice of the full symmetry sparse tensor. The
|
||||||
|
slicing constructor takes the same form as the slicing |FGSTensor|
|
||||||
|
constructor from full symmetry sparse tensor. */
|
||||||
|
|
||||||
|
class GSSparseTensor : public SparseTensor
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef SparseTensor::const_iterator const_iterator;
|
||||||
|
private:
|
||||||
|
const TensorDimens tdims;
|
||||||
|
public:
|
||||||
|
GSSparseTensor(const FSSparseTensor &t, const IntSequence &ss,
|
||||||
|
const IntSequence &coor, const TensorDimens &td);
|
||||||
|
GSSparseTensor(const GSSparseTensor &t)
|
||||||
|
: SparseTensor(t), tdims(t.tdims)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
void insert(const IntSequence &s, int r, double c);
|
||||||
|
const Symmetry &
|
||||||
|
getSym() const
|
||||||
|
{
|
||||||
|
return tdims.getSym();
|
||||||
|
}
|
||||||
|
const TensorDimens &
|
||||||
|
getDims() const
|
||||||
|
{
|
||||||
|
return tdims;
|
||||||
|
}
|
||||||
|
void print() const;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,154 +0,0 @@
|
||||||
@q $Id: sparse_tensor.hweb 522 2005-11-25 15:45:54Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Sparse tensor. Start of {\tt sparse\_tensor.h} file.
|
|
||||||
|
|
||||||
Here we declare a sparse full and general symmetry tensors with the
|
|
||||||
multidimensional index along columns. We implement them as a |multimap|
|
|
||||||
associating to each sequence of coordinates |IntSequence| a set of
|
|
||||||
pairs (row, number). This is very convenient but not optimal in terms
|
|
||||||
of memory consumption. So the implementation can be changed.
|
|
||||||
|
|
||||||
The current |multimap| implementation allows insertions. Another
|
|
||||||
advantage of this approach is that we do not need to calculate column
|
|
||||||
numbers from the |IntSequence|, since the column is accessed directly
|
|
||||||
via the key which is |IntSequence|.
|
|
||||||
|
|
||||||
The only operation we need to do with the full symmetry sparse tensor
|
|
||||||
is a left multiplication of a row oriented single column tensor. The
|
|
||||||
result of such operation is a column of the same size as the sparse
|
|
||||||
tensor. Other important operations are slicing operations. We need to
|
|
||||||
do sparse and dense slices of full symmetry sparse tensors. In fact,
|
|
||||||
the only constructor of general symmetry sparse tensor is slicing from
|
|
||||||
the full symmetry sparse.
|
|
||||||
|
|
||||||
@s SparseTensor int
|
|
||||||
@s FSSparseTensor int
|
|
||||||
@s GSSparseTensor int
|
|
||||||
|
|
||||||
@c
|
|
||||||
#ifndef SPARSE_TENSOR_H
|
|
||||||
#define SPARSE_TENSOR_H
|
|
||||||
|
|
||||||
#include "symmetry.h"
|
|
||||||
#include "tensor.h"
|
|
||||||
#include "gs_tensor.h"
|
|
||||||
#include "Vector.h"
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
@<|ltseq| predicate@>;
|
|
||||||
@<|SparseTensor| class declaration@>;
|
|
||||||
@<|FSSparseTensor| class declaration@>;
|
|
||||||
@<|GSSparseTensor| class declaration@>;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|ltseq| predicate@>=
|
|
||||||
struct ltseq {
|
|
||||||
bool operator()(const IntSequence& s1, const IntSequence& s2) const
|
|
||||||
{@+ return s1 < s2;@+}
|
|
||||||
};
|
|
||||||
|
|
||||||
@ This is a super class of both full symmetry and general symmetry
|
|
||||||
sparse tensors. It contains a |multimap| and implements insertions. It
|
|
||||||
tracks maximum and minimum row, for which there is an item.
|
|
||||||
|
|
||||||
@<|SparseTensor| class declaration@>=
|
|
||||||
class SparseTensor {
|
|
||||||
public:@;
|
|
||||||
typedef pair<int, double> Item;
|
|
||||||
typedef multimap<IntSequence, Item, ltseq> Map;
|
|
||||||
typedef Map::const_iterator const_iterator;
|
|
||||||
protected:@;
|
|
||||||
typedef Map::iterator iterator;
|
|
||||||
|
|
||||||
Map m;
|
|
||||||
const int dim;
|
|
||||||
const int nr;
|
|
||||||
const int nc;
|
|
||||||
int first_nz_row;
|
|
||||||
int last_nz_row;
|
|
||||||
public:@;
|
|
||||||
SparseTensor(int d, int nnr, int nnc)
|
|
||||||
: dim(d), nr(nnr), nc(nnc), first_nz_row(nr), last_nz_row(-1) @+{}
|
|
||||||
SparseTensor(const SparseTensor& t)
|
|
||||||
: m(t.m), dim(t.dim), nr(t.nr), nc(t.nc) @+{}
|
|
||||||
virtual ~SparseTensor() @+{}
|
|
||||||
void insert(const IntSequence& s, int r, double c);
|
|
||||||
const Map& getMap() const
|
|
||||||
{@+ return m;@+}
|
|
||||||
int dimen() const
|
|
||||||
{@+ return dim;@+}
|
|
||||||
int nrows() const
|
|
||||||
{@+ return nr;@+}
|
|
||||||
int ncols() const
|
|
||||||
{@+ return nc;@+}
|
|
||||||
double getFillFactor() const
|
|
||||||
{@+ return ((double)m.size())/(nrows()*ncols());@+}
|
|
||||||
double getFoldIndexFillFactor() const;
|
|
||||||
double getUnfoldIndexFillFactor() const;
|
|
||||||
int getNumNonZero() const
|
|
||||||
{@+ return m.size();@+}
|
|
||||||
int getFirstNonZeroRow() const
|
|
||||||
{@+ return first_nz_row;@+}
|
|
||||||
int getLastNonZeroRow() const
|
|
||||||
{@+ return last_nz_row;@+}
|
|
||||||
virtual const Symmetry& getSym() const =0;
|
|
||||||
void print() const;
|
|
||||||
bool isFinite() const;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This is a full symmetry sparse tensor. It implements
|
|
||||||
|multColumnAndAdd| and in addition to |sparseTensor|, it has |nv|
|
|
||||||
(number of variables), and symmetry (basically it is a dimension).
|
|
||||||
|
|
||||||
@<|FSSparseTensor| class declaration@>=
|
|
||||||
class FSSparseTensor : public SparseTensor {
|
|
||||||
public:@;
|
|
||||||
typedef SparseTensor::const_iterator const_iterator;
|
|
||||||
private:@;
|
|
||||||
const int nv;
|
|
||||||
const Symmetry sym;
|
|
||||||
public:@;
|
|
||||||
FSSparseTensor(int d, int nvar, int r);
|
|
||||||
FSSparseTensor(const FSSparseTensor& t);
|
|
||||||
void insert(const IntSequence& s, int r, double c);
|
|
||||||
void multColumnAndAdd(const Tensor& t, Vector& v) const;
|
|
||||||
const Symmetry& getSym() const
|
|
||||||
{@+ return sym;@+}
|
|
||||||
int nvar() const
|
|
||||||
{@+ return nv;@+}
|
|
||||||
void print() const;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ This is a general symmetry sparse tensor. It has |TensorDimens| and
|
|
||||||
can be constructed as a slice of the full symmetry sparse tensor. The
|
|
||||||
slicing constructor takes the same form as the slicing |FGSTensor|
|
|
||||||
constructor from full symmetry sparse tensor.
|
|
||||||
|
|
||||||
@<|GSSparseTensor| class declaration@>=
|
|
||||||
class GSSparseTensor : public SparseTensor {
|
|
||||||
public:@;
|
|
||||||
typedef SparseTensor::const_iterator const_iterator;
|
|
||||||
private:@;
|
|
||||||
const TensorDimens tdims;
|
|
||||||
public:@;
|
|
||||||
GSSparseTensor(const FSSparseTensor& t, const IntSequence& ss,
|
|
||||||
const IntSequence& coor, const TensorDimens& td);
|
|
||||||
GSSparseTensor(const GSSparseTensor& t)
|
|
||||||
: SparseTensor(t), tdims(t.tdims) @+{}
|
|
||||||
void insert(const IntSequence& s, int r, double c);
|
|
||||||
const Symmetry& getSym() const
|
|
||||||
{@+ return tdims.getSym();@+}
|
|
||||||
const TensorDimens& getDims() const
|
|
||||||
{@+ return tdims;@+}
|
|
||||||
void print() const;
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
@ End of {\tt sparse\_tensor.h} file.
|
|
|
@ -0,0 +1,662 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "stack_container.hh"
|
||||||
|
#include "pyramid_prod2.hh"
|
||||||
|
#include "ps_tensor.hh"
|
||||||
|
|
||||||
|
double FoldedStackContainer::fill_threshold = 0.00005;
|
||||||
|
double UnfoldedStackContainer::fill_threshold = 0.00005;
|
||||||
|
|
||||||
|
// |FoldedStackContainer::multAndAdd| sparse code
|
||||||
|
/* Here we multiply the sparse tensor with the
|
||||||
|
|FoldedStackContainer|. We have four implementations,
|
||||||
|
|multAndAddSparse1|, |multAndAddSparse2|, |multAndAddSparse3|, and
|
||||||
|
|multAndAddSparse4|. The third is not threaded yet and I expect that
|
||||||
|
it is certainly the slowest. The |multAndAddSparse4| exploits the
|
||||||
|
sparsity, however, it seems to be still worse than |multAndAddSparse2|
|
||||||
|
even for really sparse matrices. On the other hand, it can be more
|
||||||
|
efficient than |multAndAddSparse2| for large problems, since it does
|
||||||
|
not need that much of memory and can avoid much swapping. Very
|
||||||
|
preliminary examination shows that |multAndAddSparse2| is the best in
|
||||||
|
terms of time. */
|
||||||
|
void
|
||||||
|
FoldedStackContainer::multAndAdd(const FSSparseTensor &t,
|
||||||
|
FGSTensor &out) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(t.nvar() != getAllSize(),
|
||||||
|
"Wrong number of variables of tensor for FoldedStackContainer::multAndAdd");
|
||||||
|
multAndAddSparse2(t, out);
|
||||||
|
}
|
||||||
|
|
||||||
|
// |FoldedStackContainer::multAndAdd| dense code
|
||||||
|
/* Here we perform the Faa Di Bruno step for a given dimension |dim|, and for
|
||||||
|
the dense fully symmetric tensor which is scattered in the container
|
||||||
|
of general symmetric tensors. The implementation is pretty the same as
|
||||||
|
|@<|UnfoldedStackContainer::multAndAdd| dense code@>|. */
|
||||||
|
void
|
||||||
|
FoldedStackContainer::multAndAdd(int dim, const FGSContainer &c, FGSTensor &out) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(c.num() != numStacks(),
|
||||||
|
"Wrong symmetry length of container for FoldedStackContainer::multAndAdd");
|
||||||
|
|
||||||
|
THREAD_GROUP gr;
|
||||||
|
SymmetrySet ss(dim, c.num());
|
||||||
|
for (symiterator si(ss); !si.isEnd(); ++si)
|
||||||
|
{
|
||||||
|
if (c.check(*si))
|
||||||
|
{
|
||||||
|
THREAD *worker = new WorkerFoldMAADense(*this, *si, c, out);
|
||||||
|
gr.insert(worker);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gr.run();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This is analogous to |@<|WorkerUnfoldMAADense::operator()()|
|
||||||
|
code@>|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
WorkerFoldMAADense::operator()()
|
||||||
|
{
|
||||||
|
Permutation iden(dense_cont.num());
|
||||||
|
IntSequence coor(sym, iden.getMap());
|
||||||
|
const FGSTensor *g = dense_cont.get(sym);
|
||||||
|
cont.multAndAddStacks(coor, *g, out, &out);
|
||||||
|
}
|
||||||
|
|
||||||
|
WorkerFoldMAADense::WorkerFoldMAADense(const FoldedStackContainer &container,
|
||||||
|
const Symmetry &s,
|
||||||
|
const FGSContainer &dcontainer,
|
||||||
|
FGSTensor &outten)
|
||||||
|
: cont(container), sym(s), dense_cont(dcontainer), out(outten)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This is analogous to |@<|UnfoldedStackContainer::multAndAddSparse1|
|
||||||
|
code@>|. */
|
||||||
|
void
|
||||||
|
FoldedStackContainer::multAndAddSparse1(const FSSparseTensor &t,
|
||||||
|
FGSTensor &out) const
|
||||||
|
{
|
||||||
|
THREAD_GROUP gr;
|
||||||
|
UFSTensor dummy(0, numStacks(), t.dimen());
|
||||||
|
for (Tensor::index ui = dummy.begin(); ui != dummy.end(); ++ui)
|
||||||
|
{
|
||||||
|
THREAD *worker = new WorkerFoldMAASparse1(*this, t, out, ui.getCoor());
|
||||||
|
gr.insert(worker);
|
||||||
|
}
|
||||||
|
gr.run();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This is analogous to |@<|WorkerUnfoldMAASparse1::operator()()| code@>|.
|
||||||
|
The only difference is that instead of |UPSTensor| as a
|
||||||
|
result of multiplication of unfolded tensor and tensors from
|
||||||
|
containers, we have |FPSTensor| with partially folded permuted
|
||||||
|
symmetry.
|
||||||
|
|
||||||
|
todo: make slice vertically narrowed according to the fill of t,
|
||||||
|
vertically narrow out accordingly. */
|
||||||
|
|
||||||
|
void
|
||||||
|
WorkerFoldMAASparse1::operator()()
|
||||||
|
{
|
||||||
|
const EquivalenceSet &eset = ebundle.get(out.dimen());
|
||||||
|
const PermutationSet &pset = tls.pbundle->get(t.dimen());
|
||||||
|
Permutation iden(t.dimen());
|
||||||
|
|
||||||
|
UPSTensor slice(t, cont.getStackSizes(), coor,
|
||||||
|
PerTensorDimens(cont.getStackSizes(), coor));
|
||||||
|
for (int iper = 0; iper < pset.getNum(); iper++)
|
||||||
|
{
|
||||||
|
const Permutation &per = pset.get(iper);
|
||||||
|
IntSequence percoor(coor.size());
|
||||||
|
per.apply(coor, percoor);
|
||||||
|
for (EquivalenceSet::const_iterator it = eset.begin();
|
||||||
|
it != eset.end(); ++it)
|
||||||
|
{
|
||||||
|
if ((*it).numClasses() == t.dimen())
|
||||||
|
{
|
||||||
|
StackProduct<FGSTensor> sp(cont, *it, out.getSym());
|
||||||
|
if (!sp.isZero(percoor))
|
||||||
|
{
|
||||||
|
KronProdStack<FGSTensor> kp(sp, percoor);
|
||||||
|
kp.optimizeOrder();
|
||||||
|
const Permutation &oper = kp.getPer();
|
||||||
|
if (Permutation(oper, per) == iden)
|
||||||
|
{
|
||||||
|
FPSTensor fps(out.getDims(), *it, slice, kp);
|
||||||
|
{
|
||||||
|
SYNCHRO syn(&out, "WorkerUnfoldMAASparse1");
|
||||||
|
fps.addTo(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
WorkerFoldMAASparse1::WorkerFoldMAASparse1(const FoldedStackContainer &container,
|
||||||
|
const FSSparseTensor &ten,
|
||||||
|
FGSTensor &outten, const IntSequence &c)
|
||||||
|
: cont(container), t(ten), out(outten), coor(c), ebundle(*(tls.ebundle))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here is the second implementation of sparse folded |multAndAdd|. It
|
||||||
|
is pretty similar to implementation of
|
||||||
|
|@<|UnfoldedStackContainer::multAndAddSparse2| code@>|. We make a
|
||||||
|
dense folded |slice|, and then call folded |multAndAddStacks|, which
|
||||||
|
multiplies all the combinations compatible with the slice. */
|
||||||
|
|
||||||
|
void
|
||||||
|
FoldedStackContainer::multAndAddSparse2(const FSSparseTensor &t,
|
||||||
|
FGSTensor &out) const
|
||||||
|
{
|
||||||
|
THREAD_GROUP gr;
|
||||||
|
FFSTensor dummy_f(0, numStacks(), t.dimen());
|
||||||
|
for (Tensor::index fi = dummy_f.begin(); fi != dummy_f.end(); ++fi)
|
||||||
|
{
|
||||||
|
THREAD *worker = new WorkerFoldMAASparse2(*this, t, out, fi.getCoor());
|
||||||
|
gr.insert(worker);
|
||||||
|
}
|
||||||
|
gr.run();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we make a sparse slice first and then call |multAndAddStacks|
|
||||||
|
if the slice is not empty. If the slice is really sparse, we call
|
||||||
|
sparse version of |multAndAddStacks|. What means ``really sparse'' is
|
||||||
|
given by |fill_threshold|. It is not tuned yet, a practice shows that
|
||||||
|
it must be a really low number, since sparse |multAndAddStacks| is
|
||||||
|
much slower than the dense version.
|
||||||
|
|
||||||
|
Further, we take only nonzero rows of the slice, and accordingly of
|
||||||
|
the out tensor. We jump over zero initial rows and drop zero tailing
|
||||||
|
rows. */
|
||||||
|
|
||||||
|
void
|
||||||
|
WorkerFoldMAASparse2::operator()()
|
||||||
|
{
|
||||||
|
GSSparseTensor slice(t, cont.getStackSizes(), coor,
|
||||||
|
TensorDimens(cont.getStackSizes(), coor));
|
||||||
|
if (slice.getNumNonZero())
|
||||||
|
{
|
||||||
|
if (slice.getUnfoldIndexFillFactor() > FoldedStackContainer::fill_threshold)
|
||||||
|
{
|
||||||
|
FGSTensor dense_slice(slice);
|
||||||
|
int r1 = slice.getFirstNonZeroRow();
|
||||||
|
int r2 = slice.getLastNonZeroRow();
|
||||||
|
FGSTensor dense_slice1(r1, r2-r1+1, dense_slice);
|
||||||
|
FGSTensor out1(r1, r2-r1+1, out);
|
||||||
|
cont.multAndAddStacks(coor, dense_slice1, out1, &out);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
cont.multAndAddStacks(coor, slice, out, &out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
WorkerFoldMAASparse2::WorkerFoldMAASparse2(const FoldedStackContainer &container,
|
||||||
|
const FSSparseTensor &ten,
|
||||||
|
FGSTensor &outten, const IntSequence &c)
|
||||||
|
: cont(container), t(ten), out(outten), coor(c)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here is the third implementation of the sparse folded
|
||||||
|
|multAndAdd|. It is column-wise implementation, and thus is not a good
|
||||||
|
candidate for the best performer.
|
||||||
|
|
||||||
|
We go through all columns from the output. For each column we
|
||||||
|
calculate folded |sumcol| which is a sum of all appropriate columns
|
||||||
|
for all suitable equivalences. So we go through all suitable
|
||||||
|
equivalences, for each we construct a |StackProduct| object and
|
||||||
|
construct |IrregTensor| for a corresponding column of $z$. The
|
||||||
|
|IrregTensor| is an abstraction for Kronecker multiplication of
|
||||||
|
stacked columns of the two containers without zeros. Then the column
|
||||||
|
is added to |sumcol|. Finally, the |sumcol| is multiplied by the
|
||||||
|
sparse tensor. */
|
||||||
|
|
||||||
|
void
|
||||||
|
FoldedStackContainer::multAndAddSparse3(const FSSparseTensor &t,
|
||||||
|
FGSTensor &out) const
|
||||||
|
{
|
||||||
|
const EquivalenceSet &eset = ebundle.get(out.dimen());
|
||||||
|
for (Tensor::index run = out.begin(); run != out.end(); ++run)
|
||||||
|
{
|
||||||
|
Vector outcol(out, *run);
|
||||||
|
FRSingleTensor sumcol(t.nvar(), t.dimen());
|
||||||
|
sumcol.zeros();
|
||||||
|
for (EquivalenceSet::const_iterator it = eset.begin();
|
||||||
|
it != eset.end(); ++it)
|
||||||
|
{
|
||||||
|
if ((*it).numClasses() == t.dimen())
|
||||||
|
{
|
||||||
|
StackProduct<FGSTensor> sp(*this, *it, out.getSym());
|
||||||
|
IrregTensorHeader header(sp, run.getCoor());
|
||||||
|
IrregTensor irten(header);
|
||||||
|
irten.addTo(sumcol);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t.multColumnAndAdd(sumcol, outcol);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here is the fourth implementation of sparse
|
||||||
|
|FoldedStackContainer::multAndAdd|. It is almost equivalent to
|
||||||
|
|multAndAddSparse2| with the exception that the |FPSTensor| as a
|
||||||
|
result of a product of a slice and Kronecker product of the stack
|
||||||
|
derivatives is calculated in the sparse fashion. For further details, see
|
||||||
|
|@<|FoldedStackContainer::multAndAddStacks| sparse code@>| and
|
||||||
|
|@<|FPSTensor| sparse constructor@>|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
FoldedStackContainer::multAndAddSparse4(const FSSparseTensor &t, FGSTensor &out) const
|
||||||
|
{
|
||||||
|
THREAD_GROUP gr;
|
||||||
|
FFSTensor dummy_f(0, numStacks(), t.dimen());
|
||||||
|
for (Tensor::index fi = dummy_f.begin(); fi != dummy_f.end(); ++fi)
|
||||||
|
{
|
||||||
|
THREAD *worker = new WorkerFoldMAASparse4(*this, t, out, fi.getCoor());
|
||||||
|
gr.insert(worker);
|
||||||
|
}
|
||||||
|
gr.run();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The |WorkerFoldMAASparse4| is the same as |WorkerFoldMAASparse2|
|
||||||
|
with the exception that we call a sparse version of
|
||||||
|
|multAndAddStacks|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
WorkerFoldMAASparse4::operator()()
|
||||||
|
{
|
||||||
|
GSSparseTensor slice(t, cont.getStackSizes(), coor,
|
||||||
|
TensorDimens(cont.getStackSizes(), coor));
|
||||||
|
if (slice.getNumNonZero())
|
||||||
|
cont.multAndAddStacks(coor, slice, out, &out);
|
||||||
|
}
|
||||||
|
|
||||||
|
WorkerFoldMAASparse4::WorkerFoldMAASparse4(const FoldedStackContainer &container,
|
||||||
|
const FSSparseTensor &ten,
|
||||||
|
FGSTensor &outten, const IntSequence &c)
|
||||||
|
: cont(container), t(ten), out(outten), coor(c)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
// |FoldedStackContainer::multAndAddStacks| dense code
|
||||||
|
/* This is almost the same as
|
||||||
|
|@<|UnfoldedStackContainer::multAndAddStacks| code@>|. The only
|
||||||
|
difference is that we do not construct a |UPSTensor| from
|
||||||
|
|KronProdStack|, but we construct partially folded permuted
|
||||||
|
symmetry |FPSTensor|. Note that the tensor |g| must be unfolded
|
||||||
|
in order to be able to multiply with unfolded rows of Kronecker
|
||||||
|
product. However, columns of such a product are partially
|
||||||
|
folded giving a rise to the |FPSTensor|. */
|
||||||
|
void
|
||||||
|
FoldedStackContainer::multAndAddStacks(const IntSequence &coor,
|
||||||
|
const FGSTensor &g,
|
||||||
|
FGSTensor &out, const void *ad) const
|
||||||
|
{
|
||||||
|
const EquivalenceSet &eset = ebundle.get(out.dimen());
|
||||||
|
|
||||||
|
UGSTensor ug(g);
|
||||||
|
UFSTensor dummy_u(0, numStacks(), g.dimen());
|
||||||
|
for (Tensor::index ui = dummy_u.begin(); ui != dummy_u.end(); ++ui)
|
||||||
|
{
|
||||||
|
IntSequence tmp(ui.getCoor());
|
||||||
|
tmp.sort();
|
||||||
|
if (tmp == coor)
|
||||||
|
{
|
||||||
|
Permutation sort_per(ui.getCoor());
|
||||||
|
sort_per.inverse();
|
||||||
|
for (EquivalenceSet::const_iterator it = eset.begin();
|
||||||
|
it != eset.end(); ++it)
|
||||||
|
{
|
||||||
|
if ((*it).numClasses() == g.dimen())
|
||||||
|
{
|
||||||
|
StackProduct<FGSTensor> sp(*this, *it, sort_per, out.getSym());
|
||||||
|
if (!sp.isZero(coor))
|
||||||
|
{
|
||||||
|
KronProdStack<FGSTensor> kp(sp, coor);
|
||||||
|
if (ug.getSym().isFull())
|
||||||
|
kp.optimizeOrder();
|
||||||
|
FPSTensor fps(out.getDims(), *it, sort_per, ug, kp);
|
||||||
|
{
|
||||||
|
SYNCHRO syn(ad, "multAndAddStacks");
|
||||||
|
fps.addTo(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// |FoldedStackContainer::multAndAddStacks| sparse code
|
||||||
|
/* This is almost the same as
|
||||||
|
|@<|FoldedStackContainer::multAndAddStacks| dense code@>|. The only
|
||||||
|
difference is that the Kronecker product of the stacks is multiplied
|
||||||
|
with sparse slice |GSSparseTensor| (not dense slice |FGSTensor|). The
|
||||||
|
multiplication is done in |@<|FPSTensor| sparse constructor@>|. */
|
||||||
|
void
|
||||||
|
FoldedStackContainer::multAndAddStacks(const IntSequence &coor,
|
||||||
|
const GSSparseTensor &g,
|
||||||
|
FGSTensor &out, const void *ad) const
|
||||||
|
{
|
||||||
|
const EquivalenceSet &eset = ebundle.get(out.dimen());
|
||||||
|
UFSTensor dummy_u(0, numStacks(), g.dimen());
|
||||||
|
for (Tensor::index ui = dummy_u.begin(); ui != dummy_u.end(); ++ui)
|
||||||
|
{
|
||||||
|
IntSequence tmp(ui.getCoor());
|
||||||
|
tmp.sort();
|
||||||
|
if (tmp == coor)
|
||||||
|
{
|
||||||
|
Permutation sort_per(ui.getCoor());
|
||||||
|
sort_per.inverse();
|
||||||
|
for (EquivalenceSet::const_iterator it = eset.begin();
|
||||||
|
it != eset.end(); ++it)
|
||||||
|
{
|
||||||
|
if ((*it).numClasses() == g.dimen())
|
||||||
|
{
|
||||||
|
StackProduct<FGSTensor> sp(*this, *it, sort_per, out.getSym());
|
||||||
|
if (!sp.isZero(coor))
|
||||||
|
{
|
||||||
|
KronProdStack<FGSTensor> kp(sp, coor);
|
||||||
|
FPSTensor fps(out.getDims(), *it, sort_per, g, kp);
|
||||||
|
{
|
||||||
|
SYNCHRO syn(ad, "multAndAddStacks");
|
||||||
|
fps.addTo(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// |UnfoldedStackContainer::multAndAdd| sparse code
|
||||||
|
/* Here we simply call either |multAndAddSparse1| or
|
||||||
|
|multAndAddSparse2|. The first one allows for optimization of
|
||||||
|
Kronecker products, so it seems to be more efficient. */
|
||||||
|
void
|
||||||
|
UnfoldedStackContainer::multAndAdd(const FSSparseTensor &t,
|
||||||
|
UGSTensor &out) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(t.nvar() != getAllSize(),
|
||||||
|
"Wrong number of variables of tensor for UnfoldedStackContainer::multAndAdd");
|
||||||
|
multAndAddSparse2(t, out);
|
||||||
|
}
|
||||||
|
|
||||||
|
// |UnfoldedStackContainer::multAndAdd| dense code
|
||||||
|
/* Here we implement the formula for stacks for fully symmetric tensor
|
||||||
|
scattered in a number of general symmetry tensors contained in a given
|
||||||
|
container. The implementations is pretty the same as in
|
||||||
|
|multAndAddSparse2| but we do not do the slices of sparse tensor, but
|
||||||
|
only a lookup to the container.
|
||||||
|
|
||||||
|
This means that we do not iterate through a dummy folded tensor to
|
||||||
|
obtain folded coordinates of stacks, rather we iterate through all
|
||||||
|
symmetries contained in the container and the coordinates of stacks
|
||||||
|
are obtained as unfolded identity sequence via the symmetry. The
|
||||||
|
reason of doing this is that we are unable to calculate symmetry from
|
||||||
|
stack coordinates as easily as stack coordinates from the symmetry. */
|
||||||
|
void
|
||||||
|
UnfoldedStackContainer::multAndAdd(int dim, const UGSContainer &c,
|
||||||
|
UGSTensor &out) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(c.num() != numStacks(),
|
||||||
|
"Wrong symmetry length of container for UnfoldedStackContainer::multAndAdd");
|
||||||
|
|
||||||
|
THREAD_GROUP gr;
|
||||||
|
SymmetrySet ss(dim, c.num());
|
||||||
|
for (symiterator si(ss); !si.isEnd(); ++si)
|
||||||
|
{
|
||||||
|
if (c.check(*si))
|
||||||
|
{
|
||||||
|
THREAD *worker = new WorkerUnfoldMAADense(*this, *si, c, out);
|
||||||
|
gr.insert(worker);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gr.run();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
WorkerUnfoldMAADense::operator()()
|
||||||
|
{
|
||||||
|
Permutation iden(dense_cont.num());
|
||||||
|
IntSequence coor(sym, iden.getMap());
|
||||||
|
const UGSTensor *g = dense_cont.get(sym);
|
||||||
|
cont.multAndAddStacks(coor, *g, out, &out);
|
||||||
|
}
|
||||||
|
|
||||||
|
WorkerUnfoldMAADense::WorkerUnfoldMAADense(const UnfoldedStackContainer &container,
|
||||||
|
const Symmetry &s,
|
||||||
|
const UGSContainer &dcontainer,
|
||||||
|
UGSTensor &outten)
|
||||||
|
: cont(container), sym(s), dense_cont(dcontainer), out(outten)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we implement the formula for unfolded tensors. If, for instance,
|
||||||
|
a coordinate $z$ of a tensor $\left[f_{z^2}\right]$ is partitioned as
|
||||||
|
$z=[a, b]$, then we perform the following:
|
||||||
|
$$
|
||||||
|
\eqalign{
|
||||||
|
\left[f_{z^2}\right]\left(\sum_c\left[\matrix{a_{c(x)}\cr b_{c(y)}}\right]
|
||||||
|
\otimes\left[\matrix{a_{c(y)}\cr b_{c(y)}}\right]\right)=&
|
||||||
|
\left[f_{aa}\right]\left(\sum_ca_{c(x)}\otimes a_{c(y)}\right)+
|
||||||
|
\left[f_{ab}\right]\left(\sum_ca_{c(x)}\otimes b_{c(y)}\right)+\cr
|
||||||
|
&\left[f_{ba}\right]\left(\sum_cb_{c(x)}\otimes a_{c(y)}\right)+
|
||||||
|
\left[f_{bb}\right]\left(\sum_cb_{c(x)}\otimes b_{c(y)}\right)\cr
|
||||||
|
}
|
||||||
|
$$
|
||||||
|
This is exactly what happens here. The code is clear. It goes through
|
||||||
|
all combinations of stacks, and each thread is responsible for
|
||||||
|
operation for the slice corresponding to the combination of the stacks. */
|
||||||
|
|
||||||
|
void
|
||||||
|
UnfoldedStackContainer::multAndAddSparse1(const FSSparseTensor &t,
|
||||||
|
UGSTensor &out) const
|
||||||
|
{
|
||||||
|
THREAD_GROUP gr;
|
||||||
|
UFSTensor dummy(0, numStacks(), t.dimen());
|
||||||
|
for (Tensor::index ui = dummy.begin(); ui != dummy.end(); ++ui)
|
||||||
|
{
|
||||||
|
THREAD *worker = new WorkerUnfoldMAASparse1(*this, t, out, ui.getCoor());
|
||||||
|
gr.insert(worker);
|
||||||
|
}
|
||||||
|
gr.run();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This does a step of |@<|UnfoldedStackContainer::multAndAddSparse1| code@>| for
|
||||||
|
a given coordinates. First it makes the slice of the given stack coordinates.
|
||||||
|
Then it multiplies everything what should be multiplied with the slice.
|
||||||
|
That is it goes through all equivalences, creates |StackProduct|, then
|
||||||
|
|KronProdStack|, which is added to |out|. So far everything is clear.
|
||||||
|
|
||||||
|
However, we want to use optimized |KronProdAllOptim| to minimize
|
||||||
|
a number of flops and memory needed in the Kronecker product. So we go
|
||||||
|
through all permutations |per|, permute the coordinates to get
|
||||||
|
|percoor|, go through all equivalences, and make |KronProdStack| and
|
||||||
|
optimize it. The result of optimization is a permutation |oper|. Now,
|
||||||
|
we multiply the Kronecker product with the slice, only if the slice
|
||||||
|
has the same ordering of coordinates as the Kronecker product
|
||||||
|
|KronProdStack|. However, it is not perfectly true. Since we go
|
||||||
|
through {\bf all} permutations |per|, there might be two different
|
||||||
|
permutations leading to the same ordering in |KronProdStack| and thus
|
||||||
|
the same ordering in the optimized |KronProdStack|. The two cases
|
||||||
|
would be counted twice, which is wrong. That is why we do not
|
||||||
|
condition on $\hbox{coor}\circ\hbox{oper}\circ\hbox{per} =
|
||||||
|
\hbox{coor}$, but we condition on
|
||||||
|
$\hbox{oper}\circ\hbox{per}=\hbox{id}$. In this way, we rule out
|
||||||
|
permutations |per| leading to the same ordering of stacks when
|
||||||
|
applied on |coor|.
|
||||||
|
|
||||||
|
todo: vertically narrow slice and out according to the fill in t. */
|
||||||
|
|
||||||
|
void
|
||||||
|
WorkerUnfoldMAASparse1::operator()()
|
||||||
|
{
|
||||||
|
const EquivalenceSet &eset = ebundle.get(out.dimen());
|
||||||
|
const PermutationSet &pset = tls.pbundle->get(t.dimen());
|
||||||
|
Permutation iden(t.dimen());
|
||||||
|
|
||||||
|
UPSTensor slice(t, cont.getStackSizes(), coor,
|
||||||
|
PerTensorDimens(cont.getStackSizes(), coor));
|
||||||
|
for (int iper = 0; iper < pset.getNum(); iper++)
|
||||||
|
{
|
||||||
|
const Permutation &per = pset.get(iper);
|
||||||
|
IntSequence percoor(coor.size());
|
||||||
|
per.apply(coor, percoor);
|
||||||
|
for (EquivalenceSet::const_iterator it = eset.begin();
|
||||||
|
it != eset.end(); ++it)
|
||||||
|
{
|
||||||
|
if ((*it).numClasses() == t.dimen())
|
||||||
|
{
|
||||||
|
StackProduct<UGSTensor> sp(cont, *it, out.getSym());
|
||||||
|
if (!sp.isZero(percoor))
|
||||||
|
{
|
||||||
|
KronProdStack<UGSTensor> kp(sp, percoor);
|
||||||
|
kp.optimizeOrder();
|
||||||
|
const Permutation &oper = kp.getPer();
|
||||||
|
if (Permutation(oper, per) == iden)
|
||||||
|
{
|
||||||
|
UPSTensor ups(out.getDims(), *it, slice, kp);
|
||||||
|
{
|
||||||
|
SYNCHRO syn(&out, "WorkerUnfoldMAASparse1");
|
||||||
|
ups.addTo(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
WorkerUnfoldMAASparse1::WorkerUnfoldMAASparse1(const UnfoldedStackContainer &container,
|
||||||
|
const FSSparseTensor &ten,
|
||||||
|
UGSTensor &outten, const IntSequence &c)
|
||||||
|
: cont(container), t(ten), out(outten), coor(c), ebundle(*(tls.ebundle))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/* In here we implement the formula by a bit different way. We use the
|
||||||
|
fact, using notation of |@<|UnfoldedStackContainer::multAndAddSparse2|
|
||||||
|
code@>|, that
|
||||||
|
$$
|
||||||
|
\left[f_{ba}\right]\left(\sum_cb_{c(x)}\otimes a_{c(y)}\right)=
|
||||||
|
\left[f_{ab}\right]\left(\sum_ca_{c(y)}\otimes b_{c(b)}\right)\cdot P
|
||||||
|
$$
|
||||||
|
where $P$ is a suitable permutation of columns. The permutation
|
||||||
|
corresponds to (in this example) a swap of $a$ and $b$. An advantage
|
||||||
|
of this approach is that we do not need |UPSTensor| for $f_{ba}$, and
|
||||||
|
thus we decrease the number of needed slices.
|
||||||
|
|
||||||
|
So we go through all folded indices of stack coordinates, then for
|
||||||
|
each such index |fi| we make a slice and call |multAndAddStacks|. This
|
||||||
|
goes through all corresponding unfolded indices to perform the
|
||||||
|
formula. Each unsorted (unfold) index implies a sorting permutation
|
||||||
|
|sort_per| which must be used to permute stacks in |StackProduct|, and
|
||||||
|
permute equivalence classes when |UPSTensor| is formed. In this way
|
||||||
|
the column permutation $P$ from the formula is factored to the
|
||||||
|
permutation of |UPSTensor|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
UnfoldedStackContainer::multAndAddSparse2(const FSSparseTensor &t,
|
||||||
|
UGSTensor &out) const
|
||||||
|
{
|
||||||
|
THREAD_GROUP gr;
|
||||||
|
FFSTensor dummy_f(0, numStacks(), t.dimen());
|
||||||
|
for (Tensor::index fi = dummy_f.begin(); fi != dummy_f.end(); ++fi)
|
||||||
|
{
|
||||||
|
THREAD *worker = new WorkerUnfoldMAASparse2(*this, t, out, fi.getCoor());
|
||||||
|
gr.insert(worker);
|
||||||
|
}
|
||||||
|
gr.run();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This does a step of |@<|UnfoldedStackContainer::multAndAddSparse2| code@>| for
|
||||||
|
a given coordinates.
|
||||||
|
|
||||||
|
todo: implement |multAndAddStacks| for sparse slice as
|
||||||
|
|@<|FoldedStackContainer::multAndAddStacks| sparse code@>| and do this method as
|
||||||
|
|@<|WorkerFoldMAASparse2::operator()()| code@>|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
WorkerUnfoldMAASparse2::operator()()
|
||||||
|
{
|
||||||
|
GSSparseTensor slice(t, cont.getStackSizes(), coor,
|
||||||
|
TensorDimens(cont.getStackSizes(), coor));
|
||||||
|
if (slice.getNumNonZero())
|
||||||
|
{
|
||||||
|
FGSTensor fslice(slice);
|
||||||
|
UGSTensor dense_slice(fslice);
|
||||||
|
int r1 = slice.getFirstNonZeroRow();
|
||||||
|
int r2 = slice.getLastNonZeroRow();
|
||||||
|
UGSTensor dense_slice1(r1, r2-r1+1, dense_slice);
|
||||||
|
UGSTensor out1(r1, r2-r1+1, out);
|
||||||
|
|
||||||
|
cont.multAndAddStacks(coor, dense_slice1, out1, &out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
WorkerUnfoldMAASparse2::WorkerUnfoldMAASparse2(const UnfoldedStackContainer &container,
|
||||||
|
const FSSparseTensor &ten,
|
||||||
|
UGSTensor &outten, const IntSequence &c)
|
||||||
|
: cont(container), t(ten), out(outten), coor(c)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/* For a given unfolded coordinates of stacks |fi|, and appropriate
|
||||||
|
tensor $g$, whose symmetry is a symmetry of |fi|, the method
|
||||||
|
contributes to |out| all tensors in unfolded stack formula involving
|
||||||
|
stacks chosen by |fi|.
|
||||||
|
|
||||||
|
We go through all |ui| coordinates which yield |fi| after sorting. We
|
||||||
|
construct a permutation |sort_per| which sorts |ui| to |fi|. We go
|
||||||
|
through all appropriate equivalences, and construct |StackProduct|
|
||||||
|
from equivalence classes permuted by |sort_per|, then |UPSTensor| with
|
||||||
|
implied permutation of columns by the permuted equivalence by
|
||||||
|
|sort_per|. The |UPSTensor| is then added to |out|.
|
||||||
|
|
||||||
|
We cannot use here the optimized |KronProdStack|, since the symmetry
|
||||||
|
of |UGSTensor& g| prescribes the ordering of the stacks. However, if
|
||||||
|
|g| is fully symmetric, we can do the optimization harmlessly. */
|
||||||
|
|
||||||
|
void
|
||||||
|
UnfoldedStackContainer::multAndAddStacks(const IntSequence &fi,
|
||||||
|
const UGSTensor &g,
|
||||||
|
UGSTensor &out, const void *ad) const
|
||||||
|
{
|
||||||
|
const EquivalenceSet &eset = ebundle.get(out.dimen());
|
||||||
|
|
||||||
|
UFSTensor dummy_u(0, numStacks(), g.dimen());
|
||||||
|
for (Tensor::index ui = dummy_u.begin(); ui != dummy_u.end(); ++ui)
|
||||||
|
{
|
||||||
|
IntSequence tmp(ui.getCoor());
|
||||||
|
tmp.sort();
|
||||||
|
if (tmp == fi)
|
||||||
|
{
|
||||||
|
Permutation sort_per(ui.getCoor());
|
||||||
|
sort_per.inverse();
|
||||||
|
for (EquivalenceSet::const_iterator it = eset.begin();
|
||||||
|
it != eset.end(); ++it)
|
||||||
|
{
|
||||||
|
if ((*it).numClasses() == g.dimen())
|
||||||
|
{
|
||||||
|
StackProduct<UGSTensor> sp(*this, *it, sort_per, out.getSym());
|
||||||
|
if (!sp.isZero(fi))
|
||||||
|
{
|
||||||
|
KronProdStack<UGSTensor> kp(sp, fi);
|
||||||
|
if (g.getSym().isFull())
|
||||||
|
kp.optimizeOrder();
|
||||||
|
UPSTensor ups(out.getDims(), *it, sort_per, g, kp);
|
||||||
|
{
|
||||||
|
SYNCHRO syn(ad, "multAndAddStacks");
|
||||||
|
ups.addTo(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,670 +0,0 @@
|
||||||
@q $Id: stack_container.cweb 1835 2008-05-19 01:54:48Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt stack\_container.cpp} file.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#include "stack_container.h"
|
|
||||||
#include "pyramid_prod2.h"
|
|
||||||
#include "ps_tensor.h"
|
|
||||||
|
|
||||||
double FoldedStackContainer::fill_threshold = 0.00005;
|
|
||||||
double UnfoldedStackContainer::fill_threshold = 0.00005;
|
|
||||||
@<|FoldedStackContainer::multAndAdd| sparse code@>;
|
|
||||||
@<|FoldedStackContainer::multAndAdd| dense code@>;
|
|
||||||
@<|WorkerFoldMAADense::operator()()| code@>;
|
|
||||||
@<|WorkerFoldMAADense| constructor code@>;
|
|
||||||
@<|FoldedStackContainer::multAndAddSparse1| code@>;
|
|
||||||
@<|WorkerFoldMAASparse1::operator()()| code@>;
|
|
||||||
@<|WorkerFoldMAASparse1| constructor code@>;
|
|
||||||
@<|FoldedStackContainer::multAndAddSparse2| code@>;
|
|
||||||
@<|WorkerFoldMAASparse2::operator()()| code@>;
|
|
||||||
@<|WorkerFoldMAASparse2| constructor code@>;
|
|
||||||
@<|FoldedStackContainer::multAndAddSparse3| code@>;
|
|
||||||
@<|FoldedStackContainer::multAndAddSparse4| code@>;
|
|
||||||
@<|WorkerFoldMAASparse4::operator()()| code@>;
|
|
||||||
@<|WorkerFoldMAASparse4| constructor code@>;
|
|
||||||
@<|FoldedStackContainer::multAndAddStacks| dense code@>;
|
|
||||||
@<|FoldedStackContainer::multAndAddStacks| sparse code@>;
|
|
||||||
@#
|
|
||||||
@<|UnfoldedStackContainer::multAndAdd| sparse code@>;
|
|
||||||
@<|UnfoldedStackContainer::multAndAdd| dense code@>;
|
|
||||||
@<|WorkerUnfoldMAADense::operator()()| code@>;
|
|
||||||
@<|WorkerUnfoldMAADense| constructor code@>;
|
|
||||||
@<|UnfoldedStackContainer::multAndAddSparse1| code@>;
|
|
||||||
@<|WorkerUnfoldMAASparse1::operator()()| code@>;
|
|
||||||
@<|WorkerUnfoldMAASparse1| constructor code@>;
|
|
||||||
@<|UnfoldedStackContainer::multAndAddSparse2| code@>;
|
|
||||||
@<|WorkerUnfoldMAASparse2::operator()()| code@>;
|
|
||||||
@<|WorkerUnfoldMAASparse2| constructor code@>;
|
|
||||||
@<|UnfoldedStackContainer::multAndAddStacks| code@>;
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we multiply the sparse tensor with the
|
|
||||||
|FoldedStackContainer|. We have four implementations,
|
|
||||||
|multAndAddSparse1|, |multAndAddSparse2|, |multAndAddSparse3|, and
|
|
||||||
|multAndAddSparse4|. The third is not threaded yet and I expect that
|
|
||||||
it is certainly the slowest. The |multAndAddSparse4| exploits the
|
|
||||||
sparsity, however, it seems to be still worse than |multAndAddSparse2|
|
|
||||||
even for really sparse matrices. On the other hand, it can be more
|
|
||||||
efficient than |multAndAddSparse2| for large problems, since it does
|
|
||||||
not need that much of memory and can avoid much swapping. Very
|
|
||||||
preliminary examination shows that |multAndAddSparse2| is the best in
|
|
||||||
terms of time.
|
|
||||||
|
|
||||||
@s FSSparseTensor int
|
|
||||||
@s IrregTensorHeader int
|
|
||||||
@s IrregTensor int
|
|
||||||
|
|
||||||
@<|FoldedStackContainer::multAndAdd| sparse code@>=
|
|
||||||
void FoldedStackContainer::multAndAdd(const FSSparseTensor& t,
|
|
||||||
FGSTensor& out) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(t.nvar() != getAllSize(),
|
|
||||||
"Wrong number of variables of tensor for FoldedStackContainer::multAndAdd");
|
|
||||||
multAndAddSparse2(t, out);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we perform the Faa Di Bruno step for a given dimension |dim|, and for
|
|
||||||
the dense fully symmetric tensor which is scattered in the container
|
|
||||||
of general symmetric tensors. The implementation is pretty the same as
|
|
||||||
|@<|UnfoldedStackContainer::multAndAdd| dense code@>|.
|
|
||||||
|
|
||||||
@<|FoldedStackContainer::multAndAdd| dense code@>=
|
|
||||||
void FoldedStackContainer::multAndAdd(int dim, const FGSContainer& c, FGSTensor& out) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(c.num() != numStacks(),
|
|
||||||
"Wrong symmetry length of container for FoldedStackContainer::multAndAdd");
|
|
||||||
|
|
||||||
THREAD_GROUP@, gr;
|
|
||||||
SymmetrySet ss(dim, c.num());
|
|
||||||
for (symiterator si(ss); !si.isEnd(); ++si) {
|
|
||||||
if (c.check(*si)) {
|
|
||||||
THREAD* worker = new WorkerFoldMAADense(*this, *si, c, out);
|
|
||||||
gr.insert(worker);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
gr.run();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This is analogous to |@<|WorkerUnfoldMAADense::operator()()|
|
|
||||||
code@>|.
|
|
||||||
|
|
||||||
@<|WorkerFoldMAADense::operator()()| code@>=
|
|
||||||
void WorkerFoldMAADense::operator()()
|
|
||||||
{
|
|
||||||
Permutation iden(dense_cont.num());
|
|
||||||
IntSequence coor(sym, iden.getMap());
|
|
||||||
const FGSTensor* g = dense_cont.get(sym);
|
|
||||||
cont.multAndAddStacks(coor, *g, out, &out);
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|WorkerFoldMAADense| constructor code@>=
|
|
||||||
WorkerFoldMAADense::WorkerFoldMAADense(const FoldedStackContainer& container,
|
|
||||||
const Symmetry& s,
|
|
||||||
const FGSContainer& dcontainer,
|
|
||||||
FGSTensor& outten)
|
|
||||||
: cont(container), sym(s), dense_cont(dcontainer), out(outten)
|
|
||||||
{}
|
|
||||||
|
|
||||||
@ This is analogous to |@<|UnfoldedStackContainer::multAndAddSparse1|
|
|
||||||
code@>|.
|
|
||||||
@<|FoldedStackContainer::multAndAddSparse1| code@>=
|
|
||||||
void FoldedStackContainer::multAndAddSparse1(const FSSparseTensor& t,
|
|
||||||
FGSTensor& out) const
|
|
||||||
{
|
|
||||||
THREAD_GROUP@, gr;
|
|
||||||
UFSTensor dummy(0, numStacks(), t.dimen());
|
|
||||||
for (Tensor::index ui = dummy.begin(); ui != dummy.end(); ++ui) {
|
|
||||||
THREAD* worker = new WorkerFoldMAASparse1(*this, t, out, ui.getCoor());
|
|
||||||
gr.insert(worker);
|
|
||||||
}
|
|
||||||
gr.run();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This is analogous to |@<|WorkerUnfoldMAASparse1::operator()()| code@>|.
|
|
||||||
The only difference is that instead of |UPSTensor| as a
|
|
||||||
result of multiplication of unfolded tensor and tensors from
|
|
||||||
containers, we have |FPSTensor| with partially folded permuted
|
|
||||||
symmetry.
|
|
||||||
|
|
||||||
todo: make slice vertically narrowed according to the fill of t,
|
|
||||||
vertically narrow out accordingly.
|
|
||||||
|
|
||||||
@<|WorkerFoldMAASparse1::operator()()| code@>=
|
|
||||||
void WorkerFoldMAASparse1::operator()()
|
|
||||||
{
|
|
||||||
const EquivalenceSet& eset = ebundle.get(out.dimen());
|
|
||||||
const PermutationSet& pset = tls.pbundle->get(t.dimen());
|
|
||||||
Permutation iden(t.dimen());
|
|
||||||
|
|
||||||
UPSTensor slice(t, cont.getStackSizes(), coor,
|
|
||||||
PerTensorDimens(cont.getStackSizes(), coor));
|
|
||||||
for (int iper = 0; iper < pset.getNum(); iper++) {
|
|
||||||
const Permutation& per = pset.get(iper);
|
|
||||||
IntSequence percoor(coor.size());
|
|
||||||
per.apply(coor, percoor);
|
|
||||||
for (EquivalenceSet::const_iterator it = eset.begin();
|
|
||||||
it != eset.end(); ++it) {
|
|
||||||
if ((*it).numClasses() == t.dimen()) {
|
|
||||||
StackProduct<FGSTensor> sp(cont, *it, out.getSym());
|
|
||||||
if (! sp.isZero(percoor)) {
|
|
||||||
KronProdStack<FGSTensor> kp(sp, percoor);
|
|
||||||
kp.optimizeOrder();
|
|
||||||
const Permutation& oper = kp.getPer();
|
|
||||||
if (Permutation(oper, per) == iden) {
|
|
||||||
FPSTensor fps(out.getDims(), *it, slice, kp);
|
|
||||||
{
|
|
||||||
SYNCHRO@, syn(&out, "WorkerUnfoldMAASparse1");
|
|
||||||
fps.addTo(out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|WorkerFoldMAASparse1| constructor code@>=
|
|
||||||
WorkerFoldMAASparse1::WorkerFoldMAASparse1(const FoldedStackContainer& container,
|
|
||||||
const FSSparseTensor& ten,
|
|
||||||
FGSTensor& outten, const IntSequence& c)
|
|
||||||
: cont(container), t(ten), out(outten), coor(c), ebundle(*(tls.ebundle)) @+{}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here is the second implementation of sparse folded |multAndAdd|. It
|
|
||||||
is pretty similar to implementation of
|
|
||||||
|@<|UnfoldedStackContainer::multAndAddSparse2| code@>|. We make a
|
|
||||||
dense folded |slice|, and then call folded |multAndAddStacks|, which
|
|
||||||
multiplies all the combinations compatible with the slice.
|
|
||||||
|
|
||||||
@<|FoldedStackContainer::multAndAddSparse2| code@>=
|
|
||||||
void FoldedStackContainer::multAndAddSparse2(const FSSparseTensor& t,
|
|
||||||
FGSTensor& out) const
|
|
||||||
{
|
|
||||||
THREAD_GROUP@, gr;
|
|
||||||
FFSTensor dummy_f(0, numStacks(), t.dimen());
|
|
||||||
for (Tensor::index fi = dummy_f.begin(); fi != dummy_f.end(); ++fi) {
|
|
||||||
THREAD* worker = new WorkerFoldMAASparse2(*this, t, out, fi.getCoor());
|
|
||||||
gr.insert(worker);
|
|
||||||
}
|
|
||||||
gr.run();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we make a sparse slice first and then call |multAndAddStacks|
|
|
||||||
if the slice is not empty. If the slice is really sparse, we call
|
|
||||||
sparse version of |multAndAddStacks|. What means ``really sparse'' is
|
|
||||||
given by |fill_threshold|. It is not tuned yet, a practice shows that
|
|
||||||
it must be a really low number, since sparse |multAndAddStacks| is
|
|
||||||
much slower than the dense version.
|
|
||||||
|
|
||||||
Further, we take only nonzero rows of the slice, and accordingly of
|
|
||||||
the out tensor. We jump over zero initial rows and drop zero tailing
|
|
||||||
rows.
|
|
||||||
|
|
||||||
@<|WorkerFoldMAASparse2::operator()()| code@>=
|
|
||||||
void WorkerFoldMAASparse2::operator()()
|
|
||||||
{
|
|
||||||
GSSparseTensor slice(t, cont.getStackSizes(), coor,
|
|
||||||
TensorDimens(cont.getStackSizes(), coor));
|
|
||||||
if (slice.getNumNonZero()) {
|
|
||||||
if (slice.getUnfoldIndexFillFactor() > FoldedStackContainer::fill_threshold) {
|
|
||||||
FGSTensor dense_slice(slice);
|
|
||||||
int r1 = slice.getFirstNonZeroRow();
|
|
||||||
int r2 = slice.getLastNonZeroRow();
|
|
||||||
FGSTensor dense_slice1(r1, r2-r1+1, dense_slice);
|
|
||||||
FGSTensor out1(r1, r2-r1+1, out);
|
|
||||||
cont.multAndAddStacks(coor, dense_slice1, out1, &out);
|
|
||||||
} else
|
|
||||||
cont.multAndAddStacks(coor, slice, out, &out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|WorkerFoldMAASparse2| constructor code@>=
|
|
||||||
WorkerFoldMAASparse2::WorkerFoldMAASparse2(const FoldedStackContainer& container,
|
|
||||||
const FSSparseTensor& ten,
|
|
||||||
FGSTensor& outten, const IntSequence& c)
|
|
||||||
: cont(container), t(ten), out(outten), coor(c)
|
|
||||||
{}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here is the third implementation of the sparse folded
|
|
||||||
|multAndAdd|. It is column-wise implementation, and thus is not a good
|
|
||||||
candidate for the best performer.
|
|
||||||
|
|
||||||
We go through all columns from the output. For each column we
|
|
||||||
calculate folded |sumcol| which is a sum of all appropriate columns
|
|
||||||
for all suitable equivalences. So we go through all suitable
|
|
||||||
equivalences, for each we construct a |StackProduct| object and
|
|
||||||
construct |IrregTensor| for a corresponding column of $z$. The
|
|
||||||
|IrregTensor| is an abstraction for Kronecker multiplication of
|
|
||||||
stacked columns of the two containers without zeros. Then the column
|
|
||||||
is added to |sumcol|. Finally, the |sumcol| is multiplied by the
|
|
||||||
sparse tensor.
|
|
||||||
|
|
||||||
@<|FoldedStackContainer::multAndAddSparse3| code@>=
|
|
||||||
void FoldedStackContainer::multAndAddSparse3(const FSSparseTensor& t,
|
|
||||||
FGSTensor& out) const
|
|
||||||
{
|
|
||||||
const EquivalenceSet& eset = ebundle.get(out.dimen());
|
|
||||||
for (Tensor::index run = out.begin(); run != out.end(); ++run) {
|
|
||||||
Vector outcol(out, *run);
|
|
||||||
FRSingleTensor sumcol(t.nvar(), t.dimen());
|
|
||||||
sumcol.zeros();
|
|
||||||
for (EquivalenceSet::const_iterator it = eset.begin();
|
|
||||||
it != eset.end(); ++it) {
|
|
||||||
if ((*it).numClasses() == t.dimen()) {
|
|
||||||
StackProduct<FGSTensor> sp(*this, *it, out.getSym());
|
|
||||||
IrregTensorHeader header(sp, run.getCoor());
|
|
||||||
IrregTensor irten(header);
|
|
||||||
irten.addTo(sumcol);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
t.multColumnAndAdd(sumcol, outcol);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here is the fourth implementation of sparse
|
|
||||||
|FoldedStackContainer::multAndAdd|. It is almost equivalent to
|
|
||||||
|multAndAddSparse2| with the exception that the |FPSTensor| as a
|
|
||||||
result of a product of a slice and Kronecker product of the stack
|
|
||||||
derivatives is calculated in the sparse fashion. For further details, see
|
|
||||||
|@<|FoldedStackContainer::multAndAddStacks| sparse code@>| and
|
|
||||||
|@<|FPSTensor| sparse constructor@>|.
|
|
||||||
|
|
||||||
@<|FoldedStackContainer::multAndAddSparse4| code@>=
|
|
||||||
void FoldedStackContainer::multAndAddSparse4(const FSSparseTensor& t, FGSTensor& out) const
|
|
||||||
{
|
|
||||||
THREAD_GROUP@, gr;
|
|
||||||
FFSTensor dummy_f(0, numStacks(), t.dimen());
|
|
||||||
for (Tensor::index fi = dummy_f.begin(); fi != dummy_f.end(); ++fi) {
|
|
||||||
THREAD* worker = new WorkerFoldMAASparse4(*this, t, out, fi.getCoor());
|
|
||||||
gr.insert(worker);
|
|
||||||
}
|
|
||||||
gr.run();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ The |WorkerFoldMAASparse4| is the same as |WorkerFoldMAASparse2|
|
|
||||||
with the exception that we call a sparse version of
|
|
||||||
|multAndAddStacks|.
|
|
||||||
|
|
||||||
@<|WorkerFoldMAASparse4::operator()()| code@>=
|
|
||||||
void WorkerFoldMAASparse4::operator()()
|
|
||||||
{
|
|
||||||
GSSparseTensor slice(t, cont.getStackSizes(), coor,
|
|
||||||
TensorDimens(cont.getStackSizes(), coor));
|
|
||||||
if (slice.getNumNonZero())
|
|
||||||
cont.multAndAddStacks(coor, slice, out, &out);
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|WorkerFoldMAASparse4| constructor code@>=
|
|
||||||
WorkerFoldMAASparse4::WorkerFoldMAASparse4(const FoldedStackContainer& container,
|
|
||||||
const FSSparseTensor& ten,
|
|
||||||
FGSTensor& outten, const IntSequence& c)
|
|
||||||
: cont(container), t(ten), out(outten), coor(c)
|
|
||||||
{}
|
|
||||||
|
|
||||||
|
|
||||||
@ This is almost the same as
|
|
||||||
|@<|UnfoldedStackContainer::multAndAddStacks| code@>|. The only
|
|
||||||
difference is that we do not construct a |UPSTensor| from
|
|
||||||
|KronProdStack|, but we construct partially folded permuted
|
|
||||||
symmetry |FPSTensor|. Note that the tensor |g| must be unfolded
|
|
||||||
in order to be able to multiply with unfolded rows of Kronecker
|
|
||||||
product. However, columns of such a product are partially
|
|
||||||
folded giving a rise to the |FPSTensor|.
|
|
||||||
|
|
||||||
@<|FoldedStackContainer::multAndAddStacks| dense code@>=
|
|
||||||
void FoldedStackContainer::multAndAddStacks(const IntSequence& coor,
|
|
||||||
const FGSTensor& g,
|
|
||||||
FGSTensor& out, const void* ad) const
|
|
||||||
{
|
|
||||||
const EquivalenceSet& eset = ebundle.get(out.dimen());
|
|
||||||
|
|
||||||
UGSTensor ug(g);
|
|
||||||
UFSTensor dummy_u(0, numStacks(), g.dimen());
|
|
||||||
for (Tensor::index ui = dummy_u.begin(); ui != dummy_u.end(); ++ui) {
|
|
||||||
IntSequence tmp(ui.getCoor());
|
|
||||||
tmp.sort();
|
|
||||||
if (tmp == coor) {
|
|
||||||
Permutation sort_per(ui.getCoor());
|
|
||||||
sort_per.inverse();
|
|
||||||
for (EquivalenceSet::const_iterator it = eset.begin();
|
|
||||||
it != eset.end(); ++it) {
|
|
||||||
if ((*it).numClasses() == g.dimen()) {
|
|
||||||
StackProduct<FGSTensor> sp(*this, *it, sort_per, out.getSym());
|
|
||||||
if (! sp.isZero(coor)) {
|
|
||||||
KronProdStack<FGSTensor> kp(sp, coor);
|
|
||||||
if (ug.getSym().isFull())
|
|
||||||
kp.optimizeOrder();
|
|
||||||
FPSTensor fps(out.getDims(), *it, sort_per, ug, kp);
|
|
||||||
{
|
|
||||||
SYNCHRO@, syn(ad, "multAndAddStacks");
|
|
||||||
fps.addTo(out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This is almost the same as
|
|
||||||
|@<|FoldedStackContainer::multAndAddStacks| dense code@>|. The only
|
|
||||||
difference is that the Kronecker product of the stacks is multiplied
|
|
||||||
with sparse slice |GSSparseTensor| (not dense slice |FGSTensor|). The
|
|
||||||
multiplication is done in |@<|FPSTensor| sparse constructor@>|.
|
|
||||||
|
|
||||||
@<|FoldedStackContainer::multAndAddStacks| sparse code@>=
|
|
||||||
void FoldedStackContainer::multAndAddStacks(const IntSequence& coor,
|
|
||||||
const GSSparseTensor& g,
|
|
||||||
FGSTensor& out, const void* ad) const
|
|
||||||
{
|
|
||||||
const EquivalenceSet& eset = ebundle.get(out.dimen());
|
|
||||||
UFSTensor dummy_u(0, numStacks(), g.dimen());
|
|
||||||
for (Tensor::index ui = dummy_u.begin(); ui != dummy_u.end(); ++ui) {
|
|
||||||
IntSequence tmp(ui.getCoor());
|
|
||||||
tmp.sort();
|
|
||||||
if (tmp == coor) {
|
|
||||||
Permutation sort_per(ui.getCoor());
|
|
||||||
sort_per.inverse();
|
|
||||||
for (EquivalenceSet::const_iterator it = eset.begin();
|
|
||||||
it != eset.end(); ++it) {
|
|
||||||
if ((*it).numClasses() == g.dimen()) {
|
|
||||||
StackProduct<FGSTensor> sp(*this, *it, sort_per, out.getSym());
|
|
||||||
if (! sp.isZero(coor)) {
|
|
||||||
KronProdStack<FGSTensor> kp(sp, coor);
|
|
||||||
FPSTensor fps(out.getDims(), *it, sort_per, g, kp);
|
|
||||||
{
|
|
||||||
SYNCHRO@, syn(ad, "multAndAddStacks");
|
|
||||||
fps.addTo(out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we simply call either |multAndAddSparse1| or
|
|
||||||
|multAndAddSparse2|. The first one allows for optimization of
|
|
||||||
Kronecker products, so it seems to be more efficient.
|
|
||||||
|
|
||||||
@<|UnfoldedStackContainer::multAndAdd| sparse code@>=
|
|
||||||
void UnfoldedStackContainer::multAndAdd(const FSSparseTensor& t,
|
|
||||||
UGSTensor& out) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(t.nvar() != getAllSize(),
|
|
||||||
"Wrong number of variables of tensor for UnfoldedStackContainer::multAndAdd");
|
|
||||||
multAndAddSparse2(t, out);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we implement the formula for stacks for fully symmetric tensor
|
|
||||||
scattered in a number of general symmetry tensors contained in a given
|
|
||||||
container. The implementations is pretty the same as in
|
|
||||||
|multAndAddSparse2| but we do not do the slices of sparse tensor, but
|
|
||||||
only a lookup to the container.
|
|
||||||
|
|
||||||
This means that we do not iterate through a dummy folded tensor to
|
|
||||||
obtain folded coordinates of stacks, rather we iterate through all
|
|
||||||
symmetries contained in the container and the coordinates of stacks
|
|
||||||
are obtained as unfolded identity sequence via the symmetry. The
|
|
||||||
reason of doing this is that we are unable to calculate symmetry from
|
|
||||||
stack coordinates as easily as stack coordinates from the symmetry.
|
|
||||||
|
|
||||||
@<|UnfoldedStackContainer::multAndAdd| dense code@>=
|
|
||||||
void UnfoldedStackContainer::multAndAdd(int dim, const UGSContainer& c,
|
|
||||||
UGSTensor& out) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(c.num() != numStacks(),
|
|
||||||
"Wrong symmetry length of container for UnfoldedStackContainer::multAndAdd");
|
|
||||||
|
|
||||||
THREAD_GROUP@, gr;
|
|
||||||
SymmetrySet ss(dim, c.num());
|
|
||||||
for (symiterator si(ss); !si.isEnd(); ++si) {
|
|
||||||
if (c.check(*si)) {
|
|
||||||
THREAD* worker = new WorkerUnfoldMAADense(*this, *si, c, out);
|
|
||||||
gr.insert(worker);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
gr.run();
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|WorkerUnfoldMAADense::operator()()| code@>=
|
|
||||||
void WorkerUnfoldMAADense::operator()()
|
|
||||||
{
|
|
||||||
Permutation iden(dense_cont.num());
|
|
||||||
IntSequence coor(sym, iden.getMap());
|
|
||||||
const UGSTensor* g = dense_cont.get(sym);
|
|
||||||
cont.multAndAddStacks(coor, *g, out, &out);
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|WorkerUnfoldMAADense| constructor code@>=
|
|
||||||
WorkerUnfoldMAADense::WorkerUnfoldMAADense(const UnfoldedStackContainer& container,
|
|
||||||
const Symmetry& s,
|
|
||||||
const UGSContainer& dcontainer,
|
|
||||||
UGSTensor& outten)
|
|
||||||
: cont(container), sym(s), dense_cont(dcontainer), out(outten)@+ {}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we implement the formula for unfolded tensors. If, for instance,
|
|
||||||
a coordinate $z$ of a tensor $\left[f_{z^2}\right]$ is partitioned as
|
|
||||||
$z=[a, b]$, then we perform the following:
|
|
||||||
$$
|
|
||||||
\eqalign{
|
|
||||||
\left[f_{z^2}\right]\left(\sum_c\left[\matrix{a_{c(x)}\cr b_{c(y)}}\right]
|
|
||||||
\otimes\left[\matrix{a_{c(y)}\cr b_{c(y)}}\right]\right)=&
|
|
||||||
\left[f_{aa}\right]\left(\sum_ca_{c(x)}\otimes a_{c(y)}\right)+
|
|
||||||
\left[f_{ab}\right]\left(\sum_ca_{c(x)}\otimes b_{c(y)}\right)+\cr
|
|
||||||
&\left[f_{ba}\right]\left(\sum_cb_{c(x)}\otimes a_{c(y)}\right)+
|
|
||||||
\left[f_{bb}\right]\left(\sum_cb_{c(x)}\otimes b_{c(y)}\right)\cr
|
|
||||||
}
|
|
||||||
$$
|
|
||||||
This is exactly what happens here. The code is clear. It goes through
|
|
||||||
all combinations of stacks, and each thread is responsible for
|
|
||||||
operation for the slice corresponding to the combination of the stacks.
|
|
||||||
|
|
||||||
@<|UnfoldedStackContainer::multAndAddSparse1| code@>=
|
|
||||||
void UnfoldedStackContainer::multAndAddSparse1(const FSSparseTensor& t,
|
|
||||||
UGSTensor& out) const
|
|
||||||
{
|
|
||||||
THREAD_GROUP@, gr;
|
|
||||||
UFSTensor dummy(0, numStacks(), t.dimen());
|
|
||||||
for (Tensor::index ui = dummy.begin(); ui != dummy.end(); ++ui) {
|
|
||||||
THREAD* worker = new WorkerUnfoldMAASparse1(*this, t, out, ui.getCoor());
|
|
||||||
gr.insert(worker);
|
|
||||||
}
|
|
||||||
gr.run();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This does a step of |@<|UnfoldedStackContainer::multAndAddSparse1| code@>| for
|
|
||||||
a given coordinates. First it makes the slice of the given stack coordinates.
|
|
||||||
Then it multiplies everything what should be multiplied with the slice.
|
|
||||||
That is it goes through all equivalences, creates |StackProduct|, then
|
|
||||||
|KronProdStack|, which is added to |out|. So far everything is clear.
|
|
||||||
|
|
||||||
However, we want to use optimized |KronProdAllOptim| to minimize
|
|
||||||
a number of flops and memory needed in the Kronecker product. So we go
|
|
||||||
through all permutations |per|, permute the coordinates to get
|
|
||||||
|percoor|, go through all equivalences, and make |KronProdStack| and
|
|
||||||
optimize it. The result of optimization is a permutation |oper|. Now,
|
|
||||||
we multiply the Kronecker product with the slice, only if the slice
|
|
||||||
has the same ordering of coordinates as the Kronecker product
|
|
||||||
|KronProdStack|. However, it is not perfectly true. Since we go
|
|
||||||
through {\bf all} permutations |per|, there might be two different
|
|
||||||
permutations leading to the same ordering in |KronProdStack| and thus
|
|
||||||
the same ordering in the optimized |KronProdStack|. The two cases
|
|
||||||
would be counted twice, which is wrong. That is why we do not
|
|
||||||
condition on $\hbox{coor}\circ\hbox{oper}\circ\hbox{per} =
|
|
||||||
\hbox{coor}$, but we condition on
|
|
||||||
$\hbox{oper}\circ\hbox{per}=\hbox{id}$. In this way, we rule out
|
|
||||||
permutations |per| leading to the same ordering of stacks when
|
|
||||||
applied on |coor|.
|
|
||||||
|
|
||||||
todo: vertically narrow slice and out according to the fill in t.
|
|
||||||
|
|
||||||
@<|WorkerUnfoldMAASparse1::operator()()| code@>=
|
|
||||||
void WorkerUnfoldMAASparse1::operator()()
|
|
||||||
{
|
|
||||||
const EquivalenceSet& eset = ebundle.get(out.dimen());
|
|
||||||
const PermutationSet& pset = tls.pbundle->get(t.dimen());
|
|
||||||
Permutation iden(t.dimen());
|
|
||||||
|
|
||||||
UPSTensor slice(t, cont.getStackSizes(), coor,
|
|
||||||
PerTensorDimens(cont.getStackSizes(), coor));
|
|
||||||
for (int iper = 0; iper < pset.getNum(); iper++) {
|
|
||||||
const Permutation& per = pset.get(iper);
|
|
||||||
IntSequence percoor(coor.size());
|
|
||||||
per.apply(coor, percoor);
|
|
||||||
for (EquivalenceSet::const_iterator it = eset.begin();
|
|
||||||
it != eset.end(); ++it) {
|
|
||||||
if ((*it).numClasses() == t.dimen()) {
|
|
||||||
StackProduct<UGSTensor> sp(cont, *it, out.getSym());
|
|
||||||
if (! sp.isZero(percoor)) {
|
|
||||||
KronProdStack<UGSTensor> kp(sp, percoor);
|
|
||||||
kp.optimizeOrder();
|
|
||||||
const Permutation& oper = kp.getPer();
|
|
||||||
if (Permutation(oper, per) == iden) {
|
|
||||||
UPSTensor ups(out.getDims(), *it, slice, kp);
|
|
||||||
{
|
|
||||||
SYNCHRO@, syn(&out, "WorkerUnfoldMAASparse1");
|
|
||||||
ups.addTo(out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|WorkerUnfoldMAASparse1| constructor code@>=
|
|
||||||
WorkerUnfoldMAASparse1::WorkerUnfoldMAASparse1(const UnfoldedStackContainer& container,
|
|
||||||
const FSSparseTensor& ten,
|
|
||||||
UGSTensor& outten, const IntSequence& c)
|
|
||||||
: cont(container), t(ten), out(outten), coor(c), ebundle(*(tls.ebundle)) @+{}
|
|
||||||
|
|
||||||
|
|
||||||
@ In here we implement the formula by a bit different way. We use the
|
|
||||||
fact, using notation of |@<|UnfoldedStackContainer::multAndAddSparse2|
|
|
||||||
code@>|, that
|
|
||||||
$$
|
|
||||||
\left[f_{ba}\right]\left(\sum_cb_{c(x)}\otimes a_{c(y)}\right)=
|
|
||||||
\left[f_{ab}\right]\left(\sum_ca_{c(y)}\otimes b_{c(b)}\right)\cdot P
|
|
||||||
$$
|
|
||||||
where $P$ is a suitable permutation of columns. The permutation
|
|
||||||
corresponds to (in this example) a swap of $a$ and $b$. An advantage
|
|
||||||
of this approach is that we do not need |UPSTensor| for $f_{ba}$, and
|
|
||||||
thus we decrease the number of needed slices.
|
|
||||||
|
|
||||||
So we go through all folded indices of stack coordinates, then for
|
|
||||||
each such index |fi| we make a slice and call |multAndAddStacks|. This
|
|
||||||
goes through all corresponding unfolded indices to perform the
|
|
||||||
formula. Each unsorted (unfold) index implies a sorting permutation
|
|
||||||
|sort_per| which must be used to permute stacks in |StackProduct|, and
|
|
||||||
permute equivalence classes when |UPSTensor| is formed. In this way
|
|
||||||
the column permutation $P$ from the formula is factored to the
|
|
||||||
permutation of |UPSTensor|.
|
|
||||||
|
|
||||||
@<|UnfoldedStackContainer::multAndAddSparse2| code@>=
|
|
||||||
void UnfoldedStackContainer::multAndAddSparse2(const FSSparseTensor& t,
|
|
||||||
UGSTensor& out) const
|
|
||||||
{
|
|
||||||
THREAD_GROUP@, gr;
|
|
||||||
FFSTensor dummy_f(0, numStacks(), t.dimen());
|
|
||||||
for (Tensor::index fi = dummy_f.begin(); fi != dummy_f.end(); ++fi) {
|
|
||||||
THREAD* worker = new WorkerUnfoldMAASparse2(*this, t, out, fi.getCoor());
|
|
||||||
gr.insert(worker);
|
|
||||||
}
|
|
||||||
gr.run();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This does a step of |@<|UnfoldedStackContainer::multAndAddSparse2| code@>| for
|
|
||||||
a given coordinates.
|
|
||||||
|
|
||||||
todo: implement |multAndAddStacks| for sparse slice as
|
|
||||||
|@<|FoldedStackContainer::multAndAddStacks| sparse code@>| and do this method as
|
|
||||||
|@<|WorkerFoldMAASparse2::operator()()| code@>|.
|
|
||||||
|
|
||||||
@<|WorkerUnfoldMAASparse2::operator()()| code@>=
|
|
||||||
void WorkerUnfoldMAASparse2::operator()()
|
|
||||||
{
|
|
||||||
GSSparseTensor slice(t, cont.getStackSizes(), coor,
|
|
||||||
TensorDimens(cont.getStackSizes(), coor));
|
|
||||||
if (slice.getNumNonZero()) {
|
|
||||||
FGSTensor fslice(slice);
|
|
||||||
UGSTensor dense_slice(fslice);
|
|
||||||
int r1 = slice.getFirstNonZeroRow();
|
|
||||||
int r2 = slice.getLastNonZeroRow();
|
|
||||||
UGSTensor dense_slice1(r1, r2-r1+1, dense_slice);
|
|
||||||
UGSTensor out1(r1, r2-r1+1, out);
|
|
||||||
|
|
||||||
cont.multAndAddStacks(coor, dense_slice1, out1, &out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|WorkerUnfoldMAASparse2| constructor code@>=
|
|
||||||
WorkerUnfoldMAASparse2::WorkerUnfoldMAASparse2(const UnfoldedStackContainer& container,
|
|
||||||
const FSSparseTensor& ten,
|
|
||||||
UGSTensor& outten, const IntSequence& c)
|
|
||||||
: cont(container), t(ten), out(outten), coor(c) @+{}
|
|
||||||
|
|
||||||
|
|
||||||
@ For a given unfolded coordinates of stacks |fi|, and appropriate
|
|
||||||
tensor $g$, whose symmetry is a symmetry of |fi|, the method
|
|
||||||
contributes to |out| all tensors in unfolded stack formula involving
|
|
||||||
stacks chosen by |fi|.
|
|
||||||
|
|
||||||
We go through all |ui| coordinates which yield |fi| after sorting. We
|
|
||||||
construct a permutation |sort_per| which sorts |ui| to |fi|. We go
|
|
||||||
through all appropriate equivalences, and construct |StackProduct|
|
|
||||||
from equivalence classes permuted by |sort_per|, then |UPSTensor| with
|
|
||||||
implied permutation of columns by the permuted equivalence by
|
|
||||||
|sort_per|. The |UPSTensor| is then added to |out|.
|
|
||||||
|
|
||||||
We cannot use here the optimized |KronProdStack|, since the symmetry
|
|
||||||
of |UGSTensor& g| prescribes the ordering of the stacks. However, if
|
|
||||||
|g| is fully symmetric, we can do the optimization harmlessly.
|
|
||||||
|
|
||||||
@<|UnfoldedStackContainer::multAndAddStacks| code@>=
|
|
||||||
void UnfoldedStackContainer::multAndAddStacks(const IntSequence& fi,
|
|
||||||
const UGSTensor& g,
|
|
||||||
UGSTensor& out, const void* ad) const
|
|
||||||
{
|
|
||||||
const EquivalenceSet& eset = ebundle.get(out.dimen());
|
|
||||||
|
|
||||||
UFSTensor dummy_u(0, numStacks(), g.dimen());
|
|
||||||
for (Tensor::index ui = dummy_u.begin(); ui != dummy_u.end(); ++ui) {
|
|
||||||
IntSequence tmp(ui.getCoor());
|
|
||||||
tmp.sort();
|
|
||||||
if (tmp == fi) {
|
|
||||||
Permutation sort_per(ui.getCoor());
|
|
||||||
sort_per.inverse();
|
|
||||||
for (EquivalenceSet::const_iterator it = eset.begin();
|
|
||||||
it != eset.end(); ++it) {
|
|
||||||
if ((*it).numClasses() == g.dimen()) {
|
|
||||||
StackProduct<UGSTensor> sp(*this, *it, sort_per, out.getSym());
|
|
||||||
if (! sp.isZero(fi)) {
|
|
||||||
KronProdStack<UGSTensor> kp(sp, fi);
|
|
||||||
if (g.getSym().isFull())
|
|
||||||
kp.optimizeOrder();
|
|
||||||
UPSTensor ups(out.getDims(), *it, sort_per, g, kp);
|
|
||||||
{
|
|
||||||
SYNCHRO@, syn(ad, "multAndAddStacks");
|
|
||||||
ups.addTo(out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ End of {\tt stack\_container.cpp} file.
|
|
|
@ -0,0 +1,744 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// Stack of containers.
|
||||||
|
|
||||||
|
/* Here we develop abstractions for stacked containers of tensors. For
|
||||||
|
instance, in perturbation methods for SDGE we need function
|
||||||
|
$$z(y,u,u',\sigma)=\left[\matrix{G(y,u,u',\sigma)\cr g(y,u,\sigma)\cr y\cr u}\right]$$
|
||||||
|
and we need to calculate one step of Faa Di Bruno formula
|
||||||
|
$$\left[B_{s^k}\right]_{\alpha_1\ldots\alpha_l}=\left[f_{z^l}\right]_{\beta_1\ldots\beta_l}
|
||||||
|
\sum_{c\in M_{l,k}}\prod_{m=1}^l\left[z_{s^k(c_m)}\right]^{\beta_m}_{c_m(\alpha)}$$
|
||||||
|
where we have containers for derivatives of $G$ and $g$.
|
||||||
|
|
||||||
|
The main purpose of this file is to define abstractions for stack of
|
||||||
|
containers and possibly raw variables, and code |multAndAdd| method
|
||||||
|
calculating (one step of) the Faa Di Bruno formula for folded and
|
||||||
|
unfolded tensors. Note also, that tensors $\left[f_{z^l}\right]$ are
|
||||||
|
sparse.
|
||||||
|
|
||||||
|
The abstractions are built as follows. At the top, there is an
|
||||||
|
interface describing stack of columns. It contains pure virtual
|
||||||
|
methods needed for manipulating the container stack. For technical
|
||||||
|
reasons it is a template. Both versions (folded, and unfolded) provide
|
||||||
|
all interface necessary for implementation of |multAndAdd|. The second
|
||||||
|
way of inheritance is first general implementation of the interface
|
||||||
|
|StackContainer|, and then specific (|ZContainer| for our specific
|
||||||
|
$z$). The only method which is virtual also after |StackContainer| is
|
||||||
|
|getType|, which is implemented in the specialization and determines
|
||||||
|
behaviour of the stack. The complete classes are obtained by
|
||||||
|
inheriting from the both branches, as it is drawn below:
|
||||||
|
|
||||||
|
\def\drawpenta#1#2#3#4#5{%
|
||||||
|
\hbox{$
|
||||||
|
\hgrid=40pt\vgrid=20pt%
|
||||||
|
\sarrowlength=25pt%
|
||||||
|
\gridcommdiag{%
|
||||||
|
&&\hbox{#1}&&\cr
|
||||||
|
&\llap{virtual}\arrow(-1,-1)&&\arrow(1,-1)\rlap{virtual}&\cr
|
||||||
|
\hbox{#2}&&&&\hbox{#3}\cr
|
||||||
|
\arrow(0,-1)&&&&\cr
|
||||||
|
\hbox{#4}&&&
|
||||||
|
{\multiply\sarrowlength by 63\divide\sarrowlength by 50\arrow(-1,-2)}&\cr
|
||||||
|
&\arrow(1,-1)&&&\cr
|
||||||
|
&&\hbox{#5}&&\cr
|
||||||
|
}$}}
|
||||||
|
|
||||||
|
\centerline{
|
||||||
|
\drawpenta{|StackContainerInterface<FGSTensor>|}{|StackContainer<FGSTensor>|}%
|
||||||
|
{|FoldedStackContainer|}{|ZContainer<FGSTensor>|}{|FoldedZContainer|}
|
||||||
|
}
|
||||||
|
|
||||||
|
\centerline{
|
||||||
|
\drawpenta{|StackContainerInterface<UGSTensor>|}{|StackContainer<UGSTensor>|}%
|
||||||
|
{|UnfoldedStackContainer|}{|ZContainer<UGSTensor>|}{|UnfoldedZContainer|}
|
||||||
|
}
|
||||||
|
|
||||||
|
We have also two supporting classes |StackProduct| and |KronProdStack|
|
||||||
|
and a number of worker classes used as threads. */
|
||||||
|
|
||||||
|
#ifndef STACK_CONTAINER_H
|
||||||
|
#define STACK_CONTAINER_H
|
||||||
|
|
||||||
|
#include "int_sequence.hh"
|
||||||
|
#include "equivalence.hh"
|
||||||
|
#include "tl_static.hh"
|
||||||
|
#include "t_container.hh"
|
||||||
|
#include "kron_prod.hh"
|
||||||
|
#include "permutation.hh"
|
||||||
|
#include "sthread.hh"
|
||||||
|
|
||||||
|
/* Here is the general interface to stack container. The subclasses
|
||||||
|
maintain |IntSequence| of stack sizes, i.e. size of $G$, $g$, $y$, and
|
||||||
|
$u$. Then a convenience |IntSequence| of stack offsets. Then vector of
|
||||||
|
pointers to containers, in our example $G$, and $g$.
|
||||||
|
|
||||||
|
A non-virtual subclass must implement |getType| which determines
|
||||||
|
dependency of stack items on symmetries. There are three possible types
|
||||||
|
for a symmetry. Either the stack item derivative wrt. the symmetry is
|
||||||
|
a matrix, or a unit matrix, or zero.
|
||||||
|
|
||||||
|
Method |isZero| returns true if the derivative of a given stack item
|
||||||
|
wrt. to given symmetry is zero as defined by |getType| or the
|
||||||
|
derivative is not present in the container. In this way, we can
|
||||||
|
implement the formula conditional some of the tensors are zero, which
|
||||||
|
is not true (they are only missing).
|
||||||
|
|
||||||
|
Method |createPackedColumn| returns a vector of stack derivatives with
|
||||||
|
respect to the given symmetry and of the given column, where all zeros
|
||||||
|
from zero types, or unit matrices are deleted. See {\tt
|
||||||
|
kron\_prod2.hweb} for explanation. */
|
||||||
|
|
||||||
|
template <class _Ttype>
|
||||||
|
class StackContainerInterface
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef TensorContainer<_Ttype> _Ctype;
|
||||||
|
typedef enum { matrix, unit, zero} itype;
|
||||||
|
protected:
|
||||||
|
const EquivalenceBundle &ebundle;
|
||||||
|
public:
|
||||||
|
StackContainerInterface()
|
||||||
|
: ebundle(*(tls.ebundle))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual ~StackContainerInterface()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual const IntSequence&getStackSizes() const = 0;
|
||||||
|
virtual IntSequence&getStackSizes() = 0;
|
||||||
|
virtual const IntSequence&getStackOffsets() const = 0;
|
||||||
|
virtual IntSequence&getStackOffsets() = 0;
|
||||||
|
virtual int numConts() const = 0;
|
||||||
|
virtual const _Ctype *getCont(int i) const = 0;
|
||||||
|
virtual itype getType(int i, const Symmetry &s) const = 0;
|
||||||
|
virtual int numStacks() const = 0;
|
||||||
|
virtual bool isZero(int i, const Symmetry &s) const = 0;
|
||||||
|
virtual const _Ttype *getMatrix(int i, const Symmetry &s) const = 0;
|
||||||
|
virtual int getLengthOfMatrixStacks(const Symmetry &s) const = 0;
|
||||||
|
virtual int getUnitPos(const Symmetry &s) const = 0;
|
||||||
|
virtual Vector *createPackedColumn(const Symmetry &s,
|
||||||
|
const IntSequence &coor,
|
||||||
|
int &iu) const = 0;
|
||||||
|
int
|
||||||
|
getAllSize() const
|
||||||
|
{
|
||||||
|
return getStackOffsets()[numStacks()-1]
|
||||||
|
+ getStackSizes()[numStacks()-1];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Here is |StackContainer|, which implements almost all interface
|
||||||
|
|StackContainerInterface| but one method |getType| which is left for
|
||||||
|
implementation to specializations. */
|
||||||
|
|
||||||
|
template <class _Ttype>
|
||||||
|
class StackContainer : virtual public StackContainerInterface<_Ttype>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef StackContainerInterface<_Ttype> _Stype;
|
||||||
|
typedef typename StackContainerInterface<_Ttype>::_Ctype _Ctype;
|
||||||
|
typedef typename StackContainerInterface<_Ttype>::itype itype;
|
||||||
|
protected:
|
||||||
|
int num_conts;
|
||||||
|
IntSequence stack_sizes;
|
||||||
|
IntSequence stack_offsets;
|
||||||
|
const _Ctype **const conts;
|
||||||
|
public:
|
||||||
|
StackContainer(int ns, int nc)
|
||||||
|
: num_conts(nc), stack_sizes(ns, 0), stack_offsets(ns, 0),
|
||||||
|
conts(new const _Ctype *[nc])
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual ~StackContainer()
|
||||||
|
{
|
||||||
|
delete [] conts;
|
||||||
|
}
|
||||||
|
const IntSequence &
|
||||||
|
getStackSizes() const
|
||||||
|
{
|
||||||
|
return stack_sizes;
|
||||||
|
}
|
||||||
|
IntSequence &
|
||||||
|
getStackSizes()
|
||||||
|
{
|
||||||
|
return stack_sizes;
|
||||||
|
}
|
||||||
|
const IntSequence &
|
||||||
|
getStackOffsets() const
|
||||||
|
{
|
||||||
|
return stack_offsets;
|
||||||
|
}
|
||||||
|
IntSequence &
|
||||||
|
getStackOffsets()
|
||||||
|
{
|
||||||
|
return stack_offsets;
|
||||||
|
}
|
||||||
|
int
|
||||||
|
numConts() const
|
||||||
|
{
|
||||||
|
return num_conts;
|
||||||
|
}
|
||||||
|
const _Ctype *
|
||||||
|
getCont(int i) const
|
||||||
|
{
|
||||||
|
return conts[i];
|
||||||
|
}
|
||||||
|
virtual itype getType(int i, const Symmetry &s) const = 0;
|
||||||
|
int
|
||||||
|
numStacks() const
|
||||||
|
{
|
||||||
|
return stack_sizes.size();
|
||||||
|
}
|
||||||
|
bool
|
||||||
|
isZero(int i, const Symmetry &s) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(i < 0 || i >= numStacks(),
|
||||||
|
"Wrong index to stack in StackContainer::isZero.");
|
||||||
|
return (getType(i, s) == _Stype::zero
|
||||||
|
|| (getType(i, s) == _Stype::matrix && !conts[i]->check(s)));
|
||||||
|
}
|
||||||
|
|
||||||
|
const _Ttype *
|
||||||
|
getMatrix(int i, const Symmetry &s) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(isZero(i, s) || getType(i, s) == _Stype::unit,
|
||||||
|
"Matrix is not returned in StackContainer::getMatrix");
|
||||||
|
return conts[i]->get(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
getLengthOfMatrixStacks(const Symmetry &s) const
|
||||||
|
{
|
||||||
|
int res = 0;
|
||||||
|
int i = 0;
|
||||||
|
while (i < numStacks() && getType(i, s) == _Stype::matrix)
|
||||||
|
res += stack_sizes[i++];
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
getUnitPos(const Symmetry &s) const
|
||||||
|
{
|
||||||
|
if (s.dimen() != 1)
|
||||||
|
return -1;
|
||||||
|
int i = numStacks()-1;
|
||||||
|
while (i >= 0 && getType(i, s) != _Stype::unit)
|
||||||
|
i--;
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Build a freshly allocated column of stack derivatives wrt |s| at
   coordinates |coor|, with zero-type items dropped and a possible
   unit-type item packed as a single trailing 1. On return |iu| holds
   the global row position of that unit entry, or -1 if there is none.
   The caller takes ownership of the returned vector. */
Vector *
createPackedColumn(const Symmetry &s,
                   const IntSequence &coor, int &iu) const
{
  TL_RAISE_IF(s.dimen() != coor.size(),
              "Incompatible coordinates for symmetry in StackContainer::createPackedColumn");

  int len = getLengthOfMatrixStacks(s);
  iu = -1;
  int i = getUnitPos(s);
  if (i != -1)
    {
      // reserve one extra slot for the unit entry
      iu = stack_offsets[i] + coor[0];
      len++;
    }

  Vector *res = new Vector(len);
  // copy the column of each leading matrix-type stack item
  for (i = 0; i < numStacks() && getType(i, s) == _Stype::matrix; i++)
    {
      const _Ttype *t = getMatrix(i, s);
      Tensor::index ind(t, coor);
      Vector subres(*res, stack_offsets[i], stack_sizes[i]);
      subres = ConstVector(ConstGeneralMatrix(*t), *ind);
    }
  if (iu != -1)
    (*res)[len-1] = 1;

  return res;
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void
|
||||||
|
calculateOffsets()
|
||||||
|
{
|
||||||
|
stack_offsets[0] = 0;
|
||||||
|
for (int i = 1; i < stack_offsets.size(); i++)
|
||||||
|
stack_offsets[i] = stack_offsets[i-1] + stack_sizes[i-1];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class WorkerFoldMAADense;
|
||||||
|
class WorkerFoldMAASparse1;
|
||||||
|
class WorkerFoldMAASparse2;
|
||||||
|
class WorkerFoldMAASparse4;
|
||||||
|
class FoldedStackContainer : virtual public StackContainerInterface<FGSTensor>
|
||||||
|
{
|
||||||
|
friend class WorkerFoldMAADense;
|
||||||
|
friend class WorkerFoldMAASparse1;
|
||||||
|
friend class WorkerFoldMAASparse2;
|
||||||
|
friend class WorkerFoldMAASparse4;
|
||||||
|
public:
|
||||||
|
static double fill_threshold;
|
||||||
|
void
|
||||||
|
multAndAdd(int dim, const TensorContainer<FSSparseTensor> &c,
|
||||||
|
FGSTensor &out) const
|
||||||
|
{
|
||||||
|
if (c.check(Symmetry(dim)))
|
||||||
|
multAndAdd(*(c.get(Symmetry(dim))), out);
|
||||||
|
}
|
||||||
|
void multAndAdd(const FSSparseTensor &t, FGSTensor &out) const;
|
||||||
|
void multAndAdd(int dim, const FGSContainer &c, FGSTensor &out) const;
|
||||||
|
protected:
|
||||||
|
void multAndAddSparse1(const FSSparseTensor &t, FGSTensor &out) const;
|
||||||
|
void multAndAddSparse2(const FSSparseTensor &t, FGSTensor &out) const;
|
||||||
|
void multAndAddSparse3(const FSSparseTensor &t, FGSTensor &out) const;
|
||||||
|
void multAndAddSparse4(const FSSparseTensor &t, FGSTensor &out) const;
|
||||||
|
void multAndAddStacks(const IntSequence &fi, const FGSTensor &g,
|
||||||
|
FGSTensor &out, const void *ad) const;
|
||||||
|
void multAndAddStacks(const IntSequence &fi, const GSSparseTensor &g,
|
||||||
|
FGSTensor &out, const void *ad) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
class WorkerUnfoldMAADense;
|
||||||
|
class WorkerUnfoldMAASparse1;
|
||||||
|
class WorkerUnfoldMAASparse2;
|
||||||
|
class UnfoldedStackContainer : virtual public StackContainerInterface<UGSTensor>
|
||||||
|
{
|
||||||
|
friend class WorkerUnfoldMAADense;
|
||||||
|
friend class WorkerUnfoldMAASparse1;
|
||||||
|
friend class WorkerUnfoldMAASparse2;
|
||||||
|
public:
|
||||||
|
static double fill_threshold;
|
||||||
|
void
|
||||||
|
multAndAdd(int dim, const TensorContainer<FSSparseTensor> &c,
|
||||||
|
UGSTensor &out) const
|
||||||
|
{
|
||||||
|
if (c.check(Symmetry(dim)))
|
||||||
|
multAndAdd(*(c.get(Symmetry(dim))), out);
|
||||||
|
}
|
||||||
|
void multAndAdd(const FSSparseTensor &t, UGSTensor &out) const;
|
||||||
|
void multAndAdd(int dim, const UGSContainer &c, UGSTensor &out) const;
|
||||||
|
protected:
|
||||||
|
void multAndAddSparse1(const FSSparseTensor &t, UGSTensor &out) const;
|
||||||
|
void multAndAddSparse2(const FSSparseTensor &t, UGSTensor &out) const;
|
||||||
|
void multAndAddStacks(const IntSequence &fi, const UGSTensor &g,
|
||||||
|
UGSTensor &out, const void *ad) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Here is the specialization of the |StackContainer|. We implement
   here the $z$ needed in the SDGE context. We implement |getType| and
   define a constructor feeding the data and sizes.

   Note that it has two containers: the first depends on four
   variables, $G(y^*,u,u',\sigma)$, and the second depends on three
   variables, $g(y^*,u,\sigma)$. So that we are able to stack them, we
   make the second container $g$ formally depend on four variables, the
   third being a dummy $u'$; it always returns zero whenever the
   dimension of $u'$ is positive. */
|
||||||
|
|
||||||
|
template <class _Ttype>
|
||||||
|
class ZContainer : public StackContainer<_Ttype>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef StackContainer<_Ttype> _Tparent;
|
||||||
|
typedef StackContainerInterface<_Ttype> _Stype;
|
||||||
|
typedef typename _Tparent::_Ctype _Ctype;
|
||||||
|
typedef typename _Tparent::itype itype;
|
||||||
|
ZContainer(const _Ctype *gss, int ngss, const _Ctype *g, int ng,
|
||||||
|
int ny, int nu)
|
||||||
|
: _Tparent(4, 2)
|
||||||
|
{
|
||||||
|
_Tparent::stack_sizes[0] = ngss; _Tparent::stack_sizes[1] = ng;
|
||||||
|
_Tparent::stack_sizes[2] = ny; _Tparent::stack_sizes[3] = nu;
|
||||||
|
_Tparent::conts[0] = gss;
|
||||||
|
_Tparent::conts[1] = g;
|
||||||
|
_Tparent::calculateOffsets();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we say, what happens if we derive $z$. recall the top of the
|
||||||
|
file, how $z$ looks, and code is clear. */
|
||||||
|
|
||||||
|
itype
|
||||||
|
getType(int i, const Symmetry &s) const
|
||||||
|
{
|
||||||
|
if (i == 0)
|
||||||
|
return _Stype::matrix;
|
||||||
|
if (i == 1)
|
||||||
|
if (s[2] > 0)
|
||||||
|
return _Stype::zero;
|
||||||
|
else
|
||||||
|
return _Stype::matrix;
|
||||||
|
if (i == 2)
|
||||||
|
if (s == Symmetry(1, 0, 0, 0))
|
||||||
|
return _Stype::unit;
|
||||||
|
else
|
||||||
|
return _Stype::zero;
|
||||||
|
if (i == 3)
|
||||||
|
if (s == Symmetry(0, 1, 0, 0))
|
||||||
|
return _Stype::unit;
|
||||||
|
else
|
||||||
|
return _Stype::zero;
|
||||||
|
|
||||||
|
TL_RAISE("Wrong stack index in ZContainer::getType");
|
||||||
|
return _Stype::zero;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Complete folded z-container: joins the |ZContainer| specialization
   with the folded |multAndAdd| machinery. */
class FoldedZContainer : public ZContainer<FGSTensor>,
                         public FoldedStackContainer
{
public:
  typedef TensorContainer<FGSTensor> _Ctype;
  FoldedZContainer(const _Ctype *gss, int ngss, const _Ctype *g, int ng,
                   int ny, int nu)
    : ZContainer<FGSTensor>(gss, ngss, g, ng, ny, nu)
  {
  }
};
|
||||||
|
|
||||||
|
/* Complete unfolded z-container: joins the |ZContainer| specialization
   with the unfolded |multAndAdd| machinery. */
class UnfoldedZContainer : public ZContainer<UGSTensor>,
                           public UnfoldedStackContainer
{
public:
  typedef TensorContainer<UGSTensor> _Ctype;
  UnfoldedZContainer(const _Ctype *gss, int ngss, const _Ctype *g, int ng,
                     int ny, int nu)
    : ZContainer<UGSTensor>(gss, ngss, g, ng, ny, nu)
  {
  }
};
|
||||||
|
|
||||||
|
/* Here we have another specialization of the container used in the
   context of SDGE. We define a container for
   $$G(y,u,u',\sigma)=g^{**}(g^*(y,u,\sigma),u',\sigma)$$

   For some reason, the symmetry of $g^{**}$ has length $4$ although it
   really depends on only three variables. (To know the reason, consult
   the comment above the |ZContainer| class.) So it has four stacks;
   the third one is a dummy and always returns zero. The first stack
   corresponds to a container of $g^*$. */
|
||||||
|
|
||||||
|
template <class _Ttype>
|
||||||
|
class GContainer : public StackContainer<_Ttype>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef StackContainer<_Ttype> _Tparent;
|
||||||
|
typedef StackContainerInterface<_Ttype> _Stype;
|
||||||
|
typedef typename StackContainer<_Ttype>::_Ctype _Ctype;
|
||||||
|
typedef typename StackContainer<_Ttype>::itype itype;
|
||||||
|
GContainer(const _Ctype *gs, int ngs, int nu)
|
||||||
|
: StackContainer<_Ttype>(4, 1)
|
||||||
|
{
|
||||||
|
_Tparent::stack_sizes[0] = ngs; _Tparent::stack_sizes[1] = nu;
|
||||||
|
_Tparent::stack_sizes[2] = nu; _Tparent::stack_sizes[3] = 1;
|
||||||
|
_Tparent::conts[0] = gs;
|
||||||
|
_Tparent::calculateOffsets();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we define the dependencies in
|
||||||
|
$g^{**}(g^*(y,u,\sigma),u',\sigma)$. Also note, that first derivative
|
||||||
|
of $g^*$ wrt $\sigma$ is always zero, so we also add this
|
||||||
|
information. */
|
||||||
|
|
||||||
|
itype
|
||||||
|
getType(int i, const Symmetry &s) const
|
||||||
|
{
|
||||||
|
if (i == 0)
|
||||||
|
if (s[2] > 0 || s == Symmetry(0, 0, 0, 1))
|
||||||
|
return _Stype::zero;
|
||||||
|
else
|
||||||
|
return _Stype::matrix;
|
||||||
|
if (i == 1)
|
||||||
|
if (s == Symmetry(0, 0, 1, 0))
|
||||||
|
return _Stype::unit;
|
||||||
|
else
|
||||||
|
return _Stype::zero;
|
||||||
|
if (i == 2)
|
||||||
|
return _Stype::zero;
|
||||||
|
if (i == 3)
|
||||||
|
if (s == Symmetry(0, 0, 0, 1))
|
||||||
|
return _Stype::unit;
|
||||||
|
else
|
||||||
|
return _Stype::zero;
|
||||||
|
|
||||||
|
TL_RAISE("Wrong stack index in GContainer::getType");
|
||||||
|
return _Stype::zero;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Complete folded G-container: |GContainer| specialization plus the
   folded |multAndAdd| machinery. */
class FoldedGContainer : public GContainer<FGSTensor>,
                         public FoldedStackContainer
{
public:
  typedef TensorContainer<FGSTensor> _Ctype;
  FoldedGContainer(const _Ctype *gs, int ngs, int nu)
    : GContainer<FGSTensor>(gs, ngs, nu)
  {
  }
};
|
||||||
|
|
||||||
|
/* Complete unfolded G-container: |GContainer| specialization plus the
   unfolded |multAndAdd| machinery. */
class UnfoldedGContainer : public GContainer<UGSTensor>,
                           public UnfoldedStackContainer
{
public:
  typedef TensorContainer<UGSTensor> _Ctype;
  UnfoldedGContainer(const _Ctype *gs, int ngs, int nu)
    : GContainer<UGSTensor>(gs, ngs, nu)
  {
  }
};
|
||||||
|
|
||||||
|
/* Here we have a support class for products of |StackContainer|s. It
   only adds a dimension to |StackContainer|. It selects the symmetries
   according to the equivalence classes passed to the constructor; the
   equivalence may have its classes permuted by a given permutation.
   Nothing else is of interest here. */
|
||||||
|
|
||||||
|
template <class _Ttype>
|
||||||
|
class StackProduct
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef StackContainerInterface<_Ttype> _Stype;
|
||||||
|
typedef typename _Stype::_Ctype _Ctype;
|
||||||
|
typedef typename _Stype::itype itype;
|
||||||
|
protected:
|
||||||
|
const _Stype &stack_cont;
|
||||||
|
InducedSymmetries syms;
|
||||||
|
Permutation per;
|
||||||
|
public:
|
||||||
|
StackProduct(const _Stype &sc, const Equivalence &e,
|
||||||
|
const Symmetry &os)
|
||||||
|
: stack_cont(sc), syms(e, os), per(e)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
StackProduct(const _Stype &sc, const Equivalence &e,
|
||||||
|
const Permutation &p, const Symmetry &os)
|
||||||
|
: stack_cont(sc), syms(e, p, os), per(e, p)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
int
|
||||||
|
dimen() const
|
||||||
|
{
|
||||||
|
return syms.size();
|
||||||
|
}
|
||||||
|
int
|
||||||
|
getAllSize() const
|
||||||
|
{
|
||||||
|
return stack_cont.getAllSize();
|
||||||
|
}
|
||||||
|
const Symmetry &
|
||||||
|
getProdSym(int ip) const
|
||||||
|
{
|
||||||
|
return syms[ip];
|
||||||
|
}
|
||||||
|
bool
|
||||||
|
isZero(const IntSequence &istacks) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(istacks.size() != dimen(),
|
||||||
|
"Wrong istacks coordinates for StackProduct::isZero");
|
||||||
|
|
||||||
|
bool res = false;
|
||||||
|
int i = 0;
|
||||||
|
while (i < dimen() && !(res = stack_cont.isZero(istacks[i], syms[i])))
|
||||||
|
i++;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
itype
|
||||||
|
getType(int is, int ip) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(is < 0 || is >= stack_cont.numStacks(),
|
||||||
|
"Wrong index to stack in StackProduct::getType");
|
||||||
|
TL_RAISE_IF(ip < 0 || ip >= dimen(),
|
||||||
|
"Wrong index to stack container in StackProduct::getType");
|
||||||
|
return stack_cont.getType(is, syms[ip]);
|
||||||
|
}
|
||||||
|
|
||||||
|
const _Ttype *
|
||||||
|
getMatrix(int is, int ip) const
|
||||||
|
{
|
||||||
|
return stack_cont.getMatrix(is, syms[ip]);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
createPackedColumns(const IntSequence &coor,
|
||||||
|
Vector **vs, IntSequence &iu) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(iu.size() != dimen(),
|
||||||
|
"Wrong storage length for unit flags in StackProduct::createPackedColumn");
|
||||||
|
TL_RAISE_IF(coor.size() != per.size(),
|
||||||
|
"Wrong size of index coor in StackProduct::createPackedColumn");
|
||||||
|
IntSequence perindex(coor.size());
|
||||||
|
per.apply(coor, perindex);
|
||||||
|
int off = 0;
|
||||||
|
for (int i = 0; i < dimen(); i++)
|
||||||
|
{
|
||||||
|
IntSequence percoor(perindex, off, syms[i].dimen() + off);
|
||||||
|
vs[i] = stack_cont.createPackedColumn(syms[i], percoor, iu[i]);
|
||||||
|
off += syms[i].dimen();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
getSize(int is) const
|
||||||
|
{
|
||||||
|
return stack_cont.getStackSizes()[is];
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
numMatrices(const IntSequence &istacks) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(istacks.size() != dimen(),
|
||||||
|
"Wrong size of stack coordinates in StackContainer::numMatrices");
|
||||||
|
int ret = 0;
|
||||||
|
int ip = 0;
|
||||||
|
while (ip < dimen() && getType(istacks[ip], ip) == _Stype::matrix)
|
||||||
|
{
|
||||||
|
ret++;
|
||||||
|
ip++;
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Here we inherit from the Kronecker product |KronProdAllOptim| solely
   to provide a constructor building it from a |StackProduct|. */
|
||||||
|
|
||||||
|
template <class _Ttype>
|
||||||
|
class KronProdStack : public KronProdAllOptim
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef StackProduct<_Ttype> _Ptype;
|
||||||
|
typedef StackContainerInterface<_Ttype> _Stype;
|
||||||
|
|
||||||
|
/* Here we construct |KronProdAllOptim| from |StackContainer| and given
|
||||||
|
selections of stack items from stack containers in the product. We
|
||||||
|
only decide whether to insert matrix, or unit matrix.
|
||||||
|
|
||||||
|
At this point, we do not call |KronProdAllOptim::optimizeOrder|, so
|
||||||
|
the |KronProdStack| behaves like |KronProdAll| (i.e. no optimization
|
||||||
|
is done). */
|
||||||
|
|
||||||
|
KronProdStack(const _Ptype &sp, const IntSequence &istack)
|
||||||
|
: KronProdAllOptim(sp.dimen())
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(sp.dimen() != istack.size(),
|
||||||
|
"Wrong stack product dimension for KronProdStack constructor");
|
||||||
|
|
||||||
|
for (int i = 0; i < sp.dimen(); i++)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(sp.getType(istack[i], i) == _Stype::zero,
|
||||||
|
"Attempt to construct KronProdStack from zero matrix");
|
||||||
|
if (sp.getType(istack[i], i) == _Stype::unit)
|
||||||
|
setUnit(i, sp.getSize(istack[i]));
|
||||||
|
if (sp.getType(istack[i], i) == _Stype::matrix)
|
||||||
|
{
|
||||||
|
const TwoDMatrix *m = sp.getMatrix(istack[i], i);
|
||||||
|
TL_RAISE_IF(m->nrows() != sp.getSize(istack[i]),
|
||||||
|
"Wrong size of returned matrix in KronProdStack constructor");
|
||||||
|
setMat(i, *m);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class WorkerFoldMAADense : public THREAD
|
||||||
|
{
|
||||||
|
const FoldedStackContainer &cont;
|
||||||
|
Symmetry sym;
|
||||||
|
const FGSContainer &dense_cont;
|
||||||
|
FGSTensor &out;
|
||||||
|
public:
|
||||||
|
WorkerFoldMAADense(const FoldedStackContainer &container,
|
||||||
|
const Symmetry &s,
|
||||||
|
const FGSContainer &dcontainer,
|
||||||
|
FGSTensor &outten);
|
||||||
|
void operator()();
|
||||||
|
};
|
||||||
|
|
||||||
|
class WorkerFoldMAASparse1 : public THREAD
|
||||||
|
{
|
||||||
|
const FoldedStackContainer &cont;
|
||||||
|
const FSSparseTensor &t;
|
||||||
|
FGSTensor &out;
|
||||||
|
IntSequence coor;
|
||||||
|
const EquivalenceBundle &ebundle;
|
||||||
|
public:
|
||||||
|
WorkerFoldMAASparse1(const FoldedStackContainer &container,
|
||||||
|
const FSSparseTensor &ten,
|
||||||
|
FGSTensor &outten, const IntSequence &c);
|
||||||
|
void operator()();
|
||||||
|
};
|
||||||
|
|
||||||
|
class WorkerFoldMAASparse2 : public THREAD
|
||||||
|
{
|
||||||
|
const FoldedStackContainer &cont;
|
||||||
|
const FSSparseTensor &t;
|
||||||
|
FGSTensor &out;
|
||||||
|
IntSequence coor;
|
||||||
|
public:
|
||||||
|
WorkerFoldMAASparse2(const FoldedStackContainer &container,
|
||||||
|
const FSSparseTensor &ten,
|
||||||
|
FGSTensor &outten, const IntSequence &c);
|
||||||
|
void operator()();
|
||||||
|
};
|
||||||
|
|
||||||
|
class WorkerFoldMAASparse4 : public THREAD
|
||||||
|
{
|
||||||
|
const FoldedStackContainer &cont;
|
||||||
|
const FSSparseTensor &t;
|
||||||
|
FGSTensor &out;
|
||||||
|
IntSequence coor;
|
||||||
|
public:
|
||||||
|
WorkerFoldMAASparse4(const FoldedStackContainer &container,
|
||||||
|
const FSSparseTensor &ten,
|
||||||
|
FGSTensor &outten, const IntSequence &c);
|
||||||
|
void operator()();
|
||||||
|
};
|
||||||
|
|
||||||
|
class WorkerUnfoldMAADense : public THREAD
|
||||||
|
{
|
||||||
|
const UnfoldedStackContainer &cont;
|
||||||
|
Symmetry sym;
|
||||||
|
const UGSContainer &dense_cont;
|
||||||
|
UGSTensor &out;
|
||||||
|
public:
|
||||||
|
WorkerUnfoldMAADense(const UnfoldedStackContainer &container,
|
||||||
|
const Symmetry &s,
|
||||||
|
const UGSContainer &dcontainer,
|
||||||
|
UGSTensor &outten);
|
||||||
|
void operator()();
|
||||||
|
};
|
||||||
|
|
||||||
|
class WorkerUnfoldMAASparse1 : public THREAD
|
||||||
|
{
|
||||||
|
const UnfoldedStackContainer &cont;
|
||||||
|
const FSSparseTensor &t;
|
||||||
|
UGSTensor &out;
|
||||||
|
IntSequence coor;
|
||||||
|
const EquivalenceBundle &ebundle;
|
||||||
|
public:
|
||||||
|
WorkerUnfoldMAASparse1(const UnfoldedStackContainer &container,
|
||||||
|
const FSSparseTensor &ten,
|
||||||
|
UGSTensor &outten, const IntSequence &c);
|
||||||
|
void operator()();
|
||||||
|
};
|
||||||
|
|
||||||
|
class WorkerUnfoldMAASparse2 : public THREAD
|
||||||
|
{
|
||||||
|
const UnfoldedStackContainer &cont;
|
||||||
|
const FSSparseTensor &t;
|
||||||
|
UGSTensor &out;
|
||||||
|
IntSequence coor;
|
||||||
|
public:
|
||||||
|
WorkerUnfoldMAASparse2(const UnfoldedStackContainer &container,
|
||||||
|
const FSSparseTensor &ten,
|
||||||
|
UGSTensor &outten, const IntSequence &c);
|
||||||
|
void operator()();
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,771 +0,0 @@
|
||||||
@q $Id: stack_container.hweb 745 2006-05-09 13:20:00Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Stack of containers. Start of {\tt stack\_container.h} file.
|
|
||||||
|
|
||||||
Here we develop abstractions for stacked containers of tensors. For
|
|
||||||
instance, in perturbation methods for SDGE we need function
|
|
||||||
$$z(y,u,u',\sigma)=\left[\matrix{G(y,u,u',\sigma)\cr g(y,u,\sigma)\cr y\cr u}\right]$$
|
|
||||||
and we need to calculate one step of Faa Di Bruno formula
|
|
||||||
$$\left[B_{s^k}\right]_{\alpha_1\ldots\alpha_l}=\left[f_{z^l}\right]_{\beta_1\ldots\beta_l}
|
|
||||||
\sum_{c\in M_{l,k}}\prod_{m=1}^l\left[z_{s^k(c_m)}\right]^{\beta_m}_{c_m(\alpha)}$$
|
|
||||||
where we have containers for derivatives of $G$ and $g$.
|
|
||||||
|
|
||||||
The main purpose of this file is to define abstractions for stack of
|
|
||||||
containers and possibly raw variables, and code |multAndAdd| method
|
|
||||||
calculating (one step of) the Faa Di Bruno formula for folded and
|
|
||||||
unfolded tensors. Note also, that tensors $\left[f_{z^l}\right]$ are
|
|
||||||
sparse.
|
|
||||||
|
|
||||||
The abstractions are built as follows. At the top, there is an
|
|
||||||
interface describing stack of columns. It contains pure virtual
|
|
||||||
methods needed for manipulating the container stack. For technical
|
|
||||||
reasons it is a template. Both versions (folded, and unfolded) provide
|
|
||||||
all interface necessary for implementation of |multAndAdd|. The second
|
|
||||||
way of inheritance is first general implementation of the interface
|
|
||||||
|StackContainer|, and then specific (|ZContainer| for our specific
|
|
||||||
$z$). The only method which is virtual also after |StackContainer| is
|
|
||||||
|getType|, which is implemented in the specialization and determines
|
|
||||||
behaviour of the stack. The complete classes are obtained by
|
|
||||||
inheriting from the both branches, as it is drawn below:
|
|
||||||
|
|
||||||
\def\drawpenta#1#2#3#4#5{%
|
|
||||||
\hbox{$
|
|
||||||
\hgrid=40pt\vgrid=20pt%
|
|
||||||
\sarrowlength=25pt%
|
|
||||||
\gridcommdiag{%
|
|
||||||
&&\hbox{#1}&&\cr
|
|
||||||
&\llap{virtual}\arrow(-1,-1)&&\arrow(1,-1)\rlap{virtual}&\cr
|
|
||||||
\hbox{#2}&&&&\hbox{#3}\cr
|
|
||||||
\arrow(0,-1)&&&&\cr
|
|
||||||
\hbox{#4}&&&
|
|
||||||
{\multiply\sarrowlength by 63\divide\sarrowlength by 50\arrow(-1,-2)}&\cr
|
|
||||||
&\arrow(1,-1)&&&\cr
|
|
||||||
&&\hbox{#5}&&\cr
|
|
||||||
}$}}
|
|
||||||
|
|
||||||
\centerline{
|
|
||||||
\drawpenta{|StackContainerInterface<FGSTensor>|}{|StackContainer<FGSTensor>|}%
|
|
||||||
{|FoldedStackContainer|}{|ZContainer<FGSTensor>|}{|FoldedZContainer|}
|
|
||||||
}
|
|
||||||
|
|
||||||
\centerline{
|
|
||||||
\drawpenta{|StackContainerInterface<UGSTensor>|}{|StackContainer<UGSTensor>|}%
|
|
||||||
{|UnfoldedStackContainer|}{|ZContainer<UGSTensor>|}{|UnfoldedZContainer|}
|
|
||||||
}
|
|
||||||
|
|
||||||
We have also two supporting classes |StackProduct| and |KronProdStack|
|
|
||||||
and a number of worker classes used as threads.
|
|
||||||
|
|
||||||
@s StackContainerInterface int
|
|
||||||
@s StackContainer int
|
|
||||||
@s ZContainer int
|
|
||||||
@s FoldedStackContainer int
|
|
||||||
@s UnfoldedStackContainer int
|
|
||||||
@s FoldedZContainer int
|
|
||||||
@s UnfoldedZContainer int
|
|
||||||
@s WorkerFoldMAADense int
|
|
||||||
@s WorkerFoldMAASparse1 int
|
|
||||||
@s WorkerFoldMAASparse2 int
|
|
||||||
@s WorkerFoldMAASparse4 int
|
|
||||||
@s WorkerUnfoldMAADense int
|
|
||||||
@s WorkerUnfoldMAASparse1 int
|
|
||||||
@s WorkerUnfoldMAASparse2 int
|
|
||||||
@s GContainer int
|
|
||||||
@s FoldedGContainer int
|
|
||||||
@s UnfoldedGContainer int
|
|
||||||
@s StackProduct int
|
|
||||||
@s KronProdStack int
|
|
||||||
|
|
||||||
@c
|
|
||||||
#ifndef STACK_CONTAINER_H
|
|
||||||
#define STACK_CONTAINER_H
|
|
||||||
|
|
||||||
#include "int_sequence.h"
|
|
||||||
#include "equivalence.h"
|
|
||||||
#include "tl_static.h"
|
|
||||||
#include "t_container.h"
|
|
||||||
#include "kron_prod.h"
|
|
||||||
#include "permutation.h"
|
|
||||||
#include "sthread.h"
|
|
||||||
|
|
||||||
@<|StackContainerInterface| class declaration@>;
|
|
||||||
@<|StackContainer| class declaration@>;
|
|
||||||
@<|FoldedStackContainer| class declaration@>;
|
|
||||||
@<|UnfoldedStackContainer| class declaration@>;
|
|
||||||
@<|ZContainer| class declaration@>;
|
|
||||||
@<|FoldedZContainer| class declaration@>;
|
|
||||||
@<|UnfoldedZContainer| class declaration@>;
|
|
||||||
@<|GContainer| class declaration@>;
|
|
||||||
@<|FoldedGContainer| class declaration@>;
|
|
||||||
@<|UnfoldedGContainer| class declaration@>;
|
|
||||||
@<|StackProduct| class declaration@>;
|
|
||||||
@<|KronProdStack| class declaration@>;
|
|
||||||
@<|WorkerFoldMAADense| class declaration@>;
|
|
||||||
@<|WorkerFoldMAASparse1| class declaration@>;
|
|
||||||
@<|WorkerFoldMAASparse2| class declaration@>;
|
|
||||||
@<|WorkerFoldMAASparse4| class declaration@>;
|
|
||||||
@<|WorkerUnfoldMAADense| class declaration@>;
|
|
||||||
@<|WorkerUnfoldMAASparse1| class declaration@>;
|
|
||||||
@<|WorkerUnfoldMAASparse2| class declaration@>;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
@ Here is the general interface to stack container. The subclasses
|
|
||||||
maintain |IntSequence| of stack sizes, i.e. size of $G$, $g$, $y$, and
|
|
||||||
$u$. Then a convenience |IntSequence| of stack offsets. Then vector of
|
|
||||||
pointers to containers, in our example $G$, and $g$.
|
|
||||||
|
|
||||||
A non-virtual subclass must implement |getType| which determines
|
|
||||||
dependency of stack items on symmetries. There are three possible types
|
|
||||||
for a symmetry. Either the stack item derivative wrt. the symmetry is
|
|
||||||
a matrix, or a unit matrix, or zero.
|
|
||||||
|
|
||||||
Method |isZero| returns true if the derivative of a given stack item
|
|
||||||
wrt. to given symmetry is zero as defined by |getType| or the
|
|
||||||
derivative is not present in the container. In this way, we can
|
|
||||||
implement the formula conditional some of the tensors are zero, which
|
|
||||||
is not true (they are only missing).
|
|
||||||
|
|
||||||
Method |createPackedColumn| returns a vector of stack derivatives with
|
|
||||||
respect to the given symmetry and of the given column, where all zeros
|
|
||||||
from zero types, or unit matrices are deleted. See {\tt
|
|
||||||
kron\_prod2.hweb} for explanation.
|
|
||||||
|
|
||||||
@<|StackContainerInterface| class declaration@>=
|
|
||||||
template <class _Ttype>@;
|
|
||||||
class StackContainerInterface {
|
|
||||||
public:@;
|
|
||||||
typedef TensorContainer<_Ttype> _Ctype;
|
|
||||||
typedef enum {@+ matrix, unit, zero@+} itype;
|
|
||||||
protected:@;
|
|
||||||
const EquivalenceBundle& ebundle;
|
|
||||||
public:@;
|
|
||||||
StackContainerInterface()
|
|
||||||
: ebundle(*(tls.ebundle))@+ {}
|
|
||||||
virtual ~StackContainerInterface()@+ {}
|
|
||||||
virtual const IntSequence& getStackSizes() const =0;
|
|
||||||
virtual IntSequence& getStackSizes() =0;
|
|
||||||
virtual const IntSequence& getStackOffsets() const =0;
|
|
||||||
virtual IntSequence& getStackOffsets() =0;
|
|
||||||
virtual int numConts() const =0;
|
|
||||||
virtual const _Ctype* getCont(int i) const =0;
|
|
||||||
virtual itype getType(int i, const Symmetry& s) const =0;
|
|
||||||
virtual int numStacks() const =0;
|
|
||||||
virtual bool isZero(int i, const Symmetry& s) const =0;
|
|
||||||
virtual const _Ttype* getMatrix(int i, const Symmetry& s) const =0;
|
|
||||||
virtual int getLengthOfMatrixStacks(const Symmetry& s) const =0;
|
|
||||||
virtual int getUnitPos(const Symmetry& s) const =0;
|
|
||||||
virtual Vector* createPackedColumn(const Symmetry& s,
|
|
||||||
const IntSequence& coor,
|
|
||||||
int& iu) const =0;
|
|
||||||
int getAllSize() const
|
|
||||||
{@+ return getStackOffsets()[numStacks()-1]
|
|
||||||
+ getStackSizes()[numStacks()-1];@+}
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Here is |StackContainer|, which implements almost all interface
|
|
||||||
|StackContainerInterface| but one method |getType| which is left for
|
|
||||||
implementation to specializations.
|
|
||||||
|
|
||||||
@<|StackContainer| class declaration@>=
|
|
||||||
template <class _Ttype>@;
|
|
||||||
class StackContainer : virtual public StackContainerInterface<_Ttype> {
|
|
||||||
public:@;
|
|
||||||
typedef StackContainerInterface<_Ttype> _Stype;
|
|
||||||
typedef typename StackContainerInterface<_Ttype>::_Ctype _Ctype;
|
|
||||||
typedef typename StackContainerInterface<_Ttype>::itype itype;
|
|
||||||
protected:@;
|
|
||||||
int num_conts;
|
|
||||||
IntSequence stack_sizes;
|
|
||||||
IntSequence stack_offsets;
|
|
||||||
const _Ctype** const conts;
|
|
||||||
public:@;
|
|
||||||
StackContainer(int ns, int nc)
|
|
||||||
: num_conts(nc), stack_sizes(ns, 0), stack_offsets(ns, 0),
|
|
||||||
conts(new const _Ctype*[nc])@+ {}
|
|
||||||
virtual ~StackContainer() @+{delete [] conts;}
|
|
||||||
const IntSequence& getStackSizes() const
|
|
||||||
{@+ return stack_sizes;@+}
|
|
||||||
IntSequence& getStackSizes()
|
|
||||||
{@+ return stack_sizes;@+}
|
|
||||||
const IntSequence& getStackOffsets() const
|
|
||||||
{@+ return stack_offsets;@+}
|
|
||||||
IntSequence& getStackOffsets()
|
|
||||||
{@+ return stack_offsets;@+}
|
|
||||||
int numConts() const
|
|
||||||
{@+ return num_conts;}
|
|
||||||
const _Ctype* getCont(int i) const
|
|
||||||
{@+ return conts[i];@+}
|
|
||||||
virtual itype getType(int i, const Symmetry& s) const =0;
|
|
||||||
int numStacks() const
|
|
||||||
{@+ return stack_sizes.size();@+}
|
|
||||||
@<|StackContainer::isZero| code@>;
|
|
||||||
@<|StackContainer::getMatrix| code@>;
|
|
||||||
@<|StackContainer::getLengthOfMatrixStacks| code@>;
|
|
||||||
@<|StackContainer::getUnitPos| code@>;
|
|
||||||
@<|StackContainer::createPackedColumn| code@>;
|
|
||||||
protected:@;
|
|
||||||
@<|StackContainer::calculateOffsets| code@>;
|
|
||||||
};
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|StackContainer::isZero| code@>=
|
|
||||||
bool isZero(int i, const Symmetry& s) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(i < 0 || i >= numStacks(),
|
|
||||||
"Wrong index to stack in StackContainer::isZero.");
|
|
||||||
return (getType(i, s) == _Stype::zero ||
|
|
||||||
(getType(i, s) == _Stype::matrix && !conts[i]->check(s)));
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|StackContainer::getMatrix| code@>=
|
|
||||||
const _Ttype* getMatrix(int i, const Symmetry& s) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(isZero(i, s) || getType(i, s) == _Stype::unit,
|
|
||||||
"Matrix is not returned in StackContainer::getMatrix");
|
|
||||||
return conts[i]->get(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|StackContainer::getLengthOfMatrixStacks| code@>=
|
|
||||||
int getLengthOfMatrixStacks(const Symmetry& s) const
|
|
||||||
{
|
|
||||||
int res = 0;
|
|
||||||
int i = 0;
|
|
||||||
while (i < numStacks() && getType(i, s) == _Stype::matrix)
|
|
||||||
res += stack_sizes[i++];
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|StackContainer::getUnitPos| code@>=
|
|
||||||
int getUnitPos(const Symmetry& s) const
|
|
||||||
{
|
|
||||||
if (s.dimen() != 1)
|
|
||||||
return -1;
|
|
||||||
int i = numStacks()-1;
|
|
||||||
while (i >= 0 && getType(i, s) != _Stype::unit)
|
|
||||||
i--;
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|StackContainer::createPackedColumn| code@>=
|
|
||||||
Vector* createPackedColumn(const Symmetry& s,
|
|
||||||
const IntSequence& coor, int& iu) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(s.dimen() != coor.size(),
|
|
||||||
"Incompatible coordinates for symmetry in StackContainer::createPackedColumn");
|
|
||||||
|
|
||||||
int len = getLengthOfMatrixStacks(s);
|
|
||||||
iu = -1;
|
|
||||||
int i = 0;
|
|
||||||
if (-1 != (i = getUnitPos(s))) {
|
|
||||||
iu = stack_offsets[i] + coor[0];
|
|
||||||
len++;
|
|
||||||
}
|
|
||||||
|
|
||||||
Vector* res = new Vector(len);
|
|
||||||
i = 0;
|
|
||||||
while (i < numStacks() && getType(i, s) == _Stype::matrix) {
|
|
||||||
const _Ttype* t = getMatrix(i, s);
|
|
||||||
Tensor::index ind(t, coor);
|
|
||||||
Vector subres(*res, stack_offsets[i], stack_sizes[i]);
|
|
||||||
subres = ConstVector(ConstGeneralMatrix(*t), *ind);
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
if (iu != -1)
|
|
||||||
(*res)[len-1] = 1;
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|StackContainer::calculateOffsets| code@>=
|
|
||||||
void calculateOffsets()
|
|
||||||
{
|
|
||||||
stack_offsets[0] = 0;
|
|
||||||
for (int i = 1; i < stack_offsets.size(); i++)
|
|
||||||
stack_offsets[i] = stack_offsets[i-1] + stack_sizes[i-1];
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|FoldedStackContainer| class declaration@>=
|
|
||||||
class WorkerFoldMAADense;
|
|
||||||
class WorkerFoldMAASparse1;
|
|
||||||
class WorkerFoldMAASparse2;
|
|
||||||
class WorkerFoldMAASparse4;
|
|
||||||
class FoldedStackContainer : virtual public StackContainerInterface<FGSTensor> {
|
|
||||||
friend class WorkerFoldMAADense;
|
|
||||||
friend class WorkerFoldMAASparse1;
|
|
||||||
friend class WorkerFoldMAASparse2;
|
|
||||||
friend class WorkerFoldMAASparse4;
|
|
||||||
public:@;
|
|
||||||
static double fill_threshold;
|
|
||||||
void multAndAdd(int dim, const TensorContainer<FSSparseTensor>& c ,
|
|
||||||
FGSTensor& out) const
|
|
||||||
{@+ if (c.check(Symmetry(dim))) multAndAdd(*(c.get(Symmetry(dim))), out);@+}
|
|
||||||
void multAndAdd(const FSSparseTensor& t, FGSTensor& out) const;
|
|
||||||
void multAndAdd(int dim, const FGSContainer& c, FGSTensor& out) const;
|
|
||||||
protected:@;
|
|
||||||
void multAndAddSparse1(const FSSparseTensor& t, FGSTensor& out) const;
|
|
||||||
void multAndAddSparse2(const FSSparseTensor& t, FGSTensor& out) const;
|
|
||||||
void multAndAddSparse3(const FSSparseTensor& t, FGSTensor& out) const;
|
|
||||||
void multAndAddSparse4(const FSSparseTensor& t, FGSTensor& out) const;
|
|
||||||
void multAndAddStacks(const IntSequence& fi, const FGSTensor& g,
|
|
||||||
FGSTensor& out, const void* ad) const;
|
|
||||||
void multAndAddStacks(const IntSequence& fi, const GSSparseTensor& g,
|
|
||||||
FGSTensor& out, const void* ad) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|UnfoldedStackContainer| class declaration@>=
|
|
||||||
class WorkerUnfoldMAADense;
|
|
||||||
class WorkerUnfoldMAASparse1;
|
|
||||||
class WorkerUnfoldMAASparse2;
|
|
||||||
class UnfoldedStackContainer : virtual public StackContainerInterface<UGSTensor> {
|
|
||||||
friend class WorkerUnfoldMAADense;
|
|
||||||
friend class WorkerUnfoldMAASparse1;
|
|
||||||
friend class WorkerUnfoldMAASparse2;
|
|
||||||
public:@;
|
|
||||||
static double fill_threshold;
|
|
||||||
void multAndAdd(int dim, const TensorContainer<FSSparseTensor>& c ,
|
|
||||||
UGSTensor& out) const
|
|
||||||
{@+ if (c.check(Symmetry(dim))) multAndAdd(*(c.get(Symmetry(dim))), out);@+}
|
|
||||||
void multAndAdd(const FSSparseTensor& t, UGSTensor& out) const;
|
|
||||||
void multAndAdd(int dim, const UGSContainer& c, UGSTensor& out) const;
|
|
||||||
protected:@;
|
|
||||||
void multAndAddSparse1(const FSSparseTensor& t, UGSTensor& out) const;
|
|
||||||
void multAndAddSparse2(const FSSparseTensor& t, UGSTensor& out) const;
|
|
||||||
void multAndAddStacks(const IntSequence& fi, const UGSTensor& g,
|
|
||||||
UGSTensor& out, const void* ad) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Here is the specialization of the |StackContainer|. We implement
|
|
||||||
here the $z$ needed in SDGE context. We implement |getType| and define
|
|
||||||
a constructor feeding the data and sizes.
|
|
||||||
|
|
||||||
Note that it has two containers, the first is dependent on four
|
|
||||||
variables $G(y^*,u,u',\sigma)$, and the second dependent on three
|
|
||||||
variables $g(y^*,u,\sigma)$. So that we would be able to stack them,
|
|
||||||
we make the second container $g$ be dependent on four variables, the
|
|
||||||
third being $u'$ a dummy and always returning zero if dimension of
|
|
||||||
$u'$ is positive.
|
|
||||||
|
|
||||||
@<|ZContainer| class declaration@>=
|
|
||||||
template <class _Ttype>@;
|
|
||||||
class ZContainer : public StackContainer<_Ttype> {
|
|
||||||
public:@;
|
|
||||||
typedef StackContainer<_Ttype> _Tparent;
|
|
||||||
typedef StackContainerInterface<_Ttype> _Stype;
|
|
||||||
typedef typename _Tparent::_Ctype _Ctype;
|
|
||||||
typedef typename _Tparent::itype itype;
|
|
||||||
ZContainer(const _Ctype* gss, int ngss, const _Ctype* g, int ng,
|
|
||||||
int ny, int nu)
|
|
||||||
: _Tparent(4, 2)
|
|
||||||
{
|
|
||||||
_Tparent::stack_sizes[0] = ngss; _Tparent::stack_sizes[1] = ng;
|
|
||||||
_Tparent::stack_sizes[2] = ny; _Tparent::stack_sizes[3] = nu;
|
|
||||||
_Tparent::conts[0] = gss;
|
|
||||||
_Tparent::conts[1] = g;
|
|
||||||
_Tparent::calculateOffsets();
|
|
||||||
}
|
|
||||||
|
|
||||||
@<|ZContainer::getType| code@>;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Here we say, what happens if we derive $z$. recall the top of the
|
|
||||||
file, how $z$ looks, and code is clear.
|
|
||||||
|
|
||||||
@<|ZContainer::getType| code@>=
|
|
||||||
itype getType(int i, const Symmetry& s) const
|
|
||||||
{
|
|
||||||
if (i == 0)
|
|
||||||
return _Stype::matrix;
|
|
||||||
if (i == 1)
|
|
||||||
if (s[2] > 0)
|
|
||||||
return _Stype::zero;
|
|
||||||
else
|
|
||||||
return _Stype::matrix;
|
|
||||||
if (i == 2)
|
|
||||||
if (s == Symmetry(1,0,0,0))
|
|
||||||
return _Stype::unit;
|
|
||||||
else
|
|
||||||
return _Stype::zero;
|
|
||||||
if (i == 3)
|
|
||||||
if (s == Symmetry(0,1,0,0))
|
|
||||||
return _Stype::unit;
|
|
||||||
else
|
|
||||||
return _Stype::zero;
|
|
||||||
|
|
||||||
TL_RAISE("Wrong stack index in ZContainer::getType");
|
|
||||||
return _Stype::zero;
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|FoldedZContainer| class declaration@>=
|
|
||||||
class FoldedZContainer : public ZContainer<FGSTensor>,
|
|
||||||
public FoldedStackContainer {
|
|
||||||
public:@;
|
|
||||||
typedef TensorContainer<FGSTensor> _Ctype;
|
|
||||||
FoldedZContainer(const _Ctype* gss, int ngss, const _Ctype* g, int ng,
|
|
||||||
int ny, int nu)
|
|
||||||
: ZContainer<FGSTensor>(gss, ngss, g, ng, ny, nu)@+ {}
|
|
||||||
};
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|UnfoldedZContainer| class declaration@>=
|
|
||||||
class UnfoldedZContainer : public ZContainer<UGSTensor>,
|
|
||||||
public UnfoldedStackContainer {
|
|
||||||
public:@;
|
|
||||||
typedef TensorContainer<UGSTensor> _Ctype;
|
|
||||||
UnfoldedZContainer(const _Ctype* gss, int ngss, const _Ctype* g, int ng,
|
|
||||||
int ny, int nu)
|
|
||||||
: ZContainer<UGSTensor>(gss, ngss, g, ng, ny, nu)@+ {}
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Here we have another specialization of container used in context of
|
|
||||||
SDGE. We define a container for
|
|
||||||
$$G(y,u,u',\sigma)=g^{**}(g^*(y,u,\sigma),u',\sigma)$$
|
|
||||||
|
|
||||||
For some reason, the symmetry of $g^{**}$ has length $4$ although it
|
|
||||||
is really dependent on three variables. (To now the reason, consult
|
|
||||||
|@<|ZContainer| class declaration@>|.) So, it has four stack, the
|
|
||||||
third one is dummy, and always returns zero. The first stack
|
|
||||||
corresponds to a container of $g^*$.
|
|
||||||
|
|
||||||
@<|GContainer| class declaration@>=
|
|
||||||
template <class _Ttype>@;
|
|
||||||
class GContainer : public StackContainer<_Ttype> {
|
|
||||||
public:@;
|
|
||||||
typedef StackContainer<_Ttype> _Tparent;
|
|
||||||
typedef StackContainerInterface<_Ttype> _Stype;
|
|
||||||
typedef typename StackContainer<_Ttype>::_Ctype _Ctype;
|
|
||||||
typedef typename StackContainer<_Ttype>::itype itype;
|
|
||||||
GContainer(const _Ctype* gs, int ngs, int nu)
|
|
||||||
: StackContainer<_Ttype>(4, 1)
|
|
||||||
{
|
|
||||||
_Tparent::stack_sizes[0] = ngs; _Tparent::stack_sizes[1] = nu;
|
|
||||||
_Tparent::stack_sizes[2] = nu; _Tparent::stack_sizes[3] = 1;
|
|
||||||
_Tparent::conts[0] = gs;
|
|
||||||
_Tparent::calculateOffsets();
|
|
||||||
}
|
|
||||||
|
|
||||||
@<|GContainer::getType| code@>;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Here we define the dependencies in
|
|
||||||
$g^{**}(g^*(y,u,\sigma),u',\sigma)$. Also note, that first derivative
|
|
||||||
of $g^*$ wrt $\sigma$ is always zero, so we also add this
|
|
||||||
information.
|
|
||||||
|
|
||||||
@<|GContainer::getType| code@>=
|
|
||||||
itype getType(int i, const Symmetry& s) const
|
|
||||||
{
|
|
||||||
if (i == 0)
|
|
||||||
if (s[2] > 0 || s == Symmetry(0,0,0,1))
|
|
||||||
return _Stype::zero;
|
|
||||||
else
|
|
||||||
return _Stype::matrix;
|
|
||||||
if (i == 1)
|
|
||||||
if (s == Symmetry(0,0,1,0))
|
|
||||||
return _Stype::unit;
|
|
||||||
else
|
|
||||||
return _Stype::zero;
|
|
||||||
if (i == 2)
|
|
||||||
return _Stype::zero;
|
|
||||||
if (i == 3)
|
|
||||||
if (s == Symmetry(0,0,0,1))
|
|
||||||
return _Stype::unit;
|
|
||||||
else
|
|
||||||
return _Stype::zero;
|
|
||||||
|
|
||||||
TL_RAISE("Wrong stack index in GContainer::getType");
|
|
||||||
return _Stype::zero;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|FoldedGContainer| class declaration@>=
|
|
||||||
class FoldedGContainer : public GContainer<FGSTensor>,
|
|
||||||
public FoldedStackContainer {
|
|
||||||
public:@;
|
|
||||||
typedef TensorContainer<FGSTensor> _Ctype;
|
|
||||||
FoldedGContainer(const _Ctype* gs, int ngs, int nu)
|
|
||||||
: GContainer<FGSTensor>(gs, ngs, nu)@+ {}
|
|
||||||
};
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|UnfoldedGContainer| class declaration@>=
|
|
||||||
class UnfoldedGContainer : public GContainer<UGSTensor>,
|
|
||||||
public UnfoldedStackContainer {
|
|
||||||
public:@;
|
|
||||||
typedef TensorContainer<UGSTensor> _Ctype;
|
|
||||||
UnfoldedGContainer(const _Ctype* gs, int ngs, int nu)
|
|
||||||
: GContainer<UGSTensor>(gs, ngs, nu)@+ {}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we have a support class for product of |StackContainer|s. It
|
|
||||||
only adds a dimension to |StackContainer|. It selects the symmetries
|
|
||||||
according to equivalence classes passed to the constructor. The
|
|
||||||
equivalence can have permuted classes by some given
|
|
||||||
permutation. Nothing else is interesting.
|
|
||||||
|
|
||||||
@<|StackProduct| class declaration@>=
|
|
||||||
template <class _Ttype>@;
|
|
||||||
class StackProduct {
|
|
||||||
public:@;
|
|
||||||
typedef StackContainerInterface<_Ttype> _Stype;
|
|
||||||
typedef typename _Stype::_Ctype _Ctype;
|
|
||||||
typedef typename _Stype::itype itype;
|
|
||||||
protected:@;
|
|
||||||
const _Stype& stack_cont;
|
|
||||||
InducedSymmetries syms;
|
|
||||||
Permutation per;
|
|
||||||
public:@;
|
|
||||||
StackProduct(const _Stype& sc, const Equivalence& e,
|
|
||||||
const Symmetry& os)
|
|
||||||
: stack_cont(sc), syms(e, os), per(e)@+ {}
|
|
||||||
StackProduct(const _Stype& sc, const Equivalence& e,
|
|
||||||
const Permutation& p, const Symmetry& os)
|
|
||||||
: stack_cont(sc), syms(e, p, os), per(e, p)@+ {}
|
|
||||||
int dimen() const
|
|
||||||
{@+ return syms.size();@+}
|
|
||||||
int getAllSize() const
|
|
||||||
{@+ return stack_cont.getAllSize();@+}
|
|
||||||
const Symmetry& getProdSym(int ip) const
|
|
||||||
{@+ return syms[ip];@+}
|
|
||||||
@<|StackProduct::isZero| code@>;
|
|
||||||
@<|StackProduct::getType| code@>;
|
|
||||||
@<|StackProduct::getMatrix| code@>;
|
|
||||||
@<|StackProduct::createPackedColumns| code@>;
|
|
||||||
@<|StackProduct::getSize| code@>;
|
|
||||||
@<|StackProduct::numMatrices| code@>;
|
|
||||||
};
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|StackProduct::isZero| code@>=
|
|
||||||
bool isZero(const IntSequence& istacks) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(istacks.size() != dimen(),
|
|
||||||
"Wrong istacks coordinates for StackProduct::isZero");
|
|
||||||
|
|
||||||
bool res = false;
|
|
||||||
int i = 0;
|
|
||||||
while (i < dimen() && !(res = stack_cont.isZero(istacks[i], syms[i])))
|
|
||||||
i++;
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|StackProduct::getType| code@>=
|
|
||||||
itype getType(int is, int ip) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(is < 0 || is >= stack_cont.numStacks(),
|
|
||||||
"Wrong index to stack in StackProduct::getType");
|
|
||||||
TL_RAISE_IF(ip < 0 || ip >= dimen(),
|
|
||||||
"Wrong index to stack container in StackProduct::getType");
|
|
||||||
return stack_cont.getType(is, syms[ip]);
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|StackProduct::getMatrix| code@>=
|
|
||||||
const _Ttype* getMatrix(int is, int ip) const
|
|
||||||
{
|
|
||||||
return stack_cont.getMatrix(is, syms[ip]);
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|StackProduct::createPackedColumns| code@>=
|
|
||||||
void createPackedColumns(const IntSequence& coor,
|
|
||||||
Vector** vs, IntSequence& iu) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(iu.size() != dimen(),
|
|
||||||
"Wrong storage length for unit flags in StackProduct::createPackedColumn");
|
|
||||||
TL_RAISE_IF(coor.size() != per.size(),
|
|
||||||
"Wrong size of index coor in StackProduct::createPackedColumn");
|
|
||||||
IntSequence perindex(coor.size());
|
|
||||||
per.apply(coor, perindex);
|
|
||||||
int off = 0;
|
|
||||||
for (int i = 0; i < dimen(); i++) {
|
|
||||||
IntSequence percoor(perindex, off, syms[i].dimen() + off);
|
|
||||||
vs[i] = stack_cont.createPackedColumn(syms[i], percoor, iu[i]);
|
|
||||||
off += syms[i].dimen();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|StackProduct::getSize| code@>=
|
|
||||||
int getSize(int is) const
|
|
||||||
{
|
|
||||||
return stack_cont.getStackSizes()[is];
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|StackProduct::numMatrices| code@>=
|
|
||||||
int numMatrices(const IntSequence& istacks) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(istacks.size() != dimen(),
|
|
||||||
"Wrong size of stack coordinates in StackContainer::numMatrices");
|
|
||||||
int ret = 0;
|
|
||||||
int ip = 0;
|
|
||||||
while (ip < dimen() && getType(istacks[ip], ip) == _Stype::matrix) {
|
|
||||||
ret++;
|
|
||||||
ip++;
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we only inherit from Kronecker product |KronProdAllOptim|, only to
|
|
||||||
allow for a constructor constructing from |StackProduct|.
|
|
||||||
|
|
||||||
@<|KronProdStack| class declaration@>=
|
|
||||||
template <class _Ttype>
|
|
||||||
class KronProdStack : public KronProdAllOptim {
|
|
||||||
public:@;
|
|
||||||
typedef StackProduct<_Ttype> _Ptype;
|
|
||||||
typedef StackContainerInterface<_Ttype> _Stype;
|
|
||||||
@<|KronProdStack| constructor code@>;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Here we construct |KronProdAllOptim| from |StackContainer| and given
|
|
||||||
selections of stack items from stack containers in the product. We
|
|
||||||
only decide whether to insert matrix, or unit matrix.
|
|
||||||
|
|
||||||
At this point, we do not call |KronProdAllOptim::optimizeOrder|, so
|
|
||||||
the |KronProdStack| behaves like |KronProdAll| (i.e. no optimization
|
|
||||||
is done).
|
|
||||||
|
|
||||||
@<|KronProdStack| constructor code@>=
|
|
||||||
KronProdStack(const _Ptype& sp, const IntSequence& istack)
|
|
||||||
: KronProdAllOptim(sp.dimen())
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(sp.dimen() != istack.size(),
|
|
||||||
"Wrong stack product dimension for KronProdStack constructor");
|
|
||||||
|
|
||||||
for (int i = 0; i < sp.dimen(); i++) {
|
|
||||||
TL_RAISE_IF(sp.getType(istack[i], i) == _Stype::zero,
|
|
||||||
"Attempt to construct KronProdStack from zero matrix");
|
|
||||||
if (sp.getType(istack[i], i) == _Stype::unit)
|
|
||||||
setUnit(i, sp.getSize(istack[i]));
|
|
||||||
if (sp.getType(istack[i], i) == _Stype::matrix) {
|
|
||||||
const TwoDMatrix* m = sp.getMatrix(istack[i], i);
|
|
||||||
TL_RAISE_IF(m->nrows() != sp.getSize(istack[i]),
|
|
||||||
"Wrong size of returned matrix in KronProdStack constructor");
|
|
||||||
setMat(i, *m);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|WorkerFoldMAADense| class declaration@>=
|
|
||||||
class WorkerFoldMAADense : public THREAD {
|
|
||||||
const FoldedStackContainer& cont;
|
|
||||||
Symmetry sym;
|
|
||||||
const FGSContainer& dense_cont;
|
|
||||||
FGSTensor& out;
|
|
||||||
public:@;
|
|
||||||
WorkerFoldMAADense(const FoldedStackContainer& container,
|
|
||||||
const Symmetry& s,
|
|
||||||
const FGSContainer& dcontainer,
|
|
||||||
FGSTensor& outten);
|
|
||||||
void operator()();
|
|
||||||
};
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|WorkerFoldMAASparse1| class declaration@>=
|
|
||||||
class WorkerFoldMAASparse1 : public THREAD {
|
|
||||||
const FoldedStackContainer& cont;
|
|
||||||
const FSSparseTensor& t;
|
|
||||||
FGSTensor& out;
|
|
||||||
IntSequence coor;
|
|
||||||
const EquivalenceBundle& ebundle;
|
|
||||||
public:@;
|
|
||||||
WorkerFoldMAASparse1(const FoldedStackContainer& container,
|
|
||||||
const FSSparseTensor& ten,
|
|
||||||
FGSTensor& outten, const IntSequence& c);
|
|
||||||
void operator()();
|
|
||||||
};
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|WorkerFoldMAASparse2| class declaration@>=
|
|
||||||
class WorkerFoldMAASparse2 : public THREAD {
|
|
||||||
const FoldedStackContainer& cont;
|
|
||||||
const FSSparseTensor& t;
|
|
||||||
FGSTensor& out;
|
|
||||||
IntSequence coor;
|
|
||||||
public:@;
|
|
||||||
WorkerFoldMAASparse2(const FoldedStackContainer& container,
|
|
||||||
const FSSparseTensor& ten,
|
|
||||||
FGSTensor& outten, const IntSequence& c);
|
|
||||||
void operator()();
|
|
||||||
};
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|WorkerFoldMAASparse4| class declaration@>=
|
|
||||||
class WorkerFoldMAASparse4 : public THREAD {
|
|
||||||
const FoldedStackContainer& cont;
|
|
||||||
const FSSparseTensor& t;
|
|
||||||
FGSTensor& out;
|
|
||||||
IntSequence coor;
|
|
||||||
public:@;
|
|
||||||
WorkerFoldMAASparse4(const FoldedStackContainer& container,
|
|
||||||
const FSSparseTensor& ten,
|
|
||||||
FGSTensor& outten, const IntSequence& c);
|
|
||||||
void operator()();
|
|
||||||
};
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|WorkerUnfoldMAADense| class declaration@>=
|
|
||||||
class WorkerUnfoldMAADense : public THREAD {
|
|
||||||
const UnfoldedStackContainer& cont;
|
|
||||||
Symmetry sym;
|
|
||||||
const UGSContainer& dense_cont;
|
|
||||||
UGSTensor& out;
|
|
||||||
public:@;
|
|
||||||
WorkerUnfoldMAADense(const UnfoldedStackContainer& container,
|
|
||||||
const Symmetry& s,
|
|
||||||
const UGSContainer& dcontainer,
|
|
||||||
UGSTensor& outten);
|
|
||||||
void operator()();
|
|
||||||
};
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|WorkerUnfoldMAASparse1| class declaration@>=
|
|
||||||
class WorkerUnfoldMAASparse1 : public THREAD {
|
|
||||||
const UnfoldedStackContainer& cont;
|
|
||||||
const FSSparseTensor& t;
|
|
||||||
UGSTensor& out;
|
|
||||||
IntSequence coor;
|
|
||||||
const EquivalenceBundle& ebundle;
|
|
||||||
public:@;
|
|
||||||
WorkerUnfoldMAASparse1(const UnfoldedStackContainer& container,
|
|
||||||
const FSSparseTensor& ten,
|
|
||||||
UGSTensor& outten, const IntSequence& c);
|
|
||||||
void operator()();
|
|
||||||
};
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|WorkerUnfoldMAASparse2| class declaration@>=
|
|
||||||
class WorkerUnfoldMAASparse2 : public THREAD {
|
|
||||||
const UnfoldedStackContainer& cont;
|
|
||||||
const FSSparseTensor& t;
|
|
||||||
UGSTensor& out;
|
|
||||||
IntSequence coor;
|
|
||||||
public:@;
|
|
||||||
WorkerUnfoldMAASparse2(const UnfoldedStackContainer& container,
|
|
||||||
const FSSparseTensor& ten,
|
|
||||||
UGSTensor& outten, const IntSequence& c);
|
|
||||||
void operator()();
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt stack\_container.h} file.
|
|
|
@ -0,0 +1,232 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
/* We set the default values for
|
||||||
|
|max_parallel_threads| for both |posix| and |empty| implementation and
|
||||||
|
both joinable and detach group. For |posix| this defaults to
|
||||||
|
uniprocessor machine with hyper-threading, this is 2. */
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
#include "sthread.hh"
|
||||||
|
|
||||||
|
#ifdef HAVE_PTHREAD
|
||||||
|
namespace sthread
|
||||||
|
{
|
||||||
|
template<>
|
||||||
|
int thread_group<posix>::max_parallel_threads = 2;
|
||||||
|
template<>
|
||||||
|
int detach_thread_group<posix>::max_parallel_threads = 2;
|
||||||
|
|
||||||
|
// POSIX specializations methods
|
||||||
|
void *posix_thread_function(void *c);
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
thread_traits<posix>::run(_Ctype *c)
|
||||||
|
{
|
||||||
|
pthread_create(&(c->getThreadIden()), NULL, posix_thread_function, (void *) c);
|
||||||
|
}
|
||||||
|
|
||||||
|
void *posix_detach_thread_function(void *c);
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
thread_traits<posix>::detach_run(_Dtype *c)
|
||||||
|
{
|
||||||
|
pthread_attr_t attr;
|
||||||
|
pthread_attr_init(&attr);
|
||||||
|
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
|
||||||
|
pthread_create(&(c->getThreadIden()), &attr, posix_detach_thread_function, (void *) c);
|
||||||
|
pthread_attr_destroy(&attr);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
thread_traits<posix>::exit()
|
||||||
|
{
|
||||||
|
pthread_exit(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
thread_traits<posix>::join(_Ctype *c)
|
||||||
|
{
|
||||||
|
pthread_join(c->getThreadIden(), NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
mutex_traits<posix>::init(pthread_mutex_t &m)
|
||||||
|
{
|
||||||
|
pthread_mutex_init(&m, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
mutex_traits<posix>::lock(pthread_mutex_t &m)
|
||||||
|
{
|
||||||
|
pthread_mutex_lock(&m);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
mutex_traits<posix>::unlock(pthread_mutex_t &m)
|
||||||
|
{
|
||||||
|
pthread_mutex_unlock(&m);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
cond_traits<posix>::init(_Tcond &cond)
|
||||||
|
{
|
||||||
|
pthread_cond_init(&cond, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
cond_traits<posix>::broadcast(_Tcond &cond)
|
||||||
|
{
|
||||||
|
pthread_cond_broadcast(&cond);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
cond_traits<posix>::wait(_Tcond &cond, _Tmutex &mutex)
|
||||||
|
{
|
||||||
|
pthread_cond_wait(&cond, &mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
cond_traits<posix>::destroy(_Tcond &cond)
|
||||||
|
{
|
||||||
|
pthread_cond_destroy(&cond);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we instantiate the static map, and construct |PosixSynchro|
|
||||||
|
using that map. */
|
||||||
|
|
||||||
|
static posix_synchro::mutex_map_t posix_mm;
|
||||||
|
|
||||||
|
PosixSynchro::PosixSynchro(const void *c, const char *id)
|
||||||
|
: posix_synchro(c, id, posix_mm)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This function is of the type |void* function(void*)| as required by
|
||||||
|
POSIX, but it typecasts its argument and runs |operator()()|. */
|
||||||
|
|
||||||
|
void *
|
||||||
|
posix_thread_function(void *c)
|
||||||
|
{
|
||||||
|
thread_traits<posix>::_Ctype *ct
|
||||||
|
= (thread_traits<posix>::_Ctype *)c;
|
||||||
|
try
|
||||||
|
{
|
||||||
|
ct->operator()();
|
||||||
|
}
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
|
ct->exit();
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void *
|
||||||
|
posix_detach_thread_function(void *c)
|
||||||
|
{
|
||||||
|
thread_traits<posix>::_Dtype *ct
|
||||||
|
= (thread_traits<posix>::_Dtype *)c;
|
||||||
|
condition_counter<posix> *counter = ct->counter;
|
||||||
|
try
|
||||||
|
{
|
||||||
|
ct->operator()();
|
||||||
|
}
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
|
ct->exit();
|
||||||
|
}
|
||||||
|
if (counter)
|
||||||
|
counter->decrease();
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
namespace sthread
|
||||||
|
{
|
||||||
|
template<>
|
||||||
|
int thread_group<empty>::max_parallel_threads = 1;
|
||||||
|
template<>
|
||||||
|
int detach_thread_group<empty>::max_parallel_threads = 1;
|
||||||
|
|
||||||
|
// non-threading specialization methods
|
||||||
|
/* The only trait methods we need to work are |thread_traits::run| and
|
||||||
|
|thread_traits::detach_run|, which directly call
|
||||||
|
|operator()()|. Anything other is empty. */
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
thread_traits<empty>::run(_Ctype *c)
|
||||||
|
{
|
||||||
|
c->operator()();
|
||||||
|
}
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
thread_traits<empty>::detach_run(_Dtype *c)
|
||||||
|
{
|
||||||
|
c->operator()();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
thread_traits<empty>::exit()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
thread_traits<empty>::join(_Ctype *c)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
mutex_traits<empty>::init(Empty &m)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
mutex_traits<empty>::lock(Empty &m)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
mutex_traits<empty>::unlock(Empty &m)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
cond_traits<empty>::init(_Tcond &cond)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
cond_traits<empty>::broadcast(_Tcond &cond)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
cond_traits<empty>::wait(_Tcond &cond, _Tmutex &mutex)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void
|
||||||
|
cond_traits<empty>::destroy(_Tcond &cond)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
|
@ -1,224 +0,0 @@
|
||||||
@q $Id: sthread.cweb 2269 2008-11-23 14:33:22Z michel $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt sthreads.h} file. We set the default values for
|
|
||||||
|max_parallel_threads| for both |posix| and |empty| implementation and
|
|
||||||
both joinable and detach group. For |posix| this defaults to
|
|
||||||
uniprocessor machine with hyper-threading, this is 2.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#include <cstring>
|
|
||||||
#include "sthread.h"
|
|
||||||
|
|
||||||
#ifdef HAVE_PTHREAD
|
|
||||||
namespace sthread {
|
|
||||||
template<>
|
|
||||||
int thread_group<posix>::max_parallel_threads = 2;
|
|
||||||
template<>
|
|
||||||
int detach_thread_group<posix>::max_parallel_threads = 2;
|
|
||||||
@<POSIX specializations methods@>;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
namespace sthread {
|
|
||||||
template<>
|
|
||||||
int thread_group<empty>::max_parallel_threads = 1;
|
|
||||||
template<>
|
|
||||||
int detach_thread_group<empty>::max_parallel_threads = 1;
|
|
||||||
@<non-threading specialization methods@>;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
@
|
|
||||||
@<POSIX specializations methods@>=
|
|
||||||
@<|thread_traits| method codes@>;
|
|
||||||
@<|mutex_traits| method codes@>;
|
|
||||||
@<|cond_traits| method codes@>;
|
|
||||||
@<|PosixSynchro| constructor@>;
|
|
||||||
@<|posix_thread_function| code@>;
|
|
||||||
@<|posix_detach_thread_function| code@>;
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|thread_traits| method codes@>=
|
|
||||||
void* posix_thread_function(void* c);
|
|
||||||
template <>
|
|
||||||
void thread_traits<posix>::run(_Ctype* c)
|
|
||||||
{
|
|
||||||
pthread_create(&(c->getThreadIden()), NULL, posix_thread_function, (void*) c);
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
void* posix_detach_thread_function(void* c);
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void thread_traits<posix>::detach_run(_Dtype* c)
|
|
||||||
{
|
|
||||||
pthread_attr_t attr;
|
|
||||||
pthread_attr_init(&attr);
|
|
||||||
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
|
|
||||||
pthread_create(&(c->getThreadIden()), &attr, posix_detach_thread_function, (void*) c);
|
|
||||||
pthread_attr_destroy(&attr);
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void thread_traits<posix>::exit()
|
|
||||||
{
|
|
||||||
pthread_exit(NULL);
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
template <>
|
|
||||||
void thread_traits<posix>::join(_Ctype* c)
|
|
||||||
{
|
|
||||||
pthread_join(c->getThreadIden(), NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|mutex_traits| method codes@>=
|
|
||||||
template <>
|
|
||||||
void mutex_traits<posix>::init(pthread_mutex_t& m)
|
|
||||||
{
|
|
||||||
pthread_mutex_init(&m, NULL);
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
template <>
|
|
||||||
void mutex_traits<posix>::lock(pthread_mutex_t& m)
|
|
||||||
{
|
|
||||||
pthread_mutex_lock(&m);
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
template <>
|
|
||||||
void mutex_traits<posix>::unlock(pthread_mutex_t& m)
|
|
||||||
{
|
|
||||||
pthread_mutex_unlock(&m);
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|cond_traits| method codes@>=
|
|
||||||
template <>
|
|
||||||
void cond_traits<posix>::init(_Tcond& cond)
|
|
||||||
{
|
|
||||||
pthread_cond_init(&cond, NULL);
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
template <>
|
|
||||||
void cond_traits<posix>::broadcast(_Tcond& cond)
|
|
||||||
{
|
|
||||||
pthread_cond_broadcast(&cond);
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
template <>
|
|
||||||
void cond_traits<posix>::wait(_Tcond& cond, _Tmutex& mutex)
|
|
||||||
{
|
|
||||||
pthread_cond_wait(&cond, &mutex);
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
template <>
|
|
||||||
void cond_traits<posix>::destroy(_Tcond& cond)
|
|
||||||
{
|
|
||||||
pthread_cond_destroy(&cond);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we instantiate the static map, and construct |PosixSynchro|
|
|
||||||
using that map.
|
|
||||||
|
|
||||||
@<|PosixSynchro| constructor@>=
|
|
||||||
static posix_synchro::mutex_map_t posix_mm;
|
|
||||||
|
|
||||||
PosixSynchro::PosixSynchro(const void* c, const char* id)
|
|
||||||
: posix_synchro(c, id, posix_mm) {}
|
|
||||||
|
|
||||||
@ This function is of the type |void* function(void*)| as required by
|
|
||||||
POSIX, but it typecasts its argument and runs |operator()()|.
|
|
||||||
@<|posix_thread_function| code@>=
|
|
||||||
void* posix_thread_function(void* c)
|
|
||||||
{
|
|
||||||
thread_traits<posix>::_Ctype* ct =
|
|
||||||
(thread_traits<posix>::_Ctype*)c;
|
|
||||||
try {
|
|
||||||
ct->operator()();
|
|
||||||
} catch (...) {
|
|
||||||
ct->exit();
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|posix_detach_thread_function| code@>=
|
|
||||||
void* posix_detach_thread_function(void* c)
|
|
||||||
{
|
|
||||||
thread_traits<posix>::_Dtype* ct =
|
|
||||||
(thread_traits<posix>::_Dtype*)c;
|
|
||||||
condition_counter<posix>* counter = ct->counter;
|
|
||||||
try {
|
|
||||||
ct->operator()();
|
|
||||||
} catch (...) {
|
|
||||||
ct->exit();
|
|
||||||
}
|
|
||||||
if (counter)
|
|
||||||
counter->decrease();
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ The only trait methods we need to work are |thread_traits::run| and
|
|
||||||
|thread_traits::detach_run|, which directly call
|
|
||||||
|operator()()|. Anything other is empty.
|
|
||||||
|
|
||||||
@<non-threading specialization methods@>=
|
|
||||||
template <>
|
|
||||||
void thread_traits<empty>::run(_Ctype* c)
|
|
||||||
{
|
|
||||||
c->operator()();
|
|
||||||
}
|
|
||||||
template <>
|
|
||||||
void thread_traits<empty>::detach_run(_Dtype* c)
|
|
||||||
{
|
|
||||||
c->operator()();
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
template <>
|
|
||||||
void thread_traits<empty>::exit()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
template <>
|
|
||||||
void thread_traits<empty>::join(_Ctype* c)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
template <>
|
|
||||||
void mutex_traits<empty>::init(Empty& m)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
template <>
|
|
||||||
void mutex_traits<empty>::lock(Empty& m)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
template <>
|
|
||||||
void mutex_traits<empty>::unlock(Empty& m)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
template <>
|
|
||||||
void cond_traits<empty>::init(_Tcond& cond)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
template <>
|
|
||||||
void cond_traits<empty>::broadcast(_Tcond& cond)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
template <>
|
|
||||||
void cond_traits<empty>::wait(_Tcond& cond, _Tmutex& mutex)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
template <>
|
|
||||||
void cond_traits<empty>::destroy(_Tcond& cond)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
@ End of {\tt sthreads.h} file.
|
|
|
@ -0,0 +1,627 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// Simple threads.
|
||||||
|
|
||||||
|
/* This file defines types making a simple interface to
|
||||||
|
multi-threading. It follows the classical C++ idioms for traits. We
|
||||||
|
have three sorts of traits. The first is a |thread_traits|, which make
|
||||||
|
interface to thread functions (run, exit, create and join), the second
|
||||||
|
is |mutex_traits|, which make interface to mutexes (create, lock,
|
||||||
|
unlock), and third is |cond_traits|, which make interface to
|
||||||
|
conditions (create, wait, broadcast, and destroy). At present, there
|
||||||
|
are two implementations. The first are POSIX threads, mutexes, and
|
||||||
|
conditions, the second is serial (no parallelization).
|
||||||
|
|
||||||
|
The file provides the following interfaces templated by the types
|
||||||
|
implementing the threading (like types |pthread_t|, and |pthread_mutex_t|
|
||||||
|
for POSIX thread and mutex):
|
||||||
|
\unorderedlist
|
||||||
|
\li |thread| is a pure virtual class, which must be inherited and a
|
||||||
|
method |operator()()| be implemented as the running code of the
|
||||||
|
thread. This code is run as a new thread by calling |run| method.
|
||||||
|
\li |thread_group| allows insertion of |thread|s and running all of
|
||||||
|
them simultaneously joining them. The number of maximum parallel
|
||||||
|
threads can be controlled. See below.
|
||||||
|
\li |synchro| object locks a piece of code to be executed only serially
|
||||||
|
for a given data and specified entry-point. It locks the code until it
|
||||||
|
is destructed. So, the typical use is to create the |synchro| object
|
||||||
|
on the stack of a function which is to be synchronized. The
|
||||||
|
synchronization can be subjected to specific data (then a pointer can
|
||||||
|
be passed to |synchro|'s constructor), and can be subjected to
|
||||||
|
specific entry-point (then |const char*| is passed to the
|
||||||
|
constructor).
|
||||||
|
\li |detach_thread| inherits from |thread| and models a detached
|
||||||
|
thread in contrast to |thread| which models the joinable thread.
|
||||||
|
\li |detach_thread_group| groups the detached threads and runs them. They
|
||||||
|
are not joined, they are synchronized by means of a counter counting
|
||||||
|
running threads. A change of the counter is checked by waiting on an
|
||||||
|
associated condition.
|
||||||
|
\endunorderedlist
|
||||||
|
|
||||||
|
What implementation is selected is governed (at present) by
|
||||||
|
|HAVE_PTHREAD|. If it is defined, then POSIX threads are linked. If
|
||||||
|
it is not defined, then serial implementation is taken. In accordance
|
||||||
|
with this, the header file defines macros |THREAD|, |THREAD_GROUP|,
|
||||||
|
and |SYNCHRO| as the picked specialization of |thread| (or |detach_thread|),
|
||||||
|
|thread_group| (or |detach_thread_group|), and |synchro|.
|
||||||
|
|
||||||
|
The type of implementation is controlled by |thread_impl| integer
|
||||||
|
template parameter, this can be |posix| or |empty|.
|
||||||
|
|
||||||
|
The number of maximum parallel threads is controlled via a static
|
||||||
|
member of |thread_group| and |detach_thread_group| classes. */
|
||||||
|
|
||||||
|
#ifndef STHREAD_H
|
||||||
|
#define STHREAD_H
|
||||||
|
|
||||||
|
#ifdef HAVE_PTHREAD
|
||||||
|
# include <pthread.h>
|
||||||
|
#else
|
||||||
|
/* Give valid types for POSIX thread types, otherwise the templates fail in empty mode.
|
||||||
|
Don't use typedefs because on some systems |pthread_t| and friends are typedefs even
|
||||||
|
without the include. */
|
||||||
|
# define pthread_t void *
|
||||||
|
# define pthread_mutex_t void *
|
||||||
|
# define pthread_cond_t void *
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
#include <list>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
namespace sthread
|
||||||
|
{
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
class Empty
|
||||||
|
{
|
||||||
|
};
|
||||||
|
// classical IF template
|
||||||
|
/* Here is the classical IF template. */
|
||||||
|
template<bool condition, class Then, class Else>
|
||||||
|
struct IF
|
||||||
|
{
|
||||||
|
typedef Then RET;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class Then, class Else>
|
||||||
|
struct IF<false, Then, Else>
|
||||||
|
{
|
||||||
|
typedef Else RET;
|
||||||
|
};
|
||||||
|
|
||||||
|
enum { posix, empty};
|
||||||
|
template <int>
|
||||||
|
class thread_traits;
|
||||||
|
template <int>
|
||||||
|
class detach_thread;
|
||||||
|
|
||||||
|
/* The class of |thread| is clear. The user implements |operator()()|,
|
||||||
|
the method |run| runs the user's code as joinable thread, |exit| kills the
|
||||||
|
execution. */
|
||||||
|
template <int thread_impl>
|
||||||
|
class thread
|
||||||
|
{
|
||||||
|
typedef thread_traits<thread_impl> _Ttraits;
|
||||||
|
typedef typename _Ttraits::_Tthread _Tthread;
|
||||||
|
_Tthread th;
|
||||||
|
public:
|
||||||
|
virtual ~thread()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
_Tthread &
|
||||||
|
getThreadIden()
|
||||||
|
{
|
||||||
|
return th;
|
||||||
|
}
|
||||||
|
const _Tthread &
|
||||||
|
getThreadIden() const
|
||||||
|
{
|
||||||
|
return th;
|
||||||
|
}
|
||||||
|
virtual void operator()() = 0;
|
||||||
|
void
|
||||||
|
run()
|
||||||
|
{
|
||||||
|
_Ttraits::run(this);
|
||||||
|
}
|
||||||
|
void
|
||||||
|
detach_run()
|
||||||
|
{
|
||||||
|
_Ttraits::detach_run(this);
|
||||||
|
}
|
||||||
|
void
|
||||||
|
exit()
|
||||||
|
{
|
||||||
|
_Ttraits::exit();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* The |thread_group| is also clear. We allow a user to insert the
|
||||||
|
|thread|s, and then launch |run|, which will run all the threads not
|
||||||
|
allowing more than |max_parallel_threads| joining them at the
|
||||||
|
end. This static member can be set from outside. */
|
||||||
|
|
||||||
|
template <int thread_impl>
|
||||||
|
class thread_group
|
||||||
|
{
|
||||||
|
typedef thread_traits<thread_impl> _Ttraits;
|
||||||
|
typedef thread<thread_impl> _Ctype;
|
||||||
|
list<_Ctype *> tlist;
|
||||||
|
typedef typename list<_Ctype *>::iterator iterator;
|
||||||
|
public:
|
||||||
|
static int max_parallel_threads;
|
||||||
|
void
|
||||||
|
insert(_Ctype *c)
|
||||||
|
{
|
||||||
|
tlist.push_back(c);
|
||||||
|
}
|
||||||
|
/* The thread group class maintains list of pointers to threads. It
|
||||||
|
takes responsibility of deallocating the threads. So we implement the
|
||||||
|
destructor. */
|
||||||
|
~thread_group()
|
||||||
|
{
|
||||||
|
while (!tlist.empty())
|
||||||
|
{
|
||||||
|
delete tlist.front();
|
||||||
|
tlist.pop_front();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Here we run the threads ensuring that not more than
|
||||||
|
|max_parallel_threads| are run in parallel. More over, we do not want
|
||||||
|
to run a too low number of threads, since it is wasting with resource
|
||||||
|
(if there are). Therefore, we run in parallel |max_parallel_threads|
|
||||||
|
batches as long as the remaining threads are greater than the double
|
||||||
|
number. And then the remaining batch (less than |2*max_parallel_threads|)
|
||||||
|
is run half by half. */
|
||||||
|
|
||||||
|
void
|
||||||
|
run()
|
||||||
|
{
|
||||||
|
int rem = tlist.size();
|
||||||
|
iterator pfirst = tlist.begin();
|
||||||
|
while (rem > 2*max_parallel_threads)
|
||||||
|
{
|
||||||
|
pfirst = run_portion(pfirst, max_parallel_threads);
|
||||||
|
rem -= max_parallel_threads;
|
||||||
|
}
|
||||||
|
if (rem > max_parallel_threads)
|
||||||
|
{
|
||||||
|
pfirst = run_portion(pfirst, rem/2);
|
||||||
|
rem -= rem/2;
|
||||||
|
}
|
||||||
|
run_portion(pfirst, rem);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
/* This runs a given number of threads in parallel starting from the
|
||||||
|
given iterator. It returns the first iterator not run. */
|
||||||
|
|
||||||
|
iterator
|
||||||
|
run_portion(iterator start, int n)
|
||||||
|
{
|
||||||
|
int c = 0;
|
||||||
|
for (iterator i = start; c < n; ++i, c++)
|
||||||
|
{
|
||||||
|
(*i)->run();
|
||||||
|
}
|
||||||
|
iterator ret;
|
||||||
|
c = 0;
|
||||||
|
for (ret = start; c < n; ++ret, c++)
|
||||||
|
{
|
||||||
|
_Ttraits::join(*ret);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Clear. We have only |run|, |detach_run|, |exit| and |join|, since
|
||||||
|
this is only a simple interface. */
|
||||||
|
|
||||||
|
template <int thread_impl>
|
||||||
|
struct thread_traits
|
||||||
|
{
|
||||||
|
typedef typename IF<thread_impl == posix, pthread_t, Empty>::RET _Tthread;
|
||||||
|
typedef thread<thread_impl> _Ctype;
|
||||||
|
typedef detach_thread<thread_impl> _Dtype;
|
||||||
|
static void run(_Ctype *c);
|
||||||
|
static void detach_run(_Dtype *c);
|
||||||
|
static void exit();
|
||||||
|
static void join(_Ctype *c);
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Clear. We have only |init|, |lock|, and |unlock|. */
|
||||||
|
struct ltmmkey;
|
||||||
|
typedef pair<const void *, const char *> mmkey;
|
||||||
|
|
||||||
|
template <int thread_impl>
|
||||||
|
struct mutex_traits
|
||||||
|
{
|
||||||
|
typedef typename IF<thread_impl == posix, pthread_mutex_t, Empty>::RET _Tmutex;
|
||||||
|
typedef map<mmkey, pair<_Tmutex, int>, ltmmkey> mutex_int_map;
|
||||||
|
static void init(_Tmutex &m);
|
||||||
|
static void lock(_Tmutex &m);
|
||||||
|
static void unlock(_Tmutex &m);
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Here we define a map of mutexes keyed by a pair of address, and a
|
||||||
|
string. A purpose of the map of mutexes is that, if synchronizing, we
|
||||||
|
need to publish mutexes locking some piece of codes (characterized by
|
||||||
|
the string) accessing the data (characterized by the pointer). So, if
|
||||||
|
any thread needs to pass a |synchro| object, it creates its own with
|
||||||
|
the same address and string, and must look to some public storage to
|
||||||
|
unlock the mutex. If the |synchro| object is created for the first
|
||||||
|
time, the mutex is created and inserted to the map. We count the
|
||||||
|
references to the mutex (number of waiting threads) to know, when it
|
||||||
|
is save to remove the mutex from the map. This is the only purpose of
|
||||||
|
counting the references. Recall, that the mutex is keyed by an address
|
||||||
|
of the data, and without removing, the number of mutexes would only
|
||||||
|
grow.
|
||||||
|
|
||||||
|
The map itself needs its own mutex to avoid concurrent insertions and
|
||||||
|
deletions. */
|
||||||
|
|
||||||
|
struct ltmmkey
|
||||||
|
{
|
||||||
|
bool
|
||||||
|
operator()(const mmkey &k1, const mmkey &k2) const
|
||||||
|
{
|
||||||
|
return k1.first < k2.first
|
||||||
|
|| (k1.first == k2.first && strcmp(k1.second, k2.second) < 0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <int thread_impl>
|
||||||
|
class mutex_map :
|
||||||
|
public mutex_traits<thread_impl>::mutex_int_map
|
||||||
|
{
|
||||||
|
typedef typename mutex_traits<thread_impl>::_Tmutex _Tmutex;
|
||||||
|
typedef mutex_traits<thread_impl> _Mtraits;
|
||||||
|
typedef pair<_Tmutex, int> mmval;
|
||||||
|
typedef map<mmkey, mmval, ltmmkey> _Tparent;
|
||||||
|
typedef typename _Tparent::iterator iterator;
|
||||||
|
typedef typename _Tparent::value_type _mvtype;
|
||||||
|
_Tmutex m;
|
||||||
|
public:
|
||||||
|
mutex_map()
|
||||||
|
{
|
||||||
|
_Mtraits::init(m);
|
||||||
|
}
|
||||||
|
void
|
||||||
|
insert(const void *c, const char *id, const _Tmutex &m)
|
||||||
|
{
|
||||||
|
_Tparent::insert(_mvtype(mmkey(c, id), mmval(m, 0)));
|
||||||
|
}
|
||||||
|
bool
|
||||||
|
check(const void *c, const char *id) const
|
||||||
|
{
|
||||||
|
return _Tparent::find(mmkey(c, id)) != _Tparent::end();
|
||||||
|
}
|
||||||
|
/* This returns a pointer to the pair of mutex and count reference number. */
|
||||||
|
mmval *
|
||||||
|
get(const void *c, const char *id)
|
||||||
|
{
|
||||||
|
iterator it = _Tparent::find(mmkey(c, id));
|
||||||
|
if (it == _Tparent::end())
|
||||||
|
return NULL;
|
||||||
|
return &((*it).second);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This removes unconditionally the mutex from the map regardless its
|
||||||
|
number of references. The only user of this class should be |synchro|
|
||||||
|
class, it implementation must not remove referenced mutex. */
|
||||||
|
|
||||||
|
void
|
||||||
|
remove(const void *c, const char *id)
|
||||||
|
{
|
||||||
|
iterator it = _Tparent::find(mmkey(c, id));
|
||||||
|
if (it != _Tparent::end())
|
||||||
|
this->erase(it);
|
||||||
|
}
|
||||||
|
void
|
||||||
|
lock_map()
|
||||||
|
{
|
||||||
|
_Mtraits::lock(m);
|
||||||
|
}
|
||||||
|
void
|
||||||
|
unlock_map()
|
||||||
|
{
|
||||||
|
_Mtraits::unlock(m);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This is the |synchro| class. The constructor of this class tries to
|
||||||
|
lock a mutex for a particular address (identification of data) and
|
||||||
|
string (identification of entry-point). If the mutex is already
|
||||||
|
locked, it waits until it is unlocked and then returns. The destructor
|
||||||
|
releases the lock. The typical use is to construct the object on the
|
||||||
|
stacked of the code being synchronized. */
|
||||||
|
|
||||||
|
template <int thread_impl>
|
||||||
|
class synchro
|
||||||
|
{
|
||||||
|
typedef typename mutex_traits<thread_impl>::_Tmutex _Tmutex;
|
||||||
|
typedef mutex_traits<thread_impl> _Mtraits;
|
||||||
|
public:
|
||||||
|
typedef mutex_map<thread_impl> mutex_map_t;
|
||||||
|
private:
|
||||||
|
const void *caller;
|
||||||
|
const char *iden;
|
||||||
|
mutex_map_t &mutmap;
|
||||||
|
public:
|
||||||
|
synchro(const void *c, const char *id, mutex_map_t &mmap)
|
||||||
|
: caller(c), iden(id), mutmap(mmap)
|
||||||
|
{
|
||||||
|
lock();
|
||||||
|
}
|
||||||
|
~synchro()
|
||||||
|
{
|
||||||
|
unlock();
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
/* The |lock| function acquires the mutex in the map. First it tries to
|
||||||
|
get an exclusive access to the map. Then it increases a number of
|
||||||
|
references of the mutex (if it does not exists, it inserts it). Then
|
||||||
|
unlocks the map, and finally tries to lock the mutex of the map. */
|
||||||
|
|
||||||
|
void
|
||||||
|
lock()
|
||||||
|
{
|
||||||
|
mutmap.lock_map();
|
||||||
|
if (!mutmap.check(caller, iden))
|
||||||
|
{
|
||||||
|
_Tmutex mut;
|
||||||
|
_Mtraits::init(mut);
|
||||||
|
mutmap.insert(caller, iden, mut);
|
||||||
|
}
|
||||||
|
mutmap.get(caller, iden)->second++;
|
||||||
|
mutmap.unlock_map();
|
||||||
|
_Mtraits::lock(mutmap.get(caller, iden)->first);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The |unlock| function first locks the map. Then releases the lock,
|
||||||
|
and decreases a number of references. If it is zero, it removes the
|
||||||
|
mutex. */
|
||||||
|
|
||||||
|
void
|
||||||
|
unlock()
|
||||||
|
{
|
||||||
|
mutmap.lock_map();
|
||||||
|
if (mutmap.check(caller, iden))
|
||||||
|
{
|
||||||
|
_Mtraits::unlock(mutmap.get(caller, iden)->first);
|
||||||
|
mutmap.get(caller, iden)->second--;
|
||||||
|
if (mutmap.get(caller, iden)->second == 0)
|
||||||
|
mutmap.remove(caller, iden);
|
||||||
|
}
|
||||||
|
mutmap.unlock_map();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* These are traits for conditions. We need |init|, |broadcast|, |wait|
|
||||||
|
and |destroy|. */
|
||||||
|
|
||||||
|
template <int thread_impl>
|
||||||
|
struct cond_traits
|
||||||
|
{
|
||||||
|
typedef typename IF<thread_impl == posix, pthread_cond_t, Empty>::RET _Tcond;
|
||||||
|
typedef typename mutex_traits<thread_impl>::_Tmutex _Tmutex;
|
||||||
|
static void init(_Tcond &cond);
|
||||||
|
static void broadcast(_Tcond &cond);
|
||||||
|
static void wait(_Tcond &cond, _Tmutex &mutex);
|
||||||
|
static void destroy(_Tcond &cond);
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Here is the condition counter. It is a counter which starts at 0,
|
||||||
|
and can be increased and decreased. A thread can wait until the
|
||||||
|
counter is changed, this is implemented by condition. After the wait
|
||||||
|
is done, another (or the same) thread, by calling |waitForChange|
|
||||||
|
waits for another change. This can be dangerous, since it is possible
|
||||||
|
to wait for a change which will not happen, because all the threads
|
||||||
|
which can cause the change (by increase of decrease) might had
|
||||||
|
finished. */
|
||||||
|
|
||||||
|
template <int thread_impl>
|
||||||
|
class condition_counter
|
||||||
|
{
|
||||||
|
typedef typename mutex_traits<thread_impl>::_Tmutex _Tmutex;
|
||||||
|
typedef typename cond_traits<thread_impl>::_Tcond _Tcond;
|
||||||
|
int counter;
|
||||||
|
_Tmutex mut;
|
||||||
|
_Tcond cond;
|
||||||
|
bool changed;
|
||||||
|
public:
|
||||||
|
/* We initialize the counter to 0, and |changed| flag to |true|, since
|
||||||
|
the counter was change from undefined value to 0. */
|
||||||
|
|
||||||
|
condition_counter()
|
||||||
|
: counter(0), changed(true)
|
||||||
|
{
|
||||||
|
mutex_traits<thread_impl>::init(mut);
|
||||||
|
cond_traits<thread_impl>::init(cond);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* In destructor, we only release the resources associated with the
|
||||||
|
condition. */
|
||||||
|
|
||||||
|
~condition_counter()
|
||||||
|
{
|
||||||
|
cond_traits<thread_impl>::destroy(cond);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* When increasing, we lock the mutex, advance the counter, remember it
|
||||||
|
is changed, broadcast, and release the mutex. */
|
||||||
|
|
||||||
|
void
|
||||||
|
increase()
|
||||||
|
{
|
||||||
|
mutex_traits<thread_impl>::lock(mut);
|
||||||
|
counter++;
|
||||||
|
changed = true;
|
||||||
|
cond_traits<thread_impl>::broadcast(cond);
|
||||||
|
mutex_traits<thread_impl>::unlock(mut);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Same as increase. */
|
||||||
|
void
|
||||||
|
decrease()
|
||||||
|
{
|
||||||
|
mutex_traits<thread_impl>::lock(mut);
|
||||||
|
counter--;
|
||||||
|
changed = true;
|
||||||
|
cond_traits<thread_impl>::broadcast(cond);
|
||||||
|
mutex_traits<thread_impl>::unlock(mut);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We lock the mutex, and if there was a change since the last call of
|
||||||
|
|waitForChange|, we return immediately, otherwise we wait for the
|
||||||
|
change. The mutex is released. */
|
||||||
|
|
||||||
|
int
|
||||||
|
waitForChange()
|
||||||
|
{
|
||||||
|
mutex_traits<thread_impl>::lock(mut);
|
||||||
|
if (!changed)
|
||||||
|
{
|
||||||
|
cond_traits<thread_impl>::wait(cond, mut);
|
||||||
|
}
|
||||||
|
changed = false;
|
||||||
|
int res = counter;
|
||||||
|
mutex_traits<thread_impl>::unlock(mut);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* The detached thread is the same as joinable |thread|. We only
|
||||||
|
re-implement |run| method to call |thread_traits::detach_run|, and add
|
||||||
|
a method which installs a counter. The counter is increased and
|
||||||
|
decreased on the body of the new thread. */
|
||||||
|
|
||||||
|
template <int thread_impl>
|
||||||
|
class detach_thread : public thread<thread_impl>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
condition_counter<thread_impl> *counter;
|
||||||
|
detach_thread() : counter(NULL)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
void
|
||||||
|
installCounter(condition_counter<thread_impl> *c)
|
||||||
|
{
|
||||||
|
counter = c;
|
||||||
|
}
|
||||||
|
void
|
||||||
|
run()
|
||||||
|
{
|
||||||
|
thread_traits<thread_impl>::detach_run(this);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* The detach thread group is (by interface) the same as
|
||||||
|
|thread_group|. The extra thing we have here is the |counter|. The
|
||||||
|
implementation of |insert| and |run| is different. */
|
||||||
|
|
||||||
|
template<int thread_impl>
|
||||||
|
class detach_thread_group
|
||||||
|
{
|
||||||
|
typedef thread_traits<thread_impl> _Ttraits;
|
||||||
|
typedef cond_traits<thread_impl> _Ctraits;
|
||||||
|
typedef detach_thread<thread_impl> _Ctype;
|
||||||
|
list<_Ctype *> tlist;
|
||||||
|
typedef typename list<_Ctype *>::iterator iterator;
|
||||||
|
condition_counter<thread_impl> counter;
|
||||||
|
public:
|
||||||
|
static int max_parallel_threads;
|
||||||
|
|
||||||
|
/* When inserting, the counter is installed to the thread. */
|
||||||
|
void
|
||||||
|
insert(_Ctype *c)
|
||||||
|
{
|
||||||
|
tlist.push_back(c);
|
||||||
|
c->installCounter(&counter);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The destructor is clear. */
|
||||||
|
~detach_thread_group()
|
||||||
|
{
|
||||||
|
while (!tlist.empty())
|
||||||
|
{
|
||||||
|
delete tlist.front();
|
||||||
|
tlist.pop_front();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We cycle through all threads in the group, and in each cycle we wait
|
||||||
|
for the change in the |counter|. If the counter indicates less than
|
||||||
|
maximum parallel threads running, then a new thread is run, and the
|
||||||
|
iterator in the list is moved.
|
||||||
|
|
||||||
|
At the end we have to wait for all thread to finish. */
|
||||||
|
|
||||||
|
void
|
||||||
|
run()
|
||||||
|
{
|
||||||
|
int mpt = max_parallel_threads;
|
||||||
|
iterator it = tlist.begin();
|
||||||
|
while (it != tlist.end())
|
||||||
|
{
|
||||||
|
if (counter.waitForChange() < mpt)
|
||||||
|
{
|
||||||
|
counter.increase();
|
||||||
|
(*it)->run();
|
||||||
|
++it;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while (counter.waitForChange() > 0)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef HAVE_PTHREAD
|
||||||
|
// POSIX thread specializations
|
||||||
|
/* Here we only define the specializations for POSIX threads. Then we
|
||||||
|
define the macros. Note that the |PosixSynchro| class construct itself
|
||||||
|
from the static map defined in {\tt sthreads.cpp}. */
|
||||||
|
typedef detach_thread<posix> PosixThread;
|
||||||
|
typedef detach_thread_group<posix> PosixThreadGroup;
|
||||||
|
typedef synchro<posix> posix_synchro;
|
||||||
|
class PosixSynchro : public posix_synchro
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
PosixSynchro(const void *c, const char *id);
|
||||||
|
};
|
||||||
|
|
||||||
|
# define THREAD sthread::PosixThread
|
||||||
|
# define THREAD_GROUP sthread::PosixThreadGroup
|
||||||
|
# define SYNCHRO sthread::PosixSynchro
|
||||||
|
|
||||||
|
#else
|
||||||
|
// No threading specializations@>=
|
||||||
|
/* Here we define an empty class and use it as thread and
|
||||||
|
mutex. |NoSynchro| class is also empty, but an empty constructor is
|
||||||
|
declared. The empty destructor is declared only to avoid ``unused
|
||||||
|
variable warning''. */
|
||||||
|
typedef thread<empty> NoThread;
|
||||||
|
typedef thread_group<empty> NoThreadGroup;
|
||||||
|
typedef synchro<empty> no_synchro;
|
||||||
|
class NoSynchro
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
NoSynchro(const void *c, const char *id)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
~NoSynchro()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
# define THREAD sthread::NoThread
|
||||||
|
# define THREAD_GROUP sthread::NoThreadGroup
|
||||||
|
# define SYNCHRO sthread::NoSynchro
|
||||||
|
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,625 +0,0 @@
|
||||||
@q $Id: sthread.hweb 411 2005-08-11 12:26:13Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Simple threads. Start of {\tt sthreads.h} file.
|
|
||||||
|
|
||||||
This file defines types making a simple interface to
|
|
||||||
multi-threading. It follows the classical C++ idioms for traits. We
|
|
||||||
have three sorts of traits. The first is a |thread_traits|, which make
|
|
||||||
interface to thread functions (run, exit, create and join), the second
|
|
||||||
is |mutex_traits|, which make interface to mutexes (create, lock,
|
|
||||||
unlock), and third is |cond_traits|, which make interface to
|
|
||||||
conditions (create, wait, broadcast, and destroy). At present, there
|
|
||||||
are two implementations. The first are POSIX threads, mutexes, and
|
|
||||||
conditions, the second is serial (no parallelization).
|
|
||||||
|
|
||||||
The file provides the following interfaces templated by the types
|
|
||||||
implementing the threading (like types |pthread_t|, and |pthread_mutex_t|
|
|
||||||
for POSIX thread and mutex):
|
|
||||||
\unorderedlist
|
|
||||||
\li |thread| is a pure virtual class, which must be inherited and a
|
|
||||||
method |operator()()| be implemented as the running code of the
|
|
||||||
thread. This code is run as a new thread by calling |run| method.
|
|
||||||
\li |thread_group| allows insertion of |thread|s and running all of
|
|
||||||
them simultaneously joining them. The number of maximum parallel
|
|
||||||
threads can be controlled. See below.
|
|
||||||
\li |synchro| object locks a piece of code to be executed only serially
|
|
||||||
for a given data and specified entry-point. It locks the code until it
|
|
||||||
is destructed. So, the typical use is to create the |synchro| object
|
|
||||||
on the stack of a function which is to be synchronized. The
|
|
||||||
synchronization can be subjected to specific data (then a pointer can
|
|
||||||
be passed to |synchro|'s constructor), and can be subjected to
|
|
||||||
specific entry-point (then |const char*| is passed to the
|
|
||||||
constructor).
|
|
||||||
\li |detach_thread| inherits from |thread| and models a detached
|
|
||||||
thread in contrast to |thread| which models the joinable thread.
|
|
||||||
\li |detach_thread_group| groups the detached threads and runs them. They
|
|
||||||
are not joined, they are synchronized by means of a counter counting
|
|
||||||
running threads. A change of the counter is checked by waiting on an
|
|
||||||
associated condition.
|
|
||||||
\endunorderedlist
|
|
||||||
|
|
||||||
What implementation is selected is governed (at present) by
|
|
||||||
|HAVE_PTHREAD|. If it is defined, then POSIX threads are linked. If
|
|
||||||
it is not defined, then serial implementation is taken. In accordance
|
|
||||||
with this, the header file defines macros |THREAD|, |THREAD_GROUP|,
|
|
||||||
and |SYNCHRO| as the picked specialization of |thread| (or |detach_thread|),
|
|
||||||
|thread_group| (or |detach_thread_group|), and |synchro|.
|
|
||||||
|
|
||||||
The type of implementation is controlled by |thread_impl| integer
|
|
||||||
template parameter, this can be |posix| or |empty|.
|
|
||||||
|
|
||||||
The number of maximum parallel threads is controlled via a static
|
|
||||||
member of |thread_group| and |detach_thread_group| classes.
|
|
||||||
|
|
||||||
@s _Tthread int
|
|
||||||
@s thread_traits int
|
|
||||||
@s thread int
|
|
||||||
@s thread_group int
|
|
||||||
@s detach_thread int
|
|
||||||
@s detach_thread_group int
|
|
||||||
@s cond_traits int
|
|
||||||
@s condition_counter int
|
|
||||||
@s mutex_traits int
|
|
||||||
@s mutex_map int
|
|
||||||
@s synchro int
|
|
||||||
@s _Tmutex int
|
|
||||||
@s pthread_t int
|
|
||||||
@s pthread_mutex_t int
|
|
||||||
@s pthread_cond_t int
|
|
||||||
@s pthread_attr_t int
|
|
||||||
@s IF int
|
|
||||||
@s Then int
|
|
||||||
@s Else int
|
|
||||||
@s RET int
|
|
||||||
@s thread_impl int
|
|
||||||
|
|
||||||
@c
|
|
||||||
#ifndef STHREAD_H
|
|
||||||
#define STHREAD_H
|
|
||||||
|
|
||||||
#ifdef HAVE_PTHREAD
|
|
||||||
# include <pthread.h>
|
|
||||||
#else
|
|
||||||
/* Give valid types for POSIX thread types, otherwise the templates fail in empty mode.
|
|
||||||
Don't use typedefs because on some systems |pthread_t| and friends are typedefs even
|
|
||||||
without the include. */
|
|
||||||
# define pthread_t void *
|
|
||||||
# define pthread_mutex_t void *
|
|
||||||
# define pthread_cond_t void *
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <cstdio>
|
|
||||||
#include <list>
|
|
||||||
#include <map>
|
|
||||||
|
|
||||||
namespace sthread {
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
class Empty {};
|
|
||||||
@<classical IF template@>;
|
|
||||||
enum {@+ posix, empty@+};
|
|
||||||
template <int> class thread_traits;
|
|
||||||
template <int> class detach_thread;
|
|
||||||
@<|thread| template class declaration@>;
|
|
||||||
@<|thread_group| template class declaration@>;
|
|
||||||
@<|thread_traits| template class declaration@>;
|
|
||||||
@<|mutex_traits| template class declaration@>;
|
|
||||||
@<|mutex_map| template class declaration@>;
|
|
||||||
@<|synchro| template class declaration@>;
|
|
||||||
@<|cond_traits| template class declaration@>;
|
|
||||||
@<|condition_counter| template class declaration@>;
|
|
||||||
@<|detach_thread| template class declaration@>;
|
|
||||||
@<|detach_thread_group| template class declaration@>;
|
|
||||||
#ifdef HAVE_PTHREAD
|
|
||||||
@<POSIX thread specializations@>;
|
|
||||||
#else
|
|
||||||
@<No threading specializations@>;
|
|
||||||
#endif
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
@ Here is the classical IF template.
|
|
||||||
@<classical IF template@>=
|
|
||||||
template<bool condition, class Then, class Else>
|
|
||||||
struct IF {
|
|
||||||
typedef Then RET;
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class Then, class Else>
|
|
||||||
struct IF<false, Then, Else> {
|
|
||||||
typedef Else RET;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ The class of |thread| is clear. The user implements |operator()()|,
|
|
||||||
the method |run| runs the user's code as joinable thread, |exit| kills the
|
|
||||||
execution.
|
|
||||||
|
|
||||||
@<|thread| template class declaration@>=
|
|
||||||
template <int thread_impl>
|
|
||||||
class thread {
|
|
||||||
typedef thread_traits<thread_impl> _Ttraits;
|
|
||||||
typedef typename _Ttraits::_Tthread _Tthread;
|
|
||||||
_Tthread th;
|
|
||||||
public:@;
|
|
||||||
virtual ~thread() {}
|
|
||||||
_Tthread& getThreadIden()
|
|
||||||
{@+ return th;@+}
|
|
||||||
const _Tthread& getThreadIden() const
|
|
||||||
{@+ return th;@+}
|
|
||||||
virtual void operator()() = 0;
|
|
||||||
void run()
|
|
||||||
{@+ _Ttraits::run(this);@+}
|
|
||||||
void detach_run()
|
|
||||||
{@+ _Ttraits::detach_run(this);@+}
|
|
||||||
void exit()
|
|
||||||
{@+ _Ttraits::exit();@+}
|
|
||||||
};
|
|
||||||
|
|
||||||
@ The |thread_group| is also clear. We allow a user to insert the
|
|
||||||
|thread|s, and then launch |run|, which will run all the threads not
|
|
||||||
allowing more than |max_parallel_threads| joining them at the
|
|
||||||
end. This static member can be set from outside.
|
|
||||||
|
|
||||||
@<|thread_group| template class declaration@>=
|
|
||||||
template <int thread_impl>
|
|
||||||
class thread_group {
|
|
||||||
typedef thread_traits<thread_impl> _Ttraits;
|
|
||||||
typedef thread<thread_impl> _Ctype;
|
|
||||||
list<_Ctype*> tlist;
|
|
||||||
typedef typename list<_Ctype*>::iterator iterator;
|
|
||||||
public:@;
|
|
||||||
static int max_parallel_threads;
|
|
||||||
void insert(_Ctype* c)
|
|
||||||
{@+ tlist.push_back(c);@+}
|
|
||||||
@<|thread_group| destructor code@>;
|
|
||||||
@<|thread_group::run| code@>;
|
|
||||||
private:@;
|
|
||||||
@<|thread_group::run_portion| code@>;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ The thread group class maintains list of pointers to threads. It
|
|
||||||
takes responsibility of deallocating the threads. So we implement the
|
|
||||||
destructor.
|
|
||||||
@<|thread_group| destructor code@>=
|
|
||||||
~thread_group()
|
|
||||||
{
|
|
||||||
while (! tlist.empty()) {
|
|
||||||
delete tlist.front();
|
|
||||||
tlist.pop_front();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This runs a given number of threads in parallel starting from the
|
|
||||||
given iterator. It returns the first iterator not run.
|
|
||||||
|
|
||||||
@<|thread_group::run_portion| code@>=
|
|
||||||
iterator run_portion(iterator start, int n)
|
|
||||||
{
|
|
||||||
int c = 0;
|
|
||||||
for (iterator i = start; c < n; ++i, c++) {
|
|
||||||
(*i)->run();
|
|
||||||
}
|
|
||||||
iterator ret;
|
|
||||||
c = 0;
|
|
||||||
for (ret = start; c < n; ++ret, c++) {
|
|
||||||
_Ttraits::join(*ret);
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we run the threads ensuring that not more than
|
|
||||||
|max_parallel_threads| are run in parallel. More over, we do not want
|
|
||||||
to run too few threads, since that would waste available resources
|
|
||||||
(if there are). Therefore, we run in parallel |max_parallel_threads|
|
|
||||||
batches as long as the remaining threads are greater than the double
|
|
||||||
number. And then the remaining batch (less than |2*max_parallel_threads|)
|
|
||||||
is run half by half.
|
|
||||||
|
|
||||||
@<|thread_group::run| code@>=
|
|
||||||
void run()
|
|
||||||
{
|
|
||||||
int rem = tlist.size();
|
|
||||||
iterator pfirst = tlist.begin();
|
|
||||||
while (rem > 2*max_parallel_threads) {
|
|
||||||
pfirst = run_portion(pfirst, max_parallel_threads);
|
|
||||||
rem -= max_parallel_threads;
|
|
||||||
}
|
|
||||||
if (rem > max_parallel_threads) {
|
|
||||||
pfirst = run_portion(pfirst, rem/2);
|
|
||||||
rem -= rem/2;
|
|
||||||
}
|
|
||||||
run_portion(pfirst, rem);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ Clear. We have only |run|, |detach_run|, |exit| and |join|, since
|
|
||||||
this is only a simple interface.
|
|
||||||
|
|
||||||
@<|thread_traits| template class declaration@>=
|
|
||||||
template <int thread_impl>
|
|
||||||
struct thread_traits {
|
|
||||||
typedef typename IF<thread_impl==posix, pthread_t, Empty>::RET _Tthread;
|
|
||||||
typedef thread<thread_impl> _Ctype;
|
|
||||||
typedef detach_thread<thread_impl> _Dtype;
|
|
||||||
static void run(_Ctype* c);
|
|
||||||
static void detach_run(_Dtype* c);
|
|
||||||
static void exit();
|
|
||||||
static void join(_Ctype* c);
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Clear. We have only |init|, |lock|, and |unlock|.
|
|
||||||
@<|mutex_traits| template class declaration@>=
|
|
||||||
struct ltmmkey;
|
|
||||||
typedef pair<const void*, const char*> mmkey;
|
|
||||||
@#
|
|
||||||
template <int thread_impl>
|
|
||||||
struct mutex_traits {
|
|
||||||
typedef typename IF<thread_impl==posix, pthread_mutex_t, Empty>::RET _Tmutex;
|
|
||||||
typedef map<mmkey, pair<_Tmutex, int>, ltmmkey> mutex_int_map;
|
|
||||||
static void init(_Tmutex& m);
|
|
||||||
static void lock(_Tmutex& m);
|
|
||||||
static void unlock(_Tmutex& m);
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Here we define a map of mutexes keyed by a pair of address, and a
|
|
||||||
string. A purpose of the map of mutexes is that, if synchronizing, we
|
|
||||||
need to publish mutexes locking some piece of codes (characterized by
|
|
||||||
the string) accessing the data (characterized by the pointer). So, if
|
|
||||||
any thread needs to pass a |synchro| object, it creates its own with
|
|
||||||
the same address and string, and must look to some public storage to
|
|
||||||
unlock the mutex. If the |synchro| object is created for the first
|
|
||||||
time, the mutex is created and inserted to the map. We count the
|
|
||||||
references to the mutex (number of waiting threads) to know, when it
|
|
||||||
is save to remove the mutex from the map. This is the only purpose of
|
|
||||||
counting the references. Recall, that the mutex is keyed by an address
|
|
||||||
of the data, and without removing, the number of mutexes would only
|
|
||||||
grow.
|
|
||||||
|
|
||||||
The map itself needs its own mutex to avoid concurrent insertions and
|
|
||||||
deletions.
|
|
||||||
|
|
||||||
@s mutex_int_map int
|
|
||||||
|
|
||||||
@<|mutex_map| template class declaration@>=
|
|
||||||
struct ltmmkey {
|
|
||||||
bool operator()(const mmkey& k1, const mmkey& k2) const
|
|
||||||
{return k1.first < k2.first ||
|
|
||||||
(k1.first == k2.first && strcmp(k1.second, k2.second) < 0);}
|
|
||||||
};
|
|
||||||
@#
|
|
||||||
template <int thread_impl>
|
|
||||||
class mutex_map
|
|
||||||
: public mutex_traits<thread_impl>::mutex_int_map
|
|
||||||
{
|
|
||||||
typedef typename mutex_traits<thread_impl>::_Tmutex _Tmutex;
|
|
||||||
typedef mutex_traits<thread_impl> _Mtraits;
|
|
||||||
typedef pair<_Tmutex, int> mmval;
|
|
||||||
typedef map<mmkey, mmval, ltmmkey> _Tparent;
|
|
||||||
typedef typename _Tparent::iterator iterator;
|
|
||||||
typedef typename _Tparent::value_type _mvtype;
|
|
||||||
_Tmutex m;
|
|
||||||
public:@;
|
|
||||||
mutex_map()
|
|
||||||
{@+ _Mtraits::init(m);@+}
|
|
||||||
void insert(const void* c, const char* id, const _Tmutex& m)
|
|
||||||
{@+ _Tparent::insert(_mvtype(mmkey(c,id), mmval(m,0)));@+}
|
|
||||||
bool check(const void* c, const char* id) const
|
|
||||||
{@+ return _Tparent::find(mmkey(c, id)) != _Tparent::end();@+}
|
|
||||||
@<|mutex_map::get| code@>;
|
|
||||||
@<|mutex_map::remove| code@>;
|
|
||||||
void lock_map()
|
|
||||||
{@+ _Mtraits::lock(m);@+}
|
|
||||||
void unlock_map()
|
|
||||||
{@+ _Mtraits::unlock(m);@+}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
@ This returns a pointer to the pair of mutex and count reference number.
|
|
||||||
@<|mutex_map::get| code@>=
|
|
||||||
mmval* get(const void* c, const char* id)
|
|
||||||
{
|
|
||||||
iterator it = _Tparent::find(mmkey(c, id));
|
|
||||||
if (it == _Tparent::end())
|
|
||||||
return NULL;
|
|
||||||
return &((*it).second);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This removes unconditionally the mutex from the map regardless its
|
|
||||||
number of references. The only user of this class should be |synchro|
|
|
||||||
class; its implementation must not remove a referenced mutex.
|
|
||||||
|
|
||||||
@<|mutex_map::remove| code@>=
|
|
||||||
void remove(const void* c, const char* id)
|
|
||||||
{
|
|
||||||
iterator it = _Tparent::find(mmkey(c, id));
|
|
||||||
if (it != _Tparent::end())
|
|
||||||
this->erase(it);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This is the |synchro| class. The constructor of this class tries to
|
|
||||||
lock a mutex for a particular address (identification of data) and
|
|
||||||
string (identification of entry-point). If the mutex is already
|
|
||||||
locked, it waits until it is unlocked and then returns. The destructor
|
|
||||||
releases the lock. The typical use is to construct the object on the
|
|
||||||
stacked of the code being synchronized.
|
|
||||||
|
|
||||||
@<|synchro| template class declaration@>=
|
|
||||||
template <int thread_impl>
|
|
||||||
class synchro {
|
|
||||||
typedef typename mutex_traits<thread_impl>::_Tmutex _Tmutex;
|
|
||||||
typedef mutex_traits<thread_impl> _Mtraits;
|
|
||||||
public:@;
|
|
||||||
typedef mutex_map<thread_impl> mutex_map_t;
|
|
||||||
private:@;
|
|
||||||
const void* caller;
|
|
||||||
const char* iden;
|
|
||||||
mutex_map_t& mutmap;
|
|
||||||
public:@;
|
|
||||||
synchro(const void* c, const char* id, mutex_map_t& mmap)
|
|
||||||
: caller(c), iden(id), mutmap(mmap)
|
|
||||||
{@+ lock();@+}
|
|
||||||
~synchro()
|
|
||||||
{@+ unlock();@+}
|
|
||||||
private:@;
|
|
||||||
@<|synchro::lock| code@>;
|
|
||||||
@<|synchro::unlock| code@>;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ The |lock| function acquires the mutex in the map. First it tries to
|
|
||||||
get an exclusive access to the map. Then it increases a number of
|
|
||||||
references of the mutex (if it does not exists, it inserts it). Then
|
|
||||||
unlocks the map, and finally tries to lock the mutex of the map.
|
|
||||||
|
|
||||||
@<|synchro::lock| code@>=
|
|
||||||
void lock() {
|
|
||||||
mutmap.lock_map();
|
|
||||||
if (!mutmap.check(caller, iden)) {
|
|
||||||
_Tmutex mut;
|
|
||||||
_Mtraits::init(mut);
|
|
||||||
mutmap.insert(caller, iden, mut);
|
|
||||||
}
|
|
||||||
mutmap.get(caller, iden)->second++;
|
|
||||||
mutmap.unlock_map();
|
|
||||||
_Mtraits::lock(mutmap.get(caller, iden)->first);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ The |unlock| function first locks the map. Then releases the lock,
|
|
||||||
and decreases a number of references. If it is zero, it removes the
|
|
||||||
mutex.
|
|
||||||
|
|
||||||
@<|synchro::unlock| code@>=
|
|
||||||
void unlock() {
|
|
||||||
mutmap.lock_map();
|
|
||||||
if (mutmap.check(caller, iden)) {
|
|
||||||
_Mtraits::unlock(mutmap.get(caller, iden)->first);
|
|
||||||
mutmap.get(caller, iden)->second--;
|
|
||||||
if (mutmap.get(caller, iden)->second == 0)
|
|
||||||
mutmap.remove(caller, iden);
|
|
||||||
}
|
|
||||||
mutmap.unlock_map();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ These are traits for conditions. We need |init|, |broadcast|, |wait|
|
|
||||||
and |destroy|.
|
|
||||||
|
|
||||||
@<|cond_traits| template class declaration@>=
|
|
||||||
template <int thread_impl>
|
|
||||||
struct cond_traits {
|
|
||||||
typedef typename IF<thread_impl==posix, pthread_cond_t, Empty>::RET _Tcond;
|
|
||||||
typedef typename mutex_traits<thread_impl>::_Tmutex _Tmutex;
|
|
||||||
static void init(_Tcond& cond);
|
|
||||||
static void broadcast(_Tcond& cond);
|
|
||||||
static void wait(_Tcond& cond, _Tmutex& mutex);
|
|
||||||
static void destroy(_Tcond& cond);
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Here is the condition counter. It is a counter which starts at 0,
|
|
||||||
and can be increased and decreased. A thread can wait until the
|
|
||||||
counter is changed, this is implemented by condition. After the wait
|
|
||||||
is done, another (or the same) thread, by calling |waitForChange|
|
|
||||||
waits for another change. This can be dangerous, since it is possible
|
|
||||||
to wait for a change which will not happen, because all the threads
|
|
||||||
which can cause the change (by increase or decrease) might have
|
|
||||||
finished.
|
|
||||||
|
|
||||||
@<|condition_counter| template class declaration@>=
|
|
||||||
template <int thread_impl>
|
|
||||||
class condition_counter {
|
|
||||||
typedef typename mutex_traits<thread_impl>::_Tmutex _Tmutex;
|
|
||||||
typedef typename cond_traits<thread_impl>::_Tcond _Tcond;
|
|
||||||
int counter;
|
|
||||||
_Tmutex mut;
|
|
||||||
_Tcond cond;
|
|
||||||
bool changed;
|
|
||||||
public:@;
|
|
||||||
@<|condition_counter| constructor code@>;
|
|
||||||
@<|condition_counter| destructor code@>;
|
|
||||||
@<|condition_counter::increase| code@>;
|
|
||||||
@<|condition_counter::decrease| code@>;
|
|
||||||
@<|condition_counter::waitForChange| code@>;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ We initialize the counter to 0, and |changed| flag to |true|, since
|
|
||||||
the counter was change from undefined value to 0.
|
|
||||||
|
|
||||||
@<|condition_counter| constructor code@>=
|
|
||||||
condition_counter()
|
|
||||||
: counter(0), changed(true)
|
|
||||||
{
|
|
||||||
mutex_traits<thread_impl>::init(mut);
|
|
||||||
cond_traits<thread_impl>::init(cond);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ In destructor, we only release the resources associated with the
|
|
||||||
condition.
|
|
||||||
|
|
||||||
@<|condition_counter| destructor code@>=
|
|
||||||
~condition_counter()
|
|
||||||
{
|
|
||||||
cond_traits<thread_impl>::destroy(cond);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ When increasing, we lock the mutex, advance the counter, remember it
|
|
||||||
is changed, broadcast, and release the mutex.
|
|
||||||
|
|
||||||
@<|condition_counter::increase| code@>=
|
|
||||||
void increase()
|
|
||||||
{
|
|
||||||
mutex_traits<thread_impl>::lock(mut);
|
|
||||||
counter++;
|
|
||||||
changed = true;
|
|
||||||
cond_traits<thread_impl>::broadcast(cond);
|
|
||||||
mutex_traits<thread_impl>::unlock(mut);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Same as increase.
|
|
||||||
@<|condition_counter::decrease| code@>=
|
|
||||||
void decrease()
|
|
||||||
{
|
|
||||||
mutex_traits<thread_impl>::lock(mut);
|
|
||||||
counter--;
|
|
||||||
changed = true;
|
|
||||||
cond_traits<thread_impl>::broadcast(cond);
|
|
||||||
mutex_traits<thread_impl>::unlock(mut);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ We lock the mutex, and if there was a change since the last call of
|
|
||||||
|waitForChange|, we return immediately, otherwise we wait for the
|
|
||||||
change. The mutex is released.
|
|
||||||
|
|
||||||
@<|condition_counter::waitForChange| code@>=
|
|
||||||
int waitForChange()
|
|
||||||
{
|
|
||||||
mutex_traits<thread_impl>::lock(mut);
|
|
||||||
if (!changed) {
|
|
||||||
cond_traits<thread_impl>::wait(cond, mut);
|
|
||||||
}
|
|
||||||
changed = false;
|
|
||||||
int res = counter;
|
|
||||||
mutex_traits<thread_impl>::unlock(mut);
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ The detached thread is the same as joinable |thread|. We only
|
|
||||||
re-implement |run| method to call |thread_traits::detach_run|, and add
|
|
||||||
a method which installs a counter. The counter is increased and
|
|
||||||
decreased on the body of the new thread.
|
|
||||||
|
|
||||||
@<|detach_thread| template class declaration@>=
|
|
||||||
template <int thread_impl>
|
|
||||||
class detach_thread : public thread<thread_impl> {
|
|
||||||
public:@;
|
|
||||||
condition_counter<thread_impl>* counter;
|
|
||||||
detach_thread() : counter(NULL) {}
|
|
||||||
void installCounter(condition_counter<thread_impl>* c)
|
|
||||||
{@+ counter = c;@+}
|
|
||||||
void run()
|
|
||||||
{@+thread_traits<thread_impl>::detach_run(this);@+}
|
|
||||||
};
|
|
||||||
|
|
||||||
@ The detach thread group is (by interface) the same as
|
|
||||||
|thread_group|. The extra thing we have here is the |counter|. The
|
|
||||||
implementation of |insert| and |run| is different.
|
|
||||||
|
|
||||||
@<|detach_thread_group| template class declaration@>=
|
|
||||||
template<int thread_impl>
|
|
||||||
class detach_thread_group {
|
|
||||||
typedef thread_traits<thread_impl> _Ttraits;
|
|
||||||
typedef cond_traits<thread_impl> _Ctraits;
|
|
||||||
typedef detach_thread<thread_impl> _Ctype;
|
|
||||||
list<_Ctype *> tlist;
|
|
||||||
typedef typename list<_Ctype*>::iterator iterator;
|
|
||||||
condition_counter<thread_impl> counter;
|
|
||||||
public:@;
|
|
||||||
static int max_parallel_threads;
|
|
||||||
@<|detach_thread_group::insert| code@>;
|
|
||||||
@<|detach_thread_group| destructor code@>;
|
|
||||||
@<|detach_thread_group::run| code@>;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ When inserting, the counter is installed to the thread.
|
|
||||||
@<|detach_thread_group::insert| code@>=
|
|
||||||
void insert(_Ctype* c)
|
|
||||||
{
|
|
||||||
tlist.push_back(c);
|
|
||||||
c->installCounter(&counter);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ The destructor is clear.
|
|
||||||
@<|detach_thread_group| destructor code@>=
|
|
||||||
~detach_thread_group()
|
|
||||||
{
|
|
||||||
while (!tlist.empty()) {
|
|
||||||
delete tlist.front();
|
|
||||||
tlist.pop_front();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ We cycle through all threads in the group, and in each cycle we wait
|
|
||||||
for the change in the |counter|. If the counter indicates less than
|
|
||||||
maximum parallel threads running, then a new thread is run, and the
|
|
||||||
iterator in the list is moved.
|
|
||||||
|
|
||||||
At the end we have to wait for all thread to finish.
|
|
||||||
|
|
||||||
@<|detach_thread_group::run| code@>=
|
|
||||||
void run()
|
|
||||||
{
|
|
||||||
int mpt = max_parallel_threads;
|
|
||||||
iterator it = tlist.begin();
|
|
||||||
while (it != tlist.end()) {
|
|
||||||
if (counter.waitForChange() < mpt) {
|
|
||||||
counter.increase();
|
|
||||||
(*it)->run();
|
|
||||||
++it;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
while (counter.waitForChange() > 0) {}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we only define the specializations for POSIX threads. Then we
|
|
||||||
define the macros. Note that the |PosixSynchro| class construct itself
|
|
||||||
from the static map defined in {\tt sthreads.cpp}.
|
|
||||||
|
|
||||||
@<POSIX thread specializations@>=
|
|
||||||
typedef detach_thread<posix> PosixThread;
|
|
||||||
typedef detach_thread_group<posix> PosixThreadGroup;
|
|
||||||
typedef synchro<posix> posix_synchro;
|
|
||||||
class PosixSynchro : public posix_synchro {
|
|
||||||
public:@;
|
|
||||||
PosixSynchro(const void* c, const char* id);
|
|
||||||
};
|
|
||||||
@#
|
|
||||||
#define THREAD@, sthread::PosixThread
|
|
||||||
#define THREAD_GROUP@, sthread::PosixThreadGroup
|
|
||||||
#define SYNCHRO@, sthread::PosixSynchro
|
|
||||||
|
|
||||||
@ Here we define an empty class and use it as thread and
|
|
||||||
mutex. |NoSynchro| class is also empty, but an empty constructor is
|
|
||||||
declared. The empty destructor is declared only to avoid ``unused
|
|
||||||
variable warning''.
|
|
||||||
|
|
||||||
@<No threading specializations@>=
|
|
||||||
typedef thread<empty> NoThread;
|
|
||||||
typedef thread_group<empty> NoThreadGroup;
|
|
||||||
typedef synchro<empty> no_synchro;
|
|
||||||
class NoSynchro {
|
|
||||||
public:@;
|
|
||||||
NoSynchro(const void* c, const char* id) {}
|
|
||||||
~NoSynchro() {}
|
|
||||||
};
|
|
||||||
@#
|
|
||||||
#define THREAD@, sthread::NoThread
|
|
||||||
#define THREAD_GROUP@, sthread::NoThreadGroup
|
|
||||||
#define SYNCHRO@, sthread::NoSynchro
|
|
||||||
|
|
||||||
@ End of {\tt sthreads.h} file.
|
|
|
@ -0,0 +1,144 @@
|
||||||
|
// Copyright (C) 2004-2011, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "symmetry.hh"
|
||||||
|
#include "permutation.hh"
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
/* Construct symmetry as numbers of successively equal items in the sequence. */
|
||||||
|
|
||||||
|
/* Build a symmetry from an ordered sequence by counting runs of
   successively equal items; e.g. (a,a,a,b,c,c) yields (3,1,2). The base
   IntSequence is sized to the number of distinct items, all zeros. */
Symmetry::Symmetry(const IntSequence &s)
  : IntSequence(s.getNumDistinct(), 0)
{
  int cls = 0;
  for (int i = 0; i < s.size(); i++)
    {
      // start a new class whenever the item differs from its predecessor
      if (i > 0 && s[i] != s[i-1])
        cls++;
      operator[](cls)++;
    }
}
|
||||||
|
|
||||||
|
/* Find a class of the symmetry containing a given index. */
|
||||||
|
|
||||||
|
int
|
||||||
|
Symmetry::findClass(int i) const
|
||||||
|
{
|
||||||
|
int j = 0;
|
||||||
|
int sum = 0;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
sum += operator[](j);
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
while (j < size() && sum <= i);
|
||||||
|
|
||||||
|
return j-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The symmetry is full if it allows for any permutation of indices. It
|
||||||
|
means, that there is at most one non-zero index. */
|
||||||
|
|
||||||
|
bool
|
||||||
|
Symmetry::isFull() const
|
||||||
|
{
|
||||||
|
int count = 0;
|
||||||
|
for (int i = 0; i < num(); i++)
|
||||||
|
if (operator[](i) != 0)
|
||||||
|
count++;
|
||||||
|
return count <= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we construct the beginning of the |symiterator|. The first
|
||||||
|
symmetry index is 0. If length is 2, the second index is the
|
||||||
|
dimension, otherwise we create the subordinal symmetry set and its
|
||||||
|
beginning as subordinal |symiterator|. */
|
||||||
|
|
||||||
|
/* Construct the beginning of the iterator over the symmetry set. The
   first symmetry index is set to 0. For length 2 the second index is
   simply the dimension; otherwise a subordinal symmetry set of length
   one less is created together with its own beginning iterator. */
symiterator::symiterator(SymmetrySet &ss)
  : s(ss), subit(NULL), subs(NULL), end_flag(false)
{
  s.sym()[0] = 0;
  if (s.size() != 2)
    {
      // recursive case: delegate the remaining indices to a sub-iterator
      subs = new SymmetrySet(s, s.dimen());
      subit = new symiterator(*subs);
    }
  else
    s.sym()[1] = s.dimen();
}
|
||||||
|
|
||||||
|
/* Release the subordinal iterator and symmetry set, if any. */
symiterator::~symiterator()
{
  // 'delete' on a null pointer is a no-op, so no explicit checks are needed
  delete subit;
  delete subs;
}
|
||||||
|
|
||||||
|
/* Here we move to the next symmetry. We do so only, if we are not at
|
||||||
|
the end. If length is 2, we increase lower index and decrease upper
|
||||||
|
index, otherwise we increase the subordinal symmetry. If we got to the
|
||||||
|
end, we recreate the subordinal symmetry set and set the subordinal
|
||||||
|
iterator to the beginning. At the end we test, if we are not at the
|
||||||
|
end. This is recognized if the lowest index exceeded the dimension. */
|
||||||
|
|
||||||
|
/* Move to the next symmetry, but only if we are not already at the end.
   For length 2 we increase the lower index and decrease the upper one.
   Otherwise we advance the subordinal iterator; when it reaches its end,
   the subordinal set and iterator are destroyed and recreated with the
   first index advanced and the remaining dimension reduced accordingly.
   Finally, the end is recognized when the first index has exceeded the
   dimension. */
symiterator &
symiterator::operator++()
{
  if (!end_flag)
    {
      if (s.size() == 2)
        {
          // base case: shift one unit from the second index to the first
          s.sym()[0]++;
          s.sym()[1]--;
        }
      else
        {
          ++(*subit);
          if (subit->isEnd())
            {
              // sub-iterator exhausted: advance our own index and rebuild
              // the subordinal set over the remaining dimension
              delete subit;
              delete subs;
              s.sym()[0]++;
              subs = new SymmetrySet(s, s.dimen()-s.sym()[0]);
              subit = new symiterator(*subs);
            }
        }
      // first index past the dimension means we have run out of symmetries
      if (s.sym()[0] == s.dimen()+1)
        end_flag = true;
    }
  return *this;
}
|
||||||
|
|
||||||
|
/* Build the list of symmetries induced by the symmetry |s| on each
   class of the equivalence |e|, in the equivalence's class order. */
InducedSymmetries::InducedSymmetries(const Equivalence &e, const Symmetry &s)
{
  Equivalence::const_seqit it = e.begin();
  while (it != e.end())
    {
      push_back(Symmetry(s, *it));
      ++it;
    }
}
|
||||||
|
|
||||||
|
// |InducedSymmetries| permuted constructor code
|
||||||
|
// |InducedSymmetries| permuted constructor: like the plain constructor,
// but the equivalence classes are visited in the order given by the
// permutation |p| rather than in their natural order.
InducedSymmetries::InducedSymmetries(const Equivalence &e, const Permutation &p,
                                     const Symmetry &s)
{
  const int nclasses = e.numClasses();
  for (int i = 0; i < nclasses; i++)
    {
      // locate the class whose position is mapped to |i| by the permutation
      Equivalence::const_seqit it = e.find(p.getMap()[i]);
      push_back(Symmetry(s, *it));
    }
}
|
||||||
|
|
||||||
|
/* Debug print. */
|
||||||
|
|
||||||
|
/* Debug print: the number of induced symmetries followed by each one. */
void
InducedSymmetries::print() const
{
  printf("Induced symmetries: %lu\n", (unsigned long) size());
  for (unsigned int idx = 0; idx < size(); idx++)
    operator[](idx).print();
}
|
|
@ -1,153 +0,0 @@
|
||||||
@q Copyright (C) 2004-2011, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt symmetry.cpp} file.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#include "symmetry.h"
|
|
||||||
#include "permutation.h"
|
|
||||||
|
|
||||||
#include <cstdio>
|
|
||||||
|
|
||||||
@<|Symmetry| constructor code@>;
|
|
||||||
@<|Symmetry::findClass| code@>;
|
|
||||||
@<|Symmetry::isFull| code@>;
|
|
||||||
@<|symiterator| constructor code@>;
|
|
||||||
@<|symiterator| destructor code@>;
|
|
||||||
@<|symiterator::operator++| code@>;
|
|
||||||
@<|InducedSymmetries| constructor code@>;
|
|
||||||
@<|InducedSymmetries| permuted constructor code@>;
|
|
||||||
@<|InducedSymmetries::print| code@>;
|
|
||||||
|
|
||||||
@ Construct symmetry as numbers of successively equal items in the sequence.
|
|
||||||
|
|
||||||
@<|Symmetry| constructor code@>=
|
|
||||||
Symmetry::Symmetry(const IntSequence& s)
|
|
||||||
: IntSequence(s.getNumDistinct(), 0)
|
|
||||||
{
|
|
||||||
int p = 0;
|
|
||||||
if (s.size() > 0)
|
|
||||||
operator[](p) = 1;
|
|
||||||
for (int i = 1; i < s.size(); i++) {
|
|
||||||
if (s[i] != s[i-1])
|
|
||||||
p++;
|
|
||||||
operator[](p)++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Find a class of the symmetry containing a given index.
|
|
||||||
@<|Symmetry::findClass| code@>=
|
|
||||||
int Symmetry::findClass(int i) const
|
|
||||||
{
|
|
||||||
int j = 0;
|
|
||||||
int sum = 0;
|
|
||||||
do {
|
|
||||||
sum += operator[](j);
|
|
||||||
j++;
|
|
||||||
} while (j < size() && sum <= i);
|
|
||||||
|
|
||||||
return j-1;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ The symmetry is full if it allows for any permutation of indices. It
|
|
||||||
means, that there is at most one non-zero index.
|
|
||||||
|
|
||||||
@<|Symmetry::isFull| code@>=
|
|
||||||
bool Symmetry::isFull() const
|
|
||||||
{
|
|
||||||
int count = 0;
|
|
||||||
for (int i = 0; i < num(); i++)
|
|
||||||
if (operator[](i) != 0)
|
|
||||||
count++;
|
|
||||||
return count <=1;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we construct the beginning of the |symiterator|. The first
|
|
||||||
symmetry index is 0. If length is 2, the second index is the
|
|
||||||
dimension, otherwise we create the subordinal symmetry set and its
|
|
||||||
beginning as subordinal |symiterator|.
|
|
||||||
|
|
||||||
@<|symiterator| constructor code@>=
|
|
||||||
symiterator::symiterator(SymmetrySet& ss)
|
|
||||||
: s(ss), subit(NULL), subs(NULL), end_flag(false)
|
|
||||||
{
|
|
||||||
s.sym()[0] = 0;
|
|
||||||
if (s.size() == 2) {
|
|
||||||
s.sym()[1] = s.dimen();
|
|
||||||
} else {
|
|
||||||
subs = new SymmetrySet(s, s.dimen());
|
|
||||||
subit = new symiterator(*subs);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|symiterator| destructor code@>=
|
|
||||||
symiterator::~symiterator( )
|
|
||||||
{
|
|
||||||
if (subit)
|
|
||||||
delete subit;
|
|
||||||
if (subs)
|
|
||||||
delete subs;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we move to the next symmetry. We do so only, if we are not at
|
|
||||||
the end. If length is 2, we increase lower index and decrease upper
|
|
||||||
index, otherwise we increase the subordinal symmetry. If we got to the
|
|
||||||
end, we recreate the subordinal symmetry set and set the subordinal
|
|
||||||
iterator to the beginning. At the end we test, if we are not at the
|
|
||||||
end. This is recognized if the lowest index exceeded the dimension.
|
|
||||||
|
|
||||||
@<|symiterator::operator++| code@>=
|
|
||||||
symiterator& symiterator::operator++()
|
|
||||||
{
|
|
||||||
if (!end_flag) {
|
|
||||||
if (s.size() == 2) {
|
|
||||||
s.sym()[0]++;
|
|
||||||
s.sym()[1]--;
|
|
||||||
} else {
|
|
||||||
++(*subit);
|
|
||||||
if (subit->isEnd()) {
|
|
||||||
delete subit;
|
|
||||||
delete subs;
|
|
||||||
s.sym()[0]++;
|
|
||||||
subs = new SymmetrySet(s, s.dimen()-s.sym()[0]);
|
|
||||||
subit = new symiterator(*subs);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (s.sym()[0] == s.dimen()+1)
|
|
||||||
end_flag=true;
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|InducedSymmetries| constructor code@>=
|
|
||||||
InducedSymmetries::InducedSymmetries(const Equivalence& e, const Symmetry& s)
|
|
||||||
{
|
|
||||||
for (Equivalence::const_seqit i = e.begin(); i != e.end(); ++i) {
|
|
||||||
push_back(Symmetry(s, *i));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|InducedSymmetries| permuted constructor code@>=
|
|
||||||
InducedSymmetries::InducedSymmetries(const Equivalence& e, const Permutation& p,
|
|
||||||
const Symmetry& s)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < e.numClasses(); i++) {
|
|
||||||
Equivalence::const_seqit it = e.find(p.getMap()[i]);
|
|
||||||
push_back(Symmetry(s, *it));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Debug print.
|
|
||||||
@<|InducedSymmetries::print| code@>=
|
|
||||||
void InducedSymmetries::print() const
|
|
||||||
{
|
|
||||||
printf("Induced symmetries: %lu\n", (unsigned long) size());
|
|
||||||
for (unsigned int i = 0; i < size(); i++)
|
|
||||||
operator[](i).print();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ End of {\tt symmetry.cpp} file.
|
|
|
@ -0,0 +1,227 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// Symmetry.
|
||||||
|
|
||||||
|
/* Symmetry is an abstraction for a term of the form $y^3u^2$. It manages
|
||||||
|
only indices, not the variable names. So if one uses this
|
||||||
|
abstraction, he must keep in mind that $y$ is the first, and $u$ is
|
||||||
|
the second.
|
||||||
|
|
||||||
|
In fact, the symmetry is a special case of equivalence, but its
|
||||||
|
implementation is much simpler. We do not need an abstraction for the
|
||||||
|
term $yyuyu$ but due to Green theorem we can have term $y^3u^2$. That
|
||||||
|
is why the equivalence is too general for our purposes.
|
||||||
|
|
||||||
|
One of a main purposes of the tensor library is to calculate something like:
|
||||||
|
$$\left[B_{y^2u^3}\right]_{\alpha_1\alpha_2\beta_1\beta_2\beta_3}
|
||||||
|
=\left[g_{y^l}\right]_{\gamma_1\ldots\gamma_l}
|
||||||
|
\left(\sum_{c\in M_{l,5}}
|
||||||
|
\prod_{m=1}^l\left[g_{c_m}\right]^{\gamma_m}_{c_m(\alpha,\beta)}\right)$$
|
||||||
|
If, for instance, $l=3$, and $c=\{\{0,4\},\{1,2\},\{3\}\}$, then we
|
||||||
|
have to calculate
|
||||||
|
$$\left[g_{y^3}\right]_{\gamma_1\gamma_2\gamma_3}
|
||||||
|
\left[g_{yu}\right]^{\gamma_1}_{\alpha_1\beta_3}
|
||||||
|
\left[g_{yu}\right]^{\gamma_2}_{\alpha_2\beta_1}
|
||||||
|
\left[g_u\right]^{\gamma_3}_{\beta_2}
|
||||||
|
$$
|
||||||
|
|
||||||
|
We must be able to calculate a symmetry induced by symmetry $y^2u^3$
|
||||||
|
and by an equivalence class from equivalence $c$. For equivalence
|
||||||
|
class $\{0,4\}$ the induced symmetry is $yu$, since we pick first and
|
||||||
|
fifth variable from $y^2u^3$. For a given outer symmetry, the class
|
||||||
|
|InducedSymmetries| does this for all classes of a given equivalence.
|
||||||
|
|
||||||
|
We need also to cycle through all possible symmetries yielding the
|
||||||
|
given dimension. For this purpose we define classes |SymmetrySet| and
|
||||||
|
|symiterator|.
|
||||||
|
|
||||||
|
The symmetry is implemented as |IntSequence|, in fact, it inherits
|
||||||
|
from it. */
|
||||||
|
|
||||||
|
#ifndef SYMMETRY_H
|
||||||
|
#define SYMMETRY_H
|
||||||
|
|
||||||
|
#include "equivalence.hh"
|
||||||
|
#include "int_sequence.hh"
|
||||||
|
|
||||||
|
#include <list>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
/* Clear. The method |isFull| returns true if and only if the symmetry
|
||||||
|
allows for any permutation of indices. */
|
||||||
|
|
||||||
|
/* Clear. The method |isFull| returns true if and only if the symmetry
   allows for any permutation of indices. */

class Symmetry : public IntSequence
{
public:
  /* We provide three constructors for symmetries of the form $y^n$,
     $y^nu^m$, $y^nu^m\sigma^k$. Also a copy constructor, and finally a
     constructor of implied symmetry for a symmetry and an equivalence
     class. It is already implemented in |IntSequence| so we only call
     appropriate constructor of |IntSequence|. We also provide the
     subsymmetry, which takes the given length of symmetry from the end.

     The last constructor constructs a symmetry from an integer sequence
     (supposed to be ordered) as a symmetry counting successively equal
     items. For instance the sequence $(a,a,a,b,c,c,d,d,d,d)$ produces
     symmetry $(3,1,2,4)$. */

  // All-zero symmetry of the given length; |dummy| only disambiguates
  // this overload from Symmetry(int) below.
  Symmetry(int len, const char *dummy)
    : IntSequence(len, 0)
  {
  }
  // One-class symmetry $y^{i1}$.
  // NOTE(review): not |explicit|, so it allows implicit int->Symmetry
  // conversion — presumably intentional for callers; confirm before changing.
  Symmetry(int i1)
    : IntSequence(1, i1)
  {
  }
  // Two-class symmetry $y^{i1}u^{i2}$.
  Symmetry(int i1, int i2)
    : IntSequence(2)
  {
    operator[](0) = i1; operator[](1) = i2;
  }
  // Three-class symmetry $y^{i1}u^{i2}\sigma^{i3}$.
  Symmetry(int i1, int i2, int i3)
    : IntSequence(3)
  {
    operator[](0) = i1;
    operator[](1) = i2;
    operator[](2) = i3;
  }
  // Four-class symmetry.
  Symmetry(int i1, int i2, int i3, int i4)
    : IntSequence(4)
  {
    operator[](0) = i1;
    operator[](1) = i2;
    operator[](2) = i3;
    operator[](3) = i4;
  }
  // Copy constructor.
  Symmetry(const Symmetry &s)
    : IntSequence(s)
  {
  }
  // Symmetry induced by |s| on the equivalence class |cl|.
  Symmetry(const Symmetry &s, const OrdSequence &cl)
    : IntSequence(s, cl.getData())
  {
  }
  // Subsymmetry: the last |len| classes of |s|.
  Symmetry(Symmetry &s, int len)
    : IntSequence(s, s.size()-len, s.size())
  {
  }
  // Symmetry counting successive equal items of an ordered sequence
  // (defined in symmetry.cc).
  Symmetry(const IntSequence &s);

  // Number of classes (variables) in the symmetry.
  int
  num() const
  {
    return size();
  }
  // Dimension: total number of indices, i.e. the sum of all entries.
  int
  dimen() const
  {
    return sum();
  }
  // Index of the class containing index |i|.
  int findClass(int i) const;
  // True iff at most one entry is non-zero (any permutation allowed).
  bool isFull() const;
};
|
||||||
|
|
||||||
|
/* The class |SymmetrySet| defines a set of symmetries of the given
|
||||||
|
length having given dimension. It does not store all the symmetries,
|
||||||
|
rather it provides a storage for one symmetry, which is changed as an
|
||||||
|
adjoint iterator moves.
|
||||||
|
|
||||||
|
The iterator class is |symiterator|. It is implemented
|
||||||
|
recursively. The iterator object, when created, creates subordinal
|
||||||
|
iterator, which iterates over a symmetry set whose length is one less,
|
||||||
|
and dimension is the former dimension. When the subordinal iterator
|
||||||
|
goes to its end, the superordinal iterator increases left most index in
|
||||||
|
the symmetry, resets the subordinal symmetry set with different
|
||||||
|
dimension, and iterates through the subordinal symmetry set until its
|
||||||
|
end, and so on. That's why we provide also |SymmetrySet| constructor
|
||||||
|
for construction of a subordinal symmetry set.
|
||||||
|
|
||||||
|
The typical usage of the abstractions for |SymmetrySet| and
|
||||||
|
|symiterator| is as follows:
|
||||||
|
|
||||||
|
\kern0.3cm
|
||||||
|
\centerline{|for (symiterator si(SymmetrySet(6, 4)); !si.isEnd(); ++si) {body}|}
|
||||||
|
\kern0.3cm
|
||||||
|
|
||||||
|
\noindent It goes through all symmetries of size 4 having dimension
|
||||||
|
6. One can use |*si| as the symmetry in the body. */
|
||||||
|
|
||||||
|
class SymmetrySet
|
||||||
|
{
|
||||||
|
Symmetry run;
|
||||||
|
int dim;
|
||||||
|
public:
|
||||||
|
SymmetrySet(int d, int length)
|
||||||
|
: run(length, ""), dim(d)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
SymmetrySet(SymmetrySet &s, int d)
|
||||||
|
: run(s.run, s.size()-1), dim(d)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
int
|
||||||
|
dimen() const
|
||||||
|
{
|
||||||
|
return dim;
|
||||||
|
}
|
||||||
|
const Symmetry &
|
||||||
|
sym() const
|
||||||
|
{
|
||||||
|
return run;
|
||||||
|
}
|
||||||
|
Symmetry &
|
||||||
|
sym()
|
||||||
|
{
|
||||||
|
return run;
|
||||||
|
}
|
||||||
|
int
|
||||||
|
size() const
|
||||||
|
{
|
||||||
|
return run.size();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* The logic of |symiterator| was described in |@<|SymmetrySet| class
|
||||||
|
declaration@>|. Here we only comment that: the class has a reference
|
||||||
|
to the |SymmetrySet| only to know dimension and for access of its
|
||||||
|
symmetry storage. Further we have pointers to subordinal |symiterator|
|
||||||
|
and its |SymmetrySet|. These are pointers, since the recursion ends at
|
||||||
|
length equal to 2, in which case these pointers are |NULL|.
|
||||||
|
|
||||||
|
The constructor creates the iterator which initializes to the first
|
||||||
|
symmetry (beginning). */
|
||||||
|
|
||||||
|
class symiterator
|
||||||
|
{
|
||||||
|
SymmetrySet &s;
|
||||||
|
symiterator *subit;
|
||||||
|
SymmetrySet *subs;
|
||||||
|
bool end_flag;
|
||||||
|
public:
|
||||||
|
symiterator(SymmetrySet &ss);
|
||||||
|
~symiterator();
|
||||||
|
symiterator &operator++();
|
||||||
|
bool
|
||||||
|
isEnd() const
|
||||||
|
{
|
||||||
|
return end_flag;
|
||||||
|
}
|
||||||
|
const Symmetry &
|
||||||
|
operator*() const
|
||||||
|
{
|
||||||
|
return s.sym();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This simple abstraction just constructs a vector of induced
|
||||||
|
symmetries from the given equivalence and outer symmetry. A
|
||||||
|
permutation might optionally permute the classes of the equivalence. */
|
||||||
|
|
||||||
|
class InducedSymmetries : public vector<Symmetry>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
InducedSymmetries(const Equivalence &e, const Symmetry &s);
|
||||||
|
InducedSymmetries(const Equivalence &e, const Permutation &p, const Symmetry &s);
|
||||||
|
void print() const;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,208 +0,0 @@
|
||||||
@q $Id: symmetry.hweb 841 2006-07-27 14:41:11Z tamas $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Symmetry. This is {\tt symmetry.h} file
|
|
||||||
|
|
||||||
Symmetry is an abstraction for a term of the form $y^3u^2$. It manages
|
|
||||||
only indices, not the variable names. So if one uses this
|
|
||||||
abstraction, he must keep in mind that $y$ is the first, and $u$ is
|
|
||||||
the second.
|
|
||||||
|
|
||||||
In fact, the symmetry is a special case of equivalence, but its
|
|
||||||
implementation is much simpler. We do not need an abstraction for the
|
|
||||||
term $yyuyu$ but due to Green theorem we can have term $y^3u^2$. That
|
|
||||||
is why the equivalence is too general for our purposes.
|
|
||||||
|
|
||||||
One of a main purposes of the tensor library is to calculate something like:
|
|
||||||
$$\left[B_{y^2u^3}\right]_{\alpha_1\alpha_2\beta_1\beta_2\beta_3}
|
|
||||||
=\left[g_{y^l}\right]_{\gamma_1\ldots\gamma_l}
|
|
||||||
\left(\sum_{c\in M_{l,5}}
|
|
||||||
\prod_{m=1}^l\left[g_{c_m}\right]^{\gamma_m}_{c_m(\alpha,\beta)}\right)$$
|
|
||||||
If, for instance, $l=3$, and $c=\{\{0,4\},\{1,2\},\{3\}\}$, then we
|
|
||||||
have to calculate
|
|
||||||
$$\left[g_{y^3}\right]_{\gamma_1\gamma_2\gamma_3}
|
|
||||||
\left[g_{yu}\right]^{\gamma_1}_{\alpha_1\beta_3}
|
|
||||||
\left[g_{yu}\right]^{\gamma_2}_{\alpha_2\beta_1}
|
|
||||||
\left[g_u\right]^{\gamma_3}_{\beta_2}
|
|
||||||
$$
|
|
||||||
|
|
||||||
We must be able to calculate a symmetry induced by symmetry $y^2u^3$
|
|
||||||
and by an equivalence class from equivalence $c$. For equivalence
|
|
||||||
class $\{0,4\}$ the induced symmetry is $yu$, since we pick first and
|
|
||||||
fifth variable from $y^2u^3$. For a given outer symmetry, the class
|
|
||||||
|InducedSymmetries| does this for all classes of a given equivalence.
|
|
||||||
|
|
||||||
We need also to cycle through all possible symmetries yielding the
|
|
||||||
given dimension. For this purpose we define classes |SymmetrySet| and
|
|
||||||
|symiterator|.
|
|
||||||
|
|
||||||
The symmetry is implemented as |IntSequence|, in fact, it inherits
|
|
||||||
from it.
|
|
||||||
|
|
||||||
@s Symmetry int
|
|
||||||
@s IntSequence int
|
|
||||||
@s SymmetrySet int
|
|
||||||
@s symiterator int
|
|
||||||
@s OrdSequence int
|
|
||||||
@s InducedSymmetries int
|
|
||||||
|
|
||||||
@c
|
|
||||||
#ifndef SYMMETRY_H
|
|
||||||
#define SYMMETRY_H
|
|
||||||
|
|
||||||
#include "equivalence.h"
|
|
||||||
#include "int_sequence.h"
|
|
||||||
|
|
||||||
#include <list>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
@<|Symmetry| class declaration@>;
|
|
||||||
@<|SymmetrySet| class declaration@>;
|
|
||||||
@<|symiterator| class declaration@>;
|
|
||||||
@<|InducedSymmetries| class declaration@>;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
@ Clear. The method |isFull| returns true if and only if the symmetry
|
|
||||||
allows for any permutation of indices.
|
|
||||||
|
|
||||||
@<|Symmetry| class declaration@>=
|
|
||||||
class Symmetry : public IntSequence {
|
|
||||||
public:@/
|
|
||||||
@<|Symmetry| constructors@>;
|
|
||||||
int num() const
|
|
||||||
{@+return size();@+}
|
|
||||||
int dimen() const
|
|
||||||
{@+return sum();@+}
|
|
||||||
int findClass(int i) const;
|
|
||||||
bool isFull() const;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ We provide three constructors for symmetries of the form $y^n$,
|
|
||||||
$y^nu^m$, $y^nu^m\sigma^k$. Also a copy constructor, and finally a
|
|
||||||
constructor of implied symmetry for a symmetry and an equivalence
|
|
||||||
class. It is already implemented in |IntSequence| so we only call
|
|
||||||
appropriate constructor of |IntSequence|. We also provide the
|
|
||||||
subsymmetry, which takes the given length of symmetry from the end.
|
|
||||||
|
|
||||||
The last constructor constructs a symmetry from an integer sequence
|
|
||||||
(supposed to be ordered) as a symmetry counting successively equal
|
|
||||||
items. For instance the sequence $(a,a,a,b,c,c,d,d,d,d)$ produces
|
|
||||||
symmetry $(3,1,2,4)$.
|
|
||||||
|
|
||||||
@<|Symmetry| constructors@>=
|
|
||||||
Symmetry(int len, const char* dummy)
|
|
||||||
: IntSequence(len, 0)@+ {}
|
|
||||||
Symmetry(int i1)
|
|
||||||
: IntSequence(1, i1)@+ {}
|
|
||||||
Symmetry(int i1, int i2)
|
|
||||||
: IntSequence(2) {@+operator[](0) = i1;@+ operator[](1) = i2;@+}
|
|
||||||
Symmetry(int i1, int i2 ,int i3)
|
|
||||||
: IntSequence(3)
|
|
||||||
{@+
|
|
||||||
operator[](0) = i1;@+
|
|
||||||
operator[](1) = i2;@+
|
|
||||||
operator[](2) = i3;@+
|
|
||||||
}
|
|
||||||
Symmetry(int i1, int i2 ,int i3, int i4)
|
|
||||||
: IntSequence(4)
|
|
||||||
{@+
|
|
||||||
operator[](0) = i1;@+
|
|
||||||
operator[](1) = i2;@+
|
|
||||||
operator[](2) = i3;@+
|
|
||||||
operator[](3) = i4;@+
|
|
||||||
}
|
|
||||||
Symmetry(const Symmetry& s)
|
|
||||||
: IntSequence(s)@+ {}
|
|
||||||
Symmetry(const Symmetry& s, const OrdSequence& cl)
|
|
||||||
: IntSequence(s, cl.getData())@+ {}
|
|
||||||
Symmetry(Symmetry& s, int len)
|
|
||||||
: IntSequence(s, s.size()-len, s.size())@+ {}
|
|
||||||
Symmetry(const IntSequence& s);
|
|
||||||
|
|
||||||
@ The class |SymmetrySet| defines a set of symmetries of the given
|
|
||||||
length having given dimension. It does not store all the symmetries,
|
|
||||||
rather it provides a storage for one symmetry, which is changed as an
|
|
||||||
adjoint iterator moves.
|
|
||||||
|
|
||||||
The iterator class is |symiterator|. It is implemented
|
|
||||||
recursively. The iterator object, when created, creates subordinal
|
|
||||||
iterator, which iterates over a symmetry set whose length is one less,
|
|
||||||
and dimension is the former dimension. When the subordinal iterator
|
|
||||||
goes to its end, the superordinal iterator increases left most index in
|
|
||||||
the symmetry, resets the subordinal symmetry set with different
|
|
||||||
dimension, and iterates through the subordinal symmetry set until its
|
|
||||||
end, and so on. That's why we provide also |SymmetrySet| constructor
|
|
||||||
for construction of a subordinal symmetry set.
|
|
||||||
|
|
||||||
The typical usage of the abstractions for |SymmetrySet| and
|
|
||||||
|symiterator| is as follows:
|
|
||||||
|
|
||||||
\kern0.3cm
|
|
||||||
\centerline{|for (symiterator si(SymmetrySet(6, 4)); !si.isEnd(); ++si) {body}|}
|
|
||||||
\kern0.3cm
|
|
||||||
|
|
||||||
\noindent It goes through all symmetries of size 4 having dimension
|
|
||||||
6. One can use |*si| as the symmetry in the body.
|
|
||||||
|
|
||||||
@<|SymmetrySet| class declaration@>=
|
|
||||||
class SymmetrySet {
|
|
||||||
Symmetry run;
|
|
||||||
int dim;
|
|
||||||
public:@;
|
|
||||||
SymmetrySet(int d, int length)
|
|
||||||
: run(length, ""), dim(d)@+ {}
|
|
||||||
SymmetrySet(SymmetrySet& s, int d)
|
|
||||||
: run(s.run, s.size()-1), dim(d)@+ {}
|
|
||||||
int dimen() const
|
|
||||||
{@+ return dim;@+}
|
|
||||||
const Symmetry& sym() const
|
|
||||||
{@+ return run;@+}
|
|
||||||
Symmetry& sym()
|
|
||||||
{@+ return run;@+}
|
|
||||||
int size() const
|
|
||||||
{@+ return run.size();@+}
|
|
||||||
};
|
|
||||||
|
|
||||||
@ The logic of |symiterator| was described in |@<|SymmetrySet| class
|
|
||||||
declaration@>|. Here we only comment that: the class has a reference
|
|
||||||
to the |SymmetrySet| only to know dimension and for access of its
|
|
||||||
symmetry storage. Further we have pointers to subordinal |symiterator|
|
|
||||||
and its |SymmetrySet|. These are pointers, since the recursion ends at
|
|
||||||
length equal to 2, in which case these pointers are |NULL|.
|
|
||||||
|
|
||||||
The constructor creates the iterator which initializes to the first
|
|
||||||
symmetry (beginning).
|
|
||||||
|
|
||||||
@<|symiterator| class declaration@>=
|
|
||||||
class symiterator {
|
|
||||||
SymmetrySet& s;
|
|
||||||
symiterator* subit;
|
|
||||||
SymmetrySet* subs;
|
|
||||||
bool end_flag;
|
|
||||||
public:@;
|
|
||||||
symiterator(SymmetrySet& ss);
|
|
||||||
~symiterator();
|
|
||||||
symiterator& operator++();
|
|
||||||
bool isEnd() const
|
|
||||||
{@+ return end_flag;@+}
|
|
||||||
const Symmetry& operator*() const
|
|
||||||
{@+ return s.sym();@+}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ This simple abstraction just constructs a vector of induced
|
|
||||||
symmetries from the given equivalence and outer symmetry. A
|
|
||||||
permutation might optionally permute the classes of the equivalence.
|
|
||||||
|
|
||||||
@<|InducedSymmetries| class declaration@>=
|
|
||||||
class InducedSymmetries : public vector<Symmetry> {
|
|
||||||
public:@;
|
|
||||||
InducedSymmetries(const Equivalence& e, const Symmetry& s);
|
|
||||||
InducedSymmetries(const Equivalence& e, const Permutation& p, const Symmetry& s);
|
|
||||||
void print() const;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt symmetry.h} file.
|
|
|
@ -0,0 +1,127 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "t_container.hh"
|
||||||
|
#include "kron_prod.hh"
|
||||||
|
#include "ps_tensor.hh"
|
||||||
|
#include "pyramid_prod.hh"
|
||||||
|
|
||||||
|
const int FGSContainer::num_one_time = 10;
|
||||||
|
|
||||||
|
// |UGSContainer| conversion from |FGSContainer|
|
||||||
|
UGSContainer::UGSContainer(const FGSContainer &c)
|
||||||
|
: TensorContainer<UGSTensor>(c.num())
|
||||||
|
{
|
||||||
|
for (FGSContainer::const_iterator it = c.begin();
|
||||||
|
it != c.end(); ++it)
|
||||||
|
{
|
||||||
|
UGSTensor *unfolded = new UGSTensor(*((*it).second));
|
||||||
|
insert(unfolded);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We set |l| to dimension of |t|, this is a tensor which multiplies
|
||||||
|
tensors from the container from the left. Also we set |k| to a
|
||||||
|
dimension of the resulting tensor. We go through all equivalences on
|
||||||
|
|k| element set and pickup only those which have $l$ classes.
|
||||||
|
|
||||||
|
In each loop, we fetch all necessary tensors for the product to the
|
||||||
|
vector |ts|. Then we form Kronecker product |KronProdAll| and feed it
|
||||||
|
with tensors from |ts|. Then we form unfolded permuted symmetry tensor
|
||||||
|
|UPSTensor| as matrix product of |t| and Kronecker product |kp|. Then
|
||||||
|
we add the permuted data to |out|. This is done by |UPSTensor| method
|
||||||
|
|addTo|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
UGSContainer::multAndAdd(const UGSTensor &t, UGSTensor &out) const
|
||||||
|
{
|
||||||
|
int l = t.dimen();
|
||||||
|
int k = out.dimen();
|
||||||
|
const EquivalenceSet &eset = ebundle.get(k);
|
||||||
|
|
||||||
|
for (EquivalenceSet::const_iterator it = eset.begin();
|
||||||
|
it != eset.end(); ++it)
|
||||||
|
{
|
||||||
|
if ((*it).numClasses() == l)
|
||||||
|
{
|
||||||
|
vector<const UGSTensor *> ts
|
||||||
|
= fetchTensors(out.getSym(), *it);
|
||||||
|
KronProdAllOptim kp(l);
|
||||||
|
for (int i = 0; i < l; i++)
|
||||||
|
kp.setMat(i, *(ts[i]));
|
||||||
|
kp.optimizeOrder();
|
||||||
|
UPSTensor ups(out.getDims(), *it, t, kp);
|
||||||
|
ups.addTo(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// |FGSContainer| conversion from |UGSContainer|
|
||||||
|
FGSContainer::FGSContainer(const UGSContainer &c)
|
||||||
|
: TensorContainer<FGSTensor>(c.num())
|
||||||
|
{
|
||||||
|
for (UGSContainer::const_iterator it = c.begin();
|
||||||
|
it != c.end(); ++it)
|
||||||
|
{
|
||||||
|
FGSTensor *folded = new FGSTensor(*((*it).second));
|
||||||
|
insert(folded);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// |FGSContainer::multAndAdd| folded code
|
||||||
|
/* Here we perform one step of the Faa Di Bruno operation. We call the
|
||||||
|
|multAndAdd| for unfolded tensor. */
|
||||||
|
void
|
||||||
|
FGSContainer::multAndAdd(const FGSTensor &t, FGSTensor &out) const
|
||||||
|
{
|
||||||
|
UGSTensor ut(t);
|
||||||
|
multAndAdd(ut, out);
|
||||||
|
}
|
||||||
|
|
||||||
|
// |FGSContainer::multAndAdd| unfolded code
|
||||||
|
/* This is the same as |@<|UGSContainer::multAndAdd| code@>|
|
||||||
|
but we do not construct |UPSTensor| from the Kronecker
|
||||||
|
product, but |FPSTensor|. */
|
||||||
|
void
|
||||||
|
FGSContainer::multAndAdd(const UGSTensor &t, FGSTensor &out) const
|
||||||
|
{
|
||||||
|
int l = t.dimen();
|
||||||
|
int k = out.dimen();
|
||||||
|
const EquivalenceSet &eset = ebundle.get(k);
|
||||||
|
|
||||||
|
for (EquivalenceSet::const_iterator it = eset.begin();
|
||||||
|
it != eset.end(); ++it)
|
||||||
|
{
|
||||||
|
if ((*it).numClasses() == l)
|
||||||
|
{
|
||||||
|
vector<const FGSTensor *> ts
|
||||||
|
= fetchTensors(out.getSym(), *it);
|
||||||
|
KronProdAllOptim kp(l);
|
||||||
|
for (int i = 0; i < l; i++)
|
||||||
|
kp.setMat(i, *(ts[i]));
|
||||||
|
kp.optimizeOrder();
|
||||||
|
FPSTensor fps(out.getDims(), *it, t, kp);
|
||||||
|
fps.addTo(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This fills a given vector with integer sequences corresponding to
|
||||||
|
first |num| indices from interval |start| (including) to |end|
|
||||||
|
(excluding). If there are not |num| of such indices, the shorter vector
|
||||||
|
is returned. */
|
||||||
|
Tensor::index
|
||||||
|
FGSContainer::getIndices(int num, vector<IntSequence> &out,
|
||||||
|
const Tensor::index &start,
|
||||||
|
const Tensor::index &end)
|
||||||
|
{
|
||||||
|
out.clear();
|
||||||
|
int i = 0;
|
||||||
|
Tensor::index run = start;
|
||||||
|
while (i < num && run != end)
|
||||||
|
{
|
||||||
|
out.push_back(run.getCoor());
|
||||||
|
i++;
|
||||||
|
++run;
|
||||||
|
}
|
||||||
|
return run;
|
||||||
|
}
|
|
@ -1,138 +0,0 @@
|
||||||
@q $Id: t_container.cweb 148 2005-04-19 15:12:26Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt t\_container.cpp} file.
|
|
||||||
@s USubTensor int
|
|
||||||
@c
|
|
||||||
#include "t_container.h"
|
|
||||||
#include "kron_prod.h"
|
|
||||||
#include "ps_tensor.h"
|
|
||||||
#include "pyramid_prod.h"
|
|
||||||
|
|
||||||
const int FGSContainer::num_one_time = 10;
|
|
||||||
@<|UGSContainer| conversion from |FGSContainer|@>;
|
|
||||||
@<|UGSContainer::multAndAdd| code@>;
|
|
||||||
@<|FGSContainer| conversion from |UGSContainer|@>;
|
|
||||||
@<|FGSContainer::multAndAdd| folded code@>;
|
|
||||||
@<|FGSContainer::multAndAdd| unfolded code@>;
|
|
||||||
@<|FGSContainer::getIndices| code@>;
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|UGSContainer| conversion from |FGSContainer|@>=
|
|
||||||
UGSContainer::UGSContainer(const FGSContainer& c)
|
|
||||||
: TensorContainer<UGSTensor>(c.num())
|
|
||||||
{
|
|
||||||
for (FGSContainer::const_iterator it = c.begin();
|
|
||||||
it != c.end(); ++it) {
|
|
||||||
UGSTensor* unfolded = new UGSTensor(*((*it).second));
|
|
||||||
insert(unfolded);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ We set |l| to dimension of |t|, this is a tensor which multiplies
|
|
||||||
tensors from the container from the left. Also we set |k| to a
|
|
||||||
dimension of the resulting tensor. We go through all equivalences on
|
|
||||||
|k| element set and pickup only those which have $l$ classes.
|
|
||||||
|
|
||||||
In each loop, we fetch all necessary tensors for the product to the
|
|
||||||
vector |ts|. Then we form Kronecker product |KronProdAll| and feed it
|
|
||||||
with tensors from |ts|. Then we form unfolded permuted symmetry tensor
|
|
||||||
|UPSTensor| as matrix product of |t| and Kronecker product |kp|. Then
|
|
||||||
we add the permuted data to |out|. This is done by |UPSTensor| method
|
|
||||||
|addTo|.
|
|
||||||
|
|
||||||
@<|UGSContainer::multAndAdd| code@>=
|
|
||||||
void UGSContainer::multAndAdd(const UGSTensor& t, UGSTensor& out) const
|
|
||||||
{
|
|
||||||
int l = t.dimen();
|
|
||||||
int k = out.dimen();
|
|
||||||
const EquivalenceSet& eset = ebundle.get(k);
|
|
||||||
|
|
||||||
for (EquivalenceSet::const_iterator it = eset.begin();
|
|
||||||
it != eset.end(); ++it) {
|
|
||||||
if ((*it).numClasses() == l) {
|
|
||||||
vector<const UGSTensor*> ts =
|
|
||||||
fetchTensors(out.getSym(), *it);
|
|
||||||
KronProdAllOptim kp(l);
|
|
||||||
for (int i = 0; i < l; i++)
|
|
||||||
kp.setMat(i, *(ts[i]));
|
|
||||||
kp.optimizeOrder();
|
|
||||||
UPSTensor ups(out.getDims(), *it, t, kp);
|
|
||||||
ups.addTo(out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|FGSContainer| conversion from |UGSContainer|@>=
|
|
||||||
FGSContainer::FGSContainer(const UGSContainer& c)
|
|
||||||
: TensorContainer<FGSTensor>(c.num())
|
|
||||||
{
|
|
||||||
for (UGSContainer::const_iterator it = c.begin();
|
|
||||||
it != c.end(); ++it) {
|
|
||||||
FGSTensor* folded = new FGSTensor(*((*it).second));
|
|
||||||
insert(folded);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we perform one step of the Faa Di Bruno operation. We call the
|
|
||||||
|multAndAdd| for unfolded tensor.
|
|
||||||
@<|FGSContainer::multAndAdd| folded code@>=
|
|
||||||
void FGSContainer::multAndAdd(const FGSTensor& t, FGSTensor& out) const
|
|
||||||
{
|
|
||||||
UGSTensor ut(t);
|
|
||||||
multAndAdd(ut, out);
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This is the same as |@<|UGSContainer::multAndAdd| code@>|
|
|
||||||
but we do not construct |UPSTensor| from the Kronecker
|
|
||||||
product, but |FPSTensor|.
|
|
||||||
|
|
||||||
@<|FGSContainer::multAndAdd| unfolded code@>=
|
|
||||||
void FGSContainer::multAndAdd(const UGSTensor& t, FGSTensor& out) const
|
|
||||||
{
|
|
||||||
int l = t.dimen();
|
|
||||||
int k = out.dimen();
|
|
||||||
const EquivalenceSet& eset = ebundle.get(k);
|
|
||||||
|
|
||||||
for (EquivalenceSet::const_iterator it = eset.begin();
|
|
||||||
it != eset.end(); ++it) {
|
|
||||||
if ((*it).numClasses() == l) {
|
|
||||||
vector<const FGSTensor*> ts =
|
|
||||||
fetchTensors(out.getSym(), *it);
|
|
||||||
KronProdAllOptim kp(l);
|
|
||||||
for (int i = 0; i < l; i++)
|
|
||||||
kp.setMat(i, *(ts[i]));
|
|
||||||
kp.optimizeOrder();
|
|
||||||
FPSTensor fps(out.getDims(), *it, t, kp);
|
|
||||||
fps.addTo(out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ This fills a given vector with integer sequences corresponding to
|
|
||||||
first |num| indices from interval |start| (including) to |end|
|
|
||||||
(excluding). If there are not |num| of such indices, the shorter vector
|
|
||||||
is returned.
|
|
||||||
|
|
||||||
@<|FGSContainer::getIndices| code@>=
|
|
||||||
Tensor::index
|
|
||||||
FGSContainer::getIndices(int num, vector<IntSequence>& out,
|
|
||||||
const Tensor::index& start,
|
|
||||||
const Tensor::index& end)
|
|
||||||
{
|
|
||||||
out.clear();
|
|
||||||
int i = 0;
|
|
||||||
Tensor::index run = start;
|
|
||||||
while (i < num && run != end) {
|
|
||||||
out.push_back(run.getCoor());
|
|
||||||
i++;
|
|
||||||
++run;
|
|
||||||
}
|
|
||||||
return run;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt t\_container.cpp} file.
|
|
|
@ -0,0 +1,387 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// Tensor containers.
|
||||||
|
|
||||||
|
/* One of primary purposes of the tensor library is to perform one step
|
||||||
|
of the Faa Di Bruno formula:
|
||||||
|
$$\left[B_{s^k}\right]_{\alpha_1\ldots\alpha_k}=
|
||||||
|
[h_{y^l}]_{\gamma_1\ldots\gamma_l}\sum_{c\in M_{l,k}}
|
||||||
|
\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}
|
||||||
|
$$
|
||||||
|
where $h_{y^l}$ and $g_{s^i}$ are tensors, $M_{l,k}$ is a set of all
|
||||||
|
equivalences with $l$ classes of $k$ element set, $c_m$ is $m$-the
|
||||||
|
class of equivalence $c$, and $\vert c_m\vert$ is its
|
||||||
|
cardinality. Further, $c_m(\alpha)$ is a sequence of $\alpha$s picked
|
||||||
|
by equivalence class $c_m$.
|
||||||
|
|
||||||
|
In order to accomplish this operation, we basically need some storage
|
||||||
|
of all tensors of the form $\left[g_{s^i}\right]$. Note that $s$ can
|
||||||
|
be compound, for instance $s=[y,u]$. Then we need storage for
|
||||||
|
$\left[g_{y^3}\right]$, $\left[g_{y^2u}\right]$,
|
||||||
|
$\left[g_{yu^5}\right]$, etc.
|
||||||
|
|
||||||
|
We need an object holding all tensors of the same type. Here type
|
||||||
|
means an information, that coordinates of the tensors can be of type
|
||||||
|
$y$, or $u$. We will group only tensors, whose symmetry is described
|
||||||
|
by |Symmetry| class. These are only $y^2u^3$, not $yuyu^2$. So, we are
|
||||||
|
going to define a class which will hold tensors whose symmetries are
|
||||||
|
of type |Symmetry| and have the same symmetry length (number of
|
||||||
|
different coordinate types). Also, for each symmetry there will be at
|
||||||
|
most one tensor.
|
||||||
|
|
||||||
|
The class has two purposes: The first is to provide storage (insert
|
||||||
|
and retrieve). The second is to perform the above step of Faa Di Bruno. This is
|
||||||
|
going through all equivalences with $l$ classes, perform the tensor
|
||||||
|
product and add to the result.
|
||||||
|
|
||||||
|
We define a template class |TensorContainer|. From different
|
||||||
|
instantiations of the template class we will inherit to create concrete
|
||||||
|
classes, for example container of unfolded general symmetric
|
||||||
|
tensors. The one step of the Faa Di Bruno (we call it |multAndAdd|) is
|
||||||
|
implemented in the concrete subclasses, because the implementation
|
||||||
|
depends on storage. Note even, that |multAndAdd| has not a template
|
||||||
|
common declaration. This is because sparse tensor $h$ is multiplied by
|
||||||
|
folded tensors $g$ yielding folded tensor $B$, but unfolded tensor $h$
|
||||||
|
is multiplied by unfolded tensors $g$ yielding unfolded tensor $B$. */
|
||||||
|
|
||||||
|
#ifndef T_CONTAINER_H
|
||||||
|
#define T_CONTAINER_H
|
||||||
|
|
||||||
|
#include "symmetry.hh"
|
||||||
|
#include "gs_tensor.hh"
|
||||||
|
#include "tl_exception.hh"
|
||||||
|
#include "tl_static.hh"
|
||||||
|
#include "sparse_tensor.hh"
|
||||||
|
#include "equivalence.hh"
|
||||||
|
#include "rfs_tensor.hh"
|
||||||
|
#include "Vector.h"
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
#include <matio.h>
|
||||||
|
|
||||||
|
// |ltsym| predicate
|
||||||
|
/* We need a predicate on strict weak ordering of
|
||||||
|
symmetries. */
|
||||||
|
struct ltsym
|
||||||
|
{
|
||||||
|
bool
|
||||||
|
operator()(const Symmetry &s1, const Symmetry &s2) const
|
||||||
|
{
|
||||||
|
return s1 < s2;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Here we define the template class for tensor container. We implement
|
||||||
|
it as |stl::map|. It is a unique container, no two tensors with same
|
||||||
|
symmetries can coexist. Keys of the map are symmetries, values are
|
||||||
|
pointers to tensor. The class is responsible for deallocating all
|
||||||
|
tensors. Creation of the tensors is done outside.
|
||||||
|
|
||||||
|
The class has integer |n| as its member. It is a number of different
|
||||||
|
coordinate types of all contained tensors. Besides intuitive insert
|
||||||
|
and retrieve interface, we define a method |fetchTensors|, which for a
|
||||||
|
given symmetry and given equivalence calculates symmetries implied by
|
||||||
|
the symmetry and all equivalence classes, and fetches corresponding
|
||||||
|
tensors in a vector.
|
||||||
|
|
||||||
|
Also, each instance of the container has a reference to
|
||||||
|
|EquivalenceBundle| which allows an access to equivalences. */
|
||||||
|
|
||||||
|
template<class _Ttype>
|
||||||
|
class TensorContainer
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
typedef const _Ttype *_const_ptr;
|
||||||
|
typedef _Ttype *_ptr;
|
||||||
|
typedef map<Symmetry, _ptr, ltsym> _Map;
|
||||||
|
typedef typename _Map::value_type _mvtype;
|
||||||
|
public:
|
||||||
|
typedef typename _Map::iterator iterator;
|
||||||
|
typedef typename _Map::const_iterator const_iterator;
|
||||||
|
private:
|
||||||
|
int n;
|
||||||
|
_Map m;
|
||||||
|
protected:
|
||||||
|
const EquivalenceBundle &ebundle;
|
||||||
|
public:
|
||||||
|
TensorContainer(int nn)
|
||||||
|
: n(nn), ebundle(*(tls.ebundle))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
/* This is just a copy constructor. This makes a hard copy of all tensors. */
|
||||||
|
TensorContainer(const TensorContainer<_Ttype> &c)
|
||||||
|
: n(c.n), m(), ebundle(c.ebundle)
|
||||||
|
{
|
||||||
|
for (const_iterator it = c.m.begin(); it != c.m.end(); ++it)
|
||||||
|
{
|
||||||
|
_Ttype *ten = new _Ttype(*((*it).second));
|
||||||
|
insert(ten);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// |TensorContainer| subtensor constructor
|
||||||
|
/* This constructor constructs a new tensor container, whose tensors
|
||||||
|
are in-place subtensors of the given container. */
|
||||||
|
TensorContainer(int first_row, int num, TensorContainer<_Ttype> &c)
|
||||||
|
: n(c.n), ebundle(*(tls.ebundle))
|
||||||
|
{
|
||||||
|
for (iterator it = c.m.begin(); it != c.m.end(); ++it)
|
||||||
|
{
|
||||||
|
_Ttype *t = new _Ttype(first_row, num, *((*it).second));
|
||||||
|
insert(t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_const_ptr
|
||||||
|
get(const Symmetry &s) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(s.num() != num(),
|
||||||
|
"Incompatible symmetry lookup in TensorContainer::get");
|
||||||
|
const_iterator it = m.find(s);
|
||||||
|
if (it == m.end())
|
||||||
|
{
|
||||||
|
TL_RAISE("Symmetry not found in TensorContainer::get");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return (*it).second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_ptr
|
||||||
|
get(const Symmetry &s)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(s.num() != num(),
|
||||||
|
"Incompatible symmetry lookup in TensorContainer::get");
|
||||||
|
iterator it = m.find(s);
|
||||||
|
if (it == m.end())
|
||||||
|
{
|
||||||
|
TL_RAISE("Symmetry not found in TensorContainer::get");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return (*it).second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
check(const Symmetry &s) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(s.num() != num(),
|
||||||
|
"Incompatible symmetry lookup in TensorContainer::check");
|
||||||
|
const_iterator it = m.find(s);
|
||||||
|
return it != m.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
insert(_ptr t)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(t->getSym().num() != num(),
|
||||||
|
"Incompatible symmetry insertion in TensorContainer::insert");
|
||||||
|
TL_RAISE_IF(check(t->getSym()),
|
||||||
|
"Tensor already in container in TensorContainer::insert");
|
||||||
|
m.insert(_mvtype(t->getSym(), t));
|
||||||
|
if (!t->isFinite())
|
||||||
|
{
|
||||||
|
throw TLException(__FILE__, __LINE__, "NaN or Inf asserted in TensorContainer::insert");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
remove(const Symmetry &s)
|
||||||
|
{
|
||||||
|
iterator it = m.find(s);
|
||||||
|
if (it != m.end())
|
||||||
|
{
|
||||||
|
_ptr t = (*it).second;
|
||||||
|
m.erase(it);
|
||||||
|
delete t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
clear()
|
||||||
|
{
|
||||||
|
while (!m.empty())
|
||||||
|
{
|
||||||
|
delete (*(m.begin())).second;
|
||||||
|
m.erase(m.begin());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
getMaxDim() const
|
||||||
|
{
|
||||||
|
int res = -1;
|
||||||
|
for (const_iterator run = m.begin(); run != m.end(); ++run)
|
||||||
|
{
|
||||||
|
int dim = (*run).first.dimen();
|
||||||
|
if (dim > res)
|
||||||
|
res = dim;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Debug print. */
|
||||||
|
void
|
||||||
|
print() const
|
||||||
|
{
|
||||||
|
printf("Tensor container: nvars=%d, tensors=%D\n", n, m.size());
|
||||||
|
for (const_iterator it = m.begin(); it != m.end(); ++it)
|
||||||
|
{
|
||||||
|
printf("Symmetry: ");
|
||||||
|
(*it).first.print();
|
||||||
|
((*it).second)->print();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Output to the MAT file. */
|
||||||
|
void
|
||||||
|
writeMat(mat_t *fd, const char *prefix) const
|
||||||
|
{
|
||||||
|
for (const_iterator it = begin(); it != end(); ++it)
|
||||||
|
{
|
||||||
|
char lname[100];
|
||||||
|
sprintf(lname, "%s_g", prefix);
|
||||||
|
const Symmetry &sym = (*it).first;
|
||||||
|
for (int i = 0; i < sym.num(); i++)
|
||||||
|
{
|
||||||
|
char tmp[10];
|
||||||
|
sprintf(tmp, "_%d", sym[i]);
|
||||||
|
strcat(lname, tmp);
|
||||||
|
}
|
||||||
|
ConstTwoDMatrix m(*((*it).second));
|
||||||
|
m.writeMat(fd, lname);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Output to the Memory Map. */
|
||||||
|
void
|
||||||
|
writeMMap(map<string, ConstTwoDMatrix> &mm, const string &prefix) const
|
||||||
|
{
|
||||||
|
ostringstream lname;
|
||||||
|
for (const_iterator it = begin(); it != end(); ++it)
|
||||||
|
{
|
||||||
|
lname.str(prefix);
|
||||||
|
lname << "_g";
|
||||||
|
const Symmetry &sym = (*it).first;
|
||||||
|
for (int i = 0; i < sym.num(); i++)
|
||||||
|
lname << "_" << sym[i];
|
||||||
|
mm.insert(make_pair(lname.str(), ConstTwoDMatrix(*((*it).second))));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we fetch all tensors given by symmetry and equivalence. We go
|
||||||
|
through all equivalence classes, calculate implied symmetry, and
|
||||||
|
fetch its tensor storing it in the same order to the vector. */
|
||||||
|
|
||||||
|
vector<_const_ptr>
|
||||||
|
fetchTensors(const Symmetry &rsym, const Equivalence &e) const
|
||||||
|
{
|
||||||
|
vector<_const_ptr> res(e.numClasses());
|
||||||
|
int i = 0;
|
||||||
|
for (Equivalence::const_seqit it = e.begin();
|
||||||
|
it != e.end(); ++it, i++)
|
||||||
|
{
|
||||||
|
Symmetry s(rsym, *it);
|
||||||
|
res[i] = get(s);
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The container owns its tensors; deallocate them all on destruction. */
virtual ~TensorContainer()
{
  clear();
}

/* Number of different coordinate types of the contained tensors. */
int
num() const
{
  return n;
}
/* Access to the bundle of equivalences referenced by this container. */
const EquivalenceBundle &
getEqBundle() const
{
  return ebundle;
}

// Iteration over the (symmetry, tensor) pairs of the underlying map.
const_iterator
begin() const
{
  return m.begin();
}
const_iterator
end() const
{
  return m.end();
}
iterator
begin()
{
  return m.begin();
}
iterator
end()
{
  return m.end();
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Here is a container storing |UGSTensor|s. We declare |multAndAdd| method. */

class FGSContainer;
class UGSContainer : public TensorContainer<UGSTensor>
{
public:
  // Construct an empty container for |nn| coordinate types.
  UGSContainer(int nn)
    : TensorContainer<UGSTensor>(nn)
  {
  }
  // Copy constructor; the base copy constructor makes a hard copy of
  // all contained tensors.
  UGSContainer(const UGSContainer &uc)
    : TensorContainer<UGSTensor>(uc)
  {
  }
  // Conversion from a container of folded tensors (defined elsewhere).
  UGSContainer(const FGSContainer &c);
  // One step of the Faa Di Bruno formula: multiply unfolded |t| with
  // the contained tensors and accumulate into unfolded |out|.
  void multAndAdd(const UGSTensor &t, UGSTensor &out) const;
};
|
||||||
|
|
||||||
|
/* Here is a container storing |FGSTensor|s. We declare two versions of
   |multAndAdd| method. The first works for folded $B$ and folded $h$
   tensors, the second works for folded $B$ and unfolded $h$. There is no
   point to do it for unfolded $B$ since the algorithm go through all the
   indices of $B$ and calculates corresponding columns. So, if $B$ is
   needed unfolded, it is more effective to calculate its folded version
   and then unfold by conversion.

   The static member |num_one_time| is a number of columns formed from
   product of $g$ tensors at one time. This is subject to change, probably
   we will have to do some tuning and decide about this number based on
   symmetries, and dimensions in the runtime. */

class FGSContainer : public TensorContainer<FGSTensor>
{
  // Number of columns of the product of $g$ tensors formed at one time.
  static const int num_one_time;
public:
  // Construct an empty container for |nn| coordinate types.
  FGSContainer(int nn)
    : TensorContainer<FGSTensor>(nn)
  {
  }
  // Copy constructor; the base copy constructor makes a hard copy of
  // all contained tensors.
  FGSContainer(const FGSContainer &fc)
    : TensorContainer<FGSTensor>(fc)
  {
  }
  // Conversion from a container of unfolded tensors (defined elsewhere).
  FGSContainer(const UGSContainer &c);
  // Faa Di Bruno step for folded |t| ($h$) accumulating into folded |out| ($B$).
  void multAndAdd(const FGSTensor &t, FGSTensor &out) const;
  // Faa Di Bruno step for unfolded |t| ($h$) accumulating into folded |out| ($B$).
  void multAndAdd(const UGSTensor &t, FGSTensor &out) const;
private:
  // Helper producing batches of indices between |start| and |end|.
  static Tensor::index getIndices(int num, vector<IntSequence> &out,
                                  const Tensor::index &start,
                                  const Tensor::index &end);
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,380 +0,0 @@
|
||||||
@q $Id: t_container.hweb 2353 2009-09-03 19:22:36Z michel $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Tensor containers. Start of {\tt t\_container.h} file.
|
|
||||||
|
|
||||||
One of primary purposes of the tensor library is to perform one step
|
|
||||||
of the Faa Di Bruno formula:
|
|
||||||
$$\left[B_{s^k}\right]_{\alpha_1\ldots\alpha_k}=
|
|
||||||
[h_{y^l}]_{\gamma_1\ldots\gamma_l}\sum_{c\in M_{l,k}}
|
|
||||||
\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}
|
|
||||||
$$
|
|
||||||
where $h_{y^l}$ and $g_{s^i}$ are tensors, $M_{l,k}$ is a set of all
|
|
||||||
equivalences with $l$ classes of $k$ element set, $c_m$ is $m$-the
|
|
||||||
class of equivalence $c$, and $\vert c_m\vert$ is its
|
|
||||||
cardinality. Further, $c_m(\alpha)$ is a sequence of $\alpha$s picked
|
|
||||||
by equivalence class $c_m$.
|
|
||||||
|
|
||||||
In order to accomplish this operation, we basically need some storage
|
|
||||||
of all tensors of the form $\left[g_{s^i}\right]$. Note that $s$ can
|
|
||||||
be compound, for instance $s=[y,u]$. Then we need storage for
|
|
||||||
$\left[g_{y^3}\right]$, $\left[g_{y^2u}\right]$,
|
|
||||||
$\left[g_{yu^5}\right]$, etc.
|
|
||||||
|
|
||||||
We need an object holding all tensors of the same type. Here type
|
|
||||||
means an information, that coordinates of the tensors can be of type
|
|
||||||
$y$, or $u$. We will group only tensors, whose symmetry is described
|
|
||||||
by |Symmetry| class. These are only $y^2u^3$, not $yuyu^2$. So, we are
|
|
||||||
going to define a class which will hold tensors whose symmetries are
|
|
||||||
of type |Symmetry| and have the same symmetry length (number of
|
|
||||||
different coordinate types). Also, for each symmetry there will be at
|
|
||||||
most one tensor.
|
|
||||||
|
|
||||||
The class has two purposes: The first is to provide storage (insert
|
|
||||||
and retrieve). The second is to perform the above step of Faa Di Bruno. This is
|
|
||||||
going through all equivalences with $l$ classes, perform the tensor
|
|
||||||
product and add to the result.
|
|
||||||
|
|
||||||
We define a template class |TensorContainer|. From different
|
|
||||||
instantiations of the template class we will inherit to create concrete
|
|
||||||
classes, for example container of unfolded general symmetric
|
|
||||||
tensors. The one step of the Faa Di Bruno (we call it |multAndAdd|) is
|
|
||||||
implemented in the concrete subclasses, because the implementation
|
|
||||||
depends on storage. Note even, that |multAndAdd| has not a template
|
|
||||||
common declaration. This is because sparse tensor $h$ is multiplied by
|
|
||||||
folded tensors $g$ yielding folded tensor $B$, but unfolded tensor $h$
|
|
||||||
is multiplied by unfolded tensors $g$ yielding unfolded tensor $B$.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#ifndef T_CONTAINER_H
|
|
||||||
#define T_CONTAINER_H
|
|
||||||
|
|
||||||
#include "symmetry.h"
|
|
||||||
#include "gs_tensor.h"
|
|
||||||
#include "tl_exception.h"
|
|
||||||
#include "tl_static.h"
|
|
||||||
#include "sparse_tensor.h"
|
|
||||||
#include "equivalence.h"
|
|
||||||
#include "rfs_tensor.h"
|
|
||||||
#include "Vector.h"
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
#include <string>
|
|
||||||
#include <sstream>
|
|
||||||
|
|
||||||
#include <matio.h>
|
|
||||||
|
|
||||||
@<|ltsym| predicate@>;
|
|
||||||
@<|TensorContainer| class definition@>;
|
|
||||||
@<|UGSContainer| class declaration@>;
|
|
||||||
@<|FGSContainer| class declaration@>;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
@ We need a predicate on strict weak ordering of symmetries.
|
|
||||||
@<|ltsym| predicate@>=
|
|
||||||
struct ltsym {
|
|
||||||
bool operator()(const Symmetry& s1, const Symmetry& s2) const
|
|
||||||
{@+ return s1 < s2;@+}
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Here we define the template class for tensor container. We implement
|
|
||||||
it as |stl::map|. It is a unique container, no two tensors with same
|
|
||||||
symmetries can coexist. Keys of the map are symmetries, values are
|
|
||||||
pointers to tensor. The class is responsible for deallocating all
|
|
||||||
tensors. Creation of the tensors is done outside.
|
|
||||||
|
|
||||||
The class has integer |n| as its member. It is a number of different
|
|
||||||
coordinate types of all contained tensors. Besides intuitive insert
|
|
||||||
and retrieve interface, we define a method |fetchTensors|, which for a
|
|
||||||
given symmetry and given equivalence calculates symmetries implied by
|
|
||||||
the symmetry and all equivalence classes, and fetches corresponding
|
|
||||||
tensors in a vector.
|
|
||||||
|
|
||||||
Also, each instance of the container has a reference to
|
|
||||||
|EquivalenceBundle| which allows an access to equivalences.
|
|
||||||
|
|
||||||
@s _const_ptr int;
|
|
||||||
@s _ptr int;
|
|
||||||
@s _Map int;
|
|
||||||
|
|
||||||
@<|TensorContainer| class definition@>=
|
|
||||||
template<class _Ttype> class TensorContainer {
|
|
||||||
protected:@;
|
|
||||||
typedef const _Ttype* _const_ptr;
|
|
||||||
typedef _Ttype* _ptr;
|
|
||||||
typedef map<Symmetry, _ptr, ltsym> _Map;@/
|
|
||||||
typedef typename _Map::value_type _mvtype;@/
|
|
||||||
public:@;
|
|
||||||
typedef typename _Map::iterator iterator;@/
|
|
||||||
typedef typename _Map::const_iterator const_iterator;@/
|
|
||||||
private:@;
|
|
||||||
int n;
|
|
||||||
_Map m;
|
|
||||||
protected:@;
|
|
||||||
const EquivalenceBundle& ebundle;
|
|
||||||
public:@;
|
|
||||||
TensorContainer(int nn)
|
|
||||||
: n(nn), ebundle(*(tls.ebundle)) @+ {}
|
|
||||||
@<|TensorContainer| copy constructor@>;
|
|
||||||
@<|TensorContainer| subtensor constructor@>;
|
|
||||||
@<|TensorContainer:get| code@>;
|
|
||||||
@<|TensorContainer::check| code@>;
|
|
||||||
@<|TensorContainer::insert| code@>;
|
|
||||||
@<|TensorContainer::remove| code@>;
|
|
||||||
@<|TensorContainer::clear| code@>;
|
|
||||||
@<|TensorContainer::fetchTensors| code@>;
|
|
||||||
@<|TensorContainer::getMaxDim| code@>;
|
|
||||||
@<|TensorContainer::print| code@>;
|
|
||||||
@<|TensorContainer::writeMat| code@>;
|
|
||||||
@<|TensorContainer::writeMMap| code@>;
|
|
||||||
|
|
||||||
virtual ~TensorContainer()
|
|
||||||
{@+ clear();@+}
|
|
||||||
|
|
||||||
@<|TensorContainer| inline methods@>;
|
|
||||||
};
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|TensorContainer| inline methods@>=
|
|
||||||
int num() const
|
|
||||||
{@+ return n;@+}
|
|
||||||
const EquivalenceBundle& getEqBundle() const
|
|
||||||
{@+ return ebundle;@+}
|
|
||||||
|
|
||||||
const_iterator begin() const
|
|
||||||
{@+ return m.begin();@+}
|
|
||||||
const_iterator end() const
|
|
||||||
{@+ return m.end();@+}
|
|
||||||
iterator begin()
|
|
||||||
{@+ return m.begin();@+}
|
|
||||||
iterator end()
|
|
||||||
{@+ return m.end();@+}
|
|
||||||
|
|
||||||
@ This is just a copy constructor. This makes a hard copy of all tensors.
|
|
||||||
@<|TensorContainer| copy constructor@>=
|
|
||||||
TensorContainer(const TensorContainer<_Ttype>& c)
|
|
||||||
: n(c.n), m(), ebundle(c.ebundle)
|
|
||||||
{
|
|
||||||
for (const_iterator it = c.m.begin(); it != c.m.end(); ++it) {
|
|
||||||
_Ttype* ten = new _Ttype(*((*it).second));
|
|
||||||
insert(ten);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This constructor constructs a new tensor container, whose tensors
|
|
||||||
are in-place subtensors of the given container.
|
|
||||||
|
|
||||||
@<|TensorContainer| subtensor constructor@>=
|
|
||||||
TensorContainer(int first_row, int num, TensorContainer<_Ttype>& c)
|
|
||||||
: n(c.n), ebundle(*(tls.ebundle))
|
|
||||||
{
|
|
||||||
for (iterator it = c.m.begin(); it != c.m.end(); ++it) {
|
|
||||||
_Ttype* t = new _Ttype(first_row, num, *((*it).second));
|
|
||||||
insert(t);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|TensorContainer:get| code@>=
|
|
||||||
_const_ptr get(const Symmetry& s) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(s.num() != num(),
|
|
||||||
"Incompatible symmetry lookup in TensorContainer::get");
|
|
||||||
const_iterator it = m.find(s);
|
|
||||||
if (it == m.end()) {
|
|
||||||
TL_RAISE("Symmetry not found in TensorContainer::get");
|
|
||||||
return NULL;
|
|
||||||
} else {
|
|
||||||
return (*it).second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
|
|
||||||
_ptr get(const Symmetry& s)
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(s.num() != num(),
|
|
||||||
"Incompatible symmetry lookup in TensorContainer::get");
|
|
||||||
iterator it = m.find(s);
|
|
||||||
if (it == m.end()) {
|
|
||||||
TL_RAISE("Symmetry not found in TensorContainer::get");
|
|
||||||
return NULL;
|
|
||||||
} else {
|
|
||||||
return (*it).second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|TensorContainer::check| code@>=
|
|
||||||
bool check(const Symmetry& s) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(s.num() != num(),
|
|
||||||
"Incompatible symmetry lookup in TensorContainer::check");
|
|
||||||
const_iterator it = m.find(s);
|
|
||||||
return it != m.end();
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|TensorContainer::insert| code@>=
|
|
||||||
void insert(_ptr t)
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(t->getSym().num() != num(),
|
|
||||||
"Incompatible symmetry insertion in TensorContainer::insert");
|
|
||||||
TL_RAISE_IF(check(t->getSym()),
|
|
||||||
"Tensor already in container in TensorContainer::insert");
|
|
||||||
m.insert(_mvtype(t->getSym(),t));
|
|
||||||
if (! t->isFinite()) {
|
|
||||||
throw TLException(__FILE__, __LINE__, "NaN or Inf asserted in TensorContainer::insert");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|TensorContainer::remove| code@>=
|
|
||||||
void remove(const Symmetry& s)
|
|
||||||
{
|
|
||||||
iterator it = m.find(s);
|
|
||||||
if (it != m.end()) {
|
|
||||||
_ptr t = (*it).second;
|
|
||||||
m.erase(it);
|
|
||||||
delete t;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|TensorContainer::clear| code@>=
|
|
||||||
void clear()
|
|
||||||
{
|
|
||||||
while (! m.empty()) {
|
|
||||||
delete (*(m.begin())).second;
|
|
||||||
m.erase(m.begin());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|TensorContainer::getMaxDim| code@>=
|
|
||||||
int getMaxDim() const
|
|
||||||
{
|
|
||||||
int res = -1;
|
|
||||||
for (const_iterator run = m.begin(); run != m.end(); ++run) {
|
|
||||||
int dim = (*run).first.dimen();
|
|
||||||
if (dim > res)
|
|
||||||
res = dim;
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Debug print.
|
|
||||||
@<|TensorContainer::print| code@>=
|
|
||||||
void print() const
|
|
||||||
{
|
|
||||||
printf("Tensor container: nvars=%d, tensors=%D\n", n, m.size());
|
|
||||||
for (const_iterator it = m.begin(); it != m.end(); ++it) {
|
|
||||||
printf("Symmetry: ");
|
|
||||||
(*it).first.print();
|
|
||||||
((*it).second)->print();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Output to the MAT file.
|
|
||||||
@<|TensorContainer::writeMat| code@>=
|
|
||||||
void writeMat(mat_t* fd, const char* prefix) const
|
|
||||||
{
|
|
||||||
for (const_iterator it = begin(); it != end(); ++it) {
|
|
||||||
char lname[100];
|
|
||||||
sprintf(lname, "%s_g", prefix);
|
|
||||||
const Symmetry& sym = (*it).first;
|
|
||||||
for (int i = 0; i < sym.num(); i++) {
|
|
||||||
char tmp[10];
|
|
||||||
sprintf(tmp, "_%d", sym[i]);
|
|
||||||
strcat(lname, tmp);
|
|
||||||
}
|
|
||||||
ConstTwoDMatrix m(*((*it).second));
|
|
||||||
m.writeMat(fd, lname);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Output to the Memory Map.
|
|
||||||
@<|TensorContainer::writeMMap| code@>=
|
|
||||||
void writeMMap(map<string,ConstTwoDMatrix> &mm, const string &prefix) const
|
|
||||||
{
|
|
||||||
ostringstream lname;
|
|
||||||
for (const_iterator it = begin(); it != end(); ++it) {
|
|
||||||
lname.str(prefix);
|
|
||||||
lname << "_g";
|
|
||||||
const Symmetry& sym = (*it).first;
|
|
||||||
for (int i = 0; i < sym.num(); i++)
|
|
||||||
lname << "_" << sym[i];
|
|
||||||
mm.insert(make_pair(lname.str(), ConstTwoDMatrix(*((*it).second))));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we fetch all tensors given by symmetry and equivalence. We go
|
|
||||||
through all equivalence classes, calculate implied symmetry, and
|
|
||||||
fetch its tensor storing it in the same order to the vector.
|
|
||||||
|
|
||||||
@<|TensorContainer::fetchTensors| code@>=
|
|
||||||
vector<_const_ptr>
|
|
||||||
fetchTensors(const Symmetry& rsym, const Equivalence& e) const
|
|
||||||
{
|
|
||||||
vector<_const_ptr> res(e.numClasses());
|
|
||||||
int i = 0;
|
|
||||||
for (Equivalence::const_seqit it = e.begin();
|
|
||||||
it != e.end(); ++it, i++) {
|
|
||||||
Symmetry s(rsym, *it);
|
|
||||||
res[i] = get(s);
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here is a container storing |UGSTensor|s. We declare |multAndAdd| method.
|
|
||||||
|
|
||||||
@<|UGSContainer| class declaration@>=
|
|
||||||
class FGSContainer;
|
|
||||||
class UGSContainer : public TensorContainer<UGSTensor> {
|
|
||||||
public:@;
|
|
||||||
UGSContainer(int nn)
|
|
||||||
: TensorContainer<UGSTensor>(nn)@+ {}
|
|
||||||
UGSContainer(const UGSContainer& uc)
|
|
||||||
: TensorContainer<UGSTensor>(uc)@+ {}
|
|
||||||
UGSContainer(const FGSContainer& c);
|
|
||||||
void multAndAdd(const UGSTensor& t, UGSTensor& out) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ Here is a container storing |FGSTensor|s. We declare two versions of
|
|
||||||
|multAndAdd| method. The first works for folded $B$ and folded $h$
|
|
||||||
tensors, the second works for folded $B$ and unfolded $h$. There is no
|
|
||||||
point to do it for unfolded $B$ since the algorithm go through all the
|
|
||||||
indices of $B$ and calculates corresponding columns. So, if $B$ is
|
|
||||||
needed unfolded, it is more effective to calculate its folded version
|
|
||||||
and then unfold by conversion.
|
|
||||||
|
|
||||||
The static member |num_one_time| is a number of columns formed from
|
|
||||||
product of $g$ tensors at one time. This is subject to change, probably
|
|
||||||
we will have to do some tuning and decide about this number based on
|
|
||||||
symmetries, and dimensions in the runtime.
|
|
||||||
|
|
||||||
@s FGSContainer int
|
|
||||||
@<|FGSContainer| class declaration@>=
|
|
||||||
class FGSContainer : public TensorContainer<FGSTensor> {
|
|
||||||
static const int num_one_time;
|
|
||||||
public:@;
|
|
||||||
FGSContainer(int nn)
|
|
||||||
: TensorContainer<FGSTensor>(nn)@+ {}
|
|
||||||
FGSContainer(const FGSContainer& fc)
|
|
||||||
: TensorContainer<FGSTensor>(fc)@+ {}
|
|
||||||
FGSContainer(const UGSContainer& c);
|
|
||||||
void multAndAdd(const FGSTensor& t, FGSTensor& out) const;
|
|
||||||
void multAndAdd(const UGSTensor& t, FGSTensor& out) const;
|
|
||||||
private:@;
|
|
||||||
static Tensor::index
|
|
||||||
getIndices(int num, vector<IntSequence>& out,
|
|
||||||
const Tensor::index& start,
|
|
||||||
const Tensor::index& end);
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt t\_container.h} file.
|
|
|
@ -0,0 +1,68 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "t_polynomial.hh"
|
||||||
|
#include "kron_prod.hh"
|
||||||
|
|
||||||
|
// |PowerProvider::getNext| unfolded code
|
||||||
|
/* This method constructs unfolded |ut| of higher dimension, deleting
|
||||||
|
the previous. */
|
||||||
|
|
||||||
|
const URSingleTensor &
|
||||||
|
PowerProvider::getNext(const URSingleTensor *dummy)
|
||||||
|
{
|
||||||
|
if (ut)
|
||||||
|
{
|
||||||
|
URSingleTensor *ut_new = new URSingleTensor(nv, ut->dimen()+1);
|
||||||
|
KronProd::kronMult(ConstVector(origv), ConstVector(ut->getData()), ut_new->getData());
|
||||||
|
delete ut;
|
||||||
|
ut = ut_new;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ut = new URSingleTensor(nv, 1);
|
||||||
|
ut->getData() = origv;
|
||||||
|
}
|
||||||
|
return *ut;
|
||||||
|
}
|
||||||
|
|
||||||
|
// |PowerProvider::getNext| folded code
|
||||||
|
/* This method just constructs next unfolded |ut| and creates folded
|
||||||
|
|ft|. */
|
||||||
|
|
||||||
|
const FRSingleTensor &
|
||||||
|
PowerProvider::getNext(const FRSingleTensor *dummy)
|
||||||
|
{
|
||||||
|
getNext(ut);
|
||||||
|
if (ft)
|
||||||
|
delete ft;
|
||||||
|
ft = new FRSingleTensor(*ut);
|
||||||
|
return *ft;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Free the cached unfolded and folded powers; either may be NULL, and
   delete on NULL is harmless. */
PowerProvider::~PowerProvider()
{
  delete ut;
  delete ft;
}
|
||||||
|
|
||||||
|
/* Conversion from a folded polynomial: unfold every contained
   full-symmetry tensor and insert it into this polynomial. */
UTensorPolynomial::UTensorPolynomial(const FTensorPolynomial &fp)
  : TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(fp.nrows(), fp.nvars())
{
  for (FTensorPolynomial::const_iterator run = fp.begin(); run != fp.end(); ++run)
    insert(new UFSTensor(*(run->second)));
}
|
||||||
|
|
||||||
|
/* Conversion from an unfolded polynomial: fold every contained
   full-symmetry tensor and insert it into this polynomial. */
FTensorPolynomial::FTensorPolynomial(const UTensorPolynomial &up)
  : TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(up.nrows(), up.nvars())
{
  for (UTensorPolynomial::const_iterator run = up.begin(); run != up.end(); ++run)
    insert(new FFSTensor(*(run->second)));
}
|
|
@ -1,80 +0,0 @@
|
||||||
@q $Id: t_polynomial.cweb 1210 2007-03-19 21:38:49Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt t\_polynomial.cpp} file.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#include "t_polynomial.h"
|
|
||||||
#include "kron_prod.h"
|
|
||||||
|
|
||||||
@<|PowerProvider::getNext| unfolded code@>;
|
|
||||||
@<|PowerProvider::getNext| folded code@>;
|
|
||||||
@<|PowerProvider| destructor code@>;
|
|
||||||
@<|UTensorPolynomial| constructor conversion code@>;
|
|
||||||
@<|FTensorPolynomial| constructor conversion code@>;
|
|
||||||
|
|
||||||
|
|
||||||
@ This method constructs unfolded |ut| of higher dimension, deleting
|
|
||||||
the previous.
|
|
||||||
|
|
||||||
@<|PowerProvider::getNext| unfolded code@>=
|
|
||||||
const URSingleTensor& PowerProvider::getNext(const URSingleTensor* dummy)
|
|
||||||
{
|
|
||||||
if (ut) {
|
|
||||||
URSingleTensor* ut_new = new URSingleTensor(nv, ut->dimen()+1);
|
|
||||||
KronProd::kronMult(ConstVector(origv), ConstVector(ut->getData()), ut_new->getData());
|
|
||||||
delete ut;
|
|
||||||
ut = ut_new;
|
|
||||||
} else {
|
|
||||||
ut = new URSingleTensor(nv, 1);
|
|
||||||
ut->getData() = origv;
|
|
||||||
}
|
|
||||||
return *ut;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This method just constructs next unfolded |ut| and creates folded
|
|
||||||
|ft|.
|
|
||||||
|
|
||||||
@<|PowerProvider::getNext| folded code@>=
|
|
||||||
const FRSingleTensor& PowerProvider::getNext(const FRSingleTensor* dummy)
|
|
||||||
{
|
|
||||||
getNext(ut);
|
|
||||||
if (ft)
|
|
||||||
delete ft;
|
|
||||||
ft = new FRSingleTensor(*ut);
|
|
||||||
return *ft;
|
|
||||||
}
|
|
||||||
|
|
||||||
@
|
|
||||||
@<|PowerProvider| destructor code@>=
|
|
||||||
PowerProvider::~PowerProvider()
|
|
||||||
{
|
|
||||||
if (ut)
|
|
||||||
delete ut;
|
|
||||||
if (ft)
|
|
||||||
delete ft;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Clear.
|
|
||||||
@<|UTensorPolynomial| constructor conversion code@>=
|
|
||||||
UTensorPolynomial::UTensorPolynomial(const FTensorPolynomial& fp)
|
|
||||||
: TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(fp.nrows(), fp.nvars())
|
|
||||||
{
|
|
||||||
for (FTensorPolynomial::const_iterator it = fp.begin();
|
|
||||||
it != fp.end(); ++it) {
|
|
||||||
insert(new UFSTensor(*((*it).second)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Clear.
|
|
||||||
@<|FTensorPolynomial| constructor conversion code@>=
|
|
||||||
FTensorPolynomial::FTensorPolynomial(const UTensorPolynomial& up)
|
|
||||||
: TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(up.nrows(), up.nvars())
|
|
||||||
{
|
|
||||||
for (UTensorPolynomial::const_iterator it = up.begin();
|
|
||||||
it != up.end(); ++it) {
|
|
||||||
insert(new FFSTensor(*((*it).second)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ End of {\tt t\_polynomial.cpp} file.
|
|
|
@ -0,0 +1,536 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// Tensor polynomial evaluation.
|
||||||
|
|
||||||
|
/* We need to evaluate a tensor polynomial of the form:
|
||||||
|
$$
|
||||||
|
\left[g_{x}\right]_{\alpha_1}[x]^{\alpha_1}+
|
||||||
|
\left[g_{x^2}\right]_{\alpha_1\alpha_2}[x]^{\alpha_1}[x]^{\alpha_2}+
|
||||||
|
\ldots+
|
||||||
|
\left[g_{x^n}\right]_{\alpha_1\ldots\alpha_n}\prod_{i=1}^n[x]^{\alpha_i}
|
||||||
|
$$
|
||||||
|
where $x$ is a column vector.
|
||||||
|
|
||||||
|
We have basically two options. The first is to use the formula above,
|
||||||
|
the second is to use a Horner-like formula:
|
||||||
|
$$
|
||||||
|
\left[\cdots\left[\left[\left[g_{x^{n-1}}\right]+
|
||||||
|
\left[g_{x^n}\right]_{\alpha_1\ldots\alpha_{n-1}\alpha_n}
|
||||||
|
[x]^{\alpha_n}\right]_{\alpha_1\ldots\alpha_{n-2}\alpha_{n-1}}
|
||||||
|
[x]^{\alpha_{n-1}}\right]\cdots\right]_{\alpha_1}
|
||||||
|
[x]^{\alpha_1}
|
||||||
|
$$
|
||||||
|
|
||||||
|
Alternatively, we can put the polynomial into a more compact form
|
||||||
|
$$\left[g_{x}\right]_{\alpha_1}[x]^{\alpha_1}+
|
||||||
|
\left[g_{x^2}\right]_{\alpha_1\alpha_2}[x]^{\alpha_1}[x]^{\alpha_2}+
|
||||||
|
\ldots+
|
||||||
|
\left[g_{x^n}\right]_{\alpha_1\ldots\alpha_n}\prod_{i=1}^n[x]^{\alpha_i}
|
||||||
|
= [G]_{\alpha_1\ldots\alpha_n}\prod_{i=1}^n\left[\matrix{1\cr x}\right]^{\alpha_i}
|
||||||
|
$$
|
||||||
|
Then the polynomial evaluation becomes just a matrix multiplication of the vector power.
|
||||||
|
|
||||||
|
Here we define the tensor polynomial as a container of full symmetry
|
||||||
|
tensors and add an evaluation methods. We have two sorts of
|
||||||
|
containers, folded and unfolded. For each type we declare two methods
|
||||||
|
implementing the above formulas. We define classes for the
|
||||||
|
compactification of the polynomial. The class derives from the tensor
|
||||||
|
and has a eval method. */
|
||||||
|
|
||||||
|
#include "t_container.hh"
|
||||||
|
#include "fs_tensor.hh"
|
||||||
|
#include "rfs_tensor.hh"
|
||||||
|
#include "tl_static.hh"
|
||||||
|
|
||||||
|
/* Just to make the code nicer, we implement a Kronecker power of a
|
||||||
|
vector encapsulated in the following class. It has |getNext| method
|
||||||
|
which returns either folded or unfolded row-oriented single column
|
||||||
|
Kronecker power of the vector according to the type of a dummy
|
||||||
|
argument. This allows us to use the type dependent code in templates
|
||||||
|
below.
|
||||||
|
|
||||||
|
The implementation of the Kronecker power is that we maintain the last
|
||||||
|
unfolded power. If unfolded |getNext| is called, we Kronecker multiply
|
||||||
|
the last power with a vector and return it. If folded |getNext| is
|
||||||
|
called, we do the same plus we fold it.
|
||||||
|
|
||||||
|
|getNext| returns the vector for the first call (first power), the
|
||||||
|
second power is returned on the second call, and so on. */
|
||||||
|
|
||||||
|
/* Kronecker power provider: maintains the last unfolded power of the
   vector and serves the next folded or unfolded power depending on the
   type of the dummy argument; the first call returns the first power. */
class PowerProvider
{
  // The base vector whose powers are produced.
  Vector origv;
  // Last unfolded power, NULL until the first getNext call.
  URSingleTensor *ut;
  // Last folded power, NULL until the first folded getNext call.
  FRSingleTensor *ft;
  // Length of the base vector.
  int nv;
public:
  PowerProvider(const ConstVector &v)
    : origv(v), ut(NULL), ft(NULL), nv(v.length())
  {
  }
  ~PowerProvider();
  // Return the next unfolded power (dummy selects the overload).
  const URSingleTensor &getNext(const URSingleTensor *dummy);
  // Return the next folded power (dummy selects the overload).
  const FRSingleTensor &getNext(const FRSingleTensor *dummy);
};
|
||||||
|
|
||||||
|
/* The tensor polynomial is basically a tensor container which is more
|
||||||
|
strict on insertions. It maintains number of rows and number of
|
||||||
|
variables and allows insertions only of those tensors, which yield
|
||||||
|
these properties. The maximum dimension is maintained by |insert|
|
||||||
|
method.
|
||||||
|
|
||||||
|
So we re-implement |insert| method and implement |evalTrad|
|
||||||
|
(traditional polynomial evaluation) and horner-like evaluation
|
||||||
|
|evalHorner|.
|
||||||
|
|
||||||
|
In addition, we implement derivatives of the polynomial and its
|
||||||
|
evaluation. The evaluation of a derivative is different from the
|
||||||
|
evaluation of the whole polynomial, simply because the evaluation of
|
||||||
|
the derivatives is a tensor, and the evaluation of the polynomial is a
|
||||||
|
vector (zero dimensional tensor). See documentation to
|
||||||
|
|@<|TensorPolynomial::derivative| code@>| and
|
||||||
|
|@<|TensorPolynomial::evalPartially| code@>| for details. */
|
||||||
|
|
||||||
|
template <class _Ttype, class _TGStype, class _Stype>
|
||||||
|
class TensorPolynomial : public TensorContainer<_Ttype>
|
||||||
|
{
|
||||||
|
int nr;
|
||||||
|
int nv;
|
||||||
|
int maxdim;
|
||||||
|
typedef TensorContainer<_Ttype> _Tparent;
|
||||||
|
typedef typename _Tparent::_ptr _ptr;
|
||||||
|
public:
|
||||||
|
TensorPolynomial(int rows, int vars)
|
||||||
|
: TensorContainer<_Ttype>(1),
|
||||||
|
nr(rows), nv(vars), maxdim(0)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
TensorPolynomial(const TensorPolynomial<_Ttype, _TGStype, _Stype> &tp, int k)
|
||||||
|
: TensorContainer<_Ttype>(tp),
|
||||||
|
nr(tp.nr), nv(tp.nv), maxdim(0)
|
||||||
|
{
|
||||||
|
derivative(k);
|
||||||
|
}
|
||||||
|
TensorPolynomial(int first_row, int num, TensorPolynomial<_Ttype, _TGStype, _Stype> &tp)
|
||||||
|
: TensorContainer<_Ttype>(first_row, num, tp),
|
||||||
|
nr(num), nv(tp.nv), maxdim(tp.maxdim)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
// |TensorPolynomial| contract constructor code@
|
||||||
|
/* This constructor takes a tensor polynomial
|
||||||
|
$$P(x,y)=\sum^m_{k=0}[g_{(xy)^k}]_{\alpha_1\ldots\alpha_k}
|
||||||
|
\left[\matrix{x\cr y}\right]^{\alpha_1\ldots\alpha_k}$$
|
||||||
|
and for a given $x$ it makes a polynomial
|
||||||
|
$$Q(y)=P(x,y).$$
|
||||||
|
|
||||||
|
The algorithm for each full symmetry $(xy)^k$ works with subtensors (slices) of
|
||||||
|
symmetry $x^iy^j$ (with $i+j=k$), and contracts these subtensors with respect to
|
||||||
|
$x^i$ to obtain a tensor of full symmetry $y^j$. Since the column
|
||||||
|
$x^i$ is calculated by |PowerProvider| we cycle for $i=1,...,m$. Then
|
||||||
|
we have to add everything for $i=0$.
|
||||||
|
|
||||||
|
The code works as follows: For slicing purposes we need stack sizes
|
||||||
|
|ss| corresponding to lengths of $x$ and $y$, and then identity |pp|
|
||||||
|
for unfolding a symmetry of the slice to obtain stack coordinates of
|
||||||
|
the slice. Then we do the calculations for $i=1,\ldots,m$ and then for
|
||||||
|
$i=0$. */
|
||||||
|
|
||||||
|
TensorPolynomial(const TensorPolynomial<_Ttype, _TGStype, _Stype> &tp, const Vector &xval)
|
||||||
|
: TensorContainer<_Ttype>(1),
|
||||||
|
nr(tp.nrows()), nv(tp.nvars() - xval.length()), maxdim(0)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(nvars() < 0,
|
||||||
|
"Length of xval too big in TensorPolynomial contract constructor");
|
||||||
|
IntSequence ss(2); ss[0] = xval.length(); ss[1] = nvars();
|
||||||
|
IntSequence pp(2); pp[0] = 0; pp[1] = 1;
|
||||||
|
|
||||||
|
// do contraction for all $i>0$
|
||||||
|
/* Here we setup the |PowerProvider|, and cycle through
|
||||||
|
$i=1,\ldots,m$. Within the loop we cycle through $j=0,\ldots,m-i$. If
|
||||||
|
there is a tensor with symmetry $(xy)^{i+j}$ in the original
|
||||||
|
polynomial, we make its slice with symmetry $x^iy^j$, and
|
||||||
|
|contractAndAdd| it to the tensor |ten| in the |this| polynomial with
|
||||||
|
a symmetry $y^j$.
|
||||||
|
|
||||||
|
Note three things: First, the tensor |ten| is either created and put
|
||||||
|
to |this| container or just got from the container, this is done in
|
||||||
|
|@<initialize |ten| of dimension |j|@>|. Second, the contribution to
|
||||||
|
the |ten| tensor must be multiplied by $\left(\matrix{i+j\cr
|
||||||
|
j}\right)$, since there are exactly that number of slices of
|
||||||
|
$(xy)^{i+j}$ of the symmetry $x^iy^j$ and all must be added. Third,
|
||||||
|
the tensor |ten| is fully symmetric and |_TGStype::contractAndAdd|
|
||||||
|
works with general symmetry, that is why we have to in-place convert
|
||||||
|
fully syummetric |ten| to a general symmetry tensor. */
|
||||||
|
PowerProvider pwp(xval);
|
||||||
|
for (int i = 1; i <= tp.maxdim; i++)
|
||||||
|
{
|
||||||
|
const _Stype &xpow = pwp.getNext((const _Stype *) NULL);
|
||||||
|
for (int j = 0; j <= tp.maxdim-i; j++)
|
||||||
|
{
|
||||||
|
if (tp.check(Symmetry(i+j)))
|
||||||
|
{
|
||||||
|
// initialize |ten| of dimension |j|
|
||||||
|
/* The pointer |ten| is either a new tensor or got from |this| container. */
|
||||||
|
_Ttype *ten;
|
||||||
|
if (_Tparent::check(Symmetry(j)))
|
||||||
|
{
|
||||||
|
ten = _Tparent::get(Symmetry(j));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ten = new _Ttype(nrows(), nvars(), j);
|
||||||
|
ten->zeros();
|
||||||
|
insert(ten);
|
||||||
|
}
|
||||||
|
|
||||||
|
Symmetry sym(i, j);
|
||||||
|
IntSequence coor(sym, pp);
|
||||||
|
_TGStype slice(*(tp.get(Symmetry(i+j))), ss, coor, TensorDimens(sym, ss));
|
||||||
|
slice.mult(Tensor::noverk(i+j, j));
|
||||||
|
_TGStype tmp(*ten);
|
||||||
|
slice.contractAndAdd(0, tmp, xpow);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// do contraction for $i=0$
|
||||||
|
/* This is easy. The code is equivalent to code |@<do contraction for
|
||||||
|
all $i>0$@>| as for $i=0$. The contraction here takes a form of a
|
||||||
|
simple addition. */
|
||||||
|
for (int j = 0; j <= tp.maxdim; j++)
|
||||||
|
{
|
||||||
|
if (tp.check(Symmetry(j)))
|
||||||
|
{
|
||||||
|
|
||||||
|
// initialize |ten| of dimension |j|
|
||||||
|
/* Same code as above */
|
||||||
|
_Ttype *ten;
|
||||||
|
if (_Tparent::check(Symmetry(j)))
|
||||||
|
{
|
||||||
|
ten = _Tparent::get(Symmetry(j));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ten = new _Ttype(nrows(), nvars(), j);
|
||||||
|
ten->zeros();
|
||||||
|
insert(ten);
|
||||||
|
}
|
||||||
|
|
||||||
|
Symmetry sym(0, j);
|
||||||
|
IntSequence coor(sym, pp);
|
||||||
|
_TGStype slice(*(tp.get(Symmetry(j))), ss, coor, TensorDimens(sym, ss));
|
||||||
|
ten->add(1.0, slice);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy constructor: copies the underlying container and all three
   dimension attributes. */
TensorPolynomial(const TensorPolynomial &tp)
  : TensorContainer<_Ttype>(tp), nr(tp.nr), nv(tp.nv), maxdim(tp.maxdim)
{
}
|
||||||
|
// Returns the number of rows common to all tensors of the polynomial.
int
nrows() const
{
  return nr;
}
|
||||||
|
// Returns the number of variables common to all tensors of the polynomial.
int
nvars() const
{
  return nv;
}
|
||||||
|
|
||||||
|
/* Here we cycle up to the maximum dimension, and if a tensor exists in
|
||||||
|
the container, then we multiply it with the Kronecker power of the
|
||||||
|
vector supplied by |PowerProvider|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
evalTrad(Vector &out, const ConstVector &v) const
|
||||||
|
{
|
||||||
|
if (_Tparent::check(Symmetry(0)))
|
||||||
|
out = _Tparent::get(Symmetry(0))->getData();
|
||||||
|
else
|
||||||
|
out.zeros();
|
||||||
|
|
||||||
|
PowerProvider pp(v);
|
||||||
|
for (int d = 1; d <= maxdim; d++)
|
||||||
|
{
|
||||||
|
const _Stype &p = pp.getNext((const _Stype *) NULL);
|
||||||
|
Symmetry cs(d);
|
||||||
|
if (_Tparent::check(cs))
|
||||||
|
{
|
||||||
|
const _Ttype *t = _Tparent::get(cs);
|
||||||
|
t->multaVec(out, p.getData());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Horner-like evaluation: we first construct the maxdim-1 tensor by
   contraction of the highest tensor with v, and then cycle downwards,
   adding the tensor of each degree and contracting once more. The code
   is clear; the only messy thing is new and delete. */
void
evalHorner(Vector &out, const ConstVector &v) const
{
  // constant term, or zero if it is absent
  if (_Tparent::check(Symmetry(0)))
    out = _Tparent::get(Symmetry(0))->getData();
  else
    out.zeros();

  if (maxdim == 0)
    return;

  // |last| starts as the highest-degree tensor; for maxdim > 1 it is
  // immediately contracted once with v. (maxdim is only raised by
  // insert(), so the tensor of degree maxdim should be present —
  // get() is relied upon here.)
  _Ttype *last;
  if (maxdim == 1)
    last = new _Ttype(*(_Tparent::get(Symmetry(1))));
  else
    last = new _Ttype(*(_Tparent::get(Symmetry(maxdim))), v);
  for (int d = maxdim-1; d >= 1; d--)
    {
      Symmetry cs(d);
      if (_Tparent::check(cs))
        {
          // add the degree-d tensor before the next contraction
          const _Ttype *nt = _Tparent::get(cs);
          last->add(1.0, ConstTwoDMatrix(*nt));
        }
      if (d > 1)
        {
          // contract one dimension with v; replaces |last|
          _Ttype *new_last = new _Ttype(*last, v);
          delete last;
          last = new_last;
        }
    }
  // final contraction accumulates into |out|
  last->multaVec(out, v);
  delete last;
}
|
||||||
|
|
||||||
|
/* Before a tensor is inserted, we check for the number of rows, and
|
||||||
|
number of variables. Then we insert and update the |maxdim|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
insert(_ptr t)
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(t->nrows() != nr,
|
||||||
|
"Wrong number of rows in TensorPolynomial::insert");
|
||||||
|
TL_RAISE_IF(t->nvar() != nv,
|
||||||
|
"Wrong number of variables in TensorPolynomial::insert");
|
||||||
|
TensorContainer<_Ttype>::insert(t);
|
||||||
|
if (maxdim < t->dimen())
|
||||||
|
maxdim = t->dimen();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The polynomial takes the form
|
||||||
|
$$\sum_{i=0}^n{1\over i!}\left[g_{y^i}\right]_{\alpha_1\ldots\alpha_i}
|
||||||
|
\left[y\right]^{\alpha_1}\ldots\left[y\right]^{\alpha_i},$$ where
|
||||||
|
$\left[g_{y^i}\right]$ are $i$-order derivatives of the polynomial. We
|
||||||
|
assume that ${1\over i!}\left[g_{y^i}\right]$ are items in the tensor
|
||||||
|
container. This method differentiates the polynomial by one order to
|
||||||
|
yield:
|
||||||
|
$$\sum_{i=1}^n{1\over i!}\left[i\cdot g_{y^i}\right]_{\alpha_1\ldots\alpha_i}
|
||||||
|
\left[y\right]^{\alpha_1}\ldots\left[y\right]^{\alpha_{i-1}},$$
|
||||||
|
where $\left[i\cdot{1\over i!}\cdot g_{y^i}\right]$ are put to the container.
|
||||||
|
|
||||||
|
A polynomial can be derivative of some order, and the order cannot be
|
||||||
|
recognized from the object. That is why we need to input the order. */
|
||||||
|
|
||||||
|
void
|
||||||
|
derivative(int k)
|
||||||
|
{
|
||||||
|
for (int d = 1; d <= maxdim; d++)
|
||||||
|
{
|
||||||
|
if (_Tparent::check(Symmetry(d)))
|
||||||
|
{
|
||||||
|
_Ttype *ten = _Tparent::get(Symmetry(d));
|
||||||
|
ten->mult((double) max((d-k), 0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now let us suppose that we have an s-order derivative of a
   polynomial whose $i$-order derivatives are $\left[g_{y^i}\right]$, so
   we have
   $$\sum_{i=s}^n{1\over i!}\left[g_{y^i}\right]_{\alpha_1\ldots\alpha_i}
   \prod_{k=1}^{i-s}\left[y\right]^{\alpha_k},$$
   where ${1\over i!}\left[g_{y^i}\right]$ are tensors in the container.

   This method performs this evaluation. The result is an s-dimensional
   tensor; the caller takes ownership of the returned pointer. Note that
   when combined with the method derivative(), they evaluate a
   derivative of some order. For example a sequence of calls
   g.derivative(0), g.derivative(1) and der = g.evalPartially(2, v)
   calculates a $2!$ multiple of the second derivative of g at v. */
_Ttype *
evalPartially(int s, const ConstVector &v)
{
  TL_RAISE_IF(v.length() != nvars(),
              "Wrong length of vector for TensorPolynomial::evalPartially");

  _Ttype *res = new _Ttype(nrows(), nvars(), s);
  res->zeros();

  // degree-s tensor contributes directly, without contraction
  if (_Tparent::check(Symmetry(s)))
    res->add(1.0, *(_Tparent::get(Symmetry(s))));

  for (int d = s+1; d <= maxdim; d++)
    {
      if (_Tparent::check(Symmetry(d)))
        {
          const _Ttype &ltmp = *(_Tparent::get(Symmetry(d)));
          _Ttype *last = new _Ttype(ltmp);
          // contract d-s dimensions with v, one at a time
          for (int j = 0; j < d - s; j++)
            {
              _Ttype *newlast = new _Ttype(*last, v);
              delete last;
              last = newlast;
            }
          res->add(1.0, *last);
          delete last;
        }
    }

  return res;
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This just gives a name to unfolded tensor polynomial. */
class FTensorPolynomial;
class UTensorPolynomial : public TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>
{
public:
  // Empty polynomial with the given numbers of rows and variables.
  UTensorPolynomial(int rows, int vars)
    : TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(rows, vars)
  {
  }
  // k-th order derivative of up.
  UTensorPolynomial(const UTensorPolynomial &up, int k)
    : TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(up, k)
  {
  }
  // Conversion from the folded representation (defined out of line).
  UTensorPolynomial(const FTensorPolynomial &fp);
  // Contraction: fixes the x part of tp at xval.
  UTensorPolynomial(const UTensorPolynomial &tp, const Vector &xval)
    : TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(tp, xval)
  {
  }
  // Row slice [first_row, first_row+num) of tp.
  UTensorPolynomial(int first_row, int num, UTensorPolynomial &tp)
    : TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(first_row, num, tp)
  {
  }
};
|
||||||
|
|
||||||
|
/* This just gives a name to folded tensor polynomial. */
|
||||||
|
|
||||||
|
class FTensorPolynomial : public TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>
{
public:
  // Empty polynomial with the given numbers of rows and variables.
  FTensorPolynomial(int rows, int vars)
    : TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(rows, vars)
  {
  }
  // k-th order derivative of fp.
  FTensorPolynomial(const FTensorPolynomial &fp, int k)
    : TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(fp, k)
  {
  }
  // Conversion from the unfolded representation (defined out of line).
  FTensorPolynomial(const UTensorPolynomial &up);
  // Contraction: fixes the x part of tp at xval.
  FTensorPolynomial(const FTensorPolynomial &tp, const Vector &xval)
    : TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(tp, xval)
  {
  }
  // Row slice [first_row, first_row+num) of tp.
  FTensorPolynomial(int first_row, int num, FTensorPolynomial &tp)
    : TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(first_row, num, tp)
  {
  }
};
|
||||||
|
|
||||||
|
/* The compact form of |TensorPolynomial| is in fact a full symmetry
|
||||||
|
tensor, with the number of variables equal to the number of variables
|
||||||
|
of the polynomial plus 1 for $1$. */
|
||||||
|
|
||||||
|
template <class _Ttype, class _TGStype, class _Stype>
|
||||||
|
class CompactPolynomial : public _Ttype
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
/* This constructor copies matrices from the given tensor polynomial to
|
||||||
|
the appropriate location in this matrix. It creates a dummy tensor
|
||||||
|
|dum| with two variables (one corresponds to $1$, the other to
|
||||||
|
$x$). The index goes through this dummy tensor and the number of
|
||||||
|
columns of the folded/unfolded general symmetry tensor corresponding
|
||||||
|
to the selections of $1$ or $x$ given by the index. Length of $1$ is
|
||||||
|
one, and length of $x$ is |pol.nvars()|. This nvs information is
|
||||||
|
stored in |dumnvs|. The symmetry of this general symmetry dummy tensor
|
||||||
|
|dumgs| is given by a number of ones and x's in the index. We then
|
||||||
|
copy the matrix, if it exists in the polynomial and increase |offset|
|
||||||
|
for the following cycle. */
|
||||||
|
|
||||||
|
CompactPolynomial(const TensorPolynomial<_Ttype, _TGStype, _Stype> &pol)
|
||||||
|
: _Ttype(pol.nrows(), pol.nvars()+1, pol.getMaxDim())
|
||||||
|
{
|
||||||
|
_Ttype::zeros();
|
||||||
|
|
||||||
|
IntSequence dumnvs(2);
|
||||||
|
dumnvs[0] = 1;
|
||||||
|
dumnvs[1] = pol.nvars();
|
||||||
|
|
||||||
|
int offset = 0;
|
||||||
|
_Ttype dum(0, 2, _Ttype::dimen());
|
||||||
|
for (Tensor::index i = dum.begin(); i != dum.end(); ++i)
|
||||||
|
{
|
||||||
|
int d = i.getCoor().sum();
|
||||||
|
Symmetry symrun(_Ttype::dimen()-d, d);
|
||||||
|
_TGStype dumgs(0, TensorDimens(symrun, dumnvs));
|
||||||
|
if (pol.check(Symmetry(d)))
|
||||||
|
{
|
||||||
|
TwoDMatrix subt(*this, offset, dumgs.ncols());
|
||||||
|
subt.add(1.0, *(pol.get(Symmetry(d))));
|
||||||
|
}
|
||||||
|
offset += dumgs.ncols();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We create |x1| to be a concatenation of $1$ and $x$, and then create
|
||||||
|
|PowerProvider| to make a corresponding power |xpow| of |x1|, and
|
||||||
|
finally multiply this matrix with the power. */
|
||||||
|
|
||||||
|
void
|
||||||
|
eval(Vector &out, const ConstVector &v) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(v.length()+1 != _Ttype::nvar(),
|
||||||
|
"Wrong input vector length in CompactPolynomial::eval");
|
||||||
|
TL_RAISE_IF(out.length() != _Ttype::nrows(),
|
||||||
|
"Wrong output vector length in CompactPolynomial::eval");
|
||||||
|
|
||||||
|
Vector x1(v.length()+1);
|
||||||
|
Vector x1p(x1, 1, v.length());
|
||||||
|
x1p = v;
|
||||||
|
x1[0] = 1.0;
|
||||||
|
|
||||||
|
if (_Ttype::dimen() == 0)
|
||||||
|
out = ConstVector(*this, 0);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PowerProvider pp(x1);
|
||||||
|
const _Stype &xpow = pp.getNext((const _Stype *) NULL);
|
||||||
|
for (int i = 1; i < _Ttype::dimen(); i++)
|
||||||
|
xpow = pp.getNext((const _Stype *) NULL);
|
||||||
|
multVec(0.0, out, 1.0, xpow);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Specialization of the CompactPolynomial for unfolded tensor. */
class UCompactPolynomial : public CompactPolynomial<UFSTensor, UGSTensor, URSingleTensor>
{
public:
  // Compactifies the given unfolded tensor polynomial.
  UCompactPolynomial(const UTensorPolynomial &upol)
    : CompactPolynomial<UFSTensor, UGSTensor, URSingleTensor>(upol)
  {
  }
};
|
||||||
|
|
||||||
|
/* Specialization of the CompactPolynomial for folded tensor. */
class FCompactPolynomial : public CompactPolynomial<FFSTensor, FGSTensor, FRSingleTensor>
{
public:
  // Compactifies the given folded tensor polynomial.
  FCompactPolynomial(const FTensorPolynomial &fpol)
    : CompactPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(fpol)
  {
  }
};
|
|
@ -1,507 +0,0 @@
|
||||||
@q $Id: t_polynomial.hweb 2336 2009-01-14 10:37:02Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Tensor polynomial evaluation. Start of {\tt t\_polynomial.h} file.
|
|
||||||
|
|
||||||
We need to evaluate a tensor polynomial of the form:
|
|
||||||
$$
|
|
||||||
\left[g_{x}\right]_{\alpha_1}[x]^{\alpha_1}+
|
|
||||||
\left[g_{x^2}\right]_{\alpha_1\alpha_2}[x]^{\alpha_1}[x]^{\alpha_2}+
|
|
||||||
\ldots+
|
|
||||||
\left[g_{x^n}\right]_{\alpha_1\ldots\alpha_n}\prod_{i=1}^n[x]^{\alpha_i}
|
|
||||||
$$
|
|
||||||
where $x$ is a column vector.
|
|
||||||
|
|
||||||
We have basically two options. The first is to use the formula above,
|
|
||||||
the second is to use a Horner-like formula:
|
|
||||||
$$
|
|
||||||
\left[\cdots\left[\left[\left[g_{x^{n-1}}\right]+
|
|
||||||
\left[g_{x^n}\right]_{\alpha_1\ldots\alpha_{n-1}\alpha_n}
|
|
||||||
[x]^{\alpha_n}\right]_{\alpha_1\ldots\alpha_{n-2}\alpha_{n-1}}
|
|
||||||
[x]^{\alpha_{n-1}}\right]\cdots\right]_{\alpha_1}
|
|
||||||
[x]^{\alpha_1}
|
|
||||||
$$
|
|
||||||
|
|
||||||
Alternatively, we can put the polynomial into a more compact form
|
|
||||||
$$\left[g_{x}\right]_{\alpha_1}[x]^{\alpha_1}+
|
|
||||||
\left[g_{x^2}\right]_{\alpha_1\alpha_2}[x]^{\alpha_1}[x]^{\alpha_2}+
|
|
||||||
\ldots+
|
|
||||||
\left[g_{x^n}\right]_{\alpha_1\ldots\alpha_n}\prod_{i=1}^n[x]^{\alpha_i}
|
|
||||||
= [G]_{\alpha_1\ldots\alpha_n}\prod_{i=1}^n\left[\matrix{1\cr x}\right]^{\alpha_i}
|
|
||||||
$$
|
|
||||||
Then the polynomial evaluation becomes just a matrix multiplication of the vector power.
|
|
||||||
|
|
||||||
Here we define the tensor polynomial as a container of full symmetry
|
|
||||||
tensors and add an evaluation methods. We have two sorts of
|
|
||||||
containers, folded and unfolded. For each type we declare two methods
|
|
||||||
implementing the above formulas. We define classes for the
|
|
||||||
compactification of the polynomial. The class derives from the tensor
|
|
||||||
and has a eval method.
|
|
||||||
|
|
||||||
|
|
||||||
@s PowerProvider int
|
|
||||||
@s TensorPolynomial int
|
|
||||||
@s UTensorPolynomial int
|
|
||||||
@s FTensorPolynomial int
|
|
||||||
@s CompactPolynomial int
|
|
||||||
@s UCompactPolynomial int
|
|
||||||
@s FCompactPolynomial int
|
|
||||||
|
|
||||||
@c
|
|
||||||
#include "t_container.h"
|
|
||||||
#include "fs_tensor.h"
|
|
||||||
#include "rfs_tensor.h"
|
|
||||||
#include"tl_static.h"
|
|
||||||
|
|
||||||
@<|PowerProvider| class declaration@>;
|
|
||||||
@<|TensorPolynomial| class declaration@>;
|
|
||||||
@<|UTensorPolynomial| class declaration@>;
|
|
||||||
@<|FTensorPolynomial| class declaration@>;
|
|
||||||
@<|CompactPolynomial| class declaration@>;
|
|
||||||
@<|UCompactPolynomial| class declaration@>;
|
|
||||||
@<|FCompactPolynomial| class declaration@>;
|
|
||||||
|
|
||||||
@ Just to make the code nicer, we implement a Kronecker power of a
|
|
||||||
vector encapsulated in the following class. It has |getNext| method
|
|
||||||
which returns either folded or unfolded row-oriented single column
|
|
||||||
Kronecker power of the vector according to the type of a dummy
|
|
||||||
argument. This allows us to use the type dependent code in templates
|
|
||||||
below.
|
|
||||||
|
|
||||||
The implementation of the Kronecker power is that we maintain the last
|
|
||||||
unfolded power. If unfolded |getNext| is called, we Kronecker multiply
|
|
||||||
the last power with a vector and return it. If folded |getNext| is
|
|
||||||
called, we do the same plus we fold it.
|
|
||||||
|
|
||||||
|getNext| returns the vector for the first call (first power), the
|
|
||||||
second power is returned on the second call, and so on.
|
|
||||||
|
|
||||||
@<|PowerProvider| class declaration@>=
|
|
||||||
class PowerProvider {
|
|
||||||
Vector origv;
|
|
||||||
URSingleTensor* ut;
|
|
||||||
FRSingleTensor* ft;
|
|
||||||
int nv;
|
|
||||||
public:@;
|
|
||||||
PowerProvider(const ConstVector& v)
|
|
||||||
: origv(v), ut(NULL), ft(NULL), nv(v.length())@+ {}
|
|
||||||
~PowerProvider();
|
|
||||||
const URSingleTensor& getNext(const URSingleTensor* dummy);
|
|
||||||
const FRSingleTensor& getNext(const FRSingleTensor* dummy);
|
|
||||||
};
|
|
||||||
|
|
||||||
@ The tensor polynomial is basically a tensor container which is more
|
|
||||||
strict on insertions. It maintains number of rows and number of
|
|
||||||
variables and allows insertions only of those tensors, which yield
|
|
||||||
these properties. The maximum dimension is maintained by |insert|
|
|
||||||
method.
|
|
||||||
|
|
||||||
So we re-implement |insert| method and implement |evalTrad|
|
|
||||||
(traditional polynomial evaluation) and horner-like evaluation
|
|
||||||
|evalHorner|.
|
|
||||||
|
|
||||||
In addition, we implement derivatives of the polynomial and its
|
|
||||||
evaluation. The evaluation of a derivative is different from the
|
|
||||||
evaluation of the whole polynomial, simply because the evaluation of
|
|
||||||
the derivatives is a tensor, and the evaluation of the polynomial is a
|
|
||||||
vector (zero dimensional tensor). See documentation to
|
|
||||||
|@<|TensorPolynomial::derivative| code@>| and
|
|
||||||
|@<|TensorPolynomial::evalPartially| code@>| for details.
|
|
||||||
|
|
||||||
@s _Stype int
|
|
||||||
@s _TGStype int
|
|
||||||
|
|
||||||
@<|TensorPolynomial| class declaration@>=
|
|
||||||
template <class _Ttype, class _TGStype, class _Stype>@;
|
|
||||||
class TensorPolynomial : public TensorContainer<_Ttype> {
|
|
||||||
int nr;
|
|
||||||
int nv;
|
|
||||||
int maxdim;
|
|
||||||
typedef TensorContainer<_Ttype> _Tparent;
|
|
||||||
typedef typename _Tparent::_ptr _ptr;
|
|
||||||
public:@;
|
|
||||||
TensorPolynomial(int rows, int vars)
|
|
||||||
: TensorContainer<_Ttype>(1),
|
|
||||||
nr(rows), nv(vars), maxdim(0) {}
|
|
||||||
TensorPolynomial(const TensorPolynomial<_Ttype, _TGStype, _Stype>& tp, int k)
|
|
||||||
: TensorContainer<_Ttype>(tp),
|
|
||||||
nr(tp.nr), nv(tp.nv), maxdim(0) {@+ derivative(k);@+}
|
|
||||||
TensorPolynomial(int first_row, int num, TensorPolynomial<_Ttype, _TGStype, _Stype>& tp)
|
|
||||||
: TensorContainer<_Ttype>(first_row, num, tp),
|
|
||||||
nr(num), nv(tp.nv), maxdim(tp.maxdim)@+ {}
|
|
||||||
@<|TensorPolynomial| contract constructor code@>;
|
|
||||||
TensorPolynomial(const TensorPolynomial& tp)
|
|
||||||
: TensorContainer<_Ttype>(tp), nr(tp.nr), nv(tp.nv), maxdim(tp.maxdim)@+ {}
|
|
||||||
int nrows() const
|
|
||||||
{@+ return nr;@+}
|
|
||||||
int nvars() const
|
|
||||||
{@+ return nv;@+}
|
|
||||||
@<|TensorPolynomial::evalTrad| code@>;
|
|
||||||
@<|TensorPolynomial::evalHorner| code@>;
|
|
||||||
@<|TensorPolynomial::insert| code@>;
|
|
||||||
@<|TensorPolynomial::derivative| code@>;
|
|
||||||
@<|TensorPolynomial::evalPartially| code@>;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ This constructor takes a tensor polynomial
|
|
||||||
$$P(x,y)=\sum^m_{k=0}[g_{(xy)^k}]_{\alpha_1\ldots\alpha_k}
|
|
||||||
\left[\matrix{x\cr y}\right]^{\alpha_1\ldots\alpha_k}$$
|
|
||||||
and for a given $x$ it makes a polynomial
|
|
||||||
$$Q(y)=P(x,y).$$
|
|
||||||
|
|
||||||
The algorithm for each full symmetry $(xy)^k$ works with subtensors (slices) of
|
|
||||||
symmetry $x^iy^j$ (with $i+j=k$), and contracts these subtensors with respect to
|
|
||||||
$x^i$ to obtain a tensor of full symmetry $y^j$. Since the column
|
|
||||||
$x^i$ is calculated by |PowerProvider| we cycle for $i=1,...,m$. Then
|
|
||||||
we have to add everything for $i=0$.
|
|
||||||
|
|
||||||
The code works as follows: For slicing purposes we need stack sizes
|
|
||||||
|ss| corresponding to lengths of $x$ and $y$, and then identity |pp|
|
|
||||||
for unfolding a symmetry of the slice to obtain stack coordinates of
|
|
||||||
the slice. Then we do the calculations for $i=1,\ldots,m$ and then for
|
|
||||||
$i=0$.
|
|
||||||
|
|
||||||
@<|TensorPolynomial| contract constructor code@>=
|
|
||||||
TensorPolynomial(const TensorPolynomial<_Ttype, _TGStype, _Stype>& tp, const Vector& xval)
|
|
||||||
: TensorContainer<_Ttype>(1),
|
|
||||||
nr(tp.nrows()), nv(tp.nvars() - xval.length()), maxdim(0)
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(nvars() < 0,
|
|
||||||
"Length of xval too big in TensorPolynomial contract constructor");
|
|
||||||
IntSequence ss(2);@+ ss[0] = xval.length();@+ ss[1] = nvars();
|
|
||||||
IntSequence pp(2);@+ pp[0] = 0;@+ pp[1] = 1;
|
|
||||||
|
|
||||||
@<do contraction for all $i>0$@>;
|
|
||||||
@<do contraction for $i=0$@>;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we setup the |PowerProvider|, and cycle through
|
|
||||||
$i=1,\ldots,m$. Within the loop we cycle through $j=0,\ldots,m-i$. If
|
|
||||||
there is a tensor with symmetry $(xy)^{i+j}$ in the original
|
|
||||||
polynomial, we make its slice with symmetry $x^iy^j$, and
|
|
||||||
|contractAndAdd| it to the tensor |ten| in the |this| polynomial with
|
|
||||||
a symmetry $y^j$.
|
|
||||||
|
|
||||||
Note three things: First, the tensor |ten| is either created and put
|
|
||||||
to |this| container or just got from the container, this is done in
|
|
||||||
|@<initialize |ten| of dimension |j|@>|. Second, the contribution to
|
|
||||||
the |ten| tensor must be multiplied by $\left(\matrix{i+j\cr
|
|
||||||
j}\right)$, since there are exactly that number of slices of
|
|
||||||
$(xy)^{i+j}$ of the symmetry $x^iy^j$ and all must be added. Third,
|
|
||||||
the tensor |ten| is fully symmetric and |_TGStype::contractAndAdd|
|
|
||||||
works with general symmetry, that is why we have to in-place convert
|
|
||||||
fully symmetric |ten| to a general symmetry tensor.
|
|
||||||
|
|
||||||
@<do contraction for all $i>0$@>=
|
|
||||||
PowerProvider pwp(xval);
|
|
||||||
for (int i = 1; i <= tp.maxdim; i++) {
|
|
||||||
const _Stype& xpow = pwp.getNext((const _Stype*)NULL);
|
|
||||||
for (int j = 0; j <= tp.maxdim-i; j++) {
|
|
||||||
if (tp.check(Symmetry(i+j))) {
|
|
||||||
@<initialize |ten| of dimension |j|@>;
|
|
||||||
Symmetry sym(i,j);
|
|
||||||
IntSequence coor(sym, pp);
|
|
||||||
_TGStype slice(*(tp.get(Symmetry(i+j))), ss, coor, TensorDimens(sym, ss));
|
|
||||||
slice.mult(Tensor::noverk(i+j, j));
|
|
||||||
_TGStype tmp(*ten);
|
|
||||||
slice.contractAndAdd(0, tmp, xpow);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This is easy. The code is equivalent to code |@<do contraction for
|
|
||||||
all $i>0$@>| as for $i=0$. The contraction here takes a form of a
|
|
||||||
simple addition.
|
|
||||||
|
|
||||||
@<do contraction for $i=0$@>=
|
|
||||||
for (int j = 0; j <= tp.maxdim; j++) {
|
|
||||||
if (tp.check(Symmetry(j))) {
|
|
||||||
@<initialize |ten| of dimension |j|@>;
|
|
||||||
Symmetry sym(0, j);
|
|
||||||
IntSequence coor(sym, pp);
|
|
||||||
_TGStype slice(*(tp.get(Symmetry(j))), ss, coor, TensorDimens(sym, ss));
|
|
||||||
ten->add(1.0, slice);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ The pointer |ten| is either a new tensor or got from |this| container.
|
|
||||||
@<initialize |ten| of dimension |j|@>=
|
|
||||||
_Ttype* ten;
|
|
||||||
if (_Tparent::check(Symmetry(j))) {
|
|
||||||
ten = _Tparent::get(Symmetry(j));
|
|
||||||
} else {
|
|
||||||
ten = new _Ttype(nrows(), nvars(), j);
|
|
||||||
ten->zeros();
|
|
||||||
insert(ten);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Here we cycle up to the maximum dimension, and if a tensor exists in
|
|
||||||
the container, then we multiply it with the Kronecker power of the
|
|
||||||
vector supplied by |PowerProvider|.
|
|
||||||
|
|
||||||
@<|TensorPolynomial::evalTrad| code@>=
|
|
||||||
void evalTrad(Vector& out, const ConstVector& v) const
|
|
||||||
{
|
|
||||||
if (_Tparent::check(Symmetry(0)))
|
|
||||||
out = _Tparent::get(Symmetry(0))->getData();
|
|
||||||
else
|
|
||||||
out.zeros();
|
|
||||||
|
|
||||||
PowerProvider pp(v);
|
|
||||||
for (int d = 1; d <= maxdim; d++) {
|
|
||||||
const _Stype& p = pp.getNext((const _Stype*)NULL);
|
|
||||||
Symmetry cs(d);
|
|
||||||
if (_Tparent::check(cs)) {
|
|
||||||
const _Ttype* t = _Tparent::get(cs);
|
|
||||||
t->multaVec(out, p.getData());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we construct by contraction |maxdim-1| tensor first, and then
|
|
||||||
cycle. The code is clear, the only messy thing is |new| and |delete|.
|
|
||||||
|
|
||||||
@<|TensorPolynomial::evalHorner| code@>=
|
|
||||||
void evalHorner(Vector& out, const ConstVector& v) const
|
|
||||||
{
|
|
||||||
if (_Tparent::check(Symmetry(0)))
|
|
||||||
out = _Tparent::get(Symmetry(0))->getData();
|
|
||||||
else
|
|
||||||
out.zeros();
|
|
||||||
|
|
||||||
if (maxdim == 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
_Ttype* last;
|
|
||||||
if (maxdim == 1)
|
|
||||||
last = new _Ttype(*(_Tparent::get(Symmetry(1))));
|
|
||||||
else
|
|
||||||
last = new _Ttype(*(_Tparent::get(Symmetry(maxdim))), v);
|
|
||||||
for (int d = maxdim-1; d >=1; d--) {
|
|
||||||
Symmetry cs(d);
|
|
||||||
if (_Tparent::check(cs)) {
|
|
||||||
const _Ttype* nt = _Tparent::get(cs);
|
|
||||||
last->add(1.0, ConstTwoDMatrix(*nt));
|
|
||||||
}
|
|
||||||
if (d > 1) {
|
|
||||||
_Ttype* new_last = new _Ttype(*last, v);
|
|
||||||
delete last;
|
|
||||||
last = new_last;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
last->multaVec(out, v);
|
|
||||||
delete last;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Before a tensor is inserted, we check for the number of rows, and
|
|
||||||
number of variables. Then we insert and update the |maxdim|.
|
|
||||||
|
|
||||||
@<|TensorPolynomial::insert| code@>=
|
|
||||||
void insert(_ptr t)
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(t->nrows() != nr,
|
|
||||||
"Wrong number of rows in TensorPolynomial::insert");
|
|
||||||
TL_RAISE_IF(t->nvar() != nv,
|
|
||||||
"Wrong number of variables in TensorPolynomial::insert");
|
|
||||||
TensorContainer<_Ttype>::insert(t);
|
|
||||||
if (maxdim < t->dimen())
|
|
||||||
maxdim = t->dimen();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ The polynomial takes the form
|
|
||||||
$$\sum_{i=0}^n{1\over i!}\left[g_{y^i}\right]_{\alpha_1\ldots\alpha_i}
|
|
||||||
\left[y\right]^{\alpha_1}\ldots\left[y\right]^{\alpha_i},$$ where
|
|
||||||
$\left[g_{y^i}\right]$ are $i$-order derivatives of the polynomial. We
|
|
||||||
assume that ${1\over i!}\left[g_{y^i}\right]$ are items in the tensor
|
|
||||||
container. This method differentiates the polynomial by one order to
|
|
||||||
yield:
|
|
||||||
$$\sum_{i=1}^n{1\over i!}\left[i\cdot g_{y^i}\right]_{\alpha_1\ldots\alpha_i}
|
|
||||||
\left[y\right]^{\alpha_1}\ldots\left[y\right]^{\alpha_{i-1}},$$
|
|
||||||
where $\left[i\cdot{1\over i!}\cdot g_{y^i}\right]$ are put to the container.
|
|
||||||
|
|
||||||
A polynomial can be derivative of some order, and the order cannot be
|
|
||||||
recognized from the object. That is why we need to input the order.
|
|
||||||
|
|
||||||
@<|TensorPolynomial::derivative| code@>=
|
|
||||||
void derivative(int k)
|
|
||||||
{
|
|
||||||
for (int d = 1; d <= maxdim; d++) {
|
|
||||||
if (_Tparent::check(Symmetry(d))) {
|
|
||||||
_Ttype* ten = _Tparent::get(Symmetry(d));
|
|
||||||
ten->mult((double) max((d-k), 0));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Now let us suppose that we have an |s| order derivative of a
|
|
||||||
polynomial whose $i$ order derivatives are $\left[g_{y^i}\right]$, so
|
|
||||||
we have
|
|
||||||
$$\sum_{i=s}^n{1\over i!}\left[g_{y^i}\right]_{\alpha_1\ldots\alpha_i}
|
|
||||||
\prod_{k=1}^{i-s}\left[y\right]^{\alpha_k},$$
|
|
||||||
where ${1\over i!}\left[g_{y^i}\right]$ are tensors in the container.
|
|
||||||
|
|
||||||
This methods performs this evaluation. The result is an |s| dimensional
|
|
||||||
tensor. Note that when combined with the method |derivative|, they
|
|
||||||
evaluate a derivative of some order. For example a sequence of calls
|
|
||||||
|g.derivative(0)|, |g.derivative(1)| and |der=g.evalPartially(2, v)|
|
|
||||||
calculates $2!$ multiple of the second derivative of |g| at |v|.
|
|
||||||
|
|
||||||
@<|TensorPolynomial::evalPartially| code@>=
|
|
||||||
_Ttype* evalPartially(int s, const ConstVector& v)
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.length() != nvars(),
|
|
||||||
"Wrong length of vector for TensorPolynomial::evalPartially");
|
|
||||||
|
|
||||||
_Ttype* res = new _Ttype(nrows(), nvars(), s);
|
|
||||||
res->zeros();
|
|
||||||
|
|
||||||
if (_Tparent::check(Symmetry(s)))
|
|
||||||
res->add(1.0, *(_Tparent::get(Symmetry(s))));
|
|
||||||
|
|
||||||
for (int d = s+1; d <= maxdim; d++) {
|
|
||||||
if (_Tparent::check(Symmetry(d))) {
|
|
||||||
const _Ttype& ltmp = *(_Tparent::get(Symmetry(d)));
|
|
||||||
_Ttype* last = new _Ttype(ltmp);
|
|
||||||
for (int j = 0; j < d - s; j++) {
|
|
||||||
_Ttype* newlast = new _Ttype(*last, v);
|
|
||||||
delete last;
|
|
||||||
last = newlast;
|
|
||||||
}
|
|
||||||
res->add(1.0, *last);
|
|
||||||
delete last;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This just gives a name to unfolded tensor polynomial.
|
|
||||||
@<|UTensorPolynomial| class declaration@>=
|
|
||||||
class FTensorPolynomial;
|
|
||||||
class UTensorPolynomial : public TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor> {
|
|
||||||
public:@;
|
|
||||||
UTensorPolynomial(int rows, int vars)
|
|
||||||
: TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(rows, vars)@+ {}
|
|
||||||
UTensorPolynomial(const UTensorPolynomial& up, int k)
|
|
||||||
: TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(up, k)@+ {}
|
|
||||||
UTensorPolynomial(const FTensorPolynomial& fp);
|
|
||||||
UTensorPolynomial(const UTensorPolynomial& tp, const Vector& xval)
|
|
||||||
: TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(tp, xval)@+ {}
|
|
||||||
UTensorPolynomial(int first_row, int num, UTensorPolynomial& tp)
|
|
||||||
: TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(first_row, num, tp)@+ {}
|
|
||||||
};
|
|
||||||
|
|
||||||
@ This just gives a name to folded tensor polynomial.
|
|
||||||
@<|FTensorPolynomial| class declaration@>=
|
|
||||||
class FTensorPolynomial : public TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor> {
|
|
||||||
public:@;
|
|
||||||
FTensorPolynomial(int rows, int vars)
|
|
||||||
: TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(rows, vars)@+ {}
|
|
||||||
FTensorPolynomial(const FTensorPolynomial& fp, int k)
|
|
||||||
: TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(fp, k)@+ {}
|
|
||||||
FTensorPolynomial(const UTensorPolynomial& up);
|
|
||||||
FTensorPolynomial(const FTensorPolynomial& tp, const Vector& xval)
|
|
||||||
: TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(tp, xval)@+ {}
|
|
||||||
FTensorPolynomial(int first_row, int num, FTensorPolynomial& tp)
|
|
||||||
: TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(first_row, num, tp)@+ {}
|
|
||||||
};
|
|
||||||
|
|
||||||
@ The compact form of |TensorPolynomial| is in fact a full symmetry
|
|
||||||
tensor, with the number of variables equal to the number of variables
|
|
||||||
of the polynomial plus 1 for $1$.
|
|
||||||
|
|
||||||
@<|CompactPolynomial| class declaration@>=
|
|
||||||
template <class _Ttype, class _TGStype, class _Stype>@;
|
|
||||||
class CompactPolynomial : public _Ttype {
|
|
||||||
public:@;
|
|
||||||
@<|CompactPolynomial| constructor code@>;
|
|
||||||
@<|CompactPolynomial::eval| method code@>;
|
|
||||||
};
|
|
||||||
|
|
||||||
@ This constructor copies matrices from the given tensor polynomial to
|
|
||||||
the appropriate location in this matrix. It creates a dummy tensor
|
|
||||||
|dum| with two variables (one corresponds to $1$, the other to
|
|
||||||
$x$). The index goes through this dummy tensor and the number of
|
|
||||||
columns of the folded/unfolded general symmetry tensor corresponding
|
|
||||||
to the selections of $1$ or $x$ given by the index. Length of $1$ is
|
|
||||||
one, and length of $x$ is |pol.nvars()|. This nvs information is
|
|
||||||
stored in |dumnvs|. The symmetry of this general symmetry dummy tensor
|
|
||||||
|dumgs| is given by a number of ones and x's in the index. We then
|
|
||||||
copy the matrix, if it exists in the polynomial and increase |offset|
|
|
||||||
for the following cycle.
|
|
||||||
|
|
||||||
@<|CompactPolynomial| constructor code@>=
|
|
||||||
CompactPolynomial(const TensorPolynomial<_Ttype, _TGStype, _Stype>& pol)
|
|
||||||
: _Ttype(pol.nrows(), pol.nvars()+1, pol.getMaxDim())
|
|
||||||
{
|
|
||||||
_Ttype::zeros();
|
|
||||||
|
|
||||||
IntSequence dumnvs(2);
|
|
||||||
dumnvs[0] = 1;
|
|
||||||
dumnvs[1] = pol.nvars();
|
|
||||||
|
|
||||||
int offset = 0;
|
|
||||||
_Ttype dum(0, 2, _Ttype::dimen());
|
|
||||||
for (Tensor::index i = dum.begin(); i != dum.end(); ++i) {
|
|
||||||
int d = i.getCoor().sum();
|
|
||||||
Symmetry symrun(_Ttype::dimen()-d, d);
|
|
||||||
_TGStype dumgs(0, TensorDimens(symrun, dumnvs));
|
|
||||||
if (pol.check(Symmetry(d))) {
|
|
||||||
TwoDMatrix subt(*this, offset, dumgs.ncols());
|
|
||||||
subt.add(1.0, *(pol.get(Symmetry(d))));
|
|
||||||
}
|
|
||||||
offset += dumgs.ncols();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ We create |x1| to be a concatenation of $1$ and $x$, and then create
|
|
||||||
|PowerProvider| to make a corresponding power |xpow| of |x1|, and
|
|
||||||
finally multiply this matrix with the power.
|
|
||||||
|
|
||||||
@<|CompactPolynomial::eval| method code@>=
|
|
||||||
void eval(Vector& out, const ConstVector& v) const
|
|
||||||
{
|
|
||||||
TL_RAISE_IF(v.length()+1 != _Ttype::nvar(),
|
|
||||||
"Wrong input vector length in CompactPolynomial::eval");
|
|
||||||
TL_RAISE_IF(out.length() != _Ttype::nrows(),
|
|
||||||
"Wrong output vector length in CompactPolynomial::eval");
|
|
||||||
|
|
||||||
Vector x1(v.length()+1);
|
|
||||||
Vector x1p(x1, 1, v.length());
|
|
||||||
x1p = v;
|
|
||||||
x1[0] = 1.0;
|
|
||||||
|
|
||||||
if (_Ttype::dimen() == 0)
|
|
||||||
out = ConstVector(*this, 0);
|
|
||||||
else {
|
|
||||||
PowerProvider pp(x1);
|
|
||||||
const _Stype& xpow = pp.getNext((const _Stype*)NULL);
|
|
||||||
for (int i = 1; i < _Ttype::dimen(); i++)
|
|
||||||
xpow = pp.getNext((const _Stype*)NULL);
|
|
||||||
multVec(0.0, out, 1.0, xpow);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Specialization of the |CompactPolynomial| for unfolded tensor.
|
|
||||||
@<|UCompactPolynomial| class declaration@>=
|
|
||||||
class UCompactPolynomial : public CompactPolynomial<UFSTensor, UGSTensor, URSingleTensor> {
|
|
||||||
public:@;
|
|
||||||
UCompactPolynomial(const UTensorPolynomial& upol)
|
|
||||||
: CompactPolynomial<UFSTensor, UGSTensor, URSingleTensor>(upol)@+ {}
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Specialization of the |CompactPolynomial| for folded tensor.
|
|
||||||
@<|FCompactPolynomial| class declaration@>=
|
|
||||||
class FCompactPolynomial : public CompactPolynomial<FFSTensor, FGSTensor, FRSingleTensor> {
|
|
||||||
public:@;
|
|
||||||
FCompactPolynomial(const FTensorPolynomial& fpol)
|
|
||||||
: CompactPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(fpol)@+ {}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt t\_polynomial.h} file.
|
|
|
@ -0,0 +1,222 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "tensor.hh"
|
||||||
|
#include "tl_exception.hh"
|
||||||
|
#include "tl_static.hh"
|
||||||
|
|
||||||
|
// |Tensor| static methods
|
||||||
|
/* Here we implement calculation of $\pmatrix{n\cr k}$ where $n-k$ is
|
||||||
|
usually bigger than $k$.
|
||||||
|
|
||||||
|
Also we implement $a^b$. */
|
||||||
|
|
||||||
|
/* Binomial coefficient $\pmatrix{n\cr k}$.  Delegated to the Pascal
   triangle precomputed in the library-wide |tls| object (see
   tl_static.hh), so the call is a table lookup. */
int
Tensor::noverk(int n, int k)
{
  return tls.ptriang->noverk(n, k);
}
|
||||||
|
|
||||||
|
int
|
||||||
|
Tensor::power(int a, int b)
|
||||||
|
{
|
||||||
|
int res = 1;
|
||||||
|
for (int i = 0; i < b; i++)
|
||||||
|
res *= a;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
// |Tensor::noverseq_ip| static method
|
||||||
|
/* Here we calculate a generalized combination number
|
||||||
|
$\left(\matrix{a\cr b_1,\ldots,b_n}\right)$, where $a=b_1+\ldots+
|
||||||
|
b_n$. We use the identity
|
||||||
|
$$\left(\matrix{a\cr b_1,\ldots,b_n}\right)=\left(\matrix{b_1+b_2\cr b_1}\right)\cdot
|
||||||
|
\left(\matrix{a\cr b_1+b_2,b_3,\ldots,b_n}\right)$$
|
||||||
|
|
||||||
|
This number is exactly a number of unfolded indices corresponding to
|
||||||
|
one folded index, where the sequence $b_1,\ldots,b_n$ is the symmetry
|
||||||
|
of the index. */
|
||||||
|
|
||||||
|
int
Tensor::noverseq_ip(IntSequence &s)
{
  // Base case: an empty or single-element sequence has one index.
  if (s.size() == 0 || s.size() == 1)
    return 1;
  // NOTE: this routine is destructive — it folds s[0] into s[1] in
  // place (hence the "_ip" suffix); callers must pass a disposable
  // copy, as the public |noverseq| wrapper is meant to do.
  s[1] += s[0];
  // Apply the identity recursively on the shortened sequence s[1..end).
  return noverk(s[1], s[0]) * noverseq(IntSequence(s, 1, s.size()));
}
|
||||||
|
|
||||||
|
/* Here we increment a given sequence within full symmetry given by
|
||||||
|
|nv|, which is number of variables in each dimension. The underlying
|
||||||
|
tensor is unfolded, so we increase the rightmost by one, and if it is
|
||||||
|
|nv| we zero it and increase the next one to the left. */
|
||||||
|
|
||||||
|
void
|
||||||
|
UTensor::increment(IntSequence &v, int nv)
|
||||||
|
{
|
||||||
|
if (v.size() == 0)
|
||||||
|
return;
|
||||||
|
int i = v.size()-1;
|
||||||
|
v[i]++;
|
||||||
|
while (i > 0 && v[i] == nv)
|
||||||
|
{
|
||||||
|
v[i] = 0;
|
||||||
|
v[--i]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This is dual to |UTensor::increment(IntSequence& v, int nv)|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
UTensor::decrement(IntSequence &v, int nv)
|
||||||
|
{
|
||||||
|
if (v.size() == 0)
|
||||||
|
return;
|
||||||
|
int i = v.size()-1;
|
||||||
|
v[i]--;
|
||||||
|
while (i > 0 && v[i] == -1)
|
||||||
|
{
|
||||||
|
v[i] = nv -1;
|
||||||
|
v[--i]--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we increment index for general symmetry for unfolded
|
||||||
|
storage. The sequence |nvmx| assigns for each coordinate a number of
|
||||||
|
variables. Since the storage is unfolded, we do not need information
|
||||||
|
about what variables are symmetric, everything necessary is given by
|
||||||
|
|nvmx|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
UTensor::increment(IntSequence &v, const IntSequence &nvmx)
|
||||||
|
{
|
||||||
|
if (v.size() == 0)
|
||||||
|
return;
|
||||||
|
int i = v.size()-1;
|
||||||
|
v[i]++;
|
||||||
|
while (i > 0 && v[i] == nvmx[i])
|
||||||
|
{
|
||||||
|
v[i] = 0;
|
||||||
|
v[--i]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This is a dual code to |UTensor::increment(IntSequence& v, const
|
||||||
|
IntSequence& nvmx)|. */
|
||||||
|
|
||||||
|
void
|
||||||
|
UTensor::decrement(IntSequence &v, const IntSequence &nvmx)
|
||||||
|
{
|
||||||
|
if (v.size() == 0)
|
||||||
|
return;
|
||||||
|
int i = v.size()-1;
|
||||||
|
v[i]--;
|
||||||
|
while (i > 0 && v[i] == -1)
|
||||||
|
{
|
||||||
|
v[i] = nvmx[i] -1;
|
||||||
|
v[--i]--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here we return an offset for a given coordinates of unfolded full
|
||||||
|
symmetry tensor. This is easy. */
|
||||||
|
|
||||||
|
int
|
||||||
|
UTensor::getOffset(const IntSequence &v, int nv)
|
||||||
|
{
|
||||||
|
int pow = 1;
|
||||||
|
int res = 0;
|
||||||
|
for (int i = v.size()-1; i >= 0; i--)
|
||||||
|
{
|
||||||
|
res += v[i]*pow;
|
||||||
|
pow *= nv;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Also easy. */
|
||||||
|
|
||||||
|
int
|
||||||
|
UTensor::getOffset(const IntSequence &v, const IntSequence &nvmx)
|
||||||
|
{
|
||||||
|
int pow = 1;
|
||||||
|
int res = 0;
|
||||||
|
for (int i = v.size()-1; i >= 0; i--)
|
||||||
|
{
|
||||||
|
res += v[i]*pow;
|
||||||
|
pow *= nvmx[i];
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Decrementing of coordinates of folded index is not that easy. Note
|
||||||
|
that if a trailing part of coordinates is $(b, a, a, a)$ (for
|
||||||
|
instance) with $b<a$, then a preceding coordinates are $(b, a-1, n-1,
|
||||||
|
n-1)$, where $n$ is a number of variables |nv|. So we find the left
|
||||||
|
most element which is equal to the last element, decrease it by one,
|
||||||
|
and then set all elements to the right to $n-1$. */
|
||||||
|
|
||||||
|
void
|
||||||
|
FTensor::decrement(IntSequence &v, int nv)
|
||||||
|
{
|
||||||
|
int i = v.size()-1;
|
||||||
|
while (i > 0 && v[i-1] == v[i])
|
||||||
|
i--;
|
||||||
|
v[i]--;
|
||||||
|
for (int j = i+1; j < v.size(); j++)
|
||||||
|
v[j] = nv-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This calculates order of the given index of our ordering of
|
||||||
|
indices. In order to understand how it works, let us take number of
|
||||||
|
variables $n$ and dimension $k$, and write down all the possible
|
||||||
|
combinations of indices in our ordering. For example for $n=4$ and
|
||||||
|
$k=3$, the sequence looks as:
|
||||||
|
|
||||||
|
\def\tr#1#2#3{\hbox{\rlap{#1}\hskip 0.7em\rlap{#2}\hskip 0.7em\rlap{#3}\hskip 0.7em}}
|
||||||
|
\halign{\tabskip=3em \hskip2cm #&#&#&#\cr
|
||||||
|
\tr 000 &\tr 111 &\tr 222 &\tr 333\cr
|
||||||
|
\tr 001 &\tr 112 &\tr 223 \cr
|
||||||
|
\tr 002 &\tr 113 &\tr 233 \cr
|
||||||
|
\tr 003 &\tr 122 \cr
|
||||||
|
\tr 011 &\tr 123\cr
|
||||||
|
\tr 012 &\tr 133\cr
|
||||||
|
\tr 013\cr
|
||||||
|
\tr 022\cr
|
||||||
|
\tr 023\cr
|
||||||
|
\tr 033\cr
|
||||||
|
}
|
||||||
|
|
||||||
|
Now observe, that a number of sequences starting with zero is the same
|
||||||
|
as total number of sequences with the same number of variables but
|
||||||
|
with dimension minus one. More generally, if $S_{n,k}$ denotes number
|
||||||
|
of indices of $n$ variables and dimension $k$, then the number of
|
||||||
|
indices beginning with $m$ is exactly $S_{n-m,k-1}$. This is because $m$
|
||||||
|
can be subtracted from all items, and we obtain sequence of indices of
|
||||||
|
$n-m$ variables. So we have formula:
|
||||||
|
$$S_{n,k}=S_{n,k-1}+S_{n-1,k-1}+\ldots+S_{1,k-1}$$
|
||||||
|
|
||||||
|
Now it is easy to calculate offset of index of the form
|
||||||
|
$(m,\ldots,m)$. It is a sum of all above it, this is
|
||||||
|
$S_{n,k-1}+\ldots+S_{n-m,k-1}$. We know that $S_{n,k}=\pmatrix{n+k-1\cr
|
||||||
|
k}$. Using above formula, we can calculate offset of $(m,\ldots,m)$ as
|
||||||
|
$$\pmatrix{n+k-1\cr k}-\pmatrix{n-m+k-1\cr k}$$
|
||||||
|
|
||||||
|
The offset of general index $(m_1,m_2,\ldots,m_k)$ is calculated
|
||||||
|
recursively, since it is offset of $(m_1,\ldots,m_1)$ for $n$
|
||||||
|
variables plus offset of $(m_2-m_1,m_3-m_1,\ldots,m_k-m_1)$ for
|
||||||
|
$n-m_1$ variables. */
|
||||||
|
|
||||||
|
int
FTensor::getOffsetRecurse(IntSequence &v, int nv)
{
  if (v.size() == 0)
    return 0;
  // Length of the initial run of equal elements; the recursion strips
  // the whole run at once.
  int prefix = v.getPrefixLength();
  int m = v[0];
  int k = v.size();
  // Offset of (m,...,m), i.e. S(n,k) - S(n-m,k) with
  // S(n,k) = C(n+k-1, k); see the derivation in the comment above.
  int s1 = noverk(nv+k-1, k) - noverk(nv-m+k-1, k);
  // NOTE(review): IntSequence(v, prefix, k) presumably creates a
  // sub-sequence sharing storage with |v| (which would explain the
  // non-const reference parameter), so add(-m) clobbers the caller's
  // data — confirm against int_sequence.hh before reusing |v|.
  IntSequence subv(v, prefix, k);
  subv.add(-m);
  // Offset of the shifted remainder within nv-m variables.
  int s2 = getOffsetRecurse(subv, nv-m);
  return s1+s2;
}
|
|
@ -1,229 +0,0 @@
|
||||||
@q $Id: tensor.cweb 429 2005-08-16 15:20:09Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@ Start of {\tt tensor.cpp} file.
|
|
||||||
|
|
||||||
@c
|
|
||||||
#include "tensor.h"
|
|
||||||
#include "tl_exception.h"
|
|
||||||
#include "tl_static.h"
|
|
||||||
|
|
||||||
@<|Tensor| static methods@>;
|
|
||||||
@<|Tensor::noverseq_ip| static method@>;
|
|
||||||
@<|UTensor::increment| code 1@>;
|
|
||||||
@<|UTensor::decrement| code 1@>;
|
|
||||||
@<|UTensor::increment| code 2@>;
|
|
||||||
@<|UTensor::decrement| code 2@>;
|
|
||||||
@<|UTensor::getOffset| code 1@>;
|
|
||||||
@<|UTensor::getOffset| code 2@>;
|
|
||||||
@<|FTensor::decrement| code@>;
|
|
||||||
@<|FTensor::getOffsetRecurse| code@>;
|
|
||||||
|
|
||||||
@ Here we implement calculation of $\pmatrix{n\cr k}$ where $n-k$ is
|
|
||||||
usually bigger than $k$.
|
|
||||||
|
|
||||||
Also we implement $a^b$.
|
|
||||||
|
|
||||||
@<|Tensor| static methods@>=
|
|
||||||
int Tensor::noverk(int n, int k)
|
|
||||||
{
|
|
||||||
return tls.ptriang->noverk(n,k);
|
|
||||||
}
|
|
||||||
@#
|
|
||||||
int Tensor::power(int a, int b)
|
|
||||||
{
|
|
||||||
int res = 1;
|
|
||||||
for (int i = 0; i < b; i++)
|
|
||||||
res *= a;
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we calculate a generalized combination number
|
|
||||||
$\left(\matrix{a\cr b_1,\ldots,b_n}\right)$, where $a=b_1+\ldots+
|
|
||||||
b_n$. We use the identity
|
|
||||||
$$\left(\matrix{a\cr b_1,\ldots,b_n}\right)=\left(\matrix{b_1+b_2\cr b_1}\right)\cdot
|
|
||||||
\left(\matrix{a\cr b_1+b_2,b_3,\ldots,b_n}\right)$$
|
|
||||||
|
|
||||||
This number is exactly a number of unfolded indices corresponding to
|
|
||||||
one folded index, where the sequence $b_1,\ldots,b_n$ is the symmetry
|
|
||||||
of the index.
|
|
||||||
|
|
||||||
@<|Tensor::noverseq_ip| static method@>=
|
|
||||||
int Tensor::noverseq_ip(IntSequence& s)
|
|
||||||
{
|
|
||||||
if (s.size() == 0 || s.size() == 1)
|
|
||||||
return 1;
|
|
||||||
s[1] += s[0];
|
|
||||||
return noverk(s[1],s[0]) * noverseq(IntSequence(s, 1, s.size()));
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we increment a given sequence within full symmetry given by
|
|
||||||
|nv|, which is number of variables in each dimension. The underlying
|
|
||||||
tensor is unfolded, so we increase the rightmost by one, and if it is
|
|
||||||
|nv| we zero it and increase the next one to the left.
|
|
||||||
|
|
||||||
@<|UTensor::increment| code 1@>=
|
|
||||||
void UTensor::increment(IntSequence& v, int nv)
|
|
||||||
{
|
|
||||||
if (v.size() == 0)
|
|
||||||
return;
|
|
||||||
int i = v.size()-1;
|
|
||||||
v[i]++;
|
|
||||||
while (i > 0 && v[i] == nv) {
|
|
||||||
v[i] = 0;
|
|
||||||
v[--i]++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This is dual to |UTensor::increment(IntSequence& v, int nv)|.
|
|
||||||
|
|
||||||
@<|UTensor::decrement| code 1@>=
|
|
||||||
void UTensor::decrement(IntSequence& v, int nv)
|
|
||||||
{
|
|
||||||
if (v.size() == 0)
|
|
||||||
return;
|
|
||||||
int i = v.size()-1;
|
|
||||||
v[i]--;
|
|
||||||
while (i > 0 && v[i] == -1) {
|
|
||||||
v[i] = nv -1;
|
|
||||||
v[--i]--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we increment index for general symmetry for unfolded
|
|
||||||
storage. The sequence |nvmx| assigns for each coordinate a number of
|
|
||||||
variables. Since the storage is unfolded, we do not need information
|
|
||||||
about what variables are symmetric, everything necessary is given by
|
|
||||||
|nvmx|.
|
|
||||||
|
|
||||||
@<|UTensor::increment| code 2@>=
|
|
||||||
void UTensor::increment(IntSequence& v, const IntSequence& nvmx)
|
|
||||||
{
|
|
||||||
if (v.size() == 0)
|
|
||||||
return;
|
|
||||||
int i = v.size()-1;
|
|
||||||
v[i]++;
|
|
||||||
while (i > 0 && v[i] == nvmx[i]) {
|
|
||||||
v[i] = 0;
|
|
||||||
v[--i]++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This is a dual code to |UTensor::increment(IntSequence& v, const
|
|
||||||
IntSequence& nvmx)|.
|
|
||||||
|
|
||||||
@<|UTensor::decrement| code 2@>=
|
|
||||||
void UTensor::decrement(IntSequence& v, const IntSequence& nvmx)
|
|
||||||
{
|
|
||||||
if (v.size() == 0)
|
|
||||||
return;
|
|
||||||
int i = v.size()-1;
|
|
||||||
v[i]--;
|
|
||||||
while (i > 0 && v[i] == -1) {
|
|
||||||
v[i] = nvmx[i] -1;
|
|
||||||
v[--i]--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Here we return an offset for a given coordinates of unfolded full
|
|
||||||
symmetry tensor. This is easy.
|
|
||||||
|
|
||||||
@<|UTensor::getOffset| code 1@>=
|
|
||||||
int UTensor::getOffset(const IntSequence& v, int nv)
|
|
||||||
{
|
|
||||||
int pow = 1;
|
|
||||||
int res = 0;
|
|
||||||
for (int i = v.size()-1; i >= 0; i--) {
|
|
||||||
res += v[i]*pow;
|
|
||||||
pow *= nv;
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ Also easy.
|
|
||||||
@<|UTensor::getOffset| code 2@>=
|
|
||||||
int UTensor::getOffset(const IntSequence& v, const IntSequence& nvmx)
|
|
||||||
{
|
|
||||||
int pow = 1;
|
|
||||||
int res = 0;
|
|
||||||
for (int i = v.size()-1; i >= 0; i--) {
|
|
||||||
res += v[i]*pow;
|
|
||||||
pow *= nvmx[i];
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@ Decrementing of coordinates of folded index is not that easy. Note
|
|
||||||
that if a trailing part of coordinates is $(b, a, a, a)$ (for
|
|
||||||
instance) with $b<a$, then a preceding coordinates are $(b, a-1, n-1,
|
|
||||||
n-1)$, where $n$ is a number of variables |nv|. So we find the left
|
|
||||||
most element which is equal to the last element, decrease it by one,
|
|
||||||
and then set all elements to the right to $n-1$.
|
|
||||||
|
|
||||||
@<|FTensor::decrement| code@>=
|
|
||||||
void FTensor::decrement(IntSequence& v, int nv)
|
|
||||||
{
|
|
||||||
int i = v.size()-1;
|
|
||||||
while (i > 0 && v[i-1]==v[i])
|
|
||||||
i--;
|
|
||||||
v[i]--;
|
|
||||||
for (int j = i+1; j < v.size(); j++)
|
|
||||||
v[j] = nv-1;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ This calculates order of the given index of our ordering of
|
|
||||||
indices. In order to understand how it works, let us take number of
|
|
||||||
variables $n$ and dimension $k$, and write down all the possible
|
|
||||||
combinations of indices in our ordering. For example for $n=4$ and
|
|
||||||
$k=3$, the sequence looks as:
|
|
||||||
|
|
||||||
\def\tr#1#2#3{\hbox{\rlap{#1}\hskip 0.7em\rlap{#2}\hskip 0.7em\rlap{#3}\hskip 0.7em}}
|
|
||||||
\halign{\tabskip=3em \hskip2cm #&#&#&#\cr
|
|
||||||
\tr 000 &\tr 111 &\tr 222 &\tr 333\cr
|
|
||||||
\tr 001 &\tr 112 &\tr 223 \cr
|
|
||||||
\tr 002 &\tr 113 &\tr 233 \cr
|
|
||||||
\tr 003 &\tr 122 \cr
|
|
||||||
\tr 011 &\tr 123\cr
|
|
||||||
\tr 012 &\tr 133\cr
|
|
||||||
\tr 013\cr
|
|
||||||
\tr 022\cr
|
|
||||||
\tr 023\cr
|
|
||||||
\tr 033\cr
|
|
||||||
}
|
|
||||||
|
|
||||||
Now observe, that a number of sequences starting with zero is the same
|
|
||||||
as total number of sequences with the same number of variables but
|
|
||||||
with dimension minus one. More generally, if $S_{n,k}$ denotes number
|
|
||||||
of indices of $n$ variables and dimension $k$, then the number of
|
|
||||||
indices beginning with $m$ is exactly $S_{n-m,k-1}$. This is because $m$
|
|
||||||
can be subtracted from all items, and we obtain sequence of indices of
|
|
||||||
$n-m$ variables. So we have formula:
|
|
||||||
$$S_{n,k}=S_{n,k-1}+S_{n-1,k-1}+\ldots+S_{1,k-1}$$
|
|
||||||
|
|
||||||
Now it is easy to calculate offset of index of the form
|
|
||||||
$(m,\ldots,m)$. It is a sum of all above it, this is
|
|
||||||
$S_{n,k-1}+\ldots+S_{n-m,k-1}$. We know that $S_{n,k}=\pmatrix{n+k-1\cr
|
|
||||||
k}$. Using above formula, we can calculate offset of $(m,\ldots,m)$ as
|
|
||||||
$$\pmatrix{n+k-1\cr k}-\pmatrix{n-m+k-1\cr k}$$
|
|
||||||
|
|
||||||
The offset of general index $(m_1,m_2,\ldots,m_k)$ is calculated
|
|
||||||
recursively, since it is offset of $(m_1,\ldots,m_1)$ for $n$
|
|
||||||
variables plus offset of $(m_2-m_1,m_3-m_1,\ldots,m_k-m_1)$ for
|
|
||||||
$n-m_1$ variables.
|
|
||||||
|
|
||||||
@<|FTensor::getOffsetRecurse| code@>=
|
|
||||||
int FTensor::getOffsetRecurse(IntSequence& v, int nv)
|
|
||||||
{
|
|
||||||
if (v.size() == 0) return 0;
|
|
||||||
int prefix = v.getPrefixLength();
|
|
||||||
int m = v[0];
|
|
||||||
int k = v.size();
|
|
||||||
int s1 = noverk(nv+k-1,k) - noverk(nv-m+k-1,k);
|
|
||||||
IntSequence subv(v, prefix, k);
|
|
||||||
subv.add(-m);
|
|
||||||
int s2 = getOffsetRecurse(subv, nv-m);
|
|
||||||
return s1+s2;
|
|
||||||
}
|
|
||||||
|
|
||||||
@ End of {\tt tensor.cpp} file.
|
|
|
@ -0,0 +1,309 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// Tensor concept.
|
||||||
|
|
||||||
|
/* Here we define a tensor class. Tensor is a mathematical object
|
||||||
|
corresponding to a $(n+1)$-dimensional array. An element of such array
|
||||||
|
is denoted $[B]_{\alpha_1\ldots\alpha_n}^\beta$, where $\beta$ is a
|
||||||
|
special index and $\alpha_1\ldots\alpha_n$ are other indices. The
|
||||||
|
class |Tensor| and its subclasses view such array as a 2D matrix,
|
||||||
|
where $\beta$ corresponds to one dimension, and
|
||||||
|
$\alpha_1\ldots\alpha_2$ unfold to the other dimension. Whether
|
||||||
|
$\beta$ correspond to rows or columns is decided by tensor subclasses,
|
||||||
|
however, most of our tensors will have rows indexed by $\beta$, and
|
||||||
|
$\alpha_1\ldots\alpha_n$ will unfold column-wise.
|
||||||
|
|
||||||
|
There might be some symmetries in the tensor data. For instance, if
|
||||||
|
$\alpha_1$ is interchanged with $\alpha_3$ and the both elements equal
|
||||||
|
for all possible $\alpha_i$, and $\beta$, then there is a symmetry
|
||||||
|
of $\alpha_1$ and $\alpha_3$.
|
||||||
|
|
||||||
|
For any symmetry, there are basically two possible storages of the
|
||||||
|
data. The first is unfolded storage, which stores all elements
|
||||||
|
regardless the symmetry. The other storage type is folded, which
|
||||||
|
stores only elements which do not repeat. We declare abstract classes
|
||||||
|
for unfolded tensor, and folded tensor.
|
||||||
|
|
||||||
|
Also, here we also define a concept of tensor index which is the
|
||||||
|
$n$-tuple $\alpha_1\ldots\alpha_n$. It is an iterator, which iterates
|
||||||
|
in dependence of symmetry and storage of the underlying tensor.
|
||||||
|
|
||||||
|
Although we do not decide about possible symmetries at this point, it
|
||||||
|
is worth noting that we implement two kinds of symmetries. The first
|
||||||
|
one is a full symmetry where all indices are interchangeable. The
|
||||||
|
second one is a generalization of the first. We define tensor of a
|
||||||
|
symmetry, where there are a few groups of indices interchangeable
|
||||||
|
within a group and not across. Moreover, the groups are required to be
|
||||||
|
consequent partitions of the index $n$-tuple. This is, we do not allow
|
||||||
|
$\alpha_1$ be interchangeable with $\alpha_3$ and not with $\alpha_2$
|
||||||
|
at the same time.
|
||||||
|
|
||||||
|
However, some intermediate results are, in fact, tensors of a symmetry
|
||||||
|
not fitting to our concept. We develop the tensor abstraction for it,
|
||||||
|
but these objects are not used very often. They have limited usage
|
||||||
|
due to their specialized constructor. */
|
||||||
|
|
||||||
|
#ifndef TENSOR_H
|
||||||
|
#define TENSOR_H
|
||||||
|
|
||||||
|
#include "int_sequence.hh"
|
||||||
|
#include "twod_matrix.hh"
|
||||||
|
|
||||||
|
/* The index represents $n$-tuple $\alpha_1\ldots\alpha_n$. Since its
|
||||||
|
movement is dependent on the underlying tensor (with storage and
|
||||||
|
symmetry), we maintain a pointer to that tensor, we maintain the
|
||||||
|
$n$-tuple (or coordinates) as |IntSequence| and also we maintain the
|
||||||
|
offset number (column, or row) of the index in the tensor. The pointer
|
||||||
|
is const, since we do not need to change data through the index.
|
||||||
|
|
||||||
|
Here we require the |tensor| to implement |increment| and |decrement|
|
||||||
|
methods, which calculate following and preceding $n$-tuple. Also, we
|
||||||
|
need to calculate offset number from the given coordinates, so the
|
||||||
|
tensor must implement method |getOffset|. This method is used only in
|
||||||
|
construction of the index from the given coordinates. As the index is
|
||||||
|
created, the offset is automatically incremented, and decremented
|
||||||
|
together with index. The|getOffset| method can be relatively
|
||||||
|
computationally complex. This must be kept in mind. Also we generally
|
||||||
|
suppose that n-tuple of all zeros is the first offset (first columns
|
||||||
|
or row).
|
||||||
|
|
||||||
|
What follows is a definition of index class, the only
|
||||||
|
interesting point is |operator==| which decides only according to
|
||||||
|
offset, not according to the coordinates. This is useful since there
|
||||||
|
can be more than one of coordinate representations of past-the-end
|
||||||
|
index. */
|
||||||
|
|
||||||
|
template<class _Tptr>
class _index
{
  typedef _index<_Tptr> _Self;
  // Non-owning pointer to the tensor which defines storage/symmetry and
  // hence the iteration order; const-ness comes from _Tptr itself.
  _Tptr tensor;
  // Column (or row) number of the current n-tuple within the tensor.
  int offset;
  // The coordinates: the n-tuple alpha_1...alpha_n.
  IntSequence coor;
public:
  // Index of all zeros, which is by convention the first offset.
  _index(_Tptr t, int n)
    : tensor(t), offset(0), coor(n, 0)
  {
  }
  // Index with coordinates |cr| whose offset |c| is already known,
  // avoiding a possibly expensive getOffset() call.
  _index(_Tptr t, const IntSequence &cr, int c)
    : tensor(t), offset(c), coor(cr)
  {
  }
  // Index with coordinates |cr|; the offset is computed by the tensor
  // (can be relatively costly, see the getOffset() implementations).
  _index(_Tptr t, const IntSequence &cr)
    : tensor(t), offset(tensor->getOffset(cr)), coor(cr)
  {
  }
  _index(const _index &ind)
    : tensor(ind.tensor), offset(ind.offset), coor(ind.coor)
  {
  }
  const _Self &
  operator=(const _Self &in)
  {
    tensor = in.tensor; offset = in.offset; coor = in.coor;
    return *this;
  }
  // Advance to the following n-tuple: the tensor updates the
  // coordinates, the offset is kept in sync by a simple increment.
  _Self &
  operator++()
  {
    tensor->increment(coor); offset++; return *this;
  }
  // Step back to the preceding n-tuple; dual of operator++.
  _Self &
  operator--()
  {
    tensor->decrement(coor); offset--; return *this;
  }
  // Dereferencing yields the offset, not the coordinates.
  int
  operator*() const
  {
    return offset;
  }
  // Comparison is by offset only: the past-the-end index may have more
  // than one coordinate representation but a unique offset.
  bool
  operator==(const _index &n) const
  {
    return offset == n.offset;
  }
  bool
  operator!=(const _index &n) const
  {
    return offset != n.offset;
  }
  const IntSequence &
  getCoor() const
  {
    return coor;
  }
  // Debug print: "offset: coordinates".
  void
  print() const
  {
    printf("%4d: ", offset); coor.print();
  }
};
|
||||||
|
|
||||||
|
/* Here is the |Tensor| class, which is nothing else than a simple subclass
|
||||||
|
of |TwoDMatrix|. The unique semantically new member is |dim| which is tensor
|
||||||
|
dimension (length of $\alpha_1\ldots\alpha_n$). We also declare
|
||||||
|
|increment|, |decrement| and |getOffset| methods as pure virtual.
|
||||||
|
|
||||||
|
We also add members for index begin and index end. This is useful,
|
||||||
|
since |begin| and |end| methods do not return instance but only
|
||||||
|
references, which prevent making additional copy of index (for example
|
||||||
|
in for cycles as |in != end()| which would do a copy of index for each
|
||||||
|
cycle). The index begin |in_beg| is constructed as a sequence of all
|
||||||
|
zeros, and |in_end| is constructed from the sequence |last| passed to
|
||||||
|
the constructor, since it depends on subclasses. Also we have to say,
|
||||||
|
along what coordinate is the multidimensional index. This is used only
|
||||||
|
for initialization of |in_end|.
|
||||||
|
|
||||||
|
Also, we declare static auxiliary functions for $\pmatrix{n\cr k}$
|
||||||
|
which is |noverk| and $a^b$, which is |power|. */
|
||||||
|
|
||||||
|
class Tensor : public TwoDMatrix
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
enum indor {along_row, along_col};
|
||||||
|
typedef _index<const Tensor *> index;
|
||||||
|
protected:
|
||||||
|
const index in_beg;
|
||||||
|
const index in_end;
|
||||||
|
int dim;
|
||||||
|
public:
|
||||||
|
Tensor(indor io, const IntSequence &last, int r, int c, int d)
|
||||||
|
: TwoDMatrix(r, c),
|
||||||
|
in_beg(this, d),
|
||||||
|
in_end(this, last, (io == along_row) ? r : c),
|
||||||
|
dim(d)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
Tensor(indor io, const IntSequence &first, const IntSequence &last,
|
||||||
|
int r, int c, int d)
|
||||||
|
: TwoDMatrix(r, c),
|
||||||
|
in_beg(this, first, 0),
|
||||||
|
in_end(this, last, (io == along_row) ? r : c),
|
||||||
|
dim(d)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
Tensor(int first_row, int num, Tensor &t)
|
||||||
|
: TwoDMatrix(first_row, num, t),
|
||||||
|
in_beg(t.in_beg),
|
||||||
|
in_end(t.in_end),
|
||||||
|
dim(t.dim)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
Tensor(const Tensor &t)
|
||||||
|
: TwoDMatrix(t),
|
||||||
|
in_beg(this, t.in_beg.getCoor(), *(t.in_beg)),
|
||||||
|
in_end(this, t.in_end.getCoor(), *(t.in_end)),
|
||||||
|
dim(t.dim)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual ~Tensor()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual void increment(IntSequence &v) const = 0;
|
||||||
|
virtual void decrement(IntSequence &v) const = 0;
|
||||||
|
virtual int getOffset(const IntSequence &v) const = 0;
|
||||||
|
int
|
||||||
|
dimen() const
|
||||||
|
{
|
||||||
|
return dim;
|
||||||
|
}
|
||||||
|
|
||||||
|
const index &
|
||||||
|
begin() const
|
||||||
|
{
|
||||||
|
return in_beg;
|
||||||
|
}
|
||||||
|
const index &
|
||||||
|
end() const
|
||||||
|
{
|
||||||
|
return in_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int noverk(int n, int k);
|
||||||
|
static int power(int a, int b);
|
||||||
|
static int
|
||||||
|
noverseq(const IntSequence &s)
|
||||||
|
{
|
||||||
|
IntSequence seq(s);
|
||||||
|
return noverseq_ip((IntSequence &) s);
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
static int noverseq_ip(IntSequence &s);
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Here is an abstraction for unfolded tensor. We provide a pure
|
||||||
|
virtual method |fold| which returns a new instance of folded tensor of
|
||||||
|
the same symmetry. Also we provide static methods for incrementing and
|
||||||
|
decrementing an index with full symmetry and general symmetry as
|
||||||
|
defined above. */
|
||||||
|
|
||||||
|
class FTensor;
|
||||||
|
class UTensor : public Tensor
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
UTensor(indor io, const IntSequence &last, int r, int c, int d)
|
||||||
|
: Tensor(io, last, r, c, d)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
UTensor(const UTensor &ut)
|
||||||
|
: Tensor(ut)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
UTensor(int first_row, int num, UTensor &t)
|
||||||
|
: Tensor(first_row, num, t)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual ~UTensor()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual FTensor&fold() const = 0;
|
||||||
|
|
||||||
|
static void increment(IntSequence &v, int nv);
|
||||||
|
static void decrement(IntSequence &v, int nv);
|
||||||
|
static void increment(IntSequence &v, const IntSequence &nvmx);
|
||||||
|
static void decrement(IntSequence &v, const IntSequence &nvmx);
|
||||||
|
static int getOffset(const IntSequence &v, int nv);
|
||||||
|
static int getOffset(const IntSequence &v, const IntSequence &nvmx);
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This is an abstraction for folded tensor. It only provides a method
|
||||||
|
|unfold|, which returns the unfolded version of the same symmetry, and
|
||||||
|
static methods for decrementing indices.
|
||||||
|
|
||||||
|
We also provide static methods for decrementing the |IntSequence| in
|
||||||
|
folded fashion and also calculating an offset for a given
|
||||||
|
|IntSequence|. However, this is relatively complex calculation, so
|
||||||
|
this should be avoided if possible. */
|
||||||
|
|
||||||
|
class FTensor : public Tensor
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
FTensor(indor io, const IntSequence &last, int r, int c, int d)
|
||||||
|
: Tensor(io, last, r, c, d)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
FTensor(const FTensor &ft)
|
||||||
|
: Tensor(ft)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
FTensor(int first_row, int num, FTensor &t)
|
||||||
|
: Tensor(first_row, num, t)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual ~FTensor()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual UTensor&unfold() const = 0;
|
||||||
|
|
||||||
|
static void decrement(IntSequence &v, int nv);
|
||||||
|
static int
|
||||||
|
getOffset(const IntSequence &v, int nv)
|
||||||
|
{
|
||||||
|
IntSequence vtmp(v); return getOffsetRecurse(vtmp, nv);
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
static int getOffsetRecurse(IntSequence &v, int nv);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,252 +0,0 @@
|
||||||
@q $Id: tensor.hweb 741 2006-05-09 11:12:46Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Tensor concept. Start of {\tt tensor.h} file.
|
|
||||||
|
|
||||||
Here we define a tensor class. Tensor is a mathematical object
|
|
||||||
corresponding to a $(n+1)$-dimensional array. An element of such array
|
|
||||||
is denoted $[B]_{\alpha_1\ldots\alpha_n}^\beta$, where $\beta$ is a
|
|
||||||
special index and $\alpha_1\ldots\alpha_n$ are other indices. The
|
|
||||||
class |Tensor| and its subclasses view such array as a 2D matrix,
|
|
||||||
where $\beta$ corresponds to one dimension, and
|
|
||||||
$\alpha_1\ldots\alpha_2$ unfold to the other dimension. Whether
|
|
||||||
$\beta$ correspond to rows or columns is decided by tensor subclasses,
|
|
||||||
however, most of our tensors will have rows indexed by $\beta$, and
|
|
||||||
$\alpha_1\ldots\alpha_n$ will unfold column-wise.
|
|
||||||
|
|
||||||
There might be some symmetries in the tensor data. For instance, if
|
|
||||||
$\alpha_1$ is interchanged with $\alpha_3$ and the both elements equal
|
|
||||||
for all possible $\alpha_i$, and $\beta$, then there is a symmetry
|
|
||||||
of $\alpha_1$ and $\alpha_3$.
|
|
||||||
|
|
||||||
For any symmetry, there are basically two possible storages of the
|
|
||||||
data. The first is unfolded storage, which stores all elements
|
|
||||||
regardless the symmetry. The other storage type is folded, which
|
|
||||||
stores only elements which do not repeat. We declare abstract classes
|
|
||||||
for unfolded tensor, and folded tensor.
|
|
||||||
|
|
||||||
Also, here we also define a concept of tensor index which is the
|
|
||||||
$n$-tuple $\alpha_1\ldots\alpha_n$. It is an iterator, which iterates
|
|
||||||
in dependence of symmetry and storage of the underlying tensor.
|
|
||||||
|
|
||||||
Although we do not decide about possible symmetries at this point, it
|
|
||||||
is worth noting that we implement two kinds of symmetries. The first
|
|
||||||
one is a full symmetry where all indices are interchangeable. The
|
|
||||||
second one is a generalization of the first. We define tensor of a
|
|
||||||
symmetry, where there are a few groups of indices interchangeable
|
|
||||||
within a group and not across. Moreover, the groups are required to be
|
|
||||||
consequent partitions of the index $n$-tuple. This is, we do not allow
|
|
||||||
$\alpha_1$ be interchangeable with $\alpha_3$ and not with $\alpha_2$
|
|
||||||
at the same time.
|
|
||||||
|
|
||||||
However, some intermediate results are, in fact, tensors of a symmetry
|
|
||||||
not fitting to our concept. We develop the tensor abstraction for it,
|
|
||||||
but these objects are not used very often. They have limited usage
|
|
||||||
due to their specialized constructor.
|
|
||||||
|
|
||||||
@c
|
|
||||||
|
|
||||||
#ifndef TENSOR_H
|
|
||||||
#define TENSOR_H
|
|
||||||
|
|
||||||
#include "int_sequence.h"
|
|
||||||
#include "twod_matrix.h"
|
|
||||||
|
|
||||||
@<index class definition@>;
|
|
||||||
@<|Tensor| class declaration@>;
|
|
||||||
@<|UTensor| class declaration@>;
|
|
||||||
@<|FTensor| class declaration@>;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
@ The index represents $n$-tuple $\alpha_1\ldots\alpha_n$. Since its
|
|
||||||
movement is dependent on the underlying tensor (with storage and
|
|
||||||
symmetry), we maintain a pointer to that tensor, we maintain the
|
|
||||||
$n$-tuple (or coordinates) as |IntSequence| and also we maintain the
|
|
||||||
offset number (column, or row) of the index in the tensor. The pointer
|
|
||||||
is const, since we do not need to change data through the index.
|
|
||||||
|
|
||||||
Here we require the |tensor| to implement |increment| and |decrement|
|
|
||||||
methods, which calculate following and preceding $n$-tuple. Also, we
|
|
||||||
need to calculate offset number from the given coordinates, so the
|
|
||||||
tensor must implement method |getOffset|. This method is used only in
|
|
||||||
construction of the index from the given coordinates. As the index is
|
|
||||||
created, the offset is automatically incremented, and decremented
|
|
||||||
together with index. The|getOffset| method can be relatively
|
|
||||||
computationally complex. This must be kept in mind. Also we generally
|
|
||||||
suppose that n-tuple of all zeros is the first offset (first columns
|
|
||||||
or row).
|
|
||||||
|
|
||||||
What follows is a definition of index class, the only
|
|
||||||
interesting point is |operator==| which decides only according to
|
|
||||||
offset, not according to the coordinates. This is useful since there
|
|
||||||
can be more than one of coordinate representations of past-the-end
|
|
||||||
index.
|
|
||||||
|
|
||||||
@s _Tptr int
|
|
||||||
@s _Self int
|
|
||||||
|
|
||||||
@<index class definition@>=
|
|
||||||
template<class _Tptr> class _index {
|
|
||||||
typedef _index<_Tptr> _Self;
|
|
||||||
_Tptr tensor;
|
|
||||||
int offset;
|
|
||||||
IntSequence coor;
|
|
||||||
public:@;
|
|
||||||
_index(_Tptr t, int n)
|
|
||||||
: tensor(t), offset(0), coor(n, 0)@+ {}
|
|
||||||
_index(_Tptr t, const IntSequence& cr, int c)
|
|
||||||
: tensor(t), offset(c), coor(cr)@+ {}
|
|
||||||
_index(_Tptr t, const IntSequence& cr)
|
|
||||||
: tensor(t), offset(tensor->getOffset(cr)), coor(cr)@+ {}
|
|
||||||
_index(const _index& ind)
|
|
||||||
: tensor(ind.tensor), offset(ind.offset), coor(ind.coor)@+ {}
|
|
||||||
const _Self& operator=(const _Self& in)
|
|
||||||
{@+ tensor = in.tensor;@+ offset = in.offset;@+ coor = in.coor;
|
|
||||||
return *this;@+}
|
|
||||||
_Self& operator++()
|
|
||||||
{@+ tensor->increment(coor);@+ offset++;@+ return *this;@+}
|
|
||||||
_Self& operator--()
|
|
||||||
{@+ tensor->decrement(coor);@+ offset--;@+ return *this;@+}
|
|
||||||
int operator*() const
|
|
||||||
{@+ return offset;@+}
|
|
||||||
bool operator==(const _index& n) const
|
|
||||||
{@+ return offset == n.offset;@+}
|
|
||||||
bool operator!=(const _index& n) const
|
|
||||||
{@+ return offset != n.offset;@+}
|
|
||||||
const IntSequence& getCoor() const
|
|
||||||
{@+ return coor;@+}
|
|
||||||
void print() const
|
|
||||||
{@+ printf("%4d: ", offset);@+ coor.print();@+}
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Here is the |Tensor| class, which is nothing else than a simple subclass
|
|
||||||
of |TwoDMatrix|. The unique semantically new member is |dim| which is tensor
|
|
||||||
dimension (length of $\alpha_1\ldots\alpha_n$). We also declare
|
|
||||||
|increment|, |decrement| and |getOffset| methods as pure virtual.
|
|
||||||
|
|
||||||
We also add members for index begin and index end. This is useful,
|
|
||||||
since |begin| and |end| methods do not return instance but only
|
|
||||||
references, which prevent making additional copy of index (for example
|
|
||||||
in for cycles as |in != end()| which would do a copy of index for each
|
|
||||||
cycle). The index begin |in_beg| is constructed as a sequence of all
|
|
||||||
zeros, and |in_end| is constructed from the sequence |last| passed to
|
|
||||||
the constructor, since it depends on subclasses. Also we have to say,
|
|
||||||
along what coordinate is the multidimensional index. This is used only
|
|
||||||
for initialization of |in_end|.
|
|
||||||
|
|
||||||
Also, we declare static auxiliary functions for $\pmatrix{n\cr k}$
|
|
||||||
which is |noverk| and $a^b$, which is |power|.
|
|
||||||
|
|
||||||
@s indor int
|
|
||||||
|
|
||||||
@<|Tensor| class declaration@>=
|
|
||||||
class Tensor : public TwoDMatrix {
|
|
||||||
public:@;
|
|
||||||
enum indor {along_row, along_col};
|
|
||||||
typedef _index<const Tensor*> index;
|
|
||||||
protected:@;
|
|
||||||
const index in_beg;
|
|
||||||
const index in_end;
|
|
||||||
int dim;
|
|
||||||
public:@;
|
|
||||||
Tensor(indor io, const IntSequence& last, int r, int c, int d)
|
|
||||||
: TwoDMatrix(r, c),
|
|
||||||
in_beg(this, d),
|
|
||||||
in_end(this, last, (io == along_row)? r:c),
|
|
||||||
dim(d)@+ {}
|
|
||||||
Tensor(indor io, const IntSequence& first, const IntSequence& last,
|
|
||||||
int r, int c, int d)
|
|
||||||
: TwoDMatrix(r, c),
|
|
||||||
in_beg(this, first, 0),
|
|
||||||
in_end(this, last, (io == along_row)? r:c),
|
|
||||||
dim(d)@+ {}
|
|
||||||
Tensor(int first_row, int num, Tensor& t)
|
|
||||||
: TwoDMatrix(first_row, num, t),
|
|
||||||
in_beg(t.in_beg),
|
|
||||||
in_end(t.in_end),
|
|
||||||
dim(t.dim)@+ {}
|
|
||||||
Tensor(const Tensor& t)
|
|
||||||
: TwoDMatrix(t),
|
|
||||||
in_beg(this, t.in_beg.getCoor(), *(t.in_beg)),
|
|
||||||
in_end(this, t.in_end.getCoor(), *(t.in_end)),
|
|
||||||
dim(t.dim)@+ {}
|
|
||||||
virtual ~Tensor()@+ {}
|
|
||||||
virtual void increment(IntSequence& v) const =0;
|
|
||||||
virtual void decrement(IntSequence& v) const =0;
|
|
||||||
virtual int getOffset(const IntSequence& v) const =0;
|
|
||||||
int dimen() const
|
|
||||||
{@+ return dim;@+}
|
|
||||||
|
|
||||||
const index& begin() const
|
|
||||||
{@+ return in_beg;@+}
|
|
||||||
const index& end() const
|
|
||||||
{@+ return in_end;@+}
|
|
||||||
|
|
||||||
static int noverk(int n, int k);
|
|
||||||
static int power(int a, int b);
|
|
||||||
static int noverseq(const IntSequence& s)
|
|
||||||
{
|
|
||||||
IntSequence seq(s);
|
|
||||||
return noverseq_ip((IntSequence&)s);
|
|
||||||
}
|
|
||||||
private:@;
|
|
||||||
static int noverseq_ip(IntSequence& s);
|
|
||||||
};
|
|
||||||
|
|
||||||
@ Here is an abstraction for unfolded tensor. We provide a pure
|
|
||||||
virtual method |fold| which returns a new instance of folded tensor of
|
|
||||||
the same symmetry. Also we provide static methods for incrementing and
|
|
||||||
decrementing an index with full symmetry and general symmetry as
|
|
||||||
defined above.
|
|
||||||
|
|
||||||
@<|UTensor| class declaration@>=
|
|
||||||
class FTensor;
|
|
||||||
class UTensor : public Tensor {
|
|
||||||
public:@;
|
|
||||||
UTensor(indor io, const IntSequence& last, int r, int c, int d)
|
|
||||||
: Tensor(io, last, r, c, d)@+ {}
|
|
||||||
UTensor(const UTensor& ut)
|
|
||||||
: Tensor(ut)@+ {}
|
|
||||||
UTensor(int first_row, int num, UTensor& t)
|
|
||||||
: Tensor(first_row, num, t)@+ {}
|
|
||||||
virtual ~UTensor()@+ {}
|
|
||||||
virtual FTensor& fold() const =0;
|
|
||||||
|
|
||||||
static void increment(IntSequence& v, int nv);
|
|
||||||
static void decrement(IntSequence& v, int nv);
|
|
||||||
static void increment(IntSequence& v, const IntSequence& nvmx);
|
|
||||||
static void decrement(IntSequence& v, const IntSequence& nvmx);
|
|
||||||
static int getOffset(const IntSequence& v, int nv);
|
|
||||||
static int getOffset(const IntSequence& v, const IntSequence& nvmx);
|
|
||||||
};
|
|
||||||
|
|
||||||
@ This is an abstraction for folded tensor. It only provides a method
|
|
||||||
|unfold|, which returns the unfolded version of the same symmetry, and
|
|
||||||
static methods for decrementing indices.
|
|
||||||
|
|
||||||
We also provide static methods for decrementing the |IntSequence| in
|
|
||||||
folded fashion and also calculating an offset for a given
|
|
||||||
|IntSequence|. However, this is relatively complex calculation, so
|
|
||||||
this should be avoided if possible.
|
|
||||||
|
|
||||||
@<|FTensor| class declaration@>=
|
|
||||||
class FTensor : public Tensor {
|
|
||||||
public:@;
|
|
||||||
FTensor(indor io, const IntSequence& last, int r, int c, int d)
|
|
||||||
: Tensor(io, last, r, c, d)@+ {}
|
|
||||||
FTensor(const FTensor& ft)
|
|
||||||
: Tensor(ft)@+ {}
|
|
||||||
FTensor(int first_row, int num, FTensor& t)
|
|
||||||
: Tensor(first_row, num, t)@+ {}
|
|
||||||
virtual ~FTensor()@+ {}
|
|
||||||
virtual UTensor& unfold() const =0;
|
|
||||||
|
|
||||||
static void decrement(IntSequence& v, int nv);
|
|
||||||
static int getOffset(const IntSequence& v, int nv)
|
|
||||||
{@+IntSequence vtmp(v);@+ return getOffsetRecurse(vtmp, nv);@+}
|
|
||||||
private:@;
|
|
||||||
static int getOffsetRecurse(IntSequence& v, int nv);
|
|
||||||
};
|
|
||||||
|
|
||||||
@ End of {\tt tensor.h} file.
|
|
|
@ -0,0 +1,74 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
// Exception.
|
||||||
|
|
||||||
|
/* Within the code we often check some state of variables, typically
|
||||||
|
preconditions or postconditions. If the state is not as required, it
|
||||||
|
is worthless to continue, since this means some fatal error in
|
||||||
|
algorithms. In this case we raise an exception which can be caught at
|
||||||
|
some higher level. This header file defines a simple infrastructure
|
||||||
|
for this. */
|
||||||
|
|
||||||
|
#ifndef TL_EXCEPTION_H
|
||||||
|
#define TL_EXCEPTION_H
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
/* The basic idea of raising an exception if some condition fails is
|
||||||
|
that the condition is checked only if required. We define a global
|
||||||
|
|TL_DEBUG| macro which is integer and says, how many debug messages
|
||||||
|
the program has to emit. We also define |TL_DEBUG_EXCEPTION| which
|
||||||
|
says, for what values of |TL_DEBUG| we will check for conditions of
|
||||||
|
the exceptions. If the |TL_DEBUG| is equal or higher than
|
||||||
|
|TL_DEBUG_EXCEPTION|, the exception conditions are checked.
|
||||||
|
|
||||||
|
We define |TL_RAISE|, and |TL_RAISE_IF| macros which throw an instance
|
||||||
|
of |TLException| if |TL_DEBUG >= TL_DEBUG_EXCEPTION|. The first is
|
||||||
|
unconditional throw, the second is conditioned by a given
|
||||||
|
expression. Note that if |TL_DEBUG < TL_DEBUG_EXCEPTION| then the code
|
||||||
|
is compiled but evaluation of the condition is passed. If code is
|
||||||
|
optimized, the optimizer also passes evaluation of |TL_DEBUG| and
|
||||||
|
|TL_DEBUG_EXCEPTION| comparison (I hope).
|
||||||
|
|
||||||
|
We provide default values for |TL_DEBUG| and |TL_DEBUG_EXCEPTION|. */
|
||||||
|
|
||||||
|
#ifndef TL_DEBUG_EXCEPTION
|
||||||
|
# define TL_DEBUG_EXCEPTION 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef TL_DEBUG
|
||||||
|
# define TL_DEBUG 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define TL_RAISE(mes) \
|
||||||
|
if (TL_DEBUG >= TL_DEBUG_EXCEPTION) throw TLException(__FILE__, __LINE__, mes);
|
||||||
|
|
||||||
|
#define TL_RAISE_IF(expr, mes) \
|
||||||
|
if (TL_DEBUG >= TL_DEBUG_EXCEPTION && (expr)) throw TLException(__FILE__, __LINE__, mes);
|
||||||
|
|
||||||
|
/* Primitive exception class containing file name, line number and message. */
|
||||||
|
|
||||||
|
/* Primitive exception carrying the source file name, the line number
   and a diagnostic message. Both strings are copied into fixed-size
   buffers and silently truncated if they do not fit. */
class TLException
{
  char fname[50];    // truncated copy of the source file name
  int lnum;          // line number where the exception was raised
  char message[500]; // truncated copy of the diagnostic message
public:
  TLException(const char *f, int l, const char *mes)
    : lnum(l)
  {
    // snprintf truncates and always null-terminates the destination.
    snprintf(fname, sizeof fname, "%s", f);
    snprintf(message, sizeof message, "%s", mes);
  }
  virtual ~TLException()
  {
  }
  // Report the exception location and message on standard output.
  virtual void
  print() const
  {
    printf("At %s:%d:%s\n", fname, lnum, message);
  }
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -1,79 +0,0 @@
|
||||||
@q $Id: tl_exception.hweb 332 2005-07-15 13:41:48Z kamenik $ @>
|
|
||||||
@q Copyright 2004, Ondra Kamenik @>
|
|
||||||
|
|
||||||
@*2 Exception. Start of {\tt tl\_exception.h} file.
|
|
||||||
|
|
||||||
Within the code we often check some state of variables, typically
|
|
||||||
preconditions or postconditions. If the state is not as required, it
|
|
||||||
is worthless to continue, since this means some fatal error in
|
|
||||||
algorithms. In this case we raise an exception which can be caught at
|
|
||||||
some higher level. This header file defines a simple infrastructure
|
|
||||||
for this.
|
|
||||||
|
|
||||||
@s TLException int
|
|
||||||
@c
|
|
||||||
#ifndef TL_EXCEPTION_H
|
|
||||||
#define TL_EXCEPTION_H
|
|
||||||
|
|
||||||
#include <cstring>
|
|
||||||
#include <cstdio>
|
|
||||||
|
|
||||||
@<body of tl\_exception header@>;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
@ The basic idea of raising an exception if some condition fails is
|
|
||||||
that the conditions is checked only if required. We define global
|
|
||||||
|TL_DEBUG| macro which is integer and says, how many debug messages
|
|
||||||
the programm has to emit. We also define |TL_DEBUG_EXCEPTION| which
|
|
||||||
says, for what values of |TL_DEBUG| we will check for conditions of
|
|
||||||
the exceptions. If the |TL_DEBUG| is equal or higher than
|
|
||||||
|TL_DEBUG_EXCEPTION|, the exception conditions are checked.
|
|
||||||
|
|
||||||
We define |TL_RAISE|, and |TL_RAISE_IF| macros which throw an instance
|
|
||||||
of |TLException| if |TL_DEBUG >= TL_DEBUG_EXCEPTION|. The first is
|
|
||||||
unconditional throw, the second is conditioned by a given
|
|
||||||
expression. Note that if |TL_DEBUG < TL_DEBUG_EXCEPTION| then the code
|
|
||||||
is compiled but evaluation of the condition is passed. If code is
|
|
||||||
optimized, the optimizer also passes evaluation of |TL_DEBUG| and
|
|
||||||
|TL_DEBUG_EXCEPTION| comparison (I hope).
|
|
||||||
|
|
||||||
We provide default values for |TL_DEBUG| and |TL_DEBUG_EXCEPTION|.
|
|
||||||
|
|
||||||
@<body of tl\_exception header@>=
|
|
||||||
#ifndef TL_DEBUG_EXCEPTION
|
|
||||||
#define TL_DEBUG_EXCEPTION 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef TL_DEBUG
|
|
||||||
#define TL_DEBUG 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define TL_RAISE(mes) \
|
|
||||||
if (TL_DEBUG >= TL_DEBUG_EXCEPTION) throw TLException(__FILE__, __LINE__, mes);
|
|
||||||
|
|
||||||
#define TL_RAISE_IF(expr, mes) \
|
|
||||||
if (TL_DEBUG >= TL_DEBUG_EXCEPTION && (expr)) throw TLException(__FILE__, __LINE__, mes);
|
|
||||||
|
|
||||||
@<|TLException| class definition@>;
|
|
||||||
|
|
||||||
@ Primitive exception class containing file name, line number and message.
|
|
||||||
@<|TLException| class definition@>=
|
|
||||||
class TLException {
|
|
||||||
char fname[50];
|
|
||||||
int lnum;
|
|
||||||
char message[500];
|
|
||||||
public:@;
|
|
||||||
TLException(const char* f, int l, const char* mes)
|
|
||||||
{
|
|
||||||
strncpy(fname, f, 50);@+ fname[49] = '\0';
|
|
||||||
strncpy(message, mes, 500);@+ message[499] = '\0';
|
|
||||||
lnum = l;
|
|
||||||
}
|
|
||||||
virtual ~TLException()@+ {}
|
|
||||||
virtual void print() const
|
|
||||||
{@+ printf("At %s:%d:%s\n", fname, lnum, message);@+}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
@ End of {\tt tl\_exception.h} file.
|
|
|
@ -0,0 +1,82 @@
|
||||||
|
// Copyright 2004, Ondra Kamenik
|
||||||
|
|
||||||
|
#include "tl_static.hh"
|
||||||
|
#include "tl_exception.hh"
|
||||||
|
|
||||||
|
TLStatic tls;
|
||||||
|
|
||||||
|
/* Note that we allow for repeated calls of |init|. This is not normal
|
||||||
|
and the only purpose of allowing this is the test suite. */
|
||||||
|
|
||||||
|
/* Start with no tables allocated; |init| creates them on first use. */
TLStatic::TLStatic()
  : ebundle(NULL), pbundle(NULL), ptriang(NULL)
{
}
|
||||||
|
|
||||||
|
/* Release the owned tables. |delete| on a null pointer is a no-op by
   the language rules, so the original null checks were redundant. */
TLStatic::~TLStatic()
{
  delete ebundle;
  delete pbundle;
  delete ptriang;
}
|
||||||
|
|
||||||
|
void
|
||||||
|
TLStatic::init(int dim, int nvar)
|
||||||
|
{
|
||||||
|
if (ebundle)
|
||||||
|
ebundle->generateUpTo(dim);
|
||||||
|
else
|
||||||
|
ebundle = new EquivalenceBundle(dim);
|
||||||
|
|
||||||
|
if (pbundle)
|
||||||
|
pbundle->generateUpTo(dim);
|
||||||
|
else
|
||||||
|
pbundle = new PermutationBundle(dim);
|
||||||
|
|
||||||
|
if (ptriang)
|
||||||
|
delete ptriang;
|
||||||
|
ptriang = new PascalTriangle(nvar, dim);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The coefficients are stored in |data| row by row where a row are
|
||||||
|
coeffs with the same $k$.
|
||||||
|
|
||||||
|
We first initialize the first row with ones. Then for each other row
|
||||||
|
we initialize the first item to one, and other items are a sum of
|
||||||
|
coefficients of $n-1$ which is in code |i+j-1|. */
|
||||||
|
|
||||||
|
/* Build the table of binomial coefficients. The coefficients are stored
   in |data| row by row, a row holding all coefficients with the same k
   (each row has nmax+1 items). The first row (k = 0) is all ones; every
   further row starts with 1 and continues by Pascal's rule, summing two
   coefficients already present in the table. */
PascalTriangle::PascalTriangle(int n, int k)
  : data(new int[(n+1)*(k+1)]), kmax(k), nmax(n)
{
  // Row k = 0: C(n,0) = 1 for every n.
  for (int col = 0; col <= n; col++)
    data[col] = 1;
  for (int row = 1; row <= k; row++)
    {
      int *line = data + row*(nmax+1);
      line[0] = 1;
      // Pascal's rule: C(m, row) = C(m-1, row) + C(m-1, row-1),
      // with m = col+row; both summands are already in the table.
      for (int col = 1; col <= n; col++)
        line[col] = noverk(col+row-1, row) + noverk(col+row-1, row-1);
    }
}
|
||||||
|
|
||||||
|
/* Clear. Recall, that there are |nmax+1| items in a row. */
|
||||||
|
|
||||||
|
int
|
||||||
|
PascalTriangle::noverk(int n, int k) const
|
||||||
|
{
|
||||||
|
TL_RAISE_IF(k > n || n < 0,
|
||||||
|
"Wrong arguments for PascalTriangle::noverk");
|
||||||
|
|
||||||
|
if (k <= kmax && n-k <= nmax)
|
||||||
|
return data[k*(nmax+1)+n-k];
|
||||||
|
|
||||||
|
if (n-k <= kmax && k <= nmax)
|
||||||
|
return data[(n-k)*(nmax+1)+k];
|
||||||
|
|
||||||
|
TL_RAISE("n or k out of range in PascalTriangle::noverk");
|
||||||
|
return 0;
|
||||||
|
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue