dynare++ / tensor library (TL): move away from CWEB

By the way, apply the Dynare C++ coding style and extensions (.cc/.hh).
Sébastien Villemot 2019-01-08 16:09:25 +01:00
parent 84255f9e9a
commit ce1ef47093
119 changed files with 12604 additions and 12727 deletions

.gitignore

@@ -137,9 +137,6 @@ mex/build/matlab/run_m2html.m
 /dynare++/src/dynglob_ll.cc
 /dynare++/src/dynglob_tab.cc
 /dynare++/src/dynglob_tab.hh
-/dynare++/tl/cc/*.cpp
-/dynare++/tl/cc/*.h
-/dynare++/tl/cc/main.tex
 /dynare++/tl/testing/tests
 /dynare++/tl/testing/tests.exe
 !/dynare++/extern/R/Makefile


@@ -23,7 +23,7 @@
 #include "mex.h"
 #include "decision_rule.hh"
-#include "fs_tensor.h"
+#include "fs_tensor.hh"
 #include "SylvException.h"
 extern "C" {


@@ -1,7 +1,7 @@
 // Copyright 2005, Ondra Kamenik
 #include "product.hh"
-#include "symmetry.h"
+#include "symmetry.hh"
 prodpit::prodpit()
   : prodq(NULL), level(0), npoints(0), jseq(NULL),


@@ -16,7 +16,7 @@
 #ifndef PRODUCT_H
 #define PRODUCT_H
-#include "int_sequence.h"
+#include "int_sequence.hh"
 #include "vector_function.hh"
 #include "quadrature.hh"


@@ -32,8 +32,8 @@
 #include <cstdlib>
 #include "vector_function.hh"
-#include "int_sequence.h"
-#include "sthread.h"
+#include "int_sequence.hh"
+#include "sthread.hh"
 /* This pure virtual class represents a concept of one-dimensional
    (non-nested) quadrature. So, one dimensional quadrature must return


@@ -25,7 +25,7 @@
 #ifndef QUASI_MCARLO_H
 #define QUASI_MCARLO_H
-#include "int_sequence.h"
+#include "int_sequence.hh"
 #include "quadrature.hh"
 #include "Vector.h"


@@ -1,7 +1,7 @@
 // Copyright 2005, Ondra Kamenik
 #include "smolyak.hh"
-#include "symmetry.h"
+#include "symmetry.hh"
 smolpit::smolpit()
   : smolq(NULL), isummand(0), jseq(NULL), sig(NULL), p(NULL)


@@ -17,8 +17,8 @@
 #ifndef SMOLYAK_H
 #define SMOLYAK_H
-#include "int_sequence.h"
-#include "tl_static.h"
+#include "int_sequence.hh"
+#include "tl_static.hh"
 #include "vector_function.hh"
 #include "quadrature.hh"


@@ -10,8 +10,8 @@
 #ifndef DYNAMIC_MODEL_H
 #define DYNAMIC_MODEL_H
-#include "t_container.h"
-#include "sparse_tensor.h"
+#include "t_container.hh"
+#include "sparse_tensor.hh"
 #include "Vector.h"


@@ -1,7 +1,7 @@
 // Copyright 2005, Ondra Kamenik
 #include "faa_di_bruno.hh"
-#include "fine_container.h"
+#include "fine_container.hh"
 #include <cmath>


@@ -12,10 +12,10 @@
 #define FAA_DI_BRUNO_H
 #include "journal.hh"
-#include "stack_container.h"
-#include "t_container.h"
-#include "sparse_tensor.h"
-#include "gs_tensor.h"
+#include "stack_container.hh"
+#include "t_container.hh"
+#include "sparse_tensor.hh"
+#include "gs_tensor.hh"
 /* Nothing special here. See |@<|FaaDiBruno::calculate| folded sparse
    code@>| for reason of having |magic_mult|. */


@@ -5,7 +5,7 @@
 #ifndef JOURNAL_H
 #define JOURNAL_H
-#include "int_sequence.h"
+#include "int_sequence.hh"
 #include <sys/time.h>
 #include <cstdio>


@@ -25,13 +25,13 @@
 #ifndef KORDER_H
 #define KORDER_H
-#include "int_sequence.h"
-#include "fs_tensor.h"
-#include "gs_tensor.h"
-#include "t_container.h"
-#include "stack_container.h"
-#include "normal_moments.h"
-#include "t_polynomial.h"
+#include "int_sequence.hh"
+#include "fs_tensor.hh"
+#include "gs_tensor.hh"
+#include "t_container.hh"
+#include "stack_container.hh"
+#include "normal_moments.hh"
+#include "t_polynomial.hh"
 #include "faa_di_bruno.hh"
 #include "journal.hh"


@@ -33,7 +33,7 @@
 #ifndef NORMAL_CONJUGATE_H
 #define NORMAL_CONJUGATE_H
-#include "twod_matrix.h"
+#include "twod_matrix.hh"
 /* The class is described by the four parameters: $\mu$, $\kappa$, $\nu$ and
    $\Lambda$. */


@@ -8,7 +8,7 @@
 #include "utils/cc/exception.h"
 #include "parser/cc/parser_exception.h"
 #include "parser/cc/atom_substitutions.h"
-#include "../tl/cc/tl_exception.h"
+#include "../tl/cc/tl_exception.hh"
 #include "../kord/kord_exception.hh"
 #ifndef DYNVERSION


@@ -4,8 +4,8 @@
 #ifndef DYNARE3_H
 #define DYNARE3_H
-#include "../tl/cc/t_container.h"
-#include "../tl/cc/sparse_tensor.h"
+#include "../tl/cc/t_container.hh"
+#include "../tl/cc/sparse_tensor.hh"
 #include "../kord/decision_rule.hh"
 #include "../kord/dynamic_model.hh"


@@ -7,7 +7,7 @@
 #include "parser/cc/atom_assignings.h"
 #include "dynare_atoms.h"
-#include "twod_matrix.h"
+#include "twod_matrix.hh"
 #include "Vector.h"
 #include "GeneralMatrix.h"


@@ -5,7 +5,7 @@
 #ifndef OGU_NLSOLVE_H
 #define OGU_NLSOLVE_H
-#include "twod_matrix.h"
+#include "twod_matrix.hh"
 #include "journal.hh"
 namespace ogu


@@ -1,120 +1,48 @@
-CWEBSRC = \
-normal_moments.cweb \
-int_sequence.cweb \
-tensor.cweb \
-ps_tensor.cweb \
-pyramid_prod2.cweb \
-equivalence.cweb \
-fine_container.cweb \
-kron_prod.cweb \
-ps_tensor.hweb \
-t_polynomial.cweb \
-symmetry.cweb \
-stack_container.cweb \
-sthread.hweb \
-twod_matrix.hweb \
-twod_matrix.cweb \
-symmetry.hweb \
-sparse_tensor.cweb \
-fine_container.hweb \
-sthread.cweb \
-int_sequence.hweb \
-tl_exception.hweb \
-pyramid_prod2.hweb \
-t_container.hweb \
-permutation.hweb \
-tensor.hweb \
-gs_tensor.cweb \
-rfs_tensor.hweb \
-pyramid_prod.hweb \
-t_polynomial.hweb \
-pyramid_prod.cweb \
-fs_tensor.cweb \
-sparse_tensor.hweb \
-permutation.cweb \
-equivalence.hweb \
-gs_tensor.hweb \
-normal_moments.hweb \
-tl_static.hweb \
-kron_prod.hweb \
-fs_tensor.hweb \
-stack_container.hweb \
-rfs_tensor.cweb \
-t_container.cweb \
-tl_static.cweb
-GENERATED_FILES = \
-normal_moments.cpp \
-int_sequence.cpp \
-tensor.cpp \
-ps_tensor.cpp \
-pyramid_prod2.cpp \
-equivalence.cpp \
-fine_container.cpp \
-kron_prod.cpp \
-ps_tensor.h \
-t_polynomial.cpp \
-symmetry.cpp \
-stack_container.cpp \
-sthread.h \
-twod_matrix.h \
-twod_matrix.cpp \
-symmetry.h \
-sparse_tensor.cpp \
-fine_container.h \
-sthread.cpp \
-int_sequence.h \
-tl_exception.h \
-pyramid_prod2.h \
-t_container.h \
-permutation.h \
-tensor.h \
-gs_tensor.cpp \
-rfs_tensor.h \
-pyramid_prod.h \
-t_polynomial.h \
-pyramid_prod.cpp \
-fs_tensor.cpp \
-sparse_tensor.h \
-permutation.cpp \
-equivalence.h \
-gs_tensor.h \
-normal_moments.h \
-tl_static.h \
-kron_prod.h \
-fs_tensor.h \
-stack_container.h \
-rfs_tensor.cpp \
-t_container.cpp \
-tl_static.cpp
 noinst_LIBRARIES = libtl.a
-libtl_a_SOURCES = $(CWEBSRC) $(GENERATED_FILES)
+libtl_a_SOURCES = \
+equivalence.cc \
+equivalence.hh \
+fine_container.cc \
+fine_container.hh \
+fs_tensor.cc \
+fs_tensor.hh \
+gs_tensor.cc \
+gs_tensor.hh \
+int_sequence.cc \
+int_sequence.hh \
+kron_prod.cc \
+kron_prod.hh \
+normal_moments.cc \
+normal_moments.hh \
+permutation.cc \
+permutation.hh \
+ps_tensor.cc \
+ps_tensor.hh \
+pyramid_prod.cc \
+pyramid_prod.hh \
+pyramid_prod2.cc \
+pyramid_prod2.hh \
+rfs_tensor.cc \
+rfs_tensor.hh \
+sparse_tensor.cc \
+sparse_tensor.hh \
+stack_container.cc \
+stack_container.hh \
+sthread.cc \
+sthread.hh \
+symmetry.cc \
+symmetry.hh \
+t_container.cc \
+t_container.hh \
+t_polynomial.cc \
+t_polynomial.hh \
+tensor.cc \
+tensor.hh \
+tl_exception.hh \
+tl_static.cc \
+tl_static.hh \
+twod_matrix.cc \
+twod_matrix.hh
 libtl_a_CPPFLAGS = -I../../sylv/cc $(CPPFLAGS_MATIO)
 libtl_a_CXXFLAGS = $(AM_CXXFLAGS) $(PTHREAD_CFLAGS)
-BUILT_SOURCES = $(GENERATED_FILES)
-EXTRA_DIST = main.web dummy.ch
-%.cpp: %.cweb dummy.ch
-$(CTANGLE) -bhp $< dummy.ch $@
-%.h: %.hweb dummy.ch
-$(CTANGLE) -bhp $< dummy.ch $@
-if HAVE_CWEAVE
-if HAVE_PDFTEX
-if HAVE_EPLAIN
-pdf-local: tl.pdf
-tl.pdf: main.web $(CWEBSRC)
-$(CWEAVE) -bhp main.web
-$(PDFTEX) main
-mv main.pdf tl.pdf
-endif
-endif
-endif
-CLEANFILES = tl.pdf main.idx main.log main.scn main.tex main.toc


@@ -0,0 +1,435 @@
// Copyright 2004, Ondra Kamenik
#include "equivalence.hh"
#include "permutation.hh"
#include "tl_exception.hh"
#include <cstring>
int
OrdSequence::operator[](int i) const
{
TL_RAISE_IF((i < 0 || i >= length()),
"Index out of range in OrdSequence::operator[]");
return data[i];
}
/* Here we implement the ordering. It can be changed, or various
orderings can be used for different problem sizes. We order them
according to the average, and then according to the first item. */
bool
OrdSequence::operator<(const OrdSequence &s) const
{
double ta = average();
double sa = s.average();
return (ta < sa || ((ta == sa) && (operator[](0) > s[0])));
}
bool
OrdSequence::operator==(const OrdSequence &s) const
{
if (length() != s.length())
return false;
int i = 0;
while (i < length() && operator[](i) == s[i])
i++;
return (i == length());
}
/* The first |add| adds a given integer to the class; the second
iterates through a given sequence and adds all its elements to the
class. */
void
OrdSequence::add(int i)
{
vector<int>::iterator vit = data.begin();
while (vit != data.end() && *vit < i)
++vit;
if (vit != data.end() && *vit == i)
return;
data.insert(vit, i);
}
void
OrdSequence::add(const OrdSequence &s)
{
vector<int>::const_iterator vit = s.data.begin();
while (vit != s.data.end())
{
add(*vit);
++vit;
}
}
/* Answers |true| if a given number is in the class. */
bool
OrdSequence::has(int i) const
{
vector<int>::const_iterator vit = data.begin();
while (vit != data.end())
{
if (*vit == i)
return true;
++vit;
}
return false;
}
/* Return an average of the class. */
double
OrdSequence::average() const
{
double res = 0;
for (unsigned int i = 0; i < data.size(); i++)
res += data[i];
TL_RAISE_IF(data.size() == 0,
"Attempt to take average of empty class in OrdSequence::average");
return res/data.size();
}
/* Debug print. */
void
OrdSequence::print(const char *prefix) const
{
printf("%s", prefix);
for (unsigned int i = 0; i < data.size(); i++)
printf("%d ", data[i]);
printf("\n");
}
Equivalence::Equivalence(int num)
: n(num)
{
for (int i = 0; i < num; i++)
{
OrdSequence s;
s.add(i);
classes.push_back(s);
}
}
Equivalence::Equivalence(int num, const char *dummy)
: n(num)
{
OrdSequence s;
for (int i = 0; i < num; i++)
s.add(i);
classes.push_back(s);
}
/* Copy constructors. The second also glues a given couple. */
Equivalence::Equivalence(const Equivalence &e)
: n(e.n),
classes(e.classes)
{
}
Equivalence::Equivalence(const Equivalence &e, int i1, int i2)
: n(e.n),
classes(e.classes)
{
seqit s1 = find(i1);
seqit s2 = find(i2);
if (s1 != s2)
{
OrdSequence ns(*s1);
ns.add(*s2);
classes.erase(s1);
classes.erase(s2);
insert(ns);
}
}
const Equivalence &
Equivalence::operator=(const Equivalence &e)
{
classes.clear();
n = e.n;
classes = e.classes;
return *this;
}
bool
Equivalence::operator==(const Equivalence &e) const
{
if (!std::operator==(classes, e.classes))
return false;
if (n != e.n)
return false;
return true;
}
/* Return an iterator pointing to a class having a given integer. */
Equivalence::const_seqit
Equivalence::findHaving(int i) const
{
const_seqit si = classes.begin();
while (si != classes.end())
{
if ((*si).has(i))
return si;
++si;
}
TL_RAISE_IF(si == classes.end(),
"Couldn't find equivalence class in Equivalence::findHaving");
return si;
}
Equivalence::seqit
Equivalence::findHaving(int i)
{
seqit si = classes.begin();
while (si != classes.end())
{
if ((*si).has(i))
return si;
++si;
}
TL_RAISE_IF(si == classes.end(),
"Couldn't find equivalence class in Equivalence::findHaving");
return si;
}
/* Find $j$-th class for a given $j$. */
Equivalence::const_seqit
Equivalence::find(int j) const
{
const_seqit si = classes.begin();
int i = 0;
while (si != classes.end() && i < j)
{
++si;
i++;
}
TL_RAISE_IF(si == classes.end(),
"Couldn't find equivalence class in Equivalence::find");
return si;
}
Equivalence::seqit
Equivalence::find(int j)
{
seqit si = classes.begin();
int i = 0;
while (si != classes.end() && i < j)
{
++si;
i++;
}
TL_RAISE_IF(si == classes.end(),
"Couldn't find equivalence class in Equivalence::find");
return si;
}
/* Insert a new class, respecting the class ordering. */
void
Equivalence::insert(const OrdSequence &s)
{
seqit si = classes.begin();
while (si != classes.end() && *si < s)
++si;
classes.insert(si, s);
}
/* Trace the equivalence into the integer sequence. The classes are in
some order (described earlier), and items within classes are ordered,
so this implies that the data can be linearized. This method
``prints'' them into the sequence. We allow tracing only a given
number of classes from the beginning. */
void
Equivalence::trace(IntSequence &out, int num) const
{
int i = 0;
int nc = 0;
for (const_seqit it = begin(); it != end() && nc < num; ++it, ++nc)
for (int j = 0; j < (*it).length(); j++, i++)
{
TL_RAISE_IF(i >= out.size(),
"Wrong size of output sequence in Equivalence::trace");
out[i] = (*it)[j];
}
}
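// For instance, the equivalence {{1,2},{0,4},{3}} of {0,...,4} keeps its
// classes ordered by their averages 1.5, 2 and 3, so trace() linearizes
// it into the sequence (1,2,0,4,3).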
void
Equivalence::trace(IntSequence &out, const Permutation &per) const
{
TL_RAISE_IF(out.size() != n,
"Wrong size of output sequence in Equivalence::trace");
TL_RAISE_IF(per.size() != numClasses(),
"Wrong permutation for permuted Equivalence::trace");
int i = 0;
for (int iclass = 0; iclass < numClasses(); iclass++)
{
const_seqit itper = find(per.getMap()[iclass]);
for (int j = 0; j < (*itper).length(); j++, i++)
out[i] = (*itper)[j];
}
}
/* Debug print. */
void
Equivalence::print(const char *prefix) const
{
int i = 0;
for (const_seqit it = classes.begin();
it != classes.end();
++it, i++)
{
printf("%sclass %d: ", prefix, i);
(*it).print("");
}
}
/* Here we construct the set of all equivalences over an $n$-element
set. The construction proceeds as follows. We maintain a list of added
equivalences. At each iteration we pop the front of the list and try to
add all parents of the popped equivalence. This action adds new
equivalences to the object and also to the added list. We finish the
iterations when the added list is empty.
In the beginning we start with
$\{\{0\},\{1\},\ldots,\{n-1\}\}$. Adding parents is an action which,
for a given equivalence, tries to glue all possible couples and checks
whether the new equivalence is already in the equivalence set. This is
not efficient, but we perform the construction only once.
In this way we breadth-first search the lattice of all equivalences. Note
that the lattice is modular, which is why the result of the construction
is a list with the property that between two equivalences with the same
number of classes there are only equivalences with that number of
classes. Obviously, the list is decreasing in the number of classes
(since it is constructed by gluing attempts). */
EquivalenceSet::EquivalenceSet(int num)
: n(num),
equis()
{
list<Equivalence> added;
Equivalence first(n);
equis.push_back(first);
addParents(first, added);
while (!added.empty())
{
addParents(added.front(), added);
added.pop_front();
}
if (n > 1)
{
Equivalence last(n, "");
equis.push_back(last);
}
}
/* This method is used in |addParents| and returns |true| if the object
already contains the given equivalence. We traverse the list of
equivalences in reverse order, since equivalences are ordered in the
list from the most primitive (nothing equivalent) to the maximal
(everything equivalent). Since most calls to the |has| method return
|true|, |operator==| between equivalences is quick if their numbers of
classes differ, and over time we compare against equivalences with
fewer classes, it is more efficient to traverse the equivalences from
fewer classes to more classes; hence the reverse order. */
bool
EquivalenceSet::has(const Equivalence &e) const
{
list<Equivalence>::const_reverse_iterator rit = equis.rbegin();
while (rit != equis.rend() && *rit != e)
++rit;
if (rit != equis.rend())
return true;
return false;
}
/* The responsibility of this method is to try to glue all possible
couples within a given equivalence and add those which are not yet in
the list. These are also appended to the |added| list.
If the number of classes is 2 or 1, we exit, because there is nothing
to be added. */
void
EquivalenceSet::addParents(const Equivalence &e,
list<Equivalence> &added)
{
if (e.numClasses() == 2 || e.numClasses() == 1)
return;
for (int i1 = 0; i1 < e.numClasses(); i1++)
for (int i2 = i1+1; i2 < e.numClasses(); i2++)
{
Equivalence ns(e, i1, i2);
if (!has(ns))
{
added.push_back(ns);
equis.push_back(ns);
}
}
}
/* Debug print. */
void
EquivalenceSet::print(const char *prefix) const
{
char tmp[100];
strcpy(tmp, prefix);
strcat(tmp, " ");
int i = 0;
for (list<Equivalence>::const_iterator it = equis.begin();
it != equis.end();
++it, i++)
{
printf("%sequivalence %d:(classes %d)\n", prefix, i, (*it).numClasses());
(*it).print(tmp);
}
}
/* Construct the bundle. |nmax| is the maximum size of the underlying set. */
EquivalenceBundle::EquivalenceBundle(int nmax)
{
nmax = max(nmax, 1);
generateUpTo(nmax);
}
/* Destruct bundle. Just free all pointers. */
EquivalenceBundle::~EquivalenceBundle()
{
for (unsigned int i = 0; i < bundle.size(); i++)
delete bundle[i];
}
/* Remember that the first item is |EquivalenceSet(1)|. */
const EquivalenceSet &
EquivalenceBundle::get(int n) const
{
if (n > (int) (bundle.size()) || n < 1)
{
TL_RAISE("Equivalence set not found in EquivalenceBundle::get");
return *(bundle[0]);
}
else
{
return *(bundle[n-1]);
}
}
/* Get |curmax|, the maximum size currently in the bundle, and generate
sets for all sizes from |curmax+1| up to |nmax|. */
void
EquivalenceBundle::generateUpTo(int nmax)
{
int curmax = bundle.size();
for (int i = curmax+1; i <= nmax; i++)
bundle.push_back(new EquivalenceSet(i));
}
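A minimal usage sketch of the API just converted (not part of the commit; it assumes the tl headers are on the include path): enumerating all equivalences over a 3-element set yields the Bell number $B_3=5$ of equivalences, ordered from the finest $\{\{0\},\{1\},\{2\}\}$ to the coarsest $\{\{0,1,2\}\}$.

#include "equivalence.hh"

int main()
{
  EquivalenceSet eset(3); // constructs all 5 equivalences over {0,1,2}
  eset.print("");         // prints each equivalence with its classes
  return 0;
}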


@@ -1,477 +0,0 @@
@q $Id: equivalence.cweb 148 2005-04-19 15:12:26Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@ Start of {\tt equivalence.cpp} file.
@c
#include "equivalence.h"
#include "permutation.h"
#include "tl_exception.h"
#include <cstring>
@<|OrdSequence| method codes@>;
@<|Equivalence| method codes@>;
@<|EquivalenceSet| method codes@>;
@<|EquivalenceBundle| method codes@>;
@
@<|OrdSequence| method codes@>=
@<|OrdSequence::operator[]| code@>;
@<|OrdSequence::operator<| code@>;
@<|OrdSequence::operator==| code@>;
@<|OrdSequence::add| codes@>;
@<|OrdSequence::has| code@>;
@<|OrdSequence::average()| code@>;
@<|OrdSequence::print| code@>;
@
@<|Equivalence| method codes@>=
@<|Equivalence| constructors@>;
@<|Equivalence| copy constructors@>;
@<|Equivalence::findHaving| codes@>;
@<|Equivalence::find| codes@>;
@<|Equivalence::insert| code@>;
@<|Equivalence::operator=| code@>;
@<|Equivalence::operator==| code@>;
@<|Equivalence::trace| code@>;
@<|Equivalence::trace| permuted code@>;
@<|Equivalence::print| code@>;
@
@<|EquivalenceSet| method codes@>=
@<|EquivalenceSet| constructor code@>;
@<|EquivalenceSet::has| code@>;
@<|EquivalenceSet::addParents| code@>;
@<|EquivalenceSet::print| code@>;
@
@<|EquivalenceBundle| method codes@>=
@<|EquivalenceBundle| constructor code@>;
@<|EquivalenceBundle| destructor code@>;
@<|EquivalenceBundle::get| code@>;
@<|EquivalenceBundle::generateUpTo| code@>;
@
@<|OrdSequence::operator[]| code@>=
int OrdSequence::operator[](int i) const
{
TL_RAISE_IF((i<0 || i>=length()),
"Index out of range in OrdSequence::operator[]");
return data[i];
}
@ Here we implement the ordering. It can be changed, or various
orderings can be used for different problem sizes. We order them
according to the average, and then according to the first item.
@<|OrdSequence::operator<| code@>=
bool OrdSequence::operator<(const OrdSequence& s) const
{
double ta = average();
double sa = s.average();
return (ta < sa || ((ta == sa) && (operator[](0) > s[0])));
}
@
@<|OrdSequence::operator==| code@>=
bool OrdSequence::operator==(const OrdSequence& s) const
{
if (length() != s.length())
return false;
int i = 0;
while (i < length() && operator[](i) == s[i])
i++;
return (i == length());
}
@ The first |add| adds a given integer to the class, the second
iterates through a given sequence and adds everything found in the
given class.
@<|OrdSequence::add| codes@>=
void OrdSequence::add(int i)
{
vector<int>::iterator vit = data.begin();
while (vit != data.end() && *vit < i)
++vit;
if (vit != data.end() && *vit == i)
return;
data.insert(vit, i);
}
@#
void OrdSequence::add(const OrdSequence& s)
{
vector<int>::const_iterator vit = s.data.begin();
while (vit != s.data.end()) {
add(*vit);
++vit;
}
}
@ Answers |true| if a given number is in the class.
@<|OrdSequence::has| code@>=
bool OrdSequence::has(int i) const
{
vector<int>::const_iterator vit = data.begin();
while (vit != data.end()) {
if (*vit == i)
return true;
++vit;
}
return false;
}
@ Return an average of the class.
@<|OrdSequence::average()| code@>=
double OrdSequence::average() const
{
double res = 0;
for (unsigned int i = 0; i < data.size(); i++)
res += data[i];
TL_RAISE_IF(data.size() == 0,
"Attempt to take average of empty class in OrdSequence::average");
return res/data.size();
}
@ Debug print.
@<|OrdSequence::print| code@>=
void OrdSequence::print(const char* prefix) const
{
printf("%s",prefix);
for (unsigned int i = 0; i < data.size(); i++)
printf("%d ",data[i]);
printf("\n");
}
@
@<|Equivalence| constructors@>=
Equivalence::Equivalence(int num)
: n(num)
{
for (int i = 0; i < num; i++) {
OrdSequence s;
s.add(i);
classes.push_back(s);
}
}
@#
Equivalence::Equivalence(int num, const char* dummy)
: n(num)
{
OrdSequence s;
for (int i = 0; i < num; i++)
s.add(i);
classes.push_back(s);
}
@ Copy constructors. The second also glues a given couple.
@<|Equivalence| copy constructors@>=
Equivalence::Equivalence(const Equivalence& e)
: n(e.n),
classes(e.classes)
{
}
@#
Equivalence::Equivalence(const Equivalence& e, int i1, int i2)
: n(e.n),
classes(e.classes)
{
seqit s1 = find(i1);
seqit s2 = find(i2);
if (s1 != s2) {
OrdSequence ns(*s1);
ns.add(*s2);
classes.erase(s1);
classes.erase(s2);
insert(ns);
}
}
@
@<|Equivalence::operator=| code@>=
const Equivalence& Equivalence::operator=(const Equivalence& e)
{
classes.clear();
n = e.n;
classes = e.classes;
return *this;
}
@
@<|Equivalence::operator==| code@>=
bool Equivalence::operator==(const Equivalence& e) const
{
if (! std::operator==(classes, e.classes))
return false;
if (n != e.n)
return false;
return true;
}
@ Return an iterator pointing to a class having a given integer.
@<|Equivalence::findHaving| codes@>=
Equivalence::const_seqit Equivalence::findHaving(int i) const
{
const_seqit si = classes.begin();
while (si != classes.end()) {
if ((*si).has(i))
return si;
++si;
}
TL_RAISE_IF(si == classes.end(),
"Couldn't find equivalence class in Equivalence::findHaving");
return si;
}
@#
Equivalence::seqit Equivalence::findHaving(int i)
{
seqit si = classes.begin();
while (si != classes.end()) {
if ((*si).has(i))
return si;
++si;
}
TL_RAISE_IF(si == classes.end(),
"Couldn't find equivalence class in Equivalence::findHaving");
return si;
}
@ Find $j$-th class for a given $j$.
@<|Equivalence::find| codes@>=
Equivalence::const_seqit Equivalence::find(int j) const
{
const_seqit si = classes.begin();
int i = 0;
while (si != classes.end() && i < j) {
++si;
i++;
}
TL_RAISE_IF(si == classes.end(),
"Couldn't find equivalence class in Equivalence::find");
return si;
}
@#
Equivalence::seqit Equivalence::find(int j)
{
seqit si = classes.begin();
int i = 0;
while (si != classes.end() && i < j) {
++si;
i++;
}
TL_RAISE_IF(si == classes.end(),
"Couldn't find equivalence class in Equivalence::find");
return si;
}
@ Insert a new class yielding the ordering.
@<|Equivalence::insert| code@>=
void Equivalence::insert(const OrdSequence& s)
{
seqit si = classes.begin();
while (si != classes.end() && *si < s)
++si;
classes.insert(si, s);
}
@ Trace the equivalence into the integer sequence. The classes are in
some order (described earlier), and items within classes are ordered,
so this implies, that the data can be linearized. This method
``prints'' them to the sequence. We allow for tracing only a given
number of classes from the beginning.
@<|Equivalence::trace| code@>=
void Equivalence::trace(IntSequence& out, int num) const
{
int i = 0;
int nc = 0;
for (const_seqit it = begin(); it != end() && nc < num; ++it, ++nc)
for (int j = 0; j < (*it).length(); j++, i++) {
TL_RAISE_IF(i >= out.size(),
"Wrong size of output sequence in Equivalence::trace");
out[i] = (*it)[j];
}
}
@
@<|Equivalence::trace| permuted code@>=
void Equivalence::trace(IntSequence& out, const Permutation& per) const
{
TL_RAISE_IF(out.size() != n,
"Wrong size of output sequence in Equivalence::trace");
TL_RAISE_IF(per.size() != numClasses(),
"Wrong permutation for permuted Equivalence::trace");
int i = 0;
for (int iclass = 0; iclass < numClasses(); iclass++) {
const_seqit itper = find(per.getMap()[iclass]);
for (int j = 0; j < (*itper).length(); j++, i++)
out[i] = (*itper)[j];
}
}
@ Debug print.
@<|Equivalence::print| code@>=
void Equivalence::print(const char* prefix) const
{
int i = 0;
for (const_seqit it = classes.begin();
it != classes.end();
++it, i++) {
printf("%sclass %d: ",prefix,i);
(*it).print("");
}
}
@ Here we construct a set of all equivalences over $n$-element
set. The construction proceeds as follows. We maintain a list of added
equivalences. At each iteration we pop front of the list, try to add
all parents of the popped equivalence. This action adds new
equivalences to the object and also to the added list. We finish the
iterations when the added list is empty.
In the beginning we start with
$\{\{0\},\{1\},\ldots,\{n-1\}\}$. Adding of parents is an action which
for a given equivalence tries to glue all possible couples and checks
whether a new equivalence is already in the equivalence set. This is
not effective, but we will do the construction only ones.
In this way we breath-first search a lattice of all equivalences. Note
that the lattice is modular, that is why the result of a construction
is a list with a property that between two equivalences with the same
number of classes there are only equivalences with that number of
classes. Obviously, the list is decreasing in a number of classes
(since it is constructed by gluing attempts).
@<|EquivalenceSet| constructor code@>=
EquivalenceSet::EquivalenceSet(int num)
: n(num),
equis()
{
list<Equivalence> added;
Equivalence first(n);
equis.push_back(first);
addParents(first, added);
while (! added.empty()) {
addParents(added.front(), added);
added.pop_front();
}
if (n > 1) {
Equivalence last(n, "");
equis.push_back(last);
}
}
@ This method is used in |addParents| and returns |true| if the object
already has that equivalence. We trace list of equivalences in reverse
order since equivalences are ordered in the list from the most
primitive (nothing equivalent) to maximal (all is equivalent). Since
we will have much more results of |has| method as |true|, and
|operator==| between equivalences is quick if number of classes
differ, and in time we will compare with equivalences with less
classes, then it is more efficient to trace the equivalences from less
classes to more classes. hence the reverse order.
@<|EquivalenceSet::has| code@>=
bool EquivalenceSet::has(const Equivalence& e) const
{
list<Equivalence>::const_reverse_iterator rit = equis.rbegin();
while (rit != equis.rend() && *rit != e)
++rit;
if (rit != equis.rend())
return true;
return false;
}
@ Responsibility of this methods is to try to glue all possible
couples within a given equivalence and add those which are not in the
list yet. These are added also to the |added| list.
If number of classes is 2 or 1, we exit, because there is nothing to
be added.
@<|EquivalenceSet::addParents| code@>=
void EquivalenceSet::addParents(const Equivalence& e,
list<Equivalence>& added)
{
if (e.numClasses() == 2 || e.numClasses() == 1)
return;
for (int i1 = 0; i1 < e.numClasses(); i1++)
for (int i2 = i1+1; i2 < e.numClasses(); i2++) {
Equivalence ns(e, i1, i2);
if (! has(ns)) {
added.push_back(ns);
equis.push_back(ns);
}
}
}
@ Debug print.
@<|EquivalenceSet::print| code@>=
void EquivalenceSet::print(const char* prefix) const
{
char tmp[100];
strcpy(tmp, prefix);
strcat(tmp, " ");
int i = 0;
for (list<Equivalence>::const_iterator it = equis.begin();
it != equis.end();
++it, i++) {
printf("%sequivalence %d:(classes %d)\n",prefix,i,(*it).numClasses());
(*it).print(tmp);
}
}
@ Construct the bundle. |nmax| is a maximum size of underlying set.
@<|EquivalenceBundle| constructor code@>=
EquivalenceBundle::EquivalenceBundle(int nmax)
{
nmax = max(nmax, 1);
generateUpTo(nmax);
}
@ Destruct bundle. Just free all pointers.
@<|EquivalenceBundle| destructor code@>=
EquivalenceBundle::~EquivalenceBundle()
{
for (unsigned int i = 0; i < bundle.size(); i++)
delete bundle[i];
}
@ Remember, that the first item is |EquivalenceSet(1)|.
@<|EquivalenceBundle::get| code@>=
const EquivalenceSet& EquivalenceBundle::get(int n) const
{
if (n > (int)(bundle.size()) || n < 1) {
TL_RAISE("Equivalence set not found in EquivalenceBundle::get");
return *(bundle[0]);
} else {
return *(bundle[n-1]);
}
}
@ Get |curmax| which is a maximum size in the bundle, and generate for
all sizes from |curmax+1| up to |nmax|.
@<|EquivalenceBundle::generateUpTo| code@>=
void EquivalenceBundle::generateUpTo(int nmax)
{
int curmax = bundle.size();
for (int i = curmax+1; i <= nmax; i++)
bundle.push_back(new EquivalenceSet(i));
}
@ End of {\tt equivalence.cpp} file.


@@ -0,0 +1,226 @@
// Copyright 2004, Ondra Kamenik
// Equivalences.
/* Here we define an equivalence on the set of integers $\{0, 1, \ldots,
k-1\}$. The purpose is clear: in the tensor library we often iterate
through all equivalences and sum matrices. We need an abstraction for
an equivalence class, an equivalence, and a set of all equivalences.
The equivalence class (which is basically a set of integers) is
implemented here as an ordered integer sequence. The ordered sequence is
not implemented via |IntSequence| but via |vector<int>|, since we need
insertions. The equivalence is implemented as an ordered list of
equivalence classes, and the equivalence set is a list of equivalences.
The ordering of the equivalence classes within an equivalence is very
important. For instance, if we iterate through equivalences for $k=5$
and pick some equivalence, say $\{\{0,4\},\{1,2\},\{3\}\}$, we
then evaluate something like:
$$\left[B_{y^2u^3}\right]_{\alpha_1\alpha_2\beta_1\beta_2\beta_3}=
\cdots+\left[g_{y^3}\right]_{\gamma_1\gamma_2\gamma_3}
\left[g_{yu}\right]^{\gamma_1}_{\alpha_1\beta_3}
\left[g_{yu}\right]^{\gamma_2}_{\alpha_2\beta_1}
\left[g_u\right]^{\gamma_3}_{\beta_2}+\cdots
$$
If the tensors are unfolded, we can evaluate this expression as
$$g_{y^3}\cdot\left(g_{yu}\otimes g_{yu}\otimes g_{u}\right)\cdot P,$$
where $P$ is a suitable permutation of columns of the expressions,
which permutes them so that the index
$(\alpha_1,\beta_3,\alpha_2,\beta_1,\beta_2)$ goes to
$(\alpha_1,\alpha_2,\beta_1,\beta_2,\beta_3)$.
The permutation $P$ can be very inefficient (copying a great number of
small chunks of data) if the equivalence class ordering is chosen
badly. However, we do not provide any heuristic minimizing the total
time spent in all permutations. We choose an ordering which orders the
classes according to their averages, and according to the smallest
equivalence class element if the averages are the same. */
#ifndef EQUIVALENCE_H
#define EQUIVALENCE_H
#include "int_sequence.hh"
#include <vector>
#include <list>
using namespace std;
/* Here is the abstraction for an equivalence class. We implement it as
a |vector<int>|. We have a constructor for an empty class and a copy
constructor. What is important here is the ordering operator
|operator<| and the methods for addition of an integer and addition of
another sequence. We also provide the method |has|, which returns true
if a given integer is contained. */
class OrdSequence
{
vector<int> data;
public:
OrdSequence() : data()
{
}
OrdSequence(const OrdSequence &s) : data(s.data)
{
}
const OrdSequence &
operator=(const OrdSequence &s)
{
data = s.data; return *this;
}
bool operator==(const OrdSequence &s) const;
int operator[](int i) const;
bool operator<(const OrdSequence &s) const;
const vector<int> &
getData() const
{
return data;
}
int
length() const
{
return data.size();
}
void add(int i);
void add(const OrdSequence &s);
bool has(int i) const;
void print(const char *prefix) const;
private:
double average() const;
};
/* Here is the abstraction for the equivalence. It is a list of
equivalence classes. We also remember |n|, which is the size of the
underlying set $\{0, 1, \ldots, n-1\}$.
The method |trace| ``prints'' the equivalence into an integer sequence. */
class Permutation;
class Equivalence
{
private:
int n;
list<OrdSequence> classes;
public:
typedef list<OrdSequence>::const_iterator const_seqit;
typedef list<OrdSequence>::iterator seqit;
/* The first constructor constructs $\{\{0\},\{1\},\ldots,\{n-1\}\}$.
The second constructor constructs $\{\{0,1,\ldots,n-1\}\}$.
The third is the copy constructor. And the fourth is the copy
constructor plus gluing |i1| and |i2| in one class. */
Equivalence(int num);
Equivalence(int num, const char *dummy);
Equivalence(const Equivalence &e);
Equivalence(const Equivalence &e, int i1, int i2);
const Equivalence &operator=(const Equivalence &e);
bool operator==(const Equivalence &e) const;
bool
operator!=(const Equivalence &e) const
{
return !operator==(e);
}
int
getN() const
{
return n;
}
int
numClasses() const
{
return classes.size();
}
void trace(IntSequence &out, int n) const;
void
trace(IntSequence &out) const
{
trace(out, numClasses());
}
void trace(IntSequence &out, const Permutation &per) const;
void print(const char *prefix) const;
seqit
begin()
{
return classes.begin();
}
const_seqit
begin() const
{
return classes.begin();
}
seqit
end()
{
return classes.end();
}
const_seqit
end() const
{
return classes.end();
}
const_seqit find(int i) const;
seqit find(int i);
protected:
/* Here we have the find methods. We can find an equivalence class
containing a given number, or we can find an equivalence class at a
given index within the ordering.
We also have an |insert| method which inserts a given class
according to the class ordering. */
const_seqit findHaving(int i) const;
seqit findHaving(int i);
void insert(const OrdSequence &s);
};
/* The |EquivalenceSet| is a list of equivalences. The unique
constructor constructs the set of all equivalences over an $n$-element
set. The equivalences are sorted in the list so that equivalences with
fewer classes are at the end.
The two methods |has| and |addParents| are useful in the constructor. */
class EquivalenceSet
{
int n;
list<Equivalence> equis;
public:
typedef list<Equivalence>::const_iterator const_iterator;
EquivalenceSet(int num);
void print(const char *prefix) const;
const_iterator
begin() const
{
return equis.begin();
}
const_iterator
end() const
{
return equis.end();
}
private:
bool has(const Equivalence &e) const;
void addParents(const Equivalence &e, list<Equivalence> &added);
};
/* The equivalence bundle class only encapsulates the |EquivalenceSet|s
from 1 up to a given number. It can retrieve the equivalence set over
an $n$-element set for a given $n$, and it can also generate more
sets on request.
It is fully responsible for the storage needed for the |EquivalenceSet|s. */
class EquivalenceBundle
{
vector<EquivalenceSet *> bundle;
public:
EquivalenceBundle(int nmax);
~EquivalenceBundle();
const EquivalenceSet &get(int n) const;
void generateUpTo(int nmax);
};
#endif
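A small sketch of the class ordering described above (hypothetical usage, not part of the commit): classes are compared by average first, so $\{1,2\}$ precedes $\{0,4\}$.

#include "equivalence.hh"

int main()
{
  OrdSequence a, b;
  a.add(0); a.add(4); // average 2.0
  b.add(1); b.add(2); // average 1.5
  if (b < a)          // true, since 1.5 < 2.0
    b.print("comes first: ");
  return 0;
}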


@@ -1,203 +0,0 @@
@q $Id: equivalence.hweb 148 2005-04-19 15:12:26Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 Equivalences. Start of {\tt equivalence.h} file
Here we define an equivalence of a set of integers $\{0, 1, \ldots,
k-1\}$. The purpose is clear, in the tensor library we often iterate
through all equivalences and sum matrices. We need an abstraction for
an equivalence class, equivalence and a set of all equivalences.
The equivalence class (which is basically a set of integers) is here
implemented as ordered integer sequence. The ordered sequence is not
implemented via |IntSequence|, but via |vector<int>| since we need
insertions. The equivalence is implemented as an ordered list of
equivalence classes, and equivalence set is a list of equivalences.
The ordering of the equivalence classes within an equivalence is very
important. For instance, if we iterate through equivalences for $k=5$
and pickup some equivalence class, say $\{\{0,4\},\{1,2\},\{3\}\}$, we
then evaluate something like:
$$\left[B_{y^2u^3}\right]_{\alpha_1\alpha_2\beta_1\beta_2\beta_3}=
\cdots+\left[g_{y^3}\right]_{\gamma_1\gamma_2\gamma_3}
\left[g_{yu}\right]^{\gamma_1}_{\alpha_1\beta_3}
\left[g_{yu}\right]^{\gamma_2}_{\alpha_2\beta_1}
\left[g_u\right]^{\gamma_3}_{\beta_2}+\cdots
$$
If the tensors are unfolded, we can evaluate this expression as
$$g_{y^3}\cdot\left(g_{yu}\otimes g_{yu}\otimes g_{u}\right)\cdot P,$$
where $P$ is a suitable permutation of columns of the expressions,
which permutes them so that the index
$(\alpha_1,\beta_3,\alpha_2,\beta_1,\beta_2)$ would go to
$(\alpha_1,\alpha_2,\beta_1,\beta_2,\beta_3)$.
The permutation $P$ can be very ineffective (copying great amount of
small chunks of data) if the equivalence class ordering is chosen
badly. However, we do not provide any heuristic minimizing a total
time spent in all permutations. We choose an ordering which orders the
classes according to their averages, and according to the smallest
equivalence class element if the averages are the same.
@s OrdSequence int
@s Equivalence int
@s EquivalenceSet int
@c
#ifndef EQUIVALENCE_H
#define EQUIVALENCE_H
#include "int_sequence.h"
#include <vector>
#include <list>
using namespace std;
@<|OrdSequence| class declaration@>;
@<|Equivalence| class declaration@>;
@<|EquivalenceSet| class declaration@>;
@<|EquivalenceBundle| class declaration@>;
#endif
@ Here is the abstraction for an equivalence class. We implement it as
|vector<int>|. We have a constructor for empty class, copy
constructor. What is important here is the ordering operator
|operator<| and methods for addition of an integer, and addition of
another sequence. Also we provide method |has| which returns true if a
given integer is contained.
@<|OrdSequence| class declaration@>=
class OrdSequence {
vector<int> data;
public:@/
OrdSequence() : data()@+ {}
OrdSequence(const OrdSequence& s) : data(s.data)@+ {}
const OrdSequence& operator=(const OrdSequence& s)
{@+ data = s.data;@+ return *this;@+}
bool operator==(const OrdSequence& s) const;
int operator[](int i) const;
bool operator<(const OrdSequence& s) const;
const vector<int>& getData() const
{@+ return data;@+}
int length() const {@+ return data.size();@+}
void add(int i);
void add(const OrdSequence& s);
bool has(int i) const;
void print(const char* prefix) const;
private:@/
double average() const;
};
@ Here is the abstraction for the equivalence. It is a list of
equivalence classes. Also we remember |n|, which is a size of
underlying set $\{0, 1, \ldots, n-1\}$.
Method |trace| ``prints'' the equivalence into the integer sequence.
@<|Equivalence| class declaration@>=
class Permutation;
class Equivalence {
private:
int n;
list<OrdSequence> classes;
public:@;
typedef list<OrdSequence>::const_iterator const_seqit;
typedef list<OrdSequence>::iterator seqit;
@<|Equivalence| constructors@>;
const Equivalence& operator=(const Equivalence& e);
bool operator==(const Equivalence& e) const;
bool operator!=(const Equivalence& e) const
{@+ return ! operator==(e);@+}
int getN() const {@+ return n;@+}
int numClasses() const {@+ return classes.size();@+}
void trace(IntSequence& out, int n) const;
void trace(IntSequence& out) const
{@+ trace(out, numClasses()); @+}
void trace(IntSequence& out, const Permutation& per) const;
void print(const char* prefix) const;
@<|Equivalence| begin and end methods@>;
const_seqit find(int i) const;
seqit find(int i);
protected:@;
@<|Equivalence| protected methods@>;
};
@ The |EquivalenceSet| is a list of equivalences. The unique
constructor constructs a set of all equivalences over $n$-element
set. The equivalences are sorted in the list so that equivalences with
fewer number of classes are in the end.
The two methods |has| and |addParents| are useful in the constructor.
@<|EquivalenceSet| class declaration@>=
class EquivalenceSet {
int n;
list<Equivalence> equis;
public:@;
typedef list<Equivalence>::const_iterator const_iterator;
EquivalenceSet(int num);
void print(const char* prefix) const;
const_iterator begin() const
{@+ return equis.begin();@+}
const_iterator end() const
{@+ return equis.end();@+}
private:@;
bool has(const Equivalence& e) const;
void addParents(const Equivalence& e, list<Equivalence>& added);
};
@ The equivalence bundle class only encapsulates |EquivalenceSet|s
from 1 up to a given number. It is able to retrieve the equivalence set
over $n$-element set for a given $n$, and also it can generate some more
sets on request.
It is fully responsible for storage needed for |EquivalenceSet|s.
@<|EquivalenceBundle| class declaration@>=
class EquivalenceBundle {
vector<EquivalenceSet*> bundle;
public:@;
EquivalenceBundle(int nmax);
~EquivalenceBundle();
const EquivalenceSet& get(int n) const;
void generateUpTo(int nmax);
};
@ The first constructor constructs $\{\{0\},\{1\},\ldots,\{n-1\}\}$.
The second constructor constructs $\{\{0,1,\ldots,n-1\}\}$.
The third is the copy constructor. And the fourth is the copy
constructor plus gluing |i1| and |i2| in one class.
@<|Equivalence| constructors@>=
Equivalence(int num);
Equivalence(int num, const char* dummy);
Equivalence(const Equivalence& e);
Equivalence(const Equivalence& e, int i1, int i2);
@
@<|Equivalence| begin and end methods@>=
seqit begin() {@+ return classes.begin();@+}
const_seqit begin() const {@+ return classes.begin();@+}
seqit end() {@+ return classes.end();@+}
const_seqit end() const {@+ return classes.end();@+}
@ Here we have find methods. We can find an equivalence class having a
given number or we can find an equivalence class of a given index within
the ordering.
We have also an |insert| method which inserts a given class
according to the class ordering.
@<|Equivalence| protected methods@>=
const_seqit findHaving(int i) const;
seqit findHaving(int i);
void insert(const OrdSequence& s);
@ End of {\tt equivalence.h} file.


@@ -0,0 +1,35 @@
// Copyright 2005, Ondra Kamenik
#include "fine_container.hh"
#include <cmath>
/* Here we construct the vector of new sizes of containers (before
|nc|) and copy all remaining sizes behind |nc|. */
SizeRefinement::SizeRefinement(const IntSequence &s, int nc, int max)
{
new_nc = 0;
for (int i = 0; i < nc; i++)
{
int nr = s[i]/max;
if (s[i] % max != 0)
nr++;
int ss = (nr > 0) ? (int) round(((double) s[i])/nr) : 0;
for (int j = 0; j < nr - 1; j++)
{
rsizes.push_back(ss);
ind_map.push_back(i);
new_nc++;
}
rsizes.push_back(s[i]-(nr-1)*ss);
ind_map.push_back(i);
new_nc++;
}
for (int i = nc; i < s.size(); i++)
{
rsizes.push_back(s[i]);
ind_map.push_back(i);
}
}
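A worked sketch of the arithmetic above (not part of the commit; it assumes the IntSequence(length, fill) constructor and non-const operator[] used elsewhere in this commit): refining the sizes (10, 7, 3) with nc = 2 and max = 4.

#include "fine_container.hh"
#include <cstdio>

int main()
{
  IntSequence s(3, 0);
  s[0] = 10; s[1] = 7; s[2] = 3;
  // s[0] = 10: nr = 10/4 = 2, remainder 2 != 0, so nr = 3;
  //            ss = round(10/3.0) = 3, giving sizes 3, 3 and 10-2*3 = 4.
  // s[1] = 7:  nr = 2, ss = round(7/2.0) = 4, giving sizes 4 and 7-4 = 3.
  // s[2] = 3 lies behind nc = 2 and is copied untouched.
  SizeRefinement sr(s, 2, 4);
  for (int i = 0; i < sr.numRefinements(); i++)
    printf("size %d comes from old stack %d\n",
           sr.getRefSize(i), sr.getOldIndex(i));
  return 0;
}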


@@ -1,41 +0,0 @@
@q $Id: fine_container.cweb 1833 2008-05-18 20:22:39Z kamenik $ @>
@q Copyright 2005, Ondra Kamenik @>
@ Start of {\tt stack\_container.cpp} file.
@c
#include "fine_container.h"
#include <cmath>
@<|SizeRefinement| constructor code@>;
@ Here we construct the vector of new sizes of containers (before
|nc|) and copy all remaining sizes behind |nc|.
@<|SizeRefinement| constructor code@>=
SizeRefinement::SizeRefinement(const IntSequence& s, int nc, int max)
{
new_nc = 0;
for (int i = 0; i < nc; i++) {
int nr = s[i]/max;
if (s[i] % max != 0)
nr++;
int ss = (nr>0) ? (int)round(((double)s[i])/nr) : 0;
for (int j = 0; j < nr - 1; j++) {
rsizes.push_back(ss);
ind_map.push_back(i);
new_nc++;
}
rsizes.push_back(s[i]-(nr-1)*ss);
ind_map.push_back(i);
new_nc++;
}
for (int i = nc; i < s.size(); i++) {
rsizes.push_back(s[i]);
ind_map.push_back(i);
}
}
@ End of {\tt stack\_container.cpp} file.


@@ -0,0 +1,162 @@
// Copyright 2005, Ondra Kamenik
// Refined stack of containers.
/* This file defines a refinement of the stack container. It makes a
vertical refinement of a given stack container; it refines only matrix
items. The items which are always zero, or which can be identity
matrices, are not refined.
The refinement is done by a simple construction from the stack
container being refined. A parameter is passed giving the maximum size
of each stack in the refined container. The resulting object is a stack
container, so everything works seamlessly.
We define here a class for the refinement of sizes, |SizeRefinement|;
this is purely an auxiliary class allowing us to write the code more
concisely. The main class of this file is |FineContainer|, which
performs the refining. The two further classes |FoldedFineContainer|
and |UnfoldedFineContainer| are its specializations.
NOTE: This code was implemented with the hope that it would help to cut
down memory allocations during the Faa Di Bruno formula
evaluation. However, it seems that this needs to be accompanied by a
similar thing for the tensor multidimensional index. Thus, the
abstraction is not currently used, but it might be useful in the future. */
#ifndef FINE_CONTAINER_H
#define FINE_CONTAINER_H
#include "stack_container.hh"
#include <vector>
/* This class splits the first |nc| elements of the given sequence |s|
into a sequence having no items greater than the given |max|. The
remaining elements (those behind |nc|) are left untouched. It also
remembers the mapping, i.e. for a given index in the new sequence, it
is able to return the corresponding index in the old sequence. */
class SizeRefinement
{
vector<int> rsizes;
vector<int> ind_map;
int new_nc;
public:
SizeRefinement(const IntSequence &s, int nc, int max);
int
getRefSize(int i) const
{
return rsizes[i];
}
int
numRefinements() const
{
return rsizes.size();
}
int
getOldIndex(int i) const
{
return ind_map[i];
}
int
getNC() const
{
return new_nc;
}
};
/* The main class of this file refines a given stack container, and
inherits from the stack container. It also defines the |getType|
method, which returns the type for a given stack as the type of the
corresponding (old) stack of the former stack container. */
template <class _Ttype>
class FineContainer : public SizeRefinement, public StackContainer<_Ttype>
{
protected:
typedef StackContainer<_Ttype> _Stype;
typedef typename StackContainerInterface<_Ttype>::_Ctype _Ctype;
typedef typename StackContainerInterface<_Ttype>::itype itype;
_Ctype **const ref_conts;
const _Stype &stack_cont;
public:
/* Here we construct the |SizeRefinement| and allocate space for the
refined containers. Then, the containers are created and put to
|conts| array. Note that the containers do not claim any further
space, since all the tensors of the created containers are in-place
submatrices.
Here we use a dirty trick of converting |const| pointer to non-|const|
pointer and passing it to a subtensor container constructor. The
containers are stored in |ref_conts| and then in |conts| from
|StackContainer|. However, this is safe since neither |ref_conts| nor
|conts| are used in non-|const| contexts. For example,
|StackContainer| has only a |const| method to return a member of
|conts|. */
FineContainer(const _Stype &sc, int max)
: SizeRefinement(sc.getStackSizes(), sc.numConts(), max),
StackContainer<_Ttype>(numRefinements(), getNC()),
ref_conts(new _Ctype *[getNC()]),
stack_cont(sc)
{
for (int i = 0; i < numRefinements(); i++)
_Stype::stack_sizes[i] = getRefSize(i);
_Stype::calculateOffsets();
int last_cont = -1;
int last_row = 0;
for (int i = 0; i < getNC(); i++)
{
if (getOldIndex(i) != last_cont)
{
last_cont = getOldIndex(i);
last_row = 0;
}
union {const _Ctype *c; _Ctype *n;} convert;
convert.c = stack_cont.getCont(last_cont);
ref_conts[i] = new _Ctype(last_row, _Stype::stack_sizes[i],
*(convert.n));
_Stype::conts[i] = ref_conts[i];
last_row += _Stype::stack_sizes[i];
}
}
/* Here we deallocate the refined containers, and deallocate the array of
refined containers. */
virtual ~FineContainer()
{
for (int i = 0; i < _Stype::numConts(); i++)
delete ref_conts[i];
delete [] ref_conts;
}
itype
getType(int i, const Symmetry &s) const
{
return stack_cont.getType(getOldIndex(i), s);
}
};
/* Here is |FineContainer| specialization for folded tensors. */
class FoldedFineContainer : public FineContainer<FGSTensor>, public FoldedStackContainer
{
public:
FoldedFineContainer(const StackContainer<FGSTensor> &sc, int max)
: FineContainer<FGSTensor>(sc, max)
{
}
};
/* Here is |FineContainer| specialization for unfolded tensors. */
class UnfoldedFineContainer : public FineContainer<UGSTensor>, public UnfoldedStackContainer
{
public:
UnfoldedFineContainer(const StackContainer<UGSTensor> &sc, int max)
: FineContainer<UGSTensor>(sc, max)
{
}
};
#endif


@@ -1,164 +0,0 @@
@q $Id: fine_container.hweb 332 2005-07-15 13:41:48Z kamenik $ @>
@q Copyright 2005, Ondra Kamenik @>
@*2 Refined stack of containers. Start of {\tt fine\_container.h} file.
This file defines a refinement of the stack container. It makes a
vertical refinement of a given stack container, it refines only matrix
items, the items which are always zero, or can be identity matrices
are not refined.
The refinement is done by a simple construction from the stack
container being refined. A parameter is passed meaning a maximum size
of each stack in the refined container. The resulting object is stack
container, so everything works seamlessly.
We define here a class for refinement of sizes |SizeRefinement|, this
is purely an auxiliary class allowing us to write a code more
concisely. The main class of this file is |FineContainer|, which
corresponds to refining. The two more classes |FoldedFineContainer|
and |UnfoldedFineContainer| are its specializations.
NOTE: This code was implemented with a hope that it will help to cut
down memory allocations during the Faa Di Bruno formula
evaluation. However, it seems that this needs to be accompanied with a
similar thing for tensor multidimensional index. Thus, the abstraction
is not currently used, but it might be useful in future.
@s SizeRefinement int
@s FineContainer int
@s FoldedFineContainer int
@s UnfoldedFineContainer int
@c
#ifndef FINE_CONTAINER_H
#define FINE_CONTAINER_H
#include "stack_container.h"
#include <vector>
@<|SizeRefinement| class declaration@>;
@<|FineContainer| class declaration@>;
@<|FoldedFineContainer| class declaration@>;
@<|UnfoldedFineContainer| class declaration@>;
#endif
@ This class splits the first |nc| elements of the given sequence |s|
to a sequence not having items greater than given |max|. The remaining
elements (those behind |nc|) are left untouched. It also remembers the
mapping, i.e. for a given index in a new sequence, it is able to
return a corresponding index in old sequence.
@<|SizeRefinement| class declaration@>=
class SizeRefinement {
vector<int> rsizes;
vector<int> ind_map;
int new_nc;
public:@;
SizeRefinement(const IntSequence& s, int nc, int max);
int getRefSize(int i) const
{@+ return rsizes[i];@+}
int numRefinements() const
{@+ return rsizes.size();@+}
int getOldIndex(int i) const
{@+ return ind_map[i];@+}
int getNC() const
{@+ return new_nc;@+}
};
@ This main class of this class refines a given stack container, and
inherits from the stack container. It also defines the |getType|
method, which returns a type for a given stack as the type of the
corresponding (old) stack of the former stack container.
@<|FineContainer| class declaration@>=
template <class _Ttype>@;
class FineContainer : public SizeRefinement, public StackContainer<_Ttype> {
protected:@;
typedef StackContainer<_Ttype> _Stype;
typedef typename StackContainerInterface<_Ttype>::_Ctype _Ctype;
typedef typename StackContainerInterface<_Ttype>::itype itype;
_Ctype** const ref_conts;
const _Stype& stack_cont;
public:@;
@<|FineContainer| constructor@>;
@<|FineContainer| destructor@>;
itype getType(int i, const Symmetry& s) const
{@+ return stack_cont.getType(getOldIndex(i), s);@+}
};
@ Here we construct the |SizeRefinement| and allocate space for the
refined containers. Then, the containers are created and put to
|conts| array. Note that the containers do not claim any further
space, since all the tensors of the created containers are in-place
submatrices.
Here we use a dirty trick of converting |const| pointer to non-|const|
pointer and passing it to a subtensor container constructor. The
containers are stored in |ref_conts| and then in |conts| from
|StackContainer|. However, this is safe since neither |ref_conts| nor
|conts| are used in non-|const| contexts. For example,
|StackContainer| has only a |const| method to return a member of
|conts|.
@<|FineContainer| constructor@>=
FineContainer(const _Stype& sc, int max)
: SizeRefinement(sc.getStackSizes(), sc.numConts(), max),
StackContainer<_Ttype>(numRefinements(), getNC()),
ref_conts(new _Ctype*[getNC()]),
stack_cont(sc)
{
for (int i = 0; i < numRefinements(); i++)
_Stype::stack_sizes[i] = getRefSize(i);
_Stype::calculateOffsets();
int last_cont = -1;
int last_row = 0;
for (int i = 0; i < getNC(); i++) {
if (getOldIndex(i) != last_cont) {
last_cont = getOldIndex(i);
last_row = 0;
}
union {const _Ctype* c; _Ctype* n;} convert;
convert.c = stack_cont.getCont(last_cont);
ref_conts[i] = new _Ctype(last_row, _Stype::stack_sizes[i],
*(convert.n));
_Stype::conts[i] = ref_conts[i];
last_row += _Stype::stack_sizes[i];
}
}
@ Here we deallocate the refined containers, and deallocate the array of refined containers.
@<|FineContainer| destructor@>=
virtual ~FineContainer()
{
for (int i = 0; i < _Stype::numConts(); i++)
delete ref_conts[i];
delete [] ref_conts;
}
@ Here is |FineContainer| specialization for folded tensors.
@<|FoldedFineContainer| class declaration@>=
class FoldedFineContainer : public FineContainer<FGSTensor>, public FoldedStackContainer {
public:@;
FoldedFineContainer(const StackContainer<FGSTensor>& sc, int max)
: FineContainer<FGSTensor>(sc, max) @+ {}
};
@ Here is |FineContainer| specialization for unfolded tensors.
@<|UnfoldedFineContainer| class declaration@>=
class UnfoldedFineContainer : public FineContainer<UGSTensor>, public UnfoldedStackContainer {
public:@;
UnfoldedFineContainer(const StackContainer<UGSTensor>& sc, int max)
: FineContainer<UGSTensor>(sc, max) @+ {}
};
@ End of {\tt fine\_container.h} file.

dynare++/tl/cc/fs_tensor.cc (new file)

@@ -0,0 +1,290 @@
// Copyright 2004, Ondra Kamenik
#include "fs_tensor.hh"
#include "gs_tensor.hh"
#include "sparse_tensor.hh"
#include "rfs_tensor.hh"
#include "tl_exception.hh"
/* This constructs a fully symmetric tensor as given by the contraction:
$$\left[g_{y^n}\right]_{\alpha_1\ldots\alpha_n}=
\left[t_{y^{n+1}}\right]_{\alpha_1\ldots\alpha_n\beta}[x]^\beta$$
We go through all columns of the output tensor $[g]$, and for each
column we cycle through all variables, inserting each variable into the
column coordinates to obtain a column of tensor $[t]$. The column is
multiplied by the appropriate item of |x| and added to the column of the
$[g]$ tensor. */
FFSTensor::FFSTensor(const FFSTensor &t, const ConstVector &x)
: FTensor(along_col, IntSequence(t.dimen()-1, t.nvar()),
t.nrows(), calcMaxOffset(t.nvar(), t.dimen()-1), t.dimen()-1),
nv(t.nvar())
{
TL_RAISE_IF(t.dimen() < 1,
"Wrong dimension for tensor contraction of FFSTensor");
TL_RAISE_IF(t.nvar() != x.length(),
"Wrong number of variables for tensor contraction of FFSTensor");
zeros();
for (Tensor::index to = begin(); to != end(); ++to)
{
for (int i = 0; i < nvar(); i++)
{
IntSequence from_ind(i, to.getCoor());
Tensor::index from(&t, from_ind);
addColumn(x[i], t, *from, *to);
}
}
}
/* This returns the number of indices for a folded tensor with full
symmetry. Let $n$ be the number of variables |nvar| and $d$ the
dimension |dim|. Then the number of indices is $\pmatrix{n+d-1\cr d}$. */
int
FFSTensor::calcMaxOffset(int nvar, int d)
{
if (nvar == 0 && d == 0)
return 1;
if (nvar == 0 && d > 0)
return 0;
return noverk(nvar + d - 1, d);
}
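// For instance, with nvar = 2 and dim = 3 this gives noverk(4, 3) = 4
// indices, one per monotone coordinate sequence: (0,0,0), (0,0,1),
// (0,1,1) and (1,1,1).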
/* The conversion from a sparse tensor is clear. We go through the whole
sparse tensor and write into the dense tensor whatever is found. */
FFSTensor::FFSTensor(const FSSparseTensor &t)
: FTensor(along_col, IntSequence(t.dimen(), t.nvar()),
t.nrows(), calcMaxOffset(t.nvar(), t.dimen()), t.dimen()),
nv(t.nvar())
{
zeros();
for (FSSparseTensor::const_iterator it = t.getMap().begin();
it != t.getMap().end(); ++it)
{
index ind(this, (*it).first);
get((*it).second.first, *ind) = (*it).second.second;
}
}
/* The conversion from the unfolded tensor copies only the columns with
the respective coordinates. So we go through all the columns of the
folded tensor (this), make an index of the unfolded tensor from the
coordinates, and copy the column. */
FFSTensor::FFSTensor(const UFSTensor &ut)
: FTensor(along_col, IntSequence(ut.dimen(), ut.nvar()),
ut.nrows(), calcMaxOffset(ut.nvar(), ut.dimen()), ut.dimen()),
nv(ut.nvar())
{
for (index in = begin(); in != end(); ++in)
{
index src(&ut, in.getCoor());
copyColumn(ut, *src, *in);
}
}
/* Here we just make a new instance and return a reference to it. */
UTensor &
FFSTensor::unfold() const
{
return *(new UFSTensor(*this));
}
/* Incrementing is easy. We have to increment by calling the static
method |UTensor::increment| first. In this way, we obtain the
coordinates of the unfolded tensor. Then we have to skip to the closest
folded index, which corresponds to monotonizing the integer sequence. */
void
FFSTensor::increment(IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in FFSTensor::increment");
UTensor::increment(v, nv);
v.monotone();
}
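// For instance, with nv = 3 the folded successor of (0,2,2) is obtained
// by the unfolded increment (0,2,2) -> (1,0,0); monotone() then lifts
// this to the closest folded index (1,1,1).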
/* Decrement calls static |FTensor::decrement|. */
void
FFSTensor::decrement(IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in FFSTensor::decrement");
FTensor::decrement(v, nv);
}
int
FFSTensor::getOffset(const IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input vector size in FFSTensor::getOffset");
return FTensor::getOffset(v, nv);
}
/* Here we add a general symmetry tensor to (a part of) a full symmetry
tensor, provided that the unique variable of the full symmetry tensor
is a stack of the variables of the general symmetry tensor.
We check the dimensions and the number of variables. Then we calculate
the shift of coordinates for going from the general symmetry tensor to
full symmetry (it corresponds to the shift of coordinates induced by
stacking the variables). Then we add the appropriate columns, going
through the columns in general symmetry, adding the shift and sorting. */
void
FFSTensor::addSubTensor(const FGSTensor &t)
{
TL_RAISE_IF(dimen() != t.getDims().dimen(),
"Wrong dimensions for FFSTensor::addSubTensor");
TL_RAISE_IF(nvar() != t.getDims().getNVS().sum(),
"Wrong nvs for FFSTensor::addSubTensor");
// set shift for |addSubTensor|
/* Code shared with UFSTensor::addSubTensor() */
IntSequence shift_pre(t.getSym().num(), 0);
for (int i = 1; i < t.getSym().num(); i++)
shift_pre[i] = shift_pre[i-1]+t.getDims().getNVS()[i-1];
IntSequence shift(t.getSym(), shift_pre);
for (Tensor::index ind = t.begin(); ind != t.end(); ++ind)
{
IntSequence c(ind.getCoor());
c.add(1, shift);
c.sort();
Tensor::index tar(this, c);
addColumn(t, *ind, *tar);
}
}
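/* Illustrative example (an added note, not part of the original source):
for nvs=(3,2) and symmetry (2,1) we get shift_pre=(0,3) and hence
shift=(0,0,3); the y^2u coordinate (1,2,0) becomes (1,2,3) after the
shift, and stays (1,2,3) after sorting. */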
// |UFSTensor| contraction constructor
/* This is a bit more straightforward than the |FFSTensor| contraction
constructor above. We do not add column by column; we proceed by
submatrices, thanks to the regularity of the unfolded tensor. */
UFSTensor::UFSTensor(const UFSTensor &t, const ConstVector &x)
: UTensor(along_col, IntSequence(t.dimen()-1, t.nvar()),
t.nrows(), calcMaxOffset(t.nvar(), t.dimen()-1), t.dimen()-1),
nv(t.nvar())
{
TL_RAISE_IF(t.dimen() < 1,
"Wrong dimension for tensor contraction of UFSTensor");
TL_RAISE_IF(t.nvar() != x.length(),
"Wrong number of variables for tensor contraction of UFSTensor");
zeros();
for (int i = 0; i < ncols(); i++)
{
ConstTwoDMatrix tpart(t, i*nvar(), nvar());
Vector outcol(*this, i);
tpart.multaVec(outcol, x);
}
}
/* Here we convert a folded full symmetry tensor to an unfolded one. We copy
all columns of the folded tensor, and then call |unfoldData()|. */
UFSTensor::UFSTensor(const FFSTensor &ft)
: UTensor(along_col, IntSequence(ft.dimen(), ft.nvar()),
ft.nrows(), calcMaxOffset(ft.nvar(), ft.dimen()), ft.dimen()),
nv(ft.nvar())
{
for (index src = ft.begin(); src != ft.end(); ++src)
{
index in(this, src.getCoor());
copyColumn(ft, *src, *in);
}
unfoldData();
}
/* Here we just return a reference to a new instance of the folded tensor. */
FTensor &
UFSTensor::fold() const
{
return *(new FFSTensor(*this));
}
// |UFSTensor| increment and decrement
/* Here we just call the respective |UTensor| static methods. */
void
UFSTensor::increment(IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in UFSTensor::increment");
UTensor::increment(v, nv);
}
void
UFSTensor::decrement(IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in UFSTensor::decrement");
UTensor::decrement(v, nv);
}
int
UFSTensor::getOffset(const IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input vector size in UFSTensor::getOffset");
return UTensor::getOffset(v, nv);
}
/* This is very similar to |FFSTensor::addSubTensor| above. The only
difference is in the direction of the shift. We go through all columns
in the full symmetry tensor and cancel the shift. If the coordinates
after the cancellation are positive, we find the column in the general
symmetry tensor, and add it. */
void
UFSTensor::addSubTensor(const UGSTensor &t)
{
TL_RAISE_IF(dimen() != t.getDims().dimen(),
"Wrong dimensions for UFSTensor::addSubTensor");
TL_RAISE_IF(nvar() != t.getDims().getNVS().sum(),
"Wrong nvs for UFSTensor::addSubTensor");
// set shift for |addSubTensor|
/* Code shared with FFSTensor::addSubTensor() */
IntSequence shift_pre(t.getSym().num(), 0);
for (int i = 1; i < t.getSym().num(); i++)
shift_pre[i] = shift_pre[i-1]+t.getDims().getNVS()[i-1];
IntSequence shift(t.getSym(), shift_pre);
for (Tensor::index tar = begin(); tar != end(); ++tar)
{
IntSequence c(tar.getCoor());
c.sort();
c.add(-1, shift);
if (c.isPositive() && c.less(t.getDims().getNVX()))
{
Tensor::index from(&t, c);
addColumn(t, *from, *tar);
}
}
}
/* Here we go through all columns, find the column of the corresponding
folded index, and then copy the column data. Finding the index is done by
sorting the integer sequence. */
void
UFSTensor::unfoldData()
{
for (index in = begin(); in != end(); ++in)
{
IntSequence v(in.getCoor());
v.sort();
index tmp(this, v);
copyColumn(*tmp, *in);
}
}
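/* Walk-through of |unfoldData| (an added note, not part of the original
source): with nv=2 and dimen()=2 the unfolded columns are, in order,
(0,0), (0,1), (1,0), (1,1); sorting (1,0) gives (0,1), so the column
(1,0) receives a copy of the column (0,1), while the monotone columns
copy onto themselves. */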

dynare++/tl/cc/fs_tensor.cweb

@ -1,306 +0,0 @@
@q $Id: fs_tensor.cweb 280 2005-06-13 09:40:02Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@ Start of {\tt fs\_tensor.cpp} file.
@c
#include "fs_tensor.h"
#include "gs_tensor.h"
#include "sparse_tensor.h"
#include "rfs_tensor.h"
#include "tl_exception.h"
@<|FFSTensor| contraction constructor@>;
@<|FFSTensor::calcMaxOffset| code@>;
@<|FFSTensor| conversion from sparse@>;
@<|FFSTensor| conversion from unfolded@>;
@<|FFSTensor::unfold| code@>;
@<|FFSTensor::increment| code@>;
@<|FFSTensor::decrement| code@>;
@<|FFSTensor::getOffset| code@>;
@<|FFSTensor::addSubTensor| code@>;
@<|UFSTensor| contraction constructor@>;
@<|UFSTensor| conversion from folded@>;
@<|UFSTensor::fold| code@>;
@<|UFSTensor| increment and decrement@>;
@<|UFSTensor::getOffset| code@>;
@<|UFSTensor::addSubTensor| code@>;
@<|UFSTensor::unfoldData| code@>;
@ This constructs a fully symmetric tensor as given by the contraction:
$$\left[g_{y^n}\right]_{\alpha_1\ldots\alpha_n}=
\left[t_{y^{n+1}}\right]_{\alpha_1\ldots\alpha_n\beta}[x]^\beta$$
We go through all columns of output tensor $[g]$ and for each column
we cycle through all variables, insert a variable to the column
coordinates obtaining a column of tensor $[t]$. the column is multiplied
by an appropriate item of |x| and added to the column of $[g]$ tensor.
@<|FFSTensor| contraction constructor@>=
FFSTensor::FFSTensor(const FFSTensor& t, const ConstVector& x)
: FTensor(along_col, IntSequence(t.dimen()-1, t.nvar()),
t.nrows(), calcMaxOffset(t.nvar(), t.dimen()-1), t.dimen()-1),
nv(t.nvar())
{
TL_RAISE_IF(t.dimen() < 1,
"Wrong dimension for tensor contraction of FFSTensor");
TL_RAISE_IF(t.nvar() != x.length(),
"Wrong number of variables for tensor contraction of FFSTensor");
zeros();
for (Tensor::index to = begin(); to != end(); ++to) {
for (int i = 0; i < nvar(); i++) {
IntSequence from_ind(i, to.getCoor());
Tensor::index from(&t, from_ind);
addColumn(x[i], t, *from, *to);
}
}
}
@ This returns number of indices for folded tensor with full
symmetry. Let $n$ be a number of variables |nvar| and $d$ the
dimension |dim|. Then the number of indices is $\pmatrix{n+d-1\cr d}$.
@<|FFSTensor::calcMaxOffset| code@>=
int FFSTensor::calcMaxOffset(int nvar, int d)
{
if (nvar == 0 && d == 0)
return 1;
if (nvar == 0 && d > 0)
return 0;
return noverk(nvar + d - 1, d);
}
@ The conversion from sparse tensor is clear. We go through all the
tensor and write to the dense what is found.
@<|FFSTensor| conversion from sparse@>=
FFSTensor::FFSTensor(const FSSparseTensor& t)
: FTensor(along_col, IntSequence(t.dimen(), t.nvar()),
t.nrows(), calcMaxOffset(t.nvar(), t.dimen()), t.dimen()),
nv(t.nvar())
{
zeros();
for (FSSparseTensor::const_iterator it = t.getMap().begin();
it != t.getMap().end(); ++it) {
index ind(this, (*it).first);
get((*it).second.first, *ind) = (*it).second.second;
}
}
@ The conversion from unfolded copies only columns of respective
coordinates. So we go through all the columns in the folded tensor
(this), make an index of the unfolded vector from coordinates, and
copy the column.
@<|FFSTensor| conversion from unfolded@>=
FFSTensor::FFSTensor(const UFSTensor& ut)
: FTensor(along_col, IntSequence(ut.dimen(), ut.nvar()),
ut.nrows(), calcMaxOffset(ut.nvar(), ut.dimen()), ut.dimen()),
nv(ut.nvar())
{
for (index in = begin(); in != end(); ++in) {
index src(&ut, in.getCoor());
copyColumn(ut, *src, *in);
}
}
@ Here just make a new instance and return the reference.
@<|FFSTensor::unfold| code@>=
UTensor& FFSTensor::unfold() const
{
return *(new UFSTensor(*this));
}
@ Incrementing is easy. We have to increment by calling static method
|UTensor::increment| first. In this way, we have coordinates of
unfolded tensor. Then we have to skip to the closest folded index
which corresponds to monotonizeing the integer sequence.
@<|FFSTensor::increment| code@>=
void FFSTensor::increment(IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in FFSTensor::increment");
UTensor::increment(v, nv);
v.monotone();
}
@ Decrement calls static |FTensor::decrement|.
@<|FFSTensor::decrement| code@>=
void FFSTensor::decrement(IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in FFSTensor::decrement");
FTensor::decrement(v, nv);
}
@
@<|FFSTensor::getOffset| code@>=
int FFSTensor::getOffset(const IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input vector size in FFSTensor::getOffset");
return FTensor::getOffset(v, nv);
}
@ Here we add a general symmetry tensor to the (part of) full symmetry
tensor provided that the unique variable of the full symmetry tensor
is a stack of variables from the general symmetry tensor.
We check for the dimensions and number of variables. Then we calculate
a shift of coordinates when going from the general symmetry tensor to
full symmetry (it corresponds to shift of coordinates induces by
stacking the variables). Then we add the appropriate columns by going
through the columns in general symmetry, adding the shift and sorting.
@<|FFSTensor::addSubTensor| code@>=
void FFSTensor::addSubTensor(const FGSTensor& t)
{
TL_RAISE_IF(dimen() != t.getDims().dimen(),
"Wrong dimensions for FFSTensor::addSubTensor");
TL_RAISE_IF(nvar() != t.getDims().getNVS().sum(),
"Wrong nvs for FFSTensor::addSubTensor");
@<set shift for |addSubTensor|@>;
for (Tensor::index ind = t.begin(); ind != t.end(); ++ind) {
IntSequence c(ind.getCoor());
c.add(1, shift);
c.sort();
Tensor::index tar(this, c);
addColumn(t, *ind, *tar);
}
}
@
@<set shift for |addSubTensor|@>=
IntSequence shift_pre(t.getSym().num(), 0);
for (int i = 1; i < t.getSym().num(); i++)
shift_pre[i] = shift_pre[i-1]+t.getDims().getNVS()[i-1];
IntSequence shift(t.getSym(), shift_pre);
@ This is a bit more straightforward than |@<|FFSTensor| contraction constructor@>|.
We do not add column by column but we do it by submatrices due to
regularity of the unfolded tensor.
@<|UFSTensor| contraction constructor@>=
UFSTensor::UFSTensor(const UFSTensor& t, const ConstVector& x)
: UTensor(along_col, IntSequence(t.dimen()-1, t.nvar()),
t.nrows(), calcMaxOffset(t.nvar(), t.dimen()-1), t.dimen()-1),
nv(t.nvar())
{
TL_RAISE_IF(t.dimen() < 1,
"Wrong dimension for tensor contraction of UFSTensor");
TL_RAISE_IF(t.nvar() != x.length(),
"Wrong number of variables for tensor contraction of UFSTensor");
zeros();
for (int i = 0; i < ncols(); i++) {
ConstTwoDMatrix tpart(t, i*nvar(), nvar());
Vector outcol(*this, i);
tpart.multaVec(outcol, x);
}
}
@ Here we convert folded full symmetry tensor to unfolded. We copy all
columns of folded tensor, and then call |unfoldData()|.
@<|UFSTensor| conversion from folded@>=
UFSTensor::UFSTensor(const FFSTensor& ft)
: UTensor(along_col, IntSequence(ft.dimen(), ft.nvar()),
ft.nrows(), calcMaxOffset(ft.nvar(), ft.dimen()), ft.dimen()),
nv(ft.nvar())
{
for (index src = ft.begin(); src != ft.end(); ++src) {
index in(this, src.getCoor());
copyColumn(ft, *src, *in);
}
unfoldData();
}
@ Here we just return a reference to new instance of folded tensor.
@<|UFSTensor::fold| code@>=
FTensor& UFSTensor::fold() const
{
return *(new FFSTensor(*this));
}
@ Here we just call |UTensor| respective static methods.
@<|UFSTensor| increment and decrement@>=
void UFSTensor::increment(IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in UFSTensor::increment");
UTensor::increment(v, nv);
}
void UFSTensor::decrement(IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in UFSTensor::decrement");
UTensor::decrement(v, nv);
}
@
@<|UFSTensor::getOffset| code@>=
int UFSTensor::getOffset(const IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input vector size in UFSTensor::getOffset");
return UTensor::getOffset(v, nv);
}
@ This is very similar to |@<|FFSTensor::addSubTensor| code@>|. The
only difference is the addition. We go through all columns in the full
symmetry tensor and cancel the shift. If the coordinates after the
cancellation are positive, we find the column in the general symmetry
tensor, and add it.
@<|UFSTensor::addSubTensor| code@>=
void UFSTensor::addSubTensor(const UGSTensor& t)
{
TL_RAISE_IF(dimen() != t.getDims().dimen(),
"Wrong dimensions for UFSTensor::addSubTensor");
TL_RAISE_IF(nvar() != t.getDims().getNVS().sum(),
"Wrong nvs for UFSTensor::addSubTensor");
@<set shift for |addSubTensor|@>;
for (Tensor::index tar = begin(); tar != end(); ++tar) {
IntSequence c(tar.getCoor());
c.sort();
c.add(-1, shift);
if (c.isPositive() && c.less(t.getDims().getNVX())) {
Tensor::index from(&t, c);
addColumn(t, *from, *tar);
}
}
}
@ Here we go through all columns, find a column of folded index, and
then copy the column data. Finding the index is done by sorting the
integer sequence.
@<|UFSTensor::unfoldData| code@>=
void UFSTensor::unfoldData()
{
for (index in = begin(); in != end(); ++in) {
IntSequence v(in.getCoor());
v.sort();
index tmp(this, v);
copyColumn(*tmp, *in);
}
}
@ End of {\tt fs\_tensor.cpp} file.

141
dynare++/tl/cc/fs_tensor.hh Normal file

@ -0,0 +1,141 @@
// Copyright 2004, Ondra Kamenik
// Full symmetry tensor.
/* Here we define folded and unfolded tensors for full symmetry. All
tensors defined here identify the multidimensional index with
columns. */
#ifndef FS_TENSOR_H
#define FS_TENSOR_H
#include "tensor.hh"
#include "symmetry.hh"
class FGSTensor;
class UGSTensor;
class FRSingleTensor;
class FSSparseTensor;
/* A folded tensor with full symmetry maintains only information about the
number of symmetric variables |nv|. Further, we implement what is
left from the superclass |FTensor|.
We implement |getOffset|, which should be used with care because of
its complexity.
We implement a method adding a given general symmetry tensor to the
full symmetry tensor, supposing the variables of the general symmetry
tensor are stacked, giving only one variable of the full symmetry
tensor. For instance, if $x=[y^T, u^T]^T$, then we can add the tensor
$\left[g_{y^2u}\right]$ to the tensor $g_{x^3}$. This is done in the
method |addSubTensor|. Consult the |FGSTensor| class declaration to learn
what a general symmetry tensor is. */
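/* Concrete instance of the stacking above (an added note, not part of the
original source): with $x=[y^T,u^T]^T$, $n_y=2$ and $n_u=1$, the tensor
$\left[g_{y^2u}\right]$ has nvs=(2,1), summing to the 3 variables of
$g_{x^3}$; a $y^2u$ coordinate $(i,j,k)$ maps to the sorted $x^3$
coordinate of $(i,j,k+2)$, since the $u$ block starts at offset 2 in the
stack. */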
class UFSTensor;
class FFSTensor : public FTensor
{
int nv;
public:
/* Here are the constructors. The second constructor constructs a
tensor by one-dimensional contraction from the higher dimensional
tensor |t|. That is, it constructs the tensor
$$\left[g_{y^n}\right]_{\alpha_1\ldots\alpha_n}=
\left[t_{y^{n+1}}\right]_{\alpha_1\ldots\alpha_n\beta}[x]^\beta$$
See the implementation of the contraction constructor for details.
The next constructor converts from a sparse tensor (which is fully
symmetric and folded by nature).
The fourth constructs the object from an unfolded fully symmetric tensor.
The fifth constructs a subtensor of selected rows. */
FFSTensor(int r, int nvar, int d)
: FTensor(along_col, IntSequence(d, nvar),
r, calcMaxOffset(nvar, d), d), nv(nvar)
{
}
FFSTensor(const FFSTensor &t, const ConstVector &x);
FFSTensor(const FSSparseTensor &t);
FFSTensor(const FFSTensor &ft)
: FTensor(ft), nv(ft.nv)
{
}
FFSTensor(const UFSTensor &ut);
FFSTensor(int first_row, int num, FFSTensor &t)
: FTensor(first_row, num, t), nv(t.nv)
{
}
void increment(IntSequence &v) const;
void decrement(IntSequence &v) const;
UTensor &unfold() const;
Symmetry
getSym() const
{
return Symmetry(dimen());
}
int getOffset(const IntSequence &v) const;
void addSubTensor(const FGSTensor &t);
int
nvar() const
{
return nv;
}
static int calcMaxOffset(int nvar, int d);
};
/* The unfolded fully symmetric tensor is almost the same in structure as
|FFSTensor|, except for the method |unfoldData|. It takes the columns which
also exist in the folded version and copies them to all their symmetric
locations. This is useful when constructing an unfolded tensor from a
folded one. */
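/* For example (an added note, not part of the original source): with
nvar=2 and d=2 the unfolded tensor has calcMaxOffset(2,2) = power(2,2) = 4
columns, whereas the folded one has FFSTensor::calcMaxOffset(2,2) = 3; the
extra unfolded column (1,0) lies in the same symmetry class as (0,1). */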
class UFSTensor : public UTensor
{
int nv;
public:
UFSTensor(int r, int nvar, int d)
: UTensor(along_col, IntSequence(d, nvar),
r, calcMaxOffset(nvar, d), d), nv(nvar)
{
}
UFSTensor(const UFSTensor &t, const ConstVector &x);
UFSTensor(const UFSTensor &ut)
: UTensor(ut), nv(ut.nv)
{
}
UFSTensor(const FFSTensor &ft);
UFSTensor(int first_row, int num, UFSTensor &t)
: UTensor(first_row, num, t), nv(t.nv)
{
}
void increment(IntSequence &v) const;
void decrement(IntSequence &v) const;
FTensor &fold() const;
Symmetry
getSym() const
{
return Symmetry(dimen());
}
int getOffset(const IntSequence &v) const;
void addSubTensor(const UGSTensor &t);
int
nvar() const
{
return nv;
}
static int
calcMaxOffset(int nvar, int d)
{
return power(nvar, d);
}
private:
void unfoldData();
};
#endif

dynare++/tl/cc/fs_tensor.hweb

@ -1,129 +0,0 @@
@q $Id: fs_tensor.hweb 741 2006-05-09 11:12:46Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 Full symmetry tensor. Start of {\tt fs\_tensor.h} file.
Here we define folded and unfolded tensors for full symmetry. All
tensors from here are identifying the multidimensional index with
columns.
@c
#ifndef FS_TENSOR_H
#define FS_TENSOR_H
#include "tensor.h"
#include "symmetry.h"
class FGSTensor;
class UGSTensor;
class FRSingleTensor;
class FSSparseTensor;
@<|FFSTensor| class declaration@>;
@<|UFSTensor| class declaration@>;
#endif
@ Folded tensor with full symmetry maintains only information about
number of symmetrical variables |nv|. Further, we implement what is
left from the super class |FTensor|.
We implement |getOffset| which should be used with care since
its complexity.
We implement a method adding a given general symmetry tensor to the
full symmetry tensor supposing the variables of the general symmetry
tensor are stacked giving only one variable of the full symmetry
tensor. For instance, if $x=[y^T, u^T]^T$, then we can add tensor
$\left[g_{y^2u}\right]$ to tensor $g_{x^3}$. This is done in method
|addSubTensor|. Consult |@<|FGSTensor| class declaration@>| to know
what is general symmetry tensor.
@<|FFSTensor| class declaration@>=
class UFSTensor;
class FFSTensor : public FTensor {
int nv;
public:@;
@<|FFSTensor| constructor declaration@>;
void increment(IntSequence& v) const;
void decrement(IntSequence& v) const;
UTensor& unfold() const;
Symmetry getSym() const
{@+ return Symmetry(dimen());@+}
int getOffset(const IntSequence& v) const;
void addSubTensor(const FGSTensor& t);
int nvar() const
{@+return nv;@+}
static int calcMaxOffset(int nvar, int d);
};
@ Here are the constructors. The second constructor constructs a
tensor by one-dimensional contraction from the higher dimensional
tensor |t|. This is, it constructs a tensor
$$\left[g_{y^n}\right]_{\alpha_1\ldots\alpha_n}=
\left[t_{y^{n+1}}\right]_{\alpha_1\ldots\alpha_n\beta}[x]^\beta$$
See implementation |@<|FFSTensor| contraction constructor@>| for details.
The next constructor converts from sparse tensor (which is fully
symmetric and folded by nature).
The fourth constructs object from unfolded fully symmetric.
The fifth constructs a subtensor of selected rows.
@<|FFSTensor| constructor declaration@>=
FFSTensor(int r, int nvar, int d)
: FTensor(along_col, IntSequence(d, nvar),
r, calcMaxOffset(nvar, d), d), nv(nvar)@+ {}
FFSTensor(const FFSTensor& t, const ConstVector& x);
FFSTensor(const FSSparseTensor& t);
FFSTensor(const FFSTensor& ft)
: FTensor(ft), nv(ft.nv)@+ {}
FFSTensor(const UFSTensor& ut);
FFSTensor(int first_row, int num, FFSTensor& t)
: FTensor(first_row, num, t), nv(t.nv)@+ {}
@ Unfolded fully symmetric tensor is almost the same in structure as
|FFSTensor|, but the method |unfoldData|. It takes columns which also
exist in folded version and copies them to all their symmetrical
locations. This is useful when constructing unfolded tensor from
folded one.
@<|UFSTensor| class declaration@>=
class UFSTensor : public UTensor {
int nv;
public:@;
@<|UFSTensor| constructor declaration@>;
void increment(IntSequence& v) const;
void decrement(IntSequence& v) const;
FTensor& fold() const;
Symmetry getSym() const
{@+ return Symmetry(dimen());@+}
int getOffset(const IntSequence& v) const;
void addSubTensor(const UGSTensor& t);
int nvar() const
{@+ return nv;@+}
static int calcMaxOffset(int nvar, int d)
{@+ return power(nvar, d);@+}
private:@;
void unfoldData();
};
@
@<|UFSTensor| constructor declaration@>=
UFSTensor(int r, int nvar, int d)
: UTensor(along_col, IntSequence(d, nvar),
r, calcMaxOffset(nvar, d), d), nv(nvar)@+ {}
UFSTensor(const UFSTensor& t, const ConstVector& x);
UFSTensor(const UFSTensor& ut)
: UTensor(ut), nv(ut.nv)@+ {}
UFSTensor(const FFSTensor& ft);
UFSTensor(int first_row, int num, UFSTensor& t)
: UTensor(first_row, num, t), nv(t.nv)@+ {}
@ End of {\tt fs\_tensor.h} file.

490
dynare++/tl/cc/gs_tensor.cc Normal file

@ -0,0 +1,490 @@
// Copyright 2004, Ondra Kamenik
#include "gs_tensor.hh"
#include "sparse_tensor.hh"
#include "tl_exception.hh"
#include "kron_prod.hh"
/* This constructs the tensor dimensions for slicing. See the
|TensorDimens| class declaration for details. */
TensorDimens::TensorDimens(const IntSequence &ss, const IntSequence &coor)
: nvs(ss),
sym(ss.size(), ""),
nvmax(coor.size(), 0)
{
TL_RAISE_IF(!coor.isSorted(),
"Coordinates not sorted in TensorDimens slicing constructor");
TL_RAISE_IF(coor[0] < 0 || coor[coor.size()-1] >= ss.size(),
"A coordinate out of stack range in TensorDimens slicing constructor");
for (int i = 0; i < coor.size(); i++)
{
sym[coor[i]]++;
nvmax[i] = ss[coor[i]];
}
}
/* The number of unfolded offsets is the product of all members of |nvmax|. */
int
TensorDimens::calcUnfoldMaxOffset() const
{
return nvmax.mult();
}
/* The number of folded offsets is the product, over the equivalence
classes of the symmetry, of the number of folded offsets within each
class. */
int
TensorDimens::calcFoldMaxOffset() const
{
int res = 1;
for (int i = 0; i < nvs.size(); i++)
{
if (nvs[i] == 0 && sym[i] > 0)
return 0;
if (sym[i] > 0)
res *= Tensor::noverk(nvs[i]+sym[i]-1, sym[i]);
}
return res;
}
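/* Worked instance (an added note, not part of the original source): for
nvs=(10,5) and sym=(2,3) the folded size is noverk(11,2)*noverk(7,3) =
55*35 = 1925, while the unfolded size is nvmax.mult() = 10*10*5*5*5 =
12500. */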
/* Here we implement the offset calculation for the folded general symmetry
tensor. The offset of a given sequence is calculated by breaking the
sequence into subsequences according to the symmetry. The offset is
orthogonal with respect to the blocks; this means that indexing within
the blocks is independent. If there are two blocks, for instance, then
the offset will be the offset within the outer block (the first)
multiplied by the number of all offsets of the inner block (the last),
plus the offset within the second block.
Generally, the resulting offset $r$ will be
$$\sum_{i=1}^s r_i\cdot\left(\prod_{j=i+1}^sn_j\right),$$
where $s$ is the number of blocks (|getSym().num()|), $r_i$ is the offset
within the $i$-th block, and $n_j$ is the number of all offsets in the
$j$-th block.
In the code, we go from the innermost to the outermost block, maintaining
the product in |pow|. */
int
TensorDimens::calcFoldOffset(const IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input vector size in TensorDimens::getFoldOffset");
int res = 0;
int pow = 1;
int blstart = v.size();
for (int ibl = getSym().num()-1; ibl >= 0; ibl--)
{
int bldim = getSym()[ibl];
if (bldim > 0)
{
blstart -= bldim;
int blnvar = getNVX()[blstart];
IntSequence subv(v, blstart, blstart+bldim);
res += FTensor::getOffset(subv, blnvar)*pow;
pow *= FFSTensor::calcMaxOffset(blnvar, bldim);
}
}
TL_RAISE_IF(blstart != 0,
"Error in tracing symmetry in TensorDimens::getFoldOffset");
return res;
}
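/* Worked instance of the formula above (an added note, not part of the
original source): take sym=(2,1) and nvs=(3,2), so nvmax=(3,3,2). For
v=(1,2,0), the inner block (0) has folded offset 0 among its 2 offsets,
and the outer block (1,2) has folded offset 4 among the noverk(4,2)=6
monotone pairs (0,0),(0,1),(0,2),(1,1),(1,2),(2,2); hence the result is
4*2 + 0 = 8. */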
/* In order to find the predecessor of an index within the folded generally
symmetric tensor, note that a decrease action in the $i$-th partition of
symmetric indices can happen only if all indices in all subsequent
partitions are zero. The decrease action of the whole index then
consists of a decrease action on the first nonzero partition from the
right, together with setting the trailing zero partitions to their
maximum indices.
So we set |iblock| to the index of the last partition. During the
execution, |block_first| and |block_last| will point to the first
element of partition |iblock| and to the first element of the following
block, respectively.
Then we check for all trailing zero partitions, set them to their
maximums, and move |iblock| back to the first non-zero partition
(or the first partition). Then, for this partition, we decrease the
index (fully symmetrically within that partition). */
void
TensorDimens::decrement(IntSequence &v) const
{
TL_RAISE_IF(getNVX().size() != v.size(),
"Wrong size of input/output sequence in TensorDimens::decrement");
int iblock = getSym().num()-1;
int block_last = v.size();
int block_first = block_last-getSym()[iblock];
// check for zero trailing blocks
while (iblock > 0 && v[block_last-1] == 0)
{
for (int i = block_first; i < block_last; i++)
v[i] = getNVX(i); // equivalent to |nvs[iblock]|
iblock--;
block_last = block_first;
block_first -= getSym()[iblock];
}
// decrease the non-zero block
IntSequence vtmp(v, block_first, block_last);
FTensor::decrement(vtmp, getNVX(block_first));
}
// |FGSTensor| conversion from |UGSTensor|
/* Here we go through the columns of the folded tensor, calculate the
corresponding column of the unfolded one, and copy the data. */
FGSTensor::FGSTensor(const UGSTensor &ut)
: FTensor(along_col, ut.tdims.getNVX(), ut.nrows(),
ut.tdims.calcFoldMaxOffset(), ut.dimen()),
tdims(ut.tdims)
{
for (index ti = begin(); ti != end(); ++ti)
{
index ui(&ut, ti.getCoor());
copyColumn(ut, *ui, *ti);
}
}
// |FGSTensor| slicing from |FSSparseTensor|
/* Here is the code of the slicing constructor from the sparse tensor. We
first calculate the coordinates of the first and last index of the slice
within the sparse tensor (these are |lb| and |ub|), and then we
iterate through all items between them (in the lexicographical ordering
of the sparse tensor) and check whether an item is between |lb| and |ub|
in the Cartesian ordering (this corresponds to belonging to the
slice). If it belongs, then we subtract the lower bound |lb| to
obtain coordinates in the |this| tensor, and we copy the item. */
FGSTensor::FGSTensor(const FSSparseTensor &t, const IntSequence &ss,
const IntSequence &coor, const TensorDimens &td)
: FTensor(along_col, td.getNVX(), t.nrows(),
td.calcFoldMaxOffset(), td.dimen()),
tdims(td)
{
// set |lb| and |ub| to lower and upper bounds of indices
/* Here we first set |s_offsets| to the offsets of the partitions whose
lengths are given by |ss|; so |s_offsets| is a cumulative sum of |ss|.
Then we create |lb| to be the coordinates of the first possible index of
the slice, and |ub| to be the coordinates of the last possible index of
the slice. */
IntSequence s_offsets(ss.size(), 0);
for (int i = 1; i < ss.size(); i++)
s_offsets[i] = s_offsets[i-1] + ss[i-1];
IntSequence lb(coor.size());
IntSequence ub(coor.size());
for (int i = 0; i < coor.size(); i++)
{
lb[i] = s_offsets[coor[i]];
ub[i] = s_offsets[coor[i]] + ss[coor[i]] - 1;
}
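/* For instance (an added note, not part of the original source): with
ss=(2,3,2) and coor=(1,1,2) we get s_offsets=(0,2,5), so lb=(2,2,5) and
ub=(4,4,6). */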
zeros();
FSSparseTensor::const_iterator lbi = t.getMap().lower_bound(lb);
FSSparseTensor::const_iterator ubi = t.getMap().upper_bound(ub);
for (FSSparseTensor::const_iterator run = lbi; run != ubi; ++run)
{
if (lb.lessEq((*run).first) && (*run).first.lessEq(ub))
{
IntSequence c((*run).first);
c.add(-1, lb);
Tensor::index ind(this, c);
TL_RAISE_IF(*ind < 0 || *ind >= ncols(),
"Internal error in slicing constructor of FGSTensor");
get((*run).second.first, *ind) = (*run).second.second;
}
}
}
// |FGSTensor| slicing from |FFSTensor|
/* The code is similar to the slicing constructor from |FSSparseTensor| above. */
FGSTensor::FGSTensor(const FFSTensor &t, const IntSequence &ss,
const IntSequence &coor, const TensorDimens &td)
: FTensor(along_col, td.getNVX(), t.nrows(),
td.calcFoldMaxOffset(), td.dimen()),
tdims(td)
{
if (ncols() == 0)
return;
// set |lb| and |ub| to lower and upper bounds of indices
/* Same code as in the previous converting constructor */
IntSequence s_offsets(ss.size(), 0);
for (int i = 1; i < ss.size(); i++)
s_offsets[i] = s_offsets[i-1] + ss[i-1];
IntSequence lb(coor.size());
IntSequence ub(coor.size());
for (int i = 0; i < coor.size(); i++)
{
lb[i] = s_offsets[coor[i]];
ub[i] = s_offsets[coor[i]] + ss[coor[i]] - 1;
}
zeros();
Tensor::index lbi(&t, lb);
Tensor::index ubi(&t, ub);
++ubi;
for (Tensor::index run = lbi; run != ubi; ++run)
{
if (lb.lessEq(run.getCoor()) && run.getCoor().lessEq(ub))
{
IntSequence c(run.getCoor());
c.add(-1, lb);
Tensor::index ind(this, c);
TL_RAISE_IF(*ind < 0 || *ind >= ncols(),
"Internal error in slicing constructor of FGSTensor");
copyColumn(t, *run, *ind);
}
}
}
// |FGSTensor| conversion from |GSSparseTensor|
FGSTensor::FGSTensor(const GSSparseTensor &t)
: FTensor(along_col, t.getDims().getNVX(), t.nrows(),
t.getDims().calcFoldMaxOffset(), t.dimen()), tdims(t.getDims())
{
zeros();
for (FSSparseTensor::const_iterator it = t.getMap().begin();
it != t.getMap().end(); ++it)
{
index ind(this, (*it).first);
get((*it).second.first, *ind) = (*it).second.second;
}
}
/* First we increment as unfolded, then we must monotonize within
partitions defined by the symmetry. This is done by
|IntSequence::pmonotone|. */
void
FGSTensor::increment(IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in FGSTensor::increment");
UTensor::increment(v, tdims.getNVX());
v.pmonotone(tdims.getSym());
}
/* Return the unfolded version of the tensor. */
UTensor &
FGSTensor::unfold() const
{
return *(new UGSTensor(*this));
}
/* Here we implement the contraction
$$\left[r_{x^iz^k}\right]_{\alpha_1\ldots\alpha_i\gamma_1\ldots\gamma_k}=
\left[t_{x^iy^jz^k}\right]_{\alpha_1\ldots\alpha_i\beta_1\ldots\beta_j\gamma_1\ldots\gamma_k}
\left[c\right]^{\beta_1\ldots\beta_j}
$$
More generally, $x^i$ and $z^k$ can also represent general symmetries.
The operation can be rewritten as the matrix product
$$\left[t_{x^iy^jz^k}\right]\cdot\left(I_l\otimes c\otimes I_r\right)$$
where $l$ is the number of columns of the tensor with the symmetry on the
left (i.e. $x^i$), and $r$ is the number of columns of the tensor with the
symmetry on the right (i.e. $z^k$). The code proceeds accordingly: we first
form the two symmetries |sym_left| and |sym_right|, then calculate the
numbers of columns |dleft|$=l$ and |dright|$=r$, form the Kronecker
product, and multiply and add.
The input parameter |i| is the order of the variable being contracted,
starting from 0. */
void
FGSTensor::contractAndAdd(int i, FGSTensor &out,
const FRSingleTensor &col) const
{
TL_RAISE_IF(i < 0 || i >= getSym().num(),
"Wrong index for FGSTensor::contractAndAdd");
TL_RAISE_IF(getSym()[i] != col.dimen() || tdims.getNVS()[i] != col.nvar(),
"Wrong dimensions for FGSTensor::contractAndAdd");
// set |sym_left| and |sym_right| to symmetries around |i|
/* Here we have the symmetry of |this| tensor, and we have to set
|sym_left| to the subsymmetry left of the |i|-th variable and
|sym_right| to the subsymmetry right of the |i|-th variable. So we
first copy the whole symmetry, and then put zeros to the left for
|sym_right| and to the right for |sym_left|. */
Symmetry sym_left(getSym());
Symmetry sym_right(getSym());
for (int j = 0; j < getSym().num(); j++)
{
if (j <= i)
sym_right[j] = 0;
if (j >= i)
sym_left[j] = 0;
}
int dleft = TensorDimens(sym_left, tdims.getNVS()).calcFoldMaxOffset();
int dright = TensorDimens(sym_right, tdims.getNVS()).calcFoldMaxOffset();
KronProdAll kp(3);
kp.setUnit(0, dleft);
kp.setMat(1, col);
kp.setUnit(2, dright);
FGSTensor tmp(out.nrows(), out.getDims());
kp.mult(*this, tmp);
out.add(1.0, tmp);
}
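/* Dimension check of the Kronecker factors (an added note, not part of
the original source): for sym=(1,1,1), nvs=(2,3,4) and i=1, we get
sym_left=(1,0,0) and sym_right=(0,0,1), hence dleft=2 and dright=4, and
the product is $I_2\otimes c\otimes I_4$. */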
/* Here we go through the folded tensor, convert each index to an index
of the unfolded tensor, and copy the data to the unfolded tensor. Then we
unfold the data within the unfolded tensor. */
UGSTensor::UGSTensor(const FGSTensor &ft)
: UTensor(along_col, ft.tdims.getNVX(), ft.nrows(),
ft.tdims.calcUnfoldMaxOffset(), ft.dimen()),
tdims(ft.tdims)
{
for (index fi = ft.begin(); fi != ft.end(); ++fi)
{
index ui(this, fi.getCoor());
copyColumn(ft, *fi, *ui);
}
unfoldData();
}
// |UGSTensor| slicing from |FSSparseTensor|
/* This makes a folded slice from the sparse tensor and unfolds it. */
UGSTensor::UGSTensor(const FSSparseTensor &t, const IntSequence &ss,
const IntSequence &coor, const TensorDimens &td)
: UTensor(along_col, td.getNVX(), t.nrows(),
td.calcUnfoldMaxOffset(), td.dimen()),
tdims(td)
{
if (ncols() == 0)
return;
FGSTensor ft(t, ss, coor, td);
for (index fi = ft.begin(); fi != ft.end(); ++fi)
{
index ui(this, fi.getCoor());
copyColumn(ft, *fi, *ui);
}
unfoldData();
}
// |UGSTensor| slicing from |UFSTensor|
/* This makes a folded slice from the dense tensor and unfolds it. */
UGSTensor::UGSTensor(const UFSTensor &t, const IntSequence &ss,
const IntSequence &coor, const TensorDimens &td)
: UTensor(along_col, td.getNVX(), t.nrows(),
td.calcUnfoldMaxOffset(), td.dimen()),
tdims(td)
{
FFSTensor folded(t);
FGSTensor ft(folded, ss, coor, td);
for (index fi = ft.begin(); fi != ft.end(); ++fi)
{
index ui(this, fi.getCoor());
copyColumn(ft, *fi, *ui);
}
unfoldData();
}
// |UGSTensor| increment and decrement codes
/* Clear: we just call the respective |UTensor| static methods. */
void
UGSTensor::increment(IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in UGSTensor::increment");
UTensor::increment(v, tdims.getNVX());
}
void
UGSTensor::decrement(IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in UGSTensor::decrement");
UTensor::decrement(v, tdims.getNVX());
}
/* Return a new instance of the folded version. */
FTensor &
UGSTensor::fold() const
{
return *(new FGSTensor(*this));
}
/* Return an offset of a given index. */
int
UGSTensor::getOffset(const IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input vector size in UGSTensor::getOffset");
return UTensor::getOffset(v, tdims.getNVX());
}
/* Unfold all data. We go through all the columns, and for each we
obtain the index of its first equivalent and copy the data. */
void
UGSTensor::unfoldData()
{
for (index in = begin(); in != end(); ++in)
copyColumn(*(getFirstIndexOf(in)), *in);
}
/* Here we return the first index which is equivalent to the given index
under the symmetry. It is a matter of sorting all the symmetry
partitions of the index. */
Tensor::index
UGSTensor::getFirstIndexOf(const index &in) const
{
IntSequence v(in.getCoor());
int last = 0;
for (int i = 0; i < tdims.getSym().num(); i++)
{
IntSequence vtmp(v, last, last+tdims.getSym()[i]);
vtmp.sort();
last += tdims.getSym()[i];
}
return index(this, v);
}
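/* For example (an added note, not part of the original source): with
symmetry (2,2), the index (3,1,5,2) sorts partition-wise to (1,3,2,5),
which is the first index of its symmetry class. */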
/* This is exactly the same code, with the same semantics, as
|FGSTensor::contractAndAdd|. */
void
UGSTensor::contractAndAdd(int i, UGSTensor &out,
const URSingleTensor &col) const
{
TL_RAISE_IF(i < 0 || i >= getSym().num(),
"Wrong index for UGSTensor::contractAndAdd");
TL_RAISE_IF(getSym()[i] != col.dimen() || tdims.getNVS()[i] != col.nvar(),
"Wrong dimensions for UGSTensor::contractAndAdd");
// set |sym_left| and |sym_right| to symmetries around |i|
/* Same code as in FGSTensor::contractAndAdd */
Symmetry sym_left(getSym());
Symmetry sym_right(getSym());
for (int j = 0; j < getSym().num(); j++)
{
if (j <= i)
sym_right[j] = 0;
if (j >= i)
sym_left[j] = 0;
}
int dleft = TensorDimens(sym_left, tdims.getNVS()).calcUnfoldMaxOffset();
int dright = TensorDimens(sym_right, tdims.getNVS()).calcUnfoldMaxOffset();
KronProdAll kp(3);
kp.setUnit(0, dleft);
kp.setMat(1, col);
kp.setUnit(2, dright);
UGSTensor tmp(out.nrows(), out.getDims());
kp.mult(*this, tmp);
out.add(1.0, tmp);
}

dynare++/tl/cc/gs_tensor.cweb

@ -1,501 +0,0 @@
@q $Id: gs_tensor.cweb 425 2005-08-16 15:18:01Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@ Start of {\tt gs\_tensor.cpp} file.
@c
#include "gs_tensor.h"
#include "sparse_tensor.h"
#include "tl_exception.h"
#include "kron_prod.h"
@<|TensorDimens| constructor code@>;
@<|TensorDimens::calcUnfoldMaxOffset| code@>;
@<|TensorDimens::calcFoldMaxOffset| code@>;
@<|TensorDimens::calcFoldOffset| code@>;
@<|TensorDimens::decrement| code@>;
@<|FGSTensor| conversion from |UGSTensor|@>;
@<|FGSTensor| slicing from |FSSparseTensor|@>;
@<|FGSTensor| slicing from |FFSTensor|@>;
@<|FGSTensor| conversion from |GSSparseTensor|@>;
@<|FGSTensor::increment| code@>;
@<|FGSTensor::unfold| code@>;
@<|FGSTensor::contractAndAdd| code@>;
@<|UGSTensor| conversion from |FGSTensor|@>;
@<|UGSTensor| slicing from |FSSparseTensor|@>;
@<|UGSTensor| slicing from |UFSTensor|@>;
@<|UGSTensor| increment and decrement codes@>;
@<|UGSTensor::fold| code@>;
@<|UGSTensor::getOffset| code@>;
@<|UGSTensor::unfoldData| code@>;
@<|UGSTensor::getFirstIndexOf| code@>;
@<|UGSTensor::contractAndAdd| code@>;
@ This constructs the tensor dimensions for slicing. See
|@<|TensorDimens| class declaration@>| for details.
@<|TensorDimens| constructor code@>=
TensorDimens::TensorDimens(const IntSequence& ss, const IntSequence& coor)
: nvs(ss),
sym(ss.size(), ""),
nvmax(coor.size(), 0)
{
TL_RAISE_IF(! coor.isSorted(),
"Coordinates not sorted in TensorDimens slicing constructor");
TL_RAISE_IF(coor[0] < 0 || coor[coor.size()-1] >= ss.size(),
"A coordinate out of stack range in TensorDimens slicing constructor");
for (int i = 0; i < coor.size(); i++) {
sym[coor[i]]++;
nvmax[i] = ss[coor[i]];
}
}
@ Number of unfold offsets is a product of all members of |nvmax|.
@<|TensorDimens::calcUnfoldMaxOffset| code@>=
int TensorDimens::calcUnfoldMaxOffset() const
{
return nvmax.mult();
}
@ Number of folded offsets is a product of all unfold offsets within
each equivalence class of the symmetry.
@<|TensorDimens::calcFoldMaxOffset| code@>=
int TensorDimens::calcFoldMaxOffset() const
{
int res = 1;
for (int i = 0; i < nvs.size(); i++) {
if (nvs[i] == 0 && sym[i] > 0)
return 0;
if (sym[i] > 0)
res *= Tensor::noverk(nvs[i]+sym[i]-1, sym[i]);
}
return res;
}
@ Here we implement offset calculation for folded general symmetry
tensor. The offset of a given sequence is calculated by breaking the
sequence to subsequences according to the symmetry. The offset is
orthogonal with respect to the blocks, this means that indexing within
the blocks is independent. If there are two blocks, for instance, then
the offset will be an offset within the outer block (the first)
multiplied with all offsets of the inner block (last) plus an offset
within the second block.
Generally, the resulting offset $r$ will be
$$\sum_{i=1}^s r_i\cdot\left(\prod_{j=i+1}^sn_j\right),$$
where $s$ is a number of blocks (|getSym().num()|), $r_i$ is an offset
within $i$-th block, and $n_j$ is a number of all offsets in $j$-th
block.
In the code, we go from the innermost to the outermost, maintaining the
product in |pow|.
@<|TensorDimens::calcFoldOffset| code@>=
int TensorDimens::calcFoldOffset(const IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input vector size in TensorDimens::getFoldOffset");
int res = 0;
int pow = 1;
int blstart = v.size();
for (int ibl = getSym().num()-1; ibl >= 0; ibl--) {
int bldim = getSym()[ibl];
if (bldim > 0) {
blstart -= bldim;
int blnvar = getNVX()[blstart];
IntSequence subv(v, blstart, blstart+bldim);
res += FTensor::getOffset(subv, blnvar)*pow;
pow *= FFSTensor::calcMaxOffset(blnvar, bldim);
}
}
TL_RAISE_IF(blstart != 0,
"Error in tracing symmetry in TensorDimens::getFoldOffset");
return res;
}
@ In order to find the predecessor of index within folded generally
symmetric tensor, note, that a decrease action in $i$-th partition of
symmetric indices can happen only if all indices in all subsequent
partitions are zero. Then the decrease action of whole the index
consists of decrease action of the first nonzero partition from the
right, and setting these trailing zero partitions to their maximum
indices.
So we set |iblock| to the number of last partitions. During the
execution, |block_first|, and |block_last| will point to the first
element of |iblock| and, first element of following block.
Then we check for all trailing zero partitions, set them to their
maximums and return |iblock| to point to the first non-zero partition
(or the first partition). Then for this partition, we decrease the
index (fully symmetric within that partition).
@<|TensorDimens::decrement| code@>=
void TensorDimens::decrement(IntSequence& v) const
{
TL_RAISE_IF(getNVX().size() != v.size(),
"Wrong size of input/output sequence in TensorDimens::decrement");
int iblock = getSym().num()-1;
int block_last = v.size();
int block_first = block_last-getSym()[iblock];
@<check for zero trailing blocks@>;
@<decrease the non-zero block@>;
}
@
@<check for zero trailing blocks@>=
while (iblock > 0 && v[block_last-1] == 0) {
for (int i = block_first; i < block_last; i++)
v[i] = getNVX(i); // equivalent to |nvs[iblock]|
iblock--;
block_last = block_first;
block_first -= getSym()[iblock];
}
@
@<decrease the non-zero block@>=
IntSequence vtmp(v, block_first, block_last);
FTensor::decrement(vtmp, getNVX(block_first));
@ Here we go through columns of folded, calculate column of unfolded,
and copy data.
@<|FGSTensor| conversion from |UGSTensor|@>=
FGSTensor::FGSTensor(const UGSTensor& ut)
: FTensor(along_col, ut.tdims.getNVX(), ut.nrows(),
ut.tdims.calcFoldMaxOffset(), ut.dimen()),
tdims(ut.tdims)
{
for (index ti = begin(); ti != end(); ++ti) {
index ui(&ut, ti.getCoor());
copyColumn(ut, *ui, *ti);
}
}
@ Here is the code of slicing constructor from the sparse tensor. We
first calculate coordinates of first and last index of the slice
within the sparse tensor (these are |lb| and |ub|), and then we
iterate through all items between them (in lexicographical ordering of
sparse tensor), and check whether an item is between the |lb| and |ub|
in Cartesian ordering (this corresponds to belonging to the
slices). If it belongs, then we subtract the lower bound |lb| to
obtain coordinates in the |this| tensor and we copy the item.
@<|FGSTensor| slicing from |FSSparseTensor|@>=
FGSTensor::FGSTensor(const FSSparseTensor& t, const IntSequence& ss,
const IntSequence& coor, const TensorDimens& td)
: FTensor(along_col, td.getNVX(), t.nrows(),
td.calcFoldMaxOffset(), td.dimen()),
tdims(td)
{
@<set |lb| and |ub| to lower and upper bounds of indices@>;
zeros();
FSSparseTensor::const_iterator lbi = t.getMap().lower_bound(lb);
FSSparseTensor::const_iterator ubi = t.getMap().upper_bound(ub);
for (FSSparseTensor::const_iterator run = lbi; run != ubi; ++run) {
if (lb.lessEq((*run).first) && (*run).first.lessEq(ub)) {
IntSequence c((*run).first);
c.add(-1, lb);
Tensor::index ind(this, c);
TL_RAISE_IF(*ind < 0 || *ind >= ncols(),
"Internal error in slicing constructor of FGSTensor");
get((*run).second.first, *ind) = (*run).second.second;
}
}
}
@ Here we first set |s_offsets| to offsets of partitions whose lengths
are given by |ss|. So |s_offsets| is a cumulative sum of |ss|.
Then we create |lb| to be coordinates of the possibly first index from
the slice, and |ub| to be coordinates of possibly last index of the
slice.
@<set |lb| and |ub| to lower and upper bounds of indices@>=
IntSequence s_offsets(ss.size(), 0);
for (int i = 1; i < ss.size(); i++)
s_offsets[i] = s_offsets[i-1] + ss[i-1];
IntSequence lb(coor.size());
IntSequence ub(coor.size());
for (int i = 0; i < coor.size(); i++) {
lb[i] = s_offsets[coor[i]];
ub[i] = s_offsets[coor[i]] + ss[coor[i]] - 1;
}
@ The code is similar to |@<|FGSTensor| slicing from |FSSparseTensor|@>|.
@<|FGSTensor| slicing from |FFSTensor|@>=
FGSTensor::FGSTensor(const FFSTensor& t, const IntSequence& ss,
const IntSequence& coor, const TensorDimens& td)
: FTensor(along_col, td.getNVX(), t.nrows(),
td.calcFoldMaxOffset(), td.dimen()),
tdims(td)
{
if (ncols() == 0)
return;
@<set |lb| and |ub| to lower and upper bounds of indices@>;
zeros();
Tensor::index lbi(&t, lb);
Tensor::index ubi(&t, ub);
++ubi;
for (Tensor::index run = lbi; run != ubi; ++run) {
if (lb.lessEq(run.getCoor()) && run.getCoor().lessEq(ub)) {
IntSequence c(run.getCoor());
c.add(-1, lb);
Tensor::index ind(this, c);
TL_RAISE_IF(*ind < 0 || *ind >= ncols(),
"Internal error in slicing constructor of FGSTensor");
copyColumn(t, *run, *ind);
}
}
}
@
@<|FGSTensor| conversion from |GSSparseTensor|@>=
FGSTensor::FGSTensor(const GSSparseTensor& t)
: FTensor(along_col, t.getDims().getNVX(), t.nrows(),
t.getDims().calcFoldMaxOffset(), t.dimen()), tdims(t.getDims())
{
zeros();
for (FSSparseTensor::const_iterator it = t.getMap().begin();
it != t.getMap().end(); ++it) {
index ind(this, (*it).first);
get((*it).second.first, *ind) = (*it).second.second;
}
}
@ First we increment as unfolded, then we must monotonize within
partitions defined by the symmetry. This is done by
|IntSequence::pmonotone|.
@<|FGSTensor::increment| code@>=
void FGSTensor::increment(IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in FGSTensor::increment");
UTensor::increment(v, tdims.getNVX());
v.pmonotone(tdims.getSym());
}
@ Return unfolded version of the tensor.
@<|FGSTensor::unfold| code@>=
UTensor& FGSTensor::unfold() const
{
return *(new UGSTensor(*this));
}
@ Here we implement the contraction
$$\left[r_{x^iz^k}\right]_{\alpha_1\ldots\alpha_i\gamma_1\ldots\gamma_k}=
\left[t_{x^iy^jz^k}\right]_{\alpha_1\ldots\alpha_i\beta_1\ldots\beta_j\gamma_1\ldots\gamma_k}
\left[c\right]^{\beta_1\ldots\beta_j}
$$
More generally, $x^i$ and $z^k$ can represent also general symmetries.
The operation can be rewritten as a matrix product
$$\left[t_{x^iy^jz^k}\right]\cdot\left(I_l\otimes c\otimes I_r\right)$$
where $l$ is a number of columns in tensor with symmetry on the left
(i.e. $x^i$), and $r$ is a number of columns in tensor with a symmetry
on the right (i.e. $z^k$). The code proceeds accordingly. We first
form two symmetries |sym_left| and |sym_right|, then calculate the
number of columns |dleft|$=l$ and |dright|$=r$, form the Kronecker
product and multiply and add.
The input parameter |i| is the order of a variable being contracted
starting from 0.
@<|FGSTensor::contractAndAdd| code@>=
void FGSTensor::contractAndAdd(int i, FGSTensor& out,
const FRSingleTensor& col) const
{
TL_RAISE_IF(i < 0 || i >= getSym().num(),
"Wrong index for FGSTensor::contractAndAdd");
TL_RAISE_IF(getSym()[i] != col.dimen() || tdims.getNVS()[i] != col.nvar(),
"Wrong dimensions for FGSTensor::contractAndAdd");
@<set |sym_left| and |sym_right| to symmetries around |i|@>;
int dleft = TensorDimens(sym_left, tdims.getNVS()).calcFoldMaxOffset();
int dright = TensorDimens(sym_right, tdims.getNVS()).calcFoldMaxOffset();
KronProdAll kp(3);
kp.setUnit(0, dleft);
kp.setMat(1, col);
kp.setUnit(2, dright);
FGSTensor tmp(out.nrows(), out.getDims());
kp.mult(*this, tmp);
out.add(1.0, tmp);
}
@ Here we have a symmetry of |this| tensor and we have to set
|sym_left| to the subsymmetry left from the |i|-th variable and
|sym_right| to the subsymmetry right from the |i|-th variable. So we
copy first all the symmetry and then put zeros to the left for
|sym_right| and to the right for |sym_left|.
@<set |sym_left| and |sym_right| to symmetries around |i|@>=
Symmetry sym_left(getSym());
Symmetry sym_right(getSym());
for (int j = 0; j < getSym().num(); j++) {
if (j <= i)
sym_right[j] = 0;
if (j >= i)
sym_left[j] = 0;
}
@ Here we go through folded tensor, and each index we convert to index
of the unfolded tensor and copy the data to the unfolded. Then we
unfold data within the unfolded tensor.
@<|UGSTensor| conversion from |FGSTensor|@>=
UGSTensor::UGSTensor(const FGSTensor& ft)
: UTensor(along_col, ft.tdims.getNVX(), ft.nrows(),
ft.tdims.calcUnfoldMaxOffset(), ft.dimen()),
tdims(ft.tdims)
{
for (index fi = ft.begin(); fi != ft.end(); ++fi) {
index ui(this, fi.getCoor());
copyColumn(ft, *fi, *ui);
}
unfoldData();
}
@ This makes a folded slice from the sparse tensor and unfolds it.
@<|UGSTensor| slicing from |FSSparseTensor|@>=
UGSTensor::UGSTensor(const FSSparseTensor& t, const IntSequence& ss,
const IntSequence& coor, const TensorDimens& td)
: UTensor(along_col, td.getNVX(), t.nrows(),
td.calcUnfoldMaxOffset(), td.dimen()),
tdims(td)
{
if (ncols() == 0)
return;
FGSTensor ft(t, ss, coor, td);
for (index fi = ft.begin(); fi != ft.end(); ++fi) {
index ui(this, fi.getCoor());
copyColumn(ft, *fi, *ui);
}
unfoldData();
}
@ This makes a folded slice from dense and unfolds it.
@<|UGSTensor| slicing from |UFSTensor|@>=
UGSTensor::UGSTensor(const UFSTensor& t, const IntSequence& ss,
const IntSequence& coor, const TensorDimens& td)
: UTensor(along_col, td.getNVX(), t.nrows(),
td.calcUnfoldMaxOffset(), td.dimen()),
tdims(td)
{
FFSTensor folded(t);
FGSTensor ft(folded, ss, coor, td);
for (index fi = ft.begin(); fi != ft.end(); ++fi) {
index ui(this, fi.getCoor());
copyColumn(ft, *fi, *ui);
}
unfoldData();
}
@ Clear, just call |UTensor| static methods.
@<|UGSTensor| increment and decrement codes@>=
void UGSTensor::increment(IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in UGSTensor::increment");
UTensor::increment(v, tdims.getNVX());
}
void UGSTensor::decrement(IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in UGSTensor::decrement");
UTensor::decrement(v, tdims.getNVX());
}
@ Return a new instance of folded version.
@<|UGSTensor::fold| code@>=
FTensor& UGSTensor::fold() const
{
return *(new FGSTensor(*this));
}
@ Return an offset of a given index.
@<|UGSTensor::getOffset| code@>=
int UGSTensor::getOffset(const IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input vector size in UGSTensor::getOffset");
return UTensor::getOffset(v, tdims.getNVX());
}
@ Unfold all data. We go through all the columns and for each we
obtain an index of the first equivalent, and copy the data.
@<|UGSTensor::unfoldData| code@>=
void UGSTensor::unfoldData()
{
for (index in = begin(); in != end(); ++in)
copyColumn(*(getFirstIndexOf(in)), *in);
}
@ Here we return the first index which is equivalent in the symmetry
to the given index. It is a matter of sorting all the symmetry
partitions of the index.
@<|UGSTensor::getFirstIndexOf| code@>=
Tensor::index UGSTensor::getFirstIndexOf(const index& in) const
{
IntSequence v(in.getCoor());
int last = 0;
for (int i = 0; i < tdims.getSym().num(); i++) {
IntSequence vtmp(v, last, last+tdims.getSym()[i]);
vtmp.sort();
last += tdims.getSym()[i];
}
return index(this, v);
}
@ Here is perfectly same code with the same semantics as in
|@<|FGSTensor::contractAndAdd| code@>|.
@<|UGSTensor::contractAndAdd| code@>=
void UGSTensor::contractAndAdd(int i, UGSTensor& out,
const URSingleTensor& col) const
{
TL_RAISE_IF(i < 0 || i >= getSym().num(),
"Wrong index for UGSTensor::contractAndAdd");
TL_RAISE_IF(getSym()[i] != col.dimen() || tdims.getNVS()[i] != col.nvar(),
"Wrong dimensions for UGSTensor::contractAndAdd");
@<set |sym_left| and |sym_right| to symmetries around |i|@>;
int dleft = TensorDimens(sym_left, tdims.getNVS()).calcUnfoldMaxOffset();
int dright = TensorDimens(sym_right, tdims.getNVS()).calcUnfoldMaxOffset();
KronProdAll kp(3);
kp.setUnit(0, dleft);
kp.setMat(1, col);
kp.setUnit(2, dright);
UGSTensor tmp(out.nrows(), out.getDims());
kp.mult(*this, tmp);
out.add(1.0, tmp);
}
@ End of {\tt gs\_tensor.cpp} file.

274
dynare++/tl/cc/gs_tensor.hh Normal file

@ -0,0 +1,274 @@
// Copyright 2004, Ondra Kamenik
// General symmetry tensor.
/* Here we define tensors for general symmetry. All tensors defined here
identify the multidimensional index with columns; thus all
symmetries refer to columns. The general symmetry here is not the most
general: it captures all symmetries of indices which are given by a
continuous partitioning of the indices. Two items are symmetric if they
belong to the same group. The continuity implies that if two items
belong to one group, then all items between them belong to that
group. This continuous partitioning of indices is described by the
|Symmetry| class.
The dimensions of the tensors here are described, besides the symmetry,
also by the number of variables for each group. This is dealt with in the
class for tensor dimensions, also defined here. */
#ifndef GS_TENSOR_H
#define GS_TENSOR_H
#include "tensor.hh"
#include "fs_tensor.hh"
#include "symmetry.hh"
#include "rfs_tensor.hh"
class FGSTensor;
class UGSTensor;
class FSSparseTensor;
/* This class encapsulates symmetry information for the general
symmetry tensor. It maintains a vector of variable numbers |nvs| and
a symmetry |sym|. For example, let the symmetry be $y^2u^3$, and the
variable numbers be 10 for $y$ and 5 for $u$. Then |nvs| is
$(10,5)$, and |sym| is $(2,3)$. It also maintains |nvmax|, the unfolding
of |nvs| with respect to the symmetry, here $(10,10,5,5,5)$.
The constructors of |TensorDimens| are clear and pretty intuitive, except
for the constructor used for slicing a fully symmetric tensor. It
constructs the dimensions from a partitioning of the variables of the
fully symmetric tensor. Let the partitioning be, for instance, $(a,b,c,d)$,
where $(n_a,n_b,n_c,n_d)$ are the lengths of the partitions. Suppose one
wants a slice of only the part of the fully symmetric tensor
corresponding to indices of the form $b^2d^3$. This corresponds to the
symmetry $a^0b^2c^0d^3$. So, the dimensions of the slice would also be
$(n_a,n_b,n_c,n_d)$ for the numbers of variables and $(0,2,0,3)$ for the
symmetry. So we provide a constructor which takes the sizes of the
partitions $(n_a,n_b,n_c,n_d)$ as an |IntSequence|, and the indices of the
picked partitions, in our case $(1,1,3,3,3)$, as another |IntSequence|.
The class is able to calculate the number of offsets (columns or rows,
depending on which matrix coordinate we describe) in unfolded and folded
tensors with the given symmetry. */
class TensorDimens
{
protected:
IntSequence nvs;
Symmetry sym;
IntSequence nvmax;
public:
TensorDimens(const Symmetry &s, const IntSequence &nvars)
: nvs(nvars), sym(s), nvmax(sym, nvs)
{
}
TensorDimens(int nvar, int dimen)
: nvs(1), sym(dimen), nvmax(dimen, nvar)
{
nvs[0] = nvar;
}
TensorDimens(const TensorDimens &td)
: nvs(td.nvs), sym(td.sym), nvmax(td.nvmax)
{
}
virtual ~TensorDimens()
{
}
TensorDimens(const IntSequence &ss, const IntSequence &coor);
const TensorDimens &
operator=(const TensorDimens &td)
{
nvs = td.nvs; sym = td.sym; nvmax = td.nvmax; return *this;
}
bool
operator==(const TensorDimens &td) const
{
return nvs == td.nvs && sym == td.sym;
}
bool
operator!=(const TensorDimens &td) const
{
return !operator==(td);
}
int
dimen() const
{
return sym.dimen();
}
int
getNVX(int i) const
{
return nvmax[i];
}
const IntSequence &
getNVS() const
{
return nvs;
}
const IntSequence &
getNVX() const
{
return nvmax;
}
const Symmetry &
getSym() const
{
return sym;
}
int calcUnfoldMaxOffset() const;
int calcFoldMaxOffset() const;
int calcFoldOffset(const IntSequence &v) const;
void decrement(IntSequence &v) const;
};
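/* Putting numbers to the slicing example above (an added note, not part
of the original source): calling the slicing constructor with
ss=(3,4,5,6) and coor=(1,1,3,3,3) yields sym=(0,2,0,3) and
nvmax=(4,4,6,6,6), i.e. the dimensions of the $b^2d^3$ slice when
$(n_a,n_b,n_c,n_d)=(3,4,5,6)$. */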
/* Here is the class for the folded general symmetry tensor. It contains
only the tensor dimensions, defines types for indices, and implements
the virtual methods of the superclass |FTensor|.
We add a method |contractAndAdd| which performs a contraction of one
variable in the tensor. This is, for instance,
$$\left[r_{x^iz^k}\right]_{\alpha_1\ldots\alpha_i\gamma_1\ldots\gamma_k}=
\left[t_{x^iy^jz^k}\right]_{\alpha_1\ldots\alpha_i\beta_1\ldots\beta_j\gamma_1\ldots\gamma_k}
\left[c\right]^{\beta_1\ldots\beta_j}
$$
We also add |getOffset|, which should be used with care. */
class GSSparseTensor;
class FGSTensor : public FTensor
{
friend class UGSTensor;
const TensorDimens tdims;
public:
/* These are standard constructors, followed by two slicing constructors:
the first constructs a slice from the sparse, the second from the dense
(both fully symmetric). The next constructor is just a conversion from
|GSSparseTensor|. The last constructor allows for in-place conversion
from |FFSTensor| to |FGSTensor|. */
FGSTensor(int r, const TensorDimens &td)
: FTensor(along_col, td.getNVX(), r,
td.calcFoldMaxOffset(), td.dimen()), tdims(td)
{
}
FGSTensor(const FGSTensor &ft)
: FTensor(ft), tdims(ft.tdims)
{
}
FGSTensor(const UGSTensor &ut);
FGSTensor(int first_row, int num, FGSTensor &t)
: FTensor(first_row, num, t), tdims(t.tdims)
{
}
FGSTensor(const FSSparseTensor &t, const IntSequence &ss,
const IntSequence &coor, const TensorDimens &td);
FGSTensor(const FFSTensor &t, const IntSequence &ss,
const IntSequence &coor, const TensorDimens &td);
FGSTensor(const GSSparseTensor &sp);
FGSTensor(FFSTensor &t)
: FTensor(0, t.nrows(), t), tdims(t.nvar(), t.dimen())
{
}
virtual ~FGSTensor()
{
}
void increment(IntSequence &v) const;
void
decrement(IntSequence &v) const
{
tdims.decrement(v);
}
UTensor &unfold() const;
const TensorDimens &
getDims() const
{
return tdims;
}
const Symmetry &
getSym() const
{
return getDims().getSym();
}
void contractAndAdd(int i, FGSTensor &out,
const FRSingleTensor &col) const;
int
getOffset(const IntSequence &v) const
{
return tdims.calcFoldOffset(v);
}
};
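/* A hypothetical call following the formula above (assuming |i| indexes
the symmetry group being contracted): for a tensor |t| with symmetry
$x^iy^jz^k$,

t.contractAndAdd(1, r, c);

would accumulate into |r| (with symmetry $x^iz^k$) the contraction of
the $y$ group of |t| with the column |c|. */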
/* Besides the same facilities as |FGSTensor|, we also have here the
method |unfoldData|, and the helper method |getFirstIndexOf| which
corresponds to sorting coordinates in the fully symmetric case (here
the action is more complicated, so we put it into a method). */
class UGSTensor : public UTensor
{
friend class FGSTensor;
const TensorDimens tdims;
public:
/* These are standard constructors. Two of them are slicing
constructors: the first makes a slice from a fully symmetric sparse
tensor, the second from a fully symmetric dense unfolded tensor. The last
constructor allows for in-place conversion from |UFSTensor| to
|UGSTensor|. */
UGSTensor(int r, const TensorDimens &td)
: UTensor(along_col, td.getNVX(), r,
td.calcUnfoldMaxOffset(), td.dimen()), tdims(td)
{
}
UGSTensor(const UGSTensor &ut)
: UTensor(ut), tdims(ut.tdims)
{
}
UGSTensor(const FGSTensor &ft);
UGSTensor(int first_row, int num, UGSTensor &t)
: UTensor(first_row, num, t), tdims(t.tdims)
{
}
UGSTensor(const FSSparseTensor &t, const IntSequence &ss,
const IntSequence &coor, const TensorDimens &td);
UGSTensor(const UFSTensor &t, const IntSequence &ss,
const IntSequence &coor, const TensorDimens &td);
UGSTensor(UFSTensor &t)
: UTensor(0, t.nrows(), t), tdims(t.nvar(), t.dimen())
{
}
virtual ~UGSTensor()
{
}
void increment(IntSequence &v) const;
void decrement(IntSequence &v) const;
FTensor &fold() const;
const TensorDimens &
getDims() const
{
return tdims;
}
const Symmetry &
getSym() const
{
return getDims().getSym();
}
void contractAndAdd(int i, UGSTensor &out,
const URSingleTensor &col) const;
int getOffset(const IntSequence &v) const;
private:
void unfoldData();
public:
index getFirstIndexOf(const index &in) const;
};
#endif

View File

@ -1,222 +0,0 @@
@q $Id: gs_tensor.hweb 741 2006-05-09 11:12:46Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 General symmetry tensor. Start of {\tt gs\_tensor.h} file.
Here we define tensors for general symmetry. All tensors from here are
identifying the multidimensional index with columns. Thus all
symmetries regard to columns. The general symmetry here is not the most
general. It captures all symmetries of indices which are given by
continuous partitioning of indices. Two items are symmetric if they
belong to the same group. The continuity implies that if two items
belong to one group, then all items between them belong to that
group. This continuous partitioning of indices is described by
|Symmetry| class.
The dimension of the tensors here are described (besides the symmetry)
also by number of variables for each group. This is dealt in the class
for tensor dimensions defined also here.
@c
#ifndef GS_TENSOR_H
#define GS_TENSOR_H
#include "tensor.h"
#include "fs_tensor.h"
#include "symmetry.h"
#include "rfs_tensor.h"
class FGSTensor;
class UGSTensor;
class FSSparseTensor;
@<|TensorDimens| class declaration@>;
@<|FGSTensor| class declaration@>;
@<|UGSTensor| class declaration@>;
#endif
@ This class encapsulates symmetry information for the general
symmetry tensor. It maintains a vector of variable numbers |nvs|, and
symmetry |sym|. For example, let the symmetry be $y^2u^3$, and
variable numbers be 10 for $y$, and 5 for $u$. Then the |nvs| is
$(10,5)$, and |sym| is $(2,3)$. Also it maintains |nvmax| unfolded |nvs| with
respect to the symmetry, this is $(10,10,5,5,5)$.
The constructors of |TensorDimens| are clear and pretty intuitive but
the constructor which is used for slicing fully symmetric tensor. It
constructs the dimensions from the partitioning of variables of fully
symmetric tensor. Let the partitioning be, for instance, $(a,b,c,d)$,
where $(n_a,n_b,n_c,n_d)$ are lengths of the partitions. Let one want
to get a slice only of the part of the fully symmetric tensor
corresponding to indices of the form $b^2d^3$. This corresponds to the
symmetry $a^0b^2c^0d^3$. So, the dimension of the slice would be also
$(n_a,n_b,n_c,n_d)$ for number of variables and $(0,2,0,3)$ for the
symmetry. So we provide the constructor which takes sizes of
partitions $(n_a,n_b,n_c,n_d)$ as |IntSequence|, and indices of picked
partitions, in our case $(1,1,3,3,3)$, as |IntSequence|.
The class is able to calculate number of offsets (columns or rows depending
what matrix coordinate we describe) in unfolded and folded tensors
with the given symmetry.
@s TensorDimens int
@<|TensorDimens| class declaration@>=
class TensorDimens {
protected:@;
IntSequence nvs;
Symmetry sym;
IntSequence nvmax;
public:@;
TensorDimens(const Symmetry& s, const IntSequence& nvars)
: nvs(nvars), sym(s), nvmax(sym, nvs)@+ {}
TensorDimens(int nvar, int dimen)
: nvs(1), sym(dimen), nvmax(dimen, nvar)
{@+ nvs[0] = nvar;@+}
TensorDimens(const TensorDimens& td)
: nvs(td.nvs), sym(td.sym), nvmax(td.nvmax)@+ {}
virtual ~TensorDimens()@+ {}
TensorDimens(const IntSequence& ss, const IntSequence& coor);
const TensorDimens& operator=(const TensorDimens& td)
{@+ nvs = td.nvs;@+ sym = td.sym;@+ nvmax = td.nvmax;@+ return *this;@+}
bool operator==(const TensorDimens& td) const
{@+ return nvs == td.nvs && sym == td.sym;@+}
bool operator!=(const TensorDimens& td) const
{@+ return !operator==(td);@+}
int dimen() const
{@+ return sym.dimen();@+}
int getNVX(int i) const
{@+ return nvmax[i];@+}
const IntSequence& getNVS() const
{ @+ return nvs;@+}
const IntSequence& getNVX() const
{@+ return nvmax;@+}
const Symmetry& getSym() const
{@+ return sym;@+}
int calcUnfoldMaxOffset() const;
int calcFoldMaxOffset() const;
int calcFoldOffset(const IntSequence& v) const;
void decrement(IntSequence& v) const;
};
@ Here is a class for folded general symmetry tensor. It only contains
tensor dimensions, it defines types for indices, implement virtual
methods of super class |FTensor|.
We add a method |contractAndAdd| which performs a contraction of one
variable in the tensor. This is, for instance
$$\left[r_{x^iz^k}\right]_{\alpha_1\ldots\alpha_i\gamma_1\ldots\gamma_k}=
\left[t_{x^iy^jz^k}\right]_{\alpha_1\ldots\alpha_i\beta_1\ldots\beta_j\gamma_1\ldots\gamma_k}
\left[c\right]^{\beta_1\ldots\beta_j}
$$
Also we add |getOffset| which should be used with care.
@<|FGSTensor| class declaration@>=
class GSSparseTensor;
class FGSTensor : public FTensor {
friend class UGSTensor;
const TensorDimens tdims;
public:@;
@<|FGSTensor| constructor declarations@>;
virtual ~FGSTensor()@+ {}
void increment(IntSequence& v) const;
void decrement(IntSequence& v) const
{@+ tdims.decrement(v);@+}
UTensor& unfold() const;
const TensorDimens& getDims() const
{@+ return tdims;@+}
const Symmetry& getSym() const
{@+ return getDims().getSym();@+}
void contractAndAdd(int i, FGSTensor& out,
const FRSingleTensor& col) const;
int getOffset(const IntSequence& v) const
{@+ return tdims.calcFoldOffset(v);@+}
};
@ These are standard constructors followed by two slicing. The first
constructs a slice from the sparse, the second from the dense (both
fully symmetric). Next constructor is just a conversion from
|GSSParseTensor|. The last constructor allows for in-place conversion
from |FFSTensor| to |FGSTensor|.
@<|FGSTensor| constructor declarations@>=
FGSTensor(int r, const TensorDimens& td)
: FTensor(along_col, td.getNVX(), r,
td.calcFoldMaxOffset(), td.dimen()), tdims(td)@+ {}
FGSTensor(const FGSTensor& ft)
: FTensor(ft), tdims(ft.tdims)@+ {}
FGSTensor(const UGSTensor& ut);
FGSTensor(int first_row, int num, FGSTensor& t)
: FTensor(first_row, num, t), tdims(t.tdims)@+ {}
FGSTensor(const FSSparseTensor& t, const IntSequence& ss,
const IntSequence& coor, const TensorDimens& td);
FGSTensor(const FFSTensor& t, const IntSequence& ss,
const IntSequence& coor, const TensorDimens& td);
FGSTensor(const GSSparseTensor& sp);
FGSTensor(FFSTensor& t)
: FTensor(0, t.nrows(), t), tdims(t.nvar(), t.dimen())@+ {}
@ Besides similar things that has |FGSTensor|, we have here also
method |unfoldData|, and helper method |getFirstIndexOf|
which corresponds to sorting coordinates in fully symmetric case (here
the action is more complicated, so we put it to the method).
@<|UGSTensor| class declaration@>=
class UGSTensor : public UTensor {
friend class FGSTensor;
const TensorDimens tdims;
public:@;
@<|UGSTensor| constructor declarations@>;
virtual ~UGSTensor()@+ {}
void increment(IntSequence& v) const;
void decrement(IntSequence& v) const;
FTensor& fold() const;
const TensorDimens& getDims() const
{@+ return tdims;@+}
const Symmetry& getSym() const
{@+ return getDims().getSym();@+}
void contractAndAdd(int i, UGSTensor& out,
const URSingleTensor& col) const;
int getOffset(const IntSequence& v) const;
private:@;
void unfoldData();
public:@;
index getFirstIndexOf(const index& in) const;
};
@ These are standard constructors. The last two constructors are
slicing. The first makes a slice from fully symmetric sparse, the
second from fully symmetric dense unfolded tensor. The last
constructor allows for in-place conversion from |UFSTensor| to
|UGSTensor|.
@<|UGSTensor| constructor declarations@>=
UGSTensor(int r, const TensorDimens& td)
: UTensor(along_col, td.getNVX(), r,
td.calcUnfoldMaxOffset(), td.dimen()), tdims(td)@+ {}
UGSTensor(const UGSTensor& ut)
: UTensor(ut), tdims(ut.tdims)@+ {}
UGSTensor(const FGSTensor& ft);
UGSTensor(int first_row, int num, UGSTensor& t)
: UTensor(first_row, num, t), tdims(t.tdims)@+ {}
UGSTensor(const FSSparseTensor& t, const IntSequence& ss,
const IntSequence& coor, const TensorDimens& td);
UGSTensor(const UFSTensor& t, const IntSequence& ss,
const IntSequence& coor, const TensorDimens& td);
UGSTensor(UFSTensor& t)
: UTensor(0, t.nrows(), t), tdims(t.nvar(), t.dimen())@+ {}
@ End of {\tt gs\_tensor.h} file.

View File

@ -0,0 +1,312 @@
// Copyright 2004, Ondra Kamenik
#include "int_sequence.hh"
#include "symmetry.hh"
#include "tl_exception.hh"
#include <cstdio>
#include <climits>
/* This unfolds a given integer sequence with respect to the given
symmetry. If for example the symmetry is $(2,3)$, and the sequence is
$(a,b)$, then the result is $(a,a,b,b,b)$. */
IntSequence::IntSequence(const Symmetry &sy, const IntSequence &se)
: data(new int[sy.dimen()]), length(sy.dimen()), destroy(true)
{
int k = 0;
for (int i = 0; i < sy.num(); i++)
for (int j = 0; j < sy[i]; j++, k++)
operator[](k) = se[i];
}
/* This constructs an implied symmetry (implemented as an |IntSequence|)
from a more general symmetry and an equivalence class (implemented as a
|vector<int>|). For example, let the general symmetry be $y^3u^2$ and
the equivalence class be $\{0,4\}$, picking up the first and fifth
variables; we calculate the symmetry (at this point only an |IntSequence|)
corresponding to the picked variables. These are $yu$. Thus the
constructed sequence must be $(1,1)$, meaning that we picked one $y$
and one $u$. */
IntSequence::IntSequence(const Symmetry &sy, const vector<int> &se)
: data(new int[sy.num()]), length(sy.num()), destroy(true)
{
TL_RAISE_IF(sy.dimen() <= se[se.size()-1],
"Sequence is not reachable by symmetry in IntSequence()");
for (int i = 0; i < length; i++)
operator[](i) = 0;
for (unsigned int i = 0; i < se.size(); i++)
operator[](sy.findClass(se[i]))++;
}
/* This constructs an ordered integer sequence from the given ordered
sequence by inserting the given number into the sequence. */
IntSequence::IntSequence(int i, const IntSequence &s)
: data(new int[s.size()+1]), length(s.size()+1), destroy(true)
{
int j = 0;
while (j < s.size() && s[j] < i)
j++;
for (int jj = 0; jj < j; jj++)
operator[](jj) = s[jj];
operator[](j) = i;
for (int jj = j; jj < s.size(); jj++)
operator[](jj+1) = s[jj];
}
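/* For example, inserting 4 into the ordered sequence (1,3,5) yields
(1,3,4,5). */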
IntSequence::IntSequence(int i, const IntSequence &s, int pos)
: data(new int[s.size()+1]), length(s.size()+1), destroy(true)
{
TL_RAISE_IF(pos < 0 || pos > s.size(),
"Wrong position for insertion IntSequence constructor");
for (int jj = 0; jj < pos; jj++)
operator[](jj) = s[jj];
operator[](pos) = i;
for (int jj = pos; jj < s.size(); jj++)
operator[](jj+1) = s[jj];
}
const IntSequence &
IntSequence::operator=(const IntSequence &s)
{
TL_RAISE_IF(!destroy && length != s.length,
"Wrong length for in-place IntSequence::operator=");
if (destroy && length != s.length)
{
delete [] data;
data = new int[s.length];
destroy = true;
length = s.length;
}
memcpy(data, s.data, sizeof(int)*length);
return *this;
}
bool
IntSequence::operator==(const IntSequence &s) const
{
if (size() != s.size())
return false;
int i = 0;
while (i < size() && operator[](i) == s[i])
i++;
return i == size();
}
/* We need some linear irreflexive ordering; we implement it as
lexicographic ordering without identity. */
bool
IntSequence::operator<(const IntSequence &s) const
{
int len = min(size(), s.size());
int i = 0;
while (i < len && operator[](i) == s[i])
i++;
return (i < s.size() && (i == size() || operator[](i) < s[i]));
}
bool
IntSequence::lessEq(const IntSequence &s) const
{
TL_RAISE_IF(size() != s.size(),
"Sequence with different lengths in IntSequence::lessEq");
int i = 0;
while (i < size() && operator[](i) <= s[i])
i++;
return (i == size());
}
bool
IntSequence::less(const IntSequence &s) const
{
TL_RAISE_IF(size() != s.size(),
"Sequence with different lengths in IntSequence::less");
int i = 0;
while (i < size() && operator[](i) < s[i])
i++;
return (i == size());
}
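/* For example, (1,2).less((2,3)) holds since each item is strictly
smaller, whereas (1,4).less((2,3)) does not; under the lexicographic
ordering, however, (1,4) < (2,3) holds. */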
/* This is a bubble sort; since all sequences are usually very short, this
sin might be forgiven. */
void
IntSequence::sort()
{
for (int i = 0; i < length; i++)
{
int swaps = 0;
for (int j = 0; j < length-1; j++)
{
if (data[j] > data[j+1])
{
int s = data[j+1];
data[j+1] = data[j];
data[j] = s;
swaps++;
}
}
if (swaps == 0)
return;
}
}
/* Here we monotonize the sequence. If an item is less than its
predecessor, it is equalized. */
void
IntSequence::monotone()
{
for (int i = 1; i < length; i++)
if (data[i-1] > data[i])
data[i] = data[i-1];
}
/* This partially monotonizes the sequence. The partitioning is done by a
symmetry, so the subsequences given by the symmetry classes are
monotonized. For example, if the symmetry is $y^2u^3$, and the
|IntSequence| is $(5,3,1,6,4)$, the result is $(5,5,1,6,6)$. */
void
IntSequence::pmonotone(const Symmetry &s)
{
int cum = 0;
for (int i = 0; i < s.num(); i++)
{
for (int j = cum + 1; j < cum + s[i]; j++)
if (data[j-1] > data[j])
data[j] = data[j-1];
cum += s[i];
}
}
/* This returns the sum of all elements. Useful for symmetries. */
int
IntSequence::sum() const
{
int res = 0;
for (int i = 0; i < length; i++)
res += operator[](i);
return res;
}
/* This returns the product of the items between |i1| (inclusive) and |i2|
(exclusive). Useful for Kronecker product dimensions. */
int
IntSequence::mult(int i1, int i2) const
{
int res = 1;
for (int i = i1; i < i2; i++)
res *= operator[](i);
return res;
}
/* Returns the number of identical items at the beginning of the sequence. */
int
IntSequence::getPrefixLength() const
{
int i = 0;
while (i+1 < size() && operator[](i+1) == operator[](0))
i++;
return i+1;
}
/* This returns the number of distinct items in the sequence. It assumes
that the sequence is ordered. For the empty sequence it returns zero. */
int
IntSequence::getNumDistinct() const
{
int res = 0;
if (size() > 0)
res++;
for (int i = 1; i < size(); i++)
if (operator[](i) != operator[](i-1))
res++;
return res;
}
/* This returns the maximum of the sequence. If the sequence is empty, it
returns the least possible |int| value. */
int
IntSequence::getMax() const
{
int res = INT_MIN;
for (int i = 0; i < size(); i++)
if (operator[](i) > res)
res = operator[](i);
return res;
}
void
IntSequence::add(int i)
{
for (int j = 0; j < size(); j++)
operator[](j) += i;
}
void
IntSequence::add(int f, const IntSequence &s)
{
TL_RAISE_IF(size() != s.size(),
"Wrong sequence length in IntSequence::add");
for (int j = 0; j < size(); j++)
operator[](j) += f*s[j];
}
bool
IntSequence::isPositive() const
{
int i = 0;
while (i < size() && operator[](i) >= 0)
i++;
return (i == size());
}
bool
IntSequence::isConstant() const
{
bool res = true;
int i = 1;
while (res && i < size())
{
res = res && operator[](0) == operator[](i);
i++;
}
return res;
}
bool
IntSequence::isSorted() const
{
bool res = true;
int i = 1;
while (res && i < size())
{
res = res && operator[](i-1) <= operator[](i);
i++;
}
return res;
}
/* Debug print. */
void
IntSequence::print() const
{
printf("[");
for (int i = 0; i < size(); i++)
printf("%2d ", operator[](i));
printf("]\n");
}
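/* A minimal driver sketch exercising the operations above (the values
are placeholders chosen for illustration):

#include "int_sequence.hh"

int main()
{
int d[] = {5, 3, 1, 6, 4};
IntSequence s(5, d);
s.sort();  // (1,3,4,5,6)
s.add(1);  // (2,4,5,6,7)
s.print(); // prints [ 2  4  5  6  7 ]
return 0;
}
*/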

View File

@ -1,351 +0,0 @@
@q $Id: int_sequence.cweb 148 2005-04-19 15:12:26Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@ Start of {\tt int\_sequence.cpp} file.
@c
#include "int_sequence.h"
#include "symmetry.h"
#include "tl_exception.h"
#include <cstdio>
#include <climits>
@<|IntSequence| constructor code 1@>;
@<|IntSequence| constructor code 2@>;
@<|IntSequence| constructor code 3@>;
@<|IntSequence| constructor code 4@>;
@<|IntSequence::operator=| code@>;
@<|IntSequence::operator==| code@>;
@<|IntSequence::operator<| code@>;
@<|IntSequence::lessEq| code@>;
@<|IntSequence::less| code@>;
@<|IntSequence::sort| code@>;
@<|IntSequence::monotone| code@>;
@<|IntSequence::pmonotone| code@>;
@<|IntSequence::sum| code@>;
@<|IntSequence::mult| code@>;
@<|IntSequence::getPrefixLength| code@>;
@<|IntSequence::getNumDistinct| code@>;
@<|IntSequence::getMax| code@>;
@<|IntSequence::add| code 1@>;
@<|IntSequence::add| code 2@>;
@<|IntSequence::isPositive| code@>;
@<|IntSequence::isConstant| code@>;
@<|IntSequence::isSorted| code@>;
@<|IntSequence::print| code@>;
@ This unfolds a given integer sequence with respect to the given
symmetry. If for example the symmetry is $(2,3)$, and the sequence is
$(a,b)$, then the result is $(a,a,b,b,b)$.
@<|IntSequence| constructor code 1@>=
IntSequence::IntSequence(const Symmetry& sy, const IntSequence& se)
: data(new int[sy.dimen()]), length(sy.dimen()), destroy(true)
{
int k = 0;
for (int i = 0; i < sy.num(); i++)
for (int j = 0; j < sy[i]; j++, k++)
operator[](k) = se[i];
}
@ This constructs an implied symmetry (implemented as |IntSequence|
from a more general symmetry and equivalence class (implemented as
|vector<int>|). For example, let the general symmetry be $y^3u^2$ and
the equivalence class is $\{0,4\}$ picking up first and fifth
variable, we calculate symmetry (at this point only |IntSequence|)
corresponding to the picked variables. These are $yu$. Thus the
constructed sequence must be $(1,1)$, meaning that we picked one $y$
and one $u$.
@<|IntSequence| constructor code 2@>=
IntSequence::IntSequence(const Symmetry& sy, const vector<int>& se)
: data(new int[sy.num()]), length(sy.num()), destroy(true)
{
TL_RAISE_IF(sy.dimen() <= se[se.size()-1],
"Sequence is not reachable by symmetry in IntSequence()");
for (int i = 0; i < length; i++) @/
operator[](i) = 0;
for (unsigned int i = 0; i < se.size(); i++) @/
operator[](sy.findClass(se[i]))++;
}
@ This constructs an ordered integer sequence from the given ordered
sequence inserting the given number to the sequence.
@<|IntSequence| constructor code 3@>=
IntSequence::IntSequence(int i, const IntSequence& s)
: data(new int[s.size()+1]), length(s.size()+1), destroy(true)
{
int j = 0;
while (j < s.size() && s[j] < i)
j++;
for (int jj = 0; jj < j; jj++)
operator[](jj) = s[jj];
operator[](j) = i;
for (int jj = j; jj < s.size(); jj++)
operator[](jj+1) = s[jj];
}
@
@<|IntSequence| constructor code 4@>=
IntSequence::IntSequence(int i, const IntSequence& s, int pos)
: data(new int[s.size()+1]), length(s.size()+1), destroy(true)
{
TL_RAISE_IF(pos < 0 || pos > s.size(),
"Wrong position for insertion IntSequence constructor");
for (int jj = 0; jj < pos; jj++)
operator[](jj) = s[jj];
operator[](pos) = i;
for (int jj = pos; jj < s.size(); jj++)
operator[](jj+1) = s[jj];
}
@
@<|IntSequence::operator=| code@>=
const IntSequence& IntSequence::operator=(const IntSequence& s)
{
TL_RAISE_IF(!destroy && length != s.length,
"Wrong length for in-place IntSequence::operator=");
if (destroy && length != s.length) {
delete [] data;
data = new int[s.length];
destroy = true;
length = s.length;
}
memcpy(data, s.data, sizeof(int)*length);
return *this;
}
@
@<|IntSequence::operator==| code@>=
bool IntSequence::operator==(const IntSequence& s) const
{
if (size() != s.size())
return false;
int i = 0;
while (i < size() && operator[](i) == s[i])
i++;
return i == size();
}
@ We need some linear irreflexive ordering, we implement it as
lexicographic ordering without identity.
@<|IntSequence::operator<| code@>=
bool IntSequence::operator<(const IntSequence& s) const
{
int len = min(size(), s.size());
int i = 0;
while (i < len && operator[](i) == s[i])
i++;
return (i < s.size() && (i == size() || operator[](i) < s[i]));
}
@
@<|IntSequence::lessEq| code@>=
bool IntSequence::lessEq(const IntSequence& s) const
{
TL_RAISE_IF(size() != s.size(),
"Sequence with different lengths in IntSequence::lessEq");
int i = 0;
while (i < size() && operator[](i) <= s[i])
i++;
return (i == size());
}
@
@<|IntSequence::less| code@>=
bool IntSequence::less(const IntSequence& s) const
{
TL_RAISE_IF(size() != s.size(),
"Sequence with different lengths in IntSequence::less");
int i = 0;
while (i < size() && operator[](i) < s[i])
i++;
return (i == size());
}
@ This is a bubble sort, all sequences are usually very short, so this
sin might be forgiven.
@<|IntSequence::sort| code@>=
void IntSequence::sort()
{
for (int i = 0; i < length; i++) {
int swaps = 0;
for (int j = 0; j < length-1; j++) {
if (data[j] > data[j+1]) {
int s = data[j+1];
data[j+1] = data[j];
data[j] = s;
swaps++;
}
}
if (swaps == 0)
return;
}
}
@ Here we monotonize the sequence. If an item is less then its
predecessor, it is equalized.
@<|IntSequence::monotone| code@>=
void IntSequence::monotone()
{
for (int i = 1; i < length; i++)
if (data[i-1] > data[i])@/
data[i] = data[i-1];
}
@ This partially monotones the sequence. The partitioning is done by a
symmetry. So the subsequence given by the symmetry classes are
monotonized. For example, if the symmetry is $y^2u^3$, and the
|IntSequence| is $(5,3,1,6,4)$, the result is $(5,5,1,6,6)$.
@<|IntSequence::pmonotone| code@>=
void IntSequence::pmonotone(const Symmetry& s)
{
int cum = 0;
for (int i = 0; i < s.num(); i++) {
for (int j = cum + 1; j < cum + s[i]; j++)
if (data[j-1] > data[j])@/
data[j] = data[j-1];
cum += s[i];
}
}
@ This returns sum of all elements. Useful for symmetries.
@<|IntSequence::sum| code@>=
int IntSequence::sum() const
{
int res = 0;
for (int i = 0; i < length; i++) @/
res += operator[](i);
return res;
}
@ This returns product of subsequent items. Useful for Kronecker product
dimensions.
@<|IntSequence::mult| code@>=
int IntSequence::mult(int i1, int i2) const
{
int res = 1;
for (int i = i1; i < i2; i++)@/
res *= operator[](i);
return res;
}
@ Return a number of the same items in the beginning of the sequence.
@<|IntSequence::getPrefixLength| code@>=
int IntSequence::getPrefixLength() const
{
int i = 0;
while (i+1 < size() && operator[](i+1) == operator[](0))
i++;
return i+1;
}
@ This returns a number of distinct items in the sequence. It supposes
that the sequence is ordered. For the empty sequence it returns zero.
@<|IntSequence::getNumDistinct| code@>=
int IntSequence::getNumDistinct() const
{
int res = 0;
if (size() > 0)
res++;
for (int i = 1; i < size(); i++)
if (operator[](i) != operator[](i-1))
res++;
return res;
}
@ This returns a maximum of the sequence. If the sequence is empty, it
returns the least possible |int| value.
@<|IntSequence::getMax| code@>=
int IntSequence::getMax() const
{
int res = INT_MIN;
for (int i = 0; i < size(); i++)
if (operator[](i) > res)
res = operator[](i);
return res;
}
@
@<|IntSequence::add| code 1@>=
void IntSequence::add(int i)
{
for (int j = 0; j < size(); j++)
operator[](j) += i;
}
@
@<|IntSequence::add| code 2@>=
void IntSequence::add(int f, const IntSequence& s)
{
TL_RAISE_IF(size() != s.size(),
"Wrong sequence length in IntSequence::add");
for (int j = 0; j < size(); j++)
operator[](j) += f*s[j];
}
@
@<|IntSequence::isPositive| code@>=
bool IntSequence::isPositive() const
{
int i = 0;
while (i < size() && operator[](i) >= 0)
i++;
return (i == size());
}
@
@<|IntSequence::isConstant| code@>=
bool IntSequence::isConstant() const
{
bool res = true;
int i = 1;
while (res && i < size()) {
res = res && operator[](0) == operator[](i);
i++;
}
return res;
}
@
@<|IntSequence::isSorted| code@>=
bool IntSequence::isSorted() const
{
bool res = true;
int i = 1;
while (res && i < size()) {
res = res && operator[](i-1) <= operator[](i);
i++;
}
return res;
}
@ Debug print.
@<|IntSequence::print| code@>=
void IntSequence::print() const
{
printf("[");
for (int i = 0; i < size(); i++)@/
printf("%2d ",operator[](i));
printf("]\n");
}
@ End of {\tt int\_sequence.cpp} file.

View File

@ -0,0 +1,148 @@
// Copyright 2004, Ondra Kamenik
// Integer sequence.
/* Here we define an auxiliary abstraction for a sequence of integers. The
basic functionality is to hold an ordered sequence of integers of
constant length. We prefer using this simple class over the STL
|vector<int>| since it is more efficient for our purposes.
The class is used in the index of a tensor, in a symmetry definition, in
Kronecker product dimensions, or as an equivalence class. The
latter case is not ordered, but we always order equivalence classes in
order to ensure unique representativeness. For almost all cases we
need to order the integer sequence (sort), or monotonize it (indices
of folded tensors), or partially monotonize it (indices of folded tensors
that are not fully symmetric), or calculate a product of all members or
only of a part (used in Kronecker product dimensions). When we calculate
offsets in folded tensors, we need to obtain the number of identical
items at the front (|getPrefixLength|), and also to add some integer
number to all items.
Also, we need to construct a subsequence of a sequence, so
some instances do destroy the underlying data, and some do not. */
#ifndef INT_SEQUENCE_H
#define INT_SEQUENCE_H
#include <cstring>
#include <vector>
using namespace std;
/* The implementation of |IntSequence| is straightforward. It has a
pointer |data|, a |length| of the data, and a flag |destroy| indicating
whether the instance must destroy the underlying data. */
class Symmetry;
class IntSequence
{
int *data;
int length;
bool destroy;
public:
/* We have a constructor allocating a given length of data, a constructor
allocating and then initializing all members to a given number, a copy
constructor, a conversion from |vector<int>|, a subsequence
constructor, and a constructor used for calculating an implied symmetry
from a more general symmetry and one equivalence class (see the
|Symmetry| class). Finally we have a constructor which unfolds a sequence
with respect to a given symmetry, and constructors which insert a given
number into an ordered sequence or at a given position. */
IntSequence(int l)
: data(new int[l]), length(l), destroy(true)
{
}
IntSequence(int l, int n)
: data(new int[l]), length(l), destroy(true)
{
for (int i = 0; i < length; i++)
data[i] = n;
}
IntSequence(const IntSequence &s)
: data(new int[s.length]), length(s.length), destroy(true)
{
memcpy(data, s.data, length*sizeof(int));
}
IntSequence(IntSequence &s, int i1, int i2)
: data(s.data+i1), length(i2-i1), destroy(false)
{
}
IntSequence(const IntSequence &s, int i1, int i2)
: data(new int[i2-i1]), length(i2-i1), destroy(true)
{
memcpy(data, s.data+i1, sizeof(int)*length);
}
IntSequence(const Symmetry &sy, const vector<int> &se);
IntSequence(const Symmetry &sy, const IntSequence &se);
IntSequence(int i, const IntSequence &s);
IntSequence(int i, const IntSequence &s, int pos);
IntSequence(int l, const int *d)
: data(new int[l]), length(l), destroy(true)
{
memcpy(data, d, sizeof(int)*length);
}
const IntSequence &operator=(const IntSequence &s);
virtual ~IntSequence()
{
if (destroy)
delete [] data;
}
bool operator==(const IntSequence &s) const;
bool
operator!=(const IntSequence &s) const
{
return !operator==(s);
}
int &
operator[](int i)
{
return data[i];
}
int
operator[](int i) const
{
return data[i];
}
int
size() const
{
return length;
}
/* We provide two orderings. The first |operator<| is the linear
lexicographic ordering, the second |less| is the non-linear Cartesian
ordering. */
bool operator<(const IntSequence &s) const;
bool
operator<=(const IntSequence &s) const
{
return (operator==(s) || operator<(s));
}
bool lessEq(const IntSequence &s) const;
bool less(const IntSequence &s) const;
void sort();
void monotone();
void pmonotone(const Symmetry &s);
int sum() const;
int mult(int i1, int i2) const;
int
mult() const
{
return mult(0, length);
}
void add(int i);
void add(int f, const IntSequence &s);
int getPrefixLength() const;
int getNumDistinct() const;
int getMax() const;
bool isPositive() const;
bool isConstant() const;
bool isSorted() const;
void print() const;
};
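/* A short sketch of the owning versus non-owning subsequence
constructors above:

IntSequence s(5, 7);       // (7,7,7,7,7)
IntSequence view(s, 1, 4); // shares s's data, destroy == false
view[0] = 3;               // also changes s[1]
const IntSequence &cs = s;
IntSequence copy(cs, 1, 4); // owns a fresh copy, now (3,7,7)
*/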
#endif

View File

@ -1,132 +0,0 @@
@q $Id: int_sequence.hweb 758 2006-05-22 08:31:18Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 Integer sequence. Start of {\tt int\_sequence.h} file.
Here we define an auxiliary abstraction for a sequence of integers. The
basic functionality is to hold an ordered sequence of integers with
constant length. We prefer using this simple class before STL
|vector<int>| since it is more efficient for our purposes.
The class is used in index of a tensor, in symmetry definition, in
Kronecker product dimensions, or as a class of an equivalence. The
latter case is not ordered, but we always order equivalence classes in
order to ensure unique representativeness. For almost all cases we
need the integer sequence to be ordered (sort), or monotonize (indices
of folded tensors), or partially monotonize (indices of folded tensors
not fully symmetric), or calculate a product of all members or only of
a part (used in Kronecker product dimensions). When we calculate
offsets in folded tensors, we need to obtain a number of the same
items in the front (|getPrefixLength|), and also to add some integer
number to all items.
Also, we need to construct a subsequence of a sequence, so
some instances do destroy the underlying data, and some not.
@s IntSequence int
@s Symmetry int
@c
#ifndef INT_SEQUENCE_H
#define INT_SEQUENCE_H
#include <cstring>
#include <vector>
using namespace std;
@<|IntSequence| class declaration@>;
#endif
@ The implementation of |IntSequence| is straightforward. It has a
pointer |data|, a |length| of the data, and a flag |destroy|, whether
the instance must destroy the underlying data.
@<|IntSequence| class declaration@>=
class Symmetry;
class IntSequence {
int* data;
int length;
bool destroy;
public:@/
@<|IntSequence| constructors@>;
@<|IntSequence| inlines and operators@>;
@<|IntSequence| orderings@>;
void sort();
void monotone();
void pmonotone(const Symmetry& s);
int sum() const;
int mult(int i1, int i2) const;
int mult() const
{@+return mult(0, length);@+}
void add(int i);
void add(int f, const IntSequence& s);
int getPrefixLength() const;
int getNumDistinct() const;
int getMax() const;
bool isPositive() const;
bool isConstant() const;
bool isSorted() const;
void print() const;
};
@ We have a constructor allocating a given length of data, constructor
allocating and then initializing all members to a given number, a copy
constructor, a conversion from |vector<int>|, a subsequence
constructor, a constructor used for calculating implied symmetry from
a more general symmetry and one equivalence class (see |Symmetry|
class). Finally we have a constructor which unfolds a sequence with
respect to a given symmetry and constructor which inserts a given
number to the ordered sequence or given number to a given position.
@<|IntSequence| constructors@>=
IntSequence(int l)
: data(new int[l]), length(l), destroy(true)@+ {}
IntSequence(int l, int n)
: data(new int[l]), length(l), destroy(true)
{@+ for (int i = 0; i < length; i++) data[i] = n;@+}
IntSequence(const IntSequence& s)
: data(new int[s.length]), length(s.length), destroy(true)
{@+ memcpy(data, s.data, length*sizeof(int));@+}
IntSequence(IntSequence& s, int i1, int i2)
: data(s.data+i1), length(i2-i1), destroy(false)@+ {}
IntSequence(const IntSequence& s, int i1, int i2)
: data(new int[i2-i1]), length(i2-i1), destroy(true)
{@+ memcpy(data, s.data+i1, sizeof(int)*length);@+}
IntSequence(const Symmetry& sy, const vector<int>& se);
IntSequence(const Symmetry& sy, const IntSequence& se);
IntSequence(int i, const IntSequence& s);
IntSequence(int i, const IntSequence& s, int pos);
IntSequence(int l, const int* d)
: data(new int[l]), length(l), destroy(true)
{@+ memcpy(data, d, sizeof(int)*length);@+}
@ These are clear inlines and operators.
@<|IntSequence| inlines and operators@>=
const IntSequence& operator=(const IntSequence& s);
virtual ~IntSequence()
{@+ if (destroy) delete [] data;@+}
bool operator==(const IntSequence& s) const;
bool operator!=(const IntSequence& s) const
{@+ return ! operator==(s);@+}
int& operator[](int i)
{@+ return data[i];@+}
int operator[](int i) const
{@+ return data[i];@+}
int size() const
{@+ return length;@+}
@ We provide two orderings. The first |operator<| is the linear
lexicographic ordering, the second |less| is the non-linear Cartesian
ordering.
@<|IntSequence| orderings@>=
bool operator<(const IntSequence& s) const;
bool operator<=(const IntSequence& s) const
{@+ return (operator==(s) || operator<(s));@+}
bool lessEq(const IntSequence& s) const;
bool less(const IntSequence& s) const;
@ End of {\tt int\_sequence.h} file.

430
dynare++/tl/cc/kron_prod.cc Normal file
View File

@ -0,0 +1,430 @@
// Copyright 2004, Ondra Kamenik
#include "kron_prod.hh"
#include "tl_exception.hh"
#include <cstdio>
/* Here we construct Kronecker product dimensions from other Kronecker
product dimensions by picking a given matrix and setting all others to
identity. The constructor takes the dimensions of $A_1\otimes
A_2\otimes\ldots\otimes A_n$, and makes the dimensions of $I\otimes
A_i\otimes I$, or $I\otimes A_n$, or $A_1\otimes I$ for a given
$i$. The identity matrices must fit into the described order. See the
header file.
We first decide the length of the resulting dimensions: it is three for
$I\otimes A\otimes I$, and two for $I\otimes A$ or $A\otimes I$.
Then we fork according to |i|. */
KronProdDimens::KronProdDimens(const KronProdDimens &kd, int i)
: rows((i == 0 || i == kd.dimen()-1) ? (2) : (3)),
cols((i == 0 || i == kd.dimen()-1) ? (2) : (3))
{
TL_RAISE_IF(i < 0 || i >= kd.dimen(),
"Wrong index for pickup in KronProdDimens constructor");
int kdim = kd.dimen();
if (i == 0)
{
// set AI dimensions
/* The first rows and cols are taken from |kd|. The dimension of the
identity matrix is the number of rows of $A_2\otimes\ldots\otimes A_n$,
since the matrix $A_1\otimes I$ comes first. */
rows[0] = kd.rows[0];
rows[1] = kd.rows.mult(1, kdim);
cols[0] = kd.cols[0];
cols[1] = rows[1];
}
else if (i == kdim-1)
{
// set IA dimensions
/* The second dimension is taken from |kd|. The dimension of the identity
matrix is the number of columns of $A_1\otimes\ldots\otimes A_{n-1}$, since
the matrix $I\otimes A_n$ comes last. */
rows[0] = kd.cols.mult(0, kdim-1);
rows[1] = kd.rows[kdim-1];
cols[0] = rows[0];
cols[1] = kd.cols[kdim-1];
}
else
{
// set IAI dimensions
/* The dimensions of the middle matrix are taken from |kd|. The
dimension of the first identity matrix is the number of columns of
$A_1\otimes\ldots\otimes A_{i-1}$, and the dimension of the last
identity matrix is the number of rows of $A_{i+1}\otimes\ldots\otimes
A_n$. */
rows[0] = kd.cols.mult(0, i);
cols[0] = rows[0];
rows[1] = kd.rows[i];
cols[1] = kd.cols[i];
cols[2] = kd.rows.mult(i+1, kdim);
rows[2] = cols[2];
}
}
/* This raises an exception if dimensions are bad for multiplication
|out = in*this|. */
void
KronProd::checkDimForMult(const ConstTwoDMatrix &in, const TwoDMatrix &out) const
{
int my_rows;
int my_cols;
kpd.getRC(my_rows, my_cols);
TL_RAISE_IF(in.nrows() != out.nrows() || in.ncols() != my_rows,
"Wrong dimensions for KronProd in KronProd::checkDimForMult");
}
/* Here we Kronecker multiply two given vectors |v1| and |v2| and
store the result in preallocated |res|. */
void
KronProd::kronMult(const ConstVector &v1, const ConstVector &v2,
Vector &res)
{
TL_RAISE_IF(res.length() != v1.length()*v2.length(),
"Wrong vector lengths in KronProd::kronMult");
res.zeros();
for (int i = 0; i < v1.length(); i++)
{
Vector sub(res, i*v2.length(), v2.length());
sub.add(v1[i], v2);
}
}
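/* For example, for |v1| = (1,2) and |v2| = (3,4) the result is
|res| = (1*3, 1*4, 2*3, 2*4) = (3,4,6,8). */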
void
KronProdAll::setMat(int i, const TwoDMatrix &m)
{
matlist[i] = &m;
kpd.setRC(i, m.nrows(), m.ncols());
}
void
KronProdAll::setUnit(int i, int n)
{
matlist[i] = NULL;
kpd.setRC(i, n, n);
}
bool
KronProdAll::isUnit() const
{
int i = 0;
while (i < dimen() && matlist[i] == NULL)
i++;
return i == dimen();
}
/* Here we multiply $B\cdot(I\otimes A)$. If $m$ is the dimension of the
identity matrix, then the product is equal to
$B\cdot\hbox{diag}_m(A)$. If $B$ is partitioned accordingly, then the
result is $[B_1A, B_2A,\ldots, B_mA]$.
Here, |outi| are partitions of |out|, |ini| are const partitions of
|in|, and |id_cols| is $m$. We employ level-3 BLAS. */
void
KronProdIA::mult(const ConstTwoDMatrix &in, TwoDMatrix &out) const
{
checkDimForMult(in, out);
int id_cols = kpd.cols[0];
ConstTwoDMatrix a(mat);
for (int i = 0; i < id_cols; i++)
{
TwoDMatrix outi(out, i*a.ncols(), a.ncols());
ConstTwoDMatrix ini(in, i*a.nrows(), a.nrows());
outi.mult(ini, a);
}
}
/* Here we construct |KronProdAI| from |KronProdIAI|. It is clear. */
KronProdAI::KronProdAI(const KronProdIAI &kpiai)
: KronProd(KronProdDimens(2)), mat(kpiai.mat)
{
kpd.rows[0] = mat.nrows();
kpd.cols[0] = mat.ncols();
kpd.rows[1] = kpiai.kpd.rows[2];
kpd.cols[1] = kpiai.kpd.cols[2];
}
/* Here we multiply $B\cdot(A\otimes I)$. Let the dimension of the
matrix $A$ be $m\times n$, the dimension of $I$ be $p$, and the number
of rows of $B$ be $q$. We use the fact that $B\cdot(A\otimes
I)=\hbox{reshape}(\hbox{reshape}(B, qp, m)\cdot A, q, np)$. This works
only for a matrix $B$ whose storage has leading dimension equal to the
number of rows.
For cases where the leading dimension is not equal to the number of
rows, we partition the matrix $A\otimes I$ into $m\times n$ square
partitions $a_{ij}I$. Accordingly, we partition $B$ into $m$ partitions
$[B_1, B_2,\ldots,B_m]$. Each partition of $B$ has the same number of
columns as the identity matrix. If $R$ denotes the resulting matrix,
then it can be partitioned into $n$ partitions
$[R_1,R_2,\ldots,R_n]$. Each partition of $R$ has the same number of
columns as the identity matrix. Then we have $R_i=\sum_j a_{ji}B_j$.
In code, |outi| is $R_i$, |ini| is $B_j$, and |id_cols| is the dimension
of the identity matrix. */
void
KronProdAI::mult(const ConstTwoDMatrix &in, TwoDMatrix &out) const
{
checkDimForMult(in, out);
int id_cols = kpd.cols[1];
ConstTwoDMatrix a(mat);
if (in.getLD() == in.nrows())
{
ConstTwoDMatrix in_resh(in.nrows()*id_cols, a.nrows(), in.getData().base());
TwoDMatrix out_resh(in.nrows()*id_cols, a.ncols(), out.getData().base());
out_resh.mult(in_resh, a);
}
else
{
out.zeros();
for (int i = 0; i < a.ncols(); i++)
{
TwoDMatrix outi(out, i*id_cols, id_cols);
for (int j = 0; j < a.nrows(); j++)
{
ConstTwoDMatrix ini(in, j*id_cols, id_cols);
outi.add(a.get(j, i), ini);
}
}
}
}
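/* A quick dimensional check of the identity above: let $A$ be $2\times 3$
($m=2$, $n=3$), $p=2$ and $q=4$. Then $B$ is $4\times 4$ ($q\times mp$),
$\hbox{reshape}(B,8,2)\cdot A$ is $8\times 3$, and reshaping the result
gives the $4\times 6$ ($q\times np$) product $B\cdot(A\otimes I)$. */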
/* Here we multiply $B\cdot(I\otimes A\otimes I)$. If $n$ is the
dimension of the first identity matrix, then we multiply
$B\cdot\hbox{diag}_n(A\otimes I)$. So we partition $B$ and the result $R$
accordingly, and multiply $B_i\cdot(A\otimes I)$, which is in fact
|KronProdAI::mult|. Note that the number of columns of each partition of
$B$ equals the number of rows of $A\otimes I$, and the number of columns
of each partition of $R$ equals the number of columns of $A\otimes I$.
In code, |id_cols| is $n$, |akronid| is a Kronecker product object for
$A\otimes I$, and |in_bl_width| and |out_bl_width| are the rows and
columns of $A\otimes I$. */
void
KronProdIAI::mult(const ConstTwoDMatrix &in, TwoDMatrix &out) const
{
checkDimForMult(in, out);
int id_cols = kpd.cols[0];
KronProdAI akronid(*this);
int in_bl_width;
int out_bl_width;
akronid.kpd.getRC(in_bl_width, out_bl_width);
for (int i = 0; i < id_cols; i++)
{
TwoDMatrix outi(out, i*out_bl_width, out_bl_width);
ConstTwoDMatrix ini(in, i*in_bl_width, in_bl_width);
akronid.mult(ini, outi);
}
}
/* Here we multiply $B\cdot(A_1\otimes\ldots\otimes A_n)$. First we
multiply $B\cdot(A_1\otimes I)$, then this is multiplied by all the
$I\otimes A_i\otimes I$, and finally by $I\otimes A_n$.
If the dimension of the Kronecker product is only 1, then we multiply
the two matrices in the straightforward way and return.
The intermediate results are stored on the heap, pointed to by |last|. A
new result is allocated, and then the former storage is deallocated.
We have to be careful in cases when the first or last matrix is a unit
and no calculations are performed in the corresponding code. That code
must handle |last| safely even if no calculations are done. */
void
KronProdAll::mult(const ConstTwoDMatrix &in, TwoDMatrix &out) const
{
// quick copy if product is unit
if (isUnit())
{
out.zeros();
out.add(1.0, in);
return;
}
// quick zero if one of the matrices is zero
/* If one of the matrices is exactly zero or the |in| matrix is zero,
set out to zero and return */
bool is_zero = false;
for (int i = 0; i < dimen() && !is_zero; i++)
is_zero = matlist[i] && matlist[i]->isZero();
if (is_zero || in.isZero())
{
out.zeros();
return;
}
// quick multiplication if dimension is 1
if (dimen() == 1)
{
if (matlist[0]) // always true
out.mult(in, ConstTwoDMatrix(*(matlist[0])));
return;
}
int c;
TwoDMatrix *last = NULL;
// perform first multiplication AI
/* Here we have to construct $A_1\otimes I$, allocate intermediate
result |last|, and perform the multiplication. */
if (matlist[0])
{
KronProdAI akronid(*this);
c = akronid.kpd.ncols();
last = new TwoDMatrix(in.nrows(), c);
akronid.mult(in, *last);
}
else
{
last = new TwoDMatrix(in.nrows(), in.ncols(), in.getData().base());
}
// perform intermediate multiplications IAI
/* Here we go through all $I\otimes A_i\otimes I$, construct the
product, allocate new storage for result |newlast|, perform the
multiplication, deallocate old |last|, and set |last| to |newlast|. */
for (int i = 1; i < dimen()-1; i++)
{
if (matlist[i])
{
KronProdIAI interkron(*this, i);
c = interkron.kpd.ncols();
TwoDMatrix *newlast = new TwoDMatrix(in.nrows(), c);
interkron.mult(*last, *newlast);
delete last;
last = newlast;
}
}
// perform last multiplication IA
/* Here just construct $I\otimes A_n$ and perform multiplication and
deallocate |last|. */
if (matlist[dimen()-1])
{
KronProdIA idkrona(*this);
idkrona.mult(*last, out);
}
else
{
out = *last;
}
delete last;
}
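/* A minimal usage sketch (hypothetical driver; the constructor taking
the number of factors is an assumption, the rest uses the interface
shown above):

KronProdAll kp(2);
kp.setMat(0, a1); // a1: TwoDMatrix of dimension m1 x n1
kp.setMat(1, a2); // a2: TwoDMatrix of dimension m2 x n2
TwoDMatrix out(b.nrows(), a1.ncols()*a2.ncols());
kp.mult(ConstTwoDMatrix(b), out); // out = b*(a1 kron a2); b has m1*m2 columns
*/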
/* This calculates the Kronecker product of rows of the matrices; the row
indices are given by the integer sequence. The result is allocated and
returned; the caller is responsible for its deallocation. */
Vector *
KronProdAll::multRows(const IntSequence &irows) const
{
TL_RAISE_IF(irows.size() != dimen(),
"Wrong length of row indices in KronProdAll::multRows");
Vector *last = NULL;
ConstVector *row;
vector<Vector *> to_delete;
for (int i = 0; i < dimen(); i++)
{
int j = dimen()-1-i;
// set |row| to the row of |j|-th matrix
/* If the |j|-th matrix is a real matrix, then the row is constructed
from the matrix. If the matrix is a unit, we construct a new vector,
fill it with zeros, then set the unit at the appropriate place, and
make |row| a ConstVector of this vector, which is scheduled for
deallocation. */
if (matlist[j])
row = new ConstVector(irows[j], *(matlist[j]));
else
{
Vector *aux = new Vector(ncols(j));
aux->zeros();
(*aux)[irows[j]] = 1.0;
to_delete.push_back(aux);
row = new ConstVector(*aux);
}
// set |last| to product of |row| and |last|
/* If |last| exists, we allocate new storage, Kronecker-multiply, and
deallocate the old storage. If |last| does not exist,
then we only make |last| equal to |row|. */
if (last)
{
Vector *newlast;
newlast = new Vector(last->length()*row->length());
kronMult(*row, ConstVector(*last), *newlast);
delete last;
last = newlast;
}
else
{
last = new Vector(*row);
}
delete row;
}
for (unsigned int i = 0; i < to_delete.size(); i++)
delete to_delete[i];
return last;
}
/* This permutes the matrices so that the new ordering minimizes
memory consumption. As shown in the |KronProdAllOptim| class declaration,
we want ${m_k\over n_k}\leq{m_{k-1}\over n_{k-1}}\leq\ldots\leq{m_1\over n_1}$,
where $(m_i,n_i)$ is the dimension of $A_i$. So we implement a bubble
sort. */
void
KronProdAllOptim::optimizeOrder()
{
for (int i = 0; i < dimen(); i++)
{
int swaps = 0;
for (int j = 0; j < dimen()-1; j++)
{
if (((double) kpd.rows[j])/kpd.cols[j] < ((double) kpd.rows[j+1])/kpd.cols[j+1])
{
// swap dimensions and matrices at |j| and |j+1|
int s = kpd.rows[j+1];
kpd.rows[j+1] = kpd.rows[j];
kpd.rows[j] = s;
s = kpd.cols[j+1];
kpd.cols[j+1] = kpd.cols[j];
kpd.cols[j] = s;
const TwoDMatrix *m = matlist[j+1];
matlist[j+1] = matlist[j];
matlist[j] = m;
// project the swap to the permutation |oper|
s = oper.getMap()[j+1];
oper.getMap()[j+1] = oper.getMap()[j];
oper.getMap()[j] = s;
swaps++;
}
}
if (swaps == 0)
{
return;
}
}
}
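/* For instance, with two factors of dimensions 2 x 10 and 10 x 2 (ratios
m/n of 0.2 and 5), |optimizeOrder| moves the 10 x 2 factor to the front,
so that the intermediate result shrinks as early as possible. */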

View File

@ -1,457 +0,0 @@
@q $Id: kron_prod.cweb 1834 2008-05-18 20:23:54Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@ Start of {\tt kron\_prod.cpp} file.
@c
#include "kron_prod.h"
#include "tl_exception.h"
#include <cstdio>
@<|KronProdDimens| constructor code@>;
@<|KronProd::checkDimForMult| code@>;
@<|KronProd::kronMult| code@>;
@<|KronProdAll::setMat| code@>;
@<|KronProdAll::setUnit| code@>;
@<|KronProdAll::isUnit| code@>;
@<|KronProdAll::multRows| code@>;
@<|KronProdIA::mult| code@>;
@<|KronProdAI| constructor code@>;
@<|KronProdAI::mult| code@>;
@<|KronProdIAI::mult| code@>;
@<|KronProdAll::mult| code@>;
@<|KronProdAllOptim::optimizeOrder| code@>;
@ Here we construct Kronecker product dimensions from Kronecker
product dimensions by picking a given matrix and all other set to
identity. The constructor takes dimensions of $A_1\otimes
A_2\otimes\ldots\otimes A_n$, and makes dimensions of $I\otimes
A_i\otimes I$, or $I\otimes A_n$, or $A_1\otimes I$ for a given
$i$. The identity matrices must fit into the described order. See
header file.
We first decide what is a length of the resulting dimensions. Possible
length is three for $I\otimes A\otimes I$, and two for $I\otimes A$,
or $A\otimes I$.
Then we fork according to |i|.
@<|KronProdDimens| constructor code@>=
KronProdDimens::KronProdDimens(const KronProdDimens& kd, int i)
: rows((i==0 || i==kd.dimen()-1)? (2):(3)),
cols((i==0 || i==kd.dimen()-1)? (2):(3))
{
TL_RAISE_IF(i < 0 || i >= kd.dimen(),
"Wrong index for pickup in KronProdDimens constructor");
int kdim = kd.dimen();
if (i == 0) {
@<set AI dimensions@>;
} else if (i == kdim-1){
@<set IA dimensions@>;
} else {
@<set IAI dimensions@>;
}
}
@ The first rows and cols are taken from |kd|. The dimensions of
identity matrix is a number of rows in $A_2\otimes\ldots\otimes A_n$
since the matrix $A_1\otimes I$ is the first.
@<set AI dimensions@>=
rows[0] = kd.rows[0];
rows[1] = kd.rows.mult(1, kdim);
cols[0] = kd.cols[0];
cols[1] = rows[1];
@ The second dimension is taken from |kd|. The dimensions of identity
matrix is a number of columns of $A_1\otimes\ldots A_{n-1}$, since the
matrix $I\otimes A_n$ is the last.
@<set IA dimensions@>=
rows[0] = kd.cols.mult(0, kdim-1);
rows[1] = kd.rows[kdim-1];
cols[0] = rows[0];
cols[1] = kd.cols[kdim-1];
@ The dimensions of the middle matrix are taken from |kd|. The
dimensions of the first identity matrix are a number of columns of
$A_1\otimes\ldots\otimes A_{i-1}$, and the dimensions of the last
identity matrix are a number of rows of $A_{i+1}\otimes\ldots\otimes
A_n$.
@<set IAI dimensions@>=
rows[0] = kd.cols.mult(0, i);
cols[0] = rows[0];
rows[1] = kd.rows[i];
cols[1] = kd.cols[i];
cols[2] = kd.rows.mult(i+1, kdim);
rows[2] = cols[2];
@ This raises an exception if dimensions are bad for multiplication
|out = in*this|.
@<|KronProd::checkDimForMult| code@>=
void KronProd::checkDimForMult(const ConstTwoDMatrix& in, const TwoDMatrix& out) const
{
int my_rows;
int my_cols;
kpd.getRC(my_rows, my_cols);
TL_RAISE_IF(in.nrows() != out.nrows() || in.ncols() != my_rows,
"Wrong dimensions for KronProd in KronProd::checkDimForMult");
}
@ Here we Kronecker multiply two given vectors |v1| and |v2| and
store the result in preallocated |res|.
@<|KronProd::kronMult| code@>=
void KronProd::kronMult(const ConstVector& v1, const ConstVector& v2,
Vector& res)
{
TL_RAISE_IF(res.length() != v1.length()*v2.length(),
"Wrong vector lengths in KronProd::kronMult");
res.zeros();
for (int i = 0; i < v1.length(); i++) {
Vector sub(res, i*v2.length(), v2.length());
sub.add(v1[i], v2);
}
}
@
@<|KronProdAll::setMat| code@>=
void KronProdAll::setMat(int i, const TwoDMatrix& m)
{
matlist[i] = &m;
kpd.setRC(i, m.nrows(), m.ncols());
}
@
@<|KronProdAll::setUnit| code@>=
void KronProdAll::setUnit(int i, int n)
{
matlist[i] = NULL;
kpd.setRC(i, n, n);
}
@
@<|KronProdAll::isUnit| code@>=
bool KronProdAll::isUnit() const
{
int i = 0;
while (i < dimen() && matlist[i] == NULL)
i++;
return i == dimen();
}
@ Here we multiply $B\cdot(I\otimes A)$. If $m$ is a dimension of the
identity matrix, then the product is equal to
$B\cdot\hbox{diag}_m(A)$. If $B$ is partitioned accordingly, then the
result is $[B_1A, B_2A,\ldots B_mA]$.
Here, |outi| are partitions of |out|, |ini| are const partitions of
|in|, and |id_cols| is $m$. We employ level-2 BLAS.
@<|KronProdIA::mult| code@>=
void KronProdIA::mult(const ConstTwoDMatrix& in, TwoDMatrix& out) const
{
checkDimForMult(in, out);
int id_cols = kpd.cols[0];
ConstTwoDMatrix a(mat);
for (int i = 0; i < id_cols; i++) {
TwoDMatrix outi(out, i*a.ncols(), a.ncols());
ConstTwoDMatrix ini(in, i*a.nrows(), a.nrows());
outi.mult(ini, a);
}
}
@ Here we construct |KronProdAI| from |KronProdIAI|. It is clear.
@<|KronProdAI| constructor code@>=
KronProdAI::KronProdAI(const KronProdIAI& kpiai)
: KronProd(KronProdDimens(2)), mat(kpiai.mat)
{
kpd.rows[0] = mat.nrows();
kpd.cols[0] = mat.ncols();
kpd.rows[1] = kpiai.kpd.rows[2];
kpd.cols[1] = kpiai.kpd.cols[2];
}
@ Here we multiply $B\cdot(A\otimes I)$. Let the dimension of the
matrix $A$ be $m\times n$, the dimension of $I$ be $p$, and a number
of rows of $B$ be $q$. We use the fact that $B\cdot(A\otimes
I)=\hbox{reshape}(\hbox{reshape}(B, q, mp)\cdot A, q, np)$. This works
only for matrix $B$, whose storage has leading dimension equal to
number of rows.
For cases where the leading dimension is not equal to the number of
rows, we partition the matrix $A\otimes I$ to $m\times n$ square
partitions $a_{ij}I$. Therefore, we partition $B$ to $m$ partitions
$[B_1, B_2,\ldots,B_m]$. Each partition of $B$ has the same number of
columns as the identity matrix. If $R$ denotes the resulting matrix,
then it can be partitioned to $n$ partitions
$[R_1,R_2,\ldots,R_n]$. Each partition of $R$ has the same number of
columns as the identity matrix. Then we have $R_i=\sum a_{ji}B_j$.
In code, |outi| is $R_i$, |ini| is $B_j$, and |id_cols| is a dimension
of the identity matrix
@<|KronProdAI::mult| code@>=
void KronProdAI::mult(const ConstTwoDMatrix& in, TwoDMatrix& out) const
{
checkDimForMult(in, out);
int id_cols = kpd.cols[1];
ConstTwoDMatrix a(mat);
if (in.getLD() == in.nrows()) {
ConstTwoDMatrix in_resh(in.nrows()*id_cols, a.nrows(), in.getData().base());
TwoDMatrix out_resh(in.nrows()*id_cols, a.ncols(), out.getData().base());
out_resh.mult(in_resh, a);
} else {
out.zeros();
for (int i = 0; i < a.ncols(); i++) {
TwoDMatrix outi(out, i*id_cols, id_cols);
for (int j = 0; j < a.nrows(); j++) {
ConstTwoDMatrix ini(in, j*id_cols, id_cols);
outi.add(a.get(j,i), ini);
}
}
}
}
@ Here we multiply $B\cdot(I\otimes A\otimes I)$. If $n$ is a
dimension of the first identity matrix, then we multiply
$B\cdot\hbox{diag}_n(A\otimes I)$. So we partition $B$ and result $R$
accordingly, and multiply $B_i\cdot(A\otimes I)$, which is in fact
|KronProdAI::mult|. Note that number of columns of partitions of $B$
are number of rows of $A\otimes I$, and number of columns of $R$ are
number of columns of $A\otimes I$.
In code, |id_cols| is $n$, |akronid| is a Kronecker product object of
$A\otimes I$, and |in_bl_width|, and |out_bl_width| are rows and cols of
$A\otimes I$.
@<|KronProdIAI::mult| code@>=
void KronProdIAI::mult(const ConstTwoDMatrix& in, TwoDMatrix& out) const
{
checkDimForMult(in, out);
int id_cols = kpd.cols[0];
KronProdAI akronid(*this);
int in_bl_width;
int out_bl_width;
akronid.kpd.getRC(in_bl_width, out_bl_width);
for (int i = 0; i < id_cols; i++) {
TwoDMatrix outi(out, i*out_bl_width, out_bl_width);
ConstTwoDMatrix ini(in, i*in_bl_width, in_bl_width);
akronid.mult(ini, outi);
}
}
@ Here we multiply $B\cdot(A_1\otimes\ldots\otimes A_n)$. First we
multiply $B\cdot(A_1\otimes)$, then this is multiplied by all
$I\otimes A_i\otimes I$, and finally by $I\otimes A_n$.
If the dimension of the Kronecker product is only 1, then we multiply
two matrices in straight way and return.
The intermediate results are stored on heap pointed by |last|. A new
result is allocated, and then the former storage is deallocated.
We have to be careful in cases when last or first matrix is unit and
no calculations are performed in corresponding codes. The codes should
handle |last| safely also if no calcs are done.
@<|KronProdAll::mult| code@>=
void KronProdAll::mult(const ConstTwoDMatrix& in, TwoDMatrix& out) const
{
@<quick copy if product is unit@>;
@<quick zero if one of the matrices is zero@>;
@<quick multiplication if dimension is 1@>;
int c;
TwoDMatrix* last = NULL;
@<perform first multiplication AI@>;
@<perform intermediate multiplications IAI@>;
@<perform last multiplication IA@>;
}
@
@<quick copy if product is unit@>=
if (isUnit()) {
out.zeros();
out.add(1.0, in);
return;
}
@ If one of the matrices is exactly zero or the |in| matrix is zero,
set out to zero and return
@<quick zero if one of the matrices is zero@>=
bool is_zero = false;
for (int i = 0; i < dimen() && ! is_zero; i++)
is_zero = matlist[i] && matlist[i]->isZero();
if (is_zero || in.isZero()) {
out.zeros();
return;
}
@
@<quick multiplication if dimension is 1@>=
if (dimen() == 1) {
if (matlist[0]) // always true
out.mult(in, ConstTwoDMatrix(*(matlist[0])));
return;
}
@ Here we have to construct $A_1\otimes I$, allocate intermediate
result |last|, and perform the multiplication.
@<perform first multiplication AI@>=
if (matlist[0]) {
KronProdAI akronid(*this);
c = akronid.kpd.ncols();
last = new TwoDMatrix(in.nrows(), c);
akronid.mult(in, *last);
} else {
last = new TwoDMatrix(in.nrows(), in.ncols(), in.getData().base());
}
@ Here we go through all $I\otimes A_i\otimes I$, construct the
product, allocate new storage for result |newlast|, perform the
multiplication, deallocate old |last|, and set |last| to |newlast|.
@<perform intermediate multiplications IAI@>=
for (int i = 1; i < dimen()-1; i++) {
if (matlist[i]) {
KronProdIAI interkron(*this, i);
c = interkron.kpd.ncols();
TwoDMatrix* newlast = new TwoDMatrix(in.nrows(), c);
interkron.mult(*last, *newlast);
delete last;
last = newlast;
}
}
@ Here just construct $I\otimes A_n$ and perform multiplication and
deallocate |last|.
@<perform last multiplication IA@>=
if (matlist[dimen()-1]) {
KronProdIA idkrona(*this);
idkrona.mult(*last, out);
} else {
out = *last;
}
delete last;
@ This calculates a Kornecker product of rows of matrices, the row
indices are given by the integer sequence. The result is allocated and
returned. The caller is repsonsible for its deallocation.
@<|KronProdAll::multRows| code@>=
Vector* KronProdAll::multRows(const IntSequence& irows) const
{
TL_RAISE_IF(irows.size() != dimen(),
"Wrong length of row indices in KronProdAll::multRows");
Vector* last = NULL;
ConstVector* row;
vector<Vector*> to_delete;
for (int i = 0; i < dimen(); i++) {
int j = dimen()-1-i;
@<set |row| to the row of |j|-th matrix@>;
@<set |last| to product of |row| and |last|@>;
delete row;
}
for (unsigned int i = 0; i < to_delete.size(); i++)
delete to_delete[i];
return last;
}
@ If the |j|-th matrix is real matrix, then the row is constructed
from the matrix. It the matrix is unit, we construct a new vector,
fill it with zeros, than set the unit to appropriate place, and make
the |row| as ConstVector of this vector, which sheduled for
deallocation.
@<set |row| to the row of |j|-th matrix@>=
if (matlist[j])
row = new ConstVector(irows[j], *(matlist[j]));
else {
Vector* aux = new Vector(ncols(j));
aux->zeros();
(*aux)[irows[j]] = 1.0;
to_delete.push_back(aux);
row = new ConstVector(*aux);
}
@ If the |last| is exists, we allocate new storage, Kronecker
multiply, deallocate the old storage. If the |last| does not exist,
then we only make |last| equal to |row|.
@<set |last| to product of |row| and |last|@>=
if (last) {
Vector* newlast;
newlast = new Vector(last->length()*row->length());
kronMult(*row, ConstVector(*last), *newlast);
delete last;
last = newlast;
} else {
last = new Vector(*row);
}
@ This permutes the matrices so that the new ordering minimizes
memory consumption. As shown in |@<|KronProdAllOptim| class declaration@>|,
we want ${m_k\over n_k}\leq{m_{k-1}\over n_{k-1}}\leq\ldots\leq{m_1\over n_1}$,
where $(m_i,n_i)$ is the dimension of $A_i$. So we implement a bubble
sort.
@<|KronProdAllOptim::optimizeOrder| code@>=
void KronProdAllOptim::optimizeOrder()
{
for (int i = 0; i < dimen(); i++) {
int swaps = 0;
for (int j = 0; j < dimen()-1; j++) {
if (((double)kpd.rows[j])/kpd.cols[j] < ((double)kpd.rows[j+1])/kpd.cols[j+1]) {
@<swap dimensions and matrices at |j| and |j+1|@>;
@<project the swap to the permutation |oper|@>;
}
}
if (swaps == 0) {
return;
}
}
}
@
@<swap dimensions and matrices at |j| and |j+1|@>=
int s = kpd.rows[j+1];
kpd.rows[j+1] = kpd.rows[j];
kpd.rows[j] = s;
s = kpd.cols[j+1];
kpd.cols[j+1] = kpd.cols[j];
kpd.cols[j] = s;
const TwoDMatrix* m = matlist[j+1];
matlist[j+1] = matlist[j];
matlist[j] = m;
@
@<project the swap to the permutation |oper|@>=
s = oper.getMap()[j+1];
oper.getMap()[j+1] = oper.getMap()[j];
oper.getMap()[j] = s;
swaps++;
@ End of {\tt kron\_prod.cpp} file.
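A minimal hypothetical driver for the class as a whole (the function and
matrix names are illustrative assumptions, and the dimensions are assumed
conformable, i.e. B.ncols() must equal the product of the factors' row
counts):

  void multiply_example(const TwoDMatrix &A1, const TwoDMatrix &A2,
                        const ConstTwoDMatrix &B)
  {
    KronProdAll kp(3);
    kp.setMat(0, A1);  // first factor
    kp.setUnit(1, 4);  // middle factor is a 4x4 identity, never stored
    kp.setMat(2, A2);  // last factor
    TwoDMatrix out(B.nrows(), kp.ncols());
    kp.mult(B, out);   // out = B*(A1 ⊗ I_4 ⊗ A2), without forming the product
  }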

dynare++/tl/cc/kron_prod.hh Normal file
@ -0,0 +1,348 @@
// Copyright 2004, Ondra Kamenik
// Kronecker product.
/* Here we define an abstraction for a Kronecker product of a sequence of
matrices. This is $A_1\otimes\ldots\otimes A_n$. Obviously we do not
store the product in memory. First we need to represent a dimension
of the Kronecker product. Then we represent the Kronecker product,
simply it is the Kronecker product dimension with a vector of
references to the matrices $A_1,\ldots, A_n$.
The main task of this class is to calculate a matrix product
$B\cdot(A_1\otimes A_2\otimes\ldots\otimes A_n)$ which in
our application has much more moderate dimensions than $A_1\otimes
A_2\otimes\ldots\otimes A_n$. We calculate it as
$$B\cdot(A_1\otimes I)\cdot\ldots\cdot(I\otimes A_i\otimes
I)\cdot\ldots\cdot (I\otimes A_n)$$
where dimensions of identity matrices differ and are given by the
chosen order. One can naturally ask whether there is some optimal
order minimizing the maximum storage needed for intermediate
results. The optimal ordering is implemented by the class |KronProdAllOptim|.
For this multiplication, we also need to represent products of type
$A\otimes I$, $I\otimes A\otimes I$, and $I\otimes A$. */
#ifndef KRON_PROD_H
#define KRON_PROD_H
#include "twod_matrix.hh"
#include "permutation.hh"
#include "int_sequence.hh"
class KronProdAll;
class KronProdAllOptim;
class KronProdIA;
class KronProdIAI;
class KronProdAI;
/* |KronProdDimens| maintains a dimension of the Kronecker product. So,
it maintains two sequences, one for rows, and one for columns. */
class KronProdDimens
{
friend class KronProdAll;
friend class KronProdAllOptim;
friend class KronProdIA;
friend class KronProdIAI;
friend class KronProdAI;
private:
IntSequence rows;
IntSequence cols;
public:
/* We define three constructors. First initializes to a given
dimension, and all rows and cols are set to zeros. Second is a copy
constructor. The third constructor takes dimensions of $A_1\otimes
A_2\otimes\ldots\otimes A_n$, and makes dimensions of $I\otimes
A_i\otimes I$, or $I\otimes A_n$, or $A_1\otimes I$ for a given
$i$. The dimensions of identity matrices are such that
$$A_1\otimes A_2\otimes\ldots\otimes A_n=
(A_1\otimes I)\cdot\ldots\cdot(I\otimes A_i\otimes I)
\cdot\ldots\cdot(I\otimes A_n)$$
Note that the factors on the right do not commute only because the sizes
of the identity matrices differ, those sizes being determined by this ordering. */
KronProdDimens(int dim)
: rows(dim, 0), cols(dim, 0)
{
}
KronProdDimens(const KronProdDimens &kd)
: rows(kd.rows), cols(kd.cols)
{
}
KronProdDimens(const KronProdDimens &kd, int i);
const KronProdDimens &
operator=(const KronProdDimens &kd)
{
rows = kd.rows; cols = kd.cols; return *this;
}
bool
operator==(const KronProdDimens &kd) const
{
return rows == kd.rows && cols == kd.cols;
}
int
dimen() const
{
return rows.size();
}
void
setRC(int i, int r, int c)
{
rows[i] = r; cols[i] = c;
}
void
getRC(int i, int &r, int &c) const
{
r = rows[i]; c = cols[i];
}
void
getRC(int &r, int &c) const
{
r = rows.mult(); c = cols.mult();
}
int
nrows() const
{
return rows.mult();
}
int
ncols() const
{
return cols.mult();
}
int
nrows(int i) const
{
return rows[i];
}
int
ncols(int i) const
{
return cols[i];
}
};
/* Here we define an abstract class for all Kronecker product classes,
which are |KronProdAll| (the most general), |KronProdIA| (for
$I\otimes A$), |KronProdAI| (for $A\otimes I$), and |KronProdIAI| (for
$I\otimes A\otimes I$). The purpose of the super class is to only
define some common methods and common member |kpd| for dimensions and
declare pure virtual |mult| which is implemented by the subclasses.
The class also contains a static method |kronMult|, which calculates a
Kronecker product of two vectors and stores it in the provided
vector. It is useful at a few points of the library. */
class KronProd
{
protected:
KronProdDimens kpd;
public:
KronProd(int dim)
: kpd(dim)
{
}
KronProd(const KronProdDimens &kd)
: kpd(kd)
{
}
KronProd(const KronProd &kp)
: kpd(kp.kpd)
{
}
virtual ~KronProd()
{
}
int
dimen() const
{
return kpd.dimen();
}
virtual void mult(const ConstTwoDMatrix &in, TwoDMatrix &out) const = 0;
void
mult(const TwoDMatrix &in, TwoDMatrix &out) const
{
mult(ConstTwoDMatrix(in), out);
}
void checkDimForMult(const ConstTwoDMatrix &in, const TwoDMatrix &out) const;
void
checkDimForMult(const TwoDMatrix &in, const TwoDMatrix &out) const
{
checkDimForMult(ConstTwoDMatrix(in), out);
}
static void kronMult(const ConstVector &v1, const ConstVector &v2,
Vector &res);
int
nrows() const
{
return kpd.nrows();
}
int
ncols() const
{
return kpd.ncols();
}
int
nrows(int i) const
{
return kpd.nrows(i);
}
int
ncols(int i) const
{
return kpd.ncols(i);
}
};
/* |KronProdAll| is the main class of this file. It represents the
Kronecker product $A_1\otimes A_2\otimes\ldots\otimes A_n$. Besides
dimensions, it stores pointers to matrices in |matlist| array. If a
pointer is null, then the matrix is considered to be unit. The array
is set by calls to |setMat| method (for real matrices) or |setUnit|
method (for unit matrices).
The object is constructed by a constructor, which allocates the
|matlist| and initializes dimensions to zeros. Then a caller must feed
the object with matrices by calling |setMat| and |setUnit| repeatedly
for different indices.
We implement the |mult| method of |KronProd|, and a new method
|multRows|, which creates a vector which is the Kronecker product of one
row from each matrix in the object. The rows are given by the |IntSequence|. */
class KronProdAll : public KronProd
{
friend class KronProdIA;
friend class KronProdIAI;
friend class KronProdAI;
protected:
const TwoDMatrix **const matlist;
public:
KronProdAll(int dim)
: KronProd(dim), matlist(new const TwoDMatrix *[dim])
{
}
virtual ~KronProdAll()
{
delete [] matlist;
}
void setMat(int i, const TwoDMatrix &m);
void setUnit(int i, int n);
const TwoDMatrix &
getMat(int i) const
{
return *(matlist[i]);
}
void mult(const ConstTwoDMatrix &in, TwoDMatrix &out) const;
Vector *multRows(const IntSequence &irows) const;
private:
bool isUnit() const;
};
/* The class |KronProdAllOptim| minimizes memory consumption of the
product $B\cdot(A_1\otimes A_2\otimes\ldots\otimes A_k)$. The
optimization is done by reordering of the matrices $A_1,\ldots,A_k$,
in order to minimize a sum of all storages needed for intermediate
results. The optimal ordering is also nearly optimal with respect to
number of flops.
Let $(m_i,n_i)$ be the dimensions of $A_i$. It is easy to observe that
for the $i$-th step we need storage of $r\cdot n_1\cdot\ldots\cdot
n_i\cdot m_{i+1}\cdot\ldots\cdot m_k$, where $r$ is the number of rows
of $B$. Minimizing the sum over all $i$ across all permutations of the
matrices is therefore equivalent to minimizing the sum
$\sum_{i=1}^k{m_{i+1}\cdot\ldots\cdot m_k\over n_{i+1}\cdot\ldots\cdot
n_k}$. The optimal ordering will yield ${m_k\over
n_k}\leq{m_{k-1}\over n_{k-1}}\leq\ldots\leq{m_1\over n_1}$.
Now observe that the number of flops for the $i$-th step is $r\cdot
n_1\cdot\ldots\cdot n_i\cdot m_i\cdot\ldots\cdot m_k$. Minimizing the
number of flops is therefore equivalent to minimizing
$\sum_{i=1}^km_i{m_{i+1}\cdot\ldots\cdot m_k\over
n_{i+1}\cdot\ldots\cdot n_k}$. Note that, normally, $m_i$ does not
vary as much as $n_{i+1},\ldots,n_k$, so the ordering minimizing the
memory will be nearly optimal with respect to the number of flops.
The class |KronProdAllOptim| inherits from |KronProdAll|. A public
method |optimizeOrder| does the reordering. The permutation is stored
in |oper|. So, as long as |optimizeOrder| is not called, the class is
equivalent to |KronProdAll|. */
class KronProdAllOptim : public KronProdAll
{
protected:
Permutation oper;
public:
KronProdAllOptim(int dim)
: KronProdAll(dim), oper(dim)
{
}
void optimizeOrder();
const Permutation &
getPer() const
{
return oper;
}
};
/* This class represents $I\otimes A$. We have only one reference to
the matrix, which is set by constructor. */
class KronProdIA : public KronProd
{
friend class KronProdAll;
const TwoDMatrix &mat;
public:
KronProdIA(const KronProdAll &kpa)
: KronProd(KronProdDimens(kpa.kpd, kpa.dimen()-1)),
mat(kpa.getMat(kpa.dimen()-1))
{
}
void mult(const ConstTwoDMatrix &in, TwoDMatrix &out) const;
};
/* This class represents $A\otimes I$. We have only one reference to
the matrix, which is set by constructor. */
class KronProdAI : public KronProd
{
friend class KronProdIAI;
friend class KronProdAll;
const TwoDMatrix &mat;
public:
KronProdAI(const KronProdAll &kpa)
: KronProd(KronProdDimens(kpa.kpd, 0)),
mat(kpa.getMat(0))
{
}
KronProdAI(const KronProdIAI &kpiai);
void mult(const ConstTwoDMatrix &in, TwoDMatrix &out) const;
};
/* This class represents $I\otimes A\otimes I$. We have only one reference to
the matrix, which is set by constructor. */
class KronProdIAI : public KronProd
{
friend class KronProdAI;
friend class KronProdAll;
const TwoDMatrix &mat;
public:
KronProdIAI(const KronProdAll &kpa, int i)
: KronProd(KronProdDimens(kpa.kpd, i)),
mat(kpa.getMat(i))
{
}
void mult(const ConstTwoDMatrix &in, TwoDMatrix &out) const;
};
#endif
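A hypothetical sketch of the optimized variant (the dimensions in the
comments are illustrative assumptions): after the factors are registered,
|optimizeOrder| reorders them by decreasing ratio $m_i/n_i$, and the caller
can query the recorded permutation:

  void optim_example(const TwoDMatrix &A1, const TwoDMatrix &A2)
  {
    KronProdAllOptim kp(2);
    kp.setMat(0, A1);    // say 3 x 6, ratio 0.5
    kp.setMat(1, A2);    // say 10 x 2, ratio 5
    kp.optimizeOrder();  // swaps the two factors, since 5 > 0.5
    const Permutation &per = kp.getPer();  // the reordering, for the caller's use
  }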

dynare++/tl/cc/kron_prod.hweb
@ -1,296 +0,0 @@
@q $Id: kron_prod.hweb 2269 2008-11-23 14:33:22Z michel $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 Kronecker product. Start of {\tt kron\_prod.h} file.
Here we define an abstraction for a Kronecker product of a sequence of
matrices. This is $A_1\otimes\ldots\otimes A_n$. Obviously we do not
store the product in memory. First we need to represent a dimension
of the Kronecker product. Then we represent the Kronecker product,
simply it is the Kronecker product dimension with a vector of
references to the matrices $A_1,\ldots, A_n$.
The main task of this class is to calculate a matrix product
$B\cdot(A_1\otimes A_2\otimes\ldots\otimes A_n)$ which in
our application has much more moderate dimensions than $A_1\otimes
A_2\otimes\ldots\otimes A_n$. We calculate it as
$$B\cdot(A_1\otimes I)\cdot\ldots\cdot(I\otimes A_i\otimes
I)\cdot\ldots\cdot (I\otimes A_n)$$
where dimensions of identity matrices differ and are given by the
chosen order. One can naturally ask whether there is some optimal
order minimizing the maximum storage needed for intermediate
results. The optimal ordering is implemented by the class |KronProdAllOptim|.
For this multiplication, we also need to represent products of type
$A\otimes I$, $I\otimes A\otimes I$, and $I\otimes A$.
@s KronProdDimens int
@s KronProd int
@c
#ifndef KRON_PROD_H
#define KRON_PROD_H
#include "twod_matrix.h"
#include "permutation.h"
#include "int_sequence.h"
class KronProdAll;
class KronProdAllOptim;
class KronProdIA;
class KronProdIAI;
class KronProdAI;
@<|KronProdDimens| class declaration@>;
@<|KronProd| class declaration@>;
@<|KronProdAll| class declaration@>;
@<|KronProdAllOptim| class declaration@>;
@<|KronProdIA| class declaration@>;
@<|KronProdAI| class declaration@>;
@<|KronProdIAI| class declaration@>;
#endif
@ |KronProdDimens| maintains a dimension of the Kronecker product. So,
it maintains two sequences, one for rows, and one for columns.
@<|KronProdDimens| class declaration@>=
class KronProdDimens {
friend class KronProdAll;
friend class KronProdAllOptim;
friend class KronProdIA;
friend class KronProdIAI;
friend class KronProdAI;
private:@;
IntSequence rows;
IntSequence cols;
public:@;
@<|KronProdDimens| constructors@>;
@<|KronProdDimens| inline operators@>;
@<|KronProdDimens| inline methods@>;
};
@ We define three constructors. First initializes to a given
dimension, and all rows and cols are set to zeros. Second is a copy
constructor. The third constructor takes dimensions of $A_1\otimes
A_2\otimes\ldots\otimes A_n$, and makes dimensions of $I\otimes
A_i\otimes I$, or $I\otimes A_n$, or $A_1\otimes I$ for a given
$i$. The dimensions of identity matrices are such that
$$A_1\otimes A_2\otimes\ldots\otimes A_n=
(A_1\otimes I)\cdot\ldots\cdot(I\otimes A_i\otimes I)
\cdot\ldots\cdot(I\otimes A_n)$$
Note that the factors on the right do not commute only because the sizes
of the identity matrices differ, those sizes being determined by this ordering.
@<|KronProdDimens| constructors@>=
KronProdDimens(int dim)
: rows(dim,0), cols(dim, 0)@+ {}
KronProdDimens(const KronProdDimens& kd)
: rows(kd.rows), cols(kd.cols)@+ {}
KronProdDimens(const KronProdDimens& kd, int i);
@
@<|KronProdDimens| inline operators@>=
const KronProdDimens& operator=(const KronProdDimens& kd)
{@+ rows = kd.rows;@+ cols = kd.cols;@+ return *this;@+}
bool operator==(const KronProdDimens& kd) const
{@+ return rows == kd.rows && cols == kd.cols;@+}
@
@<|KronProdDimens| inline methods@>=
int dimen() const
{@+ return rows.size();@+}
void setRC(int i, int r, int c)
{@+ rows[i] = r;@+ cols[i] = c;@+}
void getRC(int i, int& r, int& c) const
{@+ r = rows[i];@+ c = cols[i];@+}
void getRC(int& r, int& c) const
{@+ r = rows.mult();@+ c = cols.mult();@+}
int nrows() const
{@+ return rows.mult();@+}
int ncols() const
{@+ return cols.mult();@+}
int nrows(int i) const
{@+ return rows[i];@+}
int ncols(int i) const
{@+ return cols[i];@+}
@ Here we define an abstract class for all Kronecker product classes,
which are |KronProdAll| (the most general), |KronProdIA| (for
$I\otimes A$), |KronProdAI| (for $A\otimes I$), and |KronProdIAI| (for
$I\otimes A\otimes I$). The purpose of the super class is to only
define some common methods and common member |kpd| for dimensions and
declare pure virtual |mult| which is implemented by the subclasses.
The class also contains a static method |kronMult|, which calculates a
Kronecker product of two vectors and stores it in the provided
vector. It is useful at a few points of the library.
@<|KronProd| class declaration@>=
class KronProd {
protected:@/
KronProdDimens kpd;
public:@/
KronProd(int dim)
: kpd(dim)@+ {}
KronProd(const KronProdDimens& kd)
: kpd(kd)@+ {}
KronProd(const KronProd& kp)
: kpd(kp.kpd)@+ {}
virtual ~KronProd()@+ {}
int dimen() const
{@+ return kpd.dimen();@+}
virtual void mult(const ConstTwoDMatrix& in, TwoDMatrix& out) const =0;
void mult(const TwoDMatrix& in, TwoDMatrix& out) const
{@+ mult(ConstTwoDMatrix(in), out);@+}
void checkDimForMult(const ConstTwoDMatrix& in, const TwoDMatrix& out) const;
void checkDimForMult(const TwoDMatrix& in, const TwoDMatrix& out) const
{@+ checkDimForMult(ConstTwoDMatrix(in), out);@+}
static void kronMult(const ConstVector& v1, const ConstVector& v2,
Vector& res);
int nrows() const
{@+ return kpd.nrows();@+}
int ncols() const
{@+ return kpd.ncols();@+}
int nrows(int i) const
{@+ return kpd.nrows(i);@+}
int ncols(int i) const
{@+ return kpd.ncols(i);@+}
};
@ |KronProdAll| is the main class of this file. It represents the
Kronecker product $A_1\otimes A_2\otimes\ldots\otimes A_n$. Besides
dimensions, it stores pointers to matrices in |matlist| array. If a
pointer is null, then the matrix is considered to be unit. The array
is set by calls to |setMat| method (for real matrices) or |setUnit|
method (for unit matrices).
The object is constructed by a constructor, which allocates the
|matlist| and initializes dimensions to zeros. Then a caller must feed
the object with matrices by calling |setMat| and |setUnit| repeatedly
for different indices.
We implement the |mult| method of |KronProd|, and a new method
|multRows|, which creates a vector which is the Kronecker product of one
row from each matrix in the object. The rows are given by the |IntSequence|.
@<|KronProdAll| class declaration@>=
class KronProdAll : public KronProd {
friend class KronProdIA;
friend class KronProdIAI;
friend class KronProdAI;
protected:@;
const TwoDMatrix** const matlist;
public:@;
KronProdAll(int dim)
: KronProd(dim), matlist(new const TwoDMatrix*[dim])@+ {}
virtual ~KronProdAll()
{@+ delete [] matlist;@+}
void setMat(int i, const TwoDMatrix& m);
void setUnit(int i, int n);
const TwoDMatrix& getMat(int i) const
{@+ return *(matlist[i]);@+}
void mult(const ConstTwoDMatrix& in, TwoDMatrix& out) const;
Vector* multRows(const IntSequence& irows) const;
private:@;
bool isUnit() const;
};
@ The class |KronProdAllOptim| minimizes memory consumption of the
product $B\cdot(A_1\otimes A_2\otimes\ldots\otimes A_k)$. The
optimization is done by reordering of the matrices $A_1,\ldots,A_k$,
in order to minimize a sum of all storages needed for intermediate
results. The optimal ordering is also nearly optimal with respect to
number of flops.
Let $(m_i,n_i)$ be dimensions of $A_i$. It is easy to observe, that
for $i$-th step we need storage of $r\cdot n_1\cdot\ldots\cdot
n_i\cdot m_{i+1}\cdot\ldots\cdot m_k$, where $r$ is a number of rows
of $B$. To minimize the sum through all $i$ over all permutations of
matrices, it is equivalent to minimize the sum
$\sum_{i=1}^k{m_{i+1}\cdot\ldots\cdot m_k\over n_{i+1}\cdot\ldots\cdot
n_k}$. The optimal ordering will yield ${m_k\over
n_k}\leq{m_{k-1}\over n_{k-1}}\leq\ldots\leq{m_1\over n_1}$.
Now observe, that the number of flops for $i$-th step is $r\cdot
n_1\cdot\ldots\cdot n_i\cdot m_i\cdot\ldots\cdot m_k$. In order to
minimize a number of flops, it is equivalent to minimize
$\sum_{i=1}^km_i{m_{i+1}\cdot\ldots\cdot m_k\over
n_{i+1}\cdot\ldots\cdot n_k}$. Note that, normally, the $m_i$ does not
change as much as $n_{j+1},\ldots,n_k$, so the ordering minimizing the
memory will be nearly optimal with respect to number of flops.
The class |KronProdAllOptim| inherits from |KronProdAll|. A public
method |optimizeOrder| does the reordering. The permutation is stored
in |oper|. So, as long as |optimizeOrder| is not called, the class is
equivalent to |KronProdAll|.
@<|KronProdAllOptim| class declaration@>=
class KronProdAllOptim : public KronProdAll {
protected:@;
Permutation oper;
public:@;
KronProdAllOptim(int dim)
: KronProdAll(dim), oper(dim) @+ {}
void optimizeOrder();
const Permutation& getPer() const
{@+ return oper; @+}
};
@ This class represents $I\otimes A$. We have only one reference to
the matrix, which is set by constructor.
@<|KronProdIA| class declaration@>=
class KronProdIA : public KronProd {
friend class KronProdAll;
const TwoDMatrix& mat;
public:@/
KronProdIA(const KronProdAll& kpa)
: KronProd(KronProdDimens(kpa.kpd, kpa.dimen()-1)),
mat(kpa.getMat(kpa.dimen()-1))
{}
void mult(const ConstTwoDMatrix& in, TwoDMatrix& out) const;
};
@ This class represents $A\otimes I$. We have only one reference to
the matrix, which is set by constructor.
@<|KronProdAI| class declaration@>=
class KronProdAI : public KronProd {
friend class KronProdIAI;
friend class KronProdAll;
const TwoDMatrix& mat;
public:@/
KronProdAI(const KronProdAll& kpa)
: KronProd(KronProdDimens(kpa.kpd, 0)),
mat(kpa.getMat(0))
{}
KronProdAI(const KronProdIAI& kpiai);
void mult(const ConstTwoDMatrix& in, TwoDMatrix& out) const;
};
@ This class represents $I\otimes A\otimes I$. We have only one reference to
the matrix, which is set by constructor.
@<|KronProdIAI| class declaration@>=
class KronProdIAI : public KronProd {
friend class KronProdAI;
friend class KronProdAll;
const TwoDMatrix& mat;
public:@/
KronProdIAI(const KronProdAll& kpa, int i)
: KronProd(KronProdDimens(kpa.kpd, i)),
mat(kpa.getMat(i))
{}
void mult(const ConstTwoDMatrix& in, TwoDMatrix& out) const;
};
@ End of {\tt kron\_prod.h} file.
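The static |kronMult| is also usable on its own; a hypothetical sketch
(the vector names are assumptions of this sketch):

  void kron_example(const Vector &a, const Vector &b)
  {
    Vector res(a.length() * b.length());
    KronProd::kronMult(ConstVector(a), ConstVector(b), res);
    // res now holds the Kronecker product of a and b
  }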

dynare++/tl/cc/main.web
@ -1,387 +0,0 @@
@q $Id: main.web 2338 2009-01-14 10:40:30Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@q cwebmac.tex defines its own \ifpdf, which is incompatible with the @>
@q \ifpdf defined by eplain, so undefine it @>
\let\ifpdf\relax
\input eplain
@q now define \ifpdf to be always false: PDF macros of cwebmac are buggy @>
\newif\ifpdf
\iffalse\fi
\def\title{{\mainfont Tensor Library}}
@i ../../c++lib.w
@s const_reverse_iterator int
@s value_type int
\titletrue
\null\vfill
\centerline{\titlefont Multidimensional Tensor Library}
\vskip\baselineskip
\centerline{\vtop{\hsize=10cm\leftskip=0pt plus 1fil
\rightskip=0pt plus 1fil\noindent
primary use in perturbation methods for Stochastic
Dynamic General Equilibrium (SDGE) models}}
\vfill\vfill
Copyright \copyright\ 2004 by Ondra Kamenik
@*1 Library overview.
The design of the library was driven by the needs of perturbation
methods for solving Stochastic Dynamic General Equilibrium models. The
aim of the library is not to provide an exhaustive interface to
multidimensional linear algebra. The tensor library's main purposes
include:
\unorderedlist
\li Define types for tensors, for a multidimensional index of a
tensor, and types for folded and unfolded tensors. The tensors defined
here have only one multidimensional index and one reserved
one-dimensional index. The tensors should allow modelling of higher
order derivatives with respect to a few vectors with different sizes
(for example $\left[g_{y^2u^3}\right]$). The tensors should allow
folded and unfolded storage modes and conversion between them. A
folded tensor stores symmetric elements only once, while an unfolded
one stores the data as a whole multidimensional cube.
\li Define both sparse and dense tensors. We need only one particular
type of sparse tensor. This is in contrast to dense tensors, where we
need a much wider family of types.
\li Implement the Faa Di Bruno multidimensional formula. So, the main
purpose of the library is to implement the following step of Faa Di Bruno:
$$\left[B_{s^k}\right]_{\alpha_1\ldots\alpha_k}
=\left[h_{y^l}\right]_{\gamma_1\ldots\gamma_l}
\left(\sum_{c\in M_{l,k}}
\prod_{m=1}^l\left[g_{c_m}\right]^{\gamma_m}_{c_m(\alpha)}\right)$$
where $s$ can be a compound vector of variables, $M_{l,k}$ is a set of
all equivalences of $k$ element set having $l$ classes, $c_m$ is
$m$-th class of equivalence $c$, and $c_m(\alpha)$ is a tuple of
picked indices from $\alpha$ by class $c_m$.
Note that the sparse tensors play the role of $h$ in the Faa Di Bruno
formula, not of $B$ nor $g$; a small example of $M_{l,k}$ follows this list.
\endunorderedlist
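For instance, for $k=3$ and $l=2$, the set $M_{2,3}$ consists of the three
equivalences $\{\{1,2\},\{3\}\}$, $\{\{1,3\},\{2\}\}$ and
$\{\{2,3\},\{1\}\}$, so the inner sum has three terms, each being a product
of two factors $\left[g_{c_m}\right]$.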
The following table is a road-map to various abstractions in the library.
\def\defloc#1#2{#1\hfill\break{\tt #2}}
\noindent
\halign to\hsize{%
\vtop{\hsize=6.6cm\rightskip=0pt plus 1fil\noindent #}&
\vtop{\advance\hsize by-6.6cm%
\raggedright\noindent\vrule width 0pt height 14pt #}\cr
Class defined in & Purpose\cr
\noalign{\hrule}\cr
\defloc{|@<|Tensor| class declaration@>|}{tensor.hweb}&
Virtual base class for all dense tensors, defines |index| as the
multidimensional iterator
\cr
\defloc{|@<|FTensor| class declaration@>|}{tensor.hweb}&
Virtual base class for all folded tensors
\cr
\defloc{|@<|UTensor| class declaration@>|}{tensor.hweb}&
Virtual base class for all unfolded tensors
\cr
\defloc{|@<|FFSTensor| class declaration@>|}{fs\_tensor.hweb}&
Class representing folded full symmetry dense tensor,
for instance $\left[g_{y^3}\right]$
\cr
\defloc{|@<|FGSTensor| class declaration@>|}{gs\_tensor.hweb}&
Class representing folded general symmetry dense tensor,
for instance $\left[g_{y^2u^3}\right]$
\cr
\defloc{|@<|UFSTensor| class declaration@>|}{fs\_tensor.hweb}&
Class representing unfolded full symmetry dense tensor,
for instance $\left[g_{y^3}\right]$
\cr
\defloc{|@<|UGSTensor| class declaration@>|}{gs\_tensor.hweb}&
Class representing unfolded general symmetry dense tensor,
for instance $\left[g_{y^2u^3}\right]$
\cr
|@<|URTensor| class declaration@>|\hfill\break
\defloc{|@<|FRTensor| class declaration@>|}{rfs\_tensor.hweb}&
Class representing unfolded/folded full symmetry, row-orient\-ed,
dense tensor. Row-oriented tensors are used in the Faa Di Bruno
above as some part (few or one column) of a product of $g$'s. Their
fold/unfold conversions are special in such a way, that they must
yield equivalent results if multiplied with folded/unfolded
column-oriented counterparts.
\cr
|@<|URSingleTensor| class declaration@>|\hfill\break
\defloc{|@<|FRSingleTensor| class declaration@>|}{rfs\_tensor.hweb}&
Class representing unfolded/folded full symmetry, row-orient\-ed,
single column, dense tensor. Besides use in the Faa Di Bruno, the
single column row oriented tensor models also higher moments of normal
distribution.
\cr
\defloc{|@<|UPSTensor| class declaration@>|}{ps\_tensor.hweb}&
Class representing unfolded, column-orient\-ed tensor whose symmetry
is not that of the $\left[B_{y^2u^3}\right]$ but rather of something
as $\left[B_{yuuyu}\right]$. This tensor evolves during the product
operation for unfolded tensors and its basic operation is to add
itself to a tensor with nicer symmetry, here $\left[B_{y^2u^3}\right]$.
\cr
\defloc{|@<|FPSTensor| class declaration@>|}{ps\_tensor.hweb}&
Class representing partially folded, column-orient\-ed tensor who\-se
symmetry is not that of the $\left[B_{y^3u^4}\right]$ but rather
something as $\left[B_{yu\vert y^3u\vert u^4}\right]$, where the
portions of symmetries represent folded dimensions which are combined
in unfolded manner. This tensor evolves during the Faa Di Bruno
for folded tensors and its basic operation is to add itself to a
tensor with nicer symmetry, here folded $\left[B_{y^3u^4}\right]$.
\cr
\defloc{|@<|USubTensor| class declaration@>|}{pyramid\_prod.hweb}&
Class representing unfolded full symmetry, row-orient\-ed tensor which
contains a few columns of huge product
$\prod_{m=1}^l\left[g_{c_m}\right]^{\gamma_m}_{c_m(\alpha)}$. This is
needed during the Faa Di Bruno for folded matrices.
\cr
\defloc{|@<|IrregTensor| class declaration@>|}{pyramid2\_prod.hweb}&
Class representing a product of columns of derivatives
$\left[z_{y^ku^l}\right]$, where $z=[y^T,v^T,w^T]^T$. Since the first
part of $z$ is $y$, the derivatives contain many zeros, which are not
stored, hence the tensor's irregularity. The tensor is used when
calculating one step of Faa Di Bruno formula, i.e.
$\left[f_{z^l}\right]\sum\prod_{m=1}^l\left[z_{c_m}\right]^{\gamma_m}_{c_m(\alpha)}$.
\cr
\defloc{|@<|FSSparseTensor| class declaration@>|}{sparse\_tensor.hweb}&
Class representing full symmetry, column-oriented, sparse tensor. It
is able to store elements keyed by the multidimensional index, and
multiply itself with one column of row-oriented tensor.
\cr
\defloc{|@<|FGSContainer| class declaration@>|}{t\_container.hweb}&
Container of |FGSTensor|s. It implements the Faa Di Bruno with
unfolded or folded tensor $h$ yielding folded $B$. The methods are
|FGSContainer::multAndAdd|.
\cr
\defloc{|@<|UGSContainer| class declaration@>|}{t\_container.hweb}&
Container of |FGSTensor|s. It implements the Faa Di Bruno with
unfolded tensor $h$ yielding unfolded $B$. The method is
|UGSContainer::multAndAdd|.
\cr
\defloc{|@<|StackContainerInterface| class declaration@>|}
{stack\_container.hweb}&Virtual pure interface describing all logic
of stacked containers for which we will do the Faa Di Bruno operation.
\cr
\defloc{|@<|UnfoldedStackContainer| class declaration@>|}
{stack\_container.hweb}&Implements the Faa Di Bruno operation for stack of
containers of unfolded tensors.
\cr
\defloc{|@<|FoldedStackContainer| class declaration@>|}{stack\_container.hweb}
&Implements the Faa Di Bruno for stack of
containers of fold\-ed tensors.
\cr
\defloc{|@<|ZContainer| class declaration@>|}{stack\_container.hweb}&
The class implements the interface |StackContainerInterface| according
to $z$ appearing in context of SDGE models. By a simple inheritance,
we obtain |@<|UnfoldedZContainer| class declaration@>| and also
|@<|FoldedZContainer| class declaration@>|.
\cr
\defloc{|@<|GContainer| class declaration@>|}{stack\_container.hweb}&
The class implements the interface |StackContainerInterface| according
to $G$ appearing in context of SDGE models. By a simple inheritance,
we obtain |@<|UnfoldedGContainer| class declaration@>| and also
|@<|FoldedGContainer| class declaration@>|.
\cr
\defloc{|@<|Equivalence| class declaration@>|}{equivalence.hweb}&
The class represents an equivalence on $n$-element set. Useful in the
Faa Di Bruno.
\cr
\defloc{|@<|EquivalenceSet| class declaration@>|}{equivalence.hweb}&
The class representing all equivalences on $n$-element set. Useful in the
Faa Di Bruno.
\cr
\defloc{|@<|Symmetry| class declaration@>|}{symmetry.hweb}&
The class defines a symmetry of a general symmetry tensor. That is, it
defines the basic shape of the tensor. For $\left[B_{y^2u^3}\right]$,
the symmetry is $y^2u^3$.
\cr
\defloc{|@<|Permutation| class declaration@>|}{permutation.hweb}&
The class represents a permutation of $n$ indices. Useful in the
Faa Di Bruno.
\cr
\defloc{|@<|IntSequence| class declaration@>|}{int\_sequence.hweb}&
The class represents a sequence of integers. Useful everywhere.
\cr
|@<|TwoDMatrix| class declaration@>|\hfill\break
\defloc{|@<|ConstTwoDMatrix| class declaration@>|}{twod\_matrix.hweb}&
The class provides an interface to a code handling two-di\-men\-si\-onal
matrices. The code resides in Sylvester module, in directory {\tt
sylv/cc}. The object files from that directory need to be linked: {\tt
GeneralMatrix.o}, {\tt Vector.o} and {\tt SylvException.o}. There is
no similar interface to |Vector| and |ConstVector| classes from the
Sylvester module and they are used directly.
\cr
\defloc{|@<|KronProdAll| class declaration@>|}{kron\_prod.hweb}&
The class represents a Kronecker product of a sequence of arbitrary
matrices and is able to multiply a matrix from the right without
storing the Kronecker product in memory.
\cr
\defloc{|@<|KronProdAllOptim| class declaration@>|}{kron\_prod.hweb}&
The same as |KronProdAll| but it optimizes the order of matrices in
the product to minimize the used memory during the Faa Di Bruno
operation. Note that it is close to optimal flops.
\cr
|@<|FTensorPolynomial| class declaration@>|\hfill\break
\defloc{|@<|UTensorPolynomial| class declaration@>|}{t\_polynomial.hweb}&
Abstractions representing a polynomial whose coefficients are
folded/unfolded tensors and variable is a column vector. The classes
provide methods for traditional and horner-like polynomial
evaluation. This is useful in simulation code.
\cr
|@<|FNormalMoments| class declaration@>|\hfill\break
\defloc{|@<|UNormalMoments| class declaration@>|}{normal\_moments.hweb}&
These are containers for folded/unfolded single column tensors for
higher moments of normal distribution. The code contains an algorithm
for generating the moments for arbitrary covariance matrix.
\cr
\defloc{|@<|TLStatic| class declaration@>|}{tl\_static.hweb}&
The class encapsulates all static information needed for the
library. It includes a Pascal triangle (for quick computation of
binomial coefficients), and precalculated equivalence sets.
\cr
\defloc{|@<|TLException| class definition@>|}{tl\_exception.hweb}&
Simple class thrown as an exception.
\cr
}
@s Tensor int
@s FTensor int
@s UTensor int
@s FFSTensor int
@s UFSTensor int
@s FGSTensor int
@s UGSTensor int
@s FRTensor int
@s URTensor int
@s FRSingleTensor int
@s URSingleTensor int
@s UPSTensor int
@s UGSContainer int
@s ZContainer int
@s GContainer int
@s StackContainerInterface int
@s FoldedStackContainer int
@s UnfoldedStackContainer int
@s FoldedZContainer int
@s UnfoldedZContainer int
@s FoldedGContainer int
@s UnfoldedGContainer int
@s Permutation int
@s KronProdAll int
@s KronProdAllOptim int
@s FTensorPolynomial int
@s UTensorPolynomial int
@s FNormalMoments int
@s UNormalMoments int
@s TLStatic int
@s FSSparseTensor int
@ The tensor library is multi-threaded. This means that, if the appropriate
compilation options are set, some parts of the code are run
concurrently. This boosts the performance on SMP machines or single
processors with hyper-threading support. The basic property of the
thread implementation in the library is that we do not allow running
more concurrent threads than a preset limit. This prevents threads
from competing for memory in such a way that the OS constantly switches
among threads with frequent I/O for swaps, which may occur since a single
thread might need a lot of memory of its own. The threading support allows for
detached threads; since the synchronization points during the Faa Di Bruno
operation are relatively short, the resulting load is close to the
preset maximum number of parallel threads.
@ A few words about the library's test suite. The suite resides in
the directory {\tt tl/testing}. There is a file {\tt tests.cpp} which
contains all tests and the {\tt main()} function. Also there are files
{\tt factory.h} and {\tt factory.cpp} implementing random generation
of various objects. The important property of these random objects is
that they are the same across all invocations. This is very
important in testing and debugging. Further, one can find files {\tt
monoms.h} and {\tt monoms.cpp}. See below for their explanation.
There are a few types of tests:
\orderedlist
\li We test for tensor indices. We go through various tensors with
various symmetries, convert indices from folded to unfolded and
vice-versa. We test whether their coordinates are as expected.
\li We test the Faa Di Bruno by comparison of the results of
|FGSContainer::multAndAdd| against the results of |UGSContainer::multAndAdd|. The two
implementations are pretty different, so this is a good test.
\li We use a code in {\tt monoms.h} and {\tt monoms.cpp} to generate a
random vector function $f(x(y,u))$ along with derivatives of
$\left[f_x\right]$, $\left[x_{y^ku^l}\right]$, and
$\left[f_{y^ku^l}\right]$. Then we calculate the resulting derivatives
$\left[f_{y^ku^l}\right]$ using |multAndAdd| method of |UGSContainer|
or |FGSContainer| and compare the derivatives provided by {\tt
monoms}. The functions generated in {\tt monoms} are monomials with
integer exponents, so the implementation of {\tt monoms} is quite
easy.
\li We do a similar thing for sparse tensors. In this case the {\tt monoms}
generate a function $f(y,v(y,u),w(y,u))$, provide all the derivatives
and the result $\left[f_{y^ku^l}\right]$. Then we calculate the
derivatives with |multAndAdd| of |ZContainer| and compare.
\li We test the polynomial evaluation by evaluating a folded and
unfolded polynomial in traditional and horner-like fashion. This gives
four methods in total. The four results are compared.
\endorderedlist
@*1 Utilities.
@i sthread.hweb
@i sthread.cweb
@i tl_exception.hweb
@i int_sequence.hweb
@i int_sequence.cweb
@i twod_matrix.hweb
@i twod_matrix.cweb
@i kron_prod.hweb
@i kron_prod.cweb
@*1 Combinatorics.
@i symmetry.hweb
@i symmetry.cweb
@i equivalence.hweb
@i equivalence.cweb
@i permutation.hweb
@i permutation.cweb
@*1 Tensors.
@i tensor.hweb
@i tensor.cweb
@i fs_tensor.hweb
@i fs_tensor.cweb
@i gs_tensor.hweb
@i gs_tensor.cweb
@i rfs_tensor.hweb
@i rfs_tensor.cweb
@i ps_tensor.hweb
@i ps_tensor.cweb
@i sparse_tensor.hweb
@i sparse_tensor.cweb
@*1 The Faa Di Bruno formula.
@i t_container.hweb
@i t_container.cweb
@i stack_container.hweb
@i stack_container.cweb
@i fine_container.hweb
@i fine_container.cweb
@i pyramid_prod.hweb
@i pyramid_prod.cweb
@i pyramid_prod2.hweb
@i pyramid_prod2.cweb
@*1 Miscellany.
@i t_polynomial.hweb
@i t_polynomial.cweb
@i normal_moments.hweb
@i normal_moments.cweb
@i tl_static.hweb
@i tl_static.cweb
@*1 Index.

dynare++/tl/cc/normal_moments.cc Normal file
@ -0,0 +1,103 @@
// Copyright 2004, Ondra Kamenik
#include "normal_moments.hh"
#include "permutation.hh"
#include "kron_prod.hh"
#include "tl_static.hh"
UNormalMoments::UNormalMoments(int maxdim, const TwoDMatrix &v)
: TensorContainer<URSingleTensor>(1)
{
if (maxdim >= 2)
generateMoments(maxdim, v);
}
/* Here we fill up the container with the tensors for $d=2,4,6,\ldots$
up to the given dimension. Each tensor of moments is equal to
$F_n\left(\otimes^nv\right).$ This has a dimension equal to
$2n$. See the header file for proof and details.
Here we sequentially construct the Kronecker power
$\otimes^nv$, and apply $F_n$. */
void
UNormalMoments::generateMoments(int maxdim, const TwoDMatrix &v)
{
TL_RAISE_IF(v.nrows() != v.ncols(),
"Variance-covariance matrix is not square in UNormalMoments constructor");
int nv = v.nrows();
URSingleTensor *mom2 = new URSingleTensor(nv, 2);
mom2->getData() = v.getData();
insert(mom2);
URSingleTensor *kronv = new URSingleTensor(nv, 2);
kronv->getData() = v.getData();
for (int d = 4; d <= maxdim; d += 2)
{
URSingleTensor *newkronv = new URSingleTensor(nv, d);
KronProd::kronMult(ConstVector(v.getData()),
ConstVector(kronv->getData()),
newkronv->getData());
delete kronv;
kronv = newkronv;
URSingleTensor *mom = new URSingleTensor(nv, d);
// apply $F_n$ to |kronv|
/* Here we go through all equivalences, select only those having 2
elements in each class, then go through all elements in |kronv| and
add them to the permuted location of |mom|.
The permutation must be taken as the inverse of the permutation implied by
the equivalence, since we need a permutation which, applied to the
identity of indices, yields indices in the equivalence classes. Note
how the |Equivalence::apply| method works. */
mom->zeros();
const EquivalenceSet eset = ebundle.get(d);
for (EquivalenceSet::const_iterator cit = eset.begin();
cit != eset.end(); cit++)
{
if (selectEquiv(*cit))
{
Permutation per(*cit);
per.inverse();
for (Tensor::index it = kronv->begin(); it != kronv->end(); ++it)
{
IntSequence ind(kronv->dimen());
per.apply(it.getCoor(), ind);
Tensor::index it2(mom, ind);
mom->get(*it2, 0) += kronv->get(*it, 0);
}
}
}
insert(mom);
}
delete kronv;
}
/* We return |true| for an equivalence each of whose classes has 2 elements. */
bool
UNormalMoments::selectEquiv(const Equivalence &e)
{
if (2*e.numClasses() != e.getN())
return false;
for (Equivalence::const_seqit si = e.begin();
si != e.end(); ++si)
{
if ((*si).length() != 2)
return false;
}
return true;
}
/* Here we go through the whole unfolded container, fold each tensor, and
insert it. */
FNormalMoments::FNormalMoments(const UNormalMoments &moms)
: TensorContainer<FRSingleTensor>(1)
{
for (UNormalMoments::const_iterator it = moms.begin();
it != moms.end(); ++it)
{
FRSingleTensor *fm = new FRSingleTensor(*((*it).second));
insert(fm);
}
}
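A hypothetical driver for these containers (|v| is assumed to be a square
variance-covariance |TwoDMatrix|; the function name is illustrative):

  void moments_example(const TwoDMatrix &v)
  {
    UNormalMoments umoms(6, v);   // tensors for d = 2, 4, 6 of u ~ N(0,v)
    FNormalMoments fmoms(umoms);  // the same moments, folded
  }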

dynare++/tl/cc/normal_moments.cweb
@ -1,115 +0,0 @@
@q $Id: normal_moments.cweb 281 2005-06-13 09:41:16Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@ Start of {\tt normal\_moments.cpp} file.
@c
#include "normal_moments.h"
#include "permutation.h"
#include "kron_prod.h"
#include "tl_static.h"
@<|UNormalMoments| constructor code@>;
@<|UNormalMoments::generateMoments| code@>;
@<|UNormalMoments::selectEquiv| code@>;
@<|FNormalMoments| constructor code@>;
@
@<|UNormalMoments| constructor code@>=
UNormalMoments::UNormalMoments(int maxdim, const TwoDMatrix& v)
: TensorContainer<URSingleTensor>(1)
{
if (maxdim >= 2)
generateMoments(maxdim, v);
}
@ Here we fill up the container with the tensors for $d=2,4,6,\ldots$
up to the given dimension. Each tensor of moments is equal to
$F_n\left(\otimes^nv\right).$ This has a dimension equal to
$2n$. See the header file for proof and details.
Here we sequentially construct the Kronecker power
$\otimes^nv$, and apply $F_n$.
@<|UNormalMoments::generateMoments| code@>=
void UNormalMoments::generateMoments(int maxdim, const TwoDMatrix& v)
{
TL_RAISE_IF(v.nrows() != v.ncols(),
"Variance-covariance matrix is not square in UNormalMoments constructor");
int nv = v.nrows();
URSingleTensor* mom2 = new URSingleTensor(nv, 2);
mom2->getData() = v.getData();
insert(mom2);
URSingleTensor* kronv = new URSingleTensor(nv, 2);
kronv->getData() = v.getData();
for (int d = 4; d <= maxdim; d+=2) {
URSingleTensor* newkronv = new URSingleTensor(nv, d);
KronProd::kronMult(ConstVector(v.getData()),
ConstVector(kronv->getData()),
newkronv->getData());
delete kronv;
kronv = newkronv;
URSingleTensor* mom = new URSingleTensor(nv, d);
@<apply $F_n$ to |kronv|@>;
insert(mom);
}
delete kronv;
}
@ Here we go through all equivalences, select only those having 2
elements in each class, then go through all elements in |kronv| and
add them to the permuted location of |mom|.
The permutation must be taken as the inverse of the permutation implied by
the equivalence, since we need a permutation which, applied to the
identity of indices, yields indices in the equivalence classes. Note
how the |Equivalence::apply| method works.
@<apply $F_n$ to |kronv|@>=
mom->zeros();
const EquivalenceSet eset = ebundle.get(d);
for (EquivalenceSet::const_iterator cit = eset.begin();
cit != eset.end(); cit++) {
if (selectEquiv(*cit)) {
Permutation per(*cit);
per.inverse();
for (Tensor::index it = kronv->begin(); it != kronv->end(); ++it) {
IntSequence ind(kronv->dimen());
per.apply(it.getCoor(), ind);
Tensor::index it2(mom, ind);
mom->get(*it2, 0) += kronv->get(*it, 0);
}
}
}
@ We return |true| for an equivalence each of whose classes has 2 elements.
@<|UNormalMoments::selectEquiv| code@>=
bool UNormalMoments::selectEquiv(const Equivalence& e)
{
if (2*e.numClasses() != e.getN())
return false;
for (Equivalence::const_seqit si = e.begin();
si != e.end(); ++si) {
if ((*si).length() != 2)
return false;
}
return true;
}
@ Here we go through the whole unfolded container, fold each tensor, and
insert it.
@<|FNormalMoments| constructor code@>=
FNormalMoments::FNormalMoments(const UNormalMoments& moms)
: TensorContainer<FRSingleTensor>(1)
{
for (UNormalMoments::const_iterator it = moms.begin();
it != moms.end(); ++it) {
FRSingleTensor* fm = new FRSingleTensor(*((*it).second));
insert(fm);
}
}
@ End of {\tt normal\_moments.cpp} file.

dynare++/tl/cc/normal_moments.hh Normal file
@ -0,0 +1,129 @@
// Copyright 2004, Ondra Kamenik
// Moments of normal distribution.
/* Here we calculate the higher order moments of a normally distributed
random vector $u$ with means equal to zero and a given
variance--covariance matrix $V$, that is $u\sim N(0,V)$. The moment
generating function of such a distribution is $f(t)=e^{{1\over 2}t^TVt}$. If
we differentiate it with respect to $t$ and unfold the higher dimensional tensors
row-wise, we obtain terms like
$$\eqalign{
{\partial\over\partial t}f(t)=&f(t)\cdot Vt\cr
{\partial^2\over\partial t^2}f(t)=&f(t)\cdot(Vt\otimes Vt+v)\cr
{\partial^3\over\partial t^3}f(t)=&f(t)\cdot
(Vt\otimes Vt\otimes Vt+P_?(v\otimes Vt)+P_?(Vt\otimes v)+v\otimes Vt)\cr
{\partial^4\over\partial t^4}f(t)=&f(t)\cdot
(Vt\otimes Vt\otimes Vt\otimes Vt+S_?(v\otimes Vt\otimes Vt)+
S_?(Vt\otimes v\otimes Vt)+S_?(Vt\otimes Vt\otimes v)+S_?(v\otimes v))}
$$
where $v$ is the vectorized $V$ ($v=\hbox{vec}(V)$), and $P_?$ is a
suitable row permutation (corresponding to a permutation of the
multidimensional indices) which permutes the tensor data, so that the
index of the variable being differentiated comes last. This ensures that
all (permuted) tensors can be summed, yielding a tensor whose indices
have some order (here we chose the order in which more recently
differentiated variables are to the right). Finally, $S_?$ is a suitable
sum of various $P_?$.
We are interested in the $S_?$ multiplying the Kronecker powers
$\otimes^nv$. The $S_?$ is a (possibly) multi-set of permutations of
even order. Note that we know the number of permutations in $S_?$. The
above formulas for the derivatives of $f(t)$ are valid also for a
one-dimensional $u$, and from the literature we know that the $2n$-th
moment is ${(2n)!\over n!2^n}\sigma^{2n}$. So there are
${(2n)!\over n!2^n}$ permutations in $S_?$.
In order to find the $S_?$ we need to define a couple of
things. First we define a sort of equivalence between the permutations
applicable to even number of indices. We write $P_1\equiv P_2$
whenever $P_1^{-1}\circ P_2$ permutes only whole pairs, or items
within pairs, but not indices across the pairs. For instance the
permutations $(0,1,2,3)$ and $(3,2,0,1)$ are equivalent, but
$(0,2,1,3)$ is not equivalent with the two. Clearly, the $\equiv$ is
an equivalence.
This allows us to define a relation $\sqsubseteq$ between the permutation
multi-sets $S$, which is basically the subset relation $\subseteq$ but
with respect to the equivalence $\equiv$, more formally:
$$S_1\sqsubseteq S_2\quad\hbox{iff}\quad P\in S_1
\Rightarrow\exists Q\in S_2:P\equiv Q$$
This induces an equivalence $S_1\equiv S_2$.
Now let $F_n$ denote a set of permutations on $2n$ indices which is
maximal with respect to $\sqsubseteq$, and minimal with respect to
$\equiv$. (In other words, it contains everything up to the
equivalence $\equiv$.) It is straightforward to calculate the number of
permutations in $F_n$. This is the total number of all permutations of
$2n$ elements, divided by the permutations of the pairs and by the
permutations within the pairs. This is ${(2n)!\over n!2^n}$.
We prove that $S_?\equiv F_n$. Clearly $S_?\sqsubseteq F_n$, since
$F_n$ is maximal. In order to prove that $F_n\sqsubseteq S_?$, let us
assert that for any permutation $P$ and for any (semi)positive
definite matrix $V$ we have $PS_?\otimes^nv=S_?\otimes^nv$. Below we
show that there is a positive definite matrix $V$ of some dimension
that for any two permutation multi-sets $S_1$, $S_2$, we have
$$S_1\not\equiv S_2\Rightarrow S_1(\otimes^nv)\neq S_2(\otimes^nv)$$
So it follows that for any permutation $P$, we have $PS_?\equiv
S_?$. For a purpose of contradiction let $P\in F_n$ be a permutation
which is not equivalent to any permutation from $S_?$. Since $S_?$ is
non-empty, let us pick $P_0\in S_?$. Now assert that
$P_0^{-1}S_?\not\equiv P^{-1}S_?$ since the first contains an identity
and the second does not contain a permutation equivalent to
identity. Thus we have $(P\circ P_0^{-1})S_?\not\equiv S_?$ which
gives the contradiction and we have proved that $F_n\sqsubseteq
S_?$. Thus $F_n\equiv S_?$. Moreover, we know that $S_?$ and $F_n$
have the same number of permutations, hence the minimality of $S_?$
with respect to $\equiv$.
Now it suffices to prove that there exists a positive definite $V$
such that for any two permutation multi-sets $S_1$, and $S_2$ holds
$S_1\not\equiv S_2\Rightarrow S_1(\otimes^nv)\neq S_2(\otimes^nv)$. If
$V$ is an $n\times n$ matrix, then $S_1\not\equiv S_2$ implies that there
is an identically nonzero polynomial of order $n$ over the integers in the
elements of $V$. If $V=A^TA$ then there is an identically non-zero
polynomial of order $2n$ in the elements of $A$. This means that we
have to find an $n(n+1)/2$-tuple $x$ of real numbers such that all
identically non-zero polynomials $p$ of order $2n$ over the integers yield
$p(x)\neq 0$.
The $x$ is constructed as follows: $x_i = \pi^{\log{r_i}}$, where $r_i$
is the $i$-th prime. Let us consider a monomial $x_1^{j_1}\cdot\ldots\cdot
x_k^{j_k}$. When the monomial is evaluated, we get
$$\pi^{\log{r_1^{j_1}}+\ldots+\log{r_k^{j_k}}}=
\pi^{\log{\left(r_1^{j_1}\cdot\ldots\cdot r_k^{j_k}\right)}}$$
Now it is easy to see that if an integer combination of such terms is
zero, then the combination must be either trivial or sum to $0$ and
all monomials must be equal. Both cases imply a polynomial identically
equal to zero. So, any non-trivial integer polynomial evaluated at $x$
must be non-zero.
So, having this result in hand, now it is straightforward to calculate
higher moments of normal distribution. Here we define a container,
which does the job. In its constructor, we simply calculate Kronecker
powers of $v$ and apply $F_n$ to $\otimes^nv$. $F_n$ is, in fact, a
set of all equivalences in sense of class |Equivalence| over $2n$
elements, having $n$ classes each of them having exactly 2 elements. */
#ifndef NORMAL_MOMENTS_H
#define NORMAL_MOMENTS_H
#include "t_container.hh"
class UNormalMoments : public TensorContainer<URSingleTensor>
{
public:
UNormalMoments(int maxdim, const TwoDMatrix &v);
private:
void generateMoments(int maxdim, const TwoDMatrix &v);
static bool selectEquiv(const Equivalence &e);
};
class FNormalMoments : public TensorContainer<FRSingleTensor>
{
public:
FNormalMoments(const UNormalMoments &moms);
};
#endif
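As a quick check of the counting argument above: for $2n=4$ the formula
gives $\left\vert F_2\right\vert={4!\over 2!\,2^2}=3$, which matches the
three pairings in the classical identity
$E[u_iu_ju_ku_l]=V_{ij}V_{kl}+V_{ik}V_{jl}+V_{il}V_{jk}$ for $u\sim N(0,V)$.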

dynare++/tl/cc/normal_moments.hweb
@ -1,139 +0,0 @@
@q $Id: normal_moments.hweb 148 2005-04-19 15:12:26Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 Moments of normal distribution. Start of {\tt normal\_moments.h} file.
Here we calculate the higher order moments of normally distributed
random vector $u$ with means equal to zero and given
variance--covariance matrix $V$, this is $u\sim N(0,V)$. The moment
generating function for such distribution is $f(t)=e^{{1\over 2}t^TVt}$. If
we derivate it wrt $t$ and unfold the higher dimensional tensors
row-wise, we obtain terms like
$$\eqalign{
{\partial\over\partial t}f(t)=&f(t)\cdot Vt\cr
{\partial^2\over\partial t^2}f(t)=&f(t)\cdot(Vt\otimes Vt+v)\cr
{\partial^3\over\partial t^3}f(t)=&f(t)\cdot
(Vt\otimes Vt\otimes Vt+P_?(v\otimes Vt)+P_?(Vt\otimes v)+v\otimes Vt)\cr
{\partial^4\over\partial t^4}f(t)=&f(t)\cdot
(Vt\otimes Vt\otimes Vt\otimes Vt+S_?(v\otimes Vt\otimes Vt)+
S_?(Vt\otimes v\otimes Vt)+S_?(Vt\otimes Vt\otimes v)+S_?(v\otimes v))}
$$
where $v$ is vectorized $V$ ($v=\hbox{vec}(V)$), and $P_?$ is a
suitable row permutation (corresponds to permutation of
multidimensional indices) which permutes the tensor data, so that the
index of a variable being derived would be the last. This ensures that
all (permuted) tensors can be summed yielding a tensor whose indices
have some order (in here we chose the order that more recent
derivating variables are to the right). Finally, $S_?$ is a suitable
sum of various $P_?$.
We are interested in $S_?$ multiplying the Kronecker powers
$\otimes^nv$. The $S_?$ is a (possibly) multi-set of permutations of
even order. Note that we know a number of permutations in $S_?$. The
above formulas for $F(t)$ derivatives are valid also for monomial
$u$, and from literature we know that $2n$-th moment is ${(2n!)\over
n!2^n}\sigma^2$. So there are ${(2n!)\over n!2^n}$ permutations in
$S_?$.
In order to find the $S_?$ we need to define a couple of
things. First we define a sort of equivalence between the permutations
applicable to even number of indices. We write $P_1\equiv P_2$
whenever $P_1^{-1}\circ P_2$ permutes only whole pairs, or items
within pairs, but not indices across the pairs. For instance the
permutations $(0,1,2,3)$ and $(3,2,0,1)$ are equivalent, but
$(0,2,1,3)$ is not equivalent with the two. Clearly, the $\equiv$ is
an equivalence.
This allows to define a relation $\sqsubseteq$ between the permutation
multi-sets $S$, which is basically the subset relation $\subseteq$ but
with respect to the equivalence $\equiv$, more formally:
$$S_1\sqsubseteq S_2\quad\hbox{iff}\quad P\in S_1
\Rightarrow\exists Q\in S_2:P\equiv Q$$
This induces an equivalence $S_1\equiv S_2$.
Now let $F_n$ denote a set of permutations on $2n$ indices which is
maximal with respect to $\sqsubseteq$, and minimal with respect to
$\equiv$. (In other words, it contains everything up to the
equivalence $\equiv$.) It is straightforward to calculate a number of
permutations in $F_n$. This is a total number of all permutations of
$2n$ divided by permutations of pairs divided by permutations within
the pairs. This is ${(2n)!\over n!2^n}$.
We prove that $S_?\equiv F_n$. Clearly $S_?\sqsubseteq F_n$, since
$F_n$ is maximal. In order to prove that $F_n\sqsubseteq S_?$, let us
assert that for any permutation $P$ and for any (semi)positive
definite matrix $V$ we have $PS_?\otimes^nv=S_?\otimes^nv$. Below we
show that there is a positive definite matrix $V$ of some dimension
that for any two permutation multi-sets $S_1$, $S_2$, we have
$$S_1\not\equiv S_2\Rightarrow S_1(\otimes^nv)\neq S_2(\otimes^nv)$$
So it follows that for any permutation $P$, we have $PS_?\equiv
S_?$. For a purpose of contradiction let $P\in F_n$ be a permutation
which is not equivalent to any permutation from $S_?$. Since $S_?$ is
non-empty, let us pick $P_0\in S_?$. Now assert that
$P_0^{-1}S_?\not\equiv P^{-1}S_?$ since the first contains an identity
and the second does not contain a permutation equivalent to
identity. Thus we have $(P\circ P_0^{-1})S_?\not\equiv S_?$ which
gives the contradiction and we have proved that $F_n\sqsubseteq
S_?$. Thus $F_n\equiv S_?$. Moreover, we know that $S_?$ and $F_n$
have the same number of permutations, hence the minimality of $S_?$
with respect to $\equiv$.
Now it suffices to prove that there exists a positive definite $V$
such that for any two permutation multi-sets $S_1$, and $S_2$ holds
$S_1\not\equiv S_2\Rightarrow S_1(\otimes^nv)\neq S_2(\otimes^nv)$. If
$V$ is $n\times n$ matrix, then $S_1\not\equiv S_2$ implies that there
is identically nonzero polynomial of elements from $V$ of order $n$
over integers. If $V=A^TA$ then there is identically non-zero
polynomial of elements from $A$ of order $2n$. This means, that we
have to find $n(n+1)/2$ tuple $x$ of real numbers such that all
identically non-zero polynomials $p$ of order $2n$ over integers yield
$p(x)\neq 0$.
The $x$ is constructed as follows: $x_i = \pi^{\log{r_i}}$, where $r_i$
is the $i$-th prime. Let us consider a monomial $x_1^{j_1}\cdot\ldots\cdot
x_k^{j_k}$. When the monomial is evaluated, we get
$$\pi^{\log{r_1^{j_1}}+\ldots+\log{r_k^{j_k}}}=
\pi^{\log{\left(r_1^{j_1}\cdot\ldots\cdot r_k^{j_k}\right)}}$$
Now it is easy to see that if an integer combination of such terms is
zero, then the combination must be either trivial or sum to $0$ and
all monoms must be equal. Both cases imply a polynomial identically
equal to zero. So, any non-trivial integer polynomial evaluated at $x$
must be non-zero.
So, having this result in hand, now it is straightforward to calculate
higher moments of normal distribution. Here we define a container,
which does the job. In its constructor, we simply calculate Kronecker
powers of $v$ and apply $F_n$ to $\otimes^nv$. $F_n$ is, in fact, a
set of all equivalences in sense of class |Equivalence| over $2n$
elements, having $n$ classes each of them having exactly 2 elements.
@c
#ifndef NORMAL_MOMENTS_H
#define NORMAL_MOMENTS_H
#include "t_container.h"
@<|UNormalMoments| class declaration@>;
@<|FNormalMoments| class declaration@>;
#endif
@
@<|UNormalMoments| class declaration@>=
class UNormalMoments : public TensorContainer<URSingleTensor> {
public:@;
UNormalMoments(int maxdim, const TwoDMatrix& v);
private:@;
void generateMoments(int maxdim, const TwoDMatrix& v);
static bool selectEquiv( const Equivalence& e);
};
@
@<|FNormalMoments| class declaration@>=
class FNormalMoments : public TensorContainer<FRSingleTensor> {
public:@;
FNormalMoments(const UNormalMoments& moms);
};
@ End of {\tt normal\_moments.h} file.

dynare++/tl/cc/permutation.cc Normal file
@ -0,0 +1,163 @@
// Copyright 2004, Ondra Kamenik
#include "permutation.hh"
#include "tl_exception.hh"
/* This is easy: we simply apply the map in the fashion $s\circ m$. */
void
Permutation::apply(const IntSequence &src, IntSequence &tar) const
{
TL_RAISE_IF(src.size() != permap.size() || tar.size() != permap.size(),
"Wrong sizes of input or output in Permutation::apply");
for (int i = 0; i < permap.size(); i++)
tar[i] = src[permap[i]];
}
void
Permutation::apply(IntSequence &tar) const
{
IntSequence tmp(tar);
apply(tmp, tar);
}
void
Permutation::inverse()
{
IntSequence former(permap);
for (int i = 0; i < size(); i++)
permap[former[i]] = i;
}
/* Here we find the number of trailing indices which the permutation
maps to themselves. */
int
Permutation::tailIdentity() const
{
int i = permap.size();
while (i > 0 && permap[i-1] == i-1)
i--;
return permap.size() - i;
}
/* This calculates a map which corresponds to sorting in the following
sense: $(\hbox{sorted }s)\circ m = s$, where $s$ is a given sequence.
We go through |s| and find the same item in the sorted |s|. We
construct the |permap| from the found pair of indices. We have to be
careful not to assign the same position in the sorted |s| to two
positions in |s|, so we maintain a bitmap |flags|, in which we remember
the indices from the sorted |s| already assigned. */
void
Permutation::computeSortingMap(const IntSequence &s)
{
IntSequence srt(s);
srt.sort();
IntSequence flags(s.size(), 0);
for (int i = 0; i < s.size(); i++)
{
int j = 0;
while (j < s.size() && (flags[j] || srt[j] != s[i]))
j++;
TL_RAISE_IF(j == s.size(),
"Internal algorithm error in Permutation::computeSortingMap");
flags[j] = 1;
permap[i] = j;
}
}
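// For instance, for s = (3,1,2) the sorted sequence is (1,2,3) and the
// computed map is (2,0,1): s[0]=3 sits at position 2 of the sorted
// sequence, s[1]=1 at position 0, and s[2]=2 at position 1. A hypothetical
// check (assuming |per| is a Permutation holding this map):
//
//   IntSequence sorted(3);
//   sorted[0] = 1; sorted[1] = 2; sorted[2] = 3;
//   IntSequence tar(3);
//   per.apply(sorted, tar);  // tar becomes (3,1,2), i.e. the original s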
PermutationSet::PermutationSet()
: order(1), size(1), pers(new const Permutation *[size])
{
pers[0] = new Permutation(1);
}
PermutationSet::PermutationSet(const PermutationSet &sp, int n)
: order(n), size(n*sp.size),
pers(new const Permutation *[size])
{
for (int i = 0; i < size; i++)
pers[i] = NULL;
TL_RAISE_IF(n != sp.order+1,
"Wrong new order in PermutationSet constructor");
int k = 0;
for (int i = 0; i < sp.size; i++)
{
for (int j = 0; j < order; j++, k++)
{
pers[k] = new Permutation(*(sp.pers[i]), j);
}
}
}
PermutationSet::~PermutationSet()
{
for (int i = 0; i < size; i++)
if (pers[i])
delete pers[i];
delete [] pers;
}
vector<const Permutation *>
PermutationSet::getPreserving(const IntSequence &s) const
{
TL_RAISE_IF(s.size() != order,
"Wrong sequence length in PermutationSet::getPreserving");
vector<const Permutation *> res;
IntSequence tmp(s.size());
for (int i = 0; i < size; i++)
{
pers[i]->apply(s, tmp);
if (s == tmp)
{
res.push_back(pers[i]);
}
}
return res;
}
PermutationBundle::PermutationBundle(int nmax)
{
nmax = max(nmax, 1);
generateUpTo(nmax);
}
PermutationBundle::~PermutationBundle()
{
for (unsigned int i = 0; i < bundle.size(); i++)
delete bundle[i];
}
const PermutationSet &
PermutationBundle::get(int n) const
{
if (n > (int) (bundle.size()) || n < 1)
{
TL_RAISE("Permutation set not found in PermutationSet::get");
return *(bundle[0]);
}
else
{
return *(bundle[n-1]);
}
}
void
PermutationBundle::generateUpTo(int nmax)
{
if (bundle.size() == 0)
bundle.push_back(new PermutationSet());
int curmax = bundle.size();
for (int n = curmax+1; n <= nmax; n++)
{
bundle.push_back(new PermutationSet(*(bundle.back()), n));
}
}
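
/* Hypothetical sketch (not part of the original file, guarded by a made-up
   macro so it is never compiled): the bundle holds all permutation sets up
   to a given order. */
#ifdef TL_PERMUTATION_EXAMPLES
static void
examplePermutationBundle()
{
  PermutationBundle bundle(3);
  const PermutationSet &ps = bundle.get(3);
  // ps.getNum() == 6, i.e. all 3! permutations of a 3-element set
  IntSequence s(3, 0);              // the constant sequence (0,0,0)
  // a constant sequence is preserved by every permutation:
  vector<const Permutation *> pres = ps.getPreserving(s);
  // pres.size() == 6
}
#endif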

View File

@ -1,188 +0,0 @@
@q $Id: permutation.cweb 332 2005-07-15 13:41:48Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@ Start of {\tt permutation.cweb} file.
@c
#include "permutation.h"
#include "tl_exception.h"
@<|Permutation::apply| code@>;
@<|Permutation::inverse| code@>;
@<|Permutation::tailIdentity| code@>;
@<|Permutation::computeSortingMap| code@>;
@<|PermutationSet| constructor code 1@>;
@<|PermutationSet| constructor code 2@>;
@<|PermutationSet| destructor code@>;
@<|PermutationSet::getPreserving| code@>;
@<|PermutationBundle| constructor code@>;
@<|PermutationBundle| destructor code@>;
@<|PermutationBundle::get| code@>;
@<|PermutationBundle::generateUpTo| code@>;
@ This is easy: we simply apply the map in the fashion $s\circ m$.
@<|Permutation::apply| code@>=
void Permutation::apply(const IntSequence& src, IntSequence& tar) const
{
TL_RAISE_IF(src.size() != permap.size() || tar.size() != permap.size(),
"Wrong sizes of input or output in Permutation::apply");
for (int i = 0; i < permap.size(); i++)
tar[i] = src[permap[i]];
}
void Permutation::apply(IntSequence& tar) const
{
IntSequence tmp(tar);
apply(tmp, tar);
}
@
@<|Permutation::inverse| code@>=
void Permutation::inverse()
{
IntSequence former(permap);
for (int i = 0; i < size(); i++)
permap[former[i]] = i;
}
@ Here we find the number of trailing indices on which the permutation
acts as the identity.
@<|Permutation::tailIdentity| code@>=
int Permutation::tailIdentity() const
{
int i = permap.size();
while (i > 0 && permap[i-1] == i-1)
i--;
return permap.size() - i;
}
@ This calculates a map which corresponds to sorting in the following
sense: $(\hbox{sorted }s)\circ m = s$, where $s$ is a given sequence.
We go through |s| and find the same item in sorted |s|. We
construct the |permap| from the found pair of indices. We have to be
careful not to assign the same position in sorted |s| to two
positions in |s|, so we maintain a bitmap |flags| in which we remember
the indices of sorted |s| already assigned.
@<|Permutation::computeSortingMap| code@>=
void Permutation::computeSortingMap(const IntSequence& s)
{
IntSequence srt(s);
srt.sort();
IntSequence flags(s.size(),0);
for (int i = 0; i < s.size(); i++) {
int j = 0;
while (j < s.size() && (flags[j] || srt[j] != s[i]))
j++;
TL_RAISE_IF(j == s.size(),
"Internal algorithm error in Permutation::computeSortingMap");
flags[j] = 1;
permap[i] = j;
}
}
@
@<|PermutationSet| constructor code 1@>=
PermutationSet::PermutationSet()
: order(1), size(1), pers(new const Permutation*[size])
{
pers[0] = new Permutation(1);
}
@
@<|PermutationSet| constructor code 2@>=
PermutationSet::PermutationSet(const PermutationSet& sp, int n)
: order(n), size(n*sp.size),
pers(new const Permutation*[size])
{
for (int i = 0; i < size; i++)
pers[i] = NULL;
TL_RAISE_IF(n != sp.order+1,
"Wrong new order in PermutationSet constructor");
int k = 0;
for (int i = 0; i < sp.size; i++) {
for (int j = 0; j < order; j++,k++) {
pers[k] = new Permutation(*(sp.pers[i]), j);
}
}
}
@
@<|PermutationSet| destructor code@>=
PermutationSet::~PermutationSet()
{
for (int i = 0; i < size; i++)
if (pers[i])
delete pers[i];
delete [] pers;
}
@
@<|PermutationSet::getPreserving| code@>=
vector<const Permutation*> PermutationSet::getPreserving(const IntSequence& s) const
{
TL_RAISE_IF(s.size() != order,
"Wrong sequence length in PermutationSet::getPreserving");
vector<const Permutation*> res;
IntSequence tmp(s.size());
for (int i = 0; i < size; i++) {
pers[i]->apply(s, tmp);
if (s == tmp) {
res.push_back(pers[i]);
}
}
return res;
}
@
@<|PermutationBundle| constructor code@>=
PermutationBundle::PermutationBundle(int nmax)
{
nmax = max(nmax, 1);
generateUpTo(nmax);
}
@
@<|PermutationBundle| destructor code@>=
PermutationBundle::~PermutationBundle()
{
for (unsigned int i = 0; i < bundle.size(); i++)
delete bundle[i];
}
@
@<|PermutationBundle::get| code@>=
const PermutationSet& PermutationBundle::get(int n) const
{
if (n > (int)(bundle.size()) || n < 1) {
TL_RAISE("Permutation set not found in PermutationSet::get");
return *(bundle[0]);
} else {
return *(bundle[n-1]);
}
}
@
@<|PermutationBundle::generateUpTo| code@>=
void PermutationBundle::generateUpTo(int nmax)
{
if (bundle.size() == 0)
bundle.push_back(new PermutationSet());
int curmax = bundle.size();
for (int n = curmax+1; n <= nmax; n++) {
bundle.push_back(new PermutationSet(*(bundle.back()), n));
}
}
@ End of {\tt permutation.cweb} file.

View File

@ -0,0 +1,177 @@
// Copyright 2004, Ondra Kamenik
// Permutations.
/* The permutation class is useful when describing a permutation of
indices in a permuted symmetry tensor. This tensor comes into existence,
for instance, as a result of the following tensor multiplication:
$$\left[g_{y^3}\right]_{\gamma_1\gamma_2\gamma_3}
\left[g_{yu}\right]^{\gamma_1}_{\alpha_1\beta_3}
\left[g_{yu}\right]^{\gamma_2}_{\alpha_2\beta_1}
\left[g_u\right]^{\gamma_3}_{\beta_2}
$$
If this operation is done by a Kronecker product of unfolded tensors,
the resulting tensor has permuted indices. So, in this case the
permutation is implied by the equivalence:
$\{\{0,4\},\{1,3\},\{2\}\}$. This results in a permutation which maps
indices $(0,1,2,3,4)\mapsto(0,2,4,3,1)$.
The other application of the |Permutation| class is to permute indices
with the same permutation as the one performed during sorting.
Here we only define an abstraction for the permutation defined by an
equivalence. Its basic operation is to apply the permutation to an
integer sequence. The application is right (or inner), in the sense that
it works on the indices of the sequence, not on its items. More
formally, $s\circ m \not= m\circ s$ in general. In here, the application
of the permutation defined by the map $m$ is $s\circ m$.
Also, we need the |PermutationSet| class, which contains all permutations
of an $n$-element set, and a bundle of permutations, |PermutationBundle|,
which contains all permutation sets up to a given order. */
#ifndef PERMUTATION_H
#define PERMUTATION_H
#include "int_sequence.hh"
#include "equivalence.hh"
#include <vector>
/* The permutation object will have a map which defines the mapping of
indices $(0,1,\ldots,n-1)\mapsto(m_0,m_1,\ldots,m_{n-1})$. The map is
the sequence $(m_0,m_1,\ldots,m_{n-1})$. When the permutation with the
map $m$ is applied to a sequence $s$, it permutes its indices:
$s\circ\hbox{id}\mapsto s\circ m$.
So we have one constructor from an equivalence, then a method |apply|,
and finally a method |tailIdentity| which returns the number of trailing
indices which yield the identity. Also we have a constructor calculating
the map which corresponds to the permutation performed by sorting. That
is, we want $(\hbox{sorted }s)\circ m = s$. */
class Permutation
{
protected:
IntSequence permap;
public:
Permutation(int len)
: permap(len)
{
for (int i = 0; i < len; i++)
permap[i] = i;
}
Permutation(const Equivalence &e)
: permap(e.getN())
{
e.trace(permap);
}
Permutation(const Equivalence &e, const Permutation &per)
: permap(e.getN())
{
e.trace(permap, per);
}
Permutation(const IntSequence &s)
: permap(s.size())
{
computeSortingMap(s);
}
Permutation(const Permutation &p)
: permap(p.permap)
{
}
Permutation(const Permutation &p1, const Permutation &p2)
: permap(p2.permap)
{
p1.apply(permap);
}
Permutation(const Permutation &p, int i)
: permap(p.size(), p.permap, i)
{
}
const Permutation &
operator=(const Permutation &p)
{
permap = p.permap; return *this;
}
bool
operator==(const Permutation &p)
{
return permap == p.permap;
}
int
size() const
{
return permap.size();
}
void
print() const
{
permap.print();
}
void apply(const IntSequence &src, IntSequence &tar) const;
void apply(IntSequence &tar) const;
void inverse();
int tailIdentity() const;
const IntSequence &
getMap() const
{
return permap;
}
IntSequence &
getMap()
{
return permap;
}
protected:
void computeSortingMap(const IntSequence &s);
};
/* The |PermutationSet| maintains an array of all permutations. The
default constructor constructs the one-element permutation set over a
one-element set. The second constructor constructs a new permutation set
over $n$ elements from all permutations over $n-1$ elements. The
parameter $n$ need not be provided, but it serves to distinguish this
constructor from the copy constructor, which is not provided.
The method |getPreserving| returns the subgroup of permutations
which are invariant with respect to the given sequence. These are all
permutations $p$ yielding $p\circ s = s$, where $s$ is the given
sequence. */
class PermutationSet
{
int order;
int size;
const Permutation **const pers;
public:
PermutationSet();
PermutationSet(const PermutationSet &ps, int n);
~PermutationSet();
int
getNum() const
{
return size;
}
const Permutation &
get(int i) const
{
return *(pers[i]);
}
vector<const Permutation *> getPreserving(const IntSequence &s) const;
};
/* The permutation bundle encapsulates all permutation sets up to some
given dimension. */
class PermutationBundle
{
vector<PermutationSet *> bundle;
public:
PermutationBundle(int nmax);
~PermutationBundle();
const PermutationSet &get(int n) const;
void generateUpTo(int nmax);
};
#endif

View File

@ -1,147 +0,0 @@
@q $Id: permutation.hweb 148 2005-04-19 15:12:26Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 Permutations. Start of {\tt permutation.h} file.
The permutation class is useful when describing a permutation of
indices in permuted symmetry tensor. This tensor comes to existence,
for instance, as a result of the following tensor multiplication:
$$\left[g_{y^3}\right]_{\gamma_1\gamma_2\gamma_3}
\left[g_{yu}\right]^{\gamma_1}_{\alpha_1\beta_3}
\left[g_{yu}\right]^{\gamma_2}_{\alpha_2\beta_1}
\left[g_u\right]^{\gamma_3}_{\beta_2}
$$
If this operation is done by a Kronecker product of unfolded tensors,
the resulting tensor has permuted indices. So, in this case the
permutation is implied by the equivalence:
$\{\{0,4\},\{1,3\},\{2\}\}$. This results in a permutation which maps
indices $(0,1,2,3,4)\mapsto(0,2,4,3,1)$.
The other application of |Permutation| class is to permute indices
with the same permutation as done during sorting.
Here we only define an abstraction for the permutation defined by an
equivalence. Its basic operation is to apply the permutation to the
integer sequence. The application is right (or inner), in sense that
it works on indices of the sequence not items of the sequence. More
formally $s\circ m \not=m\circ s$. In here, the application of the
permutation defined by map $m$ is $s\circ m$.
Also, we need |PermutationSet| class which contains all permutations
of $n$ element set, and a bundle of permutations |PermutationBundle|
which contains all permutation sets up to a given number.
@s Permutation int
@s PermutationSet int
@s PermutationBundle int
@c
#ifndef PERMUTATION_H
#define PERMUTATION_H
#include "int_sequence.h"
#include "equivalence.h"
#include <vector>
@<|Permutation| class declaration@>;
@<|PermutationSet| class declaration@>;
@<|PermutationBundle| class declaration@>;
#endif
@ The permutation object will have a map, which defines mapping of
indices $(0,1,\ldots,n-1)\mapsto(m_0,m_1,\ldots, m_{n-1})$. The map is
the sequence $(m_0,m_1,\ldots,m_{n-1})$. When the permutation with the
map $m$ is applied on sequence $s$, it permutes its indices:
$s\circ\hbox{id}\mapsto s\circ m$.
So we have one constructor from equivalence, then a method |apply|,
and finally a method |tailIdentity| which returns a number of trailing
indices which yield identity. Also we have a constructor calculating
map, which corresponds to permutation in sort. This is, we want
$(\hbox{sorted }s)\circ m = s$.
@<|Permutation| class declaration@>=
class Permutation {
protected:@;
IntSequence permap;
public:@;
Permutation(int len)
: permap(len) {@+ for (int i = 0; i < len; i++) permap[i] = i;@+}
Permutation(const Equivalence& e)
: permap(e.getN()) {@+ e.trace(permap);@+}
Permutation(const Equivalence& e, const Permutation& per)
: permap(e.getN()) {@+ e.trace(permap, per);@+}
Permutation(const IntSequence& s)
: permap(s.size()) {@+ computeSortingMap(s);@+};
Permutation(const Permutation& p)
: permap(p.permap)@+ {}
Permutation(const Permutation& p1, const Permutation& p2)
: permap(p2.permap) {@+ p1.apply(permap);@+}
Permutation(const Permutation& p, int i)
: permap(p.size(), p.permap, i)@+ {}
const Permutation& operator=(const Permutation& p)
{@+ permap = p.permap;@+ return *this;@+}
bool operator==(const Permutation& p)
{@+ return permap == p.permap;@+}
int size() const
{@+ return permap.size();@+}
void print() const
{@+ permap.print();@+}
void apply(const IntSequence& src, IntSequence& tar) const;
void apply(IntSequence& tar) const;
void inverse();
int tailIdentity() const;
const IntSequence& getMap() const
{@+ return permap;@+}
IntSequence& getMap()
{@+ return permap;@+}
protected:@;
void computeSortingMap(const IntSequence& s);
};
@ The |PermutationSet| maintains an array of all permutations. The
default constructor constructs the one-element permutation set over a
one-element set. The second constructor constructs a new permutation set
over $n$ elements from all permutations over $n-1$ elements. The
parameter $n$ need not be provided, but it serves to distinguish this
constructor from the copy constructor, which is not provided.
The method |getPreserving| returns the subgroup of permutations
which are invariant with respect to the given sequence. These are all
permutations $p$ yielding $p\circ s = s$, where $s$ is the given
sequence.
@<|PermutationSet| class declaration@>=
class PermutationSet {
int order;
int size;
const Permutation** const pers;
public:@;
PermutationSet();
PermutationSet(const PermutationSet& ps, int n);
~PermutationSet();
int getNum() const
{@+ return size;@+}
const Permutation& get(int i) const
{@+ return *(pers[i]);@+}
vector<const Permutation*> getPreserving(const IntSequence& s) const;
};
@ The permutation bundle encapsulates all permutations sets up to some
given dimension.
@<|PermutationBundle| class declaration@>=
class PermutationBundle {
vector<PermutationSet*> bundle;
public:@;
PermutationBundle(int nmax);
~PermutationBundle();
const PermutationSet& get(int n) const;
void generateUpTo(int nmax);
};
@ End of {\tt permutation.h} file.

399
dynare++/tl/cc/ps_tensor.cc Normal file
View File

@ -0,0 +1,399 @@
// Copyright 2004, Ondra Kamenik
#include "ps_tensor.hh"
#include "fs_tensor.hh"
#include "tl_exception.hh"
#include "tl_static.hh"
#include "stack_container.hh"
/* Here we decide what method to use for filling a slice in the slicing
constructor. A few experiments suggest that if the tensor is
more than 8% filled, the first method (|fillFromSparseOne|) is
better. For fill factors less than 1%, the second can be 3 times
quicker. */
UPSTensor::fill_method
UPSTensor::decideFillMethod(const FSSparseTensor &t)
{
if (t.getFillFactor() > 0.08)
return first;
else
return second;
}
/* Here we make a slice. We decide what fill method to use and set it. */
UPSTensor::UPSTensor(const FSSparseTensor &t, const IntSequence &ss,
const IntSequence &coor, const PerTensorDimens &ptd)
: UTensor(along_col, ptd.getNVX(),
t.nrows(), ptd.calcUnfoldMaxOffset(), ptd.dimen()),
tdims(ptd)
{
TL_RAISE_IF(coor.size() != t.dimen(),
"Wrong coordinates length of stacks for UPSTensor slicing constructor");
TL_RAISE_IF(ss.sum() != t.nvar(),
"Wrong length of stacks for UPSTensor slicing constructor");
if (first == decideFillMethod(t))
fillFromSparseOne(t, ss, coor);
else
fillFromSparseTwo(t, ss, coor);
}
void
UPSTensor::increment(IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in UPSTensor::increment");
UTensor::increment(v, tdims.getNVX());
}
void
UPSTensor::decrement(IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in UPSTensor::decrement");
UTensor::decrement(v, tdims.getNVX());
}
FTensor &
UPSTensor::fold() const
{
TL_RAISE("Never should come to this place in UPSTensor::fold");
FFSTensor *nothing = new FFSTensor(0, 0, 0);
return *nothing;
}
int
UPSTensor::getOffset(const IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input vector size in UPSTensor::getOffset");
return UTensor::getOffset(v, tdims.getNVX());
}
void
UPSTensor::addTo(FGSTensor &out) const
{
TL_RAISE_IF(out.getDims() != tdims,
"Tensors have incompatible dimens in UPSTensor::addTo");
for (index in = out.begin(); in != out.end(); ++in)
{
IntSequence vtmp(dimen());
tdims.getPer().apply(in.getCoor(), vtmp);
index tin(this, vtmp);
out.addColumn(*this, *tin, *in);
}
}
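
/* Hypothetical illustration (not part of the original file): with the
   permutation map (0,2,4,3,1), the column of |out| at coordinates
   (c0,c1,c2,c3,c4) receives the column of |this| at coordinates
   (c0,c2,c4,c3,c1), since apply() computes tar[i] = src[permap[i]]. */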
/* In here, we have to add this permuted symmetry unfolded tensor to an
unfolded, not permuted, tensor. One easy way would be to go through the
target tensor, permute each index, and add the column.
However, it may happen that the permutation has a non-empty
identity tail. In this case, we can add not only individual columns,
but much bigger data chunks, which is usually more
efficient. Therefore, the code is quite dirty, because we do not have an
iterator which iterates over the tensor at some higher level, so we
simulate it by the following code.
First we set |cols| to the length of the data chunk and |off| to its
dimension. Then we need a front part of |nvmax| of |out|, which is
|nvmax_part|. Our iterator here is an integer sequence |outrun| of
full length, with |outrun_part| its front part. The |outrun| is
initialized to zeros. In each step we need to increment |outrun|
|cols| times; this is done by incrementing its prefix |outrun_part|.
So we loop over all |cols|-wide partitions of |out|, and permute |outrun|
to obtain |perrun|, which selects the corresponding column of this tensor
(note that the trailing part of |perrun| is the same as that of
|outrun|). Then we construct submatrices, add them, and increment
|outrun|. */
void
UPSTensor::addTo(UGSTensor &out) const
{
TL_RAISE_IF(out.getDims() != tdims,
"Tensors have incompatible dimens in UPSTensor::addTo");
int cols = tailIdentitySize();
int off = tdims.tailIdentity();
IntSequence outrun(out.dimen(), 0);
IntSequence outrun_part(outrun, 0, out.dimen()-off);
IntSequence nvmax_part(out.getDims().getNVX(), 0, out.dimen()-off);
for (int out_col = 0; out_col < out.ncols(); out_col += cols)
{
// permute |outrun|
IntSequence perrun(out.dimen());
tdims.getPer().apply(outrun, perrun);
index from(this, perrun);
// construct submatrices
ConstTwoDMatrix subfrom(*this, *from, cols);
TwoDMatrix subout(out, out_col, cols);
// add
subout.add(1, subfrom);
// increment |outrun| by cols
UTensor::increment(outrun_part, nvmax_part);
}
}
/* This returns the product of all items in |nvmax| which make up the
trailing identity part. */
int
UPSTensor::tailIdentitySize() const
{
return tdims.getNVX().mult(dimen()-tdims.tailIdentity(), dimen());
}
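
/* Hypothetical illustration (not part of the original file): if
   getNVX() == (3,3,2,2) and the permutation leaves the last two indices
   in place, then tdims.tailIdentity() == 2 and tailIdentitySize()
   == 2*2 == 4, so the addTo() above can copy chunks of 4 adjacent
   columns at a time. */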
/* This fill method is pretty dumb. We go through all columns of |this|
tensor, translate the coordinates to those of the sparse tensor, sort
them, and look the item up in the sparse tensor. There are many
unsuccessful lookups for a really sparse tensor, which is why the second
method works better for really sparse tensors. */
void
UPSTensor::fillFromSparseOne(const FSSparseTensor &t, const IntSequence &ss,
const IntSequence &coor)
{
IntSequence cumtmp(ss.size());
cumtmp[0] = 0;
for (int i = 1; i < ss.size(); i++)
cumtmp[i] = cumtmp[i-1] + ss[i-1];
IntSequence cum(coor.size());
for (int i = 0; i < coor.size(); i++)
cum[i] = cumtmp[coor[i]];
zeros();
for (Tensor::index run = begin(); run != end(); ++run)
{
IntSequence c(run.getCoor());
c.add(1, cum);
c.sort();
FSSparseTensor::const_iterator sl = t.getMap().lower_bound(c);
if (sl != t.getMap().end())
{
FSSparseTensor::const_iterator su = t.getMap().upper_bound(c);
for (FSSparseTensor::const_iterator srun = sl; srun != su; ++srun)
get((*srun).second.first, *run) = (*srun).second.second;
}
}
}
/* This is the second way of filling the slice. For instance, let the
slice correspond to the partitions $abac$. In here we first calculate
lower and upper bounds for the index of the sparse tensor for the
slice. These are |lb_srt| and |ub_srt| respectively. They correspond
to the ordering $aabc$. Then we go through that interval and select items
which are really between the bounds. Then we take the index and subtract
the lower bound to get it to the coordinates of the slice. We get
something like $(i_a,j_a,k_b,l_c)$. Then we apply the inverse of the
sorting permutation $abac\mapsto aabc$ to get the index
$(i_a,k_b,j_a,l_c)$. Recall that the slice is unfolded, so we have to
apply all permutations preserving the stack coordinates $abac$. In our
case we get the list of indices $(i_a,k_b,j_a,l_c)$ and
$(j_a,k_b,i_a,l_c)$. For all of these we copy the item of the sparse
tensor to the appropriate column. */
void
UPSTensor::fillFromSparseTwo(const FSSparseTensor &t, const IntSequence &ss,
const IntSequence &coor)
{
IntSequence coor_srt(coor);
coor_srt.sort();
IntSequence cum(ss.size());
cum[0] = 0;
for (int i = 1; i < ss.size(); i++)
cum[i] = cum[i-1] + ss[i-1];
IntSequence lb_srt(coor.size());
IntSequence ub_srt(coor.size());
for (int i = 0; i < coor.size(); i++)
{
lb_srt[i] = cum[coor_srt[i]];
ub_srt[i] = cum[coor_srt[i]] + ss[coor_srt[i]] - 1;
}
const PermutationSet &pset = tls.pbundle->get(coor.size());
vector<const Permutation *> pp = pset.getPreserving(coor);
Permutation unsort(coor);
zeros();
FSSparseTensor::const_iterator lbi = t.getMap().lower_bound(lb_srt);
FSSparseTensor::const_iterator ubi = t.getMap().upper_bound(ub_srt);
for (FSSparseTensor::const_iterator run = lbi; run != ubi; ++run)
{
if (lb_srt.lessEq((*run).first) && (*run).first.lessEq(ub_srt))
{
IntSequence c((*run).first);
c.add(-1, lb_srt);
unsort.apply(c);
for (unsigned int i = 0; i < pp.size(); i++)
{
IntSequence cp(coor.size());
pp[i]->apply(c, cp);
Tensor::index ind(this, cp);
TL_RAISE_IF(*ind < 0 || *ind >= ncols(),
"Internal error in slicing constructor of UPSTensor");
get((*run).second.first, *ind) = (*run).second.second;
}
}
}
}
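
/* Hypothetical worked example of the bounds above (not part of the
   original file): take stacks $x=[a,b,c,d]$ with sizes ss == (2,3,2,1)
   and slice coordinates coor == (0,2,0), i.e. the slice $aca$. Then
   coor_srt == (0,0,2), the cumulative stack starts are cum == (0,2,5,7),
   and hence lb_srt == (0,0,5) and ub_srt == (1,1,6). */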
/* Here we calculate the maximum offsets in each folded dimension
(dimension sizes, hence |ds|). */
void
PerTensorDimens2::setDimensionSizes()
{
const IntSequence &nvs = getNVS();
for (int i = 0; i < numSyms(); i++)
{
TensorDimens td(syms[i], nvs);
ds[i] = td.calcFoldMaxOffset();
}
}
/* If there are two folded dimensions, the overall offset is the
offset within the second plus the offset within the first times the
maximum offset of the second. If there are $n+1$ dimensions, the offset
is the offset within the last dimension plus the offset in the first $n$
dimensions multiplied by the maximum offset of the last
dimension. This is exactly what the following code does. */
int
PerTensorDimens2::calcOffset(const IntSequence &coor) const
{
TL_RAISE_IF(coor.size() != dimen(),
"Wrong length of coordinates in PerTensorDimens2::calcOffset");
IntSequence cc(coor);
int ret = 0;
int off = 0;
for (int i = 0; i < numSyms(); i++)
{
TensorDimens td(syms[i], getNVS());
IntSequence c(cc, off, off+syms[i].dimen());
int a = td.calcFoldOffset(c);
ret = ret*ds[i] + a;
off += syms[i].dimen();
}
return ret;
}
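
/* A minimal sketch of the Horner-style offset composition used above
   (not part of the original file; the function and macro names are made
   up, so this is never compiled into the library). */
#ifdef TL_PS_TENSOR_EXAMPLES
static int
exampleHornerOffset(int o0, int d0, int o1, int d1)
{
  int ret = 0;
  ret = ret*d0 + o0; // first dimension: ret was 0, so this is just o0
  ret = ret*d1 + o1; // compose with the second dimension
  return ret;        // == o0*d1 + o1
}
#endif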
void
PerTensorDimens2::print() const
{
printf("nvmax: "); nvmax.print();
printf("per: "); per.print();
printf("syms: "); syms.print();
printf("dims: "); ds.print();
}
/* Here we increment the given integer sequence. It corresponds to
|UTensor::increment| of the whole sequence, and then partial
monotonizing of the subsequences with respect to the
symmetries of each dimension. */
void
FPSTensor::increment(IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong length of coordinates in FPSTensor::increment");
UTensor::increment(v, tdims.getNVX());
int off = 0;
for (int i = 0; i < tdims.numSyms(); i++)
{
IntSequence c(v, off, off+tdims.getSym(i).dimen());
c.pmonotone(tdims.getSym(i));
off += tdims.getSym(i).dimen();
}
}
void
FPSTensor::decrement(IntSequence &v) const
{
TL_RAISE("FPSTensor::decrement not implemented");
}
UTensor &
FPSTensor::unfold() const
{
TL_RAISE("Unfolding of FPSTensor not implemented");
UFSTensor *nothing = new UFSTensor(0, 0, 0);
return *nothing;
}
/* We only call |calcOffset| of the |PerTensorDimens2|. */
int
FPSTensor::getOffset(const IntSequence &v) const
{
return tdims.calcOffset(v);
}
/* Here we add the tensor to |out|. We go through all columns of the
|out|, apply the permutation to get index in the tensor, and add the
column. Note that if the permutation is identity, then the dimensions
of the tensors might not be the same (since this tensor is partially
folded). */
void
FPSTensor::addTo(FGSTensor &out) const
{
for (index tar = out.begin(); tar != out.end(); ++tar)
{
IntSequence coor(dimen());
tdims.getPer().apply(tar.getCoor(), coor);
index src(this, coor);
out.addColumn(*this, *src, *tar);
}
}
/* Here is the constructor which multiplies the Kronecker product with
the general symmetry sparse tensor |GSSparseTensor|. The main idea is
to go through the items in the sparse tensor (each item selects rows in
the matrices from the Kronecker product), then to Kronecker-multiply
the rows, multiply by the item, and add the resulting row to
the appropriate row of the resulting |FPSTensor|.
The realization of this idea is a bit more complicated, since we have
to go through all items, and each item must be added as many times as
it has symmetric elements. Moreover, the permutations shuffle the
order of rows in their Kronecker product.
So, we go through all unfolded indices in a tensor with the same
dimensions as the |GSSparseTensor| (the sparse slice). For each such
index we calculate its folded version (corresponding to the ordering of
subsequences within symmetries), we test whether there is an item in the
sparse slice with such coordinates, and if there is, we construct the
Kronecker product of the rows, go through all items with these
coordinates, and add to the appropriate rows of |this| tensor. */
FPSTensor::FPSTensor(const TensorDimens &td, const Equivalence &e, const Permutation &p,
const GSSparseTensor &a, const KronProdAll &kp)
: FTensor(along_col, PerTensorDimens(td, Permutation(e, p)).getNVX(),
a.nrows(), kp.ncols(), td.dimen()),
tdims(td, e, p)
{
zeros();
UGSTensor dummy(0, a.getDims());
for (Tensor::index run = dummy.begin(); run != dummy.end(); ++run)
{
Tensor::index fold_ind = dummy.getFirstIndexOf(run);
const IntSequence &c = fold_ind.getCoor();
GSSparseTensor::const_iterator sl = a.getMap().lower_bound(c);
if (sl != a.getMap().end())
{
Vector *row_prod = kp.multRows(run.getCoor());
GSSparseTensor::const_iterator su = a.getMap().upper_bound(c);
for (GSSparseTensor::const_iterator srun = sl; srun != su; ++srun)
{
Vector out_row((*srun).second.first, *this);
out_row.add((*srun).second.second, *row_prod);
}
delete row_prod;
}
}
}

View File

@ -1,422 +0,0 @@
@q $Id: ps_tensor.cweb 148 2005-04-19 15:12:26Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@ Start of {\tt ps\_tensor.cpp} file.
@c
#include "ps_tensor.h"
#include "fs_tensor.h"
#include "tl_exception.h"
#include "tl_static.h"
#include "stack_container.h"
@<|UPSTensor::decideFillMethod| code@>;
@<|UPSTensor| slicing constructor code@>;
@<|UPSTensor| increment and decrement@>;
@<|UPSTensor::fold| code@>;
@<|UPSTensor::getOffset| code@>;
@<|UPSTensor::addTo| folded code@>;
@<|UPSTensor::addTo| unfolded code@>;
@<|UPSTensor::tailIdentitySize| code@>;
@<|UPSTensor::fillFromSparseOne| code@>;
@<|UPSTensor::fillFromSparseTwo| code@>;
@<|PerTensorDimens2::setDimensionSizes| code@>;
@<|PerTensorDimens2::calcOffset| code@>;
@<|PerTensorDimens2::print| code@>;
@<|FPSTensor::increment| code@>;
@<|FPSTensor::decrement| code@>;
@<|FPSTensor::unfold| code@>;
@<|FPSTensor::getOffset| code@>;
@<|FPSTensor::addTo| code@>;
@<|FPSTensor| sparse constructor@>;
@ Here we decide, what method for filling a slice in slicing
constructor to use. A few experiments suggest, that if the tensor is
more than 8\% filled, the first method (|fillFromSparseOne|) is
better. For fill factors less than 1\%, the second can be 3 times
quicker.
@<|UPSTensor::decideFillMethod| code@>=
UPSTensor::fill_method UPSTensor::decideFillMethod(const FSSparseTensor& t)
{
if (t.getFillFactor() > 0.08)
return first;
else
return second;
}
@ Here we make a slice. We decide what fill method to use and set it.
@<|UPSTensor| slicing constructor code@>=
UPSTensor::UPSTensor(const FSSparseTensor& t, const IntSequence& ss,
const IntSequence& coor, const PerTensorDimens& ptd)
: UTensor(along_col, ptd.getNVX(),
t.nrows(), ptd.calcUnfoldMaxOffset(), ptd.dimen()),
tdims(ptd)
{
TL_RAISE_IF(coor.size() != t.dimen(),
"Wrong coordinates length of stacks for UPSTensor slicing constructor");
TL_RAISE_IF(ss.sum() != t.nvar(),
"Wrong length of stacks for UPSTensor slicing constructor");
if (first == decideFillMethod(t))
fillFromSparseOne(t, ss, coor);
else
fillFromSparseTwo(t, ss, coor);
}
@
@<|UPSTensor| increment and decrement@>=
void UPSTensor::increment(IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in UPSTensor::increment");
UTensor::increment(v, tdims.getNVX());
}
void UPSTensor::decrement(IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in UPSTensor::decrement");
UTensor::decrement(v, tdims.getNVX());
}
@
@<|UPSTensor::fold| code@>=
FTensor& UPSTensor::fold() const
{
TL_RAISE("Never should come to this place in UPSTensor::fold");
FFSTensor* nothing = new FFSTensor(0,0,0);
return *nothing;
}
@
@<|UPSTensor::getOffset| code@>=
int UPSTensor::getOffset(const IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input vector size in UPSTensor::getOffset");
return UTensor::getOffset(v, tdims.getNVX());
}
@
@<|UPSTensor::addTo| folded code@>=
void UPSTensor::addTo(FGSTensor& out) const
{
TL_RAISE_IF(out.getDims() != tdims,
"Tensors have incompatible dimens in UPSTensor::addTo");
for (index in = out.begin(); in != out.end(); ++in) {
IntSequence vtmp(dimen());
tdims.getPer().apply(in.getCoor(), vtmp);
index tin(this, vtmp);
out.addColumn(*this, *tin, *in);
}
}
@ In here, we have to add this permuted symmetry unfolded tensor to an
unfolded not permuted tensor. One easy way would be to go through the
target tensor, permute each index, and add the column.
However, it may happen that the permutation has a non-empty
identity tail. In this case, we can add not only individual columns,
but much bigger data chunks, which is usually more
efficient. Therefore, the code is quite dirty, because we do not have an
iterator which iterates over the tensor at some higher level, so we
simulate it by the following code.
First we set |cols| to the length of the data chunk and |off| to its
dimension. Then we need a front part of |nvmax| of |out|, which is
|nvmax_part|. Our iterator here is an integer sequence |outrun| with
full length, and |outrun_part| its front part. The |outrun| is
initialized to zeros. In each step we need to increment |outrun|
|cols|-times, this is done by incrementing its prefix |outrun_part|.
So we loop over all |cols|-wide partitions of |out|, and permute |outrun|
to obtain |perrun|, which selects the corresponding column of this tensor
(note that the trailing part of |perrun| is the same as that of |outrun|).
Then we construct submatrices, add them, and increment |outrun|.
@<|UPSTensor::addTo| unfolded code@>=
void UPSTensor::addTo(UGSTensor& out) const
{
TL_RAISE_IF(out.getDims() != tdims,
"Tensors have incompatible dimens in UPSTensor::addTo");
int cols = tailIdentitySize();
int off = tdims.tailIdentity();
IntSequence outrun(out.dimen(), 0);
IntSequence outrun_part(outrun, 0, out.dimen()-off);
IntSequence nvmax_part(out.getDims().getNVX(), 0, out.dimen()-off);
for (int out_col = 0; out_col < out.ncols(); out_col+=cols) {
// permute |outrun|
IntSequence perrun(out.dimen());
tdims.getPer().apply(outrun, perrun);
index from(this, perrun);
// construct submatrices
ConstTwoDMatrix subfrom(*this, *from, cols);
TwoDMatrix subout(out, out_col, cols);
// add
subout.add(1, subfrom);
// increment |outrun| by cols
UTensor::increment(outrun_part, nvmax_part);
}
}
@ This returns a product of all items in |nvmax| which make up the
trailing identity part.
@<|UPSTensor::tailIdentitySize| code@>=
int UPSTensor::tailIdentitySize() const
{
return tdims.getNVX().mult(dimen()-tdims.tailIdentity(), dimen());
}
@ This fill method is pretty dumb. We go through all columns in |this|
tensor, translate coordinates to sparse tensor, sort them and find an
item in the sparse tensor. There are many unsuccessful lookups for a
really sparse tensor, which is why the second method works better for
really sparse tensors.
@<|UPSTensor::fillFromSparseOne| code@>=
void UPSTensor::fillFromSparseOne(const FSSparseTensor& t, const IntSequence& ss,
const IntSequence& coor)
{
IntSequence cumtmp(ss.size());
cumtmp[0] = 0;
for (int i = 1; i < ss.size(); i++)
cumtmp[i] = cumtmp[i-1] + ss[i-1];
IntSequence cum(coor.size());
for (int i = 0; i < coor.size(); i++)
cum[i] = cumtmp[coor[i]];
zeros();
for (Tensor::index run = begin(); run != end(); ++run) {
IntSequence c(run.getCoor());
c.add(1, cum);
c.sort();
FSSparseTensor::const_iterator sl = t.getMap().lower_bound(c);
if (sl != t.getMap().end()) {
FSSparseTensor::const_iterator su = t.getMap().upper_bound(c);
for (FSSparseTensor::const_iterator srun = sl; srun != su; ++srun)
get((*srun).second.first, *run) = (*srun).second.second;
}
}
}
@ This is the second way of filling the slice. For instance, let the
slice correspond to partitions $abac$. In here we first calculate
lower and upper bounds for index of the sparse tensor for the
slice. These are |lb_srt| and |ub_srt| respectively. They correspond
to ordering $aabc$. Then we go through that interval, and select items
which are really between the bounds. Then we take the index, subtract
the lower bound to get it to coordinates of the slice. We get
something like $(i_a,j_a,k_b,l_c)$. Then we apply the inverse of the
sorting permutation $abac\mapsto aabc$ to get the index
$(i_a,k_b,j_a,l_c)$. Recall that the slice is unfolded, so we have to
apply all permutations preserving the stack coordinates $abac$. In our
case we get list of indices $(i_a,k_b,j_a,l_c)$ and
$(j_a,k_b,i_a,l_c)$. For all of these we copy the item of the sparse tensor to
the appropriate column.
@<|UPSTensor::fillFromSparseTwo| code@>=
void UPSTensor::fillFromSparseTwo(const FSSparseTensor& t, const IntSequence& ss,
const IntSequence& coor)
{
IntSequence coor_srt(coor);
coor_srt.sort();
IntSequence cum(ss.size());
cum[0] = 0;
for (int i = 1; i < ss.size(); i++)
cum[i] = cum[i-1] + ss[i-1];
IntSequence lb_srt(coor.size());
IntSequence ub_srt(coor.size());
for (int i = 0; i < coor.size(); i++) {
lb_srt[i] = cum[coor_srt[i]];
ub_srt[i] = cum[coor_srt[i]] + ss[coor_srt[i]] - 1;
}
const PermutationSet& pset = tls.pbundle->get(coor.size());
vector<const Permutation*> pp = pset.getPreserving(coor);
Permutation unsort(coor);
zeros();
FSSparseTensor::const_iterator lbi = t.getMap().lower_bound(lb_srt);
FSSparseTensor::const_iterator ubi = t.getMap().upper_bound(ub_srt);
for (FSSparseTensor::const_iterator run = lbi; run != ubi; ++run) {
if (lb_srt.lessEq((*run).first) && (*run).first.lessEq(ub_srt)) {
IntSequence c((*run).first);
c.add(-1, lb_srt);
unsort.apply(c);
for (unsigned int i = 0; i < pp.size(); i++) {
IntSequence cp(coor.size());
pp[i]->apply(c, cp);
Tensor::index ind(this, cp);
TL_RAISE_IF(*ind < 0 || *ind >= ncols(),
"Internal error in slicing constructor of UPSTensor");
get((*run).second.first, *ind) = (*run).second.second;
}
}
}
}
@ Here we calculate the maximum offsets in each folded dimension
(dimension sizes, hence |ds|).
@<|PerTensorDimens2::setDimensionSizes| code@>=
void PerTensorDimens2::setDimensionSizes()
{
const IntSequence& nvs = getNVS();
for (int i = 0; i < numSyms(); i++) {
TensorDimens td(syms[i], nvs);
ds[i] = td.calcFoldMaxOffset();
}
}
@ If there are two folded dimensions, the offset in such a dimension
is offset of the second plus offset of the first times the maximum
offset of the second. If there are $n+1$ dimensions, the offset is a
sum of offsets of the last dimension plus the offset in the first $n$
dimensions multiplied by the maximum offset of the last
dimension. This is exactly what the following code does.
@<|PerTensorDimens2::calcOffset| code@>=
int PerTensorDimens2::calcOffset(const IntSequence& coor) const
{
TL_RAISE_IF(coor.size() != dimen(),
"Wrong length of coordinates in PerTensorDimens2::calcOffset");
IntSequence cc(coor);
int ret = 0;
int off = 0;
for (int i = 0; i < numSyms(); i++) {
TensorDimens td(syms[i], getNVS());
IntSequence c(cc, off, off+syms[i].dimen());
int a = td.calcFoldOffset(c);
ret = ret*ds[i] + a;
off += syms[i].dimen();
}
return ret;
}
@
@<|PerTensorDimens2::print| code@>=
void PerTensorDimens2::print() const
{
printf("nvmax: "); nvmax.print();
printf("per: "); per.print();
printf("syms: "); syms.print();
printf("dims: "); ds.print();
}
@ Here we increment the given integer sequence. It corresponds to
|UTensor::increment| of the whole sequence, and then partial
monotonizing of the subsequences with respect to the
symmetries of each dimension.
@<|FPSTensor::increment| code@>=
void FPSTensor::increment(IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong length of coordinates in FPSTensor::increment");
UTensor::increment(v, tdims.getNVX());
int off = 0;
for (int i = 0; i < tdims.numSyms(); i++) {
IntSequence c(v, off, off+tdims.getSym(i).dimen());
c.pmonotone(tdims.getSym(i));
off += tdims.getSym(i).dimen();
}
}
@
@<|FPSTensor::decrement| code@>=
void FPSTensor::decrement(IntSequence& v) const
{
TL_RAISE("FPSTensor::decrement not implemented");
}
@
@<|FPSTensor::unfold| code@>=
UTensor& FPSTensor::unfold() const
{
TL_RAISE("Unfolding of FPSTensor not implemented");
UFSTensor* nothing = new UFSTensor(0,0,0);
return *nothing;
}
@ We only call |calcOffset| of the |PerTensorDimens2|.
@<|FPSTensor::getOffset| code@>=
int FPSTensor::getOffset(const IntSequence& v) const
{
return tdims.calcOffset(v);
}
@ Here we add the tensor to |out|. We go through all columns of the
|out|, apply the permutation to get index in the tensor, and add the
column. Note that if the permutation is identity, then the dimensions
of the tensors might not be the same (since this tensor is partially
folded).
@<|FPSTensor::addTo| code@>=
void FPSTensor::addTo(FGSTensor& out) const
{
for (index tar = out.begin(); tar != out.end(); ++tar) {
IntSequence coor(dimen());
tdims.getPer().apply(tar.getCoor(), coor);
index src(this, coor);
out.addColumn(*this, *src, *tar);
}
}
@ Here is the constructor which multiplies the Kronecker product with
the general symmetry sparse tensor |GSSparseTensor|. The main idea is
to go through items in the sparse tensor (each item selects rows in
the matrices from the Kronecker product), then to Kronecker-multiply
the rows and multiply with the item, and to add the resulting row to
the appropriate row of the resulting |FPSTensor|.
The realization of this idea is a bit more complicated since we have
to go through all items, and each item must be added as many times as
it has its symmetric elements. Moreover, the permutations shuffle
order of rows in their Kronecker product.
So, we go through all unfolded indices in a tensor with the same
dimensions as the |GSSparseTensor| (sparse slice). For each such index
we calculate its folded version (corresponds to ordering of
subsequences within symmetries), we test if there is an item in the
sparse slice with such coordinates, and if there is, we construct the
Kronecker product of the rows, and go through all of items with the
coordinates, and add to appropriate rows of |this| tensor.
@<|FPSTensor| sparse constructor@>=
FPSTensor::FPSTensor(const TensorDimens& td, const Equivalence& e, const Permutation& p,
const GSSparseTensor& a, const KronProdAll& kp)
: FTensor(along_col, PerTensorDimens(td, Permutation(e, p)).getNVX(),
a.nrows(), kp.ncols(), td.dimen()),
tdims(td, e, p)
{
zeros();
UGSTensor dummy(0, a.getDims());
for (Tensor::index run = dummy.begin(); run != dummy.end(); ++run) {
Tensor::index fold_ind = dummy.getFirstIndexOf(run);
const IntSequence& c = fold_ind.getCoor();
GSSparseTensor::const_iterator sl = a.getMap().lower_bound(c);
if (sl != a.getMap().end()) {
Vector* row_prod = kp.multRows(run.getCoor());
GSSparseTensor::const_iterator su = a.getMap().upper_bound(c);
for (GSSparseTensor::const_iterator srun = sl; srun != su; ++srun) {
Vector out_row((*srun).second.first, *this);
out_row.add((*srun).second.second, *row_prod);
}
delete row_prod;
}
}
}
@ End of {\tt ps\_tensor.cpp} file.

384
dynare++/tl/cc/ps_tensor.hh Normal file
View File

@ -0,0 +1,384 @@
// Copyright 2004, Ondra Kamenik
// Even more general symmetry tensor.
/* Here we define an abstraction for a tensor, which has a general
symmetry, but the symmetry is not of the kind modelled by
|Symmetry|. This kind of tensor comes into existence when we evaluate
something like:
$$\left[B_{y^2u^3}\right]_{\alpha_1\alpha_2\beta_1\beta_2\beta_3}=
\cdots+\left[g_{y^3}\right]_{\gamma_1\gamma_2\gamma_3}
\left[g_{yu}\right]^{\gamma_1}_{\alpha_1\beta_3}
\left[g_{yu}\right]^{\gamma_2}_{\alpha_2\beta_1}
\left[g_u\right]^{\gamma_3}_{\beta_2}+\cdots
$$
If the tensors are unfolded, we obtain a tensor
$$g_{y^3}\cdot\left(g_{yu}\otimes g_{yu}\otimes g_{u}\right)$$
Obviously, this tensor can have a symmetry not compatible with the
ordering $\alpha_1\alpha_2\beta_1\beta_2\beta_3$ (in other words, not
compatible with the symmetry $y^2u^3$). In fact, the indices are permuted.
This kind of tensor must be added to $\left[B_{y^2u^3}\right]$. Its
dimensions are the same as of $\left[B_{y^2u^3}\right]$, but some
coordinates are permuted. The addition is the only action we need to
do with the tensor.
Another application where this permuted symmetry tensor appears is a
slice of a fully symmetric tensor. If the symmetric dimension of the
tensor is partitioned into contiguous parts, and we are interested only
in data with a given (permuted) symmetry of the partitions, then we
have a permuted symmetry tensor. For instance, if $x$ is partitioned as
$x=[a,b,c,d]$, and we have the tensor $\left[f_{x^3}\right]$, one can
take a slice (subtensor) $\left[f_{aca}\right]$. The data of this tensor
are a permutation of the data of $\left[f_{a^2c}\right]$.
Here we also define the folded version of the permuted symmetry tensor. It
has permuted symmetry and is partially folded. One can imagine it as a
product of a few dimensions, each of which is folded and has a few
variables. The underlying variables are permuted. The product of such
dimensions is described by |PerTensorDimens2|. The tensor holding the
underlying data is |FPSTensor|. */
#ifndef PS_TENSOR_H
#define PS_TENSOR_H
#include "tensor.hh"
#include "gs_tensor.hh"
#include "equivalence.hh"
#include "permutation.hh"
#include "kron_prod.hh"
#include "sparse_tensor.hh"
/* This is just a helper class for sorting a sequence on the call stack. */
class SortIntSequence : public IntSequence
{
public:
SortIntSequence(const IntSequence &s)
: IntSequence(s)
{
sort();
}
};
/* Here we declare a class describing the dimensions of a permuted
symmetry tensor. It inherits from |TensorDimens| and adds a permutation
which permutes |nvmax|. It has two kinds of constructors, each
corresponding to a context where the tensor appears.
The first constructor calculates the permutation from a given equivalence.
The second constructor corresponds to the dimensions of a slice. Let us
take $\left[f_{aca}\right]$ as an example. First it calculates the
|TensorDimens| of $\left[f_{a^2c}\right]$, then it calculates the
permutation corresponding to the ordering of $aca$ to $a^2c$, and applies
this permutation to the dimensions as in the first constructor. The
constructor takes only the stack sizes (lengths of $a$, $b$, $c$, and
$d$), and the coordinates of the picked partitions.
Note that the inherited methods |calcUnfoldColumns| and |calcFoldColumns|
work, since the number of columns is independent of the permutation, and
|calcFoldColumns| does not use the changed |nvmax|; it uses |nvs|, so it
is OK. */
class PerTensorDimens : public TensorDimens
{
protected:
Permutation per;
public:
PerTensorDimens(const Symmetry &s, const IntSequence &nvars,
const Equivalence &e)
: TensorDimens(s, nvars), per(e)
{
per.apply(nvmax);
}
PerTensorDimens(const TensorDimens &td, const Equivalence &e)
: TensorDimens(td), per(e)
{
per.apply(nvmax);
}
PerTensorDimens(const TensorDimens &td, const Permutation &p)
: TensorDimens(td), per(p)
{
per.apply(nvmax);
}
PerTensorDimens(const IntSequence &ss, const IntSequence &coor)
: TensorDimens(ss, SortIntSequence(coor)), per(coor)
{
per.apply(nvmax);
}
PerTensorDimens(const PerTensorDimens &td)
: TensorDimens(td), per(td.per)
{
}
const PerTensorDimens &
operator=(const PerTensorDimens &td)
{
TensorDimens::operator=(td); per = td.per; return *this;
}
bool
operator==(const PerTensorDimens &td)
{
return TensorDimens::operator==(td) && per == td.per;
}
int
tailIdentity() const
{
return per.tailIdentity();
}
const Permutation &
getPer() const
{
return per;
}
};
/* Here we declare the permuted symmetry unfolded tensor. It has
|PerTensorDimens| as a member. It inherits from |UTensor|, which
requires implementing the |fold| method. There is no folded counterpart,
so in our implementation we raise an unconditional exception and return
some dummy object (just to make it compilable without warnings).
The class has two sorts of constructors, corresponding to the contexts
where it appears. The first constructs the object from a given matrix and
Kronecker product. Within the constructor, all the calculations are
performed. Also we need to define the dimensions; these are the same as
those of the resulting matrix (in our example $\left[B_{y^2u^3}\right]$)
but permuted. The permutation is done in the |PerTensorDimens| constructor.
The second type of constructor is slicing. It makes a slice from an
|FSSparseTensor|. The slice is given by the stack sizes and the
coordinates of the picked stacks.
There are two algorithms for filling a slice of a sparse tensor. The
first, |fillFromSparseOne|, works well for denser tensors; the
second, |fillFromSparseTwo|, is better for very sparse tensors. We
provide a static method which decides which of the two algorithms is
better. */
class UPSTensor : public UTensor
{
const PerTensorDimens tdims;
public:
// |UPSTensor| constructors from Kronecker product
/* Here we have four constructors making an |UPSTensor| from a product
of a matrix and a Kronecker product. The first constructs the tensor from
the equivalence classes of the given equivalence, in an order given by the
equivalence. The second does the same but with the optimized
|KronProdAllOptim|, which has a different order of matrices than that
given by the classes in the equivalence. This permutation is projected
onto the permutation of the |UPSTensor|. The third is the same as the
first, but the classes of the equivalence are permuted by the given
permutation. Finally, the fourth is the most general combination. It
allows for a permutation of the equivalence classes, and for the optimized
|KronProdAllOptim|, which permutes the permuted equivalence classes. */
UPSTensor(const TensorDimens &td, const Equivalence &e,
const ConstTwoDMatrix &a, const KronProdAll &kp)
: UTensor(along_col, PerTensorDimens(td, e).getNVX(),
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e)
{
kp.mult(a, *this);
}
UPSTensor(const TensorDimens &td, const Equivalence &e,
const ConstTwoDMatrix &a, const KronProdAllOptim &kp)
: UTensor(along_col, PerTensorDimens(td, Permutation(e, kp.getPer())).getNVX(),
a.nrows(), kp.ncols(), td.dimen()), tdims(td, Permutation(e, kp.getPer()))
{
kp.mult(a, *this);
}
UPSTensor(const TensorDimens &td, const Equivalence &e, const Permutation &p,
const ConstTwoDMatrix &a, const KronProdAll &kp)
: UTensor(along_col, PerTensorDimens(td, Permutation(e, p)).getNVX(),
a.nrows(), kp.ncols(), td.dimen()), tdims(td, Permutation(e, p))
{
kp.mult(a, *this);
}
UPSTensor(const TensorDimens &td, const Equivalence &e, const Permutation &p,
const ConstTwoDMatrix &a, const KronProdAllOptim &kp)
: UTensor(along_col, PerTensorDimens(td, Permutation(e, Permutation(p, kp.getPer()))).getNVX(),
a.nrows(), kp.ncols(), td.dimen()), tdims(td, Permutation(e, Permutation(p, kp.getPer())))
{
kp.mult(a, *this);
}
UPSTensor(const FSSparseTensor &t, const IntSequence &ss,
const IntSequence &coor, const PerTensorDimens &ptd);
UPSTensor(const UPSTensor &ut)
: UTensor(ut), tdims(ut.tdims)
{
}
void increment(IntSequence &v) const;
void decrement(IntSequence &v) const;
FTensor &fold() const;
int getOffset(const IntSequence &v) const;
void addTo(FGSTensor &out) const;
void addTo(UGSTensor &out) const;
enum fill_method {first, second};
static fill_method decideFillMethod(const FSSparseTensor &t);
private:
int tailIdentitySize() const;
void fillFromSparseOne(const FSSparseTensor &t, const IntSequence &ss,
const IntSequence &coor);
void fillFromSparseTwo(const FSSparseTensor &t, const IntSequence &ss,
const IntSequence &coor);
};
/* Here we define an abstraction for a tensor dimension with a
symmetry like $xuv\vert uv\vert xu\vert y\vert y\vert x\vert x\vert
y$. These symmetries arise as symmetries induced by an equivalence and
some outer symmetry. Thus the underlying variables are permuted. One
can imagine the dimensions as an unfolded product of dimensions which
consist of folded products of variables.
We inherit from |PerTensorDimens| since we need the permutation
implied by the equivalence. The new members are the induced symmetries
(the symmetries of each folded dimension) and |ds|, which holds the sizes
of the dimensions. The number of folded dimensions is returned by
|numSyms|. The object is constructed from the outer tensor dimensions and
from an equivalence with optionally permuted classes. */
class PerTensorDimens2 : public PerTensorDimens
{
InducedSymmetries syms;
IntSequence ds;
public:
PerTensorDimens2(const TensorDimens &td, const Equivalence &e,
const Permutation &p)
: PerTensorDimens(td, Permutation(e, p)),
syms(e, p, td.getSym()),
ds(syms.size())
{
setDimensionSizes();
}
PerTensorDimens2(const TensorDimens &td, const Equivalence &e)
: PerTensorDimens(td, e),
syms(e, td.getSym()),
ds(syms.size())
{
setDimensionSizes();
}
int
numSyms() const
{
return (int) syms.size();
}
const Symmetry &
getSym(int i) const
{
return syms[i];
}
int
calcMaxOffset() const
{
return ds.mult();
}
int calcOffset(const IntSequence &coor) const;
void print() const;
protected:
void setDimensionSizes();
};
/* Here we define an abstraction of the permuted symmetry folded
tensor. It is needed in context of the Faa Di Bruno formula for folded
stack container multiplied with container of dense folded tensors, or
multiplied by one full symmetry sparse tensor.
For example, if we perform the Faa Di Bruno for $F=f(z)$, where
$z=[g(x,y,u,v), h(x,y,u), x, y]^T$, we get for one concrete
equivalence:
$$
\left[F_{x^4y^3u^3v^2}\right]=\ldots+
\left[f_{g^2h^2x^2y}\right]\left(
[g]_{xv}\otimes[g]_{u^2v}\otimes
[h]_{xu}\otimes[h]_{y^2}\otimes
\left[\vphantom{\sum}[I]_x\otimes[I]_x\right]\otimes
\left[\vphantom{\sum}[I]_y\right]
\right)
+\ldots
$$
The class |FPSTensor| represents the tensor on the right. Its
dimension corresponds to a product of 7 dimensions with the following
symmetries: $xv\vert u^2v\vert xu\vert y^2\vert x\vert x\vert y$. Such
a dimension is described by |PerTensorDimens2|.
The tensor is constructed in the context of stack container
multiplication, so it is constructed from dimensions |td| (the dimensions
of the output tensor), stack product |sp| (implied symmetries picking
tensors from a stack container, here it is $z$), then a sorted integer
sequence of the picked stacks of the stack product (it is always
sorted, here it is $(0,0,1,1,2,2,3)$), then the tensor
$\left[f_{g^2h^2x^2y}\right]$ (its symmetry must be the same as
symmetry given by the |istacks|), and finally from the equivalence
with permuted classes.
We implement the |increment| and |getOffset| methods; |decrement| and
|unfold| raise an exception. Also, we implement the |addTo| method, which
adds the tensor data (partially unfolded) to a folded general symmetry
tensor. */
template<typename _Ttype>
class StackProduct;
class FPSTensor : public FTensor
{
const PerTensorDimens2 tdims;
public:
/* As for |UPSTensor|, we provide four constructors allowing for
combinations of permuting equivalence classes, and optimization of
|KronProdAllOptim|. These constructors multiply with dense general
symmetry tensor (coming from the dense container, or as a dense slice
of the full symmetry sparse tensor). In addition to these 4
constructors, we have one constructor multiplying with general
symmetry sparse tensor (coming as a sparse slice of the full symmetry
sparse tensor). */
FPSTensor(const TensorDimens &td, const Equivalence &e,
const ConstTwoDMatrix &a, const KronProdAll &kp)
: FTensor(along_col, PerTensorDimens(td, e).getNVX(),
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e)
{
kp.mult(a, *this);
}
FPSTensor(const TensorDimens &td, const Equivalence &e,
const ConstTwoDMatrix &a, const KronProdAllOptim &kp)
: FTensor(along_col, PerTensorDimens(td, Permutation(e, kp.getPer())).getNVX(),
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e, kp.getPer())
{
kp.mult(a, *this);
}
FPSTensor(const TensorDimens &td, const Equivalence &e, const Permutation &p,
const ConstTwoDMatrix &a, const KronProdAll &kp)
: FTensor(along_col, PerTensorDimens(td, Permutation(e, p)).getNVX(),
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e, p)
{
kp.mult(a, *this);
}
FPSTensor(const TensorDimens &td, const Equivalence &e, const Permutation &p,
const ConstTwoDMatrix &a, const KronProdAllOptim &kp)
: FTensor(along_col, PerTensorDimens(td, Permutation(e, Permutation(p, kp.getPer()))).getNVX(),
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e, Permutation(p, kp.getPer()))
{
kp.mult(a, *this);
}
FPSTensor(const TensorDimens &td, const Equivalence &e, const Permutation &p,
const GSSparseTensor &t, const KronProdAll &kp);
FPSTensor(const FPSTensor &ft)
: FTensor(ft), tdims(ft.tdims)
{
}
void increment(IntSequence &v) const;
void decrement(IntSequence &v) const;
UTensor &unfold() const;
int getOffset(const IntSequence &v) const;
void addTo(FGSTensor &out) const;
};
#endif

View File

@ -1,351 +0,0 @@
@q $Id: ps_tensor.hweb 741 2006-05-09 11:12:46Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 Even more general symmetry tensor. Start of {\tt ps\_tensor.h} file.
Here we define an abstraction for a tensor, which has a general
symmetry, but the symmetry is not of what is modelled by
|Symmetry|. This kind of tensor comes to existence when we evaluate
something like:
$$\left[B_{y^2u^3}\right]_{\alpha_1\alpha_2\beta_1\beta_2\beta_3}=
\cdots+\left[g_{y^3}\right]_{\gamma_1\gamma_2\gamma_3}
\left[g_{yu}\right]^{\gamma_1}_{\alpha_1\beta_3}
\left[g_{yu}\right]^{\gamma_2}_{\alpha_2\beta_1}
\left[g_u\right]^{\gamma_3}_{\beta_2}+\cdots
$$
If the tensors are unfolded, we obtain a tensor
$$g_{y^3}\cdot\left(g_{yu}\otimes g_{yu}\otimes g_{u}\right)$$
Obviously, this tensor can have a symmetry not compatible with
ordering $\alpha_1\alpha_2\beta_1\beta_2\beta_3$, (in other words, not
compatible with symmetry $y^2u^3$). In fact, the indices are permuted.
This kind of tensor must be added to $\left[B_{y^2u^3}\right]$. Its
dimensions are the same as of $\left[B_{y^2u^3}\right]$, but some
coordinates are permuted. The addition is the only action we need to
do with the tensor.
Another application where this permuted symmetry tensor appears is a
slice of a fully symmetric tensor. If the symmetric dimension of the
tensor is partitioned to continuous parts, and we are interested only
in data with a given symmetry (permuted) of the partitions, then we
have the permuted symmetry tensor. For instance, if $x$ is partitioned
$x=[a,b,c,d]$, and we have the tensor $\left[f_{x^3}\right]$, one can take a
slice (subtensor) $\left[f_{aca}\right]$. The data of this tensor are a
permutation of the data of $\left[f_{a^2c}\right]$.
Here we also define the folded version of permuted symmetry tensor. It
has permuted symmetry and is partially folded. One can imagine it as a
product of a few dimensions, each of them is folded and having a few
variables. The underlying variables are permuted. The product of such
dimensions is described by |PerTensorDimens2|. The tensor holding the
underlying data is |FPSTensor|.
@s SortIntSequence int
@s PerTensorDimens int
@s UPSTensor int
@s PerTensorDimens2 int
@s FPSTensor int
@s KronProdFoldStacks int
@c
#ifndef PS_TENSOR_H
#define PS_TENSOR_H
#include "tensor.h"
#include "gs_tensor.h"
#include "equivalence.h"
#include "permutation.h"
#include "kron_prod.h"
#include "sparse_tensor.h"
@<|SortIntSequence| class declaration@>;
@<|PerTensorDimens| class declaration@>;
@<|UPSTensor| class declaration@>;
@<|PerTensorDimens2| class declaration@>;
@<|FPSTensor| class declaration@>;
#endif
@ This is just a helper class for ordering a sequence on call stack.
@<|SortIntSequence| class declaration@>=
class SortIntSequence : public IntSequence {
public:@;
SortIntSequence(const IntSequence& s)
: IntSequence(s) {@+ sort();@+}
};
@ Here we declare a class describing dimensions of permuted symmetry
tensor. It inherits from |TensorDimens| and adds a permutation which
permutes |nvmax|. It has two constructors, each of which corresponds to a
context where the tensor appears.
The first constructor calculates the permutation from a given equivalence.
The second constructor corresponds to dimensions of a slice. Let us
take $\left[f_{aca}\right]$ as an example. First it calculates
|TensorDimens| of $\left[f_{a^2c}\right]$, then it calculates a
permutation corresponding to ordering of $aca$ to $a^2c$, and applies
this permutation on the dimensions as the first constructor. The
constructor takes only stack sizes (lengths of $a$, $b$, $c$, and
$d$), and coordinates of picked partitions.
Note that the inherited methods |calcUnfoldColumns| and |calcFoldColumns|
still work, since the number of columns is independent of the permutation, and
|calcFoldColumns| does not use the changed |nvmax|; it uses |nvs|, so it
is OK.
@<|PerTensorDimens| class declaration@>=
class PerTensorDimens : public TensorDimens {
protected:@;
Permutation per;
public:@;
PerTensorDimens(const Symmetry& s, const IntSequence& nvars,
const Equivalence& e)
: TensorDimens(s, nvars), per(e)
{@+ per.apply(nvmax);@+}
PerTensorDimens(const TensorDimens& td, const Equivalence& e)
: TensorDimens(td), per(e)
{@+ per.apply(nvmax);@+}
PerTensorDimens(const TensorDimens& td, const Permutation& p)
: TensorDimens(td), per(p)
{@+ per.apply(nvmax);@+}
PerTensorDimens(const IntSequence& ss, const IntSequence& coor)
: TensorDimens(ss, SortIntSequence(coor)), per(coor)
{@+ per.apply(nvmax);@+}
PerTensorDimens(const PerTensorDimens& td)
: TensorDimens(td), per(td.per)@+ {}
const PerTensorDimens& operator=(const PerTensorDimens& td)
{@+ TensorDimens::operator=(td);@+ per = td.per;@+ return *this;@+}
bool operator==(const PerTensorDimens& td)
{@+ return TensorDimens::operator==(td) && per == td.per;@+}
int tailIdentity() const
{@+ return per.tailIdentity();@+}
const Permutation& getPer() const
{@+ return per;@+}
};
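The slice constructor just above can be tied back to the $\left[f_{aca}\right]$ example with a tiny standalone sketch (std code only; the encoding of the picked partitions as integers is an assumption): sorting the picked coordinates yields the symmetry $a^2c$, and the original ordering defines the permutation.
#include <algorithm>
#include <iostream>
#include <vector>
int main()
{
  // picked partitions for the slice [f_aca] of x = [a,b,c,d]: a=0, c=2, a=0
  std::vector<int> coor = {0, 2, 0};
  std::vector<int> sorted(coor);
  std::sort(sorted.begin(), sorted.end()); // (0,0,2), i.e. the symmetry a^2c
  for (int c : sorted)
    std::cout << c << ' ';                 // prints 0 0 2
  std::cout << '\n';
}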
@ Here we declare the permuted symmetry unfolded tensor. It has
|PerTensorDimens| as a member. It inherits from |UTensor|, which
requires the |fold| method to be implemented. There is no folded counterpart,
so our implementation raises an unconditional exception and returns
a dummy object (just to make it compilable without warnings).
The class has two sorts of constructors corresponding to a context where it
appears. The first constructs the object from a given matrix and a
Kronecker product. Within the constructor, all the calculations are
performed. Also, we need to define the dimensions; these are the same as those of
the resulting matrix (in our example $\left[B_{y^2u^3}\right]$) but
permuted. The permutation is done in the |PerTensorDimens| constructor.
The second type of constructor is slicing. It makes a slice from
|FSSparseTensor|. The slice is given by stack sizes, and coordinates of
picked stacks.
There are two algorithms for filling a slice of a sparse tensor. The
first, |fillFromSparseOne|, works well for denser tensors; the
second, |fillFromSparseTwo|, is better for very sparse tensors. We
provide a static method which decides which of the two algorithms is
better.
@<|UPSTensor| class declaration@>=
class UPSTensor : public UTensor {
const PerTensorDimens tdims;
public:@;
@<|UPSTensor| constructors from Kronecker product@>;
UPSTensor(const FSSparseTensor& t, const IntSequence& ss,
const IntSequence& coor, const PerTensorDimens& ptd);
UPSTensor(const UPSTensor& ut)
: UTensor(ut), tdims(ut.tdims)@+ {}
void increment(IntSequence& v) const;
void decrement(IntSequence& v) const;
FTensor& fold() const;
int getOffset(const IntSequence& v) const;
void addTo(FGSTensor& out) const;
void addTo(UGSTensor& out) const;
enum fill_method {first, second};
static fill_method decideFillMethod(const FSSparseTensor& t);
private:@;
int tailIdentitySize() const;
void fillFromSparseOne(const FSSparseTensor& t, const IntSequence& ss,
const IntSequence& coor);
void fillFromSparseTwo(const FSSparseTensor& t, const IntSequence& ss,
const IntSequence& coor);
};
@ Here we have four constructors making an |UPSTensor| from a product
of a matrix and a Kronecker product. The first constructs the tensor from
equivalence classes of the given equivalence in an order given by the
equivalence. The second does the same but with optimized
|KronProdAllOptim|, which has a different order of matrices than given
by the classes in the equivalence. This permutation is projected to
the permutation of the |UPSTensor|. The third is the same as the
first, but the classes of the equivalence are permuted by the given
permutation. Finally, the fourth is the most general combination. It
allows for a permutation of equivalence classes, and for optimized
|KronProdAllOptim|, which permutes the permuted equivalence classes.
@<|UPSTensor| constructors from Kronecker product@>=
UPSTensor(const TensorDimens& td, const Equivalence& e,
const ConstTwoDMatrix& a, const KronProdAll& kp)
: UTensor(along_col, PerTensorDimens(td, e).getNVX(),
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e)
{@+ kp.mult(a, *this);@+}
UPSTensor(const TensorDimens& td, const Equivalence& e,
const ConstTwoDMatrix& a, const KronProdAllOptim& kp)
: UTensor(along_col, PerTensorDimens(td, Permutation(e, kp.getPer())).getNVX(),
a.nrows(), kp.ncols(), td.dimen()), tdims(td, Permutation(e, kp.getPer()))
{@+ kp.mult(a, *this);@+}
UPSTensor(const TensorDimens& td, const Equivalence& e, const Permutation& p,
const ConstTwoDMatrix& a, const KronProdAll& kp)
: UTensor(along_col, PerTensorDimens(td, Permutation(e, p)).getNVX(),
a.nrows(), kp.ncols(), td.dimen()), tdims(td, Permutation(e, p))
{@+ kp.mult(a, *this);@+}
UPSTensor(const TensorDimens& td, const Equivalence& e, const Permutation& p,
const ConstTwoDMatrix& a, const KronProdAllOptim& kp)
: UTensor(along_col, PerTensorDimens(td, Permutation(e, Permutation(p, kp.getPer()))).getNVX(),
a.nrows(), kp.ncols(), td.dimen()), tdims(td, Permutation(e, Permutation(p, kp.getPer())))
{@+ kp.mult(a, *this);@+}
@ Here we define an abstraction for the tensor dimension with the
symmetry like $xuv\vert uv\vert xu\vert y\vert y\vert x\vert x\vert
y$. These symmetries arise as symmetries induced by an equivalence and
some outer symmetry. Thus the underlying variables are permuted. One
can imagine the dimensions as an unfolded product of dimensions which
consist of folded products of variables.
We inherit from |PerTensorDimens| since we need the permutation
implied by the equivalence. The new members are the induced symmetries
(the symmetries of each folded dimension) and |ds|, which holds the sizes of the
dimensions. The number of folded dimensions is returned by |numSyms|.
The object is constructed from outer tensor dimensions and from
equivalence with optionally permuted classes.
@<|PerTensorDimens2| class declaration@>=
class PerTensorDimens2 : public PerTensorDimens {
InducedSymmetries syms;
IntSequence ds;
public:@;
PerTensorDimens2(const TensorDimens& td, const Equivalence& e,
const Permutation& p)
: PerTensorDimens(td, Permutation(e, p)),
syms(e, p, td.getSym()),
ds(syms.size())
{@+ setDimensionSizes();@+}
PerTensorDimens2(const TensorDimens& td, const Equivalence& e)
: PerTensorDimens(td, e),
syms(e, td.getSym()),
ds(syms.size())
{@+ setDimensionSizes();@+}
int numSyms() const
{@+ return (int)syms.size();@+}
const Symmetry& getSym(int i) const
{@+ return syms[i];@+}
int calcMaxOffset() const
{@+ return ds.mult(); @+}
int calcOffset(const IntSequence& coor) const;
void print() const;
protected:@;
void setDimensionSizes();
};
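The |calcMaxOffset| above is the product of the dimension sizes |ds|, and |calcOffset| then behaves like a mixed-radix number with those sizes as bases. A standalone sketch of that radix arithmetic (ignoring the within-dimension folding, which the real |calcOffset| additionally handles; all names here are illustrative):
#include <vector>
// Mixed-radix offset: coordinate coor[i] runs over ds[i] values.
int calcOffset(const std::vector<int> &coor, const std::vector<int> &ds)
{
  int off = 0;
  for (size_t i = 0; i < coor.size(); i++)
    off = off*ds[i] + coor[i];
  return off;
}
int main()
{
  // three folded dimensions of sizes 4, 2 and 3: the maximum offset is 4*2*3 = 24
  std::vector<int> ds = {4, 2, 3};
  return calcOffset({1, 0, 2}, ds); // 1*(2*3) + 0*3 + 2 = 8
}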
@ Here we define an abstraction of the permuted symmetry folded
tensor. It is needed in context of the Faa Di Bruno formula for folded
stack container multiplied with container of dense folded tensors, or
multiplied by one full symmetry sparse tensor.
For example, if we perform the Faa Di Bruno for $F=f(z)$, where
$z=[g(x,y,u,v), h(x,y,u), x, y]^T$, we get for one concrete
equivalence:
$$
\left[F_{x^4y^3u^3v^2}\right]=\ldots+
\left[f_{g^2h^2x^2y}\right]\left(
[g]_{xv}\otimes[g]_{u^2v}\otimes
[h]_{xu}\otimes[h]_{y^2}\otimes
\left[\vphantom{\sum}[I]_x\otimes[I]_x\right]\otimes
\left[\vphantom{\sum}[I]_y\right]
\right)
+\ldots
$$
The class |FPSTensor| represents the tensor on the right. Its
dimension corresponds to a product of 7 dimensions with the following
symmetries: $xv\vert u^2v\vert xu\vert y^2\vert x\vert x\vert y$. Such
a dimension is described by |PerTensorDimens2|.
The tensor is constructed in a context of stack container
multiplication, so it is constructed from the dimensions |td| (the dimensions
of the output tensor), stack product |sp| (implied symmetries picking
tensors from a stack container, here it is $z$), then a sorted integer
sequence of the picked stacks of the stack product (it is always
sorted, here it is $(0,0,1,1,2,2,3)$), then the tensor
$\left[f_{g^2h^2x^2y}\right]$ (its symmetry must be the same as
symmetry given by the |istacks|), and finally from the equivalence
with permuted classes.
We implement the |increment| and |getOffset| methods; |decrement| and
|unfold| raise an exception. Also, we implement the |addTo| method, which
adds the tensor data (partially unfolded) to a folded general symmetry
tensor.
@<|FPSTensor| class declaration@>=
template<typename _Ttype> class StackProduct;
class FPSTensor : public FTensor {
const PerTensorDimens2 tdims;
public:@;
@<|FPSTensor| constructors@>;
void increment(IntSequence& v) const;
void decrement(IntSequence& v) const;
UTensor& unfold() const;
int getOffset(const IntSequence& v) const;
void addTo(FGSTensor& out) const;
};
@ As for |UPSTensor|, we provide four constructors allowing for
combinations of permuting equivalence classes, and optimization of
|KronProdAllOptim|. These constructors multiply with dense general
symmetry tensor (coming from the dense container, or as a dense slice
of the full symmetry sparse tensor). In addition to these 4
constructors, we have one constructor multiplying with general
symmetry sparse tensor (coming as a sparse slice of the full symmetry
sparse tensor).
@<|FPSTensor| constructors@>=
FPSTensor(const TensorDimens& td, const Equivalence& e,
const ConstTwoDMatrix& a, const KronProdAll& kp)
: FTensor(along_col, PerTensorDimens(td, e).getNVX(),
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e)
{@+ kp.mult(a, *this);@+}
FPSTensor(const TensorDimens& td, const Equivalence& e,
const ConstTwoDMatrix& a, const KronProdAllOptim& kp)
: FTensor(along_col, PerTensorDimens(td, Permutation(e, kp.getPer())).getNVX(),
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e, kp.getPer())
{@+ kp.mult(a, *this);@+}
FPSTensor(const TensorDimens& td, const Equivalence& e, const Permutation& p,
const ConstTwoDMatrix& a, const KronProdAll& kp)
: FTensor(along_col, PerTensorDimens(td, Permutation(e, p)).getNVX(),
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e, p)
{@+ kp.mult(a, *this);@+}
FPSTensor(const TensorDimens& td, const Equivalence& e, const Permutation& p,
const ConstTwoDMatrix& a, const KronProdAllOptim& kp)
: FTensor(along_col, PerTensorDimens(td, Permutation(e, Permutation(p, kp.getPer()))).getNVX(),
a.nrows(), kp.ncols(), td.dimen()), tdims(td, e, Permutation(p, kp.getPer()))
{@+ kp.mult(a, *this);@+}
FPSTensor(const TensorDimens& td, const Equivalence& e, const Permutation& p,
const GSSparseTensor& t, const KronProdAll& kp);
FPSTensor(const FPSTensor& ft)
: FTensor(ft), tdims(ft.tdims)@+ {}
@ End of {\tt ps\_tensor.h} file.

View File

@ -0,0 +1,78 @@
// Copyright 2004, Ondra Kamenik
#include "pyramid_prod.hh"
#include "permutation.hh"
#include "tl_exception.hh"
/* Here we construct the |USubTensor| object. We allocate space via the
parent |URTensor|. The number of columns is the length of the list of
indices |lst|; the number of variables and the dimension are those of the tensor
$h$, as given by |hdims|.
We go through all equivalences with the number of classes equal to the
dimension of $B$. For each equivalence we make a permutation
|per|. Then we fetch all the necessary tensors $g$ with symmetries
implied by symmetry of $B$ and the equivalence. Then we go through the
list of indices, permute them by the permutation and add the Kronecker
product of the selected columns. This is done by |addKronColumn|. */
USubTensor::USubTensor(const TensorDimens &bdims,
const TensorDimens &hdims,
const FGSContainer &cont,
const vector<IntSequence> &lst)
: URTensor(lst.size(), hdims.getNVX()[0], hdims.dimen())
{
TL_RAISE_IF(!hdims.getNVX().isConstant(),
"Tensor has not full symmetry in USubTensor()");
const EquivalenceSet &eset = cont.getEqBundle().get(bdims.dimen());
zeros();
for (EquivalenceSet::const_iterator it = eset.begin();
it != eset.end(); ++it)
{
if ((*it).numClasses() == hdims.dimen())
{
Permutation per(*it);
vector<const FGSTensor *> ts
= cont.fetchTensors(bdims.getSym(), *it);
for (int i = 0; i < (int) lst.size(); i++)
{
IntSequence perindex(lst[i].size());
per.apply(lst[i], perindex);
addKronColumn(i, ts, perindex);
}
}
}
}
/* This makes a Kronecker product of appropriate columns from the tensors
in |ts| and adds such data to the |i|-th column of this matrix. The
appropriate columns are defined by |pindex| sequence. A column of a
tensor has index created from a corresponding part of |pindex|. The
sizes of these parts are given by dimensions of the tensors in |ts|.
Here we break the given index |pindex| according to the dimensions of
the tensors in |ts|, and for each subsequence of |pindex| we find
an index of the folded tensor; this involves calling |getOffset| for the
folded tensor, which might be costly. We gather all columns into the
vector |tmpcols|, which are Kronecker multiplied in the constructor of
|URSingleTensor|. Finally we add data of |URSingleTensor| to the
|i|-th column. */
void
USubTensor::addKronColumn(int i, const vector<const FGSTensor *> &ts,
const IntSequence &pindex)
{
vector<ConstVector> tmpcols;
int lastdim = 0;
for (unsigned int j = 0; j < ts.size(); j++)
{
IntSequence ind(pindex, lastdim, lastdim+ts[j]->dimen());
lastdim += ts[j]->dimen();
index in(ts[j], ind);
tmpcols.push_back(ConstVector(*(ts[j]), *in));
}
URSingleTensor kronmult(tmpcols);
Vector coli(*this, i);
coli.add(1.0, kronmult.getData());
}
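The following standalone sketch (std vectors only; |kron| and the variable names are illustrative, not TL classes) isolates the core of |addKronColumn|: Kronecker-multiply the selected columns and accumulate the result into the target column.
#include <iostream>
#include <vector>
// Kronecker product of two column vectors: r[i*|b|+j] = a[i]*b[j].
std::vector<double> kron(const std::vector<double> &a,
                         const std::vector<double> &b)
{
  std::vector<double> r(a.size()*b.size());
  for (size_t i = 0; i < a.size(); i++)
    for (size_t j = 0; j < b.size(); j++)
      r[i*b.size()+j] = a[i]*b[j];
  return r;
}
int main()
{
  std::vector<double> col_g = {1.0, 2.0};      // column picked from one tensor
  std::vector<double> col_h = {3.0, 4.0, 5.0}; // column picked from another
  std::vector<double> coli(6, 0.0);            // the i-th target column
  std::vector<double> k = kron(col_g, col_h);
  for (size_t i = 0; i < k.size(); i++)
    coli[i] += k[i];                           // coli.add(1.0, kronmult.getData())
  for (double v : coli)
    std::cout << v << ' ';                     // prints 3 4 5 6 8 10
  std::cout << '\n';
}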

View File

@ -1,86 +0,0 @@
@q $Id: pyramid_prod.cweb 148 2005-04-19 15:12:26Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@ Start of {\tt pyramid\_prod.cpp} file.
@c
#include "pyramid_prod.h"
#include "permutation.h"
#include "tl_exception.h"
@<|USubTensor| constructor code@>;
@<|USubTensor::addKronColumn| code@>;
@ Here we construct the |USubTensor| object. We allocate space via the
parent |URTensor|. The number of columns is the length of the list of
indices |lst|; the number of variables and the dimension are those of the tensor
$h$, as given by |hdims|.
We go through all equivalences with the number of classes equal to the
dimension of $B$. For each equivalence we make a permutation
|per|. Then we fetch all the necessary tensors $g$ with symmetries
implied by symmetry of $B$ and the equivalence. Then we go through the
list of indices, permute them by the permutation and add the Kronecker
product of the selected columns. This is done by |addKronColumn|.
@<|USubTensor| constructor code@>=
USubTensor::USubTensor(const TensorDimens& bdims,
const TensorDimens& hdims,
const FGSContainer& cont,
const vector<IntSequence>& lst)
: URTensor(lst.size(), hdims.getNVX()[0], hdims.dimen())
{
TL_RAISE_IF(! hdims.getNVX().isConstant(),
"Tensor has not full symmetry in USubTensor()");
const EquivalenceSet& eset = cont.getEqBundle().get(bdims.dimen());
zeros();
for (EquivalenceSet::const_iterator it = eset.begin();
it != eset.end(); ++it) {
if ((*it).numClasses() == hdims.dimen()) {
Permutation per(*it);
vector<const FGSTensor*> ts =
cont.fetchTensors(bdims.getSym(), *it);
for (int i = 0; i < (int)lst.size(); i++) {
IntSequence perindex(lst[i].size());
per.apply(lst[i], perindex);
addKronColumn(i, ts, perindex);
}
}
}
}
@ This makes a Kronecker product of appropriate columns from the tensors
in |ts| and adds such data to the |i|-th column of this matrix. The
appropriate columns are defined by |pindex| sequence. A column of a
tensor has index created from a corresponding part of |pindex|. The
sizes of these parts are given by dimensions of the tensors in |ts|.
Here we break the given index |pindex| according to the dimensions of
the tensors in |ts|, and for each subsequence of |pindex| we find
an index of the folded tensor; this involves calling |getOffset| for the
folded tensor, which might be costly. We gather all columns into the
vector |tmpcols|, which are Kronecker multiplied in the constructor of
|URSingleTensor|. Finally we add data of |URSingleTensor| to the
|i|-th column.
@<|USubTensor::addKronColumn| code@>=
void USubTensor::addKronColumn(int i, const vector<const FGSTensor*>& ts,
const IntSequence& pindex)
{
vector<ConstVector> tmpcols;
int lastdim = 0;
for (unsigned int j = 0; j < ts.size(); j++) {
IntSequence ind(pindex, lastdim, lastdim+ts[j]->dimen());
lastdim += ts[j]->dimen();
index in(ts[j], ind);
tmpcols.push_back(ConstVector(*(ts[j]), *in));
}
URSingleTensor kronmult(tmpcols);
Vector coli(*this, i);
coli.add(1.0, kronmult.getData());
}
@ End of {\tt pyramid\_prod.cpp} file.

View File

@ -0,0 +1,74 @@
// Copyright 2004, Ondra Kamenik
// Multiplying tensor columns.
/* In here, we implement the Faa Di Bruno for folded
tensors. Recall that one step of the Faa Di Bruno is the formula:
$$\left[B_{s^k}\right]_{\alpha_1\ldots\alpha_k}=
[h_{y^l}]_{\gamma_1\ldots\gamma_l}
\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}
$$
In contrast to unfolded implementation of |UGSContainer::multAndAdd|
with help of |KronProdAll| and |UPSTensor|, we take a completely
different strategy. We cannot afford full instantiation of
$$\sum_{c\in M_{l,k}}
\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}$$
and therefore we do it per partes. We select some number of columns,
for instance 10, and calculate 10 consecutive iterators of tensor $B$. Then we
form the unfolded tensor
$$[G]_S^{\gamma_1\ldots\gamma_l}=\left[\sum_{c\in M_{l,k}}
\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}
\right]_S$$
where $S$ is the selected set of 10 indices. This is done as Kronecker
product of vectors corresponding to the selected columns. Note that, in
general, there is no symmetry in $G$; its type is a special class created for
this purpose.
If $g$ is folded, then we have to form a folded version of $G$. There is
no symmetry in the data of $G$, so we sum together all unfolded indices corresponding
to a folded index. This is perfectly OK, since we multiply
these groups of (equivalent) items by the same number in the fully
symmetric $g$.
After this, we perform ordinary matrix multiplication to obtain a
selected set of columns of $B$.
In here, we define a class for forming and representing
$[G]_S^{\gamma_1\ldots\gamma_l}$. Basically, this tensor is
row-oriented (multidimensional index is along rows), and it is fully
symmetric. So we inherit from |URTensor|. If we need its folded
version, we simply use a suitable conversion. The new abstraction will
have only a new constructor allowing construction from the given set
of indices $S$ and a given set of tensors $g$. The rest of the process
is implemented in |@<|FGSContainer::multAndAdd| unfolded code@>| or
|@<|FGSContainer::multAndAdd| folded code@>|. */
#ifndef PYRAMID_PROD_H
#define PYRAMID_PROD_H
#include "int_sequence.hh"
#include "rfs_tensor.hh"
#include "gs_tensor.hh"
#include "t_container.hh"
#include <vector>
using namespace std;
/* Here we define the new tensor for representing
$[G]_S^{\gamma_1\ldots\gamma_l}$. It allows construction from a
container of folded general symmetry tensors |cont| and a set of
indices |lst|. Also, we have to supply the dimensions of the resulting tensor
$B$ and the dimensions of the tensor $h$. */
class USubTensor : public URTensor
{
public:
USubTensor(const TensorDimens &bdims, const TensorDimens &hdims,
const FGSContainer &cont, const vector<IntSequence> &lst);
void addKronColumn(int i, const vector<const FGSTensor *> &ts,
const IntSequence &pindex);
};
#endif
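As a rough illustration of the per-partes strategy explained above (all names here are hypothetical; in the library this step is carried out by |USubTensor| inside the |FGSContainer::multAndAdd| code), one batch amounts to building the small matrix $G$ for a set $S$ of selected columns and finishing with an ordinary matrix multiplication:
#include <cstddef>
#include <vector>
using Matrix = std::vector<std::vector<double>>; // row-major dense matrix (illustrative)
// One per-partes step: accumulate f*G into the columns of B selected by S.
// f is nrows x ng, G is ng x |S|, B is nrows x ncols; all hypothetical.
void multBatch(const Matrix &f, const Matrix &G,
               const std::vector<int> &S, Matrix &B)
{
  for (size_t r = 0; r < f.size(); r++)
    for (size_t s = 0; s < S.size(); s++)
      {
        double acc = 0.0;
        for (size_t k = 0; k < G.size(); k++)
          acc += f[r][k]*G[k][s]; // ordinary matrix multiplication
        B[r][S[s]] += acc;        // write only the selected columns of B
      }
}
int main()
{
  Matrix f = {{1.0, 2.0}};                  // 1 x 2
  Matrix G = {{1.0}, {1.0}};                // 2 x 1: one selected column
  Matrix B(1, std::vector<double>(3, 0.0)); // 1 x 3 output
  multBatch(f, G, {2}, B);                  // B(0,2) += 1*1 + 2*1 = 3
  return static_cast<int>(B[0][2]);         // 3
}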

View File

@ -1,80 +0,0 @@
@q $Id: pyramid_prod.hweb 148 2005-04-19 15:12:26Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 Multiplying tensor columns. Start of {\tt pyramid\_prod.h} file.
In here, we implement the Faa Di Bruno for folded
tensors. Recall, that one step of the Faa Di Bruno is a formula:
$$\left[B_{s^k}\right]_{\alpha_1\ldots\alpha_k}=
[h_{y^l}]_{\gamma_1\ldots\gamma_l}
\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}
$$
In contrast to unfolded implementation of |UGSContainer::multAndAdd|
with help of |KronProdAll| and |UPSTensor|, we take a completely
different strategy. We cannot afford full instantiation of
$$\sum_{c\in M_{l,k}}
\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}$$
and therefore we do it per partes. We select some number of columns,
for instance 10, and calculate 10 consecutive iterators of tensor $B$. Then we
form the unfolded tensor
$$[G]_S^{\gamma_1\ldots\gamma_l}=\left[\sum_{c\in M_{l,k}}
\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}
\right]_S$$
where $S$ is the selected set of 10 indices. This is done as Kronecker
product of vectors corresponding to the selected columns. Note that, in
general, there is no symmetry in $G$; its type is a special class created for
this purpose.
If $g$ is folded, then we have to form a folded version of $G$. There is
no symmetry in the data of $G$, so we sum together all unfolded indices corresponding
to a folded index. This is perfectly OK, since we multiply
these groups of (equivalent) items by the same number in the fully
symmetric $g$.
After this, we perform ordinary matrix multiplication to obtain a
selected set of columns of $B$.
In here, we define a class for forming and representing
$[G]_S^{\gamma_1\ldots\gamma_l}$. Basically, this tensor is
row-oriented (multidimensional index is along rows), and it is fully
symmetric. So we inherit from |URTensor|. If we need its folded
version, we simply use a suitable conversion. The new abstraction will
have only a new constructor allowing construction from the given set
of indices $S$ and a given set of tensors $g$. The rest of the process
is implemented in |@<|FGSContainer::multAndAdd| unfolded code@>| or
|@<|FGSContainer::multAndAdd| folded code@>|.
@c
#ifndef PYRAMID_PROD_H
#define PYRAMID_PROD_H
#include "int_sequence.h"
#include "rfs_tensor.h"
#include "gs_tensor.h"
#include "t_container.h"
#include <vector>
using namespace std;
@<|USubTensor| class declaration@>;
#endif
@ Here we define the new tensor for representing
$[G]_S^{\gamma_1\ldots\gamma_l}$. It allows construction from a
container of folded general symmetry tensors |cont| and a set of
indices |lst|. Also, we have to supply the dimensions of the resulting tensor
$B$ and the dimensions of the tensor $h$.
@<|USubTensor| class declaration@>=
class USubTensor : public URTensor {
public:@;
USubTensor(const TensorDimens& bdims, const TensorDimens& hdims,
const FGSContainer& cont, const vector<IntSequence>& lst);
void addKronColumn(int i, const vector<const FGSTensor*>& ts,
const IntSequence& pindex);
};
@ End of {\tt pyramid\_prod.h} file.

View File

@ -0,0 +1,116 @@
// Copyright 2004, Ondra Kamenik
#include "pyramid_prod2.hh"
#include "rfs_tensor.hh"
/* Here we only call |sp.createPackedColumns(c, cols, unit_flag)| which
fills |cols| and |unit_flag| for the given column |c|. Then we set
|end_seq| according to |unit_flag| and the column lengths. */
IrregTensorHeader::IrregTensorHeader(const StackProduct<FGSTensor> &sp,
const IntSequence &c)
: nv(sp.getAllSize()),
unit_flag(sp.dimen()),
cols(new Vector *[sp.dimen()]),
end_seq(sp.dimen())
{
sp.createPackedColumns(c, cols, unit_flag);
for (int i = 0; i < sp.dimen(); i++)
{
end_seq[i] = cols[i]->length();
if (unit_flag[i] != -1)
end_seq[i] = unit_flag[i]+1;
}
}
/* Here we have to increment the given integer sequence. We do it by
the following code, whose pattern is valid for all tensors. The only
difference is how we increment an item of the coordinates. */
void
IrregTensorHeader::increment(IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong size of coordinates in IrregTensorHeader::increment");
if (v.size() == 0)
return;
int i = v.size()-1;
// increment |i|-th item in coordinate |v|
/* Here we increment an item of the coordinates. Whenever we reach the end of
a column coming from the matrices, and |unit_flag| is not $-1$, we have to
jump to that |unit_flag|. */
v[i]++;
if (unit_flag[i] != -1 && v[i] == cols[i]->length()-1)
v[i] = unit_flag[i];
while (i > 0 && v[i] == end_seq[i])
{
v[i] = 0;
i--;
// increment |i|-th item in coordinate |v|
/* Same code as above */
v[i]++;
if (unit_flag[i] != -1 && v[i] == cols[i]->length()-1)
v[i] = unit_flag[i];
}
}
IrregTensorHeader::~IrregTensorHeader()
{
for (int i = 0; i < dimen(); i++)
delete cols[i];
delete [] cols;
}
/* It is a product of all column lengths. */
int
IrregTensorHeader::calcMaxOffset() const
{
int res = 1;
for (int i = 0; i < dimen(); i++)
res *= cols[i]->length();
return res;
}
/* Everything is done in |IrregTensorHeader|, only we have to Kronecker
multiply all columns of the header. */
IrregTensor::IrregTensor(const IrregTensorHeader &h)
: Tensor(along_row, IntSequence(h.dimen(), 0), h.end_seq,
h.calcMaxOffset(), 1, h.dimen()),
header(h)
{
if (header.dimen() == 1)
{
getData() = *(header.cols[0]);
return;
}
Vector *last = new Vector(*(header.cols[header.dimen()-1]));
for (int i = header.dimen()-2; i > 0; i--)
{
Vector *newlast = new Vector(last->length()*header.cols[i]->length());
KronProd::kronMult(ConstVector(*(header.cols[i])),
ConstVector(*last), *newlast);
delete last;
last = newlast;
}
KronProd::kronMult(ConstVector(*(header.cols[0])),
ConstVector(*last), getData());
delete last;
}
void
IrregTensor::addTo(FRSingleTensor &out) const
{
for (index it = begin(); it != end(); ++it)
{
IntSequence tmp(it.getCoor());
tmp.sort();
Tensor::index ind(&out, tmp);
out.get(*ind, 0) += get(*it, 0);
}
}
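To see the increment logic above in isolation, here is a standalone restatement of the odometer pattern with the |unit_flag| jump (plain vectors stand in for |IntSequence| and for the packed columns; the concrete numbers are made up), together with a driver that enumerates all coordinates:
#include <iostream>
#include <vector>
// Coordinate i runs over a packed column of length col_len[i]; when it hits
// the last slot (which holds the single 1) it jumps to unit_flag[i], and it
// wraps to the next coordinate when it reaches end_seq[i].
void increment(std::vector<int> &v, const std::vector<int> &end_seq,
               const std::vector<int> &unit_flag, const std::vector<int> &col_len)
{
  int i = static_cast<int>(v.size())-1;
  v[i]++;
  if (unit_flag[i] != -1 && v[i] == col_len[i]-1)
    v[i] = unit_flag[i];
  while (i > 0 && v[i] == end_seq[i])
    {
      v[i] = 0;
      i--;
      v[i]++;
      if (unit_flag[i] != -1 && v[i] == col_len[i]-1)
        v[i] = unit_flag[i];
    }
}
int main()
{
  // dimension 0: plain column of length 2; dimension 1: packed column of
  // length 3 whose last slot is a 1 logically sitting at index 4
  std::vector<int> end_seq = {2, 5}, unit_flag = {-1, 4}, col_len = {2, 3};
  std::vector<int> v = {0, 0};
  for (int n = 0; n < 6; n++) // 2*3 = 6 coordinates in total
    {
      std::cout << v[0] << ',' << v[1] << '\n'; // 0,0 0,1 0,4 1,0 1,1 1,4
      increment(v, end_seq, unit_flag, col_len);
    }
}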

View File

@ -1,129 +0,0 @@
@q $Id: pyramid_prod2.cweb 332 2005-07-15 13:41:48Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@ Start of {\tt pyramid\_prod2.cpp} file.
@c
#include "pyramid_prod2.h"
#include "rfs_tensor.h"
@<|IrregTensorHeader| constructor code@>;
@<|IrregTensorHeader::increment| code@>;
@<|IrregTensorHeader| destructor code@>;
@<|IrregTensorHeader::calcMaxOffset| code@>;
@<|IrregTensor| constructor code@>;
@<|IrregTensor::addTo| code@>;
@ Here we only call |sp.createPackedColumns(c, cols, unit_flag)| which
fills |cols| and |unit_flag| for the given column |c|. Then we set
|end_seq| according to |unit_flag| and the column lengths.
@<|IrregTensorHeader| constructor code@>=
IrregTensorHeader::IrregTensorHeader(const StackProduct<FGSTensor>& sp,
const IntSequence& c)
: nv(sp.getAllSize()),
unit_flag(sp.dimen()),
cols(new Vector*[sp.dimen()]),
end_seq(sp.dimen())
{
sp.createPackedColumns(c, cols, unit_flag);
for (int i = 0; i < sp.dimen(); i++) {
end_seq[i] = cols[i]->length();
if (unit_flag[i] != -1)
end_seq[i] = unit_flag[i]+1;
}
}
@ Here we have to increment the given integer sequence. We do it by
the following code, whose pattern is valid for all tensors. The only
difference is how we increment an item of the coordinates.
@<|IrregTensorHeader::increment| code@>=
void IrregTensorHeader::increment(IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong size of coordinates in IrregTensorHeader::increment");
if (v.size() == 0)
return;
int i = v.size()-1;
@<increment |i|-th item in coordinate |v|@>;
while (i > 0 && v[i] == end_seq[i]) {
v[i] = 0;
i--;
@<increment |i|-th item in coordinate |v|@>;
}
}
@ Here we increment an item of the coordinates. Whenever we reach the end of
a column coming from the matrices, and |unit_flag| is not $-1$, we have to
jump to that |unit_flag|.
@<increment |i|-th item in coordinate |v|@>=
v[i]++;
if (unit_flag[i] != -1 && v[i] == cols[i]->length()-1)
v[i] = unit_flag[i];
@
@<|IrregTensorHeader| destructor code@>=
IrregTensorHeader::~IrregTensorHeader()
{
for (int i = 0; i < dimen(); i++)
delete cols[i];
delete [] cols;
}
@ It is a product of all column lengths.
@<|IrregTensorHeader::calcMaxOffset| code@>=
int IrregTensorHeader::calcMaxOffset() const
{
int res = 1;
for (int i = 0; i < dimen(); i++)
res *= cols[i]->length();
return res;
}
@ Everything is done in |IrregTensorHeader|, only we have to Kronecker
multiply all columns of the header.
@<|IrregTensor| constructor code@>=
IrregTensor::IrregTensor(const IrregTensorHeader& h)
: Tensor(along_row, IntSequence(h.dimen(), 0), h.end_seq,
h.calcMaxOffset(), 1, h.dimen()),
header(h)
{
if (header.dimen() == 1) {
getData() = *(header.cols[0]);
return;
}
Vector* last = new Vector(*(header.cols[header.dimen()-1]));
for (int i = header.dimen()-2; i > 0; i--) {
Vector* newlast = new Vector(last->length()*header.cols[i]->length());
KronProd::kronMult(ConstVector(*(header.cols[i])),
ConstVector(*last), *newlast);
delete last;
last = newlast;
}
KronProd::kronMult(ConstVector(*(header.cols[0])),
ConstVector(*last), getData());
delete last;
}
@ Clear.
@<|IrregTensor::addTo| code@>=
void IrregTensor::addTo(FRSingleTensor& out) const
{
for (index it = begin(); it != end(); ++it) {
IntSequence tmp(it.getCoor());
tmp.sort();
Tensor::index ind(&out, tmp);
out.get(*ind, 0) += get(*it, 0);
}
}
@ End of {\tt pyramid\_prod2.cpp} file.

View File

@ -0,0 +1,155 @@
// Copyright 2004, Ondra Kamenik
// Multiplying stacked tensor columns.
/* We need to calculate the following tensor product:
$$\left[f_{s^j}\right]_{\alpha_1\ldots\alpha_j}=
\sum_{l=1}^j\left[f_{z^l}\right]_{\beta_1\ldots\beta_l}
\sum_{c\in M_{l,j}}\prod_{m=1}^l\left[z_{c_m}\right]^{\beta_m}_{c_m(\alpha)}
$$
where $s=[y,u,u',\sigma]$, and $z$ is a composition of four variables,
say $[v,w,y,u]$. Note that $z$ ends with $y$ and $u$, and the only
non-zero derivative of the trailing part of $z$ involving $y$ or $u$
is the first derivative and is the unit matrix $y_y=[1]$ or
$u_u=[1]$. Also, we suppose that the dependence of $v$ and $w$ on $s$
is such that whenever a derivative of $w$ is nonzero, so is the corresponding
derivative of $v$. This means that, for any derivative and any index, there is a
contiguous part of derivatives of $v$, and optionally of $w$, followed by a
column of zeros containing at most one $1$.
This structure can be modelled and exploited at some programming
cost. For example, let us consider the following product:
$$\left[B_{y^2u^3}\right]_{\alpha_1\alpha_2\beta_1\beta_2\beta_3}=
\ldots
\left[f_{z^3}\right]_{\gamma_1\gamma_2\gamma_3}
\left[z_{yu}\right]^{\gamma_1}_{\alpha_1\beta_1}
\left[z_{y}\right]^{\gamma_2}_{\alpha_2}
\left[z_{uu}\right]^{\gamma_3}_{\beta_2\beta_3}
\ldots$$
The term corresponds to equivalence $\{\{0,2\},\{1\},\{3,4\}\}$. For
the fixed index $\alpha_1\alpha_2\beta_1\beta_2\beta_3$ we have to
make a Kronecker product of the columns
$$
\left[z_{yu}\right]_{\alpha_1\beta_1}\otimes
\left[z_{y}\right]_{\alpha_2}\otimes
\left[z_{uu}\right]_{\beta_2\beta_3}
$$
which can be written as
$$
\left[\matrix{\left[v_{yu}\right]_{\alpha_1\beta_1}\cr
\left[w_{yu}\right]_{\alpha_1\beta_1}\cr 0\cr 0}\right]\otimes
\left[\matrix{\left[v_y\right]_{\alpha_2\vphantom{(}}\cr
\left[w_y\right]_{\alpha_2}\cr 1_{\alpha_2}\cr 0}\right]\otimes
\left[\matrix{\left[v_{uu}\right]_{\beta_2\beta_3\vphantom{(}}\cr
\left[w_{uu}\right]_{\beta_2\beta_3}\cr 0\cr 0}\right]
$$
where $1_{\alpha_2}$ is a column of zeros having the only $1$ at
$\alpha_2$ index.
This file develops the abstraction for this Kronecker product column
without multiplication of the zeros at the top. Basically, it will be
a column which is a Kronecker product of the columns without the
zeros:
$$
\left[\matrix{\left[v_{yu}\right]_{\alpha_1\beta_1}\cr
\left[w_{yu}\right]_{\alpha_1\beta_1}}\right]\otimes
\left[\matrix{\left[v_y\right]_{\alpha_2}\cr
\left[w_y\right]_{\alpha_2}\cr 1}\right]\otimes
\left[\matrix{\left[v_{uu}\right]_{\beta_2\beta_3}\cr
\left[w_{uu}\right]_{\beta_2\beta_3}}\right]
$$
The class will have a tensor infrastructure introducing |index| which
iterates over all items in the column with $\gamma_1\gamma_2\gamma_3$
as coordinates in $\left[f_{z^3}\right]$. The data of such a tensor is
not suitable for any matrix operation and will have to be accessed
only through the |index|. Note that this does not matter, since
$\left[f_{z^l}\right]$ are sparse. */
#ifndef PYRAMID_PROD2_H
#define PYRAMID_PROD2_H
#include "permutation.hh"
#include "tensor.hh"
#include "tl_exception.hh"
#include "rfs_tensor.hh"
#include "stack_container.hh"
#include "Vector.h"
/* First we declare a helper class for the tensor. Its purpose is to
gather the columns which are going to be Kronecker multiplied. The
input of this helper class is a |StackProduct<FGSTensor>| and the coordinate
|c| of the column.
It maintains the |unit_flag| array, which says for which columns we must
stack a 1 below $v$ and $w$. In this case, the value of |unit_flag| is
the index of the $1$; otherwise the value of |unit_flag| is $-1$.
Also we have storage for the stacked columns |cols|. The object is
responsible for the memory management associated with this storage. That is
why we do not allow any copy constructor, since we need to be sure
that no accidental copies take place. We declare the copy constructor
as private and do not implement it. */
class IrregTensor;
class IrregTensorHeader
{
friend class IrregTensor;
int nv;
IntSequence unit_flag;
Vector **const cols;
IntSequence end_seq;
public:
IrregTensorHeader(const StackProduct<FGSTensor> &sp, const IntSequence &c);
~IrregTensorHeader();
int
dimen() const
{
return unit_flag.size();
}
void increment(IntSequence &v) const;
int calcMaxOffset() const;
private:
IrregTensorHeader(const IrregTensorHeader &);
};
/* Here we declare the irregular tensor. There is no special logic
here. We inherit from |Tensor| and we must implement three methods,
|increment|, |decrement| and |getOffset|. The last two are not
implemented now, since they are not needed, and they raise an
exception. The first just calls |increment| of the header. Also we
declare a method |addTo| which adds this unfolded irregular single
column tensor to folded (regular) single column tensor.
The header |IrregTensorHeader| is attached to the object by a
reference. This is dangerous. However, we will use this class only in
a simple loop, and both |IrregTensor| and |IrregTensorHeader| will be
destructed at the end of a block. Since the superclass |Tensor| must
be initialized before any member, we could either make a safe copy of
|IrregTensorHeader| or use the relatively dangerous reference member. For
the reason above, we chose the latter. */
class IrregTensor : public Tensor
{
const IrregTensorHeader &header;
public:
IrregTensor(const IrregTensorHeader &h);
void addTo(FRSingleTensor &out) const;
void
increment(IntSequence &v) const
{
header.increment(v);
}
void
decrement(IntSequence &v) const
{
TL_RAISE("Not implemented error in IrregTensor::decrement");
}
int
getOffset(const IntSequence &v) const
{
TL_RAISE("Not implemented error in IrregTensor::getOffset"); return 0;
}
};
#endif
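A minimal sketch of the packed-column idea used throughout this header (the struct and its accessor are hypothetical; in the library the packing is produced by |StackProduct::createPackedColumns|): a stacked column $[v; w; e_k; 0]$ is stored as its dense head plus the logical index of the single 1, so the zero tail is never multiplied.
#include <vector>
// Hypothetical packed representation of a stacked column [v; w; e_k; 0].
struct PackedColumn
{
  std::vector<double> dense; // the nonzero head [v; w]
  int unit_pos;              // logical index of the single 1 in the zero tail, or -1
};
// Entry i of the full (logical) column: everything outside the dense head
// is zero except the optional 1 at unit_pos.
double entry(const PackedColumn &c, int i)
{
  if (i < static_cast<int>(c.dense.size()))
    return c.dense[i];
  return (i == c.unit_pos) ? 1.0 : 0.0;
}
int main()
{
  PackedColumn c{{0.5, -1.0}, 3}; // dense head of length 2, a 1 at index 3
  double s = 0.0;
  for (int i = 0; i < 5; i++)
    s += entry(c, i);             // 0.5 - 1.0 + 0.0 + 1.0 + 0.0 = 0.5
  return s > 0.0 ? 0 : 1;
}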

View File

@ -1,151 +0,0 @@
@q $Id: pyramid_prod2.hweb 148 2005-04-19 15:12:26Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 Multiplying stacked tensor columns. Start of {\tt pyramid\_prod2.h} file.
We need to calculate the following tensor product:
$$\left[f_{s^j}\right]_{\alpha_1\ldots\alpha_j}=
\sum_{l=1}^j\left[f_{z^l}\right]_{\beta_1\ldots\beta_l}
\sum_{c\in M_{l,j}}\prod_{m=1}^l\left[z_{c_m}\right]^{\beta_m}_{c_m(\alpha)}
$$
where $s=[y,u,u',\sigma]$, and $z$ is a composition of four variables,
say $[v,w,y,u]$. Note that $z$ ends with $y$ and $u$, and the only
non-zero derivative of the trailing part of $z$ involving $y$ or $u$
is the first derivative and is the unit matrix $y_y=[1]$ or
$u_u=[1]$. Also, we suppose that the dependence of $v$ and $w$ on $s$
is such that whenever a derivative of $w$ is nonzero, so is the corresponding
derivative of $v$. This means that, for any derivative and any index, there is a
contiguous part of derivatives of $v$, and optionally of $w$, followed by a
column of zeros containing at most one $1$.
This structure can be modelled and exploited at some programming
cost. For example, let us consider the following product:
$$\left[B_{y^2u^3}\right]_{\alpha_1\alpha_2\beta_1\beta_2\beta_3}=
\ldots
\left[f_{z^3}\right]_{\gamma_1\gamma_2\gamma_3}
\left[z_{yu}\right]^{\gamma_1}_{\alpha_1\beta_1}
\left[z_{y}\right]^{\gamma_2}_{\alpha_2}
\left[z_{uu}\right]^{\gamma_3}_{\beta_2\beta_3}
\ldots$$
The term corresponds to equivalence $\{\{0,2\},\{1\},\{3,4\}\}$. For
the fixed index $\alpha_1\alpha_2\beta_1\beta_2\beta_3$ we have to
make a Kronecker product of the columns
$$
\left[z_{yu}\right]_{\alpha_1\beta_1}\otimes
\left[z_{y}\right]_{\alpha_2}\otimes
\left[z_{uu}\right]_{\beta_2\beta_3}
$$
which can be written as
$$
\left[\matrix{\left[v_{yu}\right]_{\alpha_1\beta_1}\cr
\left[w_{yu}\right]_{\alpha_1\beta_1}\cr 0\cr 0}\right]\otimes
\left[\matrix{\left[v_y\right]_{\alpha_2\vphantom{(}}\cr
\left[w_y\right]_{\alpha_2}\cr 1_{\alpha_2}\cr 0}\right]\otimes
\left[\matrix{\left[v_{uu}\right]_{\beta_2\beta_3\vphantom{(}}\cr
\left[w_{uu}\right]_{\beta_2\beta_3}\cr 0\cr 0}\right]
$$
where $1_{\alpha_2}$ is a column of zeros having the only $1$ at
$\alpha_2$ index.
This file develops the abstraction for this Kronecker product column
without multiplication of the zeros at the top. Basically, it will be
a column which is a Kronecker product of the columns without the
zeros:
$$
\left[\matrix{\left[v_{yu}\right]_{\alpha_1\beta_1}\cr
\left[w_{yu}\right]_{\alpha_1\beta_1}}\right]\otimes
\left[\matrix{\left[v_y\right]_{\alpha_2}\cr
\left[w_y\right]_{\alpha_2}\cr 1}\right]\otimes
\left[\matrix{\left[v_{uu}\right]_{\beta_2\beta_3}\cr
\left[w_{uu}\right]_{\beta_2\beta_3}}\right]
$$
The class will have a tensor infrastructure introducing |index| which
iterates over all items in the column with $\gamma_1\gamma_2\gamma_3$
as coordinates in $\left[f_{z^3}\right]$. The data of such a tensor is
not suitable for any matrix operation and will have to be accessed
only through the |index|. Note that this does not matter, since
$\left[f_{z^l}\right]$ are sparse.
@c
#ifndef PYRAMID_PROD2_H
#define PYRAMID_PROD2_H
#include "permutation.h"
#include "tensor.h"
#include "tl_exception.h"
#include "rfs_tensor.h"
#include "stack_container.h"
#include "Vector.h"
@<|IrregTensorHeader| class declaration@>;
@<|IrregTensor| class declaration@>;
#endif
@ First we declare a helper class for the tensor. Its purpose is to
gather the columns which are going to be Kronecker multiplied. The
input of this helper class is a |StackProduct<FGSTensor>| and the coordinate
|c| of the column.
It maintains the |unit_flag| array, which says for which columns we must
stack a 1 below $v$ and $w$. In this case, the value of |unit_flag| is
the index of the $1$; otherwise the value of |unit_flag| is $-1$.
Also we have storage for the stacked columns |cols|. The object is
responsible for the memory management associated with this storage. That is
why we do not allow any copy constructor, since we need to be sure
that no accidental copies take place. We declare the copy constructor
as private and do not implement it.
@<|IrregTensorHeader| class declaration@>=
class IrregTensor;
class IrregTensorHeader {
friend class IrregTensor;
int nv;
IntSequence unit_flag;
Vector** const cols;
IntSequence end_seq;
public:@;
IrregTensorHeader(const StackProduct<FGSTensor>& sp, const IntSequence& c);
~IrregTensorHeader();
int dimen() const
{@+ return unit_flag.size();@+}
void increment(IntSequence& v) const;
int calcMaxOffset() const;
private:@;
IrregTensorHeader(const IrregTensorHeader&);
};
@ Here we declare the irregular tensor. There is no special logic
here. We inherit from |Tensor| and we must implement three methods,
|increment|, |decrement| and |getOffset|. The last two are not
implemented now, since they are not needed, and they raise an
exception. The first just calls |increment| of the header. Also we
declare a method |addTo| which adds this unfolded irregular single
column tensor to folded (regular) single column tensor.
The header |IrregTensorHeader| is attached to the object by a
reference. This is dangerous. However, we will use this class only in
a simple loop, and both |IrregTensor| and |IrregTensorHeader| will be
destructed at the end of a block. Since the superclass |Tensor| must
be initialized before any member, we could either make a safe copy of
|IrregTensorHeader| or use the relatively dangerous reference member. For
the reason above, we chose the latter.
@<|IrregTensor| class declaration@>=
class IrregTensor : public Tensor {
const IrregTensorHeader& header;
public:@;
IrregTensor(const IrregTensorHeader& h);
void addTo(FRSingleTensor& out) const;
void increment(IntSequence& v) const
{@+ header.increment(v);@+}
void decrement(IntSequence& v) const
{@+ TL_RAISE("Not implemented error in IrregTensor::decrement");@+}
int getOffset(const IntSequence& v) const
{@+ TL_RAISE("Not implemented error in IrregTensor::getOffset");@+return 0;@+}
};
@ End of {\tt pyramid\_prod2.h} file.

View File

@ -0,0 +1,187 @@
// Copyright 2004, Ondra Kamenik
#include "rfs_tensor.hh"
#include "kron_prod.hh"
#include "tl_exception.hh"
// |FRTensor| conversion from unfolded
/* The conversion from unfolded to folded sums up all the data of the
unfolded tensor corresponding to one folded index. So we go through all the
rows in the unfolded tensor |ut|, make an index of the folded tensor
by sorting the coordinates, and add the row. */
FRTensor::FRTensor(const URTensor &ut)
: FTensor(along_row, IntSequence(ut.dimen(), ut.nvar()),
FFSTensor::calcMaxOffset(ut.nvar(), ut.dimen()), ut.ncols(),
ut.dimen()),
nv(ut.nvar())
{
zeros();
for (index in = ut.begin(); in != ut.end(); ++in)
{
IntSequence vtmp(in.getCoor());
vtmp.sort();
index tar(this, vtmp);
addRow(ut, *in, *tar);
}
}
/* Here we just make a new instance and return a reference to it. */
UTensor &
FRTensor::unfold() const
{
return *(new URTensor(*this));
}
/* Incrementing is easy. The same as for |FFSTensor|. */
void
FRTensor::increment(IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in FRTensor::increment");
UTensor::increment(v, nv);
v.monotone();
}
/* Decrement calls static |FTensor::decrement|. */
void
FRTensor::decrement(IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in FRTensor::decrement");
FTensor::decrement(v, nv);
}
// |URTensor| conversion from folded
/* Here we convert a folded full symmetry tensor to an unfolded one. We copy all
columns of the folded tensor to the unfolded one and leave the other columns
(duplicates) zero. In this way, if the unfolded tensor is folded back,
we should get the same data. */
URTensor::URTensor(const FRTensor &ft)
: UTensor(along_row, IntSequence(ft.dimen(), ft.nvar()),
UFSTensor::calcMaxOffset(ft.nvar(), ft.dimen()), ft.ncols(),
ft.dimen()),
nv(ft.nvar())
{
zeros();
for (index src = ft.begin(); src != ft.end(); ++src)
{
index in(this, src.getCoor());
copyRow(ft, *src, *in);
}
}
/* Here we just return a reference to a new instance of the folded tensor. */
FTensor &
URTensor::fold() const
{
return *(new FRTensor(*this));
}
/* Here we just call |UTensor| respective static methods. */
void
URTensor::increment(IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in URTensor::increment");
UTensor::increment(v, nv);
}
void
URTensor::decrement(IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in URTensor::decrement");
UTensor::decrement(v, nv);
}
int
URTensor::getOffset(const IntSequence &v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input vector size in URTensor::getOffset");
return UTensor::getOffset(v, nv);
}
/* Here we construct $v_1\otimes v_2\otimes\ldots\otimes v_n$, where
$v_1,v_2,\ldots,v_n$ are stored in |vector<ConstVector>|. */
URSingleTensor::URSingleTensor(const vector<ConstVector> &cols)
: URTensor(1, cols[0].length(), cols.size())
{
if (dimen() == 1)
{
getData() = cols[0];
return;
}
Vector *last = new Vector(cols[cols.size()-1]);
for (int i = cols.size()-2; i > 0; i--)
{
Vector *newlast = new Vector(Tensor::power(nvar(), cols.size()-i));
KronProd::kronMult(cols[i], ConstVector(*last), *newlast);
delete last;
last = newlast;
}
KronProd::kronMult(cols[0], ConstVector(*last), getData());
delete last;
}
/* Here we construct $v\otimes\ldots\otimes v$, where the number of $v$
copies is |d|. */
URSingleTensor::URSingleTensor(const ConstVector &v, int d)
: URTensor(1, v.length(), d)
{
if (d == 1)
{
getData() = v;
return;
}
Vector *last = new Vector(v);
for (int i = d-2; i > 0; i--)
{
Vector *newlast = new Vector(last->length()*v.length());
KronProd::kronMult(v, ConstVector(*last), *newlast);
delete last;
last = newlast;
}
KronProd::kronMult(v, ConstVector(*last), getData());
delete last;
}
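The two constructors above build the Kronecker product right to left with manually managed temporaries; an equivalent standalone version of the Kronecker power (std vectors instead of |Vector|/|ConstVector|, without manual new/delete) reads:
#include <utility>
#include <vector>
// Kronecker power of v with d factors, accumulated right to left.
std::vector<double> kronPower(const std::vector<double> &v, int d)
{
  std::vector<double> last = v;
  for (int i = 1; i < d; i++)
    {
      std::vector<double> next(v.size()*last.size());
      for (size_t a = 0; a < v.size(); a++)
        for (size_t b = 0; b < last.size(); b++)
          next[a*last.size()+b] = v[a]*last[b];
      last = std::move(next);
    }
  return last;
}
int main()
{
  std::vector<double> v = {1.0, 2.0};
  return static_cast<int>(kronPower(v, 2).back()); // last entry of v (x) v = 2*2 = 4
}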
/* Here we construct an |FRSingleTensor| from a |URSingleTensor| and return
a reference to it. */
FTensor &
URSingleTensor::fold() const
{
return *(new FRSingleTensor(*this));
}
// |FRSingleTensor| conversion from unfolded
/* The conversion from unfolded |URSingleTensor| to folded
|FRSingleTensor| is completely the same as the conversion from |URTensor|
to |FRTensor|, except that we do not copy rows but elements. */
FRSingleTensor::FRSingleTensor(const URSingleTensor &ut)
: FRTensor(1, ut.nvar(), ut.dimen())
{
zeros();
for (index in = ut.begin(); in != ut.end(); ++in)
{
IntSequence vtmp(in.getCoor());
vtmp.sort();
index tar(this, vtmp);
get(*tar, 0) += ut.get(*in, 0);
}
}
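The fold-by-sorting logic used in this file can be shown in a few lines; in the sketch below a std::map stands in for the folded tensor's offset machinery (an assumption made purely for brevity), and all unfolded entries whose sorted coordinates coincide are summed:
#include <algorithm>
#include <iostream>
#include <map>
#include <vector>
int main()
{
  // unfolded 2-dimensional tensor over 2 variables, entries keyed by (i,j)
  std::map<std::vector<int>, double> unfolded = {
    {{0, 0}, 1.0}, {{0, 1}, 2.0}, {{1, 0}, 3.0}, {{1, 1}, 4.0}};
  std::map<std::vector<int>, double> folded;
  for (const auto &e : unfolded)
    {
      std::vector<int> coor = e.first;
      std::sort(coor.begin(), coor.end()); // folded index = sorted coordinates
      folded[coor] += e.second;            // sum all equivalent unfolded entries
    }
  // the duplicates (0,1) and (1,0) collapse into one folded slot
  std::cout << folded[std::vector<int>{0, 1}] << '\n'; // prints 5
}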

View File

@ -1,205 +0,0 @@
@q $Id: rfs_tensor.cweb 148 2005-04-19 15:12:26Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@ Start of {\tt rfs\_tensor.cpp} file.
@c
#include "rfs_tensor.h"
#include "kron_prod.h"
#include "tl_exception.h"
@<|FRTensor| conversion from unfolded@>;
@<|FRTensor::unfold| code@>;
@<|FRTensor::increment| code@>;
@<|FRTensor::decrement| code@>;
@<|URTensor| conversion from folded@>;
@<|URTensor::fold| code@>;
@<|URTensor| increment and decrement@>;
@<|URTensor::getOffset| code@>;
@<|URSingleTensor| constructor 1 code@>;
@<|URSingleTensor| constructor 2 code@>;
@<|URSingleTensor::fold| code@>;
@<|FRSingleTensor| conversion from unfolded@>;
@ The conversion from unfolded to folded sums up all the data of the
unfolded tensor corresponding to one folded index. So we go through all the
rows in the unfolded tensor |ut|, make an index of the folded tensor
by sorting the coordinates, and add the row.
@<|FRTensor| conversion from unfolded@>=
FRTensor::FRTensor(const URTensor& ut)
: FTensor(along_row, IntSequence(ut.dimen(), ut.nvar()),
FFSTensor::calcMaxOffset(ut.nvar(), ut.dimen()), ut.ncols(),
ut.dimen()),
nv(ut.nvar())
{
zeros();
for (index in = ut.begin(); in != ut.end(); ++in) {
IntSequence vtmp(in.getCoor());
vtmp.sort();
index tar(this, vtmp);
addRow(ut, *in, *tar);
}
}
@ Here we just make a new instance and return a reference to it.
@<|FRTensor::unfold| code@>=
UTensor& FRTensor::unfold() const
{
return *(new URTensor(*this));
}
@ Incrementing is easy. The same as for |FFSTensor|.
@<|FRTensor::increment| code@>=
void FRTensor::increment(IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in FRTensor::increment");
UTensor::increment(v, nv);
v.monotone();
}
@ Decrement calls static |FTensor::decrement|.
@<|FRTensor::decrement| code@>=
void FRTensor::decrement(IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in FRTensor::decrement");
FTensor::decrement(v, nv);
}
@ Here we convert a folded full symmetry tensor to an unfolded one. We copy all
columns of the folded tensor to the unfolded one and leave the other columns
(duplicates) zero. In this way, if the unfolded tensor is folded back,
we should get the same data.
@<|URTensor| conversion from folded@>=
URTensor::URTensor(const FRTensor& ft)
: UTensor(along_row, IntSequence(ft.dimen(), ft.nvar()),
UFSTensor::calcMaxOffset(ft.nvar(), ft.dimen()), ft.ncols(),
ft.dimen()),
nv(ft.nvar())
{
zeros();
for (index src = ft.begin(); src != ft.end(); ++src) {
index in(this, src.getCoor());
copyRow(ft, *src, *in);
}
}
@ Here we just return a reference to a new instance of the folded tensor.
@<|URTensor::fold| code@>=
FTensor& URTensor::fold() const
{
return *(new FRTensor(*this));
}
@ Here we just call |UTensor| respective static methods.
@<|URTensor| increment and decrement@>=
void URTensor::increment(IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in URTensor::increment");
UTensor::increment(v, nv);
}
void URTensor::decrement(IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input/output vector size in URTensor::decrement");
UTensor::decrement(v, nv);
}
@
@<|URTensor::getOffset| code@>=
int URTensor::getOffset(const IntSequence& v) const
{
TL_RAISE_IF(v.size() != dimen(),
"Wrong input vector size in URTensor::getOffset");
return UTensor::getOffset(v, nv);
}
@ Here we construct $v_1\otimes v_2\otimes\ldots\otimes v_n$, where
$v_1,v_2,\ldots,v_n$ are stored in |vector<ConstVector>|.
@<|URSingleTensor| constructor 1 code@>=
URSingleTensor::URSingleTensor(const vector<ConstVector>& cols)
: URTensor(1, cols[0].length(), cols.size())
{
if (dimen() == 1) {
getData() = cols[0];
return;
}
Vector* last = new Vector(cols[cols.size()-1]);
for (int i = cols.size()-2; i > 0; i--) {
Vector* newlast = new Vector(Tensor::power(nvar(), cols.size()-i));
KronProd::kronMult(cols[i], ConstVector(*last), *newlast);
delete last;
last = newlast;
}
KronProd::kronMult(cols[0], ConstVector(*last), getData());
delete last;
}
@ Here we construct $v\otimes\ldots\otimes v$, where the number of $v$
copies is |d|.
@<|URSingleTensor| constructor 2 code@>=
URSingleTensor::URSingleTensor(const ConstVector& v, int d)
: URTensor(1, v.length(), d)
{
if (d == 1) {
getData() = v;
return;
}
Vector* last = new Vector(v);
for (int i = d-2; i > 0; i--) {
Vector* newlast = new Vector(last->length()*v.length());
KronProd::kronMult(v, ConstVector(*last), *newlast);
delete last;
last = newlast;
}
KronProd::kronMult(v, ConstVector(*last), getData());
delete last;
}
@ Here we construct an |FRSingleTensor| from a |URSingleTensor| and return
a reference to it.
@<|URSingleTensor::fold| code@>=
FTensor& URSingleTensor::fold() const
{
return *(new FRSingleTensor(*this));
}
@ The conversion from unfolded |URSingleTensor| to folded
|FRSingleTensor| is completely the same as the conversion from |URTensor|
to |FRTensor|, except that we do not copy rows but elements.
@<|FRSingleTensor| conversion from unfolded@>=
FRSingleTensor::FRSingleTensor(const URSingleTensor& ut)
: FRTensor(1, ut.nvar(), ut.dimen())
{
zeros();
for (index in = ut.begin(); in != ut.end(); ++in) {
IntSequence vtmp(in.getCoor());
vtmp.sort();
index tar(this, vtmp);
get(*tar, 0) += ut.get(*in, 0);
}
}
@ End of {\tt rfs\_tensor.cpp} file.

View File

@ -0,0 +1,173 @@
// Copyright 2004, Ondra Kamenik
// Row-wise full symmetry tensor.
/* Here we define classes for full symmetry tensors with the
multidimensional index identified with rows. The primary usage is for
storage of data coming from (or from a sum of)
$$\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}$$
where the $\alpha$, coming from a multidimensional index, go through some
set $S$, and $c$ is some equivalence. So we model a tensor of the form:
$$\left[\prod_{m=1}^l
\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}
\right]_S^{\gamma_1\ldots\gamma_l}$$
Since all $\gamma_1,\ldots,\gamma_l$ correspond to the same variable,
the tensor is fully symmetric. The set of indices $S$ cannot be very
large and sometimes it is only one element. This case is handled in a
special subclass.
We provide both folded and unfolded versions. Their logic is perfectly
the same as in |UFSTensor| and |FFSTensor|, with two exceptions. One
has already been mentioned: the multidimensional index is along the
rows. The second is the conversion between the two types. Since this
kind of tensor is used to multiply (from the right) a tensor whose
multidimensional index is identified with columns, we will need a
different way of a conversion. If the multiplication of two folded
tensors is to be equivalent with multiplication of two unfolded, the
folding of the right tensor must sum all equivalent elements since
they are multiplied with the same number from the folded
tensor. (Equivalent here means all elements of unfolded tensor
corresponding to one element in folded tensor.) For this reason, it is
necessary to calculate a column number from the given sequence, so we
implement |getOffset|. The process of unfolding is not used, so we
implemented it so that unfolding and then folding a tensor would yield
the same data. */
#ifndef RFS_TENSOR_H
#define RFS_TENSOR_H
#include "tensor.hh"
#include "fs_tensor.hh"
#include "symmetry.hh"
/* This is straightforward and very similar to |UFSTensor|. */
class FRTensor;
class URTensor : public UTensor
{
int nv;
public:
URTensor(int c, int nvar, int d)
: UTensor(along_row, IntSequence(d, nvar),
UFSTensor::calcMaxOffset(nvar, d), c, d), nv(nvar)
{
}
URTensor(const URTensor &ut)
: UTensor(ut), nv(ut.nv)
{
}
URTensor(const FRTensor &ft);
virtual ~URTensor()
{
}
void increment(IntSequence &v) const;
void decrement(IntSequence &v) const;
FTensor &fold() const;
int getOffset(const IntSequence &v) const;
int
nvar() const
{
return nv;
}
Symmetry
getSym() const
{
return Symmetry(dimen());
}
};
/* This is straightforward and very similar to |FFSTensor|. */
class FRTensor : public FTensor
{
int nv;
public:
FRTensor(int c, int nvar, int d)
: FTensor(along_row, IntSequence(d, nvar),
FFSTensor::calcMaxOffset(nvar, d), c, d), nv(nvar)
{
}
FRTensor(const FRTensor &ft)
: FTensor(ft), nv(ft.nv)
{
}
FRTensor(const URTensor &ut);
virtual ~FRTensor()
{
}
void increment(IntSequence &v) const;
void decrement(IntSequence &v) const;
UTensor &unfold() const;
int
nvar() const
{
return nv;
}
int
getOffset(const IntSequence &v) const
{
return FTensor::getOffset(v, nv);
}
Symmetry
getSym() const
{
return Symmetry(dimen());
}
};
/* The following class represents a specialization of |URTensor| coming
from Kronecker multiplication of a few vectors. So the resulting
row-oriented tensor has one column. We provide two constructors,
one constructs the tensor from a few vectors stored as
|vector<ConstVector>|. The second makes the Kronecker power of one
given vector. */
class URSingleTensor : public URTensor
{
public:
URSingleTensor(int nvar, int d)
: URTensor(1, nvar, d)
{
}
URSingleTensor(const vector<ConstVector> &cols);
URSingleTensor(const ConstVector &v, int d);
URSingleTensor(const URSingleTensor &ut)
: URTensor(ut)
{
}
virtual ~URSingleTensor()
{
}
FTensor &fold() const;
};
/* This class represents a one-column row-oriented tensor. The only way
to construct it is from a |URSingleTensor| or from
scratch. The folding algorithm is the same as the folding of a general
|URTensor|. Only its implementation is different, since we do not copy
rows, but only elements. */
class FRSingleTensor : public FRTensor
{
public:
FRSingleTensor(int nvar, int d)
: FRTensor(1, nvar, d)
{
}
FRSingleTensor(const URSingleTensor &ut);
FRSingleTensor(const FRSingleTensor &ft)
: FRTensor(ft)
{
}
virtual ~FRSingleTensor()
{
}
};
#endif
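For intuition about the two |calcMaxOffset| variants referenced in the constructors above: an unfolded full-symmetry tensor of dimension d over nvar variables has nvar^d row indices, while the folded one has C(nvar+d-1, d) of them (the standard multiset count; the formula is textbook combinatorics, not quoted from the TL sources). A quick numeric check:
#include <iostream>
long binomial(int n, int k)
{
  long r = 1;
  for (int i = 1; i <= k; i++)
    r = r*(n-k+i)/i; // partial products are integers, so the division is exact
  return r;
}
int main()
{
  int nvar = 3, d = 2;
  long unfolded = 1;
  for (int i = 0; i < d; i++)
    unfolded *= nvar;
  std::cout << "unfolded rows: " << unfolded              // 9
            << ", folded rows: " << binomial(nvar+d-1, d) // 6
            << '\n';
}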

View File

@ -1,148 +0,0 @@
@q $Id: rfs_tensor.hweb 741 2006-05-09 11:12:46Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 Row-wise full symmetry tensor. Start of {\tt rfs\_tensor.h} file.
Here we define classes for full symmetry tensors with the
multidimensional index identified with rows. The primary usage is for
storage of data coming from (or from a sum of)
$$\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}$$
where the $\alpha$, coming from a multidimensional index, go through some
set $S$, and $c$ is some equivalence. So we model a tensor of the form:
$$\left[\prod_{m=1}^l
\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}
\right]_S^{\gamma_1\ldots\gamma_l}$$
Since all $\gamma_1,\ldots,\gamma_l$ correspond to the same variable,
the tensor is fully symmetric. The set of indices $S$ cannot be very
large and sometimes it is only one element. This case is handled in a
special subclass.
We provide both folded and unfolded versions. Their logic is perfectly
the same as in |UFSTensor| and |FFSTensor|, with two exceptions. One
has already been mentioned: the multidimensional index is along the
rows. The second is the conversion between the two types. Since this
kind of tensor is used to multiply (from the right) a tensor whose
multidimensional index is identified with columns, we will need a
different way of a conversion. If the multiplication of two folded
tensors is to be equivalent with multiplication of two unfolded, the
folding of the right tensor must sum all equivalent elements since
they are multiplied with the same number from the folded
tensor. (Equivalent here means all elements of unfolded tensor
corresponding to one element in folded tensor.) For this reason, it is
necessary to calculate a column number from the given sequence, so we
implement |getOffset|. Process of unfolding is not used, so we
implemented it so that unfolding and then folding a tensor would yield
the same data.
@c
#ifndef RFS_TENSOR_H
#define RFS_TENSOR_H
#include "tensor.h"
#include "fs_tensor.h"
#include "symmetry.h"
@<|URTensor| class declaration@>;
@<|FRTensor| class declaration@>;
@<|URSingleTensor| class declaration@>;
@<|FRSingleTensor| class declaration@>;
#endif
@ This is straightforward and very similar to |UFSTensor|.
@<|URTensor| class declaration@>=
class FRTensor;
class URTensor : public UTensor {
int nv;
public:@;
@<|URTensor| constructor declaration@>;
virtual ~URTensor()@+ {}
void increment(IntSequence& v) const;
void decrement(IntSequence& v) const;
FTensor& fold() const;
int getOffset(const IntSequence& v) const;
int nvar() const
{@+ return nv;@+}
Symmetry getSym() const
{@+ return Symmetry(dimen());@+}
};
@
@<|URTensor| constructor declaration@>=
URTensor(int c, int nvar, int d)
: UTensor(along_row, IntSequence(d, nvar),
UFSTensor::calcMaxOffset(nvar, d), c, d), nv(nvar)@+ {}
URTensor(const URTensor& ut)
: UTensor(ut), nv(ut.nv)@+ {}
URTensor(const FRTensor& ft);
@ This is straightforward and very similar to |FFSTensor|.
@<|FRTensor| class declaration@>=
class FRTensor : public FTensor {
int nv;
public:@;
@<|FRTensor| constructor declaration@>;
virtual ~FRTensor()@+ {}
void increment(IntSequence& v) const;
void decrement(IntSequence& v) const;
UTensor& unfold() const;
int nvar() const
{@+ return nv;@+}
int getOffset(const IntSequence& v) const
{@+ return FTensor::getOffset(v, nv);@+}
Symmetry getSym() const
{@+ return Symmetry(dimen());@+}
};
@
@<|FRTensor| constructor declaration@>=
FRTensor(int c, int nvar, int d)
: FTensor(along_row, IntSequence(d, nvar),
FFSTensor::calcMaxOffset(nvar, d), c, d), nv(nvar)@+ {}
FRTensor(const FRTensor& ft)
: FTensor(ft), nv(ft.nv)@+ {}
FRTensor(const URTensor& ut);
@ The following class represents specialization of |URTensor| coming
from Kronecker multiplication of a few vectors. So the resulting
row-oriented tensor has one column. We provide two constructors,
one constructs the tensor from a few vectors stored as
|vector<ConstVector>|. The second makes the Kronecker power of one
given vector.
@<|URSingleTensor| class declaration@>=
class URSingleTensor : public URTensor {
public:@;
URSingleTensor(int nvar, int d)
: URTensor(1, nvar, d)@+ {}
URSingleTensor(const vector<ConstVector>& cols);
URSingleTensor(const ConstVector& v, int d);
URSingleTensor(const URSingleTensor& ut)
: URTensor(ut)@+ {}
virtual ~URSingleTensor()@+ {}
FTensor& fold() const;
};
@ This class represents one column row-oriented tensor. The only way
how to construct it is from the |URSingleTensor| or from the
scratch. The folding algorithm is the same as folding of general
|URTensor|. Only its implementation is different, since we do not copy
rows, but only elements.
@<|FRSingleTensor| class declaration@>=
class FRSingleTensor : public FRTensor {
public:@;
FRSingleTensor(int nvar, int d)
: FRTensor(1, nvar, d)@+ {}
FRSingleTensor(const URSingleTensor& ut);
FRSingleTensor(const FRSingleTensor& ft)
: FRTensor(ft)@+ {}
virtual ~FRSingleTensor()@+ {}
};
@ End of {\tt rfs\_tensor.h} file.

View File

@ -0,0 +1,248 @@
// Copyright 2004, Ondra Kamenik
#include "sparse_tensor.hh"
#include "fs_tensor.hh"
#include "tl_exception.hh"
#include <cmath>
/* This is straightforward. Before we insert anything, we do a few
checks. Then we reset |first_nz_row| and |last_nz_row| if necessary. */
void
SparseTensor::insert(const IntSequence &key, int r, double c)
{
TL_RAISE_IF(r < 0 || r >= nr,
"Row number out of dimension of tensor in SparseTensor::insert");
TL_RAISE_IF(key.size() != dimen(),
"Wrong length of key in SparseTensor::insert");
TL_RAISE_IF(!std::isfinite(c),
"Insertion of non-finite value in SparseTensor::insert");
iterator first_pos = m.lower_bound(key);
// check that pair |key| and |r| is unique
iterator last_pos = m.upper_bound(key);
for (iterator it = first_pos; it != last_pos; ++it)
if ((*it).second.first == r)
{
TL_RAISE("Duplicate <key, r> insertion in SparseTensor::insert");
return;
}
m.insert(first_pos, Map::value_type(key, Item(r, c)));
if (first_nz_row > r)
first_nz_row = r;
if (last_nz_row < r)
last_nz_row = r;
}
/* This returns true if all items are finite (neither NaN nor Inf). */
bool
SparseTensor::isFinite() const
{
bool res = true;
const_iterator run = m.begin();
while (res && run != m.end())
{
if (!std::isfinite((*run).second.second))
res = false;
++run;
}
return res;
}
/* This returns the ratio of the number of non-zero columns in the folded
   tensor to the total number of columns. */
double
SparseTensor::getFoldIndexFillFactor() const
{
int cnt = 0;
const_iterator start_col = m.begin();
while (start_col != m.end())
{
cnt++;
const IntSequence &key = (*start_col).first;
start_col = m.upper_bound(key);
}
return ((double) cnt)/ncols();
}
/* This returns the ratio of the number of non-zero columns in the unfolded
   tensor to the total number of columns. */
double
SparseTensor::getUnfoldIndexFillFactor() const
{
int cnt = 0;
const_iterator start_col = m.begin();
while (start_col != m.end())
{
const IntSequence &key = (*start_col).first;
Symmetry s(key);
cnt += Tensor::noverseq(s);
start_col = m.upper_bound(key);
}
return ((double) cnt)/ncols();
}
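/* Illustrative sketch, not part of the original sources: each folded key with
   sorted coordinates corresponds to as many unfolded columns as there are
   distinct orderings of the key (a multinomial coefficient), which is
   presumably what |Tensor::noverseq| returns. A stand-alone count: */
#include <algorithm>
#include <vector>
static inline int
unfold_column_count(std::vector<int> key)
{
  std::sort(key.begin(), key.end());
  int cnt = 0;
  do
    cnt++;
  while (std::next_permutation(key.begin(), key.end()));
  return cnt; // e.g. key (0,0,1) gives 3: (0,0,1), (0,1,0), (1,0,0)
}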
/* This prints the fill factor and all items. */
void
SparseTensor::print() const
{
printf("Fill: %3.2f %%\n", 100*getFillFactor());
const_iterator start_col = m.begin();
while (start_col != m.end())
{
const IntSequence &key = (*start_col).first;
printf("Column: "); key.print();
const_iterator end_col = m.upper_bound(key);
int cnt = 1;
for (const_iterator run = start_col; run != end_col; ++run, cnt++)
{
if ((cnt/7)*7 == cnt)
printf("\n");
printf("%d(%6.2g) ", (*run).second.first, (*run).second.second);
}
printf("\n");
start_col = end_col;
}
}
FSSparseTensor::FSSparseTensor(int d, int nvar, int r)
: SparseTensor(d, r, FFSTensor::calcMaxOffset(nvar, d)),
nv(nvar), sym(d)
{
}
FSSparseTensor::FSSparseTensor(const FSSparseTensor &t)
: SparseTensor(t),
nv(t.nvar()), sym(t.sym)
{
}
void
FSSparseTensor::insert(const IntSequence &key, int r, double c)
{
TL_RAISE_IF(!key.isSorted(),
"Key is not sorted in FSSparseTensor::insert");
TL_RAISE_IF(key[key.size()-1] >= nv || key[0] < 0,
"Wrong value of the key in FSSparseTensor::insert");
SparseTensor::insert(key, r, c);
}
/* We go through the tensor |t|, which is supposed to have a single
   column. If an item of |t| is nonzero, we make a key by sorting its
   index, and then we go through all items having the same key (i.e. its
   column), obtain the row number and the element, and do the
   multiplication. A stand-alone sketch of this accumulation follows the
   function below.
   The test for non-zero is |a != 0.0|, since there will be items which
   are exact zeros.
   I have also tried making the loop through the sparse tensor the outer
   one, and finding the index of tensor |t| within the loop. Surprisingly,
   it is a little slower (for monomial tests with a probability of zeros
   equal to 0.3). But everything depends on how filled the sparse tensor
   is. */
void
FSSparseTensor::multColumnAndAdd(const Tensor &t, Vector &v) const
{
// check compatibility of input parameters
TL_RAISE_IF(v.length() != nrows(),
"Wrong size of output vector in FSSparseTensor::multColumnAndAdd");
TL_RAISE_IF(t.dimen() != dimen(),
"Wrong dimension of tensor in FSSparseTensor::multColumnAndAdd");
TL_RAISE_IF(t.ncols() != 1,
"The input tensor is not single-column in FSSparseTensor::multColumnAndAdd");
for (Tensor::index it = t.begin(); it != t.end(); ++it)
{
int ind = *it;
double a = t.get(ind, 0);
if (a != 0.0)
{
IntSequence key(it.getCoor());
key.sort();
// check that |key| is within the range
TL_RAISE_IF(key[0] < 0 || key[key.size()-1] >= nv,
"Wrong coordinates of index in FSSparseTensor::multColumnAndAdd");
const_iterator first_pos = m.lower_bound(key);
const_iterator last_pos = m.upper_bound(key);
for (const_iterator cit = first_pos; cit != last_pos; ++cit)
{
int r = (*cit).second.first;
double c = (*cit).second.second;
v[r] += c * a;
}
}
}
}
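/* Illustrative sketch, not part of the original sources: the accumulation
   above in stand-alone form. For a nonzero input value a at a sorted key,
   every (row, value) pair stored under that key contributes value*a to the
   corresponding output row. */
#include <map>
#include <utility>
#include <vector>
static inline void
sparse_column_mult_add(const std::multimap<std::vector<int>, std::pair<int, double>> &m,
                       const std::vector<int> &sorted_key, double a,
                       std::vector<double> &v)
{
  auto range = m.equal_range(sorted_key);
  for (auto it = range.first; it != range.second; ++it)
    v[it->second.first] += it->second.second*a; // v[r] += c*a
}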
void
FSSparseTensor::print() const
{
printf("FS Sparse tensor: dim=%d, nv=%d, (%dx%d)\n", dim, nv, nr, nc);
SparseTensor::print();
}
// |GSSparseTensor| slicing constructor
/* This is the same as |@<|FGSTensor| slicing from |FSSparseTensor|@>|. */
GSSparseTensor::GSSparseTensor(const FSSparseTensor &t, const IntSequence &ss,
const IntSequence &coor, const TensorDimens &td)
: SparseTensor(td.dimen(), t.nrows(), td.calcFoldMaxOffset()),
tdims(td)
{
// set |lb| and |ub| to lower and upper bounds of slice indices
/* This is the same as |@<set |lb| and |ub| to lower and upper bounds
of indices@>| in {\tt gs\_tensor.cpp}, see that file for details. */
IntSequence s_offsets(ss.size(), 0);
for (int i = 1; i < ss.size(); i++)
s_offsets[i] = s_offsets[i-1] + ss[i-1];
IntSequence lb(coor.size());
IntSequence ub(coor.size());
for (int i = 0; i < coor.size(); i++)
{
lb[i] = s_offsets[coor[i]];
ub[i] = s_offsets[coor[i]] + ss[coor[i]] - 1;
}
FSSparseTensor::const_iterator lbi = t.getMap().lower_bound(lb);
FSSparseTensor::const_iterator ubi = t.getMap().upper_bound(ub);
for (FSSparseTensor::const_iterator run = lbi; run != ubi; ++run)
{
if (lb.lessEq((*run).first) && (*run).first.lessEq(ub))
{
IntSequence c((*run).first);
c.add(-1, lb);
insert(c, (*run).second.first, (*run).second.second);
}
}
}
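/* Illustrative worked example, not part of the original sources: for stack
   sizes ss = (2, 3, 2) the offsets are s_offsets = (0, 2, 5); slice
   coordinates coor = (1, 1, 2) then yield lb = (2, 2, 5) and ub = (4, 4, 6),
   so the slice keeps exactly the keys whose first two coordinates fall into
   the second stack and whose last coordinate falls into the third stack. */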
void
GSSparseTensor::insert(const IntSequence &s, int r, double c)
{
TL_RAISE_IF(!s.less(tdims.getNVX()),
"Wrong coordinates of index in GSSparseTensor::insert");
SparseTensor::insert(s, r, c);
}
void
GSSparseTensor::print() const
{
printf("GS Sparse tensor: (%dx%d)\nSymmetry: ", nr, nc);
tdims.getSym().print();
printf("NVS: ");
tdims.getNVS().print();
SparseTensor::print();
}

View File

@ -1,274 +0,0 @@
@q $Id: sparse_tensor.cweb 1258 2007-05-11 13:59:10Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@ Start of {\tt sparse\_tensor.cpp} file.
@c
#include "sparse_tensor.h"
#include "fs_tensor.h"
#include "tl_exception.h"
#include <cmath>
@<|SparseTensor::insert| code@>;
@<|SparseTensor::isFinite| code@>;
@<|SparseTensor::getFoldIndexFillFactor| code@>;
@<|SparseTensor::getUnfoldIndexFillFactor| code@>;
@<|SparseTensor::print| code@>;
@<|FSSparseTensor| constructor code@>;
@<|FSSparseTensor| copy constructor code@>;
@<|FSSparseTensor::insert| code@>;
@<|FSSparseTensor::multColumnAndAdd| code@>;
@<|FSSparseTensor::print| code@>;
@<|GSSparseTensor| slicing constructor@>;
@<|GSSparseTensor::insert| code@>;
@<|GSSparseTensor::print| code@>;
@ This is straightforward. Before we insert anything, we do a few
checks. Then we reset |first_nz_row| and |last_nz_row| if necessary.
@<|SparseTensor::insert| code@>=
void SparseTensor::insert(const IntSequence& key, int r, double c)
{
TL_RAISE_IF(r < 0 || r >= nr,
"Row number out of dimension of tensor in SparseTensor::insert");
TL_RAISE_IF(key.size() != dimen(),
"Wrong length of key in SparseTensor::insert");
TL_RAISE_IF(! std::isfinite(c),
"Insertion of non-finite value in SparseTensor::insert");
iterator first_pos = m.lower_bound(key);
@<check that pair |key| and |r| is unique@>;
m.insert(first_pos, Map::value_type(key, Item(r,c)));
if (first_nz_row > r)
first_nz_row = r;
if (last_nz_row < r)
last_nz_row = r;
}
@
@<check that pair |key| and |r| is unique@>=
iterator last_pos = m.upper_bound(key);
for (iterator it = first_pos; it != last_pos; ++it)
if ((*it).second.first == r) {
TL_RAISE("Duplicate <key, r> insertion in SparseTensor::insert");
return;
}
@ This returns true if all items are finite (not Nan nor Inf).
@<|SparseTensor::isFinite| code@>=
bool SparseTensor::isFinite() const
{
bool res = true;
const_iterator run = m.begin();
while (res && run != m.end()) {
if (! std::isfinite((*run).second.second))
res = false;
++run;
}
return res;
}
@ This returns a ratio of a number of non-zero columns in folded
tensor to the total number of columns.
@<|SparseTensor::getFoldIndexFillFactor| code@>=
double SparseTensor::getFoldIndexFillFactor() const
{
int cnt = 0;
const_iterator start_col = m.begin();
while (start_col != m.end()) {
cnt++;
const IntSequence& key = (*start_col).first;
start_col = m.upper_bound(key);
}
return ((double)cnt)/ncols();
}
@ This returns a ratio of a number of non-zero columns in unfolded
tensor to the total number of columns.
@<|SparseTensor::getUnfoldIndexFillFactor| code@>=
double SparseTensor::getUnfoldIndexFillFactor() const
{
int cnt = 0;
const_iterator start_col = m.begin();
while (start_col != m.end()) {
const IntSequence& key = (*start_col).first;
Symmetry s(key);
cnt += Tensor::noverseq(s);
start_col = m.upper_bound(key);
}
return ((double)cnt)/ncols();
}
@ This prints the fill factor and all items.
@<|SparseTensor::print| code@>=
void SparseTensor::print() const
{
printf("Fill: %3.2f %%\n", 100*getFillFactor());
const_iterator start_col = m.begin();
while (start_col != m.end()) {
const IntSequence& key = (*start_col).first;
printf("Column: ");key.print();
const_iterator end_col = m.upper_bound(key);
int cnt = 1;
for (const_iterator run = start_col; run != end_col; ++run, cnt++) {
if ((cnt/7)*7 == cnt)
printf("\n");
printf("%d(%6.2g) ", (*run).second.first, (*run).second.second);
}
printf("\n");
start_col = end_col;
}
}
@
@<|FSSparseTensor| constructor code@>=
FSSparseTensor::FSSparseTensor(int d, int nvar, int r)
: SparseTensor(d, r, FFSTensor::calcMaxOffset(nvar, d)),
nv(nvar), sym(d)
{}
@
@<|FSSparseTensor| copy constructor code@>=
FSSparseTensor::FSSparseTensor(const FSSparseTensor& t)
: SparseTensor(t),
nv(t.nvar()), sym(t.sym)
{}
@
@<|FSSparseTensor::insert| code@>=
void FSSparseTensor::insert(const IntSequence& key, int r, double c)
{
TL_RAISE_IF(!key.isSorted(),
"Key is not sorted in FSSparseTensor::insert");
TL_RAISE_IF(key[key.size()-1] >= nv || key[0] < 0,
"Wrong value of the key in FSSparseTensor::insert");
SparseTensor::insert(key, r, c);
}
@ We go through the tensor |t| which is supposed to have single
column. If the item of |t| is nonzero, we make a key by sorting the
index, and then we go through all items having the same key (it is its
column), obtain the row number and the element, and do the
multiplication.
The test for non-zero is |a != 0.0|, since there will be items which
are exact zeros.
I have also tried to make the loop through the sparse tensor outer, and
find index of tensor |t| within the loop. Surprisingly, it is little
slower (for monomial tests with probability of zeros equal 0.3). But
everything depends how filled is the sparse tensor.
@<|FSSparseTensor::multColumnAndAdd| code@>=
void FSSparseTensor::multColumnAndAdd(const Tensor& t, Vector& v) const
{
@<check compatibility of input parameters@>;
for (Tensor::index it = t.begin(); it != t.end(); ++it) {
int ind = *it;
double a = t.get(ind, 0);
if (a != 0.0) {
IntSequence key(it.getCoor());
key.sort();
@<check that |key| is within the range@>;
const_iterator first_pos = m.lower_bound(key);
const_iterator last_pos = m.upper_bound(key);
for (const_iterator cit = first_pos; cit != last_pos; ++cit) {
int r = (*cit).second.first;
double c = (*cit).second.second;
v[r] += c * a;
}
}
}
}
@
@<check compatibility of input parameters@>=
TL_RAISE_IF(v.length() != nrows(),
"Wrong size of output vector in FSSparseTensor::multColumnAndAdd");
TL_RAISE_IF(t.dimen() != dimen(),
"Wrong dimension of tensor in FSSparseTensor::multColumnAndAdd");
TL_RAISE_IF(t.ncols() != 1,
"The input tensor is not single-column in FSSparseTensor::multColumnAndAdd");
@
@<check that |key| is within the range@>=
TL_RAISE_IF(key[0] < 0 || key[key.size()-1] >= nv,
"Wrong coordinates of index in FSSparseTensor::multColumnAndAdd");
@
@<|FSSparseTensor::print| code@>=
void FSSparseTensor::print() const
{
printf("FS Sparse tensor: dim=%d, nv=%d, (%dx%d)\n", dim, nv, nr, nc);
SparseTensor::print();
}
@ This is the same as |@<|FGSTensor| slicing from |FSSparseTensor|@>|.
@<|GSSparseTensor| slicing constructor@>=
GSSparseTensor::GSSparseTensor(const FSSparseTensor& t, const IntSequence& ss,
const IntSequence& coor, const TensorDimens& td)
: SparseTensor(td.dimen(), t.nrows(), td.calcFoldMaxOffset()),
tdims(td)
{
@<set |lb| and |ub| to lower and upper bounds of slice indices@>;
FSSparseTensor::const_iterator lbi = t.getMap().lower_bound(lb);
FSSparseTensor::const_iterator ubi = t.getMap().upper_bound(ub);
for (FSSparseTensor::const_iterator run = lbi; run != ubi; ++run) {
if (lb.lessEq((*run).first) && (*run).first.lessEq(ub)) {
IntSequence c((*run).first);
c.add(-1, lb);
insert(c, (*run).second.first, (*run).second.second);
}
}
}
@ This is the same as |@<set |lb| and |ub| to lower and upper bounds
of indices@>| in {\tt gs\_tensor.cpp}, see that file for details.
@<set |lb| and |ub| to lower and upper bounds of slice indices@>=
IntSequence s_offsets(ss.size(), 0);
for (int i = 1; i < ss.size(); i++)
s_offsets[i] = s_offsets[i-1] + ss[i-1];
IntSequence lb(coor.size());
IntSequence ub(coor.size());
for (int i = 0; i < coor.size(); i++) {
lb[i] = s_offsets[coor[i]];
ub[i] = s_offsets[coor[i]] + ss[coor[i]] - 1;
}
@
@<|GSSparseTensor::insert| code@>=
void GSSparseTensor::insert(const IntSequence& s, int r, double c)
{
TL_RAISE_IF(! s.less(tdims.getNVX()),
"Wrong coordinates of index in GSSparseTensor::insert");
SparseTensor::insert(s, r, c);
}
@
@<|GSSparseTensor::print| code@>=
void GSSparseTensor::print() const
{
printf("GS Sparse tensor: (%dx%d)\nSymmetry: ", nr, nc);
tdims.getSym().print();
printf("NVS: ");
tdims.getNVS().print();
SparseTensor::print();
}
@ End of {\tt sparse\_tensor.cpp} file.

View File

@ -0,0 +1,187 @@
// Copyright 2004, Ondra Kamenik
// Sparse tensor.
/* Here we declare a sparse full and general symmetry tensors with the
multidimensional index along columns. We implement them as a |multimap|
associating to each sequence of coordinates |IntSequence| a set of
pairs (row, number). This is very convenient but not optimal in terms
of memory consumption. So the implementation can be changed.
The current |multimap| implementation allows insertions. Another
advantage of this approach is that we do not need to calculate column
numbers from the |IntSequence|, since the column is accessed directly
via the key which is |IntSequence|.
The only operation we need to do with the full symmetry sparse tensor
is a left multiplication of a row oriented single column tensor. The
result of such operation is a column of the same size as the sparse
tensor. Other important operations are slicing operations. We need to
do sparse and dense slices of full symmetry sparse tensors. In fact,
the only constructor of general symmetry sparse tensor is slicing from
the full symmetry sparse. */
#ifndef SPARSE_TENSOR_H
#define SPARSE_TENSOR_H
#include "symmetry.hh"
#include "tensor.hh"
#include "gs_tensor.hh"
#include "Vector.h"
#include <map>
using namespace std;
// |ltseq| predicate
struct ltseq
{
bool
operator()(const IntSequence &s1, const IntSequence &s2) const
{
return s1 < s2;
}
};
/* This is a super class of both full symmetry and general symmetry
sparse tensors. It contains a |multimap| and implements insertions. It
tracks maximum and minimum row, for which there is an item. */
class SparseTensor
{
public:
typedef pair<int, double> Item;
typedef multimap<IntSequence, Item, ltseq> Map;
typedef Map::const_iterator const_iterator;
protected:
typedef Map::iterator iterator;
Map m;
const int dim;
const int nr;
const int nc;
int first_nz_row;
int last_nz_row;
public:
SparseTensor(int d, int nnr, int nnc)
: dim(d), nr(nnr), nc(nnc), first_nz_row(nr), last_nz_row(-1)
{
}
SparseTensor(const SparseTensor &t)
: m(t.m), dim(t.dim), nr(t.nr), nc(t.nc)
{
}
virtual ~SparseTensor()
{
}
void insert(const IntSequence &s, int r, double c);
const Map &
getMap() const
{
return m;
}
int
dimen() const
{
return dim;
}
int
nrows() const
{
return nr;
}
int
ncols() const
{
return nc;
}
double
getFillFactor() const
{
return ((double) m.size())/(nrows()*ncols());
}
double getFoldIndexFillFactor() const;
double getUnfoldIndexFillFactor() const;
int
getNumNonZero() const
{
return m.size();
}
int
getFirstNonZeroRow() const
{
return first_nz_row;
}
int
getLastNonZeroRow() const
{
return last_nz_row;
}
virtual const Symmetry &getSym() const = 0;
void print() const;
bool isFinite() const;
};
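/* Illustrative sketch, not part of the original sources: since the container
   is a multimap from coordinates to (row, value) pairs, one folded column is
   simply the equal range of its key; no column offsets ever need to be
   computed. */
#include <map>
#include <utility>
#include <vector>
static inline std::vector<std::pair<int, double>>
column_items(const std::multimap<std::vector<int>, std::pair<int, double>> &m,
             const std::vector<int> &key)
{
  std::vector<std::pair<int, double>> col;
  for (auto it = m.lower_bound(key); it != m.upper_bound(key); ++it)
    col.push_back(it->second); // the (row, value) pairs of this column
  return col;
}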
/* This is a full symmetry sparse tensor. It implements
   |multColumnAndAdd| and, in addition to |SparseTensor|, it has |nv|
   (the number of variables) and a symmetry (which is basically given by
   the dimension). */
class FSSparseTensor : public SparseTensor
{
public:
typedef SparseTensor::const_iterator const_iterator;
private:
const int nv;
const Symmetry sym;
public:
FSSparseTensor(int d, int nvar, int r);
FSSparseTensor(const FSSparseTensor &t);
void insert(const IntSequence &s, int r, double c);
void multColumnAndAdd(const Tensor &t, Vector &v) const;
const Symmetry &
getSym() const
{
return sym;
}
int
nvar() const
{
return nv;
}
void print() const;
};
/* This is a general symmetry sparse tensor. It has |TensorDimens| and
can be constructed as a slice of the full symmetry sparse tensor. The
slicing constructor takes the same form as the slicing |FGSTensor|
constructor from full symmetry sparse tensor. */
class GSSparseTensor : public SparseTensor
{
public:
typedef SparseTensor::const_iterator const_iterator;
private:
const TensorDimens tdims;
public:
GSSparseTensor(const FSSparseTensor &t, const IntSequence &ss,
const IntSequence &coor, const TensorDimens &td);
GSSparseTensor(const GSSparseTensor &t)
: SparseTensor(t), tdims(t.tdims)
{
}
void insert(const IntSequence &s, int r, double c);
const Symmetry &
getSym() const
{
return tdims.getSym();
}
const TensorDimens &
getDims() const
{
return tdims;
}
void print() const;
};
#endif

View File

@ -1,154 +0,0 @@
@q $Id: sparse_tensor.hweb 522 2005-11-25 15:45:54Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 Sparse tensor. Start of {\tt sparse\_tensor.h} file.
Here we declare a sparse full and general symmetry tensors with the
multidimensional index along columns. We implement them as a |multimap|
associating to each sequence of coordinates |IntSequence| a set of
pairs (row, number). This is very convenient but not optimal in terms
of memory consumption. So the implementation can be changed.
The current |multimap| implementation allows insertions. Another
advantage of this approach is that we do not need to calculate column
numbers from the |IntSequence|, since the column is accessed directly
via the key which is |IntSequence|.
The only operation we need to do with the full symmetry sparse tensor
is a left multiplication of a row oriented single column tensor. The
result of such operation is a column of the same size as the sparse
tensor. Other important operations are slicing operations. We need to
do sparse and dense slices of full symmetry sparse tensors. In fact,
the only constructor of general symmetry sparse tensor is slicing from
the full symmetry sparse.
@s SparseTensor int
@s FSSparseTensor int
@s GSSparseTensor int
@c
#ifndef SPARSE_TENSOR_H
#define SPARSE_TENSOR_H
#include "symmetry.h"
#include "tensor.h"
#include "gs_tensor.h"
#include "Vector.h"
#include <map>
using namespace std;
@<|ltseq| predicate@>;
@<|SparseTensor| class declaration@>;
@<|FSSparseTensor| class declaration@>;
@<|GSSparseTensor| class declaration@>;
#endif
@
@<|ltseq| predicate@>=
struct ltseq {
bool operator()(const IntSequence& s1, const IntSequence& s2) const
{@+ return s1 < s2;@+}
};
@ This is a super class of both full symmetry and general symmetry
sparse tensors. It contains a |multimap| and implements insertions. It
tracks maximum and minimum row, for which there is an item.
@<|SparseTensor| class declaration@>=
class SparseTensor {
public:@;
typedef pair<int, double> Item;
typedef multimap<IntSequence, Item, ltseq> Map;
typedef Map::const_iterator const_iterator;
protected:@;
typedef Map::iterator iterator;
Map m;
const int dim;
const int nr;
const int nc;
int first_nz_row;
int last_nz_row;
public:@;
SparseTensor(int d, int nnr, int nnc)
: dim(d), nr(nnr), nc(nnc), first_nz_row(nr), last_nz_row(-1) @+{}
SparseTensor(const SparseTensor& t)
: m(t.m), dim(t.dim), nr(t.nr), nc(t.nc) @+{}
virtual ~SparseTensor() @+{}
void insert(const IntSequence& s, int r, double c);
const Map& getMap() const
{@+ return m;@+}
int dimen() const
{@+ return dim;@+}
int nrows() const
{@+ return nr;@+}
int ncols() const
{@+ return nc;@+}
double getFillFactor() const
{@+ return ((double)m.size())/(nrows()*ncols());@+}
double getFoldIndexFillFactor() const;
double getUnfoldIndexFillFactor() const;
int getNumNonZero() const
{@+ return m.size();@+}
int getFirstNonZeroRow() const
{@+ return first_nz_row;@+}
int getLastNonZeroRow() const
{@+ return last_nz_row;@+}
virtual const Symmetry& getSym() const =0;
void print() const;
bool isFinite() const;
}
@ This is a full symmetry sparse tensor. It implements
|multColumnAndAdd| and in addition to |sparseTensor|, it has |nv|
(number of variables), and symmetry (basically it is a dimension).
@<|FSSparseTensor| class declaration@>=
class FSSparseTensor : public SparseTensor {
public:@;
typedef SparseTensor::const_iterator const_iterator;
private:@;
const int nv;
const Symmetry sym;
public:@;
FSSparseTensor(int d, int nvar, int r);
FSSparseTensor(const FSSparseTensor& t);
void insert(const IntSequence& s, int r, double c);
void multColumnAndAdd(const Tensor& t, Vector& v) const;
const Symmetry& getSym() const
{@+ return sym;@+}
int nvar() const
{@+ return nv;@+}
void print() const;
};
@ This is a general symmetry sparse tensor. It has |TensorDimens| and
can be constructed as a slice of the full symmetry sparse tensor. The
slicing constructor takes the same form as the slicing |FGSTensor|
constructor from full symmetry sparse tensor.
@<|GSSparseTensor| class declaration@>=
class GSSparseTensor : public SparseTensor {
public:@;
typedef SparseTensor::const_iterator const_iterator;
private:@;
const TensorDimens tdims;
public:@;
GSSparseTensor(const FSSparseTensor& t, const IntSequence& ss,
const IntSequence& coor, const TensorDimens& td);
GSSparseTensor(const GSSparseTensor& t)
: SparseTensor(t), tdims(t.tdims) @+{}
void insert(const IntSequence& s, int r, double c);
const Symmetry& getSym() const
{@+ return tdims.getSym();@+}
const TensorDimens& getDims() const
{@+ return tdims;@+}
void print() const;
};
@ End of {\tt sparse\_tensor.h} file.

View File

@ -0,0 +1,662 @@
// Copyright 2004, Ondra Kamenik
#include "stack_container.hh"
#include "pyramid_prod2.hh"
#include "ps_tensor.hh"
double FoldedStackContainer::fill_threshold = 0.00005;
double UnfoldedStackContainer::fill_threshold = 0.00005;
// |FoldedStackContainer::multAndAdd| sparse code
/* Here we multiply the sparse tensor with the
   |FoldedStackContainer|. We have four implementations,
   |multAndAddSparse1|, |multAndAddSparse2|, |multAndAddSparse3|, and
   |multAndAddSparse4|. The third is not threaded yet, and I expect it to
   be by far the slowest. The |multAndAddSparse4| exploits the sparsity;
   however, it still seems to be worse than |multAndAddSparse2| even for
   really sparse matrices. On the other hand, it can be more efficient
   than |multAndAddSparse2| for large problems, since it does not need as
   much memory and can avoid a lot of swapping. A very preliminary
   examination shows that |multAndAddSparse2| is the best in terms of
   time. */
void
FoldedStackContainer::multAndAdd(const FSSparseTensor &t,
FGSTensor &out) const
{
TL_RAISE_IF(t.nvar() != getAllSize(),
"Wrong number of variables of tensor for FoldedStackContainer::multAndAdd");
multAndAddSparse2(t, out);
}
// |FoldedStackContainer::multAndAdd| dense code
/* Here we perform the Faa Di Bruno step for a given dimension |dim|, and for
   the dense fully symmetric tensor which is scattered in the container
   of general symmetric tensors. The implementation is pretty much the same
   as |@<|UnfoldedStackContainer::multAndAdd| dense code@>|. */
void
FoldedStackContainer::multAndAdd(int dim, const FGSContainer &c, FGSTensor &out) const
{
TL_RAISE_IF(c.num() != numStacks(),
"Wrong symmetry length of container for FoldedStackContainer::multAndAdd");
THREAD_GROUP gr;
SymmetrySet ss(dim, c.num());
for (symiterator si(ss); !si.isEnd(); ++si)
{
if (c.check(*si))
{
THREAD *worker = new WorkerFoldMAADense(*this, *si, c, out);
gr.insert(worker);
}
}
gr.run();
}
/* This is analogous to |@<|WorkerUnfoldMAADense::operator()()|
code@>|. */
void
WorkerFoldMAADense::operator()()
{
Permutation iden(dense_cont.num());
IntSequence coor(sym, iden.getMap());
const FGSTensor *g = dense_cont.get(sym);
cont.multAndAddStacks(coor, *g, out, &out);
}
WorkerFoldMAADense::WorkerFoldMAADense(const FoldedStackContainer &container,
const Symmetry &s,
const FGSContainer &dcontainer,
FGSTensor &outten)
: cont(container), sym(s), dense_cont(dcontainer), out(outten)
{
}
/* This is analogous to |@<|UnfoldedStackContainer::multAndAddSparse1|
code@>|. */
void
FoldedStackContainer::multAndAddSparse1(const FSSparseTensor &t,
FGSTensor &out) const
{
THREAD_GROUP gr;
UFSTensor dummy(0, numStacks(), t.dimen());
for (Tensor::index ui = dummy.begin(); ui != dummy.end(); ++ui)
{
THREAD *worker = new WorkerFoldMAASparse1(*this, t, out, ui.getCoor());
gr.insert(worker);
}
gr.run();
}
/* This is analogous to |@<|WorkerUnfoldMAASparse1::operator()()| code@>|.
   The only difference is that instead of a |UPSTensor| as the result of
   the multiplication of the unfolded tensor and the tensors from the
   containers, we have an |FPSTensor| with partially folded permuted
   symmetry.
   todo: make the slice vertically narrowed according to the fill of |t|,
   and vertically narrow |out| accordingly. */
void
WorkerFoldMAASparse1::operator()()
{
const EquivalenceSet &eset = ebundle.get(out.dimen());
const PermutationSet &pset = tls.pbundle->get(t.dimen());
Permutation iden(t.dimen());
UPSTensor slice(t, cont.getStackSizes(), coor,
PerTensorDimens(cont.getStackSizes(), coor));
for (int iper = 0; iper < pset.getNum(); iper++)
{
const Permutation &per = pset.get(iper);
IntSequence percoor(coor.size());
per.apply(coor, percoor);
for (EquivalenceSet::const_iterator it = eset.begin();
it != eset.end(); ++it)
{
if ((*it).numClasses() == t.dimen())
{
StackProduct<FGSTensor> sp(cont, *it, out.getSym());
if (!sp.isZero(percoor))
{
KronProdStack<FGSTensor> kp(sp, percoor);
kp.optimizeOrder();
const Permutation &oper = kp.getPer();
if (Permutation(oper, per) == iden)
{
FPSTensor fps(out.getDims(), *it, slice, kp);
{
SYNCHRO syn(&out, "WorkerFoldMAASparse1");
fps.addTo(out);
}
}
}
}
}
}
}
WorkerFoldMAASparse1::WorkerFoldMAASparse1(const FoldedStackContainer &container,
const FSSparseTensor &ten,
FGSTensor &outten, const IntSequence &c)
: cont(container), t(ten), out(outten), coor(c), ebundle(*(tls.ebundle))
{
}
/* Here is the second implementation of sparse folded |multAndAdd|. It
is pretty similar to implementation of
|@<|UnfoldedStackContainer::multAndAddSparse2| code@>|. We make a
dense folded |slice|, and then call folded |multAndAddStacks|, which
multiplies all the combinations compatible with the slice. */
void
FoldedStackContainer::multAndAddSparse2(const FSSparseTensor &t,
FGSTensor &out) const
{
THREAD_GROUP gr;
FFSTensor dummy_f(0, numStacks(), t.dimen());
for (Tensor::index fi = dummy_f.begin(); fi != dummy_f.end(); ++fi)
{
THREAD *worker = new WorkerFoldMAASparse2(*this, t, out, fi.getCoor());
gr.insert(worker);
}
gr.run();
}
/* Here we make a sparse slice first and then call |multAndAddStacks|
   if the slice is not empty. If the slice is really sparse, we call the
   sparse version of |multAndAddStacks|. What ``really sparse'' means is
   given by |fill_threshold|. It has not been tuned yet; practice shows
   that it must be a really low number, since the sparse
   |multAndAddStacks| is much slower than the dense version.
   Further, we take only the nonzero rows of the slice, and accordingly
   of the out tensor. We jump over zero initial rows and drop zero
   trailing rows. */
void
WorkerFoldMAASparse2::operator()()
{
GSSparseTensor slice(t, cont.getStackSizes(), coor,
TensorDimens(cont.getStackSizes(), coor));
if (slice.getNumNonZero())
{
if (slice.getUnfoldIndexFillFactor() > FoldedStackContainer::fill_threshold)
{
FGSTensor dense_slice(slice);
int r1 = slice.getFirstNonZeroRow();
int r2 = slice.getLastNonZeroRow();
FGSTensor dense_slice1(r1, r2-r1+1, dense_slice);
FGSTensor out1(r1, r2-r1+1, out);
cont.multAndAddStacks(coor, dense_slice1, out1, &out);
}
else
cont.multAndAddStacks(coor, slice, out, &out);
}
}
WorkerFoldMAASparse2::WorkerFoldMAASparse2(const FoldedStackContainer &container,
const FSSparseTensor &ten,
FGSTensor &outten, const IntSequence &c)
: cont(container), t(ten), out(outten), coor(c)
{
}
/* Here is the third implementation of the sparse folded
   |multAndAdd|. It is a column-wise implementation, and thus not a good
   candidate for the best performer.
   We go through all columns of the output. For each column we
   calculate a folded |sumcol| which is a sum of all appropriate columns
   for all suitable equivalences. So we go through all suitable
   equivalences; for each we construct a |StackProduct| object and
   construct an |IrregTensor| for the corresponding column of $z$. The
   |IrregTensor| is an abstraction for the Kronecker multiplication of
   stacked columns of the two containers without zeros. Then the column
   is added to |sumcol|. Finally, |sumcol| is multiplied by the
   sparse tensor. */
void
FoldedStackContainer::multAndAddSparse3(const FSSparseTensor &t,
FGSTensor &out) const
{
const EquivalenceSet &eset = ebundle.get(out.dimen());
for (Tensor::index run = out.begin(); run != out.end(); ++run)
{
Vector outcol(out, *run);
FRSingleTensor sumcol(t.nvar(), t.dimen());
sumcol.zeros();
for (EquivalenceSet::const_iterator it = eset.begin();
it != eset.end(); ++it)
{
if ((*it).numClasses() == t.dimen())
{
StackProduct<FGSTensor> sp(*this, *it, out.getSym());
IrregTensorHeader header(sp, run.getCoor());
IrregTensor irten(header);
irten.addTo(sumcol);
}
}
t.multColumnAndAdd(sumcol, outcol);
}
}
/* Here is the fourth implementation of the sparse
   |FoldedStackContainer::multAndAdd|. It is almost equivalent to
   |multAndAddSparse2|, with the exception that the |FPSTensor|, as the
   result of a product of a slice and a Kronecker product of the stack
   derivatives, is calculated in a sparse fashion. For further details, see
   |@<|FoldedStackContainer::multAndAddStacks| sparse code@>| and
   |@<|FPSTensor| sparse constructor@>|. */
void
FoldedStackContainer::multAndAddSparse4(const FSSparseTensor &t, FGSTensor &out) const
{
THREAD_GROUP gr;
FFSTensor dummy_f(0, numStacks(), t.dimen());
for (Tensor::index fi = dummy_f.begin(); fi != dummy_f.end(); ++fi)
{
THREAD *worker = new WorkerFoldMAASparse4(*this, t, out, fi.getCoor());
gr.insert(worker);
}
gr.run();
}
/* The |WorkerFoldMAASparse4| is the same as |WorkerFoldMAASparse2|
with the exception that we call a sparse version of
|multAndAddStacks|. */
void
WorkerFoldMAASparse4::operator()()
{
GSSparseTensor slice(t, cont.getStackSizes(), coor,
TensorDimens(cont.getStackSizes(), coor));
if (slice.getNumNonZero())
cont.multAndAddStacks(coor, slice, out, &out);
}
WorkerFoldMAASparse4::WorkerFoldMAASparse4(const FoldedStackContainer &container,
const FSSparseTensor &ten,
FGSTensor &outten, const IntSequence &c)
: cont(container), t(ten), out(outten), coor(c)
{
}
// |FoldedStackContainer::multAndAddStacks| dense code
/* This is almost the same as
   |@<|UnfoldedStackContainer::multAndAddStacks| code@>|. The only
   difference is that we do not construct a |UPSTensor| from the
   |KronProdStack|, but a partially folded permuted
   symmetry |FPSTensor|. Note that the tensor |g| must be unfolded
   in order to be able to multiply with the unfolded rows of the Kronecker
   product. However, the columns of such a product are partially
   folded, giving rise to the |FPSTensor|. */
void
FoldedStackContainer::multAndAddStacks(const IntSequence &coor,
const FGSTensor &g,
FGSTensor &out, const void *ad) const
{
const EquivalenceSet &eset = ebundle.get(out.dimen());
UGSTensor ug(g);
UFSTensor dummy_u(0, numStacks(), g.dimen());
for (Tensor::index ui = dummy_u.begin(); ui != dummy_u.end(); ++ui)
{
IntSequence tmp(ui.getCoor());
tmp.sort();
if (tmp == coor)
{
Permutation sort_per(ui.getCoor());
sort_per.inverse();
for (EquivalenceSet::const_iterator it = eset.begin();
it != eset.end(); ++it)
{
if ((*it).numClasses() == g.dimen())
{
StackProduct<FGSTensor> sp(*this, *it, sort_per, out.getSym());
if (!sp.isZero(coor))
{
KronProdStack<FGSTensor> kp(sp, coor);
if (ug.getSym().isFull())
kp.optimizeOrder();
FPSTensor fps(out.getDims(), *it, sort_per, ug, kp);
{
SYNCHRO syn(ad, "multAndAddStacks");
fps.addTo(out);
}
}
}
}
}
}
}
// |FoldedStackContainer::multAndAddStacks| sparse code
/* This is almost the same as
|@<|FoldedStackContainer::multAndAddStacks| dense code@>|. The only
difference is that the Kronecker product of the stacks is multiplied
with sparse slice |GSSparseTensor| (not dense slice |FGSTensor|). The
multiplication is done in |@<|FPSTensor| sparse constructor@>|. */
void
FoldedStackContainer::multAndAddStacks(const IntSequence &coor,
const GSSparseTensor &g,
FGSTensor &out, const void *ad) const
{
const EquivalenceSet &eset = ebundle.get(out.dimen());
UFSTensor dummy_u(0, numStacks(), g.dimen());
for (Tensor::index ui = dummy_u.begin(); ui != dummy_u.end(); ++ui)
{
IntSequence tmp(ui.getCoor());
tmp.sort();
if (tmp == coor)
{
Permutation sort_per(ui.getCoor());
sort_per.inverse();
for (EquivalenceSet::const_iterator it = eset.begin();
it != eset.end(); ++it)
{
if ((*it).numClasses() == g.dimen())
{
StackProduct<FGSTensor> sp(*this, *it, sort_per, out.getSym());
if (!sp.isZero(coor))
{
KronProdStack<FGSTensor> kp(sp, coor);
FPSTensor fps(out.getDims(), *it, sort_per, g, kp);
{
SYNCHRO syn(ad, "multAndAddStacks");
fps.addTo(out);
}
}
}
}
}
}
}
// |UnfoldedStackContainer::multAndAdd| sparse code
/* Here we simply call either |multAndAddSparse1| or
|multAndAddSparse2|. The first one allows for optimization of
Kronecker products, so it seems to be more efficient. */
void
UnfoldedStackContainer::multAndAdd(const FSSparseTensor &t,
UGSTensor &out) const
{
TL_RAISE_IF(t.nvar() != getAllSize(),
"Wrong number of variables of tensor for UnfoldedStackContainer::multAndAdd");
multAndAddSparse2(t, out);
}
// |UnfoldedStackContainer::multAndAdd| dense code
/* Here we implement the formula for stacks for a fully symmetric tensor
   scattered in a number of general symmetry tensors contained in a given
   container. The implementation is pretty much the same as in
   |multAndAddSparse2|, but we do not take slices of the sparse tensor;
   we only do a lookup into the container.
   This means that we do not iterate through a dummy folded tensor to
   obtain folded coordinates of stacks; rather, we iterate through all
   symmetries contained in the container, and the coordinates of the stacks
   are obtained as an unfolded identity sequence via the symmetry. The
   reason for doing this is that we are unable to calculate the symmetry
   from the stack coordinates as easily as the stack coordinates from the
   symmetry. */
void
UnfoldedStackContainer::multAndAdd(int dim, const UGSContainer &c,
UGSTensor &out) const
{
TL_RAISE_IF(c.num() != numStacks(),
"Wrong symmetry length of container for UnfoldedStackContainer::multAndAdd");
THREAD_GROUP gr;
SymmetrySet ss(dim, c.num());
for (symiterator si(ss); !si.isEnd(); ++si)
{
if (c.check(*si))
{
THREAD *worker = new WorkerUnfoldMAADense(*this, *si, c, out);
gr.insert(worker);
}
}
gr.run();
}
void
WorkerUnfoldMAADense::operator()()
{
Permutation iden(dense_cont.num());
IntSequence coor(sym, iden.getMap());
const UGSTensor *g = dense_cont.get(sym);
cont.multAndAddStacks(coor, *g, out, &out);
}
WorkerUnfoldMAADense::WorkerUnfoldMAADense(const UnfoldedStackContainer &container,
const Symmetry &s,
const UGSContainer &dcontainer,
UGSTensor &outten)
: cont(container), sym(s), dense_cont(dcontainer), out(outten)
{
}
/* Here we implement the formula for unfolded tensors. If, for instance,
   a coordinate $z$ of a tensor $\left[f_{z^2}\right]$ is partitioned as
   $z=[a, b]$, then we perform the following:
   $$
   \eqalign{
   \left[f_{z^2}\right]\left(\sum_c\left[\matrix{a_{c(x)}\cr b_{c(x)}}\right]
   \otimes\left[\matrix{a_{c(y)}\cr b_{c(y)}}\right]\right)=&
   \left[f_{aa}\right]\left(\sum_ca_{c(x)}\otimes a_{c(y)}\right)+
   \left[f_{ab}\right]\left(\sum_ca_{c(x)}\otimes b_{c(y)}\right)+\cr
   &\left[f_{ba}\right]\left(\sum_cb_{c(x)}\otimes a_{c(y)}\right)+
   \left[f_{bb}\right]\left(\sum_cb_{c(x)}\otimes b_{c(y)}\right)\cr
   }
   $$
   This is exactly what happens here; a stand-alone numeric sketch of the
   block expansion follows the function below. The code is clear. It goes
   through all combinations of stacks, and each thread is responsible for
   the operation on the slice corresponding to its combination of stacks. */
void
UnfoldedStackContainer::multAndAddSparse1(const FSSparseTensor &t,
UGSTensor &out) const
{
THREAD_GROUP gr;
UFSTensor dummy(0, numStacks(), t.dimen());
for (Tensor::index ui = dummy.begin(); ui != dummy.end(); ++ui)
{
THREAD *worker = new WorkerUnfoldMAASparse1(*this, t, out, ui.getCoor());
gr.insert(worker);
}
gr.run();
}
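/* Illustrative numeric sketch, not part of the original sources: the
   partitioned formula above for a single row f over z = [a, b]; the full
   contraction of f with z(x)z equals the sum of the four block
   contractions. */
#include <vector>
static inline bool
check_partitioned_formula()
{
  std::vector<double> a = {1.0, 2.0}, b = {3.0};
  std::vector<double> z = {1.0, 2.0, 3.0}; // z = [a, b]
  std::vector<double> f = {1, 2, 3, 4, 5, 6, 7, 8, 9}; // arbitrary row over z(x)z
  double full = 0.0;
  for (int i = 0; i < 3; i++)
    for (int j = 0; j < 3; j++)
      full += f[i*3+j]*z[i]*z[j];
  // blocks of f selected by whether each index falls into a ({0,1}) or b ({2})
  double faa = f[0]*a[0]*a[0]+f[1]*a[0]*a[1]+f[3]*a[1]*a[0]+f[4]*a[1]*a[1];
  double fab = f[2]*a[0]*b[0]+f[5]*a[1]*b[0];
  double fba = f[6]*b[0]*a[0]+f[7]*b[0]*a[1];
  double fbb = f[8]*b[0]*b[0];
  return full == faa+fab+fba+fbb; // true: the four blocks cover all terms
}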
/* This does a step of |@<|UnfoldedStackContainer::multAndAddSparse1| code@>| for
   given coordinates. First it makes a slice of the given stack coordinates.
   Then it multiplies everything that should be multiplied with the slice:
   it goes through all equivalences, creates a |StackProduct|, then a
   |KronProdStack|, which is added to |out|. So far everything is clear.
   However, we want to use the optimized |KronProdAllOptim| to minimize
   the number of flops and the memory needed in the Kronecker product. So we
   go through all permutations |per|, permute the coordinates to get
   |percoor|, go through all equivalences, and make a |KronProdStack| and
   optimize it. The result of the optimization is a permutation |oper|. Now
   we multiply the Kronecker product with the slice only if the slice
   has the same ordering of coordinates as the Kronecker product
   |KronProdStack|. However, this is not perfectly accurate. Since we go
   through {\bf all} permutations |per|, there might be two different
   permutations leading to the same ordering in |KronProdStack| and thus
   the same ordering in the optimized |KronProdStack|. The two cases
   would be counted twice, which is wrong. That is why we do not
   condition on $\hbox{coor}\circ\hbox{oper}\circ\hbox{per} =
   \hbox{coor}$, but condition on
   $\hbox{oper}\circ\hbox{per}=\hbox{id}$. In this way, we rule out
   permutations |per| leading to the same ordering of stacks when
   applied to |coor|.
   todo: vertically narrow slice and out according to the fill in t. */
void
WorkerUnfoldMAASparse1::operator()()
{
const EquivalenceSet &eset = ebundle.get(out.dimen());
const PermutationSet &pset = tls.pbundle->get(t.dimen());
Permutation iden(t.dimen());
UPSTensor slice(t, cont.getStackSizes(), coor,
PerTensorDimens(cont.getStackSizes(), coor));
for (int iper = 0; iper < pset.getNum(); iper++)
{
const Permutation &per = pset.get(iper);
IntSequence percoor(coor.size());
per.apply(coor, percoor);
for (EquivalenceSet::const_iterator it = eset.begin();
it != eset.end(); ++it)
{
if ((*it).numClasses() == t.dimen())
{
StackProduct<UGSTensor> sp(cont, *it, out.getSym());
if (!sp.isZero(percoor))
{
KronProdStack<UGSTensor> kp(sp, percoor);
kp.optimizeOrder();
const Permutation &oper = kp.getPer();
if (Permutation(oper, per) == iden)
{
UPSTensor ups(out.getDims(), *it, slice, kp);
{
SYNCHRO syn(&out, "WorkerUnfoldMAASparse1");
ups.addTo(out);
}
}
}
}
}
}
}
WorkerUnfoldMAASparse1::WorkerUnfoldMAASparse1(const UnfoldedStackContainer &container,
const FSSparseTensor &ten,
UGSTensor &outten, const IntSequence &c)
: cont(container), t(ten), out(outten), coor(c), ebundle(*(tls.ebundle))
{
}
/* Here we implement the formula in a slightly different way. We use the
   fact, in the notation of |@<|UnfoldedStackContainer::multAndAddSparse2|
   code@>|, that
   $$
   \left[f_{ba}\right]\left(\sum_cb_{c(x)}\otimes a_{c(y)}\right)=
   \left[f_{ab}\right]\left(\sum_ca_{c(y)}\otimes b_{c(x)}\right)\cdot P
   $$
   where $P$ is a suitable permutation of columns (a stand-alone sketch of
   this permutation follows the function below). The permutation
   corresponds (in this example) to a swap of $a$ and $b$. An advantage
   of this approach is that we do not need a |UPSTensor| for $f_{ba}$, and
   thus we decrease the number of needed slices.
   So we go through all folded indices of stack coordinates; then for
   each such index |fi| we make a slice and call |multAndAddStacks|, which
   goes through all corresponding unfolded indices to perform the
   formula. Each unsorted (unfolded) index implies a sorting permutation
   |sort_per| which must be used to permute the stacks in |StackProduct| and
   to permute the equivalence classes when the |UPSTensor| is formed. In this
   way the column permutation $P$ from the formula is factored into the
   permutation of the |UPSTensor|. */
void
UnfoldedStackContainer::multAndAddSparse2(const FSSparseTensor &t,
UGSTensor &out) const
{
THREAD_GROUP gr;
FFSTensor dummy_f(0, numStacks(), t.dimen());
for (Tensor::index fi = dummy_f.begin(); fi != dummy_f.end(); ++fi)
{
THREAD *worker = new WorkerUnfoldMAASparse2(*this, t, out, fi.getCoor());
gr.insert(worker);
}
gr.run();
}
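/* Illustrative sketch, not part of the original sources: the column
   permutation $P$ used above in stand-alone form. b(x)a carries the same
   values as a(x)b; entry i*|b|+j of a(x)b sits at position j*|a|+i of
   b(x)a. */
#include <vector>
static inline bool
kron_swap_is_column_permutation()
{
  std::vector<double> a = {1.0, 2.0}, b = {3.0, 4.0, 5.0};
  std::vector<double> ab(a.size()*b.size()), ba(a.size()*b.size());
  for (size_t i = 0; i < a.size(); i++)
    for (size_t j = 0; j < b.size(); j++)
      {
        ab[i*b.size()+j] = a[i]*b[j];
        ba[j*a.size()+i] = b[j]*a[i];
      }
  for (size_t i = 0; i < a.size(); i++)
    for (size_t j = 0; j < b.size(); j++)
      if (ab[i*b.size()+j] != ba[j*a.size()+i])
        return false;
  return true; // always true: P maps position i*|b|+j to j*|a|+i
}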
/* This does a step of |@<|UnfoldedStackContainer::multAndAddSparse2| code@>| for
   given coordinates.
   todo: implement |multAndAddStacks| for a sparse slice as in
   |@<|FoldedStackContainer::multAndAddStacks| sparse code@>| and do this method as
   |@<|WorkerFoldMAASparse2::operator()()| code@>|. */
void
WorkerUnfoldMAASparse2::operator()()
{
GSSparseTensor slice(t, cont.getStackSizes(), coor,
TensorDimens(cont.getStackSizes(), coor));
if (slice.getNumNonZero())
{
FGSTensor fslice(slice);
UGSTensor dense_slice(fslice);
int r1 = slice.getFirstNonZeroRow();
int r2 = slice.getLastNonZeroRow();
UGSTensor dense_slice1(r1, r2-r1+1, dense_slice);
UGSTensor out1(r1, r2-r1+1, out);
cont.multAndAddStacks(coor, dense_slice1, out1, &out);
}
}
WorkerUnfoldMAASparse2::WorkerUnfoldMAASparse2(const UnfoldedStackContainer &container,
const FSSparseTensor &ten,
UGSTensor &outten, const IntSequence &c)
: cont(container), t(ten), out(outten), coor(c)
{
}
/* For given unfolded coordinates of stacks |fi| and an appropriate
   tensor $g$ whose symmetry is a symmetry of |fi|, the method
   contributes to |out| all tensors of the unfolded stack formula involving
   the stacks chosen by |fi|.
   We go through all |ui| coordinates which yield |fi| after sorting. We
   construct a permutation |sort_per| which sorts |ui| to |fi| (a
   stand-alone sketch of such a permutation follows the function below). We
   go through all appropriate equivalences, and construct a |StackProduct|
   from the equivalence classes permuted by |sort_per|, then a |UPSTensor|
   with the permutation of columns implied by the equivalence permuted by
   |sort_per|. The |UPSTensor| is then added to |out|.
   We cannot use the optimized |KronProdStack| here, since the symmetry
   of |UGSTensor& g| prescribes the ordering of the stacks. However, if
   |g| is fully symmetric, we can do the optimization harmlessly. */
void
UnfoldedStackContainer::multAndAddStacks(const IntSequence &fi,
const UGSTensor &g,
UGSTensor &out, const void *ad) const
{
const EquivalenceSet &eset = ebundle.get(out.dimen());
UFSTensor dummy_u(0, numStacks(), g.dimen());
for (Tensor::index ui = dummy_u.begin(); ui != dummy_u.end(); ++ui)
{
IntSequence tmp(ui.getCoor());
tmp.sort();
if (tmp == fi)
{
Permutation sort_per(ui.getCoor());
sort_per.inverse();
for (EquivalenceSet::const_iterator it = eset.begin();
it != eset.end(); ++it)
{
if ((*it).numClasses() == g.dimen())
{
StackProduct<UGSTensor> sp(*this, *it, sort_per, out.getSym());
if (!sp.isZero(fi))
{
KronProdStack<UGSTensor> kp(sp, fi);
if (g.getSym().isFull())
kp.optimizeOrder();
UPSTensor ups(out.getDims(), *it, sort_per, g, kp);
{
SYNCHRO syn(ad, "multAndAddStacks");
ups.addTo(out);
}
}
}
}
}
}
}
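/* Illustrative sketch, not part of the original sources: the sorting
   permutation used above in stand-alone form. For unfolded stack coordinates
   ui it returns the permutation per such that fi[k] = ui[per[k]] is sorted. */
#include <algorithm>
#include <numeric>
#include <vector>
static inline std::vector<int>
sorting_permutation(const std::vector<int> &ui)
{
  std::vector<int> per(ui.size());
  std::iota(per.begin(), per.end(), 0);
  std::stable_sort(per.begin(), per.end(),
                   [&ui](int i, int j) { return ui[i] < ui[j]; });
  return per; // e.g. ui = (2,0,1) gives per = (1,2,0)
}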

View File

@ -1,670 +0,0 @@
@q $Id: stack_container.cweb 1835 2008-05-19 01:54:48Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@ Start of {\tt stack\_container.cpp} file.
@c
#include "stack_container.h"
#include "pyramid_prod2.h"
#include "ps_tensor.h"
double FoldedStackContainer::fill_threshold = 0.00005;
double UnfoldedStackContainer::fill_threshold = 0.00005;
@<|FoldedStackContainer::multAndAdd| sparse code@>;
@<|FoldedStackContainer::multAndAdd| dense code@>;
@<|WorkerFoldMAADense::operator()()| code@>;
@<|WorkerFoldMAADense| constructor code@>;
@<|FoldedStackContainer::multAndAddSparse1| code@>;
@<|WorkerFoldMAASparse1::operator()()| code@>;
@<|WorkerFoldMAASparse1| constructor code@>;
@<|FoldedStackContainer::multAndAddSparse2| code@>;
@<|WorkerFoldMAASparse2::operator()()| code@>;
@<|WorkerFoldMAASparse2| constructor code@>;
@<|FoldedStackContainer::multAndAddSparse3| code@>;
@<|FoldedStackContainer::multAndAddSparse4| code@>;
@<|WorkerFoldMAASparse4::operator()()| code@>;
@<|WorkerFoldMAASparse4| constructor code@>;
@<|FoldedStackContainer::multAndAddStacks| dense code@>;
@<|FoldedStackContainer::multAndAddStacks| sparse code@>;
@#
@<|UnfoldedStackContainer::multAndAdd| sparse code@>;
@<|UnfoldedStackContainer::multAndAdd| dense code@>;
@<|WorkerUnfoldMAADense::operator()()| code@>;
@<|WorkerUnfoldMAADense| constructor code@>;
@<|UnfoldedStackContainer::multAndAddSparse1| code@>;
@<|WorkerUnfoldMAASparse1::operator()()| code@>;
@<|WorkerUnfoldMAASparse1| constructor code@>;
@<|UnfoldedStackContainer::multAndAddSparse2| code@>;
@<|WorkerUnfoldMAASparse2::operator()()| code@>;
@<|WorkerUnfoldMAASparse2| constructor code@>;
@<|UnfoldedStackContainer::multAndAddStacks| code@>;
@ Here we multiply the sparse tensor with the
|FoldedStackContainer|. We have four implementations,
|multAndAddSparse1|, |multAndAddSparse2|, |multAndAddSparse3|, and
|multAndAddSparse4|. The third is not threaded yet and I expect that
it is certainly the slowest. The |multAndAddSparse4| exploits the
sparsity, however, it seems to be still worse than |multAndAddSparse2|
even for really sparse matrices. On the other hand, it can be more
efficient than |multAndAddSparse2| for large problems, since it does
not need that much of memory and can avoid much swapping. Very
preliminary examination shows that |multAndAddSparse2| is the best in
terms of time.
@s FSSparseTensor int
@s IrregTensorHeader int
@s IrregTensor int
@<|FoldedStackContainer::multAndAdd| sparse code@>=
void FoldedStackContainer::multAndAdd(const FSSparseTensor& t,
FGSTensor& out) const
{
TL_RAISE_IF(t.nvar() != getAllSize(),
"Wrong number of variables of tensor for FoldedStackContainer::multAndAdd");
multAndAddSparse2(t, out);
}
@ Here we perform the Faa Di Bruno step for a given dimension |dim|, and for
the dense fully symmetric tensor which is scattered in the container
of general symmetric tensors. The implementation is pretty the same as
|@<|UnfoldedStackContainer::multAndAdd| dense code@>|.
@<|FoldedStackContainer::multAndAdd| dense code@>=
void FoldedStackContainer::multAndAdd(int dim, const FGSContainer& c, FGSTensor& out) const
{
TL_RAISE_IF(c.num() != numStacks(),
"Wrong symmetry length of container for FoldedStackContainer::multAndAdd");
THREAD_GROUP@, gr;
SymmetrySet ss(dim, c.num());
for (symiterator si(ss); !si.isEnd(); ++si) {
if (c.check(*si)) {
THREAD* worker = new WorkerFoldMAADense(*this, *si, c, out);
gr.insert(worker);
}
}
gr.run();
}
@ This is analogous to |@<|WorkerUnfoldMAADense::operator()()|
code@>|.
@<|WorkerFoldMAADense::operator()()| code@>=
void WorkerFoldMAADense::operator()()
{
Permutation iden(dense_cont.num());
IntSequence coor(sym, iden.getMap());
const FGSTensor* g = dense_cont.get(sym);
cont.multAndAddStacks(coor, *g, out, &out);
}
@
@<|WorkerFoldMAADense| constructor code@>=
WorkerFoldMAADense::WorkerFoldMAADense(const FoldedStackContainer& container,
const Symmetry& s,
const FGSContainer& dcontainer,
FGSTensor& outten)
: cont(container), sym(s), dense_cont(dcontainer), out(outten)
{}
@ This is analogous to |@<|UnfoldedStackContainer::multAndAddSparse1|
code@>|.
@<|FoldedStackContainer::multAndAddSparse1| code@>=
void FoldedStackContainer::multAndAddSparse1(const FSSparseTensor& t,
FGSTensor& out) const
{
THREAD_GROUP@, gr;
UFSTensor dummy(0, numStacks(), t.dimen());
for (Tensor::index ui = dummy.begin(); ui != dummy.end(); ++ui) {
THREAD* worker = new WorkerFoldMAASparse1(*this, t, out, ui.getCoor());
gr.insert(worker);
}
gr.run();
}
@ This is analogous to |@<|WorkerUnfoldMAASparse1::operator()()| code@>|.
The only difference is that instead of |UPSTensor| as a
result of multiplication of unfolded tensor and tensors from
containers, we have |FPSTensor| with partially folded permuted
symmetry.
todo: make slice vertically narrowed according to the fill of t,
vertically narrow out accordingly.
@<|WorkerFoldMAASparse1::operator()()| code@>=
void WorkerFoldMAASparse1::operator()()
{
const EquivalenceSet& eset = ebundle.get(out.dimen());
const PermutationSet& pset = tls.pbundle->get(t.dimen());
Permutation iden(t.dimen());
UPSTensor slice(t, cont.getStackSizes(), coor,
PerTensorDimens(cont.getStackSizes(), coor));
for (int iper = 0; iper < pset.getNum(); iper++) {
const Permutation& per = pset.get(iper);
IntSequence percoor(coor.size());
per.apply(coor, percoor);
for (EquivalenceSet::const_iterator it = eset.begin();
it != eset.end(); ++it) {
if ((*it).numClasses() == t.dimen()) {
StackProduct<FGSTensor> sp(cont, *it, out.getSym());
if (! sp.isZero(percoor)) {
KronProdStack<FGSTensor> kp(sp, percoor);
kp.optimizeOrder();
const Permutation& oper = kp.getPer();
if (Permutation(oper, per) == iden) {
FPSTensor fps(out.getDims(), *it, slice, kp);
{
SYNCHRO@, syn(&out, "WorkerUnfoldMAASparse1");
fps.addTo(out);
}
}
}
}
}
}
}
@
@<|WorkerFoldMAASparse1| constructor code@>=
WorkerFoldMAASparse1::WorkerFoldMAASparse1(const FoldedStackContainer& container,
const FSSparseTensor& ten,
FGSTensor& outten, const IntSequence& c)
: cont(container), t(ten), out(outten), coor(c), ebundle(*(tls.ebundle)) @+{}
@ Here is the second implementation of sparse folded |multAndAdd|. It
is pretty similar to implementation of
|@<|UnfoldedStackContainer::multAndAddSparse2| code@>|. We make a
dense folded |slice|, and then call folded |multAndAddStacks|, which
multiplies all the combinations compatible with the slice.
@<|FoldedStackContainer::multAndAddSparse2| code@>=
void FoldedStackContainer::multAndAddSparse2(const FSSparseTensor& t,
FGSTensor& out) const
{
THREAD_GROUP@, gr;
FFSTensor dummy_f(0, numStacks(), t.dimen());
for (Tensor::index fi = dummy_f.begin(); fi != dummy_f.end(); ++fi) {
THREAD* worker = new WorkerFoldMAASparse2(*this, t, out, fi.getCoor());
gr.insert(worker);
}
gr.run();
}
@ Here we make a sparse slice first and then call |multAndAddStacks|
if the slice is not empty. If the slice is really sparse, we call
sparse version of |multAndAddStacks|. What means ``really sparse'' is
given by |fill_threshold|. It is not tuned yet, a practice shows that
it must be a really low number, since sparse |multAndAddStacks| is
much slower than the dense version.
Further, we take only nonzero rows of the slice, and accordingly of
the out tensor. We jump over zero initial rows and drop zero tailing
rows.
@<|WorkerFoldMAASparse2::operator()()| code@>=
void WorkerFoldMAASparse2::operator()()
{
GSSparseTensor slice(t, cont.getStackSizes(), coor,
TensorDimens(cont.getStackSizes(), coor));
if (slice.getNumNonZero()) {
if (slice.getUnfoldIndexFillFactor() > FoldedStackContainer::fill_threshold) {
FGSTensor dense_slice(slice);
int r1 = slice.getFirstNonZeroRow();
int r2 = slice.getLastNonZeroRow();
FGSTensor dense_slice1(r1, r2-r1+1, dense_slice);
FGSTensor out1(r1, r2-r1+1, out);
cont.multAndAddStacks(coor, dense_slice1, out1, &out);
} else
cont.multAndAddStacks(coor, slice, out, &out);
}
}
@
@<|WorkerFoldMAASparse2| constructor code@>=
WorkerFoldMAASparse2::WorkerFoldMAASparse2(const FoldedStackContainer& container,
const FSSparseTensor& ten,
FGSTensor& outten, const IntSequence& c)
: cont(container), t(ten), out(outten), coor(c)
{}
@ Here is the third implementation of the sparse folded
|multAndAdd|. It is column-wise implementation, and thus is not a good
candidate for the best performer.
We go through all columns from the output. For each column we
calculate folded |sumcol| which is a sum of all appropriate columns
for all suitable equivalences. So we go through all suitable
equivalences, for each we construct a |StackProduct| object and
construct |IrregTensor| for a corresponding column of $z$. The
|IrregTensor| is an abstraction for Kronecker multiplication of
stacked columns of the two containers without zeros. Then the column
is added to |sumcol|. Finally, the |sumcol| is multiplied by the
sparse tensor.
@<|FoldedStackContainer::multAndAddSparse3| code@>=
void FoldedStackContainer::multAndAddSparse3(const FSSparseTensor& t,
FGSTensor& out) const
{
const EquivalenceSet& eset = ebundle.get(out.dimen());
for (Tensor::index run = out.begin(); run != out.end(); ++run) {
Vector outcol(out, *run);
FRSingleTensor sumcol(t.nvar(), t.dimen());
sumcol.zeros();
for (EquivalenceSet::const_iterator it = eset.begin();
it != eset.end(); ++it) {
if ((*it).numClasses() == t.dimen()) {
StackProduct<FGSTensor> sp(*this, *it, out.getSym());
IrregTensorHeader header(sp, run.getCoor());
IrregTensor irten(header);
irten.addTo(sumcol);
}
}
t.multColumnAndAdd(sumcol, outcol);
}
}
@ Here is the fourth implementation of the sparse folded
|FoldedStackContainer::multAndAdd|. It is almost equivalent to
|multAndAddSparse2|, with the exception that the |FPSTensor| resulting
from the product of a slice and the Kronecker product of the stack
derivatives is calculated in a sparse fashion. For further details, see
|@<|FoldedStackContainer::multAndAddStacks| sparse code@>| and
|@<|FPSTensor| sparse constructor@>|.
@<|FoldedStackContainer::multAndAddSparse4| code@>=
void FoldedStackContainer::multAndAddSparse4(const FSSparseTensor& t, FGSTensor& out) const
{
THREAD_GROUP@, gr;
FFSTensor dummy_f(0, numStacks(), t.dimen());
for (Tensor::index fi = dummy_f.begin(); fi != dummy_f.end(); ++fi) {
THREAD* worker = new WorkerFoldMAASparse4(*this, t, out, fi.getCoor());
gr.insert(worker);
}
gr.run();
}
@ The |WorkerFoldMAASparse4| is the same as |WorkerFoldMAASparse2|,
with the exception that we call the sparse version of
|multAndAddStacks|.
@<|WorkerFoldMAASparse4::operator()()| code@>=
void WorkerFoldMAASparse4::operator()()
{
GSSparseTensor slice(t, cont.getStackSizes(), coor,
TensorDimens(cont.getStackSizes(), coor));
if (slice.getNumNonZero())
cont.multAndAddStacks(coor, slice, out, &out);
}
@
@<|WorkerFoldMAASparse4| constructor code@>=
WorkerFoldMAASparse4::WorkerFoldMAASparse4(const FoldedStackContainer& container,
const FSSparseTensor& ten,
FGSTensor& outten, const IntSequence& c)
: cont(container), t(ten), out(outten), coor(c)
{}
@ This is almost the same as
|@<|UnfoldedStackContainer::multAndAddStacks| code@>|. The only
difference is that we do not construct a |UPSTensor| from
|KronProdStack|, but a partially folded permuted-symmetry
|FPSTensor|. Note that the tensor |g| must be unfolded in order
to be multiplied with the unfolded rows of the Kronecker
product. However, the columns of such a product are partially
folded, giving rise to the |FPSTensor|.
@<|FoldedStackContainer::multAndAddStacks| dense code@>=
void FoldedStackContainer::multAndAddStacks(const IntSequence& coor,
const FGSTensor& g,
FGSTensor& out, const void* ad) const
{
const EquivalenceSet& eset = ebundle.get(out.dimen());
UGSTensor ug(g);
UFSTensor dummy_u(0, numStacks(), g.dimen());
for (Tensor::index ui = dummy_u.begin(); ui != dummy_u.end(); ++ui) {
IntSequence tmp(ui.getCoor());
tmp.sort();
if (tmp == coor) {
Permutation sort_per(ui.getCoor());
sort_per.inverse();
for (EquivalenceSet::const_iterator it = eset.begin();
it != eset.end(); ++it) {
if ((*it).numClasses() == g.dimen()) {
StackProduct<FGSTensor> sp(*this, *it, sort_per, out.getSym());
if (! sp.isZero(coor)) {
KronProdStack<FGSTensor> kp(sp, coor);
if (ug.getSym().isFull())
kp.optimizeOrder();
FPSTensor fps(out.getDims(), *it, sort_per, ug, kp);
{
SYNCHRO@, syn(ad, "multAndAddStacks");
fps.addTo(out);
}
}
}
}
}
}
}
@ This is almost the same as
|@<|FoldedStackContainer::multAndAddStacks| dense code@>|. The only
difference is that the Kronecker product of the stacks is multiplied
with sparse slice |GSSparseTensor| (not dense slice |FGSTensor|). The
multiplication is done in |@<|FPSTensor| sparse constructor@>|.
@<|FoldedStackContainer::multAndAddStacks| sparse code@>=
void FoldedStackContainer::multAndAddStacks(const IntSequence& coor,
const GSSparseTensor& g,
FGSTensor& out, const void* ad) const
{
const EquivalenceSet& eset = ebundle.get(out.dimen());
UFSTensor dummy_u(0, numStacks(), g.dimen());
for (Tensor::index ui = dummy_u.begin(); ui != dummy_u.end(); ++ui) {
IntSequence tmp(ui.getCoor());
tmp.sort();
if (tmp == coor) {
Permutation sort_per(ui.getCoor());
sort_per.inverse();
for (EquivalenceSet::const_iterator it = eset.begin();
it != eset.end(); ++it) {
if ((*it).numClasses() == g.dimen()) {
StackProduct<FGSTensor> sp(*this, *it, sort_per, out.getSym());
if (! sp.isZero(coor)) {
KronProdStack<FGSTensor> kp(sp, coor);
FPSTensor fps(out.getDims(), *it, sort_per, g, kp);
{
SYNCHRO@, syn(ad, "multAndAddStacks");
fps.addTo(out);
}
}
}
}
}
}
}
@ Here we simply call either |multAndAddSparse1| or
|multAndAddSparse2|. The first one allows for optimization of
Kronecker products, so it seems to be more efficient.
@<|UnfoldedStackContainer::multAndAdd| sparse code@>=
void UnfoldedStackContainer::multAndAdd(const FSSparseTensor& t,
UGSTensor& out) const
{
TL_RAISE_IF(t.nvar() != getAllSize(),
"Wrong number of variables of tensor for UnfoldedStackContainer::multAndAdd");
multAndAddSparse2(t, out);
}
@ Here we implement the formula for stacks of a fully symmetric tensor
scattered in a number of general-symmetry tensors contained in a given
container. The implementation is pretty much the same as in
|multAndAddSparse2|, but we do not make slices of the sparse tensor;
we only do a lookup in the container.
This means that we do not iterate through a dummy folded tensor to
obtain folded coordinates of stacks; rather we iterate through all
symmetries contained in the container, and the coordinates of the
stacks are obtained as an unfolded identity sequence via the symmetry.
The reason for doing this is that we cannot calculate the symmetry from
stack coordinates as easily as stack coordinates from the symmetry.
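For instance, the symmetry $(2,0,1,0)$ over four stacks expands, via the
identity permutation, to the stack coordinates $(0,0,2)$. A stand-alone
sketch of this expansion (hypothetical code, mirroring the
|IntSequence(sym, iden.getMap())| construction used below):

#include <cassert>
#include <vector>

int main()
{
  std::vector<int> sym = {2, 0, 1, 0};            // a symmetry over four stacks
  std::vector<int> coor;
  for (size_t i = 0; i < sym.size(); i++)
    for (int k = 0; k < sym[i]; k++)
      coor.push_back(static_cast<int>(i));        // repeat stack index i sym[i] times
  assert((coor == std::vector<int>{0, 0, 2}));
  return 0;
}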
@<|UnfoldedStackContainer::multAndAdd| dense code@>=
void UnfoldedStackContainer::multAndAdd(int dim, const UGSContainer& c,
UGSTensor& out) const
{
TL_RAISE_IF(c.num() != numStacks(),
"Wrong symmetry length of container for UnfoldedStackContainer::multAndAdd");
THREAD_GROUP@, gr;
SymmetrySet ss(dim, c.num());
for (symiterator si(ss); !si.isEnd(); ++si) {
if (c.check(*si)) {
THREAD* worker = new WorkerUnfoldMAADense(*this, *si, c, out);
gr.insert(worker);
}
}
gr.run();
}
@
@<|WorkerUnfoldMAADense::operator()()| code@>=
void WorkerUnfoldMAADense::operator()()
{
Permutation iden(dense_cont.num());
IntSequence coor(sym, iden.getMap());
const UGSTensor* g = dense_cont.get(sym);
cont.multAndAddStacks(coor, *g, out, &out);
}
@
@<|WorkerUnfoldMAADense| constructor code@>=
WorkerUnfoldMAADense::WorkerUnfoldMAADense(const UnfoldedStackContainer& container,
const Symmetry& s,
const UGSContainer& dcontainer,
UGSTensor& outten)
: cont(container), sym(s), dense_cont(dcontainer), out(outten)@+ {}
@ Here we implement the formula for unfolded tensors. If, for instance,
the variable $z$ of a tensor $\left[f_{z^2}\right]$ is partitioned as
$z=[a, b]$, then we perform the following:
$$
\eqalign{
\left[f_{z^2}\right]\left(\sum_c\left[\matrix{a_{c(x)}\cr b_{c(x)}}\right]
\otimes\left[\matrix{a_{c(y)}\cr b_{c(y)}}\right]\right)=&
\left[f_{aa}\right]\left(\sum_ca_{c(x)}\otimes a_{c(y)}\right)+
\left[f_{ab}\right]\left(\sum_ca_{c(x)}\otimes b_{c(y)}\right)+\cr
&\left[f_{ba}\right]\left(\sum_cb_{c(x)}\otimes a_{c(y)}\right)+
\left[f_{bb}\right]\left(\sum_cb_{c(x)}\otimes b_{c(y)}\right)\cr
}
$$
This is exactly what happens here. The code is clear. It goes through
all combinations of stacks, and each thread is responsible for the
operation on the slice corresponding to one combination of the stacks.
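A tiny stand-alone numeric sketch of this block expansion, with scalar $a$
and $b$ (so that each stacked column has two entries; all numbers are
hypothetical):

#include <cassert>

int main()
{
  double f[4] = {1.0, 2.0, 3.0, 4.0};   // [f_aa f_ab f_ba f_bb]
  double x[2] = {0.5, -1.0};            // stacked column (a_{c(x)}, b_{c(x)})
  double y[2] = {2.0, 0.25};            // stacked column (a_{c(y)}, b_{c(y)})
  double kron[4];                       // x (x) y in unfolded order
  for (int i = 0; i < 2; i++)
    for (int j = 0; j < 2; j++)
      kron[2*i+j] = x[i]*y[j];
  double lhs = 0.0;                     // [f_{z^2}] applied to the Kronecker product
  for (int k = 0; k < 4; k++)
    lhs += f[k]*kron[k];
  double rhs = f[0]*x[0]*y[0] + f[1]*x[0]*y[1]    // f_aa and f_ab blocks
    + f[2]*x[1]*y[0] + f[3]*x[1]*y[1];            // f_ba and f_bb blocks
  assert(lhs == rhs);
  return 0;
}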
@<|UnfoldedStackContainer::multAndAddSparse1| code@>=
void UnfoldedStackContainer::multAndAddSparse1(const FSSparseTensor& t,
UGSTensor& out) const
{
THREAD_GROUP@, gr;
UFSTensor dummy(0, numStacks(), t.dimen());
for (Tensor::index ui = dummy.begin(); ui != dummy.end(); ++ui) {
THREAD* worker = new WorkerUnfoldMAASparse1(*this, t, out, ui.getCoor());
gr.insert(worker);
}
gr.run();
}
@ This does a step of |@<|UnfoldedStackContainer::multAndAddSparse1| code@>| for
given coordinates. First it makes the slice for the given stack coordinates.
Then it multiplies everything that should be multiplied with the slice.
That is, it goes through all equivalences, creates a |StackProduct|, then a
|KronProdStack|, which is added to |out|. So far everything is clear.
However, we want to use the optimized |KronProdAllOptim| to minimize
the number of flops and the memory needed in the Kronecker product. So we go
through all permutations |per|, permute the coordinates to get
|percoor|, go through all equivalences, and make a |KronProdStack| and
optimize it. The result of the optimization is a permutation |oper|. Now,
we multiply the Kronecker product with the slice only if the slice
has the same ordering of coordinates as the Kronecker product
|KronProdStack|. However, this is not quite the condition we check. Since we go
through {\bf all} permutations |per|, there might be two different
permutations leading to the same ordering in |KronProdStack| and thus
the same ordering in the optimized |KronProdStack|. The two cases
would be counted twice, which is wrong. That is why we do not
condition on $\hbox{coor}\circ\hbox{oper}\circ\hbox{per} =
\hbox{coor}$, but we condition on
$\hbox{oper}\circ\hbox{per}=\hbox{id}$. Since |oper| is fixed by the
optimized product, at most one |per| can satisfy this condition. In this
way, we rule out permutations |per| leading to the same ordering of
stacks when applied to |coor|.
todo: vertically narrow slice and out according to the fill in t.
@<|WorkerUnfoldMAASparse1::operator()()| code@>=
void WorkerUnfoldMAASparse1::operator()()
{
const EquivalenceSet& eset = ebundle.get(out.dimen());
const PermutationSet& pset = tls.pbundle->get(t.dimen());
Permutation iden(t.dimen());
UPSTensor slice(t, cont.getStackSizes(), coor,
PerTensorDimens(cont.getStackSizes(), coor));
for (int iper = 0; iper < pset.getNum(); iper++) {
const Permutation& per = pset.get(iper);
IntSequence percoor(coor.size());
per.apply(coor, percoor);
for (EquivalenceSet::const_iterator it = eset.begin();
it != eset.end(); ++it) {
if ((*it).numClasses() == t.dimen()) {
StackProduct<UGSTensor> sp(cont, *it, out.getSym());
if (! sp.isZero(percoor)) {
KronProdStack<UGSTensor> kp(sp, percoor);
kp.optimizeOrder();
const Permutation& oper = kp.getPer();
if (Permutation(oper, per) == iden) {
UPSTensor ups(out.getDims(), *it, slice, kp);
{
SYNCHRO@, syn(&out, "WorkerUnfoldMAASparse1");
ups.addTo(out);
}
}
}
}
}
}
}
@
@<|WorkerUnfoldMAASparse1| constructor code@>=
WorkerUnfoldMAASparse1::WorkerUnfoldMAASparse1(const UnfoldedStackContainer& container,
const FSSparseTensor& ten,
UGSTensor& outten, const IntSequence& c)
: cont(container), t(ten), out(outten), coor(c), ebundle(*(tls.ebundle)) @+{}
@ In here we implement the formula in a slightly different way. Using the
notation of |@<|UnfoldedStackContainer::multAndAddSparse1|
code@>|, we use the fact that
$$
\left[f_{ba}\right]\left(\sum_cb_{c(x)}\otimes a_{c(y)}\right)=
\left[f_{ab}\right]\left(\sum_ca_{c(y)}\otimes b_{c(x)}\right)\cdot P
$$
where $P$ is a suitable permutation of columns. The permutation
corresponds to (in this example) a swap of $a$ and $b$. An advantage
of this approach is that we do not need a |UPSTensor| for $f_{ba}$, and
thus we decrease the number of needed slices.
So we go through all folded indices of stack coordinates, and for
each such index |fi| we make a slice and call |multAndAddStacks|. This
goes through all corresponding unfolded indices to perform the
formula. Each unsorted (unfolded) index implies a sorting permutation
|sort_per| which must be used to permute the stacks in |StackProduct|, and
to permute the equivalence classes when the |UPSTensor| is formed. In this
way the column permutation $P$ from the formula is factored into the
permutation of the |UPSTensor|.
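The column permutation $P$ can be checked by a tiny stand-alone sketch
(dimensions and data are hypothetical):

#include <cassert>

int main()
{
  double a[2] = {1.0, 2.0}, b[2] = {3.0, 4.0};
  double ab[4], ba[4];                  // a (x) b and b (x) a in unfolded order
  for (int i = 0; i < 2; i++)
    for (int j = 0; j < 2; j++)
      {
        ab[2*i+j] = a[i]*b[j];
        ba[2*i+j] = b[i]*a[j];
      }
  // P maps column (i,j) of b (x) a to column (j,i) of a (x) b
  for (int i = 0; i < 2; i++)
    for (int j = 0; j < 2; j++)
      assert(ba[2*i+j] == ab[2*j+i]);
  return 0;
}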
@<|UnfoldedStackContainer::multAndAddSparse2| code@>=
void UnfoldedStackContainer::multAndAddSparse2(const FSSparseTensor& t,
UGSTensor& out) const
{
THREAD_GROUP@, gr;
FFSTensor dummy_f(0, numStacks(), t.dimen());
for (Tensor::index fi = dummy_f.begin(); fi != dummy_f.end(); ++fi) {
THREAD* worker = new WorkerUnfoldMAASparse2(*this, t, out, fi.getCoor());
gr.insert(worker);
}
gr.run();
}
@ This does a step of |@<|UnfoldedStackContainer::multAndAddSparse2| code@>| for
given coordinates.
todo: implement |multAndAddStacks| for sparse slice as
|@<|FoldedStackContainer::multAndAddStacks| sparse code@>| and do this method as
|@<|WorkerFoldMAASparse2::operator()()| code@>|.
@<|WorkerUnfoldMAASparse2::operator()()| code@>=
void WorkerUnfoldMAASparse2::operator()()
{
GSSparseTensor slice(t, cont.getStackSizes(), coor,
TensorDimens(cont.getStackSizes(), coor));
if (slice.getNumNonZero()) {
FGSTensor fslice(slice);
UGSTensor dense_slice(fslice);
int r1 = slice.getFirstNonZeroRow();
int r2 = slice.getLastNonZeroRow();
UGSTensor dense_slice1(r1, r2-r1+1, dense_slice);
UGSTensor out1(r1, r2-r1+1, out);
cont.multAndAddStacks(coor, dense_slice1, out1, &out);
}
}
@
@<|WorkerUnfoldMAASparse2| constructor code@>=
WorkerUnfoldMAASparse2::WorkerUnfoldMAASparse2(const UnfoldedStackContainer& container,
const FSSparseTensor& ten,
UGSTensor& outten, const IntSequence& c)
: cont(container), t(ten), out(outten), coor(c) @+{}
@ For given folded coordinates of stacks |fi|, and an appropriate
tensor $g$, whose symmetry is a symmetry of |fi|, the method
contributes to |out| all tensors in the unfolded stack formula involving
the stacks chosen by |fi|.
We go through all |ui| coordinates which yield |fi| after sorting. We
construct a permutation |sort_per| which sorts |ui| to |fi|. We go
through all appropriate equivalences, and construct a |StackProduct|
from the equivalence classes permuted by |sort_per|, then a |UPSTensor|
whose column permutation is implied by the equivalence permuted by
|sort_per|. The |UPSTensor| is then added to |out|.
We cannot use the optimized |KronProdStack| here, since the symmetry
of |UGSTensor& g| prescribes the ordering of the stacks. However, if
|g| is fully symmetric, we can do the optimization harmlessly.
@<|UnfoldedStackContainer::multAndAddStacks| code@>=
void UnfoldedStackContainer::multAndAddStacks(const IntSequence& fi,
const UGSTensor& g,
UGSTensor& out, const void* ad) const
{
const EquivalenceSet& eset = ebundle.get(out.dimen());
UFSTensor dummy_u(0, numStacks(), g.dimen());
for (Tensor::index ui = dummy_u.begin(); ui != dummy_u.end(); ++ui) {
IntSequence tmp(ui.getCoor());
tmp.sort();
if (tmp == fi) {
Permutation sort_per(ui.getCoor());
sort_per.inverse();
for (EquivalenceSet::const_iterator it = eset.begin();
it != eset.end(); ++it) {
if ((*it).numClasses() == g.dimen()) {
StackProduct<UGSTensor> sp(*this, *it, sort_per, out.getSym());
if (! sp.isZero(fi)) {
KronProdStack<UGSTensor> kp(sp, fi);
if (g.getSym().isFull())
kp.optimizeOrder();
UPSTensor ups(out.getDims(), *it, sort_per, g, kp);
{
SYNCHRO@, syn(ad, "multAndAddStacks");
ups.addTo(out);
}
}
}
}
}
}
}
@ End of {\tt stack\_container.cpp} file.

View File

@ -0,0 +1,744 @@
// Copyright 2004, Ondra Kamenik
// Stack of containers.
/* Here we develop abstractions for stacked containers of tensors. For
instance, in perturbation methods for SDGE we need function
$$z(y,u,u',\sigma)=\left[\matrix{G(y,u,u',\sigma)\cr g(y,u,\sigma)\cr y\cr u}\right]$$
and we need to calculate one step of Faa Di Bruno formula
$$\left[B_{s^k}\right]_{\alpha_1\ldots\alpha_l}=\left[f_{z^l}\right]_{\beta_1\ldots\beta_l}
\sum_{c\in M_{l,k}}\prod_{m=1}^l\left[z_{s^k(c_m)}\right]^{\beta_m}_{c_m(\alpha)}$$
where we have containers for derivatives of $G$ and $g$.
The main purpose of this file is to define abstractions for stacks of
containers and possibly raw variables, and to code the |multAndAdd| method
calculating (one step of) the Faa Di Bruno formula for folded and
unfolded tensors. Note also that the tensors $\left[f_{z^l}\right]$ are
sparse.
The abstractions are built as follows. At the top, there is an
interface describing a stack of columns. It contains the pure virtual
methods needed for manipulating the container stack. For technical
reasons it is a template. Both versions (folded and unfolded) provide
all the interface necessary for the implementation of |multAndAdd|. The
second line of inheritance is first a general implementation of the
interface, |StackContainer|, and then a specific one (|ZContainer| for our
specific $z$). The only method which remains virtual after |StackContainer|
is |getType|, which is implemented in the specialization and determines the
behaviour of the stack. The complete classes are obtained by
inheriting from both branches, as drawn below:
\def\drawpenta#1#2#3#4#5{%
\hbox{$
\hgrid=40pt\vgrid=20pt%
\sarrowlength=25pt%
\gridcommdiag{%
&&\hbox{#1}&&\cr
&\llap{virtual}\arrow(-1,-1)&&\arrow(1,-1)\rlap{virtual}&\cr
\hbox{#2}&&&&\hbox{#3}\cr
\arrow(0,-1)&&&&\cr
\hbox{#4}&&&
{\multiply\sarrowlength by 63\divide\sarrowlength by 50\arrow(-1,-2)}&\cr
&\arrow(1,-1)&&&\cr
&&\hbox{#5}&&\cr
}$}}
\centerline{
\drawpenta{|StackContainerInterface<FGSTensor>|}{|StackContainer<FGSTensor>|}%
{|FoldedStackContainer|}{|ZContainer<FGSTensor>|}{|FoldedZContainer|}
}
\centerline{
\drawpenta{|StackContainerInterface<UGSTensor>|}{|StackContainer<UGSTensor>|}%
{|UnfoldedStackContainer|}{|ZContainer<UGSTensor>|}{|UnfoldedZContainer|}
}
We have also two supporting classes |StackProduct| and |KronProdStack|
and a number of worker classes used as threads. */
#ifndef STACK_CONTAINER_H
#define STACK_CONTAINER_H
#include "int_sequence.hh"
#include "equivalence.hh"
#include "tl_static.hh"
#include "t_container.hh"
#include "kron_prod.hh"
#include "permutation.hh"
#include "sthread.hh"
/* Here is the general interface to the stack container. The subclasses
maintain an |IntSequence| of stack sizes, i.e. the sizes of $G$, $g$, $y$,
and $u$; then a convenience |IntSequence| of stack offsets; then a vector of
pointers to containers, in our example $G$ and $g$.
A non-virtual subclass must implement |getType|, which determines the
dependency of stack items on symmetries. There are three possible types
for a symmetry: the derivative of a stack item wrt. the symmetry is either
a matrix, or a unit matrix, or zero.
Method |isZero| returns true if the derivative of a given stack item
wrt. a given symmetry is zero as defined by |getType|, or if the
derivative is not present in the container. In this way, we can
implement the formula as if some of the tensors were zero, even though
they are not truly zero (they are only missing).
Method |createPackedColumn| returns a vector of stack derivatives with
respect to the given symmetry and of the given column, where all zeros
from zero types, as well as unit matrices, are deleted. See {\tt
kron\_prod.hh} for an explanation. */
template <class _Ttype>
class StackContainerInterface
{
public:
typedef TensorContainer<_Ttype> _Ctype;
typedef enum { matrix, unit, zero} itype;
protected:
const EquivalenceBundle &ebundle;
public:
StackContainerInterface()
: ebundle(*(tls.ebundle))
{
}
virtual ~StackContainerInterface()
{
}
virtual const IntSequence &getStackSizes() const = 0;
virtual IntSequence &getStackSizes() = 0;
virtual const IntSequence &getStackOffsets() const = 0;
virtual IntSequence &getStackOffsets() = 0;
virtual int numConts() const = 0;
virtual const _Ctype *getCont(int i) const = 0;
virtual itype getType(int i, const Symmetry &s) const = 0;
virtual int numStacks() const = 0;
virtual bool isZero(int i, const Symmetry &s) const = 0;
virtual const _Ttype *getMatrix(int i, const Symmetry &s) const = 0;
virtual int getLengthOfMatrixStacks(const Symmetry &s) const = 0;
virtual int getUnitPos(const Symmetry &s) const = 0;
virtual Vector *createPackedColumn(const Symmetry &s,
const IntSequence &coor,
int &iu) const = 0;
int
getAllSize() const
{
return getStackOffsets()[numStacks()-1]
+ getStackSizes()[numStacks()-1];
}
};
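/* For illustration only (not part of this interface): the offsets are
   cumulative sums of the sizes, and |getAllSize| is the last offset plus the
   last size. A stand-alone sketch with hypothetical stack sizes, mirroring
   |calculateOffsets| below: */
#include <cassert>
#include <vector>

int main()
{
  std::vector<int> sizes = {4, 3, 2, 1};          // e.g. sizes of G, g, y, u
  std::vector<int> offsets(sizes.size());
  offsets[0] = 0;
  for (size_t i = 1; i < sizes.size(); i++)
    offsets[i] = offsets[i-1] + sizes[i-1];       // same recursion as calculateOffsets()
  assert(offsets.back() + sizes.back() == 10);    // same formula as getAllSize()
  return 0;
}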
/* Here is |StackContainer|, which implements almost all of the interface
|StackContainerInterface|, except for the method |getType|, which is
left to the specializations. */
template <class _Ttype>
class StackContainer : virtual public StackContainerInterface<_Ttype>
{
public:
typedef StackContainerInterface<_Ttype> _Stype;
typedef typename StackContainerInterface<_Ttype>::_Ctype _Ctype;
typedef typename StackContainerInterface<_Ttype>::itype itype;
protected:
int num_conts;
IntSequence stack_sizes;
IntSequence stack_offsets;
const _Ctype **const conts;
public:
StackContainer(int ns, int nc)
: num_conts(nc), stack_sizes(ns, 0), stack_offsets(ns, 0),
conts(new const _Ctype *[nc])
{
}
virtual ~StackContainer()
{
delete [] conts;
}
const IntSequence &
getStackSizes() const
{
return stack_sizes;
}
IntSequence &
getStackSizes()
{
return stack_sizes;
}
const IntSequence &
getStackOffsets() const
{
return stack_offsets;
}
IntSequence &
getStackOffsets()
{
return stack_offsets;
}
int
numConts() const
{
return num_conts;
}
const _Ctype *
getCont(int i) const
{
return conts[i];
}
virtual itype getType(int i, const Symmetry &s) const = 0;
int
numStacks() const
{
return stack_sizes.size();
}
bool
isZero(int i, const Symmetry &s) const
{
TL_RAISE_IF(i < 0 || i >= numStacks(),
"Wrong index to stack in StackContainer::isZero.");
return (getType(i, s) == _Stype::zero
|| (getType(i, s) == _Stype::matrix && !conts[i]->check(s)));
}
const _Ttype *
getMatrix(int i, const Symmetry &s) const
{
TL_RAISE_IF(isZero(i, s) || getType(i, s) == _Stype::unit,
"Matrix is not returned in StackContainer::getMatrix");
return conts[i]->get(s);
}
int
getLengthOfMatrixStacks(const Symmetry &s) const
{
int res = 0;
int i = 0;
while (i < numStacks() && getType(i, s) == _Stype::matrix)
res += stack_sizes[i++];
return res;
}
int
getUnitPos(const Symmetry &s) const
{
if (s.dimen() != 1)
return -1;
int i = numStacks()-1;
while (i >= 0 && getType(i, s) != _Stype::unit)
i--;
return i;
}
Vector *
createPackedColumn(const Symmetry &s,
const IntSequence &coor, int &iu) const
{
TL_RAISE_IF(s.dimen() != coor.size(),
"Incompatible coordinates for symmetry in StackContainer::createPackedColumn");
int len = getLengthOfMatrixStacks(s);
iu = -1;
int i = 0;
if (-1 != (i = getUnitPos(s)))
{
iu = stack_offsets[i] + coor[0];
len++;
}
Vector *res = new Vector(len);
i = 0;
while (i < numStacks() && getType(i, s) == _Stype::matrix)
{
const _Ttype *t = getMatrix(i, s);
Tensor::index ind(t, coor);
Vector subres(*res, stack_offsets[i], stack_sizes[i]);
subres = ConstVector(ConstGeneralMatrix(*t), *ind);
i++;
}
if (iu != -1)
(*res)[len-1] = 1;
return res;
}
protected:
void
calculateOffsets()
{
stack_offsets[0] = 0;
for (int i = 1; i < stack_offsets.size(); i++)
stack_offsets[i] = stack_offsets[i-1] + stack_sizes[i-1];
}
};
class WorkerFoldMAADense;
class WorkerFoldMAASparse1;
class WorkerFoldMAASparse2;
class WorkerFoldMAASparse4;
class FoldedStackContainer : virtual public StackContainerInterface<FGSTensor>
{
friend class WorkerFoldMAADense;
friend class WorkerFoldMAASparse1;
friend class WorkerFoldMAASparse2;
friend class WorkerFoldMAASparse4;
public:
static double fill_threshold;
void
multAndAdd(int dim, const TensorContainer<FSSparseTensor> &c,
FGSTensor &out) const
{
if (c.check(Symmetry(dim)))
multAndAdd(*(c.get(Symmetry(dim))), out);
}
void multAndAdd(const FSSparseTensor &t, FGSTensor &out) const;
void multAndAdd(int dim, const FGSContainer &c, FGSTensor &out) const;
protected:
void multAndAddSparse1(const FSSparseTensor &t, FGSTensor &out) const;
void multAndAddSparse2(const FSSparseTensor &t, FGSTensor &out) const;
void multAndAddSparse3(const FSSparseTensor &t, FGSTensor &out) const;
void multAndAddSparse4(const FSSparseTensor &t, FGSTensor &out) const;
void multAndAddStacks(const IntSequence &fi, const FGSTensor &g,
FGSTensor &out, const void *ad) const;
void multAndAddStacks(const IntSequence &fi, const GSSparseTensor &g,
FGSTensor &out, const void *ad) const;
};
class WorkerUnfoldMAADense;
class WorkerUnfoldMAASparse1;
class WorkerUnfoldMAASparse2;
class UnfoldedStackContainer : virtual public StackContainerInterface<UGSTensor>
{
friend class WorkerUnfoldMAADense;
friend class WorkerUnfoldMAASparse1;
friend class WorkerUnfoldMAASparse2;
public:
static double fill_threshold;
void
multAndAdd(int dim, const TensorContainer<FSSparseTensor> &c,
UGSTensor &out) const
{
if (c.check(Symmetry(dim)))
multAndAdd(*(c.get(Symmetry(dim))), out);
}
void multAndAdd(const FSSparseTensor &t, UGSTensor &out) const;
void multAndAdd(int dim, const UGSContainer &c, UGSTensor &out) const;
protected:
void multAndAddSparse1(const FSSparseTensor &t, UGSTensor &out) const;
void multAndAddSparse2(const FSSparseTensor &t, UGSTensor &out) const;
void multAndAddStacks(const IntSequence &fi, const UGSTensor &g,
UGSTensor &out, const void *ad) const;
};
/* Here is a specialization of the |StackContainer|. We implement
here the $z$ needed in the SDGE context. We implement |getType| and define
a constructor feeding in the data and the sizes.
Note that it has two containers: the first depends on four
variables, $G(y^*,u,u',\sigma)$, and the second depends on three
variables, $g(y^*,u,\sigma)$. So that we are able to stack them,
we make the second container $g$ depend on four variables too, the
third being a dummy $u'$; its derivative returns zero whenever the
dimension of $u'$ in the symmetry is positive. */
template <class _Ttype>
class ZContainer : public StackContainer<_Ttype>
{
public:
typedef StackContainer<_Ttype> _Tparent;
typedef StackContainerInterface<_Ttype> _Stype;
typedef typename _Tparent::_Ctype _Ctype;
typedef typename _Tparent::itype itype;
ZContainer(const _Ctype *gss, int ngss, const _Ctype *g, int ng,
int ny, int nu)
: _Tparent(4, 2)
{
_Tparent::stack_sizes[0] = ngss; _Tparent::stack_sizes[1] = ng;
_Tparent::stack_sizes[2] = ny; _Tparent::stack_sizes[3] = nu;
_Tparent::conts[0] = gss;
_Tparent::conts[1] = g;
_Tparent::calculateOffsets();
}
/* Here we say what happens if we differentiate $z$. Recall from the top of
the file how $z$ looks, and the code is clear. */
itype
getType(int i, const Symmetry &s) const
{
if (i == 0)
return _Stype::matrix;
if (i == 1)
if (s[2] > 0)
return _Stype::zero;
else
return _Stype::matrix;
if (i == 2)
if (s == Symmetry(1, 0, 0, 0))
return _Stype::unit;
else
return _Stype::zero;
if (i == 3)
if (s == Symmetry(0, 1, 0, 0))
return _Stype::unit;
else
return _Stype::zero;
TL_RAISE("Wrong stack index in ZContainer::getType");
return _Stype::zero;
}
};
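/* For illustration only (not part of this header): a stand-alone sketch of
   the dependency logic encoded in |ZContainer::getType| above, with the
   symmetry represented as a plain array of four integers and renamed types
   to avoid any clash with the classes above. */
#include <cassert>

enum sketch_itype { sk_matrix, sk_unit, sk_zero };

sketch_itype
z_get_type(int i, const int s[4])
{
  if (i == 0)                                     // G(y*,u,u',sigma): always a matrix
    return sk_matrix;
  if (i == 1)                                     // g(y*,u,sigma): zero whenever u' appears
    return s[2] > 0 ? sk_zero : sk_matrix;
  if (i == 2)                                     // y: unit for the first derivative wrt y
    return (s[0] == 1 && s[1] == 0 && s[2] == 0 && s[3] == 0) ? sk_unit : sk_zero;
  // i == 3, u: unit for the first derivative wrt u
  return (s[0] == 0 && s[1] == 1 && s[2] == 0 && s[3] == 0) ? sk_unit : sk_zero;
}

int main()
{
  int s_y[4] = {1, 0, 0, 0};                      // first derivative wrt y
  int s_up[4] = {0, 0, 1, 0};                     // first derivative wrt u'
  assert(z_get_type(2, s_y) == sk_unit);
  assert(z_get_type(1, s_up) == sk_zero);         // g does not depend on u'
  assert(z_get_type(0, s_up) == sk_matrix);       // G does
  return 0;
}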
class FoldedZContainer : public ZContainer<FGSTensor>,
public FoldedStackContainer
{
public:
typedef TensorContainer<FGSTensor> _Ctype;
FoldedZContainer(const _Ctype *gss, int ngss, const _Ctype *g, int ng,
int ny, int nu)
: ZContainer<FGSTensor>(gss, ngss, g, ng, ny, nu)
{
}
};
class UnfoldedZContainer : public ZContainer<UGSTensor>,
public UnfoldedStackContainer
{
public:
typedef TensorContainer<UGSTensor> _Ctype;
UnfoldedZContainer(const _Ctype *gss, int ngss, const _Ctype *g, int ng,
int ny, int nu)
: ZContainer<UGSTensor>(gss, ngss, g, ng, ny, nu)
{
}
};
/* Here we have another specialization of the container used in the context
of SDGE. We define a container for
$$G(y,u,u',\sigma)=g^{**}(g^*(y,u,\sigma),u',\sigma)$$
The symmetry of $g^{**}$ has length $4$ although it really depends on
three variables (to know the reason, consult
|@<|ZContainer| class declaration@>|). So, it has four stacks; the
third one is a dummy and always returns zero. The first stack
corresponds to a container of $g^*$. */
template <class _Ttype>
class GContainer : public StackContainer<_Ttype>
{
public:
typedef StackContainer<_Ttype> _Tparent;
typedef StackContainerInterface<_Ttype> _Stype;
typedef typename StackContainer<_Ttype>::_Ctype _Ctype;
typedef typename StackContainer<_Ttype>::itype itype;
GContainer(const _Ctype *gs, int ngs, int nu)
: StackContainer<_Ttype>(4, 1)
{
_Tparent::stack_sizes[0] = ngs; _Tparent::stack_sizes[1] = nu;
_Tparent::stack_sizes[2] = nu; _Tparent::stack_sizes[3] = 1;
_Tparent::conts[0] = gs;
_Tparent::calculateOffsets();
}
/* Here we define the dependencies in
$g^{**}(g^*(y,u,\sigma),u',\sigma)$. Also note that the first derivative
of $g^*$ wrt $\sigma$ is always zero, so we also add this
information. */
itype
getType(int i, const Symmetry &s) const
{
if (i == 0)
if (s[2] > 0 || s == Symmetry(0, 0, 0, 1))
return _Stype::zero;
else
return _Stype::matrix;
if (i == 1)
if (s == Symmetry(0, 0, 1, 0))
return _Stype::unit;
else
return _Stype::zero;
if (i == 2)
return _Stype::zero;
if (i == 3)
if (s == Symmetry(0, 0, 0, 1))
return _Stype::unit;
else
return _Stype::zero;
TL_RAISE("Wrong stack index in GContainer::getType");
return _Stype::zero;
}
};
class FoldedGContainer : public GContainer<FGSTensor>,
public FoldedStackContainer
{
public:
typedef TensorContainer<FGSTensor> _Ctype;
FoldedGContainer(const _Ctype *gs, int ngs, int nu)
: GContainer<FGSTensor>(gs, ngs, nu)
{
}
};
class UnfoldedGContainer : public GContainer<UGSTensor>,
public UnfoldedStackContainer
{
public:
typedef TensorContainer<UGSTensor> _Ctype;
UnfoldedGContainer(const _Ctype *gs, int ngs, int nu)
: GContainer<UGSTensor>(gs, ngs, nu)
{
}
};
/* Here we have a support class for products of |StackContainer|s. It
only adds a dimension to |StackContainer|. It selects the symmetries
according to the equivalence classes passed to the constructor. The
classes of the equivalence can be permuted by some given
permutation. Nothing else is interesting. */
template <class _Ttype>
class StackProduct
{
public:
typedef StackContainerInterface<_Ttype> _Stype;
typedef typename _Stype::_Ctype _Ctype;
typedef typename _Stype::itype itype;
protected:
const _Stype &stack_cont;
InducedSymmetries syms;
Permutation per;
public:
StackProduct(const _Stype &sc, const Equivalence &e,
const Symmetry &os)
: stack_cont(sc), syms(e, os), per(e)
{
}
StackProduct(const _Stype &sc, const Equivalence &e,
const Permutation &p, const Symmetry &os)
: stack_cont(sc), syms(e, p, os), per(e, p)
{
}
int
dimen() const
{
return syms.size();
}
int
getAllSize() const
{
return stack_cont.getAllSize();
}
const Symmetry &
getProdSym(int ip) const
{
return syms[ip];
}
bool
isZero(const IntSequence &istacks) const
{
TL_RAISE_IF(istacks.size() != dimen(),
"Wrong istacks coordinates for StackProduct::isZero");
bool res = false;
int i = 0;
while (i < dimen() && !(res = stack_cont.isZero(istacks[i], syms[i])))
i++;
return res;
}
itype
getType(int is, int ip) const
{
TL_RAISE_IF(is < 0 || is >= stack_cont.numStacks(),
"Wrong index to stack in StackProduct::getType");
TL_RAISE_IF(ip < 0 || ip >= dimen(),
"Wrong index to stack container in StackProduct::getType");
return stack_cont.getType(is, syms[ip]);
}
const _Ttype *
getMatrix(int is, int ip) const
{
return stack_cont.getMatrix(is, syms[ip]);
}
void
createPackedColumns(const IntSequence &coor,
Vector **vs, IntSequence &iu) const
{
TL_RAISE_IF(iu.size() != dimen(),
"Wrong storage length for unit flags in StackProduct::createPackedColumn");
TL_RAISE_IF(coor.size() != per.size(),
"Wrong size of index coor in StackProduct::createPackedColumn");
IntSequence perindex(coor.size());
per.apply(coor, perindex);
int off = 0;
for (int i = 0; i < dimen(); i++)
{
IntSequence percoor(perindex, off, syms[i].dimen() + off);
vs[i] = stack_cont.createPackedColumn(syms[i], percoor, iu[i]);
off += syms[i].dimen();
}
}
int
getSize(int is) const
{
return stack_cont.getStackSizes()[is];
}
int
numMatrices(const IntSequence &istacks) const
{
TL_RAISE_IF(istacks.size() != dimen(),
"Wrong size of stack coordinates in StackContainer::numMatrices");
int ret = 0;
int ip = 0;
while (ip < dimen() && getType(istacks[ip], ip) == _Stype::matrix)
{
ret++;
ip++;
}
return ret;
}
};
/* Here we inherit from the Kronecker product |KronProdAllOptim|, only to
allow for a constructor constructing from a |StackProduct|. */
template <class _Ttype>
class KronProdStack : public KronProdAllOptim
{
public:
typedef StackProduct<_Ttype> _Ptype;
typedef StackContainerInterface<_Ttype> _Stype;
/* Here we construct a |KronProdAllOptim| from a |StackProduct| and a given
selection of stack items from the stack containers in the product. We
only decide whether to insert a matrix or a unit matrix.
At this point, we do not call |KronProdAllOptim::optimizeOrder|, so
the |KronProdStack| behaves like |KronProdAll| (i.e. no optimization
is done). */
KronProdStack(const _Ptype &sp, const IntSequence &istack)
: KronProdAllOptim(sp.dimen())
{
TL_RAISE_IF(sp.dimen() != istack.size(),
"Wrong stack product dimension for KronProdStack constructor");
for (int i = 0; i < sp.dimen(); i++)
{
TL_RAISE_IF(sp.getType(istack[i], i) == _Stype::zero,
"Attempt to construct KronProdStack from zero matrix");
if (sp.getType(istack[i], i) == _Stype::unit)
setUnit(i, sp.getSize(istack[i]));
if (sp.getType(istack[i], i) == _Stype::matrix)
{
const TwoDMatrix *m = sp.getMatrix(istack[i], i);
TL_RAISE_IF(m->nrows() != sp.getSize(istack[i]),
"Wrong size of returned matrix in KronProdStack constructor");
setMat(i, *m);
}
}
}
};
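/* For illustration only: why unit factors deserve the special case above.
   Applying (I_k (x) M) to a vector never requires forming the identity;
   M is just applied block by block. Sizes and data are hypothetical. */
#include <cassert>

int main()
{
  const int k = 2, n = 2;                         // I_k (x) M with M an n x n matrix
  double M[n][n] = {{1.0, 2.0}, {3.0, 4.0}};
  double x[k*n] = {1.0, 1.0, 2.0, -1.0};
  double y[k*n];
  for (int b = 0; b < k; b++)                     // apply M to each block of x
    for (int i = 0; i < n; i++)
      {
        y[b*n+i] = 0.0;
        for (int j = 0; j < n; j++)
          y[b*n+i] += M[i][j]*x[b*n+j];
      }
  assert(y[0] == 3.0 && y[1] == 7.0 && y[2] == 0.0 && y[3] == 2.0);
  return 0;
}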
class WorkerFoldMAADense : public THREAD
{
const FoldedStackContainer &cont;
Symmetry sym;
const FGSContainer &dense_cont;
FGSTensor &out;
public:
WorkerFoldMAADense(const FoldedStackContainer &container,
const Symmetry &s,
const FGSContainer &dcontainer,
FGSTensor &outten);
void operator()();
};
class WorkerFoldMAASparse1 : public THREAD
{
const FoldedStackContainer &cont;
const FSSparseTensor &t;
FGSTensor &out;
IntSequence coor;
const EquivalenceBundle &ebundle;
public:
WorkerFoldMAASparse1(const FoldedStackContainer &container,
const FSSparseTensor &ten,
FGSTensor &outten, const IntSequence &c);
void operator()();
};
class WorkerFoldMAASparse2 : public THREAD
{
const FoldedStackContainer &cont;
const FSSparseTensor &t;
FGSTensor &out;
IntSequence coor;
public:
WorkerFoldMAASparse2(const FoldedStackContainer &container,
const FSSparseTensor &ten,
FGSTensor &outten, const IntSequence &c);
void operator()();
};
class WorkerFoldMAASparse4 : public THREAD
{
const FoldedStackContainer &cont;
const FSSparseTensor &t;
FGSTensor &out;
IntSequence coor;
public:
WorkerFoldMAASparse4(const FoldedStackContainer &container,
const FSSparseTensor &ten,
FGSTensor &outten, const IntSequence &c);
void operator()();
};
class WorkerUnfoldMAADense : public THREAD
{
const UnfoldedStackContainer &cont;
Symmetry sym;
const UGSContainer &dense_cont;
UGSTensor &out;
public:
WorkerUnfoldMAADense(const UnfoldedStackContainer &container,
const Symmetry &s,
const UGSContainer &dcontainer,
UGSTensor &outten);
void operator()();
};
class WorkerUnfoldMAASparse1 : public THREAD
{
const UnfoldedStackContainer &cont;
const FSSparseTensor &t;
UGSTensor &out;
IntSequence coor;
const EquivalenceBundle &ebundle;
public:
WorkerUnfoldMAASparse1(const UnfoldedStackContainer &container,
const FSSparseTensor &ten,
UGSTensor &outten, const IntSequence &c);
void operator()();
};
class WorkerUnfoldMAASparse2 : public THREAD
{
const UnfoldedStackContainer &cont;
const FSSparseTensor &t;
UGSTensor &out;
IntSequence coor;
public:
WorkerUnfoldMAASparse2(const UnfoldedStackContainer &container,
const FSSparseTensor &ten,
UGSTensor &outten, const IntSequence &c);
void operator()();
};
#endif

View File

@ -1,771 +0,0 @@
@q $Id: stack_container.hweb 745 2006-05-09 13:20:00Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 Stack of containers. Start of {\tt stack\_container.h} file.
Here we develop abstractions for stacked containers of tensors. For
instance, in perturbation methods for SDGE we need function
$$z(y,u,u',\sigma)=\left[\matrix{G(y,u,u',\sigma)\cr g(y,u,\sigma)\cr y\cr u}\right]$$
and we need to calculate one step of Faa Di Bruno formula
$$\left[B_{s^k}\right]_{\alpha_1\ldots\alpha_l}=\left[f_{z^l}\right]_{\beta_1\ldots\beta_l}
\sum_{c\in M_{l,k}}\prod_{m=1}^l\left[z_{s^k(c_m)}\right]^{\beta_m}_{c_m(\alpha)}$$
where we have containers for derivatives of $G$ and $g$.
The main purpose of this file is to define abstractions for stack of
containers and possibly raw variables, and code |multAndAdd| method
calculating (one step of) the Faa Di Bruno formula for folded and
unfolded tensors. Note also, that tensors $\left[f_{z^l}\right]$ are
sparse.
The abstractions are built as follows. At the top, there is an
interface describing stack of columns. It contains pure virtual
methods needed for manipulating the container stack. For technical
reasons it is a template. Both versions (folded, and unfolded) provide
all interface necessary for implementation of |multAndAdd|. The second
way of inheritance is first general implementation of the interface
|StackContainer|, and then specific (|ZContainer| for our specific
$z$). The only method which is virtual also after |StackContainer| is
|getType|, which is implemented in the specialization and determines
behaviour of the stack. The complete classes are obtained by
inheriting from the both branches, as it is drawn below:
\def\drawpenta#1#2#3#4#5{%
\hbox{$
\hgrid=40pt\vgrid=20pt%
\sarrowlength=25pt%
\gridcommdiag{%
&&\hbox{#1}&&\cr
&\llap{virtual}\arrow(-1,-1)&&\arrow(1,-1)\rlap{virtual}&\cr
\hbox{#2}&&&&\hbox{#3}\cr
\arrow(0,-1)&&&&\cr
\hbox{#4}&&&
{\multiply\sarrowlength by 63\divide\sarrowlength by 50\arrow(-1,-2)}&\cr
&\arrow(1,-1)&&&\cr
&&\hbox{#5}&&\cr
}$}}
\centerline{
\drawpenta{|StackContainerInterface<FGSTensor>|}{|StackContainer<FGSTensor>|}%
{|FoldedStackContainer|}{|ZContainer<FGSTensor>|}{|FoldedZContainer|}
}
\centerline{
\drawpenta{|StackContainerInterface<UGSTensor>|}{|StackContainer<UGSTensor>|}%
{|UnfoldedStackContainer|}{|ZContainer<UGSTensor>|}{|UnfoldedZContainer|}
}
We have also two supporting classes |StackProduct| and |KronProdStack|
and a number of worker classes used as threads.
@s StackContainerInterface int
@s StackContainer int
@s ZContainer int
@s FoldedStackContainer int
@s UnfoldedStackContainer int
@s FoldedZContainer int
@s UnfoldedZContainer int
@s WorkerFoldMAADense int
@s WorkerFoldMAASparse1 int
@s WorkerFoldMAASparse2 int
@s WorkerFoldMAASparse4 int
@s WorkerUnfoldMAADense int
@s WorkerUnfoldMAASparse1 int
@s WorkerUnfoldMAASparse2 int
@s GContainer int
@s FoldedGContainer int
@s UnfoldedGContainer int
@s StackProduct int
@s KronProdStack int
@c
#ifndef STACK_CONTAINER_H
#define STACK_CONTAINER_H
#include "int_sequence.h"
#include "equivalence.h"
#include "tl_static.h"
#include "t_container.h"
#include "kron_prod.h"
#include "permutation.h"
#include "sthread.h"
@<|StackContainerInterface| class declaration@>;
@<|StackContainer| class declaration@>;
@<|FoldedStackContainer| class declaration@>;
@<|UnfoldedStackContainer| class declaration@>;
@<|ZContainer| class declaration@>;
@<|FoldedZContainer| class declaration@>;
@<|UnfoldedZContainer| class declaration@>;
@<|GContainer| class declaration@>;
@<|FoldedGContainer| class declaration@>;
@<|UnfoldedGContainer| class declaration@>;
@<|StackProduct| class declaration@>;
@<|KronProdStack| class declaration@>;
@<|WorkerFoldMAADense| class declaration@>;
@<|WorkerFoldMAASparse1| class declaration@>;
@<|WorkerFoldMAASparse2| class declaration@>;
@<|WorkerFoldMAASparse4| class declaration@>;
@<|WorkerUnfoldMAADense| class declaration@>;
@<|WorkerUnfoldMAASparse1| class declaration@>;
@<|WorkerUnfoldMAASparse2| class declaration@>;
#endif
@ Here is the general interface to stack container. The subclasses
maintain |IntSequence| of stack sizes, i.e. size of $G$, $g$, $y$, and
$u$. Then a convenience |IntSequence| of stack offsets. Then vector of
pointers to containers, in our example $G$, and $g$.
A non-virtual subclass must implement |getType| which determines
dependency of stack items on symmetries. There are three possible types
for a symmetry. Either the stack item derivative wrt. the symmetry is
a matrix, or a unit matrix, or zero.
Method |isZero| returns true if the derivative of a given stack item
wrt. to given symmetry is zero as defined by |getType| or the
derivative is not present in the container. In this way, we can
implement the formula conditional some of the tensors are zero, which
is not true (they are only missing).
Method |createPackedColumn| returns a vector of stack derivatives with
respect to the given symmetry and of the given column, where all zeros
from zero types, or unit matrices are deleted. See {\tt
kron\_prod2.hweb} for explanation.
@<|StackContainerInterface| class declaration@>=
template <class _Ttype>@;
class StackContainerInterface {
public:@;
typedef TensorContainer<_Ttype> _Ctype;
typedef enum {@+ matrix, unit, zero@+} itype;
protected:@;
const EquivalenceBundle& ebundle;
public:@;
StackContainerInterface()
: ebundle(*(tls.ebundle))@+ {}
virtual ~StackContainerInterface()@+ {}
virtual const IntSequence& getStackSizes() const =0;
virtual IntSequence& getStackSizes() =0;
virtual const IntSequence& getStackOffsets() const =0;
virtual IntSequence& getStackOffsets() =0;
virtual int numConts() const =0;
virtual const _Ctype* getCont(int i) const =0;
virtual itype getType(int i, const Symmetry& s) const =0;
virtual int numStacks() const =0;
virtual bool isZero(int i, const Symmetry& s) const =0;
virtual const _Ttype* getMatrix(int i, const Symmetry& s) const =0;
virtual int getLengthOfMatrixStacks(const Symmetry& s) const =0;
virtual int getUnitPos(const Symmetry& s) const =0;
virtual Vector* createPackedColumn(const Symmetry& s,
const IntSequence& coor,
int& iu) const =0;
int getAllSize() const
{@+ return getStackOffsets()[numStacks()-1]
+ getStackSizes()[numStacks()-1];@+}
};
@ Here is |StackContainer|, which implements almost all interface
|StackContainerInterface| but one method |getType| which is left for
implementation to specializations.
@<|StackContainer| class declaration@>=
template <class _Ttype>@;
class StackContainer : virtual public StackContainerInterface<_Ttype> {
public:@;
typedef StackContainerInterface<_Ttype> _Stype;
typedef typename StackContainerInterface<_Ttype>::_Ctype _Ctype;
typedef typename StackContainerInterface<_Ttype>::itype itype;
protected:@;
int num_conts;
IntSequence stack_sizes;
IntSequence stack_offsets;
const _Ctype** const conts;
public:@;
StackContainer(int ns, int nc)
: num_conts(nc), stack_sizes(ns, 0), stack_offsets(ns, 0),
conts(new const _Ctype*[nc])@+ {}
virtual ~StackContainer() @+{delete [] conts;}
const IntSequence& getStackSizes() const
{@+ return stack_sizes;@+}
IntSequence& getStackSizes()
{@+ return stack_sizes;@+}
const IntSequence& getStackOffsets() const
{@+ return stack_offsets;@+}
IntSequence& getStackOffsets()
{@+ return stack_offsets;@+}
int numConts() const
{@+ return num_conts;}
const _Ctype* getCont(int i) const
{@+ return conts[i];@+}
virtual itype getType(int i, const Symmetry& s) const =0;
int numStacks() const
{@+ return stack_sizes.size();@+}
@<|StackContainer::isZero| code@>;
@<|StackContainer::getMatrix| code@>;
@<|StackContainer::getLengthOfMatrixStacks| code@>;
@<|StackContainer::getUnitPos| code@>;
@<|StackContainer::createPackedColumn| code@>;
protected:@;
@<|StackContainer::calculateOffsets| code@>;
};
@
@<|StackContainer::isZero| code@>=
bool isZero(int i, const Symmetry& s) const
{
TL_RAISE_IF(i < 0 || i >= numStacks(),
"Wrong index to stack in StackContainer::isZero.");
return (getType(i, s) == _Stype::zero ||
(getType(i, s) == _Stype::matrix && !conts[i]->check(s)));
}
@
@<|StackContainer::getMatrix| code@>=
const _Ttype* getMatrix(int i, const Symmetry& s) const
{
TL_RAISE_IF(isZero(i, s) || getType(i, s) == _Stype::unit,
"Matrix is not returned in StackContainer::getMatrix");
return conts[i]->get(s);
}
@
@<|StackContainer::getLengthOfMatrixStacks| code@>=
int getLengthOfMatrixStacks(const Symmetry& s) const
{
int res = 0;
int i = 0;
while (i < numStacks() && getType(i, s) == _Stype::matrix)
res += stack_sizes[i++];
return res;
}
@
@<|StackContainer::getUnitPos| code@>=
int getUnitPos(const Symmetry& s) const
{
if (s.dimen() != 1)
return -1;
int i = numStacks()-1;
while (i >= 0 && getType(i, s) != _Stype::unit)
i--;
return i;
}
@
@<|StackContainer::createPackedColumn| code@>=
Vector* createPackedColumn(const Symmetry& s,
const IntSequence& coor, int& iu) const
{
TL_RAISE_IF(s.dimen() != coor.size(),
"Incompatible coordinates for symmetry in StackContainer::createPackedColumn");
int len = getLengthOfMatrixStacks(s);
iu = -1;
int i = 0;
if (-1 != (i = getUnitPos(s))) {
iu = stack_offsets[i] + coor[0];
len++;
}
Vector* res = new Vector(len);
i = 0;
while (i < numStacks() && getType(i, s) == _Stype::matrix) {
const _Ttype* t = getMatrix(i, s);
Tensor::index ind(t, coor);
Vector subres(*res, stack_offsets[i], stack_sizes[i]);
subres = ConstVector(ConstGeneralMatrix(*t), *ind);
i++;
}
if (iu != -1)
(*res)[len-1] = 1;
return res;
}
@
@<|StackContainer::calculateOffsets| code@>=
void calculateOffsets()
{
stack_offsets[0] = 0;
for (int i = 1; i < stack_offsets.size(); i++)
stack_offsets[i] = stack_offsets[i-1] + stack_sizes[i-1];
}
@
@<|FoldedStackContainer| class declaration@>=
class WorkerFoldMAADense;
class WorkerFoldMAASparse1;
class WorkerFoldMAASparse2;
class WorkerFoldMAASparse4;
class FoldedStackContainer : virtual public StackContainerInterface<FGSTensor> {
friend class WorkerFoldMAADense;
friend class WorkerFoldMAASparse1;
friend class WorkerFoldMAASparse2;
friend class WorkerFoldMAASparse4;
public:@;
static double fill_threshold;
void multAndAdd(int dim, const TensorContainer<FSSparseTensor>& c ,
FGSTensor& out) const
{@+ if (c.check(Symmetry(dim))) multAndAdd(*(c.get(Symmetry(dim))), out);@+}
void multAndAdd(const FSSparseTensor& t, FGSTensor& out) const;
void multAndAdd(int dim, const FGSContainer& c, FGSTensor& out) const;
protected:@;
void multAndAddSparse1(const FSSparseTensor& t, FGSTensor& out) const;
void multAndAddSparse2(const FSSparseTensor& t, FGSTensor& out) const;
void multAndAddSparse3(const FSSparseTensor& t, FGSTensor& out) const;
void multAndAddSparse4(const FSSparseTensor& t, FGSTensor& out) const;
void multAndAddStacks(const IntSequence& fi, const FGSTensor& g,
FGSTensor& out, const void* ad) const;
void multAndAddStacks(const IntSequence& fi, const GSSparseTensor& g,
FGSTensor& out, const void* ad) const;
};
@
@<|UnfoldedStackContainer| class declaration@>=
class WorkerUnfoldMAADense;
class WorkerUnfoldMAASparse1;
class WorkerUnfoldMAASparse2;
class UnfoldedStackContainer : virtual public StackContainerInterface<UGSTensor> {
friend class WorkerUnfoldMAADense;
friend class WorkerUnfoldMAASparse1;
friend class WorkerUnfoldMAASparse2;
public:@;
static double fill_threshold;
void multAndAdd(int dim, const TensorContainer<FSSparseTensor>& c ,
UGSTensor& out) const
{@+ if (c.check(Symmetry(dim))) multAndAdd(*(c.get(Symmetry(dim))), out);@+}
void multAndAdd(const FSSparseTensor& t, UGSTensor& out) const;
void multAndAdd(int dim, const UGSContainer& c, UGSTensor& out) const;
protected:@;
void multAndAddSparse1(const FSSparseTensor& t, UGSTensor& out) const;
void multAndAddSparse2(const FSSparseTensor& t, UGSTensor& out) const;
void multAndAddStacks(const IntSequence& fi, const UGSTensor& g,
UGSTensor& out, const void* ad) const;
};
@ Here is the specialization of the |StackContainer|. We implement
here the $z$ needed in SDGE context. We implement |getType| and define
a constructor feeding the data and sizes.
Note that it has two containers, the first is dependent on four
variables $G(y^*,u,u',\sigma)$, and the second dependent on three
variables $g(y^*,u,\sigma)$. So that we would be able to stack them,
we make the second container $g$ be dependent on four variables, the
third being $u'$ a dummy and always returning zero if dimension of
$u'$ is positive.
@<|ZContainer| class declaration@>=
template <class _Ttype>@;
class ZContainer : public StackContainer<_Ttype> {
public:@;
typedef StackContainer<_Ttype> _Tparent;
typedef StackContainerInterface<_Ttype> _Stype;
typedef typename _Tparent::_Ctype _Ctype;
typedef typename _Tparent::itype itype;
ZContainer(const _Ctype* gss, int ngss, const _Ctype* g, int ng,
int ny, int nu)
: _Tparent(4, 2)
{
_Tparent::stack_sizes[0] = ngss; _Tparent::stack_sizes[1] = ng;
_Tparent::stack_sizes[2] = ny; _Tparent::stack_sizes[3] = nu;
_Tparent::conts[0] = gss;
_Tparent::conts[1] = g;
_Tparent::calculateOffsets();
}
@<|ZContainer::getType| code@>;
};
@ Here we say what happens if we differentiate $z$. Recall from the top of the
file how $z$ looks, and the code is clear.
@<|ZContainer::getType| code@>=
itype getType(int i, const Symmetry& s) const
{
if (i == 0)
return _Stype::matrix;
if (i == 1)
if (s[2] > 0)
return _Stype::zero;
else
return _Stype::matrix;
if (i == 2)
if (s == Symmetry(1,0,0,0))
return _Stype::unit;
else
return _Stype::zero;
if (i == 3)
if (s == Symmetry(0,1,0,0))
return _Stype::unit;
else
return _Stype::zero;
TL_RAISE("Wrong stack index in ZContainer::getType");
return _Stype::zero;
}
@
@<|FoldedZContainer| class declaration@>=
class FoldedZContainer : public ZContainer<FGSTensor>,
public FoldedStackContainer {
public:@;
typedef TensorContainer<FGSTensor> _Ctype;
FoldedZContainer(const _Ctype* gss, int ngss, const _Ctype* g, int ng,
int ny, int nu)
: ZContainer<FGSTensor>(gss, ngss, g, ng, ny, nu)@+ {}
};
@
@<|UnfoldedZContainer| class declaration@>=
class UnfoldedZContainer : public ZContainer<UGSTensor>,
public UnfoldedStackContainer {
public:@;
typedef TensorContainer<UGSTensor> _Ctype;
UnfoldedZContainer(const _Ctype* gss, int ngss, const _Ctype* g, int ng,
int ny, int nu)
: ZContainer<UGSTensor>(gss, ngss, g, ng, ny, nu)@+ {}
};
@ Here we have another specialization of container used in context of
SDGE. We define a container for
$$G(y,u,u',\sigma)=g^{**}(g^*(y,u,\sigma),u',\sigma)$$
The symmetry of $g^{**}$ has length $4$ although it
really depends on three variables (to know the reason, consult
|@<|ZContainer| class declaration@>|). So, it has four stacks; the
third one is a dummy and always returns zero. The first stack
corresponds to a container of $g^*$.
@<|GContainer| class declaration@>=
template <class _Ttype>@;
class GContainer : public StackContainer<_Ttype> {
public:@;
typedef StackContainer<_Ttype> _Tparent;
typedef StackContainerInterface<_Ttype> _Stype;
typedef typename StackContainer<_Ttype>::_Ctype _Ctype;
typedef typename StackContainer<_Ttype>::itype itype;
GContainer(const _Ctype* gs, int ngs, int nu)
: StackContainer<_Ttype>(4, 1)
{
_Tparent::stack_sizes[0] = ngs; _Tparent::stack_sizes[1] = nu;
_Tparent::stack_sizes[2] = nu; _Tparent::stack_sizes[3] = 1;
_Tparent::conts[0] = gs;
_Tparent::calculateOffsets();
}
@<|GContainer::getType| code@>;
};
@ Here we define the dependencies in
$g^{**}(g^*(y,u,\sigma),u',\sigma)$. Also note, that first derivative
of $g^*$ wrt $\sigma$ is always zero, so we also add this
information.
@<|GContainer::getType| code@>=
itype getType(int i, const Symmetry& s) const
{
if (i == 0)
if (s[2] > 0 || s == Symmetry(0,0,0,1))
return _Stype::zero;
else
return _Stype::matrix;
if (i == 1)
if (s == Symmetry(0,0,1,0))
return _Stype::unit;
else
return _Stype::zero;
if (i == 2)
return _Stype::zero;
if (i == 3)
if (s == Symmetry(0,0,0,1))
return _Stype::unit;
else
return _Stype::zero;
TL_RAISE("Wrong stack index in GContainer::getType");
return _Stype::zero;
}
@
@<|FoldedGContainer| class declaration@>=
class FoldedGContainer : public GContainer<FGSTensor>,
public FoldedStackContainer {
public:@;
typedef TensorContainer<FGSTensor> _Ctype;
FoldedGContainer(const _Ctype* gs, int ngs, int nu)
: GContainer<FGSTensor>(gs, ngs, nu)@+ {}
};
@
@<|UnfoldedGContainer| class declaration@>=
class UnfoldedGContainer : public GContainer<UGSTensor>,
public UnfoldedStackContainer {
public:@;
typedef TensorContainer<UGSTensor> _Ctype;
UnfoldedGContainer(const _Ctype* gs, int ngs, int nu)
: GContainer<UGSTensor>(gs, ngs, nu)@+ {}
};
@ Here we have a support class for product of |StackContainer|s. It
only adds a dimension to |StackContainer|. It selects the symmetries
according to equivalence classes passed to the constructor. The
equivalence can have permuted classes by some given
permutation. Nothing else is interesting.
@<|StackProduct| class declaration@>=
template <class _Ttype>@;
class StackProduct {
public:@;
typedef StackContainerInterface<_Ttype> _Stype;
typedef typename _Stype::_Ctype _Ctype;
typedef typename _Stype::itype itype;
protected:@;
const _Stype& stack_cont;
InducedSymmetries syms;
Permutation per;
public:@;
StackProduct(const _Stype& sc, const Equivalence& e,
const Symmetry& os)
: stack_cont(sc), syms(e, os), per(e)@+ {}
StackProduct(const _Stype& sc, const Equivalence& e,
const Permutation& p, const Symmetry& os)
: stack_cont(sc), syms(e, p, os), per(e, p)@+ {}
int dimen() const
{@+ return syms.size();@+}
int getAllSize() const
{@+ return stack_cont.getAllSize();@+}
const Symmetry& getProdSym(int ip) const
{@+ return syms[ip];@+}
@<|StackProduct::isZero| code@>;
@<|StackProduct::getType| code@>;
@<|StackProduct::getMatrix| code@>;
@<|StackProduct::createPackedColumns| code@>;
@<|StackProduct::getSize| code@>;
@<|StackProduct::numMatrices| code@>;
};
@
@<|StackProduct::isZero| code@>=
bool isZero(const IntSequence& istacks) const
{
TL_RAISE_IF(istacks.size() != dimen(),
"Wrong istacks coordinates for StackProduct::isZero");
bool res = false;
int i = 0;
while (i < dimen() && !(res = stack_cont.isZero(istacks[i], syms[i])))
i++;
return res;
}
@
@<|StackProduct::getType| code@>=
itype getType(int is, int ip) const
{
TL_RAISE_IF(is < 0 || is >= stack_cont.numStacks(),
"Wrong index to stack in StackProduct::getType");
TL_RAISE_IF(ip < 0 || ip >= dimen(),
"Wrong index to stack container in StackProduct::getType");
return stack_cont.getType(is, syms[ip]);
}
@
@<|StackProduct::getMatrix| code@>=
const _Ttype* getMatrix(int is, int ip) const
{
return stack_cont.getMatrix(is, syms[ip]);
}
@
@<|StackProduct::createPackedColumns| code@>=
void createPackedColumns(const IntSequence& coor,
Vector** vs, IntSequence& iu) const
{
TL_RAISE_IF(iu.size() != dimen(),
"Wrong storage length for unit flags in StackProduct::createPackedColumn");
TL_RAISE_IF(coor.size() != per.size(),
"Wrong size of index coor in StackProduct::createPackedColumn");
IntSequence perindex(coor.size());
per.apply(coor, perindex);
int off = 0;
for (int i = 0; i < dimen(); i++) {
IntSequence percoor(perindex, off, syms[i].dimen() + off);
vs[i] = stack_cont.createPackedColumn(syms[i], percoor, iu[i]);
off += syms[i].dimen();
}
}
@
@<|StackProduct::getSize| code@>=
int getSize(int is) const
{
return stack_cont.getStackSizes()[is];
}
@
@<|StackProduct::numMatrices| code@>=
int numMatrices(const IntSequence& istacks) const
{
TL_RAISE_IF(istacks.size() != dimen(),
"Wrong size of stack coordinates in StackContainer::numMatrices");
int ret = 0;
int ip = 0;
while (ip < dimen() && getType(istacks[ip], ip) == _Stype::matrix) {
ret++;
ip++;
}
return ret;
}
@ Here we only inherit from Kronecker product |KronProdAllOptim|, only to
allow for a constructor constructing from |StackProduct|.
@<|KronProdStack| class declaration@>=
template <class _Ttype>
class KronProdStack : public KronProdAllOptim {
public:@;
typedef StackProduct<_Ttype> _Ptype;
typedef StackContainerInterface<_Ttype> _Stype;
@<|KronProdStack| constructor code@>;
};
@ Here we construct |KronProdAllOptim| from |StackContainer| and given
selections of stack items from stack containers in the product. We
only decide whether to insert matrix, or unit matrix.
At this point, we do not call |KronProdAllOptim::optimizeOrder|, so
the |KronProdStack| behaves like |KronProdAll| (i.e. no optimization
is done).
@<|KronProdStack| constructor code@>=
KronProdStack(const _Ptype& sp, const IntSequence& istack)
: KronProdAllOptim(sp.dimen())
{
TL_RAISE_IF(sp.dimen() != istack.size(),
"Wrong stack product dimension for KronProdStack constructor");
for (int i = 0; i < sp.dimen(); i++) {
TL_RAISE_IF(sp.getType(istack[i], i) == _Stype::zero,
"Attempt to construct KronProdStack from zero matrix");
if (sp.getType(istack[i], i) == _Stype::unit)
setUnit(i, sp.getSize(istack[i]));
if (sp.getType(istack[i], i) == _Stype::matrix) {
const TwoDMatrix* m = sp.getMatrix(istack[i], i);
TL_RAISE_IF(m->nrows() != sp.getSize(istack[i]),
"Wrong size of returned matrix in KronProdStack constructor");
setMat(i, *m);
}
}
}
@
@<|WorkerFoldMAADense| class declaration@>=
class WorkerFoldMAADense : public THREAD {
const FoldedStackContainer& cont;
Symmetry sym;
const FGSContainer& dense_cont;
FGSTensor& out;
public:@;
WorkerFoldMAADense(const FoldedStackContainer& container,
const Symmetry& s,
const FGSContainer& dcontainer,
FGSTensor& outten);
void operator()();
};
@
@<|WorkerFoldMAASparse1| class declaration@>=
class WorkerFoldMAASparse1 : public THREAD {
const FoldedStackContainer& cont;
const FSSparseTensor& t;
FGSTensor& out;
IntSequence coor;
const EquivalenceBundle& ebundle;
public:@;
WorkerFoldMAASparse1(const FoldedStackContainer& container,
const FSSparseTensor& ten,
FGSTensor& outten, const IntSequence& c);
void operator()();
};
@
@<|WorkerFoldMAASparse2| class declaration@>=
class WorkerFoldMAASparse2 : public THREAD {
const FoldedStackContainer& cont;
const FSSparseTensor& t;
FGSTensor& out;
IntSequence coor;
public:@;
WorkerFoldMAASparse2(const FoldedStackContainer& container,
const FSSparseTensor& ten,
FGSTensor& outten, const IntSequence& c);
void operator()();
};
@
@<|WorkerFoldMAASparse4| class declaration@>=
class WorkerFoldMAASparse4 : public THREAD {
const FoldedStackContainer& cont;
const FSSparseTensor& t;
FGSTensor& out;
IntSequence coor;
public:@;
WorkerFoldMAASparse4(const FoldedStackContainer& container,
const FSSparseTensor& ten,
FGSTensor& outten, const IntSequence& c);
void operator()();
};
@
@<|WorkerUnfoldMAADense| class declaration@>=
class WorkerUnfoldMAADense : public THREAD {
const UnfoldedStackContainer& cont;
Symmetry sym;
const UGSContainer& dense_cont;
UGSTensor& out;
public:@;
WorkerUnfoldMAADense(const UnfoldedStackContainer& container,
const Symmetry& s,
const UGSContainer& dcontainer,
UGSTensor& outten);
void operator()();
};
@
@<|WorkerUnfoldMAASparse1| class declaration@>=
class WorkerUnfoldMAASparse1 : public THREAD {
const UnfoldedStackContainer& cont;
const FSSparseTensor& t;
UGSTensor& out;
IntSequence coor;
const EquivalenceBundle& ebundle;
public:@;
WorkerUnfoldMAASparse1(const UnfoldedStackContainer& container,
const FSSparseTensor& ten,
UGSTensor& outten, const IntSequence& c);
void operator()();
};
@
@<|WorkerUnfoldMAASparse2| class declaration@>=
class WorkerUnfoldMAASparse2 : public THREAD {
const UnfoldedStackContainer& cont;
const FSSparseTensor& t;
UGSTensor& out;
IntSequence coor;
public:@;
WorkerUnfoldMAASparse2(const UnfoldedStackContainer& container,
const FSSparseTensor& ten,
UGSTensor& outten, const IntSequence& c);
void operator()();
};
@ End of {\tt stack\_container.h} file.

dynare++/tl/cc/sthread.cc Normal file

@@ -0,0 +1,232 @@
// Copyright 2004, Ondra Kamenik
/* We set the default values of
|max_parallel_threads| for both the |posix| and |empty| implementations
and for both the joinable and detached groups. For |posix| the default
assumes a uniprocessor machine with hyper-threading, i.e. 2. */
#include <cstring>
#include "sthread.hh"
#ifdef HAVE_PTHREAD
namespace sthread
{
template<>
int thread_group<posix>::max_parallel_threads = 2;
template<>
int detach_thread_group<posix>::max_parallel_threads = 2;
// POSIX specializations methods
void *posix_thread_function(void *c);
template <>
void
thread_traits<posix>::run(_Ctype *c)
{
pthread_create(&(c->getThreadIden()), NULL, posix_thread_function, (void *) c);
}
void *posix_detach_thread_function(void *c);
template <>
void
thread_traits<posix>::detach_run(_Dtype *c)
{
pthread_attr_t attr;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
pthread_create(&(c->getThreadIden()), &attr, posix_detach_thread_function, (void *) c);
pthread_attr_destroy(&attr);
}
template <>
void
thread_traits<posix>::exit()
{
pthread_exit(NULL);
}
template <>
void
thread_traits<posix>::join(_Ctype *c)
{
pthread_join(c->getThreadIden(), NULL);
}
template <>
void
mutex_traits<posix>::init(pthread_mutex_t &m)
{
pthread_mutex_init(&m, NULL);
}
template <>
void
mutex_traits<posix>::lock(pthread_mutex_t &m)
{
pthread_mutex_lock(&m);
}
template <>
void
mutex_traits<posix>::unlock(pthread_mutex_t &m)
{
pthread_mutex_unlock(&m);
}
template <>
void
cond_traits<posix>::init(_Tcond &cond)
{
pthread_cond_init(&cond, NULL);
}
template <>
void
cond_traits<posix>::broadcast(_Tcond &cond)
{
pthread_cond_broadcast(&cond);
}
template <>
void
cond_traits<posix>::wait(_Tcond &cond, _Tmutex &mutex)
{
pthread_cond_wait(&cond, &mutex);
}
template <>
void
cond_traits<posix>::destroy(_Tcond &cond)
{
pthread_cond_destroy(&cond);
}
/* Here we instantiate the static map, and construct |PosixSynchro|
using that map. */
static posix_synchro::mutex_map_t posix_mm;
PosixSynchro::PosixSynchro(const void *c, const char *id)
: posix_synchro(c, id, posix_mm)
{
}
/* This function has the type |void* function(void*)| required by
POSIX; it typecasts its argument and runs |operator()()|. */
void *
posix_thread_function(void *c)
{
thread_traits<posix>::_Ctype *ct
= (thread_traits<posix>::_Ctype *)c;
try
{
ct->operator()();
}
catch (...)
{
ct->exit();
}
return NULL;
}
void *
posix_detach_thread_function(void *c)
{
thread_traits<posix>::_Dtype *ct
= (thread_traits<posix>::_Dtype *)c;
condition_counter<posix> *counter = ct->counter;
try
{
ct->operator()();
}
catch (...)
{
ct->exit();
}
if (counter)
counter->decrease();
return NULL;
}
}
#else
namespace sthread
{
template<>
int thread_group<empty>::max_parallel_threads = 1;
template<>
int detach_thread_group<empty>::max_parallel_threads = 1;
// non-threading specialization methods
/* The only trait methods that need to do real work are
|thread_traits::run| and |thread_traits::detach_run|, which directly
call |operator()()|. Everything else is empty. */
template <>
void
thread_traits<empty>::run(_Ctype *c)
{
c->operator()();
}
template <>
void
thread_traits<empty>::detach_run(_Dtype *c)
{
c->operator()();
}
template <>
void
thread_traits<empty>::exit()
{
}
template <>
void
thread_traits<empty>::join(_Ctype *c)
{
}
template <>
void
mutex_traits<empty>::init(Empty &m)
{
}
template <>
void
mutex_traits<empty>::lock(Empty &m)
{
}
template <>
void
mutex_traits<empty>::unlock(Empty &m)
{
}
template <>
void
cond_traits<empty>::init(_Tcond &cond)
{
}
template <>
void
cond_traits<empty>::broadcast(_Tcond &cond)
{
}
template <>
void
cond_traits<empty>::wait(_Tcond &cond, _Tmutex &mutex)
{
}
template <>
void
cond_traits<empty>::destroy(_Tcond &cond)
{
}
}
#endif
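
The trampoline functions above deserve a standalone illustration. The
following minimal sketch is our own (the names |Job| and |trampoline| are
hypothetical, not part of the library); it shows how a C++ functor is passed
through the |void *| interface required by |pthread_create|:

#include <pthread.h>
#include <cstdio>

struct Job
{
  int id;
  // The code to be run in the new thread.
  void operator()() { std::printf("job %d running\n", id); }
};

// The C-style trampoline required by POSIX: cast the argument back and invoke.
static void *trampoline(void *arg)
{
  static_cast<Job *>(arg)->operator()();
  return NULL;
}

int main()
{
  Job j = { 7 };
  pthread_t th;
  pthread_create(&th, NULL, trampoline, &j);
  pthread_join(th, NULL);
  return 0;
}

The same pattern, with an exception barrier and the counter bookkeeping
added, is what |posix_thread_function| and |posix_detach_thread_function|
implement.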


@@ -1,224 +0,0 @@
@q $Id: sthread.cweb 2269 2008-11-23 14:33:22Z michel $ @>
@q Copyright 2004, Ondra Kamenik @>
@ Start of {\tt sthreads.h} file. We set the default values for
|max_parallel_threads| for both |posix| and |empty| implementation and
both joinable and detach group. For |posix| this defaults to
uniprocessor machine with hyper-threading, this is 2.
@c
#include <cstring>
#include "sthread.h"
#ifdef HAVE_PTHREAD
namespace sthread {
template<>
int thread_group<posix>::max_parallel_threads = 2;
template<>
int detach_thread_group<posix>::max_parallel_threads = 2;
@<POSIX specializations methods@>;
}
#else
namespace sthread {
template<>
int thread_group<empty>::max_parallel_threads = 1;
template<>
int detach_thread_group<empty>::max_parallel_threads = 1;
@<non-threading specialization methods@>;
}
#endif
@
@<POSIX specializations methods@>=
@<|thread_traits| method codes@>;
@<|mutex_traits| method codes@>;
@<|cond_traits| method codes@>;
@<|PosixSynchro| constructor@>;
@<|posix_thread_function| code@>;
@<|posix_detach_thread_function| code@>;
@
@<|thread_traits| method codes@>=
void* posix_thread_function(void* c);
template <>
void thread_traits<posix>::run(_Ctype* c)
{
pthread_create(&(c->getThreadIden()), NULL, posix_thread_function, (void*) c);
}
@#
void* posix_detach_thread_function(void* c);
template <>
void thread_traits<posix>::detach_run(_Dtype* c)
{
pthread_attr_t attr;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
pthread_create(&(c->getThreadIden()), &attr, posix_detach_thread_function, (void*) c);
pthread_attr_destroy(&attr);
}
@#
template <>
void thread_traits<posix>::exit()
{
pthread_exit(NULL);
}
@#
template <>
void thread_traits<posix>::join(_Ctype* c)
{
pthread_join(c->getThreadIden(), NULL);
}
@
@<|mutex_traits| method codes@>=
template <>
void mutex_traits<posix>::init(pthread_mutex_t& m)
{
pthread_mutex_init(&m, NULL);
}
@#
template <>
void mutex_traits<posix>::lock(pthread_mutex_t& m)
{
pthread_mutex_lock(&m);
}
@#
template <>
void mutex_traits<posix>::unlock(pthread_mutex_t& m)
{
pthread_mutex_unlock(&m);
}
@
@<|cond_traits| method codes@>=
template <>
void cond_traits<posix>::init(_Tcond& cond)
{
pthread_cond_init(&cond, NULL);
}
@#
template <>
void cond_traits<posix>::broadcast(_Tcond& cond)
{
pthread_cond_broadcast(&cond);
}
@#
template <>
void cond_traits<posix>::wait(_Tcond& cond, _Tmutex& mutex)
{
pthread_cond_wait(&cond, &mutex);
}
@#
template <>
void cond_traits<posix>::destroy(_Tcond& cond)
{
pthread_cond_destroy(&cond);
}
@ Here we instantiate the static map, and construct |PosixSynchro|
using that map.
@<|PosixSynchro| constructor@>=
static posix_synchro::mutex_map_t posix_mm;
PosixSynchro::PosixSynchro(const void* c, const char* id)
: posix_synchro(c, id, posix_mm) {}
@ This function is of the type |void* function(void*)| as required by
POSIX, but it typecasts its argument and runs |operator()()|.
@<|posix_thread_function| code@>=
void* posix_thread_function(void* c)
{
thread_traits<posix>::_Ctype* ct =
(thread_traits<posix>::_Ctype*)c;
try {
ct->operator()();
} catch (...) {
ct->exit();
}
return NULL;
}
@
@<|posix_detach_thread_function| code@>=
void* posix_detach_thread_function(void* c)
{
thread_traits<posix>::_Dtype* ct =
(thread_traits<posix>::_Dtype*)c;
condition_counter<posix>* counter = ct->counter;
try {
ct->operator()();
} catch (...) {
ct->exit();
}
if (counter)
counter->decrease();
return NULL;
}
@ The only trait methods we need to work are |thread_traits::run| and
|thread_traits::detach_run|, which directly call
|operator()()|. Anything other is empty.
@<non-threading specialization methods@>=
template <>
void thread_traits<empty>::run(_Ctype* c)
{
c->operator()();
}
template <>
void thread_traits<empty>::detach_run(_Dtype* c)
{
c->operator()();
}
@#
template <>
void thread_traits<empty>::exit()
{
}
@#
template <>
void thread_traits<empty>::join(_Ctype* c)
{
}
@#
template <>
void mutex_traits<empty>::init(Empty& m)
{
}
@#
template <>
void mutex_traits<empty>::lock(Empty& m)
{
}
@#
template <>
void mutex_traits<empty>::unlock(Empty& m)
{
}
@#
template <>
void cond_traits<empty>::init(_Tcond& cond)
{
}
@#
template <>
void cond_traits<empty>::broadcast(_Tcond& cond)
{
}
@#
template <>
void cond_traits<empty>::wait(_Tcond& cond, _Tmutex& mutex)
{
}
@#
template <>
void cond_traits<empty>::destroy(_Tcond& cond)
{
}
@ End of {\tt sthreads.h} file.

dynare++/tl/cc/sthread.hh Normal file

@@ -0,0 +1,627 @@
// Copyright 2004, Ondra Kamenik
// Simple threads.
/* This file defines types making a simple interface to
multi-threading. It follows the classical C++ idioms for traits. We
have three sorts of traits. The first is |thread_traits|, which makes an
interface to thread functions (run, exit, create and join); the second
is |mutex_traits|, which makes an interface to mutexes (create, lock,
unlock); and the third is |cond_traits|, which makes an interface to
conditions (create, wait, broadcast, and destroy). At present, there
are two implementations: the first uses POSIX threads, mutexes, and
conditions; the second is serial (no parallelization).
The file provides the following interfaces templated by the types
implementing the threading (like the types |pthread_t| and
|pthread_mutex_t| for the POSIX thread and mutex):
\unorderedlist
\li |thread| is a pure virtual class, which must be inherited from, with
the method |operator()()| implemented as the running code of the
thread. This code is run as a new thread by calling the |run| method.
\li |thread_group| allows insertion of |thread|s and running all of
them simultaneously, then joining them. The maximum number of parallel
threads can be controlled. See below.
\li |synchro| object locks a piece of code so that it is executed
only serially for given data and a specified entry-point. It holds the
lock until it is destroyed. So, the typical use is to create the
|synchro| object on the stack of a function which is to be
synchronized. The synchronization can be restricted to specific data
(then a pointer is passed to |synchro|'s constructor) and to a specific
entry-point (then a |const char*| is passed to the constructor).
\li |detach_thread| inherits from |thread| and models a detached
thread, in contrast to |thread|, which models the joinable thread.
\li |detach_thread_group| groups the detached threads and runs them.
They are not joined; they are synchronized by means of a counter
counting the running threads. A change of the counter is checked by
waiting on an associated condition.
\endunorderedlist
Which implementation is selected is governed (at present) by
|HAVE_PTHREAD|. If it is defined, then POSIX threads are linked. If
it is not defined, then the serial implementation is taken. In
accordance with this, the header file defines the macros |THREAD|,
|THREAD_GROUP|, and |SYNCHRO| as the picked specialization of |thread|
(or |detach_thread|), |thread_group| (or |detach_thread_group|), and
|synchro|.
The type of implementation is controlled by the |thread_impl| integer
template parameter, which can be |posix| or |empty|.
The maximum number of parallel threads is controlled via a static
member of the |thread_group| and |detach_thread_group| classes. */
#ifndef STHREAD_H
#define STHREAD_H
#ifdef HAVE_PTHREAD
# include <pthread.h>
#else
/* Give valid types for POSIX thread types, otherwise the templates fail in empty mode.
Don't use typedefs because on some systems |pthread_t| and friends are typedefs even
without the include. */
# define pthread_t void *
# define pthread_mutex_t void *
# define pthread_cond_t void *
#endif
#include <cstdio>
#include <list>
#include <map>
namespace sthread
{
using namespace std;
class Empty
{
};
// classical IF template
/* Here is the classical IF template. */
template<bool condition, class Then, class Else>
struct IF
{
typedef Then RET;
};
template<class Then, class Else>
struct IF<false, Then, Else>
{
typedef Else RET;
};
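/* For instance (illustrative only): |IF<true, int, double>::RET| is
   |int|, while |IF<false, int, double>::RET| is |double|. This is how
   the traits below select |pthread_t| or |Empty| according to
   |thread_impl|. */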
enum { posix, empty};
template <int>
class thread_traits;
template <int>
class detach_thread;
/* The |thread| class is clear. The user implements |operator()()|;
the method |run| runs the user's code as a joinable thread, and |exit|
kills the execution. */
template <int thread_impl>
class thread
{
typedef thread_traits<thread_impl> _Ttraits;
typedef typename _Ttraits::_Tthread _Tthread;
_Tthread th;
public:
virtual ~thread()
{
}
_Tthread &
getThreadIden()
{
return th;
}
const _Tthread &
getThreadIden() const
{
return th;
}
virtual void operator()() = 0;
void
run()
{
_Ttraits::run(this);
}
void
detach_run()
{
_Ttraits::detach_run(this);
}
void
exit()
{
_Ttraits::exit();
}
};
/* The |thread_group| is also clear. We allow a user to insert the
|thread|s and then launch |run|, which runs all the threads, never
allowing more than |max_parallel_threads| at a time, and joins them at
the end. This static member can be set from outside. */
template <int thread_impl>
class thread_group
{
typedef thread_traits<thread_impl> _Ttraits;
typedef thread<thread_impl> _Ctype;
list<_Ctype *> tlist;
typedef typename list<_Ctype *>::iterator iterator;
public:
static int max_parallel_threads;
void
insert(_Ctype *c)
{
tlist.push_back(c);
}
/* The thread group class maintains a list of pointers to threads. It
takes responsibility for deallocating the threads, so we implement the
destructor. */
~thread_group()
{
while (!tlist.empty())
{
delete tlist.front();
tlist.pop_front();
}
}
/* Here we run the threads, ensuring that no more than
|max_parallel_threads| run in parallel. Moreover, we do not want to run
too few threads at a time, since that wastes resources (if there are
any). Therefore, we run batches of |max_parallel_threads| in parallel
as long as the number of remaining threads exceeds twice that
number. The remaining batch (fewer than |2*max_parallel_threads|
threads) is then run half by half. */
void
run()
{
int rem = tlist.size();
iterator pfirst = tlist.begin();
while (rem > 2*max_parallel_threads)
{
pfirst = run_portion(pfirst, max_parallel_threads);
rem -= max_parallel_threads;
}
if (rem > max_parallel_threads)
{
pfirst = run_portion(pfirst, rem/2);
rem -= rem/2;
}
run_portion(pfirst, rem);
}
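/* For example (illustrative): with 10 inserted threads and
   |max_parallel_threads| equal to 3, |run| launches batches of 3, 3,
   2 and 2 threads. */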
private:
/* This runs a given number of threads in parallel starting from the
given iterator. It returns the first iterator not run. */
iterator
run_portion(iterator start, int n)
{
int c = 0;
for (iterator i = start; c < n; ++i, c++)
{
(*i)->run();
}
iterator ret;
c = 0;
for (ret = start; c < n; ++ret, c++)
{
_Ttraits::join(*ret);
}
return ret;
}
};
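/* A minimal usage sketch (illustrative only; |MyWork| is a
   hypothetical user class, not part of the library):

   struct MyWork : public thread<posix>
   {
     void operator()() { ... the work of one thread ... }
   };

   thread_group<posix> tg;
   tg.insert(new MyWork());  // the group takes ownership
   tg.insert(new MyWork());
   tg.run();                 // runs all threads and joins them
*/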
/* Clear. We have only |run|, |detach_run|, |exit| and |join|, since
this is only a simple interface. */
template <int thread_impl>
struct thread_traits
{
typedef typename IF<thread_impl == posix, pthread_t, Empty>::RET _Tthread;
typedef thread<thread_impl> _Ctype;
typedef detach_thread<thread_impl> _Dtype;
static void run(_Ctype *c);
static void detach_run(_Dtype *c);
static void exit();
static void join(_Ctype *c);
};
/* Clear. We have only |init|, |lock|, and |unlock|. */
struct ltmmkey;
typedef pair<const void *, const char *> mmkey;
template <int thread_impl>
struct mutex_traits
{
typedef typename IF<thread_impl == posix, pthread_mutex_t, Empty>::RET _Tmutex;
typedef map<mmkey, pair<_Tmutex, int>, ltmmkey> mutex_int_map;
static void init(_Tmutex &m);
static void lock(_Tmutex &m);
static void unlock(_Tmutex &m);
};
/* Here we define a map of mutexes keyed by a pair of an address and a
string. The purpose of the map of mutexes is that, when synchronizing,
we need to publish the mutexes locking some piece of code (characterized
by the string) accessing the data (characterized by the pointer). So, if
any thread needs to pass a |synchro| object, it creates its own with
the same address and string, and must look into some public storage to
find the mutex. If the |synchro| object is created for the first
time, the mutex is created and inserted into the map. We count the
references to the mutex (the number of waiting threads) in order to
know when it is safe to remove the mutex from the map. This is the only
purpose of the reference counting. Recall that the mutex is keyed by an
address of the data; without removal, the number of mutexes would only
grow.
The map itself needs its own mutex to avoid concurrent insertions and
deletions. */
struct ltmmkey
{
bool
operator()(const mmkey &k1, const mmkey &k2) const
{
return k1.first < k2.first
|| (k1.first == k2.first && strcmp(k1.second, k2.second) < 0);
}
};
template <int thread_impl>
class mutex_map :
public mutex_traits<thread_impl>::mutex_int_map
{
typedef typename mutex_traits<thread_impl>::_Tmutex _Tmutex;
typedef mutex_traits<thread_impl> _Mtraits;
typedef pair<_Tmutex, int> mmval;
typedef map<mmkey, mmval, ltmmkey> _Tparent;
typedef typename _Tparent::iterator iterator;
typedef typename _Tparent::value_type _mvtype;
_Tmutex m;
public:
mutex_map()
{
_Mtraits::init(m);
}
void
insert(const void *c, const char *id, const _Tmutex &m)
{
_Tparent::insert(_mvtype(mmkey(c, id), mmval(m, 0)));
}
bool
check(const void *c, const char *id) const
{
return _Tparent::find(mmkey(c, id)) != _Tparent::end();
}
/* This returns a pointer to the pair of the mutex and its reference count. */
mmval *
get(const void *c, const char *id)
{
iterator it = _Tparent::find(mmkey(c, id));
if (it == _Tparent::end())
return NULL;
return &((*it).second);
}
/* This unconditionally removes the mutex from the map, regardless of
its number of references. The only user of this class should be the
|synchro| class, whose implementation must not remove a referenced
mutex. */
void
remove(const void *c, const char *id)
{
iterator it = _Tparent::find(mmkey(c, id));
if (it != _Tparent::end())
this->erase(it);
}
void
lock_map()
{
_Mtraits::lock(m);
}
void
unlock_map()
{
_Mtraits::unlock(m);
}
};
/* This is the |synchro| class. The constructor of this class tries to
lock a mutex for a particular address (identification of data) and
string (identification of entry-point). If the mutex is already
locked, it waits until it is unlocked and then returns. The destructor
releases the lock. The typical use is to construct the object on the
stack of the code being synchronized. */
template <int thread_impl>
class synchro
{
typedef typename mutex_traits<thread_impl>::_Tmutex _Tmutex;
typedef mutex_traits<thread_impl> _Mtraits;
public:
typedef mutex_map<thread_impl> mutex_map_t;
private:
const void *caller;
const char *iden;
mutex_map_t &mutmap;
public:
synchro(const void *c, const char *id, mutex_map_t &mmap)
: caller(c), iden(id), mutmap(mmap)
{
lock();
}
~synchro()
{
unlock();
}
private:
/* The |lock| function acquires the mutex in the map. First it obtains
exclusive access to the map. Then it increases the number of references
of the mutex (inserting it if it does not exist). Then it unlocks the
map, and finally tries to lock the mutex from the map. */
void
lock()
{
mutmap.lock_map();
if (!mutmap.check(caller, iden))
{
_Tmutex mut;
_Mtraits::init(mut);
mutmap.insert(caller, iden, mut);
}
mutmap.get(caller, iden)->second++;
mutmap.unlock_map();
_Mtraits::lock(mutmap.get(caller, iden)->first);
}
/* The |unlock| function first locks the map. Then it releases the
lock and decreases the number of references. If that reaches zero, it
removes the mutex. */
void
unlock()
{
mutmap.lock_map();
if (mutmap.check(caller, iden))
{
_Mtraits::unlock(mutmap.get(caller, iden)->first);
mutmap.get(caller, iden)->second--;
if (mutmap.get(caller, iden)->second == 0)
mutmap.remove(caller, iden);
}
mutmap.unlock_map();
}
};
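/* Typical use (sketch): serialize a critical section on given data,
   here via the |SYNCHRO| macro defined at the end of this file;
   |Data| and |update| are hypothetical:

   void update(Data *d)
   {
     SYNCHRO s(d, "update"); // blocks until the lock for (d, "update") is free
     ... modify *d ...
   }                         // the destructor releases the lock
*/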
/* These are traits for conditions. We need |init|, |broadcast|, |wait|
and |destroy|. */
template <int thread_impl>
struct cond_traits
{
typedef typename IF<thread_impl == posix, pthread_cond_t, Empty>::RET _Tcond;
typedef typename mutex_traits<thread_impl>::_Tmutex _Tmutex;
static void init(_Tcond &cond);
static void broadcast(_Tcond &cond);
static void wait(_Tcond &cond, _Tmutex &mutex);
static void destroy(_Tcond &cond);
};
/* Here is the condition counter. It is a counter which starts at 0
and can be increased and decreased. A thread can wait until the
counter is changed; this is implemented by a condition variable. After
the wait is done, another (or the same) thread, by calling
|waitForChange|, waits for another change. This can be dangerous, since
it is possible to wait for a change which will never happen, because
all the threads which could cause the change (by an increase or
decrease) might have finished. */
template <int thread_impl>
class condition_counter
{
typedef typename mutex_traits<thread_impl>::_Tmutex _Tmutex;
typedef typename cond_traits<thread_impl>::_Tcond _Tcond;
int counter;
_Tmutex mut;
_Tcond cond;
bool changed;
public:
/* We initialize the counter to 0 and the |changed| flag to |true|,
since the counter was changed from an undefined value to 0. */
condition_counter()
: counter(0), changed(true)
{
mutex_traits<thread_impl>::init(mut);
cond_traits<thread_impl>::init(cond);
}
/* In the destructor, we only release the resources associated with
the condition. */
~condition_counter()
{
cond_traits<thread_impl>::destroy(cond);
}
/* When increasing, we lock the mutex, advance the counter, remember it
is changed, broadcast, and release the mutex. */
void
increase()
{
mutex_traits<thread_impl>::lock(mut);
counter++;
changed = true;
cond_traits<thread_impl>::broadcast(cond);
mutex_traits<thread_impl>::unlock(mut);
}
/* Same as increase. */
void
decrease()
{
mutex_traits<thread_impl>::lock(mut);
counter--;
changed = true;
cond_traits<thread_impl>::broadcast(cond);
mutex_traits<thread_impl>::unlock(mut);
}
/* We lock the mutex, and if there was a change since the last call of
|waitForChange|, we return immediately, otherwise we wait for the
change. The mutex is released. */
int
waitForChange()
{
mutex_traits<thread_impl>::lock(mut);
if (!changed)
{
cond_traits<thread_impl>::wait(cond, mut);
}
changed = false;
int res = counter;
mutex_traits<thread_impl>::unlock(mut);
return res;
}
};
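/* Illustration: worker threads call |increase| when they start and
   |decrease| when they finish, while a coordinator repeatedly calls
   |waitForChange|; the returned value tells how many workers are still
   running. This is exactly how |detach_thread_group::run| below
   throttles the number of parallel threads. */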
/* The detached thread is the same as the joinable |thread|. We only
re-implement the |run| method to call |thread_traits::detach_run|, and
add a method which installs a counter. The counter is increased and
decreased in the body of the new thread. */
template <int thread_impl>
class detach_thread : public thread<thread_impl>
{
public:
condition_counter<thread_impl> *counter;
detach_thread() : counter(NULL)
{
}
void
installCounter(condition_counter<thread_impl> *c)
{
counter = c;
}
void
run()
{
thread_traits<thread_impl>::detach_run(this);
}
};
/* The detach thread group is (by interface) the same as
|thread_group|. The extra thing we have here is the |counter|. The
implementation of |insert| and |run| is different. */
template<int thread_impl>
class detach_thread_group
{
typedef thread_traits<thread_impl> _Ttraits;
typedef cond_traits<thread_impl> _Ctraits;
typedef detach_thread<thread_impl> _Ctype;
list<_Ctype *> tlist;
typedef typename list<_Ctype *>::iterator iterator;
condition_counter<thread_impl> counter;
public:
static int max_parallel_threads;
/* When inserting, the counter is installed to the thread. */
void
insert(_Ctype *c)
{
tlist.push_back(c);
c->installCounter(&counter);
}
/* The destructor is clear. */
~detach_thread_group()
{
while (!tlist.empty())
{
delete tlist.front();
tlist.pop_front();
}
}
/* We cycle through all threads in the group, and in each cycle we
wait for a change in the |counter|. If the counter indicates fewer
than the maximum number of parallel threads running, then a new thread
is run, and the iterator in the list is moved.
At the end we have to wait for all threads to finish. */
void
run()
{
int mpt = max_parallel_threads;
iterator it = tlist.begin();
while (it != tlist.end())
{
if (counter.waitForChange() < mpt)
{
counter.increase();
(*it)->run();
++it;
}
}
while (counter.waitForChange() > 0)
{
}
}
};
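/* A sketch of detached use (illustrative only; |MyDetachedWork| is a
   hypothetical class deriving from |detach_thread<posix>|):

   detach_thread_group<posix> dtg;
   dtg.insert(new MyDetachedWork());  // also installs the group's counter
   dtg.insert(new MyDetachedWork());
   dtg.run();  // returns only once the counter has dropped back to zero
*/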
#ifdef HAVE_PTHREAD
// POSIX thread specializations
/* Here we only define the specializations for POSIX threads. Then we
define the macros. Note that the |PosixSynchro| class constructs itself
from the static map defined in {\tt sthread.cc}. */
typedef detach_thread<posix> PosixThread;
typedef detach_thread_group<posix> PosixThreadGroup;
typedef synchro<posix> posix_synchro;
class PosixSynchro : public posix_synchro
{
public:
PosixSynchro(const void *c, const char *id);
};
# define THREAD sthread::PosixThread
# define THREAD_GROUP sthread::PosixThreadGroup
# define SYNCHRO sthread::PosixSynchro
#else
// No threading specializations
/* Here we define an empty class and use it as thread and
mutex. The |NoSynchro| class is also empty, but an empty constructor is
declared. The empty destructor is declared only to avoid an ``unused
variable'' warning. */
typedef thread<empty> NoThread;
typedef thread_group<empty> NoThreadGroup;
typedef synchro<empty> no_synchro;
class NoSynchro
{
public:
NoSynchro(const void *c, const char *id)
{
}
~NoSynchro()
{
}
};
# define THREAD sthread::NoThread
# define THREAD_GROUP sthread::NoThreadGroup
# define SYNCHRO sthread::NoSynchro
#endif
};
#endif
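
To see the pieces working together, here is a minimal sketch of our own
(assuming this header is on the include path, the static members defined in
{\tt sthread.cc} are linked in, and |HAVE_PTHREAD| together with {\tt
-lpthread} is used for the POSIX case):

#include "sthread.hh"
#include <cstdio>

// A worker summing the integers in [from, to).
class Adder : public THREAD
{
  int from, to;
  long *result;
public:
  Adder(int f, int t, long *r) : from(f), to(t), result(r) {}
  void operator()()
  {
    long s = 0;
    for (int i = from; i < to; i++)
      s += i;
    *result = s;
  }
};

int main()
{
  long r1, r2;
  THREAD_GROUP group;                      // deallocates inserted threads
  group.insert(new Adder(0, 500, &r1));
  group.insert(new Adder(500, 1000, &r2));
  group.run();                             // returns once both have finished
  std::printf("%ld\n", r1 + r2);
  return 0;
}

In the serial build the same code still works: |run| then simply executes
the two |operator()()| bodies one after the other.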


@@ -1,625 +0,0 @@
@q $Id: sthread.hweb 411 2005-08-11 12:26:13Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 Simple threads. Start of {\tt sthreads.h} file.
This file defines types making a simple interface to
multi-threading. It follows the classical C++ idioms for traits. We
have three sorts of traits. The first is a |thread_traits|, which make
interface to thread functions (run, exit, create and join), the second
is |mutex_traits|, which make interface to mutexes (create, lock,
unlock), and third is |cond_traits|, which make interface to
conditions (create, wait, broadcast, and destroy). At present, there
are two implementations. The first are POSIX threads, mutexes, and
conditions, the second is serial (no parallelization).
The file provides the following interfaces templated by the types
implementing the threading (like types |pthread_t|, and |pthread_mutex_t|
for POSIX thread and mutex):
\unorderedlist
\li |thread| is a pure virtual class, which must be inherited and a
method |operator()()| be implemented as the running code of the
thread. This code is run as a new thread by calling |run| method.
\li |thread_group| allows insertion of |thread|s and running all of
them simultaneously joining them. The number of maximum parallel
threads can be controlled. See below.
\li |synchro| object locks a piece of code to be executed only serially
for a given data and specified entry-point. It locks the code until it
is destructed. So, the typical use is to create the |synchro| object
on the stack of a function which is to be synchronized. The
synchronization can be subjected to specific data (then a pointer can
be passed to |synchro|'s constructor), and can be subjected to
specific entry-point (then |const char*| is passed to the
constructor).
\li |detach_thread| inherits from |thread| and models a detached
thread in contrast to |thread| which models the joinable thread.
\li |detach_thread_group| groups the detached threads and runs them. They
are not joined, they are synchronized by means of a counter counting
running threads. A change of the counter is checked by waiting on an
associated condition.
\endunorderedlist
What implementation is selected is governed (at present) by
|HAVE_PTHREAD|. If it is defined, then POSIX threads are linked. If
it is not defined, then serial implementation is taken. In accordance
with this, the header file defines macros |THREAD|, |THREAD_GROUP|,
and |SYNCHRO| as the picked specialization of |thread| (or |detach_thread|),
|thread_group| (or |detach_thread_group|), and |synchro|.
The type of implementation is controlled by |thread_impl| integer
template parameter, this can be |posix| or |empty|.
The number of maximum parallel threads is controlled via a static
member of |thread_group| and |detach_thread_group| classes.
@s _Tthread int
@s thread_traits int
@s thread int
@s thread_group int
@s detach_thread int
@s detach_thread_group int
@s cond_traits int
@s condition_counter int
@s mutex_traits int
@s mutex_map int
@s synchro int
@s _Tmutex int
@s pthread_t int
@s pthread_mutex_t int
@s pthread_cond_t int
@s pthread_attr_t int
@s IF int
@s Then int
@s Else int
@s RET int
@s thread_impl int
@c
#ifndef STHREAD_H
#define STHREAD_H
#ifdef HAVE_PTHREAD
# include <pthread.h>
#else
/* Give valid types for POSIX thread types, otherwise the templates fail in empty mode.
Don't use typedefs because on some systems |pthread_t| and friends are typedefs even
without the include. */
# define pthread_t void *
# define pthread_mutex_t void *
# define pthread_cond_t void *
#endif
#include <cstdio>
#include <list>
#include <map>
namespace sthread {
using namespace std;
class Empty {};
@<classical IF template@>;
enum {@+ posix, empty@+};
template <int> class thread_traits;
template <int> class detach_thread;
@<|thread| template class declaration@>;
@<|thread_group| template class declaration@>;
@<|thread_traits| template class declaration@>;
@<|mutex_traits| template class declaration@>;
@<|mutex_map| template class declaration@>;
@<|synchro| template class declaration@>;
@<|cond_traits| template class declaration@>;
@<|condition_counter| template class declaration@>;
@<|detach_thread| template class declaration@>;
@<|detach_thread_group| template class declaration@>;
#ifdef HAVE_PTHREAD
@<POSIX thread specializations@>;
#else
@<No threading specializations@>;
#endif
};
#endif
@ Here is the classical IF template.
@<classical IF template@>=
template<bool condition, class Then, class Else>
struct IF {
typedef Then RET;
};
template<class Then, class Else>
struct IF<false, Then, Else> {
typedef Else RET;
};
@ The class of |thread| is clear. The user implements |operator()()|,
the method |run| runs the user's code as joinable thread, |exit| kills the
execution.
@<|thread| template class declaration@>=
template <int thread_impl>
class thread {
typedef thread_traits<thread_impl> _Ttraits;
typedef typename _Ttraits::_Tthread _Tthread;
_Tthread th;
public:@;
virtual ~thread() {}
_Tthread& getThreadIden()
{@+ return th;@+}
const _Tthread& getThreadIden() const
{@+ return th;@+}
virtual void operator()() = 0;
void run()
{@+ _Ttraits::run(this);@+}
void detach_run()
{@+ _Ttraits::detach_run(this);@+}
void exit()
{@+ _Ttraits::exit();@+}
};
@ The |thread_group| is also clear. We allow a user to insert the
|thread|s, and then launch |run|, which will run all the threads not
allowing more than |max_parallel_threads| joining them at the
end. This static member can be set from outside.
@<|thread_group| template class declaration@>=
template <int thread_impl>
class thread_group {
typedef thread_traits<thread_impl> _Ttraits;
typedef thread<thread_impl> _Ctype;
list<_Ctype*> tlist;
typedef typename list<_Ctype*>::iterator iterator;
public:@;
static int max_parallel_threads;
void insert(_Ctype* c)
{@+ tlist.push_back(c);@+}
@<|thread_group| destructor code@>;
@<|thread_group::run| code@>;
private:@;
@<|thread_group::run_portion| code@>;
};
@ The thread group class maintains list of pointers to threads. It
takes responsibility of deallocating the threads. So we implement the
destructor.
@<|thread_group| destructor code@>=
~thread_group()
{
while (! tlist.empty()) {
delete tlist.front();
tlist.pop_front();
}
}
@ This runs a given number of threads in parallel starting from the
given iterator. It returns the first iterator not run.
@<|thread_group::run_portion| code@>=
iterator run_portion(iterator start, int n)
{
int c = 0;
for (iterator i = start; c < n; ++i, c++) {
(*i)->run();
}
iterator ret;
c = 0;
for (ret = start; c < n; ++ret, c++) {
_Ttraits::join(*ret);
}
return ret;
}
@ Here we run the threads ensuring that not more than
|max_parallel_threads| are run in parallel. More over, we do not want
to run a too low number of threads, since it is wasting with resource
(if there are). Therefore, we run in parallel |max_parallel_threads|
batches as long as the remaining threads are greater than the double
number. And then the remaining batch (less than |2*max_parallel_threads|)
is run half by half.
@<|thread_group::run| code@>=
void run()
{
int rem = tlist.size();
iterator pfirst = tlist.begin();
while (rem > 2*max_parallel_threads) {
pfirst = run_portion(pfirst, max_parallel_threads);
rem -= max_parallel_threads;
}
if (rem > max_parallel_threads) {
pfirst = run_portion(pfirst, rem/2);
rem -= rem/2;
}
run_portion(pfirst, rem);
}
@ Clear. We have only |run|, |detach_run|, |exit| and |join|, since
this is only a simple interface.
@<|thread_traits| template class declaration@>=
template <int thread_impl>
struct thread_traits {
typedef typename IF<thread_impl==posix, pthread_t, Empty>::RET _Tthread;
typedef thread<thread_impl> _Ctype;
typedef detach_thread<thread_impl> _Dtype;
static void run(_Ctype* c);
static void detach_run(_Dtype* c);
static void exit();
static void join(_Ctype* c);
};
@ Clear. We have only |init|, |lock|, and |unlock|.
@<|mutex_traits| template class declaration@>=
struct ltmmkey;
typedef pair<const void*, const char*> mmkey;
@#
template <int thread_impl>
struct mutex_traits {
typedef typename IF<thread_impl==posix, pthread_mutex_t, Empty>::RET _Tmutex;
typedef map<mmkey, pair<_Tmutex, int>, ltmmkey> mutex_int_map;
static void init(_Tmutex& m);
static void lock(_Tmutex& m);
static void unlock(_Tmutex& m);
};
@ Here we define a map of mutexes keyed by a pair of address, and a
string. A purpose of the map of mutexes is that, if synchronizing, we
need to publish mutexes locking some piece of codes (characterized by
the string) accessing the data (characterized by the pointer). So, if
any thread needs to pass a |synchro| object, it creates its own with
the same address and string, and must look to some public storage to
unlock the mutex. If the |synchro| object is created for the first
time, the mutex is created and inserted to the map. We count the
references to the mutex (number of waiting threads) to know, when it
is save to remove the mutex from the map. This is the only purpose of
counting the references. Recall, that the mutex is keyed by an address
of the data, and without removing, the number of mutexes would only
grow.
The map itself needs its own mutex to avoid concurrent insertions and
deletions.
@s mutex_int_map int
@<|mutex_map| template class declaration@>=
struct ltmmkey {
bool operator()(const mmkey& k1, const mmkey& k2) const
{return k1.first < k2.first ||
(k1.first == k2.first && strcmp(k1.second, k2.second) < 0);}
};
@#
template <int thread_impl>
class mutex_map
: public mutex_traits<thread_impl>::mutex_int_map
{
typedef typename mutex_traits<thread_impl>::_Tmutex _Tmutex;
typedef mutex_traits<thread_impl> _Mtraits;
typedef pair<_Tmutex, int> mmval;
typedef map<mmkey, mmval, ltmmkey> _Tparent;
typedef typename _Tparent::iterator iterator;
typedef typename _Tparent::value_type _mvtype;
_Tmutex m;
public:@;
mutex_map()
{@+ _Mtraits::init(m);@+}
void insert(const void* c, const char* id, const _Tmutex& m)
{@+ _Tparent::insert(_mvtype(mmkey(c,id), mmval(m,0)));@+}
bool check(const void* c, const char* id) const
{@+ return _Tparent::find(mmkey(c, id)) != _Tparent::end();@+}
@<|mutex_map::get| code@>;
@<|mutex_map::remove| code@>;
void lock_map()
{@+ _Mtraits::lock(m);@+}
void unlock_map()
{@+ _Mtraits::unlock(m);@+}
};
@ This returns a pointer to the pair of mutex and count reference number.
@<|mutex_map::get| code@>=
mmval* get(const void* c, const char* id)
{
iterator it = _Tparent::find(mmkey(c, id));
if (it == _Tparent::end())
return NULL;
return &((*it).second);
}
@ This removes unconditionally the mutex from the map regardless its
number of references. The only user of this class should be |synchro|
class, it implementation must not remove referenced mutex.
@<|mutex_map::remove| code@>=
void remove(const void* c, const char* id)
{
iterator it = _Tparent::find(mmkey(c, id));
if (it != _Tparent::end())
this->erase(it);
}
@ This is the |synchro| class. The constructor of this class tries to
lock a mutex for a particular address (identification of data) and
string (identification of entry-point). If the mutex is already
locked, it waits until it is unlocked and then returns. The destructor
releases the lock. The typical use is to construct the object on the
stacked of the code being synchronized.
@<|synchro| template class declaration@>=
template <int thread_impl>
class synchro {
typedef typename mutex_traits<thread_impl>::_Tmutex _Tmutex;
typedef mutex_traits<thread_impl> _Mtraits;
public:@;
typedef mutex_map<thread_impl> mutex_map_t;
private:@;
const void* caller;
const char* iden;
mutex_map_t& mutmap;
public:@;
synchro(const void* c, const char* id, mutex_map_t& mmap)
: caller(c), iden(id), mutmap(mmap)
{@+ lock();@+}
~synchro()
{@+ unlock();@+}
private:@;
@<|synchro::lock| code@>;
@<|synchro::unlock| code@>;
};
@ The |lock| function acquires the mutex in the map. First it tries to
get an exclusive access to the map. Then it increases a number of
references of the mutex (if it does not exists, it inserts it). Then
unlocks the map, and finally tries to lock the mutex of the map.
@<|synchro::lock| code@>=
void lock() {
mutmap.lock_map();
if (!mutmap.check(caller, iden)) {
_Tmutex mut;
_Mtraits::init(mut);
mutmap.insert(caller, iden, mut);
}
mutmap.get(caller, iden)->second++;
mutmap.unlock_map();
_Mtraits::lock(mutmap.get(caller, iden)->first);
}
@ The |unlock| function first locks the map. Then releases the lock,
and decreases a number of references. If it is zero, it removes the
mutex.
@<|synchro::unlock| code@>=
void unlock() {
mutmap.lock_map();
if (mutmap.check(caller, iden)) {
_Mtraits::unlock(mutmap.get(caller, iden)->first);
mutmap.get(caller, iden)->second--;
if (mutmap.get(caller, iden)->second == 0)
mutmap.remove(caller, iden);
}
mutmap.unlock_map();
}
@ These are traits for conditions. We need |init|, |broadcast|, |wait|
and |destroy|.
@<|cond_traits| template class declaration@>=
template <int thread_impl>
struct cond_traits {
typedef typename IF<thread_impl==posix, pthread_cond_t, Empty>::RET _Tcond;
typedef typename mutex_traits<thread_impl>::_Tmutex _Tmutex;
static void init(_Tcond& cond);
static void broadcast(_Tcond& cond);
static void wait(_Tcond& cond, _Tmutex& mutex);
static void destroy(_Tcond& cond);
};
@ Here is the condition counter. It is a counter which starts at 0,
and can be increased and decreased. A thread can wait until the
counter is changed, this is implemented by condition. After the wait
is done, another (or the same) thread, by calling |waitForChange|
waits for another change. This can be dangerous, since it is possible
to wait for a change which will not happen, because all the threads
which can cause the change (by increase of decrease) might had
finished.
@<|condition_counter| template class declaration@>=
template <int thread_impl>
class condition_counter {
typedef typename mutex_traits<thread_impl>::_Tmutex _Tmutex;
typedef typename cond_traits<thread_impl>::_Tcond _Tcond;
int counter;
_Tmutex mut;
_Tcond cond;
bool changed;
public:@;
@<|condition_counter| constructor code@>;
@<|condition_counter| destructor code@>;
@<|condition_counter::increase| code@>;
@<|condition_counter::decrease| code@>;
@<|condition_counter::waitForChange| code@>;
};
@ We initialize the counter to 0, and |changed| flag to |true|, since
the counter was change from undefined value to 0.
@<|condition_counter| constructor code@>=
condition_counter()
: counter(0), changed(true)
{
mutex_traits<thread_impl>::init(mut);
cond_traits<thread_impl>::init(cond);
}
@ In destructor, we only release the resources associated with the
condition.
@<|condition_counter| destructor code@>=
~condition_counter()
{
cond_traits<thread_impl>::destroy(cond);
}
@ When increasing, we lock the mutex, advance the counter, remember it
is changed, broadcast, and release the mutex.
@<|condition_counter::increase| code@>=
void increase()
{
mutex_traits<thread_impl>::lock(mut);
counter++;
changed = true;
cond_traits<thread_impl>::broadcast(cond);
mutex_traits<thread_impl>::unlock(mut);
}
@ Same as increase.
@<|condition_counter::decrease| code@>=
void decrease()
{
mutex_traits<thread_impl>::lock(mut);
counter--;
changed = true;
cond_traits<thread_impl>::broadcast(cond);
mutex_traits<thread_impl>::unlock(mut);
}
@ We lock the mutex, and if there was a change since the last call of
|waitForChange|, we return immediately, otherwise we wait for the
change. The mutex is released.
@<|condition_counter::waitForChange| code@>=
int waitForChange()
{
mutex_traits<thread_impl>::lock(mut);
if (!changed) {
cond_traits<thread_impl>::wait(cond, mut);
}
changed = false;
int res = counter;
mutex_traits<thread_impl>::unlock(mut);
return res;
}
@ The detached thread is the same as joinable |thread|. We only
re-implement |run| method to call |thread_traits::detach_run|, and add
a method which installs a counter. The counter is increased and
decreased on the body of the new thread.
@<|detach_thread| template class declaration@>=
template <int thread_impl>
class detach_thread : public thread<thread_impl> {
public:@;
condition_counter<thread_impl>* counter;
detach_thread() : counter(NULL) {}
void installCounter(condition_counter<thread_impl>* c)
{@+ counter = c;@+}
void run()
{@+thread_traits<thread_impl>::detach_run(this);@+}
};
@ The detach thread group is (by interface) the same as
|thread_group|. The extra thing we have here is the |counter|. The
implementation of |insert| and |run| is different.
@<|detach_thread_group| template class declaration@>=
template<int thread_impl>
class detach_thread_group {
typedef thread_traits<thread_impl> _Ttraits;
typedef cond_traits<thread_impl> _Ctraits;
typedef detach_thread<thread_impl> _Ctype;
list<_Ctype *> tlist;
typedef typename list<_Ctype*>::iterator iterator;
condition_counter<thread_impl> counter;
public:@;
static int max_parallel_threads;
@<|detach_thread_group::insert| code@>;
@<|detach_thread_group| destructor code@>;
@<|detach_thread_group::run| code@>;
};
@ When inserting, the counter is installed to the thread.
@<|detach_thread_group::insert| code@>=
void insert(_Ctype* c)
{
tlist.push_back(c);
c->installCounter(&counter);
}
@ The destructor is clear.
@<|detach_thread_group| destructor code@>=
~detach_thread_group()
{
while (!tlist.empty()) {
delete tlist.front();
tlist.pop_front();
}
}
@ We cycle through all threads in the group, and in each cycle we wait
for the change in the |counter|. If the counter indicates less than
maximum parallel threads running, then a new thread is run, and the
iterator in the list is moved.
At the end we have to wait for all thread to finish.
@<|detach_thread_group::run| code@>=
void run()
{
int mpt = max_parallel_threads;
iterator it = tlist.begin();
while (it != tlist.end()) {
if (counter.waitForChange() < mpt) {
counter.increase();
(*it)->run();
++it;
}
}
while (counter.waitForChange() > 0) {}
}
@ Here we only define the specializations for POSIX threads. Then we
define the macros. Note that the |PosixSynchro| class construct itself
from the static map defined in {\tt sthreads.cpp}.
@<POSIX thread specializations@>=
typedef detach_thread<posix> PosixThread;
typedef detach_thread_group<posix> PosixThreadGroup;
typedef synchro<posix> posix_synchro;
class PosixSynchro : public posix_synchro {
public:@;
PosixSynchro(const void* c, const char* id);
};
@#
#define THREAD@, sthread::PosixThread
#define THREAD_GROUP@, sthread::PosixThreadGroup
#define SYNCHRO@, sthread::PosixSynchro
@ Here we define an empty class and use it as thread and
mutex. |NoSynchro| class is also empty, but an empty constructor is
declared. The empty destructor is declared only to avoid ``unused
variable warning''.
@<No threading specializations@>=
typedef thread<empty> NoThread;
typedef thread_group<empty> NoThreadGroup;
typedef synchro<empty> no_synchro;
class NoSynchro {
public:@;
NoSynchro(const void* c, const char* id) {}
~NoSynchro() {}
};
@#
#define THREAD@, sthread::NoThread
#define THREAD_GROUP@, sthread::NoThreadGroup
#define SYNCHRO@, sthread::NoSynchro
@ End of {\tt sthreads.h} file.

dynare++/tl/cc/symmetry.cc Normal file

@@ -0,0 +1,144 @@
// Copyright (C) 2004-2011, Ondra Kamenik
#include "symmetry.hh"
#include "permutation.hh"
#include <cstdio>
/* Construct symmetry as numbers of successively equal items in the sequence. */
Symmetry::Symmetry(const IntSequence &s)
: IntSequence(s.getNumDistinct(), 0)
{
int p = 0;
if (s.size() > 0)
operator[](p) = 1;
for (int i = 1; i < s.size(); i++)
{
if (s[i] != s[i-1])
p++;
operator[](p)++;
}
}
/* Find a class of the symmetry containing a given index. */
int
Symmetry::findClass(int i) const
{
int j = 0;
int sum = 0;
do
{
sum += operator[](j);
j++;
}
while (j < size() && sum <= i);
return j-1;
}
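/* For example (illustrative): for the symmetry $(3,1,2)$, indices 0, 1
   and 2 fall into class 0, index 3 into class 1, and indices 4 and 5
   into class 2. */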
/* The symmetry is full if it allows for any permutation of indices.
This means that there is at most one non-zero index. */
bool
Symmetry::isFull() const
{
int count = 0;
for (int i = 0; i < num(); i++)
if (operator[](i) != 0)
count++;
return count <= 1;
}
/* Here we construct the beginning of the |symiterator|. The first
symmetry index is 0. If the length is 2, the second index is the
dimension; otherwise we create the subordinal symmetry set and its
beginning as a subordinal |symiterator|. */
symiterator::symiterator(SymmetrySet &ss)
: s(ss), subit(NULL), subs(NULL), end_flag(false)
{
s.sym()[0] = 0;
if (s.size() == 2)
{
s.sym()[1] = s.dimen();
}
else
{
subs = new SymmetrySet(s, s.dimen());
subit = new symiterator(*subs);
}
}
symiterator::~symiterator()
{
if (subit)
delete subit;
if (subs)
delete subs;
}
/* Here we move to the next symmetry. We do so only if we are not at
the end. If the length is 2, we increase the lower index and decrease
the upper index; otherwise we increase the subordinal symmetry. If that
reaches its end, we recreate the subordinal symmetry set and set the
subordinal iterator to the beginning. Finally we test whether we have
reached the end, which is recognized when the lowest index exceeds the
dimension. */
symiterator &
symiterator::operator++()
{
if (!end_flag)
{
if (s.size() == 2)
{
s.sym()[0]++;
s.sym()[1]--;
}
else
{
++(*subit);
if (subit->isEnd())
{
delete subit;
delete subs;
s.sym()[0]++;
subs = new SymmetrySet(s, s.dimen()-s.sym()[0]);
subit = new symiterator(*subs);
}
}
if (s.sym()[0] == s.dimen()+1)
end_flag = true;
}
return *this;
}
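/* Illustration: for a |SymmetrySet| of length 3 and dimension 2,
   successive increments enumerate the symmetries (0,0,2), (0,1,1),
   (0,2,0), (1,0,1), (1,1,0) and (2,0,0). */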
InducedSymmetries::InducedSymmetries(const Equivalence &e, const Symmetry &s)
{
for (Equivalence::const_seqit i = e.begin(); i != e.end(); ++i)
{
push_back(Symmetry(s, *i));
}
}
// |InducedSymmetries| permuted constructor code
InducedSymmetries::InducedSymmetries(const Equivalence &e, const Permutation &p,
const Symmetry &s)
{
for (int i = 0; i < e.numClasses(); i++)
{
Equivalence::const_seqit it = e.find(p.getMap()[i]);
push_back(Symmetry(s, *it));
}
}
/* Debug print. */
void
InducedSymmetries::print() const
{
printf("Induced symmetries: %lu\n", (unsigned long) size());
for (unsigned int i = 0; i < size(); i++)
operator[](i).print();
}


@@ -1,153 +0,0 @@
@q Copyright (C) 2004-2011, Ondra Kamenik @>
@ Start of {\tt symmetry.cpp} file.
@c
#include "symmetry.h"
#include "permutation.h"
#include <cstdio>
@<|Symmetry| constructor code@>;
@<|Symmetry::findClass| code@>;
@<|Symmetry::isFull| code@>;
@<|symiterator| constructor code@>;
@<|symiterator| destructor code@>;
@<|symiterator::operator++| code@>;
@<|InducedSymmetries| constructor code@>;
@<|InducedSymmetries| permuted constructor code@>;
@<|InducedSymmetries::print| code@>;
@ Construct symmetry as numbers of successively equal items in the sequence.
@<|Symmetry| constructor code@>=
Symmetry::Symmetry(const IntSequence& s)
: IntSequence(s.getNumDistinct(), 0)
{
int p = 0;
if (s.size() > 0)
operator[](p) = 1;
for (int i = 1; i < s.size(); i++) {
if (s[i] != s[i-1])
p++;
operator[](p)++;
}
}
@ Find a class of the symmetry containing a given index.
@<|Symmetry::findClass| code@>=
int Symmetry::findClass(int i) const
{
int j = 0;
int sum = 0;
do {
sum += operator[](j);
j++;
} while (j < size() && sum <= i);
return j-1;
}
@ The symmetry is full if it allows for any permutation of indices. It
means, that there is at most one non-zero index.
@<|Symmetry::isFull| code@>=
bool Symmetry::isFull() const
{
int count = 0;
for (int i = 0; i < num(); i++)
if (operator[](i) != 0)
count++;
return count <=1;
}
@ Here we construct the beginning of the |symiterator|. The first
symmetry index is 0. If length is 2, the second index is the
dimension, otherwise we create the subordinal symmetry set and its
beginning as subordinal |symiterator|.
@<|symiterator| constructor code@>=
symiterator::symiterator(SymmetrySet& ss)
: s(ss), subit(NULL), subs(NULL), end_flag(false)
{
s.sym()[0] = 0;
if (s.size() == 2) {
s.sym()[1] = s.dimen();
} else {
subs = new SymmetrySet(s, s.dimen());
subit = new symiterator(*subs);
}
}
@
@<|symiterator| destructor code@>=
symiterator::~symiterator( )
{
if (subit)
delete subit;
if (subs)
delete subs;
}
@ Here we move to the next symmetry. We do so only, if we are not at
the end. If length is 2, we increase lower index and decrease upper
index, otherwise we increase the subordinal symmetry. If we got to the
end, we recreate the subordinal symmetry set and set the subordinal
iterator to the beginning. At the end we test, if we are not at the
end. This is recognized if the lowest index exceeded the dimension.
@<|symiterator::operator++| code@>=
symiterator& symiterator::operator++()
{
if (!end_flag) {
if (s.size() == 2) {
s.sym()[0]++;
s.sym()[1]--;
} else {
++(*subit);
if (subit->isEnd()) {
delete subit;
delete subs;
s.sym()[0]++;
subs = new SymmetrySet(s, s.dimen()-s.sym()[0]);
subit = new symiterator(*subs);
}
}
if (s.sym()[0] == s.dimen()+1)
end_flag=true;
}
return *this;
}
@
@<|InducedSymmetries| constructor code@>=
InducedSymmetries::InducedSymmetries(const Equivalence& e, const Symmetry& s)
{
for (Equivalence::const_seqit i = e.begin(); i != e.end(); ++i) {
push_back(Symmetry(s, *i));
}
}
@
@<|InducedSymmetries| permuted constructor code@>=
InducedSymmetries::InducedSymmetries(const Equivalence& e, const Permutation& p,
const Symmetry& s)
{
for (int i = 0; i < e.numClasses(); i++) {
Equivalence::const_seqit it = e.find(p.getMap()[i]);
push_back(Symmetry(s, *it));
}
}
@ Debug print.
@<|InducedSymmetries::print| code@>=
void InducedSymmetries::print() const
{
printf("Induced symmetries: %lu\n", (unsigned long) size());
for (unsigned int i = 0; i < size(); i++)
operator[](i).print();
}
@ End of {\tt symmetry.cpp} file.

dynare++/tl/cc/symmetry.hh Normal file

@@ -0,0 +1,227 @@
// Copyright 2004, Ondra Kamenik
// Symmetry.
/* Symmetry is an abstraction for a term of the form $y^3u^2$. It manages
only indices, not the variable names. So if one uses this
abstraction, he must keep in mind that $y$ is the first, and $u$ is
the second.
In fact, the symmetry is a special case of equivalence, but its
implementation is much simpler. We do not need an abstraction for the
term $yyuyu$, since by the symmetry of mixed partial derivatives
(Young's theorem) it can be collected into the term $y^3u^2$. That is
why the equivalence is too general for our purposes.
One of the main purposes of the tensor library is to calculate something like:
$$\left[B_{y^2u^3}\right]_{\alpha_1\alpha_2\beta_1\beta_2\beta_3}
=\left[g_{y^l}\right]_{\gamma_1\ldots\gamma_l}
\left(\sum_{c\in M_{l,5}}
\prod_{m=1}^l\left[g_{c_m}\right]^{\gamma_m}_{c_m(\alpha,\beta)}\right)$$
If, for instance, $l=3$, and $c=\{\{0,4\},\{1,2\},\{3\}\}$, then we
have to calculate
$$\left[g_{y^3}\right]_{\gamma_1\gamma_2\gamma_3}
\left[g_{yu}\right]^{\gamma_1}_{\alpha_1\beta_3}
\left[g_{yu}\right]^{\gamma_2}_{\alpha_2\beta_1}
\left[g_u\right]^{\gamma_3}_{\beta_2}
$$
We must be able to calculate a symmetry induced by the symmetry
$y^2u^3$ and by an equivalence class from the equivalence $c$. For the
equivalence class $\{0,4\}$ the induced symmetry is $yu$, since we pick
the first and fifth variables from $y^2u^3$. For a given outer
symmetry, the class |InducedSymmetries| does this for all classes of a
given equivalence.
We also need to cycle through all possible symmetries yielding a given
dimension. For this purpose we define the classes |SymmetrySet| and
|symiterator|.
The symmetry is implemented as an |IntSequence|; in fact, it inherits
from it. */
#ifndef SYMMETRY_H
#define SYMMETRY_H
#include "equivalence.hh"
#include "int_sequence.hh"
#include <list>
#include <vector>
/* Clear. The method |isFull| returns true if and only if the symmetry
allows for any permutation of indices. */
class Symmetry : public IntSequence
{
public:
/* We provide three constructors for symmetries of the form $y^n$,
$y^nu^m$, $y^nu^m\sigma^k$. Also a copy constructor, and finally a
constructor of implied symmetry for a symmetry and an equivalence
class. It is already implemented in |IntSequence| so we only call
appropriate constructor of |IntSequence|. We also provide the
subsymmetry, which takes the given length of symmetry from the end.
The last constructor constructs a symmetry from an integer sequence
(supposed to be ordered) as a symmetry counting successively equal
items. For instance the sequence $(a,a,a,b,c,c,d,d,d,d)$ produces
symmetry $(3,1,2,4)$. */
Symmetry(int len, const char *dummy)
: IntSequence(len, 0)
{
}
Symmetry(int i1)
: IntSequence(1, i1)
{
}
Symmetry(int i1, int i2)
: IntSequence(2)
{
operator[](0) = i1; operator[](1) = i2;
}
Symmetry(int i1, int i2, int i3)
: IntSequence(3)
{
operator[](0) = i1;
operator[](1) = i2;
operator[](2) = i3;
}
Symmetry(int i1, int i2, int i3, int i4)
: IntSequence(4)
{
operator[](0) = i1;
operator[](1) = i2;
operator[](2) = i3;
operator[](3) = i4;
}
Symmetry(const Symmetry &s)
: IntSequence(s)
{
}
Symmetry(const Symmetry &s, const OrdSequence &cl)
: IntSequence(s, cl.getData())
{
}
Symmetry(Symmetry &s, int len)
: IntSequence(s, s.size()-len, s.size())
{
}
Symmetry(const IntSequence &s);
int
num() const
{
return size();
}
int
dimen() const
{
return sum();
}
int findClass(int i) const;
bool isFull() const;
};
/* The class |SymmetrySet| defines a set of symmetries of a given
length having a given dimension. It does not store all the symmetries;
rather, it provides storage for one symmetry, which is changed as an
associated iterator moves.
The iterator class is |symiterator|. It is implemented
recursively. The iterator object, when created, creates a subordinal
iterator, which iterates over a symmetry set whose length is one less
and whose dimension is the former dimension. When the subordinal
iterator reaches its end, the superordinal iterator increases the
leftmost index in the symmetry, resets the subordinal symmetry set with
a different dimension, and iterates through the subordinal symmetry set
until its end, and so on. That is why we also provide a |SymmetrySet|
constructor for the construction of a subordinal symmetry set.
The typical usage of the abstractions for |SymmetrySet| and
|symiterator| is as follows:
\kern0.3cm
\centerline{|for (symiterator si(SymmetrySet(6, 4)); !si.isEnd(); ++si) {body}|}
\kern0.3cm
\noindent It goes through all symmetries of size 4 having dimension
6. One can use |*si| as the symmetry in the body. */
class SymmetrySet
{
Symmetry run;
int dim;
public:
SymmetrySet(int d, int length)
: run(length, ""), dim(d)
{
}
SymmetrySet(SymmetrySet &s, int d)
: run(s.run, s.size()-1), dim(d)
{
}
int
dimen() const
{
return dim;
}
const Symmetry &
sym() const
{
return run;
}
Symmetry &
sym()
{
return run;
}
int
size() const
{
return run.size();
}
};
/* The logic of |symiterator| was described with the |SymmetrySet|
class above. Here we only comment that the class has a reference to
the |SymmetrySet| only to know the dimension and to access its
symmetry storage. Further, we have pointers to the subordinal
|symiterator| and its |SymmetrySet|. These are pointers, since the
recursion ends at length equal to 2, in which case these pointers are
|NULL|.
The constructor creates the iterator initialized to the first
symmetry (the beginning). */
class symiterator
{
SymmetrySet &s;
symiterator *subit;
SymmetrySet *subs;
bool end_flag;
public:
symiterator(SymmetrySet &ss);
~symiterator();
symiterator &operator++();
bool
isEnd() const
{
return end_flag;
}
const Symmetry &
operator*() const
{
return s.sym();
}
};
/* This simple abstraction just constructs a vector of induced
symmetries from the given equivalence and outer symmetry. A
permutation might optionally permute the classes of the equivalence. */
class InducedSymmetries : public vector<Symmetry>
{
public:
InducedSymmetries(const Equivalence &e, const Symmetry &s);
InducedSymmetries(const Equivalence &e, const Permutation &p, const Symmetry &s);
void print() const;
};
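/* A short sketch (reusing the example from the introduction): for the
outer symmetry $y^2u^3$ and an equivalence holding the classes
$\{0,4\}$, $\{1,2\}$ and $\{3\}$, the induced symmetries are $yu$,
$yu$ and $u$, i.e. $(1,1)$, $(1,1)$ and $(0,1)$. Assuming |e| is such
an |Equivalence| over a 5-element set:

   InducedSymmetries is(e, Symmetry(2, 3));
   is.print();
*/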
#endif

View File

@ -1,208 +0,0 @@
@q $Id: symmetry.hweb 841 2006-07-27 14:41:11Z tamas $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 Symmetry. This is {\tt symmetry.h} file
Symmetry is an abstraction for a term of the form $y^3u^2$. It manages
only indices, not the variable names. So if one uses this
abstraction, he must keep in mind that $y$ is the first, and $u$ is
the second.
In fact, the symmetry is a special case of equivalence, but its
implementation is much simpler. We do not need an abstraction for the
term $yyuyu$ but due to Green theorem we can have term $y^3u^2$. That
is why the equivalence is too general for our purposes.
One of a main purposes of the tensor library is to calculate something like:
$$\left[B_{y^2u^3}\right]_{\alpha_1\alpha_2\beta_1\beta_2\beta_3}
=\left[g_{y^l}\right]_{\gamma_1\ldots\gamma_l}
\left(\sum_{c\in M_{l,5}}
\prod_{m=1}^l\left[g_{c_m}\right]^{\gamma_m}_{c_m(\alpha,\beta)}\right)$$
If, for instance, $l=3$, and $c=\{\{0,4\},\{1,2\},\{3\}\}$, then we
have to calculate
$$\left[g_{y^3}\right]_{\gamma_1\gamma_2\gamma_3}
\left[g_{yu}\right]^{\gamma_1}_{\alpha_1\beta_3}
\left[g_{yu}\right]^{\gamma_2}_{\alpha_2\beta_1}
\left[g_u\right]^{\gamma_3}_{\beta_2}
$$
We must be able to calculate a symmetry induced by symmetry $y^2u^3$
and by an equivalence class from equivalence $c$. For equivalence
class $\{0,4\}$ the induced symmetry is $yu$, since we pick first and
fifth variable from $y^2u^3$. For a given outer symmetry, the class
|InducedSymmetries| does this for all classes of a given equivalence.
We need also to cycle through all possible symmetries yielding the
given dimension. For this purpose we define classes |SymmetrySet| and
|symiterator|.
The symmetry is implemented as |IntSequence|, in fact, it inherits
from it.
@s Symmetry int
@s IntSequence int
@s SymmetrySet int
@s symiterator int
@s OrdSequence int
@s InducedSymmetries int
@c
#ifndef SYMMETRY_H
#define SYMMETRY_H
#include "equivalence.h"
#include "int_sequence.h"
#include <list>
#include <vector>
@<|Symmetry| class declaration@>;
@<|SymmetrySet| class declaration@>;
@<|symiterator| class declaration@>;
@<|InducedSymmetries| class declaration@>;
#endif
@ Clear. The method |isFull| returns true if and only if the symmetry
allows for any permutation of indices.
@<|Symmetry| class declaration@>=
class Symmetry : public IntSequence {
public:@/
@<|Symmetry| constructors@>;
int num() const
{@+return size();@+}
int dimen() const
{@+return sum();@+}
int findClass(int i) const;
bool isFull() const;
};
@ We provide three constructors for symmetries of the form $y^n$,
$y^nu^m$, $y^nu^m\sigma^k$. Also a copy constructor, and finally a
constructor of implied symmetry for a symmetry and an equivalence
class. It is already implemented in |IntSequence| so we only call
appropriate constructor of |IntSequence|. We also provide the
subsymmetry, which takes the given length of symmetry from the end.
The last constructor constructs a symmetry from an integer sequence
(supposed to be ordered) as a symmetry counting successively equal
items. For instance the sequence $(a,a,a,b,c,c,d,d,d,d)$ produces
symmetry $(3,1,2,4)$.
@<|Symmetry| constructors@>=
Symmetry(int len, const char* dummy)
: IntSequence(len, 0)@+ {}
Symmetry(int i1)
: IntSequence(1, i1)@+ {}
Symmetry(int i1, int i2)
: IntSequence(2) {@+operator[](0) = i1;@+ operator[](1) = i2;@+}
Symmetry(int i1, int i2 ,int i3)
: IntSequence(3)
{@+
operator[](0) = i1;@+
operator[](1) = i2;@+
operator[](2) = i3;@+
}
Symmetry(int i1, int i2 ,int i3, int i4)
: IntSequence(4)
{@+
operator[](0) = i1;@+
operator[](1) = i2;@+
operator[](2) = i3;@+
operator[](3) = i4;@+
}
Symmetry(const Symmetry& s)
: IntSequence(s)@+ {}
Symmetry(const Symmetry& s, const OrdSequence& cl)
: IntSequence(s, cl.getData())@+ {}
Symmetry(Symmetry& s, int len)
: IntSequence(s, s.size()-len, s.size())@+ {}
Symmetry(const IntSequence& s);
@ The class |SymmetrySet| defines a set of symmetries of the given
length having given dimension. It does not store all the symmetries,
rather it provides a storage for one symmetry, which is changed as an
adjoint iterator moves.
The iterator class is |symiterator|. It is implemented
recursively. The iterator object, when created, creates subordinal
iterator, which iterates over a symmetry set whose length is one less,
and dimension is the former dimension. When the subordinal iterator
goes to its end, the superordinal iterator increases left most index in
the symmetry, resets the subordinal symmetry set with different
dimension, and iterates through the subordinal symmetry set until its
end, and so on. That's why we provide also |SymmetrySet| constructor
for construction of a subordinal symmetry set.
The typical usage of the abstractions for |SymmetrySet| and
|symiterator| is as follows:
\kern0.3cm
\centerline{|for (symiterator si(SymmetrySet(6, 4)); !si.isEnd(); ++si) {body}|}
\kern0.3cm
\noindent It goes through all symmetries of size 4 having dimension
6. One can use |*si| as the symmetry in the body.
@<|SymmetrySet| class declaration@>=
class SymmetrySet {
Symmetry run;
int dim;
public:@;
SymmetrySet(int d, int length)
: run(length, ""), dim(d)@+ {}
SymmetrySet(SymmetrySet& s, int d)
: run(s.run, s.size()-1), dim(d)@+ {}
int dimen() const
{@+ return dim;@+}
const Symmetry& sym() const
{@+ return run;@+}
Symmetry& sym()
{@+ return run;@+}
int size() const
{@+ return run.size();@+}
};
@ The logic of |symiterator| was described in |@<|SymmetrySet| class
declaration@>|. Here we only comment that: the class has a reference
to the |SymmetrySet| only to know dimension and for access of its
symmetry storage. Further we have pointers to subordinal |symiterator|
and its |SymmetrySet|. These are pointers, since the recursion ends at
length equal to 2, in which case these pointers are |NULL|.
The constructor creates the iterator which initializes to the first
symmetry (beginning).
@<|symiterator| class declaration@>=
class symiterator {
SymmetrySet& s;
symiterator* subit;
SymmetrySet* subs;
bool end_flag;
public:@;
symiterator(SymmetrySet& ss);
~symiterator();
symiterator& operator++();
bool isEnd() const
{@+ return end_flag;@+}
const Symmetry& operator*() const
{@+ return s.sym();@+}
};
@ This simple abstraction just constructs a vector of induced
symmetries from the given equivalence and outer symmetry. A
permutation might optionally permute the classes of the equivalence.
@<|InducedSymmetries| class declaration@>=
class InducedSymmetries : public vector<Symmetry> {
public:@;
InducedSymmetries(const Equivalence& e, const Symmetry& s);
InducedSymmetries(const Equivalence& e, const Permutation& p, const Symmetry& s);
void print() const;
};
@ End of {\tt symmetry.h} file.

View File

@ -0,0 +1,127 @@
// Copyright 2004, Ondra Kamenik
#include "t_container.hh"
#include "kron_prod.hh"
#include "ps_tensor.hh"
#include "pyramid_prod.hh"
const int FGSContainer::num_one_time = 10;
// |UGSContainer| conversion from |FGSContainer|
UGSContainer::UGSContainer(const FGSContainer &c)
: TensorContainer<UGSTensor>(c.num())
{
for (FGSContainer::const_iterator it = c.begin();
it != c.end(); ++it)
{
UGSTensor *unfolded = new UGSTensor(*((*it).second));
insert(unfolded);
}
}
/* We set |l| to the dimension of |t|, which is a tensor multiplying
tensors from the container from the left. Also, we set |k| to the
dimension of the resulting tensor. We go through all equivalences of
the $k$-element set and pick up only those which have $l$ classes.

In each loop pass, we fetch all tensors necessary for the product into
the vector |ts|. Then we form the Kronecker product |KronProdAll| and
feed it with the tensors from |ts|. Then we form the unfolded permuted
symmetry tensor |UPSTensor| as the matrix product of |t| and the
Kronecker product |kp|. Then we add the permuted data to |out|. This is
done by the |UPSTensor| method |addTo|. */
void
UGSContainer::multAndAdd(const UGSTensor &t, UGSTensor &out) const
{
int l = t.dimen();
int k = out.dimen();
const EquivalenceSet &eset = ebundle.get(k);
for (EquivalenceSet::const_iterator it = eset.begin();
it != eset.end(); ++it)
{
if ((*it).numClasses() == l)
{
vector<const UGSTensor *> ts
= fetchTensors(out.getSym(), *it);
KronProdAllOptim kp(l);
for (int i = 0; i < l; i++)
kp.setMat(i, *(ts[i]));
kp.optimizeOrder();
UPSTensor ups(out.getDims(), *it, t, kp);
ups.addTo(out);
}
}
}
// |FGSContainer| conversion from |UGSContainer|
FGSContainer::FGSContainer(const UGSContainer &c)
: TensorContainer<FGSTensor>(c.num())
{
for (UGSContainer::const_iterator it = c.begin();
it != c.end(); ++it)
{
FGSTensor *folded = new FGSTensor(*((*it).second));
insert(folded);
}
}
// |FGSContainer::multAndAdd| folded code
/* Here we perform one step of the Faa Di Bruno operation. We call
|multAndAdd| for the unfolded tensor. */
void
FGSContainer::multAndAdd(const FGSTensor &t, FGSTensor &out) const
{
UGSTensor ut(t);
multAndAdd(ut, out);
}
// |FGSContainer::multAndAdd| unfolded code
/* This is the same as |UGSContainer::multAndAdd| above, except that
from the Kronecker product we construct an |FPSTensor| rather than a
|UPSTensor|. */
void
FGSContainer::multAndAdd(const UGSTensor &t, FGSTensor &out) const
{
int l = t.dimen();
int k = out.dimen();
const EquivalenceSet &eset = ebundle.get(k);
for (EquivalenceSet::const_iterator it = eset.begin();
it != eset.end(); ++it)
{
if ((*it).numClasses() == l)
{
vector<const FGSTensor *> ts
= fetchTensors(out.getSym(), *it);
KronProdAllOptim kp(l);
for (int i = 0; i < l; i++)
kp.setMat(i, *(ts[i]));
kp.optimizeOrder();
FPSTensor fps(out.getDims(), *it, t, kp);
fps.addTo(out);
}
}
}
/* This fills a given vector with the integer sequences corresponding
to the first |num| indices from the interval |start| (inclusive) to
|end| (exclusive). If there are fewer than |num| such indices, a
shorter vector is returned. */
Tensor::index
FGSContainer::getIndices(int num, vector<IntSequence> &out,
const Tensor::index &start,
const Tensor::index &end)
{
out.clear();
int i = 0;
Tensor::index run = start;
while (i < num && run != end)
{
out.push_back(run.getCoor());
i++;
++run;
}
return run;
}
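/* A usage sketch (hypothetical driver code; both |getIndices| and
|num_one_time| are private, so this would live inside
|FGSContainer|): to walk the columns of a folded tensor |t| in
chunks, one would write

   vector<IntSequence> chunk;
   Tensor::index run = t.begin();
   while (run != t.end())
     run = getIndices(num_one_time, chunk, run, t.end());

where each pass leaves the coordinates of at most |num_one_time|
consecutive indices in |chunk|. */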

View File

@ -1,138 +0,0 @@
@q $Id: t_container.cweb 148 2005-04-19 15:12:26Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@ Start of {\tt t\_container.cpp} file.
@s USubTensor int
@c
#include "t_container.h"
#include "kron_prod.h"
#include "ps_tensor.h"
#include "pyramid_prod.h"
const int FGSContainer::num_one_time = 10;
@<|UGSContainer| conversion from |FGSContainer|@>;
@<|UGSContainer::multAndAdd| code@>;
@<|FGSContainer| conversion from |UGSContainer|@>;
@<|FGSContainer::multAndAdd| folded code@>;
@<|FGSContainer::multAndAdd| unfolded code@>;
@<|FGSContainer::getIndices| code@>;
@
@<|UGSContainer| conversion from |FGSContainer|@>=
UGSContainer::UGSContainer(const FGSContainer& c)
: TensorContainer<UGSTensor>(c.num())
{
for (FGSContainer::const_iterator it = c.begin();
it != c.end(); ++it) {
UGSTensor* unfolded = new UGSTensor(*((*it).second));
insert(unfolded);
}
}
@ We set |l| to dimension of |t|, this is a tensor which multiplies
tensors from the container from the left. Also we set |k| to a
dimension of the resulting tensor. We go through all equivalences on
|k| element set and pickup only those which have $l$ classes.
In each loop, we fetch all necessary tensors for the product to the
vector |ts|. Then we form Kronecker product |KronProdAll| and feed it
with tensors from |ts|. Then we form unfolded permuted symmetry tensor
|UPSTensor| as matrix product of |t| and Kronecker product |kp|. Then
we add the permuted data to |out|. This is done by |UPSTensor| method
|addTo|.
@<|UGSContainer::multAndAdd| code@>=
void UGSContainer::multAndAdd(const UGSTensor& t, UGSTensor& out) const
{
int l = t.dimen();
int k = out.dimen();
const EquivalenceSet& eset = ebundle.get(k);
for (EquivalenceSet::const_iterator it = eset.begin();
it != eset.end(); ++it) {
if ((*it).numClasses() == l) {
vector<const UGSTensor*> ts =
fetchTensors(out.getSym(), *it);
KronProdAllOptim kp(l);
for (int i = 0; i < l; i++)
kp.setMat(i, *(ts[i]));
kp.optimizeOrder();
UPSTensor ups(out.getDims(), *it, t, kp);
ups.addTo(out);
}
}
}
@
@<|FGSContainer| conversion from |UGSContainer|@>=
FGSContainer::FGSContainer(const UGSContainer& c)
: TensorContainer<FGSTensor>(c.num())
{
for (UGSContainer::const_iterator it = c.begin();
it != c.end(); ++it) {
FGSTensor* folded = new FGSTensor(*((*it).second));
insert(folded);
}
}
@ Here we perform one step of the Faa Di Bruno operation. We call the
|multAndAdd| for unfolded tensor.
@<|FGSContainer::multAndAdd| folded code@>=
void FGSContainer::multAndAdd(const FGSTensor& t, FGSTensor& out) const
{
UGSTensor ut(t);
multAndAdd(ut, out);
}
@ This is the same as |@<|UGSContainer::multAndAdd| code@>|
but we do not construct |UPSTensor| from the Kronecker
product, but |FPSTensor|.
@<|FGSContainer::multAndAdd| unfolded code@>=
void FGSContainer::multAndAdd(const UGSTensor& t, FGSTensor& out) const
{
int l = t.dimen();
int k = out.dimen();
const EquivalenceSet& eset = ebundle.get(k);
for (EquivalenceSet::const_iterator it = eset.begin();
it != eset.end(); ++it) {
if ((*it).numClasses() == l) {
vector<const FGSTensor*> ts =
fetchTensors(out.getSym(), *it);
KronProdAllOptim kp(l);
for (int i = 0; i < l; i++)
kp.setMat(i, *(ts[i]));
kp.optimizeOrder();
FPSTensor fps(out.getDims(), *it, t, kp);
fps.addTo(out);
}
}
}
@ This fills a given vector with integer sequences corresponding to
first |num| indices from interval |start| (including) to |end|
(excluding). If there are not |num| of such indices, the shorter vector
is returned.
@<|FGSContainer::getIndices| code@>=
Tensor::index
FGSContainer::getIndices(int num, vector<IntSequence>& out,
const Tensor::index& start,
const Tensor::index& end)
{
out.clear();
int i = 0;
Tensor::index run = start;
while (i < num && run != end) {
out.push_back(run.getCoor());
i++;
++run;
}
return run;
}
@ End of {\tt t\_container.cpp} file.

View File

@ -0,0 +1,387 @@
// Copyright 2004, Ondra Kamenik
// Tensor containers.
/* One of primary purposes of the tensor library is to perform one step
of the Faa Di Bruno formula:
$$\left[B_{s^k}\right]_{\alpha_1\ldots\alpha_k}=
[h_{y^l}]_{\gamma_1\ldots\gamma_l}\sum_{c\in M_{l,k}}
\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}
$$
where $h_{y^l}$ and $g_{s^i}$ are tensors, $M_{l,k}$ is the set of all
equivalences of the $k$-element set having $l$ classes, $c_m$ is the
$m$-th class of equivalence $c$, and $\vert c_m\vert$ is its
cardinality. Further, $c_m(\alpha)$ is the sequence of $\alpha$s picked
by equivalence class $c_m$.
In order to accomplish this operation, we basically need some storage
of all tensors of the form $\left[g_{s^i}\right]$. Note that $s$ can
be compound, for instance $s=[y,u]$. Then we need storage for
$\left[g_{y^3}\right]$, $\left[g_{y^2u}\right]$,
$\left[g_{yu^5}\right]$, etc.
We need an object holding all tensors of the same type. Here, type
means the information that the coordinates of the tensors can be of
type $y$ or $u$. We will group only tensors whose symmetry is described
by the |Symmetry| class. These are only $y^2u^3$, not $yuyu^2$. So, we
are going to define a class which will hold tensors whose symmetries
are of type |Symmetry| and have the same symmetry length (number of
different coordinate types). Also, for each symmetry there will be at
most one tensor.

The class has two purposes: the first is to provide storage (insert
and retrieve). The second is to perform the above step of Faa Di Bruno,
which means going through all equivalences with $l$ classes, performing
the tensor product, and adding to the result.

We define a template class |TensorContainer|. From different
instantiations of the template class we will inherit to create concrete
classes, for example a container of unfolded general symmetric
tensors. The one step of the Faa Di Bruno (we call it |multAndAdd|) is
implemented in the concrete subclasses, because the implementation
depends on the storage. Note also that |multAndAdd| does not have a
common template declaration. This is because a sparse tensor $h$ is
multiplied by folded tensors $g$ yielding a folded tensor $B$, whereas
an unfolded tensor $h$ is multiplied by unfolded tensors $g$ yielding
an unfolded tensor $B$. */
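/* As a small concrete instance of the above formula (a sketch for
$k=2$): the 2-element set has exactly two equivalences,
$\{\{0\},\{1\}\}$ with two classes and $\{\{0,1\}\}$ with one class,
so the formula reduces to the familiar second-order chain rule
$$\left[B_{s^2}\right]_{\alpha_1\alpha_2}=
\left[h_{y^2}\right]_{\gamma_1\gamma_2}
\left[g_s\right]^{\gamma_1}_{\alpha_1}
\left[g_s\right]^{\gamma_2}_{\alpha_2}
+\left[h_y\right]_{\gamma_1}
\left[g_{s^2}\right]^{\gamma_1}_{\alpha_1\alpha_2}.$$ */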
#ifndef T_CONTAINER_H
#define T_CONTAINER_H
#include "symmetry.hh"
#include "gs_tensor.hh"
#include "tl_exception.hh"
#include "tl_static.hh"
#include "sparse_tensor.hh"
#include "equivalence.hh"
#include "rfs_tensor.hh"
#include "Vector.h"
#include <map>
#include <string>
#include <sstream>
#include <matio.h>
// |ltsym| predicate
/* We need a predicate providing a strict weak ordering of
symmetries. */
struct ltsym
{
bool
operator()(const Symmetry &s1, const Symmetry &s2) const
{
return s1 < s2;
}
};
/* Here we define the template class for the tensor container. We
implement it as an |stl::map|. It is a unique container: no two tensors
with the same symmetry can coexist. Keys of the map are symmetries,
values are pointers to tensors. The class is responsible for
deallocating all tensors; creation of the tensors is done outside.

The class has an integer |n| as its member. It is the number of
different coordinate types of all contained tensors. Besides the
intuitive insert and retrieve interface, we define a method
|fetchTensors|, which for a given symmetry and a given equivalence
calculates the symmetries implied by the symmetry and all equivalence
classes, and fetches the corresponding tensors into a vector.

Also, each instance of the container has a reference to an
|EquivalenceBundle| which gives access to equivalences. */
template<class _Ttype>
class TensorContainer
{
protected:
typedef const _Ttype *_const_ptr;
typedef _Ttype *_ptr;
typedef map<Symmetry, _ptr, ltsym> _Map;
typedef typename _Map::value_type _mvtype;
public:
typedef typename _Map::iterator iterator;
typedef typename _Map::const_iterator const_iterator;
private:
int n;
_Map m;
protected:
const EquivalenceBundle &ebundle;
public:
TensorContainer(int nn)
: n(nn), ebundle(*(tls.ebundle))
{
}
/* This is just a copy constructor. This makes a hard copy of all tensors. */
TensorContainer(const TensorContainer<_Ttype> &c)
: n(c.n), m(), ebundle(c.ebundle)
{
for (const_iterator it = c.m.begin(); it != c.m.end(); ++it)
{
_Ttype *ten = new _Ttype(*((*it).second));
insert(ten);
}
}
// |TensorContainer| subtensor constructor
/* This constructor constructs a new tensor container, whose tensors
are in-place subtensors of the given container. */
TensorContainer(int first_row, int num, TensorContainer<_Ttype> &c)
: n(c.n), ebundle(*(tls.ebundle))
{
for (iterator it = c.m.begin(); it != c.m.end(); ++it)
{
_Ttype *t = new _Ttype(first_row, num, *((*it).second));
insert(t);
}
}
_const_ptr
get(const Symmetry &s) const
{
TL_RAISE_IF(s.num() != num(),
"Incompatible symmetry lookup in TensorContainer::get");
const_iterator it = m.find(s);
if (it == m.end())
{
TL_RAISE("Symmetry not found in TensorContainer::get");
return NULL;
}
else
{
return (*it).second;
}
}
_ptr
get(const Symmetry &s)
{
TL_RAISE_IF(s.num() != num(),
"Incompatible symmetry lookup in TensorContainer::get");
iterator it = m.find(s);
if (it == m.end())
{
TL_RAISE("Symmetry not found in TensorContainer::get");
return NULL;
}
else
{
return (*it).second;
}
}
bool
check(const Symmetry &s) const
{
TL_RAISE_IF(s.num() != num(),
"Incompatible symmetry lookup in TensorContainer::check");
const_iterator it = m.find(s);
return it != m.end();
}
void
insert(_ptr t)
{
TL_RAISE_IF(t->getSym().num() != num(),
"Incompatible symmetry insertion in TensorContainer::insert");
TL_RAISE_IF(check(t->getSym()),
"Tensor already in container in TensorContainer::insert");
m.insert(_mvtype(t->getSym(), t));
if (!t->isFinite())
{
throw TLException(__FILE__, __LINE__, "NaN or Inf asserted in TensorContainer::insert");
}
}
void
remove(const Symmetry &s)
{
iterator it = m.find(s);
if (it != m.end())
{
_ptr t = (*it).second;
m.erase(it);
delete t;
}
}
void
clear()
{
while (!m.empty())
{
delete (*(m.begin())).second;
m.erase(m.begin());
}
}
int
getMaxDim() const
{
int res = -1;
for (const_iterator run = m.begin(); run != m.end(); ++run)
{
int dim = (*run).first.dimen();
if (dim > res)
res = dim;
}
return res;
}
/* Debug print. */
void
print() const
{
printf("Tensor container: nvars=%d, tensors=%D\n", n, m.size());
for (const_iterator it = m.begin(); it != m.end(); ++it)
{
printf("Symmetry: ");
(*it).first.print();
((*it).second)->print();
}
}
/* Output to the MAT file. */
void
writeMat(mat_t *fd, const char *prefix) const
{
for (const_iterator it = begin(); it != end(); ++it)
{
char lname[100];
sprintf(lname, "%s_g", prefix);
const Symmetry &sym = (*it).first;
for (int i = 0; i < sym.num(); i++)
{
char tmp[10];
sprintf(tmp, "_%d", sym[i]);
strcat(lname, tmp);
}
ConstTwoDMatrix m(*((*it).second));
m.writeMat(fd, lname);
}
}
/* Output to the Memory Map. */
void
writeMMap(map<string, ConstTwoDMatrix> &mm, const string &prefix) const
{
ostringstream lname;
for (const_iterator it = begin(); it != end(); ++it)
{
lname.str(prefix);
lname << "_g";
const Symmetry &sym = (*it).first;
for (int i = 0; i < sym.num(); i++)
lname << "_" << sym[i];
mm.insert(make_pair(lname.str(), ConstTwoDMatrix(*((*it).second))));
}
}
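/* In both cases the name of each stored matrix is formed from the
prefix and the symmetry; for instance, with prefix "dyn" the tensor
of symmetry $(2,1)$ is stored under the name |dyn_g_2_1|. */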
/* Here we fetch all tensors given by the symmetry and the equivalence.
We go through all equivalence classes, calculate the implied symmetry,
and fetch its tensor, storing them in the same order in the vector. */
vector<_const_ptr>
fetchTensors(const Symmetry &rsym, const Equivalence &e) const
{
vector<_const_ptr> res(e.numClasses());
int i = 0;
for (Equivalence::const_seqit it = e.begin();
it != e.end(); ++it, i++)
{
Symmetry s(rsym, *it);
res[i] = get(s);
}
return res;
}
virtual ~TensorContainer()
{
clear();
}
int
num() const
{
return n;
}
const EquivalenceBundle &
getEqBundle() const
{
return ebundle;
}
const_iterator
begin() const
{
return m.begin();
}
const_iterator
end() const
{
return m.end();
}
iterator
begin()
{
return m.begin();
}
iterator
end()
{
return m.end();
}
};
/* Here is a container storing |UGSTensor|s. We declare the |multAndAdd| method. */
class FGSContainer;
class UGSContainer : public TensorContainer<UGSTensor>
{
public:
UGSContainer(int nn)
: TensorContainer<UGSTensor>(nn)
{
}
UGSContainer(const UGSContainer &uc)
: TensorContainer<UGSTensor>(uc)
{
}
UGSContainer(const FGSContainer &c);
void multAndAdd(const UGSTensor &t, UGSTensor &out) const;
};
/* Here is a container storing |FGSTensor|s. We declare two versions of
the |multAndAdd| method. The first works for folded $B$ and folded $h$
tensors, the second works for folded $B$ and unfolded $h$. There is no
point in doing it for unfolded $B$, since the algorithm goes through all
the indices of $B$ and calculates the corresponding columns. So, if $B$
is needed unfolded, it is more efficient to calculate its folded version
and then unfold it by conversion.

The static member |num_one_time| is the number of columns formed from
the product of $g$ tensors at one time. This is subject to change; we
will probably have to do some tuning and decide on this number based on
symmetries and dimensions at runtime. */
class FGSContainer : public TensorContainer<FGSTensor>
{
static const int num_one_time;
public:
FGSContainer(int nn)
: TensorContainer<FGSTensor>(nn)
{
}
FGSContainer(const FGSContainer &fc)
: TensorContainer<FGSTensor>(fc)
{
}
FGSContainer(const UGSContainer &c);
void multAndAdd(const FGSTensor &t, FGSTensor &out) const;
void multAndAdd(const UGSTensor &t, FGSTensor &out) const;
private:
static Tensor::index getIndices(int num, vector<IntSequence> &out,
const Tensor::index &start,
const Tensor::index &end);
};
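/* A minimal usage sketch (the sizes and the tensor |t| are made up for
illustration): a container over two coordinate types, into which we
insert a tensor allocated by |new| (the container takes ownership) and
later retrieve it by its symmetry:

   FGSContainer g(2);
   g.insert(t);   // |t| is an |FGSTensor*| of symmetry, say, (1,2)
   if (g.check(Symmetry(1, 2)))
     g.get(Symmetry(1, 2))->print();

All inserted tensors are deleted in the container's destructor. */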
#endif

View File

@ -1,380 +0,0 @@
@q $Id: t_container.hweb 2353 2009-09-03 19:22:36Z michel $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 Tensor containers. Start of {\tt t\_container.h} file.
One of primary purposes of the tensor library is to perform one step
of the Faa Di Bruno formula:
$$\left[B_{s^k}\right]_{\alpha_1\ldots\alpha_k}=
[h_{y^l}]_{\gamma_1\ldots\gamma_l}\sum_{c\in M_{l,k}}
\prod_{m=1}^l\left[g_{s^{\vert c_m\vert}}\right]^{\gamma_m}_{c_m(\alpha)}
$$
where $h_{y^l}$ and $g_{s^i}$ are tensors, $M_{l,k}$ is a set of all
equivalences with $l$ classes of $k$ element set, $c_m$ is $m$-the
class of equivalence $c$, and $\vert c_m\vert$ is its
cardinality. Further, $c_m(\alpha)$ is a sequence of $\alpha$s picked
by equivalence class $c_m$.
In order to accomplish this operation, we basically need some storage
of all tensors of the form $\left[g_{s^i}\right]$. Note that $s$ can
be compound, for instance $s=[y,u]$. Then we need storage for
$\left[g_{y^3}\right]$, $\left[g_{y^2u}\right]$,
$\left[g_{yu^5}\right]$, etc.
We need an object holding all tensors of the same type. Here type
means an information, that coordinates of the tensors can be of type
$y$, or $u$. We will group only tensors, whose symmetry is described
by |Symmetry| class. These are only $y^2u^3$, not $yuyu^2$. So, we are
going to define a class which will hold tensors whose symmetries are
of type |Symmetry| and have the same symmetry length (number of
different coordinate types). Also, for each symmetry there will be at
most one tensor.
The class has two purposes: The first is to provide storage (insert
and retrieve). The second is to perform the above step of Faa Di Bruno. This is
going through all equivalences with $l$ classes, perform the tensor
product and add to the result.
We define a template class |TensorContainer|. From different
instantiations of the template class we will inherit to create concrete
classes, for example container of unfolded general symmetric
tensors. The one step of the Faa Di Bruno (we call it |multAndAdd|) is
implemented in the concrete subclasses, because the implementation
depends on storage. Note even, that |multAndAdd| has not a template
common declaration. This is because sparse tensor $h$ is multiplied by
folded tensors $g$ yielding folded tensor $B$, but unfolded tensor $h$
is multiplied by unfolded tensors $g$ yielding unfolded tensor $B$.
@c
#ifndef T_CONTAINER_H
#define T_CONTAINER_H
#include "symmetry.h"
#include "gs_tensor.h"
#include "tl_exception.h"
#include "tl_static.h"
#include "sparse_tensor.h"
#include "equivalence.h"
#include "rfs_tensor.h"
#include "Vector.h"
#include <map>
#include <string>
#include <sstream>
#include <matio.h>
@<|ltsym| predicate@>;
@<|TensorContainer| class definition@>;
@<|UGSContainer| class declaration@>;
@<|FGSContainer| class declaration@>;
#endif
@ We need a predicate on strict weak ordering of symmetries.
@<|ltsym| predicate@>=
struct ltsym {
bool operator()(const Symmetry& s1, const Symmetry& s2) const
{@+ return s1 < s2;@+}
};
@ Here we define the template class for tensor container. We implement
it as |stl::map|. It is a unique container, no two tensors with same
symmetries can coexist. Keys of the map are symmetries, values are
pointers to tensor. The class is responsible for deallocating all
tensors. Creation of the tensors is done outside.
The class has integer |n| as its member. It is a number of different
coordinate types of all contained tensors. Besides intuitive insert
and retrieve interface, we define a method |fetchTensors|, which for a
given symmetry and given equivalence calculates symmetries implied by
the symmetry and all equivalence classes, and fetches corresponding
tensors in a vector.
Also, each instance of the container has a reference to
|EquivalenceBundle| which allows an access to equivalences.
@s _const_ptr int;
@s _ptr int;
@s _Map int;
@<|TensorContainer| class definition@>=
template<class _Ttype> class TensorContainer {
protected:@;
typedef const _Ttype* _const_ptr;
typedef _Ttype* _ptr;
typedef map<Symmetry, _ptr, ltsym> _Map;@/
typedef typename _Map::value_type _mvtype;@/
public:@;
typedef typename _Map::iterator iterator;@/
typedef typename _Map::const_iterator const_iterator;@/
private:@;
int n;
_Map m;
protected:@;
const EquivalenceBundle& ebundle;
public:@;
TensorContainer(int nn)
: n(nn), ebundle(*(tls.ebundle)) @+ {}
@<|TensorContainer| copy constructor@>;
@<|TensorContainer| subtensor constructor@>;
@<|TensorContainer:get| code@>;
@<|TensorContainer::check| code@>;
@<|TensorContainer::insert| code@>;
@<|TensorContainer::remove| code@>;
@<|TensorContainer::clear| code@>;
@<|TensorContainer::fetchTensors| code@>;
@<|TensorContainer::getMaxDim| code@>;
@<|TensorContainer::print| code@>;
@<|TensorContainer::writeMat| code@>;
@<|TensorContainer::writeMMap| code@>;
virtual ~TensorContainer()
{@+ clear();@+}
@<|TensorContainer| inline methods@>;
};
@
@<|TensorContainer| inline methods@>=
int num() const
{@+ return n;@+}
const EquivalenceBundle& getEqBundle() const
{@+ return ebundle;@+}
const_iterator begin() const
{@+ return m.begin();@+}
const_iterator end() const
{@+ return m.end();@+}
iterator begin()
{@+ return m.begin();@+}
iterator end()
{@+ return m.end();@+}
@ This is just a copy constructor. This makes a hard copy of all tensors.
@<|TensorContainer| copy constructor@>=
TensorContainer(const TensorContainer<_Ttype>& c)
: n(c.n), m(), ebundle(c.ebundle)
{
for (const_iterator it = c.m.begin(); it != c.m.end(); ++it) {
_Ttype* ten = new _Ttype(*((*it).second));
insert(ten);
}
}
@ This constructor constructs a new tensor container, whose tensors
are in-place subtensors of the given container.
@<|TensorContainer| subtensor constructor@>=
TensorContainer(int first_row, int num, TensorContainer<_Ttype>& c)
: n(c.n), ebundle(*(tls.ebundle))
{
for (iterator it = c.m.begin(); it != c.m.end(); ++it) {
_Ttype* t = new _Ttype(first_row, num, *((*it).second));
insert(t);
}
}
@
@<|TensorContainer:get| code@>=
_const_ptr get(const Symmetry& s) const
{
TL_RAISE_IF(s.num() != num(),
"Incompatible symmetry lookup in TensorContainer::get");
const_iterator it = m.find(s);
if (it == m.end()) {
TL_RAISE("Symmetry not found in TensorContainer::get");
return NULL;
} else {
return (*it).second;
}
}
@#
_ptr get(const Symmetry& s)
{
TL_RAISE_IF(s.num() != num(),
"Incompatible symmetry lookup in TensorContainer::get");
iterator it = m.find(s);
if (it == m.end()) {
TL_RAISE("Symmetry not found in TensorContainer::get");
return NULL;
} else {
return (*it).second;
}
}
@
@<|TensorContainer::check| code@>=
bool check(const Symmetry& s) const
{
TL_RAISE_IF(s.num() != num(),
"Incompatible symmetry lookup in TensorContainer::check");
const_iterator it = m.find(s);
return it != m.end();
}
@
@<|TensorContainer::insert| code@>=
void insert(_ptr t)
{
TL_RAISE_IF(t->getSym().num() != num(),
"Incompatible symmetry insertion in TensorContainer::insert");
TL_RAISE_IF(check(t->getSym()),
"Tensor already in container in TensorContainer::insert");
m.insert(_mvtype(t->getSym(),t));
if (! t->isFinite()) {
throw TLException(__FILE__, __LINE__, "NaN or Inf asserted in TensorContainer::insert");
}
}
@
@<|TensorContainer::remove| code@>=
void remove(const Symmetry& s)
{
iterator it = m.find(s);
if (it != m.end()) {
_ptr t = (*it).second;
m.erase(it);
delete t;
}
}
@
@<|TensorContainer::clear| code@>=
void clear()
{
while (! m.empty()) {
delete (*(m.begin())).second;
m.erase(m.begin());
}
}
@
@<|TensorContainer::getMaxDim| code@>=
int getMaxDim() const
{
int res = -1;
for (const_iterator run = m.begin(); run != m.end(); ++run) {
int dim = (*run).first.dimen();
if (dim > res)
res = dim;
}
return res;
}
@ Debug print.
@<|TensorContainer::print| code@>=
void print() const
{
printf("Tensor container: nvars=%d, tensors=%D\n", n, m.size());
for (const_iterator it = m.begin(); it != m.end(); ++it) {
printf("Symmetry: ");
(*it).first.print();
((*it).second)->print();
}
}
@ Output to the MAT file.
@<|TensorContainer::writeMat| code@>=
void writeMat(mat_t* fd, const char* prefix) const
{
for (const_iterator it = begin(); it != end(); ++it) {
char lname[100];
sprintf(lname, "%s_g", prefix);
const Symmetry& sym = (*it).first;
for (int i = 0; i < sym.num(); i++) {
char tmp[10];
sprintf(tmp, "_%d", sym[i]);
strcat(lname, tmp);
}
ConstTwoDMatrix m(*((*it).second));
m.writeMat(fd, lname);
}
}
@ Output to the Memory Map.
@<|TensorContainer::writeMMap| code@>=
void writeMMap(map<string,ConstTwoDMatrix> &mm, const string &prefix) const
{
ostringstream lname;
for (const_iterator it = begin(); it != end(); ++it) {
lname.str(prefix);
lname << "_g";
const Symmetry& sym = (*it).first;
for (int i = 0; i < sym.num(); i++)
lname << "_" << sym[i];
mm.insert(make_pair(lname.str(), ConstTwoDMatrix(*((*it).second))));
}
}
@ Here we fetch all tensors given by symmetry and equivalence. We go
through all equivalence classes, calculate implied symmetry, and
fetch its tensor storing it in the same order to the vector.
@<|TensorContainer::fetchTensors| code@>=
vector<_const_ptr>
fetchTensors(const Symmetry& rsym, const Equivalence& e) const
{
vector<_const_ptr> res(e.numClasses());
int i = 0;
for (Equivalence::const_seqit it = e.begin();
it != e.end(); ++it, i++) {
Symmetry s(rsym, *it);
res[i] = get(s);
}
return res;
}
@ Here is a container storing |UGSTensor|s. We declare |multAndAdd| method.
@<|UGSContainer| class declaration@>=
class FGSContainer;
class UGSContainer : public TensorContainer<UGSTensor> {
public:@;
UGSContainer(int nn)
: TensorContainer<UGSTensor>(nn)@+ {}
UGSContainer(const UGSContainer& uc)
: TensorContainer<UGSTensor>(uc)@+ {}
UGSContainer(const FGSContainer& c);
void multAndAdd(const UGSTensor& t, UGSTensor& out) const;
};
@ Here is a container storing |FGSTensor|s. We declare two versions of
|multAndAdd| method. The first works for folded $B$ and folded $h$
tensors, the second works for folded $B$ and unfolded $h$. There is no
point to do it for unfolded $B$ since the algorithm go through all the
indices of $B$ and calculates corresponding columns. So, if $B$ is
needed unfolded, it is more effective to calculate its folded version
and then unfold by conversion.
The static member |num_one_time| is a number of columns formed from
product of $g$ tensors at one time. This is subject to change, probably
we will have to do some tuning and decide about this number based on
symmetries, and dimensions in the runtime.
@s FGSContainer int
@<|FGSContainer| class declaration@>=
class FGSContainer : public TensorContainer<FGSTensor> {
static const int num_one_time;
public:@;
FGSContainer(int nn)
: TensorContainer<FGSTensor>(nn)@+ {}
FGSContainer(const FGSContainer& fc)
: TensorContainer<FGSTensor>(fc)@+ {}
FGSContainer(const UGSContainer& c);
void multAndAdd(const FGSTensor& t, FGSTensor& out) const;
void multAndAdd(const UGSTensor& t, FGSTensor& out) const;
private:@;
static Tensor::index
getIndices(int num, vector<IntSequence>& out,
const Tensor::index& start,
const Tensor::index& end);
};
@ End of {\tt t\_container.h} file.

View File

@ -0,0 +1,68 @@
// Copyright 2004, Ondra Kamenik
#include "t_polynomial.hh"
#include "kron_prod.hh"
// |PowerProvider::getNext| unfolded code
/* This method constructs the unfolded |ut| of one higher dimension,
deleting the previous one. */
const URSingleTensor &
PowerProvider::getNext(const URSingleTensor *dummy)
{
if (ut)
{
URSingleTensor *ut_new = new URSingleTensor(nv, ut->dimen()+1);
KronProd::kronMult(ConstVector(origv), ConstVector(ut->getData()), ut_new->getData());
delete ut;
ut = ut_new;
}
else
{
ut = new URSingleTensor(nv, 1);
ut->getData() = origv;
}
return *ut;
}
// |PowerProvider::getNext| folded code
/* This method just constructs the next unfolded |ut| and creates the
folded |ft| from it. */
const FRSingleTensor &
PowerProvider::getNext(const FRSingleTensor *dummy)
{
getNext(ut);
if (ft)
delete ft;
ft = new FRSingleTensor(*ut);
return *ft;
}
PowerProvider::~PowerProvider()
{
if (ut)
delete ut;
if (ft)
delete ft;
}
UTensorPolynomial::UTensorPolynomial(const FTensorPolynomial &fp)
: TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(fp.nrows(), fp.nvars())
{
for (FTensorPolynomial::const_iterator it = fp.begin();
it != fp.end(); ++it)
{
insert(new UFSTensor(*((*it).second)));
}
}
FTensorPolynomial::FTensorPolynomial(const UTensorPolynomial &up)
: TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(up.nrows(), up.nvars())
{
for (UTensorPolynomial::const_iterator it = up.begin();
it != up.end(); ++it)
{
insert(new FFSTensor(*((*it).second)));
}
}

View File

@ -1,80 +0,0 @@
@q $Id: t_polynomial.cweb 1210 2007-03-19 21:38:49Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@ Start of {\tt t\_polynomial.cpp} file.
@c
#include "t_polynomial.h"
#include "kron_prod.h"
@<|PowerProvider::getNext| unfolded code@>;
@<|PowerProvider::getNext| folded code@>;
@<|PowerProvider| destructor code@>;
@<|UTensorPolynomial| constructor conversion code@>;
@<|FTensorPolynomial| constructor conversion code@>;
@ This method constructs unfolded |ut| of higher dimension, deleting
the previous.
@<|PowerProvider::getNext| unfolded code@>=
const URSingleTensor& PowerProvider::getNext(const URSingleTensor* dummy)
{
if (ut) {
URSingleTensor* ut_new = new URSingleTensor(nv, ut->dimen()+1);
KronProd::kronMult(ConstVector(origv), ConstVector(ut->getData()), ut_new->getData());
delete ut;
ut = ut_new;
} else {
ut = new URSingleTensor(nv, 1);
ut->getData() = origv;
}
return *ut;
}
@ This method just constructs next unfolded |ut| and creates folded
|ft|.
@<|PowerProvider::getNext| folded code@>=
const FRSingleTensor& PowerProvider::getNext(const FRSingleTensor* dummy)
{
getNext(ut);
if (ft)
delete ft;
ft = new FRSingleTensor(*ut);
return *ft;
}
@
@<|PowerProvider| destructor code@>=
PowerProvider::~PowerProvider()
{
if (ut)
delete ut;
if (ft)
delete ft;
}
@ Clear.
@<|UTensorPolynomial| constructor conversion code@>=
UTensorPolynomial::UTensorPolynomial(const FTensorPolynomial& fp)
: TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(fp.nrows(), fp.nvars())
{
for (FTensorPolynomial::const_iterator it = fp.begin();
it != fp.end(); ++it) {
insert(new UFSTensor(*((*it).second)));
}
}
@ Clear.
@<|FTensorPolynomial| constructor conversion code@>=
FTensorPolynomial::FTensorPolynomial(const UTensorPolynomial& up)
: TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(up.nrows(), up.nvars())
{
for (UTensorPolynomial::const_iterator it = up.begin();
it != up.end(); ++it) {
insert(new FFSTensor(*((*it).second)));
}
}
@ End of {\tt t\_polynomial.cpp} file.

View File

@ -0,0 +1,536 @@
// Copyright 2004, Ondra Kamenik
// Tensor polynomial evaluation.
/* We need to evaluate a tensor polynomial of the form:
$$
\left[g_{x}\right]_{\alpha_1}[x]^{\alpha_1}+
\left[g_{x^2}\right]_{\alpha_1\alpha_2}[x]^{\alpha_1}[x]^{\alpha_2}+
\ldots+
\left[g_{x^n}\right]_{\alpha_1\ldots\alpha_n}\prod_{i=1}^n[x]^{\alpha_i}
$$
where $x$ is a column vector.
We have basically two options. The first is to use the formula above,
the second is to use a Horner-like formula:
$$
\left[\cdots\left[\left[\left[g_{x^{n-1}}\right]+
\left[g_{x^n}\right]_{\alpha_1\ldots\alpha_{n-1}\alpha_n}
[x]^{\alpha_n}\right]_{\alpha_1\ldots\alpha_{n-2}\alpha_{n-1}}
[x]^{\alpha_{n-1}}\right]\cdots\right]_{\alpha_1}
[x]^{\alpha_1}
$$
Alternatively, we can put the polynomial into a more compact form
$$\left[g_{x}\right]_{\alpha_1}[x]^{\alpha_1}+
\left[g_{x^2}\right]_{\alpha_1\alpha_2}[x]^{\alpha_1}[x]^{\alpha_2}+
\ldots+
\left[g_{x^n}\right]_{\alpha_1\ldots\alpha_n}\prod_{i=1}^n[x]^{\alpha_i}
= [G]_{\alpha_1\ldots\alpha_n}\prod_{i=1}^n\left[\matrix{1\cr x}\right]^{\alpha_i}
$$
Then the polynomial evaluation becomes just a matrix multiplication of the vector power.
Here we define the tensor polynomial as a container of full symmetry
tensors and add evaluation methods. We have two sorts of containers,
folded and unfolded. For each type we declare two methods implementing
the above formulas. We also define classes for the compactification of
the polynomial; such a class derives from the tensor and has an |eval|
method. */
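/* As a tiny worked example of the two strategies (scalar coefficients
for clarity): for $n=2$ the polynomial $ax+bx^2$ is evaluated
traditionally as $ax+b\cdot x\cdot x$, and in the Horner form as
$(a+bx)x$; with $a=2$, $b=3$, $x=4$ both give
$2\cdot4+3\cdot16=(2+12)\cdot4=56$, but the Horner form saves
multiplications, which is what |evalHorner| below exploits in the
tensor setting. */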
#include "t_container.hh"
#include "fs_tensor.hh"
#include "rfs_tensor.hh"
#include "tl_static.hh"
/* Just to make the code nicer, we implement a Kronecker power of a
vector encapsulated in the following class. It has a |getNext| method
which returns either the folded or the unfolded row-oriented single
column Kronecker power of the vector, according to the type of a dummy
argument. This allows us to use type-dependent code in the templates
below.

The implementation maintains the last unfolded power. If the unfolded
|getNext| is called, we Kronecker-multiply the last power with the
vector and return it. If the folded |getNext| is called, we do the same
and additionally fold the result.

|getNext| returns the vector itself on the first call (first power),
the second power on the second call, and so on. */
class PowerProvider
{
Vector origv;
URSingleTensor *ut;
FRSingleTensor *ft;
int nv;
public:
PowerProvider(const ConstVector &v)
: origv(v), ut(NULL), ft(NULL), nv(v.length())
{
}
~PowerProvider();
const URSingleTensor &getNext(const URSingleTensor *dummy);
const FRSingleTensor &getNext(const FRSingleTensor *dummy);
};
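/* A short usage sketch (|v| is an arbitrary |ConstVector|): successive
calls return successive Kronecker powers of the same vector,

   PowerProvider pp(v);
   pp.getNext((const URSingleTensor *) NULL);        // returns v
   const URSingleTensor &v2
     = pp.getNext((const URSingleTensor *) NULL);    // returns v\otimes v

Beware that each unfolded call deletes the previously returned tensor,
so earlier references must not be used afterwards. */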
/* The tensor polynomial is basically a tensor container which is more
strict on insertions. It maintains the number of rows and the number
of variables, and allows insertion only of tensors consistent with
these properties. The maximum dimension is maintained by the |insert|
method.

So we re-implement the |insert| method, and implement |evalTrad|
(traditional polynomial evaluation) and the Horner-like evaluation
|evalHorner|.

In addition, we implement derivatives of the polynomial and their
evaluation. The evaluation of a derivative is different from the
evaluation of the whole polynomial, simply because the evaluation of
the derivatives is a tensor, while the evaluation of the polynomial is
a vector (a zero-dimensional tensor). See the documentation of
|TensorPolynomial::derivative| and |TensorPolynomial::evalPartially|
below for details. */
template <class _Ttype, class _TGStype, class _Stype>
class TensorPolynomial : public TensorContainer<_Ttype>
{
int nr;
int nv;
int maxdim;
typedef TensorContainer<_Ttype> _Tparent;
typedef typename _Tparent::_ptr _ptr;
public:
TensorPolynomial(int rows, int vars)
: TensorContainer<_Ttype>(1),
nr(rows), nv(vars), maxdim(0)
{
}
TensorPolynomial(const TensorPolynomial<_Ttype, _TGStype, _Stype> &tp, int k)
: TensorContainer<_Ttype>(tp),
nr(tp.nr), nv(tp.nv), maxdim(0)
{
derivative(k);
}
TensorPolynomial(int first_row, int num, TensorPolynomial<_Ttype, _TGStype, _Stype> &tp)
: TensorContainer<_Ttype>(first_row, num, tp),
nr(num), nv(tp.nv), maxdim(tp.maxdim)
{
}
// |TensorPolynomial| contract constructor
/* This constructor takes a tensor polynomial
$$P(x,y)=\sum^m_{k=0}[g_{(xy)^k}]_{\alpha_1\ldots\alpha_k}
\left[\matrix{x\cr y}\right]^{\alpha_1\ldots\alpha_k}$$
and for a given $x$ it makes a polynomial
$$Q(y)=P(x,y).$$
The algorithm for each full symmetry $(xy)^k$ works with subtensors (slices) of
symmetry $x^iy^j$ (with $i+j=k$), and contracts these subtensors with respect to
$x^i$ to obtain a tensor of full symmetry $y^j$. Since the column
$x^i$ is calculated by |PowerProvider| we cycle for $i=1,...,m$. Then
we have to add everything for $i=0$.
The code works as follows: for slicing purposes we need the stack sizes
|ss| corresponding to the lengths of $x$ and $y$, and the identity |pp|
for unfolding a symmetry of the slice to obtain stack coordinates of
the slice. Then we do the calculations for $i=1,\ldots,m$ and then for
$i=0$. */
TensorPolynomial(const TensorPolynomial<_Ttype, _TGStype, _Stype> &tp, const Vector &xval)
: TensorContainer<_Ttype>(1),
nr(tp.nrows()), nv(tp.nvars() - xval.length()), maxdim(0)
{
TL_RAISE_IF(nvars() < 0,
"Length of xval too big in TensorPolynomial contract constructor");
IntSequence ss(2); ss[0] = xval.length(); ss[1] = nvars();
IntSequence pp(2); pp[0] = 0; pp[1] = 1;
// do contraction for all $i>0$
/* Here we setup the |PowerProvider|, and cycle through
$i=1,\ldots,m$. Within the loop we cycle through $j=0,\ldots,m-i$. If
there is a tensor with symmetry $(xy)^{i+j}$ in the original
polynomial, we make its slice with symmetry $x^iy^j$, and
|contractAndAdd| it to the tensor |ten| in the |this| polynomial with
a symmetry $y^j$.
Note three things: First, the tensor |ten| is either created and put
into |this| container, or just fetched from the container; this is done
in the "initialize |ten| of dimension |j|" step below. Second, the
contribution to the |ten| tensor must be multiplied by
$\left(\matrix{i+j\cr j}\right)$, since there are exactly that many
slices of $(xy)^{i+j}$ of the symmetry $x^iy^j$ and all must be
added. Third, the tensor |ten| is fully symmetric while
|_TGStype::contractAndAdd| works with general symmetry; that is why we
have to in-place convert the fully symmetric |ten| to a general
symmetry tensor. */
PowerProvider pwp(xval);
for (int i = 1; i <= tp.maxdim; i++)
{
const _Stype &xpow = pwp.getNext((const _Stype *) NULL);
for (int j = 0; j <= tp.maxdim-i; j++)
{
if (tp.check(Symmetry(i+j)))
{
// initialize |ten| of dimension |j|
/* The pointer |ten| is either a newly created tensor or one fetched from |this| container. */
_Ttype *ten;
if (_Tparent::check(Symmetry(j)))
{
ten = _Tparent::get(Symmetry(j));
}
else
{
ten = new _Ttype(nrows(), nvars(), j);
ten->zeros();
insert(ten);
}
Symmetry sym(i, j);
IntSequence coor(sym, pp);
_TGStype slice(*(tp.get(Symmetry(i+j))), ss, coor, TensorDimens(sym, ss));
slice.mult(Tensor::noverk(i+j, j));
_TGStype tmp(*ten);
slice.contractAndAdd(0, tmp, xpow);
}
}
}
// do contraction for $i=0$
/* This is easy. The code is the same as the "do contraction for all
$i>0$" case above, with $i=0$. The contraction here takes the form of
a simple addition. */
for (int j = 0; j <= tp.maxdim; j++)
{
if (tp.check(Symmetry(j)))
{
// initialize |ten| of dimension |j|
/* Same code as above */
_Ttype *ten;
if (_Tparent::check(Symmetry(j)))
{
ten = _Tparent::get(Symmetry(j));
}
else
{
ten = new _Ttype(nrows(), nvars(), j);
ten->zeros();
insert(ten);
}
Symmetry sym(0, j);
IntSequence coor(sym, pp);
_TGStype slice(*(tp.get(Symmetry(j))), ss, coor, TensorDimens(sym, ss));
ten->add(1.0, slice);
}
}
}
TensorPolynomial(const TensorPolynomial &tp)
: TensorContainer<_Ttype>(tp), nr(tp.nr), nv(tp.nv), maxdim(tp.maxdim)
{
}
int
nrows() const
{
return nr;
}
int
nvars() const
{
return nv;
}
/* Here we cycle up to the maximum dimension, and if a tensor exists in
the container, then we multiply it with the Kronecker power of the
vector supplied by |PowerProvider|. */
void
evalTrad(Vector &out, const ConstVector &v) const
{
if (_Tparent::check(Symmetry(0)))
out = _Tparent::get(Symmetry(0))->getData();
else
out.zeros();
PowerProvider pp(v);
for (int d = 1; d <= maxdim; d++)
{
const _Stype &p = pp.getNext((const _Stype *) NULL);
Symmetry cs(d);
if (_Tparent::check(cs))
{
const _Ttype *t = _Tparent::get(cs);
t->multaVec(out, p.getData());
}
}
}
/* Here we first construct the |maxdim-1| dimensional tensor by
contraction, and then cycle down. The code is clear; the only messy
thing is the |new| and |delete| management. */
void
evalHorner(Vector &out, const ConstVector &v) const
{
if (_Tparent::check(Symmetry(0)))
out = _Tparent::get(Symmetry(0))->getData();
else
out.zeros();
if (maxdim == 0)
return;
_Ttype *last;
if (maxdim == 1)
last = new _Ttype(*(_Tparent::get(Symmetry(1))));
else
last = new _Ttype(*(_Tparent::get(Symmetry(maxdim))), v);
for (int d = maxdim-1; d >= 1; d--)
{
Symmetry cs(d);
if (_Tparent::check(cs))
{
const _Ttype *nt = _Tparent::get(cs);
last->add(1.0, ConstTwoDMatrix(*nt));
}
if (d > 1)
{
_Ttype *new_last = new _Ttype(*last, v);
delete last;
last = new_last;
}
}
last->multaVec(out, v);
delete last;
}
/* Before a tensor is inserted, we check the number of rows and the
number of variables. Then we insert and update |maxdim|. */
void
insert(_ptr t)
{
TL_RAISE_IF(t->nrows() != nr,
"Wrong number of rows in TensorPolynomial::insert");
TL_RAISE_IF(t->nvar() != nv,
"Wrong number of variables in TensorPolynomial::insert");
TensorContainer<_Ttype>::insert(t);
if (maxdim < t->dimen())
maxdim = t->dimen();
}
/* The polynomial takes the form
$$\sum_{i=0}^n{1\over i!}\left[g_{y^i}\right]_{\alpha_1\ldots\alpha_i}
\left[y\right]^{\alpha_1}\ldots\left[y\right]^{\alpha_i},$$ where
$\left[g_{y^i}\right]$ are $i$-order derivatives of the polynomial. We
assume that ${1\over i!}\left[g_{y^i}\right]$ are items in the tensor
container. This method differentiates the polynomial by one order to
yield:
$$\sum_{i=1}^n{1\over i!}\left[i\cdot g_{y^i}\right]_{\alpha_1\ldots\alpha_i}
\left[y\right]^{\alpha_1}\ldots\left[y\right]^{\alpha_{i-1}},$$
where $\left[i\cdot{1\over i!}\cdot g_{y^i}\right]$ are put to the container.
A polynomial can itself be a derivative of some order, and that order
cannot be recognized from the object. That is why the order must be
passed in. */
void
derivative(int k)
{
for (int d = 1; d <= maxdim; d++)
{
if (_Tparent::check(Symmetry(d)))
{
_Ttype *ten = _Tparent::get(Symmetry(d));
ten->mult((double) max((d-k), 0));
}
}
}
/* Now let us suppose that we have an |s| order derivative of a
polynomial whose $i$ order derivatives are $\left[g_{y^i}\right]$, so
we have
$$\sum_{i=s}^n{1\over i!}\left[g_{y^i}\right]_{\alpha_1\ldots\alpha_i}
\prod_{k=1}^{i-s}\left[y\right]^{\alpha_k},$$
where ${1\over i!}\left[g_{y^i}\right]$ are tensors in the container.
This methods performs this evaluation. The result is an |s| dimensional
tensor. Note that when combined with the method |derivative|, they
evaluate a derivative of some order. For example a sequence of calls
|g.derivative(0)|, |g.derivative(1)| and |der=g.evalPartially(2, v)|
calculates $2!$ multiple of the second derivative of |g| at |v|. */
_Ttype *
evalPartially(int s, const ConstVector &v)
{
TL_RAISE_IF(v.length() != nvars(),
"Wrong length of vector for TensorPolynomial::evalPartially");
_Ttype *res = new _Ttype(nrows(), nvars(), s);
res->zeros();
if (_Tparent::check(Symmetry(s)))
res->add(1.0, *(_Tparent::get(Symmetry(s))));
for (int d = s+1; d <= maxdim; d++)
{
if (_Tparent::check(Symmetry(d)))
{
const _Ttype &ltmp = *(_Tparent::get(Symmetry(d)));
_Ttype *last = new _Ttype(ltmp);
for (int j = 0; j < d - s; j++)
{
_Ttype *newlast = new _Ttype(*last, v);
delete last;
last = newlast;
}
res->add(1.0, *last);
delete last;
}
}
return res;
}
};
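/* To illustrate |derivative| and |evalPartially| together (a sketch;
|g| is a folded polynomial and |v| a vector of length |g.nvars()|):

   g.derivative(0);
   g.derivative(1);
   FFSTensor *der = g.evalPartially(2, v);
   // ... use |der| ...
   delete der;

This leaves in |der| the $2!$ multiple of the second derivative of |g|
at |v|, as explained above; the caller owns the returned tensor. */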
/* This just gives a name to unfolded tensor polynomial. */
class FTensorPolynomial;
class UTensorPolynomial : public TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>
{
public:
UTensorPolynomial(int rows, int vars)
: TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(rows, vars)
{
}
UTensorPolynomial(const UTensorPolynomial &up, int k)
: TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(up, k)
{
}
UTensorPolynomial(const FTensorPolynomial &fp);
UTensorPolynomial(const UTensorPolynomial &tp, const Vector &xval)
: TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(tp, xval)
{
}
UTensorPolynomial(int first_row, int num, UTensorPolynomial &tp)
: TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(first_row, num, tp)
{
}
};
/* This just gives a name to folded tensor polynomial. */
class FTensorPolynomial : public TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>
{
public:
FTensorPolynomial(int rows, int vars)
: TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(rows, vars)
{
}
FTensorPolynomial(const FTensorPolynomial &fp, int k)
: TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(fp, k)
{
}
FTensorPolynomial(const UTensorPolynomial &up);
FTensorPolynomial(const FTensorPolynomial &tp, const Vector &xval)
: TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(tp, xval)
{
}
FTensorPolynomial(int first_row, int num, FTensorPolynomial &tp)
: TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(first_row, num, tp)
{
}
};
/* The compact form of |TensorPolynomial| is in fact a full symmetry
tensor, with the number of variables equal to the number of variables
of the polynomial plus one (for the constant $1$). */
template <class _Ttype, class _TGStype, class _Stype>
class CompactPolynomial : public _Ttype
{
public:
/* This constructor copies matrices from the given tensor polynomial to
the appropriate location in this matrix. It creates a dummy tensor
|dum| with two variables (one corresponds to $1$, the other to
$x$). The index goes through this dummy tensor, and for each index we
form the folded/unfolded general symmetry tensor corresponding to the
selection of $1$'s and $x$'s given by the index, in order to obtain its
number of columns. The length of $1$ is one, and the length of $x$ is
|pol.nvars()|; this nvs information is stored in |dumnvs|. The symmetry
of this general symmetry dummy tensor |dumgs| is given by the number of
ones and $x$'s in the index. We then copy the matrix if it exists in
the polynomial, and increase |offset| for the following cycle. */
CompactPolynomial(const TensorPolynomial<_Ttype, _TGStype, _Stype> &pol)
: _Ttype(pol.nrows(), pol.nvars()+1, pol.getMaxDim())
{
_Ttype::zeros();
IntSequence dumnvs(2);
dumnvs[0] = 1;
dumnvs[1] = pol.nvars();
int offset = 0;
_Ttype dum(0, 2, _Ttype::dimen());
for (Tensor::index i = dum.begin(); i != dum.end(); ++i)
{
int d = i.getCoor().sum();
Symmetry symrun(_Ttype::dimen()-d, d);
_TGStype dumgs(0, TensorDimens(symrun, dumnvs));
if (pol.check(Symmetry(d)))
{
TwoDMatrix subt(*this, offset, dumgs.ncols());
subt.add(1.0, *(pol.get(Symmetry(d))));
}
offset += dumgs.ncols();
}
}
/* We create |x1| to be a concatenation of $1$ and $x$, and then create
|PowerProvider| to make a corresponding power |xpow| of |x1|, and
finally multiply this matrix with the power. */
void
eval(Vector &out, const ConstVector &v) const
{
TL_RAISE_IF(v.length()+1 != _Ttype::nvar(),
"Wrong input vector length in CompactPolynomial::eval");
TL_RAISE_IF(out.length() != _Ttype::nrows(),
"Wrong output vector length in CompactPolynomial::eval");
Vector x1(v.length()+1);
Vector x1p(x1, 1, v.length());
x1p = v;
x1[0] = 1.0;
if (_Ttype::dimen() == 0)
out = ConstVector(*this, 0);
else
{
PowerProvider pp(x1);
const _Stype *xpow = &pp.getNext((const _Stype *) NULL);
for (int i = 1; i < _Ttype::dimen(); i++)
xpow = &pp.getNext((const _Stype *) NULL);
multVec(0.0, out, 1.0, *xpow);
}
}
};
/* Specialization of the |CompactPolynomial| for unfolded tensor. */
class UCompactPolynomial : public CompactPolynomial<UFSTensor, UGSTensor, URSingleTensor>
{
public:
UCompactPolynomial(const UTensorPolynomial &upol)
: CompactPolynomial<UFSTensor, UGSTensor, URSingleTensor>(upol)
{
}
};
/* Specialization of the |CompactPolynomial| for folded tensor. */
class FCompactPolynomial : public CompactPolynomial<FFSTensor, FGSTensor, FRSingleTensor>
{
public:
FCompactPolynomial(const FTensorPolynomial &fpol)
: CompactPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(fpol)
{
}
};
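/* A usage sketch (assuming a folded polynomial |pol|, an input vector
|v| of length |pol.nvars()| and an output vector |out| of length
|pol.nrows()|): the compact form is built once and then evaluated,

   FCompactPolynomial cpol(pol);
   cpol.eval(out, v);

which should give the same result as |pol.evalTrad(out, v)|. */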

View File

@ -1,507 +0,0 @@
@q $Id: t_polynomial.hweb 2336 2009-01-14 10:37:02Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 Tensor polynomial evaluation. Start of {\tt t\_polynomial.h} file.
We need to evaluate a tensor polynomial of the form:
$$
\left[g_{x}\right]_{\alpha_1}[x]^{\alpha_1}+
\left[g_{x^2}\right]_{\alpha_1\alpha_2}[x]^{\alpha_1}[x]^{\alpha_2}+
\ldots+
\left[g_{x^n}\right]_{\alpha_1\ldots\alpha_n}\prod_{i=1}^n[x]^{\alpha_i}
$$
where $x$ is a column vector.
We have basically two options. The first is to use the formula above,
the second is to use a Horner-like formula:
$$
\left[\cdots\left[\left[\left[g_{x^{n-1}}\right]+
\left[g_{x^n}\right]_{\alpha_1\ldots\alpha_{n-1}\alpha_n}
[x]^{\alpha_n}\right]_{\alpha_1\ldots\alpha_{n-2}\alpha_{n-1}}
[x]^{\alpha_{n-1}}\right]\cdots\right]_{\alpha_1}
[x]^{\alpha_1}
$$
Alternativelly, we can put the the polynomial into a more compact form
$$\left[g_{x}\right]_{\alpha_1}[x]^{\alpha_1}+
\left[g_{x^2}\right]_{\alpha_1\alpha_2}[x]^{\alpha_1}[x]^{\alpha_2}+
\ldots+
\left[g_{x^n}\right]_{\alpha_1\ldots\alpha_n}\prod_{i=1}^n[x]^{\alpha_i}
= [G]_{\alpha_1\ldots\alpha_n}\prod_{i=1}^n\left[\matrix{1\cr x}\right]^{\alpha_i}
$$
Then the polynomial evaluation becomes just a matrix multiplication of the vector power.
Here we define the tensor polynomial as a container of full symmetry
tensors and add an evaluation methods. We have two sorts of
containers, folded and unfolded. For each type we declare two methods
implementing the above formulas. We define classes for the
compactification of the polynomial. The class derives from the tensor
and has a eval method.
@s PowerProvider int
@s TensorPolynomial int
@s UTensorPolynomial int
@s FTensorPolynomial int
@s CompactPolynomial int
@s UCompactPolynomial int
@s FCompactPolynomial int
@c
#include "t_container.h"
#include "fs_tensor.h"
#include "rfs_tensor.h"
#include"tl_static.h"
@<|PowerProvider| class declaration@>;
@<|TensorPolynomial| class declaration@>;
@<|UTensorPolynomial| class declaration@>;
@<|FTensorPolynomial| class declaration@>;
@<|CompactPolynomial| class declaration@>;
@<|UCompactPolynomial| class declaration@>;
@<|FCompactPolynomial| class declaration@>;
@ Just to make the code nicer, we implement a Kronecker power of a
vector encapsulated in the following class. It has a |getNext| method
which returns either folded or unfolded row-oriented single column
Kronecker power of the vector according to the type of a dummy
argument. This allows us to use the type dependent code in templates
below.
The implementation of the Kronecker power is that we maintain the last
unfolded power. If unfolded |getNext| is called, we Kronecker multiply
the last power with a vector and return it. If folded |getNext| is
called, we do the same plus we fold it.
|getNext| returns the vector for the first call (first power), the
second power is returned on the second call, and so on.
@<|PowerProvider| class declaration@>=
class PowerProvider {
Vector origv;
URSingleTensor* ut;
FRSingleTensor* ft;
int nv;
public:@;
PowerProvider(const ConstVector& v)
: origv(v), ut(NULL), ft(NULL), nv(v.length())@+ {}
~PowerProvider();
const URSingleTensor& getNext(const URSingleTensor* dummy);
const FRSingleTensor& getNext(const FRSingleTensor* dummy);
};
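For instance (an illustrative sketch; |v| stands for some |ConstVector|):

  PowerProvider pp(v);
  const URSingleTensor& p1 = pp.getNext((const URSingleTensor*)NULL); // $v$
  const URSingleTensor& p2 = pp.getNext((const URSingleTensor*)NULL); // $v\otimes v$

Note that the provider maintains the last power internally, so a reference
obtained from one call should not be used after the next call.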
@ The tensor polynomial is basically a tensor container which is more
strict about insertions. It maintains the number of rows and the number
of variables, and allows insertion only of tensors consistent with
these properties. The maximum dimension is maintained by the |insert|
method.
So we re-implement the |insert| method and implement |evalTrad|
(traditional polynomial evaluation) and the Horner-like evaluation
|evalHorner|.
In addition, we implement derivatives of the polynomial and its
evaluation. The evaluation of a derivative is different from the
evaluation of the whole polynomial, simply because the evaluation of
the derivatives is a tensor, and the evaluation of the polynomial is a
vector (zero dimensional tensor). See documentation to
|@<|TensorPolynomial::derivative| code@>| and
|@<|TensorPolynomial::evalPartially| code@>| for details.
@s _Stype int
@s _TGStype int
@<|TensorPolynomial| class declaration@>=
template <class _Ttype, class _TGStype, class _Stype>@;
class TensorPolynomial : public TensorContainer<_Ttype> {
int nr;
int nv;
int maxdim;
typedef TensorContainer<_Ttype> _Tparent;
typedef typename _Tparent::_ptr _ptr;
public:@;
TensorPolynomial(int rows, int vars)
: TensorContainer<_Ttype>(1),
nr(rows), nv(vars), maxdim(0) {}
TensorPolynomial(const TensorPolynomial<_Ttype, _TGStype, _Stype>& tp, int k)
: TensorContainer<_Ttype>(tp),
nr(tp.nr), nv(tp.nv), maxdim(0) {@+ derivative(k);@+}
TensorPolynomial(int first_row, int num, TensorPolynomial<_Ttype, _TGStype, _Stype>& tp)
: TensorContainer<_Ttype>(first_row, num, tp),
nr(num), nv(tp.nv), maxdim(tp.maxdim)@+ {}
@<|TensorPolynomial| contract constructor code@>;
TensorPolynomial(const TensorPolynomial& tp)
: TensorContainer<_Ttype>(tp), nr(tp.nr), nv(tp.nv), maxdim(tp.maxdim)@+ {}
int nrows() const
{@+ return nr;@+}
int nvars() const
{@+ return nv;@+}
@<|TensorPolynomial::evalTrad| code@>;
@<|TensorPolynomial::evalHorner| code@>;
@<|TensorPolynomial::insert| code@>;
@<|TensorPolynomial::derivative| code@>;
@<|TensorPolynomial::evalPartially| code@>;
};
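A minimal construction sketch (with hypothetical sizes; the container
presumably takes ownership of the inserted pointers, which is why they are
allocated by |new|):

  UTensorPolynomial pol(2, 3);             // 2 rows, 3 variables
  UFSTensor* g1 = new UFSTensor(2, 3, 1);  // will hold $g_x$
  g1->zeros();
  pol.insert(g1);                          // |maxdim| becomes 1
  UFSTensor* g2 = new UFSTensor(2, 3, 2);  // will hold ${1\over 2!}g_{x^2}$
  g2->zeros();
  pol.insert(g2);                          // |maxdim| becomes 2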
@ This constructor takes a tensor polynomial
$$P(x,y)=\sum^m_{k=0}[g_{(xy)^k}]_{\alpha_1\ldots\alpha_k}
\left[\matrix{x\cr y}\right]^{\alpha_1\ldots\alpha_k}$$
and for a given $x$ it makes a polynomial
$$Q(y)=P(x,y).$$
The algorithm for each full symmetry $(xy)^k$ works with subtensors (slices) of
symmetry $x^iy^j$ (with $i+j=k$), and contracts these subtensors with respect to
$x^i$ to obtain a tensor of full symmetry $y^j$. Since the column
$x^i$ is calculated by |PowerProvider| we cycle for $i=1,...,m$. Then
we have to add everything for $i=0$.
The code works as follows: For slicing purposes we need stack sizes
|ss| corresponding to the lengths of $x$ and $y$, and then the identity |pp|
for unfolding a symmetry of the slice to obtain stack coordinates of
the slice. Then we do the calculations for $i=1,\ldots,m$ and then for
$i=0$.
@<|TensorPolynomial| contract constructor code@>=
TensorPolynomial(const TensorPolynomial<_Ttype, _TGStype, _Stype>& tp, const Vector& xval)
: TensorContainer<_Ttype>(1),
nr(tp.nrows()), nv(tp.nvars() - xval.length()), maxdim(0)
{
TL_RAISE_IF(nvars() < 0,
"Length of xval too big in TensorPolynomial contract constructor");
IntSequence ss(2);@+ ss[0] = xval.length();@+ ss[1] = nvars();
IntSequence pp(2);@+ pp[0] = 0;@+ pp[1] = 1;
@<do contraction for all $i>0$@>;
@<do contraction for $i=0$@>;
}
@ Here we set up the |PowerProvider|, and cycle through
$i=1,\ldots,m$. Within the loop we cycle through $j=0,\ldots,m-i$. If
there is a tensor with symmetry $(xy)^{i+j}$ in the original
polynomial, we make its slice with symmetry $x^iy^j$, and
|contractAndAdd| it to the tensor |ten| in the |this| polynomial with
a symmetry $y^j$.
Note three things: First, the tensor |ten| is either created and put
into |this| container or just retrieved from the container; this is done
in |@<initialize |ten| of dimension |j|@>|. Second, the contribution to
the |ten| tensor must be multiplied by $\left(\matrix{i+j\cr
j}\right)$, since there are exactly that many slices of
$(xy)^{i+j}$ of the symmetry $x^iy^j$ and all must be added. Third,
the tensor |ten| is fully symmetric while |_TGStype::contractAndAdd|
works with general symmetry; that is why we have to convert the fully
symmetric |ten| in place to a general symmetry tensor.
@<do contraction for all $i>0$@>=
PowerProvider pwp(xval);
for (int i = 1; i <= tp.maxdim; i++) {
const _Stype& xpow = pwp.getNext((const _Stype*)NULL);
for (int j = 0; j <= tp.maxdim-i; j++) {
if (tp.check(Symmetry(i+j))) {
@<initialize |ten| of dimension |j|@>;
Symmetry sym(i,j);
IntSequence coor(sym, pp);
_TGStype slice(*(tp.get(Symmetry(i+j))), ss, coor, TensorDimens(sym, ss));
slice.mult(Tensor::noverk(i+j, j));
_TGStype tmp(*ten);
slice.contractAndAdd(0, tmp, xpow);
}
}
}
@ This is easy. The code is analogous to |@<do contraction for
all $i>0$@>|, specialized to $i=0$. The contraction here takes the form
of a simple addition.
@<do contraction for $i=0$@>=
for (int j = 0; j <= tp.maxdim; j++) {
if (tp.check(Symmetry(j))) {
@<initialize |ten| of dimension |j|@>;
Symmetry sym(0, j);
IntSequence coor(sym, pp);
_TGStype slice(*(tp.get(Symmetry(j))), ss, coor, TensorDimens(sym, ss));
ten->add(1.0, slice);
}
}
@ The pointer |ten| is either a new tensor or is retrieved from |this| container.
@<initialize |ten| of dimension |j|@>=
_Ttype* ten;
if (_Tparent::check(Symmetry(j))) {
ten = _Tparent::get(Symmetry(j));
} else {
ten = new _Ttype(nrows(), nvars(), j);
ten->zeros();
insert(ten);
}
@ Here we cycle up to the maximum dimension, and if a tensor exists in
the container, then we multiply it with the Kronecker power of the
vector supplied by |PowerProvider|.
@<|TensorPolynomial::evalTrad| code@>=
void evalTrad(Vector& out, const ConstVector& v) const
{
if (_Tparent::check(Symmetry(0)))
out = _Tparent::get(Symmetry(0))->getData();
else
out.zeros();
PowerProvider pp(v);
for (int d = 1; d <= maxdim; d++) {
const _Stype& p = pp.getNext((const _Stype*)NULL);
Symmetry cs(d);
if (_Tparent::check(cs)) {
const _Ttype* t = _Tparent::get(cs);
t->multaVec(out, p.getData());
}
}
}
@ Here we first construct the tensor of dimension |maxdim-1| by
contraction, and then cycle downwards. The code is clear; the only messy
thing is the |new| and |delete| bookkeeping.
@<|TensorPolynomial::evalHorner| code@>=
void evalHorner(Vector& out, const ConstVector& v) const
{
if (_Tparent::check(Symmetry(0)))
out = _Tparent::get(Symmetry(0))->getData();
else
out.zeros();
if (maxdim == 0)
return;
_Ttype* last;
if (maxdim == 1)
last = new _Ttype(*(_Tparent::get(Symmetry(1))));
else
last = new _Ttype(*(_Tparent::get(Symmetry(maxdim))), v);
for (int d = maxdim-1; d >=1; d--) {
Symmetry cs(d);
if (_Tparent::check(cs)) {
const _Ttype* nt = _Tparent::get(cs);
last->add(1.0, ConstTwoDMatrix(*nt));
}
if (d > 1) {
_Ttype* new_last = new _Ttype(*last, v);
delete last;
last = new_last;
}
}
last->multaVec(out, v);
delete last;
}
@ Before a tensor is inserted, we check the number of rows and the
number of variables. Then we insert and update |maxdim|.
@<|TensorPolynomial::insert| code@>=
void insert(_ptr t)
{
TL_RAISE_IF(t->nrows() != nr,
"Wrong number of rows in TensorPolynomial::insert");
TL_RAISE_IF(t->nvar() != nv,
"Wrong number of variables in TensorPolynomial::insert");
TensorContainer<_Ttype>::insert(t);
if (maxdim < t->dimen())
maxdim = t->dimen();
}
@ The polynomial takes the form
$$\sum_{i=0}^n{1\over i!}\left[g_{y^i}\right]_{\alpha_1\ldots\alpha_i}
\left[y\right]^{\alpha_1}\ldots\left[y\right]^{\alpha_i},$$ where
$\left[g_{y^i}\right]$ are $i$-order derivatives of the polynomial. We
assume that ${1\over i!}\left[g_{y^i}\right]$ are items in the tensor
container. This method differentiates the polynomial by one order to
yield:
$$\sum_{i=1}^n{1\over i!}\left[i\cdot g_{y^i}\right]_{\alpha_1\ldots\alpha_i}
\left[y\right]^{\alpha_1}\ldots\left[y\right]^{\alpha_{i-1}},$$
where $\left[i\cdot{1\over i!}\cdot g_{y^i}\right]$ are put to the container.
A polynomial can be a derivative of some order, and that order cannot
be recognized from the object itself. That is why the order has to be
supplied as an input.
@<|TensorPolynomial::derivative| code@>=
void derivative(int k)
{
for (int d = 1; d <= maxdim; d++) {
if (_Tparent::check(Symmetry(d))) {
_Ttype* ten = _Tparent::get(Symmetry(d));
ten->mult((double) max((d-k), 0));
}
}
}
@ Now let us suppose that we have an |s| order derivative of a
polynomial whose $i$ order derivatives are $\left[g_{y^i}\right]$, so
we have
$$\sum_{i=s}^n{1\over i!}\left[g_{y^i}\right]_{\alpha_1\ldots\alpha_i}
\prod_{k=1}^{i-s}\left[y\right]^{\alpha_k},$$
where ${1\over i!}\left[g_{y^i}\right]$ are tensors in the container.
This method performs the evaluation. The result is an |s| dimensional
tensor. Note that when combined with the method |derivative|, it
evaluates a derivative of some order. For example, the sequence of calls
|g.derivative(0)|, |g.derivative(1)| and |der=g.evalPartially(2, v)|
calculates the $2!$ multiple of the second derivative of |g| at |v|.
@<|TensorPolynomial::evalPartially| code@>=
_Ttype* evalPartially(int s, const ConstVector& v)
{
TL_RAISE_IF(v.length() != nvars(),
"Wrong length of vector for TensorPolynomial::evalPartially");
_Ttype* res = new _Ttype(nrows(), nvars(), s);
res->zeros();
if (_Tparent::check(Symmetry(s)))
res->add(1.0, *(_Tparent::get(Symmetry(s))));
for (int d = s+1; d <= maxdim; d++) {
if (_Tparent::check(Symmetry(d))) {
const _Ttype& ltmp = *(_Tparent::get(Symmetry(d)));
_Ttype* last = new _Ttype(ltmp);
for (int j = 0; j < d - s; j++) {
_Ttype* newlast = new _Ttype(*last, v);
delete last;
last = newlast;
}
res->add(1.0, *last);
delete last;
}
}
return res;
}
@ This just gives a name to unfolded tensor polynomial.
@<|UTensorPolynomial| class declaration@>=
class FTensorPolynomial;
class UTensorPolynomial : public TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor> {
public:@;
UTensorPolynomial(int rows, int vars)
: TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(rows, vars)@+ {}
UTensorPolynomial(const UTensorPolynomial& up, int k)
: TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(up, k)@+ {}
UTensorPolynomial(const FTensorPolynomial& fp);
UTensorPolynomial(const UTensorPolynomial& tp, const Vector& xval)
: TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(tp, xval)@+ {}
UTensorPolynomial(int first_row, int num, UTensorPolynomial& tp)
: TensorPolynomial<UFSTensor, UGSTensor, URSingleTensor>(first_row, num, tp)@+ {}
};
@ This just gives a name to folded tensor polynomial.
@<|FTensorPolynomial| class declaration@>=
class FTensorPolynomial : public TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor> {
public:@;
FTensorPolynomial(int rows, int vars)
: TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(rows, vars)@+ {}
FTensorPolynomial(const FTensorPolynomial& fp, int k)
: TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(fp, k)@+ {}
FTensorPolynomial(const UTensorPolynomial& up);
FTensorPolynomial(const FTensorPolynomial& tp, const Vector& xval)
: TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(tp, xval)@+ {}
FTensorPolynomial(int first_row, int num, FTensorPolynomial& tp)
: TensorPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(first_row, num, tp)@+ {}
};
@ The compact form of |TensorPolynomial| is in fact a full symmetry
tensor whose number of variables equals the number of variables of the
polynomial plus one, the extra variable corresponding to the constant $1$.
@<|CompactPolynomial| class declaration@>=
template <class _Ttype, class _TGStype, class _Stype>@;
class CompactPolynomial : public _Ttype {
public:@;
@<|CompactPolynomial| constructor code@>;
@<|CompactPolynomial::eval| method code@>;
};
@ This constructor copies matrices from the given tensor polynomial to
the appropriate locations in this matrix. It creates a dummy tensor
|dum| with two variables (one corresponds to $1$, the other to
$x$). The index goes through this dummy tensor, and for each index we
compute the number of columns of the folded/unfolded general symmetry
tensor corresponding to the selection of $1$'s and $x$'s given by the
index. The length of $1$ is one, and the length of $x$ is
|pol.nvars()|; this nvs information is stored in |dumnvs|. The symmetry
of the general symmetry dummy tensor |dumgs| is given by the number of
ones and $x$'s in the index. We then copy the matrix, if it exists in
the polynomial, and increase |offset| for the following cycle.
@<|CompactPolynomial| constructor code@>=
CompactPolynomial(const TensorPolynomial<_Ttype, _TGStype, _Stype>& pol)
: _Ttype(pol.nrows(), pol.nvars()+1, pol.getMaxDim())
{
_Ttype::zeros();
IntSequence dumnvs(2);
dumnvs[0] = 1;
dumnvs[1] = pol.nvars();
int offset = 0;
_Ttype dum(0, 2, _Ttype::dimen());
for (Tensor::index i = dum.begin(); i != dum.end(); ++i) {
int d = i.getCoor().sum();
Symmetry symrun(_Ttype::dimen()-d, d);
_TGStype dumgs(0, TensorDimens(symrun, dumnvs));
if (pol.check(Symmetry(d))) {
TwoDMatrix subt(*this, offset, dumgs.ncols());
subt.add(1.0, *(pol.get(Symmetry(d))));
}
offset += dumgs.ncols();
}
}
@ We create |x1| to be a concatenation of $1$ and $x$, and then create
|PowerProvider| to make a corresponding power |xpow| of |x1|, and
finally multiply this matrix with the power.
@<|CompactPolynomial::eval| method code@>=
void eval(Vector& out, const ConstVector& v) const
{
TL_RAISE_IF(v.length()+1 != _Ttype::nvar(),
"Wrong input vector length in CompactPolynomial::eval");
TL_RAISE_IF(out.length() != _Ttype::nrows(),
"Wrong output vector length in CompactPolynomial::eval");
Vector x1(v.length()+1);
Vector x1p(x1, 1, v.length());
x1p = v;
x1[0] = 1.0;
if (_Ttype::dimen() == 0)
out = ConstVector(*this, 0);
else {
PowerProvider pp(x1);
const _Stype& xpow = pp.getNext((const _Stype*)NULL);
for (int i = 1; i < _Ttype::dimen(); i++)
xpow = pp.getNext((const _Stype*)NULL);
multVec(0.0, out, 1.0, xpow);
}
}
@ Specialization of the |CompactPolynomial| for unfolded tensor.
@<|UCompactPolynomial| class declaration@>=
class UCompactPolynomial : public CompactPolynomial<UFSTensor, UGSTensor, URSingleTensor> {
public:@;
UCompactPolynomial(const UTensorPolynomial& upol)
: CompactPolynomial<UFSTensor, UGSTensor, URSingleTensor>(upol)@+ {}
};
@ Specialization of the |CompactPolynomial| for folded tensor.
@<|FCompactPolynomial| class declaration@>=
class FCompactPolynomial : public CompactPolynomial<FFSTensor, FGSTensor, FRSingleTensor> {
public:@;
FCompactPolynomial(const FTensorPolynomial& fpol)
: CompactPolynomial<FFSTensor, FGSTensor, FRSingleTensor>(fpol)@+ {}
};
@ End of {\tt t\_polynomial.h} file.

dynare++/tl/cc/tensor.cc Normal file

@ -0,0 +1,222 @@
// Copyright 2004, Ondra Kamenik
#include "tensor.hh"
#include "tl_exception.hh"
#include "tl_static.hh"
// |Tensor| static methods
/* Here we implement calculation of $\pmatrix{n\cr k}$ where $n-k$ is
usually bigger than $k$.
Also we implement $a^b$. */
int
Tensor::noverk(int n, int k)
{
return tls.ptriang->noverk(n, k);
}
int
Tensor::power(int a, int b)
{
int res = 1;
for (int i = 0; i < b; i++)
res *= a;
return res;
}
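/* For illustration: power(2, 10) returns 1024, and noverk(5, 2) returns
   10, provided the Pascal triangle held by |tls| has been initialized
   large enough via TLStatic::init() (see tl_static.cc). */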
// |Tensor::noverseq_ip| static method
/* Here we calculate a generalized combination number
$\left(\matrix{a\cr b_1,\ldots,b_n}\right)$, where $a=b_1+\ldots+
b_n$. We use the identity
$$\left(\matrix{a\cr b_1,\ldots,b_n}\right)=\left(\matrix{b_1+b_2\cr b_1}\right)\cdot
\left(\matrix{a\cr b_1+b_2,b_3,\ldots,b_n}\right)$$
This number is exactly a number of unfolded indices corresponding to
one folded index, where the sequence $b_1,\ldots,b_n$ is the symmetry
of the index. */
int
Tensor::noverseq_ip(IntSequence &s)
{
if (s.size() == 0 || s.size() == 1)
return 1;
s[1] += s[0];
return noverk(s[1], s[0]) * noverseq(IntSequence(s, 1, s.size()));
}
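/* For illustration, take the symmetry (1,2,1), so a = 4: the recursion
   yields noverk(3,1)*noverk(4,3)*1 = 3*4 = 12, which is indeed the
   multinomial coefficient 4!/(1!*2!*1!). */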
/* Here we increment a given sequence within full symmetry given by
|nv|, which is number of variables in each dimension. The underlying
tensor is unfolded, so we increase the rightmost by one, and if it is
|nv| we zero it and increase the next one to the left. */
void
UTensor::increment(IntSequence &v, int nv)
{
if (v.size() == 0)
return;
int i = v.size()-1;
v[i]++;
while (i > 0 && v[i] == nv)
{
v[i] = 0;
v[--i]++;
}
}
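/* For illustration, with nv = 3 repeated increments walk
   (0,0,0) -> (0,0,1) -> (0,0,2) -> (0,1,0) -> ... -> (2,2,2) -> (3,0,0),
   the last n-tuple being the past-the-end coordinates. */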
/* This is dual to |UTensor::increment(IntSequence& v, int nv)|. */
void
UTensor::decrement(IntSequence &v, int nv)
{
if (v.size() == 0)
return;
int i = v.size()-1;
v[i]--;
while (i > 0 && v[i] == -1)
{
v[i] = nv -1;
v[--i]--;
}
}
/* Here we increment index for general symmetry for unfolded
storage. The sequence |nvmx| assigns for each coordinate a number of
variables. Since the storage is unfolded, we do not need information
about what variables are symmetric, everything necessary is given by
|nvmx|. */
void
UTensor::increment(IntSequence &v, const IntSequence &nvmx)
{
if (v.size() == 0)
return;
int i = v.size()-1;
v[i]++;
while (i > 0 && v[i] == nvmx[i])
{
v[i] = 0;
v[--i]++;
}
}
/* This is a dual code to |UTensor::increment(IntSequence& v, const
IntSequence& nvmx)|. */
void
UTensor::decrement(IntSequence &v, const IntSequence &nvmx)
{
if (v.size() == 0)
return;
int i = v.size()-1;
v[i]--;
while (i > 0 && v[i] == -1)
{
v[i] = nvmx[i] -1;
v[--i]--;
}
}
/* Here we return an offset for a given coordinates of unfolded full
symmetry tensor. This is easy. */
int
UTensor::getOffset(const IntSequence &v, int nv)
{
int pow = 1;
int res = 0;
for (int i = v.size()-1; i >= 0; i--)
{
res += v[i]*pow;
pow *= nv;
}
return res;
}
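/* For illustration: the offset is the coordinate sequence read as a
   base-|nv| number, e.g. for v = (1,2,0) and nv = 3 it is
   1*9 + 2*3 + 0 = 15. */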
/* Also easy. */
int
UTensor::getOffset(const IntSequence &v, const IntSequence &nvmx)
{
int pow = 1;
int res = 0;
for (int i = v.size()-1; i >= 0; i--)
{
res += v[i]*pow;
pow *= nvmx[i];
}
return res;
}
/* Decrementing the coordinates of a folded index is not that easy. Note
   that if a trailing part of the coordinates is $(b, a, a, a)$ (for
   instance) with $b<a$, then the preceding coordinates are $(b, a-1, n-1,
   n-1)$, where $n$ is the number of variables |nv|. So we find the
   leftmost element which is equal to the last element, decrease it by
   one, and then set all elements to its right to $n-1$. */
void
FTensor::decrement(IntSequence &v, int nv)
{
int i = v.size()-1;
while (i > 0 && v[i-1] == v[i])
i--;
v[i]--;
for (int j = i+1; j < v.size(); j++)
v[j] = nv-1;
}
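/* For illustration, with nv = 4: decrementing (0,2,2,2) yields (0,1,3,3);
   the leftmost element of the trailing run of 2's is decreased, and
   everything to its right is set to nv-1 = 3. */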
/* This calculates the order of the given index within our ordering of
   indices. In order to understand how it works, let us take the number
   of variables $n$ and the dimension $k$, and write down all the
   possible combinations of indices in our ordering. For example, for
   $n=4$ and $k=3$, the sequence looks as follows:
\def\tr#1#2#3{\hbox{\rlap{#1}\hskip 0.7em\rlap{#2}\hskip 0.7em\rlap{#3}\hskip 0.7em}}
\halign{\tabskip=3em \hskip2cm #&#&#&#\cr
\tr 000 &\tr 111 &\tr 222 &\tr 333\cr
\tr 001 &\tr 112 &\tr 223 \cr
\tr 002 &\tr 113 &\tr 233 \cr
\tr 003 &\tr 122 \cr
\tr 011 &\tr 123\cr
\tr 012 &\tr 133\cr
\tr 013\cr
\tr 022\cr
\tr 023\cr
\tr 033\cr
}
Now observe that the number of sequences starting with zero is the same
as the total number of sequences with the same number of variables but
with the dimension decreased by one. More generally, if $S_{n,k}$ denotes
the number of indices of $n$ variables and dimension $k$, then the number
of indices beginning with $m$ is exactly $S_{n-m,k-1}$. This is because $m$
can be subtracted from all items, and we obtain a sequence of indices of
$n-m$ variables. So we have the formula:
$$S_{n,k}=S_{n,k-1}+S_{n-1,k-1}+\ldots+S_{1,k-1}$$
Now it is easy to calculate the offset of an index of the form
$(m,\ldots,m)$. It is the sum of the counts of all indices above it, that is
$S_{n,k-1}+\ldots+S_{n-m,k-1}$. We know that $S_{n,k}=\pmatrix{n+k-1\cr
k}$. Using the above formula, we can calculate the offset of $(m,\ldots,m)$ as
$$\pmatrix{n+k-1\cr k}-\pmatrix{n-m+k-1\cr k}$$
The offset of a general index $(m_1,m_2,\ldots,m_k)$ is calculated
recursively, since it is the offset of $(m_1,\ldots,m_1)$ for $n$
variables plus the offset of $(m_2-m_1,m_3-m_1,\ldots,m_k-m_1)$ for
$n-m_1$ variables. */
int
FTensor::getOffsetRecurse(IntSequence &v, int nv)
{
if (v.size() == 0)
return 0;
int prefix = v.getPrefixLength();
int m = v[0];
int k = v.size();
int s1 = noverk(nv+k-1, k) - noverk(nv-m+k-1, k);
IntSequence subv(v, prefix, k);
subv.add(-m);
int s2 = getOffsetRecurse(subv, nv-m);
return s1+s2;
}
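/* For illustration, take the index (0,1,3) with nv = 4, which sits at
   offset 6 in the k = 3 listing above. The recursion gives s1 = 0 for
   m = 0, then s1 = C(5,2)-C(4,2) = 4 for the subindex (1,3) with nv = 4,
   then s1 = C(3,1)-C(1,1) = 2 for (2) with nv = 3, summing to 6. */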

dynare++/tl/cc/tensor.cweb

@ -1,229 +0,0 @@
@q $Id: tensor.cweb 429 2005-08-16 15:20:09Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@ Start of {\tt tensor.cpp} file.
@c
#include "tensor.h"
#include "tl_exception.h"
#include "tl_static.h"
@<|Tensor| static methods@>;
@<|Tensor::noverseq_ip| static method@>;
@<|UTensor::increment| code 1@>;
@<|UTensor::decrement| code 1@>;
@<|UTensor::increment| code 2@>;
@<|UTensor::decrement| code 2@>;
@<|UTensor::getOffset| code 1@>;
@<|UTensor::getOffset| code 2@>;
@<|FTensor::decrement| code@>;
@<|FTensor::getOffsetRecurse| code@>;
@ Here we implement calculation of $\pmatrix{n\cr k}$ where $n-k$ is
usually bigger than $k$.
Also we implement $a^b$.
@<|Tensor| static methods@>=
int Tensor::noverk(int n, int k)
{
return tls.ptriang->noverk(n,k);
}
@#
int Tensor::power(int a, int b)
{
int res = 1;
for (int i = 0; i < b; i++)
res *= a;
return res;
}
@ Here we calculate a generalized combination number
$\left(\matrix{a\cr b_1,\ldots,b_n}\right)$, where $a=b_1+\ldots+
b_n$. We use the identity
$$\left(\matrix{a\cr b_1,\ldots,b_n}\right)=\left(\matrix{b_1+b_2\cr b_1}\right)\cdot
\left(\matrix{a\cr b_1+b_2,b_3,\ldots,b_n}\right)$$
This number is exactly a number of unfolded indices corresponding to
one folded index, where the sequence $b_1,\ldots,b_n$ is the symmetry
of the index.
@<|Tensor::noverseq_ip| static method@>=
int Tensor::noverseq_ip(IntSequence& s)
{
if (s.size() == 0 || s.size() == 1)
return 1;
s[1] += s[0];
return noverk(s[1],s[0]) * noverseq(IntSequence(s, 1, s.size()));
}
@ Here we increment a given sequence within full symmetry given by
|nv|, which is number of variables in each dimension. The underlying
tensor is unfolded, so we increase the rightmost by one, and if it is
|nv| we zero it and increase the next one to the left.
@<|UTensor::increment| code 1@>=
void UTensor::increment(IntSequence& v, int nv)
{
if (v.size() == 0)
return;
int i = v.size()-1;
v[i]++;
while (i > 0 && v[i] == nv) {
v[i] = 0;
v[--i]++;
}
}
@ This is dual to |UTensor::increment(IntSequence& v, int nv)|.
@<|UTensor::decrement| code 1@>=
void UTensor::decrement(IntSequence& v, int nv)
{
if (v.size() == 0)
return;
int i = v.size()-1;
v[i]--;
while (i > 0 && v[i] == -1) {
v[i] = nv -1;
v[--i]--;
}
}
@ Here we increment index for general symmetry for unfolded
storage. The sequence |nvmx| assigns for each coordinate a number of
variables. Since the storage is unfolded, we do not need information
about what variables are symmetric, everything necessary is given by
|nvmx|.
@<|UTensor::increment| code 2@>=
void UTensor::increment(IntSequence& v, const IntSequence& nvmx)
{
if (v.size() == 0)
return;
int i = v.size()-1;
v[i]++;
while (i > 0 && v[i] == nvmx[i]) {
v[i] = 0;
v[--i]++;
}
}
@ This is a dual code to |UTensor::increment(IntSequence& v, const
IntSequence& nvmx)|.
@<|UTensor::decrement| code 2@>=
void UTensor::decrement(IntSequence& v, const IntSequence& nvmx)
{
if (v.size() == 0)
return;
int i = v.size()-1;
v[i]--;
while (i > 0 && v[i] == -1) {
v[i] = nvmx[i] -1;
v[--i]--;
}
}
@ Here we return an offset for a given coordinates of unfolded full
symmetry tensor. This is easy.
@<|UTensor::getOffset| code 1@>=
int UTensor::getOffset(const IntSequence& v, int nv)
{
int pow = 1;
int res = 0;
for (int i = v.size()-1; i >= 0; i--) {
res += v[i]*pow;
pow *= nv;
}
return res;
}
@ Also easy.
@<|UTensor::getOffset| code 2@>=
int UTensor::getOffset(const IntSequence& v, const IntSequence& nvmx)
{
int pow = 1;
int res = 0;
for (int i = v.size()-1; i >= 0; i--) {
res += v[i]*pow;
pow *= nvmx[i];
}
return res;
}
@ Decrementing the coordinates of a folded index is not that easy. Note
that if a trailing part of the coordinates is $(b, a, a, a)$ (for
instance) with $b<a$, then the preceding coordinates are $(b, a-1, n-1,
n-1)$, where $n$ is the number of variables |nv|. So we find the
leftmost element which is equal to the last element, decrease it by
one, and then set all elements to its right to $n-1$.
@<|FTensor::decrement| code@>=
void FTensor::decrement(IntSequence& v, int nv)
{
int i = v.size()-1;
while (i > 0 && v[i-1]==v[i])
i--;
v[i]--;
for (int j = i+1; j < v.size(); j++)
v[j] = nv-1;
}
@ This calculates the order of the given index within our ordering of
indices. In order to understand how it works, let us take the number
of variables $n$ and the dimension $k$, and write down all the
possible combinations of indices in our ordering. For example, for
$n=4$ and $k=3$, the sequence looks as follows:
\def\tr#1#2#3{\hbox{\rlap{#1}\hskip 0.7em\rlap{#2}\hskip 0.7em\rlap{#3}\hskip 0.7em}}
\halign{\tabskip=3em \hskip2cm #&#&#&#\cr
\tr 000 &\tr 111 &\tr 222 &\tr 333\cr
\tr 001 &\tr 112 &\tr 223 \cr
\tr 002 &\tr 113 &\tr 233 \cr
\tr 003 &\tr 122 \cr
\tr 011 &\tr 123\cr
\tr 012 &\tr 133\cr
\tr 013\cr
\tr 022\cr
\tr 023\cr
\tr 033\cr
}
Now observe that the number of sequences starting with zero is the same
as the total number of sequences with the same number of variables but
with the dimension decreased by one. More generally, if $S_{n,k}$ denotes
the number of indices of $n$ variables and dimension $k$, then the number
of indices beginning with $m$ is exactly $S_{n-m,k-1}$. This is because $m$
can be subtracted from all items, and we obtain a sequence of indices of
$n-m$ variables. So we have the formula:
$$S_{n,k}=S_{n,k-1}+S_{n-1,k-1}+\ldots+S_{1,k-1}$$
Now it is easy to calculate the offset of an index of the form
$(m,\ldots,m)$. It is the sum of the counts of all indices above it, that is
$S_{n,k-1}+\ldots+S_{n-m,k-1}$. We know that $S_{n,k}=\pmatrix{n+k-1\cr
k}$. Using the above formula, we can calculate the offset of $(m,\ldots,m)$ as
$$\pmatrix{n+k-1\cr k}-\pmatrix{n-m+k-1\cr k}$$
The offset of a general index $(m_1,m_2,\ldots,m_k)$ is calculated
recursively, since it is the offset of $(m_1,\ldots,m_1)$ for $n$
variables plus the offset of $(m_2-m_1,m_3-m_1,\ldots,m_k-m_1)$ for
$n-m_1$ variables.
@<|FTensor::getOffsetRecurse| code@>=
int FTensor::getOffsetRecurse(IntSequence& v, int nv)
{
if (v.size() == 0) return 0;
int prefix = v.getPrefixLength();
int m = v[0];
int k = v.size();
int s1 = noverk(nv+k-1,k) - noverk(nv-m+k-1,k);
IntSequence subv(v, prefix, k);
subv.add(-m);
int s2 = getOffsetRecurse(subv, nv-m);
return s1+s2;
}
@ End of {\tt tensor.cpp} file.

dynare++/tl/cc/tensor.hh Normal file

@ -0,0 +1,309 @@
// Copyright 2004, Ondra Kamenik
// Tensor concept.
/* Here we define a tensor class. A tensor is a mathematical object
   corresponding to an $(n+1)$-dimensional array. An element of such an array
   is denoted $[B]_{\alpha_1\ldots\alpha_n}^\beta$, where $\beta$ is a
   special index and $\alpha_1\ldots\alpha_n$ are other indices. The
   class |Tensor| and its subclasses view such an array as a 2D matrix,
   where $\beta$ corresponds to one dimension, and
   $\alpha_1\ldots\alpha_n$ unfold to the other dimension. Whether
   $\beta$ corresponds to rows or columns is decided by the tensor
   subclasses; however, most of our tensors will have rows indexed by
   $\beta$, and $\alpha_1\ldots\alpha_n$ will unfold column-wise.
   There might be some symmetries in the tensor data. For instance, if
   $\alpha_1$ is interchanged with $\alpha_3$ and both elements are equal
   for all possible $\alpha_i$ and $\beta$, then there is a symmetry
   of $\alpha_1$ and $\alpha_3$.
   For any symmetry, there are basically two possible storages of the
   data. The first is unfolded storage, which stores all elements
   regardless of the symmetry. The other storage type is folded, which
   stores only the elements which do not repeat. We declare abstract
   classes for the unfolded tensor and the folded tensor.
   Here we also define a concept of tensor index, which is the
   $n$-tuple $\alpha_1\ldots\alpha_n$. It is an iterator whose movement
   depends on the symmetry and storage of the underlying tensor.
   Although we do not decide about possible symmetries at this point, it
   is worth noting that we implement two kinds of symmetries. The first
   one is a full symmetry, where all indices are interchangeable. The
   second one is a generalization of the first: we define a tensor of a
   symmetry where there are a few groups of indices interchangeable
   within a group but not across groups. Moreover, the groups are required
   to be consecutive partitions of the index $n$-tuple. That is, we do not
   allow $\alpha_1$ to be interchangeable with $\alpha_3$ but not with
   $\alpha_2$ at the same time.
   However, some intermediate results are, in fact, tensors of a symmetry
   not fitting our concept. We develop the tensor abstraction for them,
   but these objects are not used very often. They have limited usage
   due to their specialized constructors. */
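/* For illustration: with two variables and dimension 2, the unfolded
   storage has the four columns (0,0), (0,1), (1,0), (1,1), whereas the
   folded storage keeps only the three non-repeating columns (0,0),
   (0,1), (1,1). */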
#ifndef TENSOR_H
#define TENSOR_H
#include "int_sequence.hh"
#include "twod_matrix.hh"
/* The index represents the $n$-tuple $\alpha_1\ldots\alpha_n$. Since its
   movement is dependent on the underlying tensor (with storage and
   symmetry), we maintain a pointer to that tensor, we maintain the
   $n$-tuple (or coordinates) as |IntSequence|, and we also maintain the
   offset number (column, or row) of the index in the tensor. The pointer
   is const, since we do not need to change data through the index.
   Here we require the |tensor| to implement the |increment| and |decrement|
   methods, which calculate the following and preceding $n$-tuples. Also, we
   need to calculate the offset number from given coordinates, so the
   tensor must implement the method |getOffset|. This method is used only in
   the construction of the index from given coordinates. Once the index is
   created, the offset is automatically incremented and decremented
   together with the index. The |getOffset| method can be relatively
   computationally complex; this must be kept in mind. Also, we generally
   suppose that the $n$-tuple of all zeros is the first offset (first
   column or row).
   What follows is the definition of the index class. The only
   interesting point is |operator==|, which decides only according to the
   offset, not according to the coordinates. This is useful since there
   can be more than one coordinate representation of the past-the-end
   index. */
template<class _Tptr>
class _index
{
typedef _index<_Tptr> _Self;
_Tptr tensor;
int offset;
IntSequence coor;
public:
_index(_Tptr t, int n)
: tensor(t), offset(0), coor(n, 0)
{
}
_index(_Tptr t, const IntSequence &cr, int c)
: tensor(t), offset(c), coor(cr)
{
}
_index(_Tptr t, const IntSequence &cr)
: tensor(t), offset(tensor->getOffset(cr)), coor(cr)
{
}
_index(const _index &ind)
: tensor(ind.tensor), offset(ind.offset), coor(ind.coor)
{
}
const _Self &
operator=(const _Self &in)
{
tensor = in.tensor; offset = in.offset; coor = in.coor;
return *this;
}
_Self &
operator++()
{
tensor->increment(coor); offset++; return *this;
}
_Self &
operator--()
{
tensor->decrement(coor); offset--; return *this;
}
int
operator*() const
{
return offset;
}
bool
operator==(const _index &n) const
{
return offset == n.offset;
}
bool
operator!=(const _index &n) const
{
return offset != n.offset;
}
const IntSequence &
getCoor() const
{
return coor;
}
void
print() const
{
printf("%4d: ", offset); coor.print();
}
};
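/* A typical usage sketch (|t| stands for some concrete |Tensor| subclass
   and |process| is a hypothetical consumer); the same pattern appears
   elsewhere in this commit, e.g. in the CompactPolynomial constructor:

     for (Tensor::index i = t.begin(); i != t.end(); ++i)
       process(*i, i.getCoor()); // column offset and its coordinates
*/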
/* Here is the |Tensor| class, which is nothing else than a simple subclass
   of |TwoDMatrix|. The unique semantically new member is |dim|, which is the
   tensor dimension (the length of $\alpha_1\ldots\alpha_n$). We also declare
   the |increment|, |decrement| and |getOffset| methods as pure virtual.
   We also add members for the index begin and index end. This is useful,
   since the |begin| and |end| methods do not return an instance but only a
   reference, which prevents making an additional copy of the index (for
   example in loops such as |in != end()|, which would otherwise copy the
   index on each iteration). The index begin |in_beg| is constructed as a
   sequence of all zeros, and |in_end| is constructed from the sequence
   |last| passed to the constructor, since it depends on the subclass.
   Also, we have to say along which dimension the multi-dimensional index
   runs; this is used only for the initialization of |in_end|.
   Also, we declare static auxiliary functions for $\pmatrix{n\cr k}$,
   which is |noverk|, and $a^b$, which is |power|. */
class Tensor : public TwoDMatrix
{
public:
enum indor {along_row, along_col};
typedef _index<const Tensor *> index;
protected:
const index in_beg;
const index in_end;
int dim;
public:
Tensor(indor io, const IntSequence &last, int r, int c, int d)
: TwoDMatrix(r, c),
in_beg(this, d),
in_end(this, last, (io == along_row) ? r : c),
dim(d)
{
}
Tensor(indor io, const IntSequence &first, const IntSequence &last,
int r, int c, int d)
: TwoDMatrix(r, c),
in_beg(this, first, 0),
in_end(this, last, (io == along_row) ? r : c),
dim(d)
{
}
Tensor(int first_row, int num, Tensor &t)
: TwoDMatrix(first_row, num, t),
in_beg(t.in_beg),
in_end(t.in_end),
dim(t.dim)
{
}
Tensor(const Tensor &t)
: TwoDMatrix(t),
in_beg(this, t.in_beg.getCoor(), *(t.in_beg)),
in_end(this, t.in_end.getCoor(), *(t.in_end)),
dim(t.dim)
{
}
virtual ~Tensor()
{
}
virtual void increment(IntSequence &v) const = 0;
virtual void decrement(IntSequence &v) const = 0;
virtual int getOffset(const IntSequence &v) const = 0;
int
dimen() const
{
return dim;
}
const index &
begin() const
{
return in_beg;
}
const index &
end() const
{
return in_end;
}
static int noverk(int n, int k);
static int power(int a, int b);
static int
noverseq(const IntSequence &s)
{
    // Work on the local copy, so that the in-place helper does not
    // modify the caller's (const) sequence through a cast.
    IntSequence seq(s);
    return noverseq_ip(seq);
}
private:
static int noverseq_ip(IntSequence &s);
};
/* Here is an abstraction for unfolded tensor. We provide a pure
virtual method |fold| which returns a new instance of folded tensor of
the same symmetry. Also we provide static methods for incrementing and
decrementing an index with full symmetry and general symmetry as
defined above. */
class FTensor;
class UTensor : public Tensor
{
public:
UTensor(indor io, const IntSequence &last, int r, int c, int d)
: Tensor(io, last, r, c, d)
{
}
UTensor(const UTensor &ut)
: Tensor(ut)
{
}
UTensor(int first_row, int num, UTensor &t)
: Tensor(first_row, num, t)
{
}
virtual ~UTensor()
{
}
  virtual FTensor &fold() const = 0;
static void increment(IntSequence &v, int nv);
static void decrement(IntSequence &v, int nv);
static void increment(IntSequence &v, const IntSequence &nvmx);
static void decrement(IntSequence &v, const IntSequence &nvmx);
static int getOffset(const IntSequence &v, int nv);
static int getOffset(const IntSequence &v, const IntSequence &nvmx);
};
/* This is an abstraction for folded tensor. It only provides a method
|unfold|, which returns the unfolded version of the same symmetry, and
static methods for decrementing indices.
We also provide static methods for decrementing the |IntSequence| in
folded fashion and also calculating an offset for a given
|IntSequence|. However, this is relatively complex calculation, so
this should be avoided if possible. */
class FTensor : public Tensor
{
public:
FTensor(indor io, const IntSequence &last, int r, int c, int d)
: Tensor(io, last, r, c, d)
{
}
FTensor(const FTensor &ft)
: Tensor(ft)
{
}
FTensor(int first_row, int num, FTensor &t)
: Tensor(first_row, num, t)
{
}
virtual ~FTensor()
{
}
  virtual UTensor &unfold() const = 0;
static void decrement(IntSequence &v, int nv);
static int
getOffset(const IntSequence &v, int nv)
{
IntSequence vtmp(v); return getOffsetRecurse(vtmp, nv);
}
private:
static int getOffsetRecurse(IntSequence &v, int nv);
};
#endif

dynare++/tl/cc/tensor.hweb

@ -1,252 +0,0 @@
@q $Id: tensor.hweb 741 2006-05-09 11:12:46Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 Tensor concept. Start of {\tt tensor.h} file.
Here we define a tensor class. Tensor is a mathematical object
corresponding to a $(n+1)$-dimensional array. An element of such array
is denoted $[B]_{\alpha_1\ldots\alpha_n}^\beta$, where $\beta$ is a
special index and $\alpha_1\ldots\alpha_n$ are other indices. The
class |Tensor| and its subclasses view such array as a 2D matrix,
where $\beta$ corresponds to one dimension, and
$\alpha_1\ldots\alpha_n$ unfold to the other dimension. Whether
$\beta$ correspond to rows or columns is decided by tensor subclasses,
however, most of our tensors will have rows indexed by $\beta$, and
$\alpha_1\ldots\alpha_n$ will unfold column-wise.
There might be some symmetries in the tensor data. For instance, if
$\alpha_1$ is interchanged with $\alpha_3$ and both elements are equal
for all possible $\alpha_i$ and $\beta$, then there is a symmetry
of $\alpha_1$ and $\alpha_3$.
For any symmetry, there are basically two possible storages of the
data. The first is unfolded storage, which stores all elements
regardless of the symmetry. The other storage type is folded, which
stores only the elements which do not repeat. We declare abstract classes
for the unfolded tensor and the folded tensor.
Here we also define a concept of tensor index, which is the
$n$-tuple $\alpha_1\ldots\alpha_n$. It is an iterator whose movement
depends on the symmetry and storage of the underlying tensor.
Although we do not decide about possible symmetries at this point, it
is worth noting that we implement two kinds of symmetries. The first
one is a full symmetry, where all indices are interchangeable. The
second one is a generalization of the first: we define a tensor of a
symmetry where there are a few groups of indices interchangeable
within a group but not across groups. Moreover, the groups are required
to be consecutive partitions of the index $n$-tuple. That is, we do not
allow $\alpha_1$ to be interchangeable with $\alpha_3$ but not with
$\alpha_2$ at the same time.
However, some intermediate results are, in fact, tensors of a symmetry
not fitting our concept. We develop the tensor abstraction for them,
but these objects are not used very often. They have limited usage
due to their specialized constructors.
@c
#ifndef TENSOR_H
#define TENSOR_H
#include "int_sequence.h"
#include "twod_matrix.h"
@<index class definition@>;
@<|Tensor| class declaration@>;
@<|UTensor| class declaration@>;
@<|FTensor| class declaration@>;
#endif
@ The index represents the $n$-tuple $\alpha_1\ldots\alpha_n$. Since its
movement is dependent on the underlying tensor (with storage and
symmetry), we maintain a pointer to that tensor, we maintain the
$n$-tuple (or coordinates) as |IntSequence|, and we also maintain the
offset number (column, or row) of the index in the tensor. The pointer
is const, since we do not need to change data through the index.
Here we require the |tensor| to implement the |increment| and |decrement|
methods, which calculate the following and preceding $n$-tuples. Also, we
need to calculate the offset number from given coordinates, so the
tensor must implement the method |getOffset|. This method is used only in
the construction of the index from given coordinates. Once the index is
created, the offset is automatically incremented and decremented
together with the index. The |getOffset| method can be relatively
computationally complex; this must be kept in mind. Also, we generally
suppose that the $n$-tuple of all zeros is the first offset (first column
or row).
What follows is the definition of the index class. The only
interesting point is |operator==|, which decides only according to the
offset, not according to the coordinates. This is useful since there
can be more than one coordinate representation of the past-the-end
index.
@s _Tptr int
@s _Self int
@<index class definition@>=
template<class _Tptr> class _index {
typedef _index<_Tptr> _Self;
_Tptr tensor;
int offset;
IntSequence coor;
public:@;
_index(_Tptr t, int n)
: tensor(t), offset(0), coor(n, 0)@+ {}
_index(_Tptr t, const IntSequence& cr, int c)
: tensor(t), offset(c), coor(cr)@+ {}
_index(_Tptr t, const IntSequence& cr)
: tensor(t), offset(tensor->getOffset(cr)), coor(cr)@+ {}
_index(const _index& ind)
: tensor(ind.tensor), offset(ind.offset), coor(ind.coor)@+ {}
const _Self& operator=(const _Self& in)
{@+ tensor = in.tensor;@+ offset = in.offset;@+ coor = in.coor;
return *this;@+}
_Self& operator++()
{@+ tensor->increment(coor);@+ offset++;@+ return *this;@+}
_Self& operator--()
{@+ tensor->decrement(coor);@+ offset--;@+ return *this;@+}
int operator*() const
{@+ return offset;@+}
bool operator==(const _index& n) const
{@+ return offset == n.offset;@+}
bool operator!=(const _index& n) const
{@+ return offset != n.offset;@+}
const IntSequence& getCoor() const
{@+ return coor;@+}
void print() const
{@+ printf("%4d: ", offset);@+ coor.print();@+}
};
@ Here is the |Tensor| class, which is nothing else than a simple subclass
of |TwoDMatrix|. The unique semantically new member is |dim|, which is the
tensor dimension (the length of $\alpha_1\ldots\alpha_n$). We also declare
the |increment|, |decrement| and |getOffset| methods as pure virtual.
We also add members for the index begin and index end. This is useful,
since the |begin| and |end| methods do not return an instance but only a
reference, which prevents making an additional copy of the index (for
example in loops such as |in != end()|, which would otherwise copy the
index on each iteration). The index begin |in_beg| is constructed as a
sequence of all zeros, and |in_end| is constructed from the sequence |last|
passed to the constructor, since it depends on the subclass. Also, we have
to say along which dimension the multi-dimensional index runs; this is used
only for the initialization of |in_end|.
Also, we declare static auxiliary functions for $\pmatrix{n\cr k}$,
which is |noverk|, and $a^b$, which is |power|.
@s indor int
@<|Tensor| class declaration@>=
class Tensor : public TwoDMatrix {
public:@;
enum indor {along_row, along_col};
typedef _index<const Tensor*> index;
protected:@;
const index in_beg;
const index in_end;
int dim;
public:@;
Tensor(indor io, const IntSequence& last, int r, int c, int d)
: TwoDMatrix(r, c),
in_beg(this, d),
in_end(this, last, (io == along_row)? r:c),
dim(d)@+ {}
Tensor(indor io, const IntSequence& first, const IntSequence& last,
int r, int c, int d)
: TwoDMatrix(r, c),
in_beg(this, first, 0),
in_end(this, last, (io == along_row)? r:c),
dim(d)@+ {}
Tensor(int first_row, int num, Tensor& t)
: TwoDMatrix(first_row, num, t),
in_beg(t.in_beg),
in_end(t.in_end),
dim(t.dim)@+ {}
Tensor(const Tensor& t)
: TwoDMatrix(t),
in_beg(this, t.in_beg.getCoor(), *(t.in_beg)),
in_end(this, t.in_end.getCoor(), *(t.in_end)),
dim(t.dim)@+ {}
virtual ~Tensor()@+ {}
virtual void increment(IntSequence& v) const =0;
virtual void decrement(IntSequence& v) const =0;
virtual int getOffset(const IntSequence& v) const =0;
int dimen() const
{@+ return dim;@+}
const index& begin() const
{@+ return in_beg;@+}
const index& end() const
{@+ return in_end;@+}
static int noverk(int n, int k);
static int power(int a, int b);
static int noverseq(const IntSequence& s)
{
IntSequence seq(s);
return noverseq_ip((IntSequence&)s);
}
private:@;
static int noverseq_ip(IntSequence& s);
};
@ Here is an abstraction for unfolded tensor. We provide a pure
virtual method |fold| which returns a new instance of folded tensor of
the same symmetry. Also we provide static methods for incrementing and
decrementing an index with full symmetry and general symmetry as
defined above.
@<|UTensor| class declaration@>=
class FTensor;
class UTensor : public Tensor {
public:@;
UTensor(indor io, const IntSequence& last, int r, int c, int d)
: Tensor(io, last, r, c, d)@+ {}
UTensor(const UTensor& ut)
: Tensor(ut)@+ {}
UTensor(int first_row, int num, UTensor& t)
: Tensor(first_row, num, t)@+ {}
virtual ~UTensor()@+ {}
virtual FTensor& fold() const =0;
static void increment(IntSequence& v, int nv);
static void decrement(IntSequence& v, int nv);
static void increment(IntSequence& v, const IntSequence& nvmx);
static void decrement(IntSequence& v, const IntSequence& nvmx);
static int getOffset(const IntSequence& v, int nv);
static int getOffset(const IntSequence& v, const IntSequence& nvmx);
};
@ This is an abstraction for folded tensor. It only provides a method
|unfold|, which returns the unfolded version of the same symmetry, and
static methods for decrementing indices.
We also provide static methods for decrementing the |IntSequence| in
folded fashion and also calculating an offset for a given
|IntSequence|. However, this is relatively complex calculation, so
this should be avoided if possible.
@<|FTensor| class declaration@>=
class FTensor : public Tensor {
public:@;
FTensor(indor io, const IntSequence& last, int r, int c, int d)
: Tensor(io, last, r, c, d)@+ {}
FTensor(const FTensor& ft)
: Tensor(ft)@+ {}
FTensor(int first_row, int num, FTensor& t)
: Tensor(first_row, num, t)@+ {}
virtual ~FTensor()@+ {}
virtual UTensor& unfold() const =0;
static void decrement(IntSequence& v, int nv);
static int getOffset(const IntSequence& v, int nv)
{@+IntSequence vtmp(v);@+ return getOffsetRecurse(vtmp, nv);@+}
private:@;
static int getOffsetRecurse(IntSequence& v, int nv);
};
@ End of {\tt tensor.h} file.

dynare++/tl/cc/tl_exception.hh Normal file

@ -0,0 +1,74 @@
// Copyright 2004, Ondra Kamenik
// Exception.
/* Within the code we often check some state of variables, typically
preconditions or postconditions. If the state is not as required, it
is worthless to continue, since this means some fatal error in
algorithms. In this case we raise an exception which can be caught at
some higher level. This header file defines a simple infrastructure
for this. */
#ifndef TL_EXCEPTION_H
#define TL_EXCEPTION_H
#include <cstring>
#include <cstdio>
/* The basic idea of raising an exception if some condition fails is
   that the condition is checked only if required. We define a global
   |TL_DEBUG| macro, which is an integer saying how many debug messages
   the program has to emit. We also define |TL_DEBUG_EXCEPTION|, which
   says for which values of |TL_DEBUG| the exception conditions are
   checked: if |TL_DEBUG| is greater than or equal to
   |TL_DEBUG_EXCEPTION|, the exception conditions are checked.
   We define the |TL_RAISE| and |TL_RAISE_IF| macros, which throw an
   instance of |TLException| if |TL_DEBUG >= TL_DEBUG_EXCEPTION|. The first
   is an unconditional throw; the second is conditioned on a given
   expression. Note that if |TL_DEBUG < TL_DEBUG_EXCEPTION|, then the code
   is compiled but the evaluation of the condition is skipped. If the code
   is optimized, the optimizer also elides the evaluation of the |TL_DEBUG|
   and |TL_DEBUG_EXCEPTION| comparison (I hope).
   We provide default values for |TL_DEBUG| and |TL_DEBUG_EXCEPTION|. */
#ifndef TL_DEBUG_EXCEPTION
# define TL_DEBUG_EXCEPTION 1
#endif
#ifndef TL_DEBUG
# define TL_DEBUG 0
#endif
#define TL_RAISE(mes) \
if (TL_DEBUG >= TL_DEBUG_EXCEPTION) throw TLException(__FILE__, __LINE__, mes);
#define TL_RAISE_IF(expr, mes) \
if (TL_DEBUG >= TL_DEBUG_EXCEPTION && (expr)) throw TLException(__FILE__, __LINE__, mes);
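/* A minimal usage sketch (the check itself is hypothetical; enable it by
   compiling with, e.g., -DTL_DEBUG=1):

     TL_RAISE_IF(v.length() != n,
                 "Wrong vector length in some hypothetical routine");

   With the default |TL_DEBUG| of 0 the condition |v.length() != n| is
   never evaluated, since the constant first operand short-circuits the
   conjunction. */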
/* Primitive exception class containing file name, line number and message. */
class TLException
{
char fname[50];
int lnum;
char message[500];
public:
TLException(const char *f, int l, const char *mes)
{
strncpy(fname, f, 50); fname[49] = '\0';
strncpy(message, mes, 500); message[499] = '\0';
lnum = l;
}
virtual ~TLException()
{
}
virtual void
print() const
{
printf("At %s:%d:%s\n", fname, lnum, message);
}
};
#endif

dynare++/tl/cc/tl_exception.hweb

@ -1,79 +0,0 @@
@q $Id: tl_exception.hweb 332 2005-07-15 13:41:48Z kamenik $ @>
@q Copyright 2004, Ondra Kamenik @>
@*2 Exception. Start of {\tt tl\_exception.h} file.
Within the code we often check some state of variables, typically
preconditions or postconditions. If the state is not as required, it
is worthless to continue, since this means some fatal error in
algorithms. In this case we raise an exception which can be caught at
some higher level. This header file defines a simple infrastructure
for this.
@s TLException int
@c
#ifndef TL_EXCEPTION_H
#define TL_EXCEPTION_H
#include <cstring>
#include <cstdio>
@<body of tl\_exception header@>;
#endif
@ The basic idea of raising an exception if some condition fails is
that the condition is checked only if required. We define a global
|TL_DEBUG| macro, which is an integer saying how many debug messages
the program has to emit. We also define |TL_DEBUG_EXCEPTION|, which
says for which values of |TL_DEBUG| the exception conditions are
checked: if |TL_DEBUG| is greater than or equal to
|TL_DEBUG_EXCEPTION|, the exception conditions are checked.
We define the |TL_RAISE| and |TL_RAISE_IF| macros, which throw an
instance of |TLException| if |TL_DEBUG >= TL_DEBUG_EXCEPTION|. The first
is an unconditional throw; the second is conditioned on a given
expression. Note that if |TL_DEBUG < TL_DEBUG_EXCEPTION|, then the code
is compiled but the evaluation of the condition is skipped. If the code
is optimized, the optimizer also elides the evaluation of the |TL_DEBUG|
and |TL_DEBUG_EXCEPTION| comparison (I hope).
We provide default values for |TL_DEBUG| and |TL_DEBUG_EXCEPTION|.
@<body of tl\_exception header@>=
#ifndef TL_DEBUG_EXCEPTION
#define TL_DEBUG_EXCEPTION 1
#endif
#ifndef TL_DEBUG
#define TL_DEBUG 0
#endif
#define TL_RAISE(mes) \
if (TL_DEBUG >= TL_DEBUG_EXCEPTION) throw TLException(__FILE__, __LINE__, mes);
#define TL_RAISE_IF(expr, mes) \
if (TL_DEBUG >= TL_DEBUG_EXCEPTION && (expr)) throw TLException(__FILE__, __LINE__, mes);
@<|TLException| class definition@>;
@ Primitive exception class containing file name, line number and message.
@<|TLException| class definition@>=
class TLException {
char fname[50];
int lnum;
char message[500];
public:@;
TLException(const char* f, int l, const char* mes)
{
strncpy(fname, f, 50);@+ fname[49] = '\0';
strncpy(message, mes, 500);@+ message[499] = '\0';
lnum = l;
}
virtual ~TLException()@+ {}
virtual void print() const
{@+ printf("At %s:%d:%s\n", fname, lnum, message);@+}
};
@ End of {\tt tl\_exception.h} file.

dynare++/tl/cc/tl_static.cc Normal file

@ -0,0 +1,82 @@
// Copyright 2004, Ondra Kamenik
#include "tl_static.hh"
#include "tl_exception.hh"
TLStatic tls;
/* Note that we allow for repeated calls of |init|. This is not normal
and the only purpose of allowing this is the test suite. */
TLStatic::TLStatic()
{
ebundle = NULL;
pbundle = NULL;
ptriang = NULL;
}
TLStatic::~TLStatic()
{
if (ebundle)
delete ebundle;
if (pbundle)
delete pbundle;
if (ptriang)
delete ptriang;
}
void
TLStatic::init(int dim, int nvar)
{
if (ebundle)
ebundle->generateUpTo(dim);
else
ebundle = new EquivalenceBundle(dim);
if (pbundle)
pbundle->generateUpTo(dim);
else
pbundle = new PermutationBundle(dim);
if (ptriang)
delete ptriang;
ptriang = new PascalTriangle(nvar, dim);
}
/* The coefficients are stored in |data| row by row, where a row holds
   the coefficients with the same $k$.
   We first initialize the first row with ones. Then for each subsequent
   row we initialize the first item to one, and every other item is the
   sum of the two Pascal-triangle coefficients for $n-1$, which in the
   code is |i+j-1|. */
PascalTriangle::PascalTriangle(int n, int k)
: data(new int[(n+1)*(k+1)]), kmax(k), nmax(n)
{
for (int i = 0; i <= n; i++)
data[i] = 1;
for (int j = 1; j <= k; j++)
{
data[j*(nmax+1)] = 1;
for (int i = 1; i <= n; i++)
data[j*(nmax+1)+i] = noverk(i+j-1, j) + noverk(i+j-1, j-1);
}
}
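/* For illustration, with n = 3 and k = 2 the rows of |data| are
     j = 0:  1  1  1  1
     j = 1:  1  2  3  4
     j = 2:  1  3  6 10
   so that noverk(4, 2) below looks up data[2*(nmax+1)+2] = 6 = C(4,2). */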
/* Clear. Recall that there are |nmax+1| items in a row. */
int
PascalTriangle::noverk(int n, int k) const
{
TL_RAISE_IF(k > n || n < 0,
"Wrong arguments for PascalTriangle::noverk");
if (k <= kmax && n-k <= nmax)
return data[k*(nmax+1)+n-k];
if (n-k <= kmax && k <= nmax)
return data[(n-k)*(nmax+1)+k];
TL_RAISE("n or k out of range in PascalTriangle::noverk");
return 0;
}

Some files were not shown because too many files have changed in this diff.