OPF Description Template

# ----------------------------------------------------------------------
# Numenta Platform for Intelligent Computing (NuPIC)
# Copyright (C) 2013, Numenta, Inc.  Unless you have an agreement
# with Numenta, Inc., for a separate license for this software code, the
# following terms and conditions apply:
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero Public License version 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero Public License for more details.
#
# You should have received a copy of the GNU Affero Public License
# along with this program.  If not, see http://www.gnu.org/licenses.
#
# http://numenta.org/licenses/
# ----------------------------------------------------------------------

"""
Template file used by the OPF Experiment Generator to generate the actual
description.py file by replacing $XXXXXXXX tokens with desired values.

This description.py file was generated by:
$EXP_GENERATOR_PROGRAM_PATH
"""

from nupic.frameworks.opf.exp_description_api import ExperimentDescriptionAPI

from nupic.frameworks.opf.exp_description_helpers import (
  updateConfigFromSubConfig,
  applyValueGettersToContainer,
  DeferredDictLookup)

from nupic.frameworks.opf.htm_prediction_model_callbacks import *
from nupic.frameworks.opf.metrics import MetricSpec
from nupic.swarming.experimentutils import (InferenceType, InferenceElement)
from nupic.support import aggregationDivide

from nupic.frameworks.opf.opf_task_driver import (
                                            IterationPhaseSpecLearnOnly,
                                            IterationPhaseSpecInferOnly,
                                            IterationPhaseSpecLearnAndInfer)



# ------------------------------------------------------------------------------
# Model Configuration Dictionary:
#
# Define the model parameters and adjust for any modifications if imported
# from a sub-experiment.
#
# These fields might be modified by a sub-experiment; this dict is passed between
# the sub-experiment and base experiment
#
#
# NOTE: Use of DEFERRED VALUE-GETTERs: dictionary fields and list elements
#   within the config dictionary may be assigned futures derived from the
#   ValueGetterBase class, such as DeferredDictLookup.
#   This facility is particularly handy for enabling substitution of values in
#   the config dictionary from other values in the config dictionary, which is
#   needed by permutation.py-based experiments. These values will be resolved
#   during the call to applyValueGettersToContainer(),
#   which we call after the base experiment's config dictionary is updated from
#   the sub-experiment. See ValueGetterBase and
#   DeferredDictLookup for more details about value-getters.
#
#   For each custom encoder parameter to be exposed to sub-experiment/permutation
#   overrides, define a variable in this section, using key names beginning with a
#   single underscore character to avoid collisions with pre-defined keys (e.g.,
#   _dsEncoderFieldName2_N).
#
#   Example:
#      config = dict(
#        _dsEncoderFieldName2_N = 70,
#        _dsEncoderFieldName2_W = 5,
#        dsEncoderSchema = [
#          dict(
#            fieldname='Name2', type='ScalarEncoder',
#            name='Name2', minval=0, maxval=270, clipInput=True,
#            n=DeferredDictLookup('_dsEncoderFieldName2_N'),
#            w=DeferredDictLookup('_dsEncoderFieldName2_W')),
#        ],
#      )
#      updateConfigFromSubConfig(config)
#      applyValueGettersToContainer(config)
#

config = {

  # Type of model that the rest of these parameters apply to
  'model' : "HTMPrediction",

  # The type of inference that this model will perform
  'inferenceType': $INFERENCE_TYPE,

  # How much in advance we want to predict. Used only when swarming over
  #  aggregations
  'predictAheadTime': $PREDICT_AHEAD_TIME,

  # The number of prediction steps to use. When swarming over aggregations, this
  #  is computed and filled in by the logic that follows this config
  #  declaration. It is computed based on the chosen aggreation and the
  #  above predictAheadTime.
  'predictionSteps': 'FilledInBelow',


  ##############################################################################
  # Dataset Aggregation Parameters (for training and inference datasets)
  ##############################################################################

  # Time-based Dataset Aggregation rules;
  #
  # Usage details and additional options: see
  # nupic.data.aggregator.generateDataset()
  #
  # Aggregation presently begins at the start of the dataset. For every
  # aggregation period, the records within the period are coalesced into a
  # single record per rules specified via the aggregationInfo property.
  #
  # Value schema:
  #   {
  #     'periodUnit1':value1, 'periodUnit2':value2, ...,
  #     'fields':[('fieldNameA', aggFuncNameA), ('fieldNameB', aggFuncNameB)]
  #   }
  #
  # Aggregation period units: combination of 0 or more unit/value properties:
  #   [years months] | [weeks days hours minutes seconds milliseconds microseconds]
  # NOTE: years and months are mutually-exclusive with the other units.
  # Example2: hours=1, minutes=30,
  #
  # Aggregation is disabled if the aggregationInfo key is omitted or all
  # expressed period unit values evaluate to 0
  #
  # Aggregation fields: list of field-name/aggregationFunctionName tuples;
  # e.g.: ("consumpion", "mean").
  #
  # Supported function names: "first", "last", "mean", "sum" (per
  # nupic.data.aggregator.py)
  #
  # NOTE: Designated Sequence id, Reset, and Timestamp fields are included
  #      automatically if not specified in aggregation fields.
  #
  # Aggregation period can be permuted over, so is separated out
  # (generated from AGGREGATION_PERIOD)
  '__aggregationPeriod' : $AGGREGATION_PERIOD

  # (value generated from AGGREGATION_INFO)
  'aggregationInfo' : $AGGREGATION_INFO,


  ##############################################################################
  # Sensor Region Parameters
  ##############################################################################

  # Sensor diagnostic output verbosity control;
  # if > 0: sensor region will print out on screen what it's sensing at each step
  # 0: silent; >=1: some info; >=2: more info; >=3: even more info
  # (see compute() in py/regions/RecordSensor.py)
  #
  'sensorVerbosity' : 0,

  # A dictionary specifying the period for automatically-generated resets from
  # a RecordSensor;
  #
  # None = disable automatically-generated resets (also disabled if all of the
  # specified values evaluate to 0).
  # Valid keys is the desired combination of the following:
  #   days, hours, minutes, seconds, milliseconds, microseconds, weeks
  #
  # Example for 1.5 days: sensorAutoReset = dict(days=1,hours=12),
  #
  # (value generated from SENSOR_AUTO_RESET)
  'sensorAutoReset' : $SENSOR_AUTO_RESET,


  # Dataset Encoder consists of field encoders that convert dataset record fields
  # to the internal representations suitable for input to the Sensor Region.
  #
  # Each field encoder dict must have the following keys per
  # nupic.encoders.MultiEncoder (multi.py):
  #  1) data fieldname          ('fieldname')
  #  2) an encoder type         ('type')
  #  3) and the encoder params  (all other keys)
  #
  # See specific encoder modules (e.g., sdrcateogry.py, scalar.py,
  # date.py, etc.) for encoder type values and descriptions of their specific params.
  #
  # Schema that describes how to build the encoder configuration.
  #
  #   dsEncoderSchema: [encoderSpec1, encoderSpec2, ...]
  #   encoderSpec: dictionary of parameters describing the field encoder
  #
  # In this dsEncoderSchema example, the field name "Name1" is a timestamp,
  # "Name2" is a scalar quantity, and "Name3" is a category
  #

  # Encoder specs;
  # Example:
  #          __field_name_encoder = dict(
  #            type = SDRCategoryEncoder',
  #            fieldname = name',
  #            name = 'name',
  #            n = 1000,
  #            w = DeferredDictLookup('spNumActivePerInhArea'),
  #          )
  # Generated from ENCODER_SPECS
  #
  $ENCODER_SPECS,


  # Example:
  #     dsEncoderSchema = [
  #       DeferredDictLookup('__field_name_encoder'),
  #     ],
  #
  # (value generated from DS_ENCODER_SCHEMA)
  #
  'dsEncoderSchema' : $DS_ENCODER_SCHEMA,



  ##############################################################################
  # General CLA Region Parameters
  ##############################################################################

  # Number of cell columns in the cortical region (same number for SP and TM)
  # (see also tpNCellsPerCol)
  # Replaces: spCoincCount
  'claRegionNColumns' : 2048,


  ##############################################################################
  # Spatial Pooler (SP) Parameters (SP is always enabled in OPF)
  ##############################################################################

  # SP diagnostic output verbosity control;
  # 0: silent; >=1: some info; >=2: more info;
  #
  'spVerbosity' : 0,

  # Print/logs stats every N iterations; 0 = disable stats
  'spPrintStatsPeriodIter' : 0,

  # SP inhibition control (absolute value);
  # Maximum number of active columns in the SP region's output (when there are more,
  # the weaker ones are suppressed)
  #
  'spNumActivePerInhArea' : 40,

  # potentialPct
  # What percent of the columns's receptive field is available
  # for potential synapses. At initialization time, we will
  # choose potentialPct * (2*potentialRadius+1)^2
  'spCoincInputPoolPct' : $SP_POOL_PCT,

  # The default connected threshold. Any synapse whose
  # permanence value is above the connected threshold is
  # a "connected synapse", meaning it can contribute to the
  # cell's firing. Typical value is 0.10. Cells whose activity
  # level before inhibition falls below minDutyCycleBeforeInh
  # will have their own internal synPermConnectedCell
  # threshold set below this default value.
  # (This concept applies to both SP and TM and so 'cells'
  # is correct here as opposed to 'columns')
  'spSynPermConnected' : $SP_PERM_CONNECTED,




  ##############################################################################
  # Temporal Memory (TM) Parameters
  ##############################################################################

  # TM diagnostic output verbosity control;
  # 0: silent; [1..6]: increasing levels of verbosity
  # (see verbosity in nupic/trunk/py/nupic/research/BacktrackingTM.py and BacktrackingTMCPP.py)
  #
  'tpVerbosity' : 0,

  # Print stats every N iterations during training; 0 = disable stats
  # TODO Why aren't experiments configuring stats for the inference phase? It seems
  #   like SP stats are dumped by SP Pooler directly regardless of whether it's
  #   in training or inference phase.  (waiting for email from Ron)
  # TODO: In LPF, these were accumulated/printed via iter/final callbacks installed
  #       by LPF; solve in OPF.
  'tpTrainPrintStatsPeriodIter' : 0,

  # Controls whether TM is enabled or disabled;
  # TM is necessary for making temporal predictions, such as predicting the next
  # inputs.  Without TP, the model is only capable of reconstructing missing sensor
  # inputs (via SP).
  #
  'tmEnable' : True,

  # The number of cells (i.e., states), allocated per column
  #
  'tpNCellsPerCol' : 32,

  # Initial Permanence
  # TODO need better explanation
  #
  'tpInitialPerm' : 0.21,

  # Permanence Increment
  #
  'tpPermanenceInc' : 0.1,

  # Permanence Decrement
  # If set to None, will automatically default to tpPermanenceInc value
  #
  'tpPermanenceDec' : None,

  # Temporal Pooler implementation selector (see _getTPClass in CLARegion.py)
  #
  'tpImplementation' : 'cpp',

  # Maximum number of segments per cell
  #  > 0 for fixed-size CLA
  # -1 for non-fixed-size CLA
  #
  # TODO for Ron: once the appropriate value is placed in TM constructor, see if
  #  we should eliminate this parameter from description.py
  #
  'tpMaxSegmentsPerCell' : 128,

  # Segment activation threshold.
  # A segment is active if it has >= tpSegmentActivationThreshold connected
  # synapses that are active due to infActiveState
  # None=use default
  # Replaces: tpActivationThreshold
  'tpSegmentActivationThreshold' : None,

  # Minimum number of active synapses for a segment to be considered during
  # search for the best-matching segments.
  # None=use default
  # Replaces: tpMinThreshold
  'tpMinSegmentMatchSynapseThreshold' : None,

  # Maximum number of synapses per segment
  #  > 0 for fixed-size CLA
  # -1 for non-fixed-size CLA
  #
  # TODO for Ron: once the appropriate value is placed in TM constructor, see if
  #  we should eliminate this parameter from description.py
  #
  'tpMaxSynapsesPerSegment' : 32,

  # New Synapse formation count
  # NOTE: If None, use spNumActivePerInhArea
  #
  # TODO need better explanation
  #
  'tpNewSynapseCount' : 20,

  # "Pay Attention Mode" length. This tells the TM how many new elements
  # to append to the end of a learned sequence at a time. Smaller values are
  # better for datasets with short sequences, higher values are better for
  # datasets with long sequences.
  'tpPamLength': 1,


  ##############################################################################
  # SDRClassifier parameters
  ##############################################################################
  'clRegionName' : 'SDRClassifierRegion',


  # Classifier diagnostic output verbosity control;
  # 0: silent; [1..6]: increasing levels of verbosity
  #
  'verbosity' : 0,

  # Comma separated list of steps ahead to learn in the classifier.
  'clSteps': $PREDICTION_STEPS,

  # This controls how fast the classifier learns/forgets. Higher values
  # make it adapt faster and forget older patterns faster.
  'clAlpha': None,

  # This allows the user to specify custom classifier params
  'clAdvancedParams' : {},
}

# end of config dictionary


# Fold in any overrides supplied by a sub-experiment before anything is
# derived from the config values.
updateConfigFromSubConfig(config)

# When a predict-ahead time is configured, convert it into a whole number of
# steps by dividing it by the aggregation period (which may itself be permuted
# over) and rounding to the nearest integer. The result is stored as a string
# under 'clSteps'.
# NOTE(review): the config comments describe 'predictionSteps' as the key
# filled in here, but only 'clSteps' is written -- confirm this is intended.
if config['predictAheadTime'] is not None:
  predictionSteps = aggregationDivide(config['predictAheadTime'],
                                      config['__aggregationPeriod'])
  predictionSteps = int(round(predictionSteps))
  # At least one step ahead is required.
  assert predictionSteps >= 1
  config['clSteps'] = str(predictionSteps)


# Resolve ValueGetterBase-derived futures held in the config.
# NOTE: this MUST run after updateConfigFromSubConfig() so that value-getter
# substitutions can pick up values coming from the sub-experiment (if any).
applyValueGettersToContainer(config)