在input_fn中使用的解析相关帮助函数
2018-05-07 13:42 更新
#版权所有2017 TensorFlow作者.版权所有.
#根据Apache许可证2.0版(“许可证”)获得许可;
#除了符合许可证外,您不得使用此文件.
#您可以在获得许可证副本
#http://www.apache.org/licenses/LICENSE-2.0#除非适用法律要求或以书面形式同意软件根据许可证分发的按“现状”分发,
#没有任何形式的保证或条件,无论是明示还是暗示.
#请参阅许可证以了解特定语言的管理权限和权限
#许可证下的限制.
#==============================================================================
“在`input_fn`中使用的解析相关帮助函数.”
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import six
from tensorflow.python.feature_column import feature_column as fc
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import parsing_ops
def classifier_parse_example_spec(feature_columns,
label_key,
label_dtype=dtypes.int64,
label_default=None,
weight_column=None):
"""Generates parsing spec for tf.parse_example to be used with classifiers.
If users keep data in tf.Example format, they need to call tf.parse_example
with a proper feature spec. There are two main things that this utility helps:
* Users need to combine parsing spec of features with labels and weights
(if any) since they are all parsed from same tf.Example instance. This
utility combines these specs.
* It is difficult to map expected label by a classifier such as
`DNNClassifier` to corresponding tf.parse_example spec. This utility encodes
it by getting related information from users (key, dtype).
Example output of parsing spec:
```python
# Define features and transformations
feature_b = tf.feature_column.numeric_column(...)
feature_c_bucketized = tf.feature_column.bucketized_column(
tf.feature_column.numeric_column("feature_c"), ...)
feature_a_x_feature_c = tf.feature_column.crossed_column(
columns=["feature_a", feature_c_bucketized], ...)
feature_columns = [feature_b, feature_c_bucketized, feature_a_x_feature_c]
parsing_spec = tf.estimator.classifier_parse_example_spec(
feature_columns, label_key='my-label', label_dtype=tf.string)
# For the above example, classifier_parse_example_spec would return the dict:
assert parsing_spec == {
"feature_a": parsing_ops.VarLenFeature(tf.string),
"feature_b": parsing_ops.FixedLenFeature([1], dtype=tf.float32),
"feature_c": parsing_ops.FixedLenFeature([1], dtype=tf.float32)
"my-label" : parsing_ops.FixedLenFeature([1], dtype=tf.string)
}
```
Example usage with a classifier:
```python
feature_columns = # define features via tf.feature_column
estimator = DNNClassifier(
n_classes=1000,
feature_columns=feature_columns,
weight_column='example-weight',
label_vocabulary=['photos', 'keep', ...],
hidden_units=[256, 64, 16])
# This label configuration tells the classifier the following:
# * weights are retrieved with key 'example-weight'
# * label is string and can be one of the following ['photos', 'keep', ...]
# * integer id for label 'photos' is 0, 'keep' is 1, ...
# Input builders
def input_fn_train(): # Returns a tuple of features and labels.
features = tf.contrib.learn.read_keyed_batch_features(
file_pattern=train_files,
batch_size=batch_size,
# creates parsing configuration for tf.parse_example
features=tf.estimator.classifier_parse_example_spec(
feature_columns,
label_key='my-label',
label_dtype=tf.string,
weight_column='example-weight'),
reader=tf.RecordIOReader)
labels = features.pop('my-label')
return features, labels
estimator.train(input_fn=input_fn_train)
```
Args:
feature_columns: An iterable containing all feature columns. All items
should be instances of classes derived from `_FeatureColumn`.
label_key: A string identifying the label. It means tf.Example stores labels
with this key.
label_dtype: A `tf.dtype` identifies the type of labels. By default it is
`tf.int64`. If user defines a `label_vocabulary`, this should be set as
`tf.string`. `tf.float32` labels are only supported for binary
classification.
label_default: used as label if label_key does not exist in given
tf.Example. An example usage: let's say `label_key` is 'clicked' and
tf.Example contains clicked data only for positive examples in following
format `key:clicked, value:1`. This means that if there is no data with
key 'clicked' it should count as negative example by setting
`label_deafault=0`. Type of this value should be compatible with
`label_dtype`.
weight_column: A string or a `_NumericColumn` created by
`tf.feature_column.numeric_column` defining feature column representing
weights. It is used to down weight or boost examples during training. It
will be multiplied by the loss of the example. If it is a string, it is
used as a key to fetch weight tensor from the `features`. If it is a
`_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
then weight_column.normalizer_fn is applied on it to get weight tensor.
Returns:
A dict mapping each feature key to a `FixedLenFeature` or `VarLenFeature`
value.
Raises:
ValueError: If label is used in `feature_columns`.
ValueError: If weight_column is used in `feature_columns`.
ValueError: If any of the given `feature_columns` is not a `_FeatureColumn`
instance.
ValueError: If `weight_column` is not a `_NumericColumn` instance.
ValueError: if label_key is None.
"""
parsing_spec = fc.make_parse_example_spec(feature_columns)
if label_key in parsing_spec:
raise ValueError('label should not be used as feature. '
'label_key: {}, features: {}'.format(
label_key, parsing_spec.keys()))
parsing_spec[label_key] = parsing_ops.FixedLenFeature((1,), label_dtype,
label_default)
if weight_column is None:
return parsing_spec
if isinstance(weight_column, six.string_types):
weight_column = fc.numeric_column(weight_column)
if not isinstance(weight_column, fc._NumericColumn): # pylint: disable=protected-access
raise ValueError('weight_column should be an instance of '
'tf.feature_column.numeric_column. '
'Given type: {} value: {}'.format(
type(weight_column), weight_column))
if weight_column.key in parsing_spec:
raise ValueError('weight_column should not be used as feature. '
'weight_column: {}, features: {}'.format(
weight_column.key, parsing_spec.keys()))
parsing_spec.update(weight_column._parse_example_spec) # pylint: disable=protected-access
return parsing_spec
def regressor_parse_example_spec(feature_columns,
label_key,
label_dtype=dtypes.float32,
label_default=None,
label_dimension=1,
weight_column=None):
"""Generates parsing spec for tf.parse_example to be used with regressors.
If users keep data in tf.Example format, they need to call tf.parse_example
with a proper feature spec. There are two main things that this utility helps:
* Users need to combine parsing spec of features with labels and weights
(if any) since they are all parsed from same tf.Example instance. This
utility combines these specs.
* It is difficult to map expected label by a regressor such as `DNNRegressor`
to corresponding tf.parse_example spec. This utility encodes it by getting
related information from users (key, dtype).
Example output of parsing spec:
```python
# Define features and transformations
feature_b = tf.feature_column.numeric_column(...)
feature_c_bucketized = tf.feature_column.bucketized_column(
tf.feature_column.numeric_column("feature_c"), ...)
feature_a_x_feature_c = tf.feature_column.crossed_column(
columns=["feature_a", feature_c_bucketized], ...)
feature_columns = [feature_b, feature_c_bucketized, feature_a_x_feature_c]
parsing_spec = tf.estimator.regressor_parse_example_spec(
feature_columns, label_key='my-label')
# For the above example, regressor_parse_example_spec would return the dict:
assert parsing_spec == {
"feature_a": parsing_ops.VarLenFeature(tf.string),
"feature_b": parsing_ops.FixedLenFeature([1], dtype=tf.float32),
"feature_c": parsing_ops.FixedLenFeature([1], dtype=tf.float32)
"my-label" : parsing_ops.FixedLenFeature([1], dtype=tf.float32)
}
```
Example usage with a regressor:
```python
feature_columns = # define features via tf.feature_column
estimator = DNNRegressor(
hidden_units=[256, 64, 16],
feature_columns=feature_columns,
weight_column='example-weight',
label_dimension=3)
# This label configuration tells the regressor the following:
# * weights are retrieved with key 'example-weight'
# * label is a 3 dimension tensor with float32 dtype.
# Input builders
def input_fn_train(): # Returns a tuple of features and labels.
features = tf.contrib.learn.read_keyed_batch_features(
file_pattern=train_files,
batch_size=batch_size,
# creates parsing configuration for tf.parse_example
features=tf.estimator.classifier_parse_example_spec(
feature_columns,
label_key='my-label',
label_dimension=3,
weight_column='example-weight'),
reader=tf.RecordIOReader)
labels = features.pop('my-label')
return features, labels
estimator.train(input_fn=input_fn_train)
```
Args:
feature_columns: An iterable containing all feature columns. All items
should be instances of classes derived from `_FeatureColumn`.
label_key: A string identifying the label. It means tf.Example stores labels
with this key.
label_dtype: A `tf.dtype` identifies the type of labels. By default it is
`tf.float32`.
label_default: used as label if label_key does not exist in given
tf.Example. By default default_value is none, which means
`tf.parse_example` will error out if there is any missing label.
label_dimension: Number of regression targets per example. This is the
size of the last dimension of the labels and logits `Tensor` objects
(typically, these have shape `[batch_size, label_dimension]`).
weight_column: A string or a `_NumericColumn` created by
`tf.feature_column.numeric_column` defining feature column representing
weights. It is used to down weight or boost examples during training. It
will be multiplied by the loss of the example. If it is a string, it is
used as a key to fetch weight tensor from the `features`. If it is a
`_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
then weight_column.normalizer_fn is applied on it to get weight tensor.
Returns:
A dict mapping each feature key to a `FixedLenFeature` or `VarLenFeature`
value.
Raises:
ValueError: If label is used in `feature_columns`.
ValueError: If weight_column is used in `feature_columns`.
ValueError: If any of the given `feature_columns` is not a `_FeatureColumn`
instance.
ValueError: If `weight_column` is not a `_NumericColumn` instance.
ValueError: if label_key is None.
"""
parsing_spec = fc.make_parse_example_spec(feature_columns)
if label_key in parsing_spec:
raise ValueError('label should not be used as feature. '
'label_key: {}, features: {}'.format(
label_key, parsing_spec.keys()))
parsing_spec[label_key] = parsing_ops.FixedLenFeature(
(label_dimension,), label_dtype, label_default)
if weight_column is None:
return parsing_spec
if isinstance(weight_column, six.string_types):
weight_column = fc.numeric_column(weight_column)
if not isinstance(weight_column, fc._NumericColumn): # pylint: disable=protected-access
raise ValueError('weight_column should be an instance of '
'tf.feature_column.numeric_column. '
'Given type: {} value: {}'.format(
type(weight_column), weight_column))
if weight_column.key in parsing_spec:
raise ValueError('weight_column should not be used as feature. '
'weight_column: {}, features: {}'.format(
weight_column.key, parsing_spec.keys()))
parsing_spec.update(weight_column._parse_example_spec) # pylint: disable=protected-access
return parsing_spec
以上内容是否对您有帮助:
更多建议: