# coding: utf-8
"""Scale all feature values into the certain range."""
import numpy as np
from hdnnpy.preprocess.preprocess_base import PreprocessBase
from hdnnpy.utils import (MPI, pprint)
[docs]class Scaling(PreprocessBase):
"""Scale all feature values into the certain range."""
name = 'scaling'
"""str: Name of this class."""
def __init__(self, min_=-1.0, max_=1.0):
"""
Args:
min\_ (float): Target minimum value of scaling.
max\_ (float): Target maximum value of scaling.
"""
assert isinstance(min_, float)
assert isinstance(max_, float)
assert min_ < max_
super().__init__()
self._max = {}
self._min = {}
self._target_max = max_
self._target_min = min_
@property
def max(self):
"""dict [~numpy.ndarray]: Initialized maximum values in each
feature dimension and each element."""
return self._max
@property
def min(self):
"""dict [~numpy.ndarray]: Initialized minimum values in each
feature dimension and each element."""
return self._min
@property
def target(self):
"""tuple [float, float]: Target min & max values of scaling."""
return self._target_min, self._target_max
[docs] def apply(self, dataset, elemental_composition, verbose=True):
"""Apply the same pre-processing for each element to dataset.
It accepts 1 or 2 for length of ``dataset``, each element of
which is regarded as ``0th-order``, ``1st-order``, ...
Args:
dataset (list [~numpy.ndarray]): Input dataset to be scaled.
elemental_composition (list [str]):
Element symbols corresponding to 1st dimension of
``dataset``.
verbose (bool, optional): Print log to stdout.
Returns:
list [~numpy.ndarray]:
Processed dataset into the same min-max range.
"""
order = len(dataset) - 1
assert 0 <= order <= 2
self._initialize_params(dataset[0], elemental_composition, verbose)
max_ = np.array(
[self._max[element] for element in elemental_composition])
min_ = np.array(
[self._min[element] for element in elemental_composition])
if order >= 0:
dataset[0] = ((dataset[0] - min_)
/ (max_ - min_)
* (self._target_max - self._target_min)
+ self._target_min)
if order >= 1:
dataset[1] = (dataset[1]
/ (max_ - min_)[..., None]
* (self._target_max - self._target_min))
if order >= 2:
dataset[2] = (dataset[2]
/ (max_ - min_)[..., None, None]
* (self._target_max - self._target_min))
return dataset
[docs] def dump_params(self):
"""Dump its own parameters as :obj:`str`.
Returns:
str: Formed parameters.
"""
params_str = (f'''
# target range
{self._target_max} {self._target_min}
''')
for element in self._elements:
max_ = self._max[element]
min_ = self._min[element]
max_str = ' '.join(map(str, max_))
min_str = ' '.join(map(str, min_))
params_str += f'''
{element} {max_.shape[0]}
# max
{max_str}
# min
{min_str}
'''
return params_str
[docs] def load(self, file_path, verbose=True):
"""Load internal parameters for each element.
Only root MPI process loads parameters.
Args:
file_path (~pathlib.Path): File path to load parameters.
verbose (bool, optional): Print log to stdout.
"""
if MPI.rank == 0:
ndarray = np.load(file_path)
self._elements = ndarray['elements'].item()
self._max = {element: ndarray[f'max:{element}']
for element in self._elements}
self._min = {element: ndarray[f'min:{element}']
for element in self._elements}
if verbose:
pprint(f'Loaded Scaling parameters from {file_path}.')
[docs] def save(self, file_path, verbose=True):
"""Save internal parameters for each element.
Only root MPI process saves parameters.
Args:
file_path (~pathlib.Path): File path to save parameters.
verbose (bool, optional): Print log to stdout.
"""
if MPI.rank == 0:
info = {'elements': self._elements}
max_ = {f'max:{k}': v for k, v in self._max.items()}
min_ = {f'min:{k}': v for k, v in self._min.items()}
np.savez(file_path, **info, **max_, **min_)
if verbose:
pprint(f'Saved Scaling parameters to {file_path}.')
def _initialize_params(self, data, elemental_composition, verbose):
"""Initialize parameters only once for new elements."""
for element in set(elemental_composition) - self._elements:
n_feature = data.shape[2]
mask = np.array(elemental_composition) == element
X = data[:, mask].reshape(-1, n_feature)
self._elements.add(element)
self._max[element] = X.max(axis=0)
self._min[element] = X.min(axis=0)
if verbose:
pprint(f'Initialized Scaling parameters for {element}')