Source code for pynbody.array

"""
array
=====

Defines a shallow wrapper around numpy.ndarray for extra functionality like unit-tracking.

For most purposes, the differences between numpy.ndarray and
array.SimArray are not important. However, when units are specified
(by setting the ``units`` attribute), the behaviour is slightly
different. In particular,

* it becomes impossible to add or subtract arrays with incompatible dimensions

>>> SimArray([1,2], "Mpc") + SimArray([1,2], "Msol"))
ValueError

* addition or subtraction causes auto-unit conversion. For example

>>> SimArray([1,2], "Mpc") + SimArray([1,2], "kpc")
SimArray([1.001, 1.002], "Mpc")

* Note that in this context the left value takes precedence in
  specifying the return units, so that reversing the order of the
  operation here would return results in kpc.

* If only one of the arrays specifies a Unit, no checking occurs and
  the unit of the returned array is assumed to be the same as the one
  specified input unit.

* Powers to single integer or rational powers will maintain unit
  tracking.  Powers to float or other powers will not be able to do
  so.

>>> SimArray([1,2],"Msol Mpc**-3")**2
SimArray([1, 4], 'Msol**2 Mpc**-6')
>>> SimArray([1,2],"Msol Mpc**-3")**(1,3)
SimArray([ 1.,1.26], 'Msol**1/3 Mpc**-1')

Syntax above mirrors syntax in units module, where a length-two tuple
can represent a rational number, in this case one third.

>>> SimArray([1.,2], "Msol Mpc**-3")**0.333
SimArray([ 1.,1.26])  # Lost track of units



*Getting the array in specified units*
--------------------------------------

Given an array, you can convert it in-place into units of your
own chosing:

>>> x = SimArray([1,2], "Msol")
>>> x.convert_units('kg')
>>> print x
SimArray([  1.99e+30,   3.98e+30], 'kg')

Or you can leave the original array alone and get a *copy* in
different units, correctly converted:

>>> x = SimArray([1,2], "Msol")
>>> print x.in_units("kg")
SimArray([  1.99e+30,   3.98e+30], 'kg')
>>> print x
SimArray([1,2], "Msol")

If the SimArray was created by a SimSnap (which is most likely), it
has a pointer into the SimSnap's properties so that the cosmological
context is automatically fetched. For example, comoving -> physical
conversions are correctly achieved:

>>> f = pynbody.load("fname")
>>> f['pos']
SimArray([[ 0.05419805, -0.0646539 , -0.15700017],
         [ 0.05169899, -0.06193341, -0.14475258],
         [ 0.05672406, -0.06384531, -0.15909944],
         ...,
         [ 0.0723075 , -0.07650762, -0.07657281],
         [ 0.07166634, -0.07453796, -0.08020873],
         [ 0.07165282, -0.07468577, -0.08020587]], '2.86e+04 kpc a')

>>> f['pos'].convert_units('kpc')
>>> f['pos']
SimArray([[ 1548.51403101, -1847.2525312 , -4485.71463308],
         [ 1477.1124212 , -1769.52421398, -4135.78377699],
         [ 1620.68592366, -1824.15000686, -4545.69387564],
         ...,
         [ 2065.9264273 , -2185.92982874, -2187.79225915],
         [ 2047.60759667, -2129.6537339 , -2291.6758134 ],
         [ 2047.2214441 , -2133.87693163, -2291.59406997]], 'kpc')


*Specifying rules for ufunc's*
------------------------------

In general, it's not possible to infer what the output units from a given
ufunc should be. While numpy built-in ufuncs should be handled OK, other
ufuncs will need their output units defined (otherwise a numpy.ndarray
will be returned instead of our custom type.)

To do this, decorate a function with SimArray.ufunc_rule(ufunc). The function
you define should take the same number of parameters as the ufunc. These will
be the input parameters of the ufunc. You should return the correct units for
the output, or raise units.UnitsException (in the latter case, the return
array will be made into a numpy.ndarray.)

For example, here is the code for the correct addition/subtraction
handler:

.. code-block:: python

    @SimArray.ufunc_rule(np.add)
    @SimArray.ufunc_rule(np.subtract)
    def _consistent_units(a,b) :

        # This will be called whenever the standard numpy ufuncs np.add
        # or np.subtract are called with parameters a,b.

        # You should always be ready for the inputs to have no units.

        a_units = a.units if hasattr(a, 'units') else None
        b_units = b.units if hasattr(b, 'units') else None

        # Now do the logic. If we're adding incompatible units,
        # we want just to get a plain numpy array out. If we only
        # know the units of one of the arrays, we assume the output
        # is in those units.

        if a_units is not None and b_units is not None :
            if a_units==b_units :
                return a_units
            else :
                raise units.UnitsException("Incompatible units")

        elif a_units is not None :
            return a_units
        else :
            return b_units

"""

import numpy as np
import weakref
import os
from . import units as units
from functools import reduce
_units = units
from .backcompat import property
from .backcompat import fractions
import atexit
import functools


[docs]class SimArray(np.ndarray):

    """
    Defines a shallow wrapper around numpy.ndarray for extra
    functionality like unit-tracking.
    """

    _ufunc_registry = {}

    @property
    def ancestor(self):
        """Provides the basemost SimArray that an IndexedSimArray is based on."""
        return self

    @property
    def derived(self):
        if self.sim and self.name:
            return self.sim.is_derived_array(self.name, getattr(self, 'family', None))
        else:
            return False

    @derived.setter
    def derived(self, value):
        if value:
            raise ValueError("Can only unlink an array. Delete an array to force a rederivation if this is the intended effect.")
        if self.derived:
            self.sim.unlink_array(self.name)

    def __reduce__(self):
        T = np.ndarray.__reduce__(self)
        T = (
            T[0], T[1], (self.units, T[2][0], T[2][1], T[2][2], T[2][3], T[2][4]))
        return T

    def __setstate__(self, args):
        self._units = args[0]
        self.sim = None
        self._name = None
        np.ndarray.__setstate__(self, args[1:])

    def __new__(subtype, data, units=None, sim=None, **kwargs):
        new = np.array(data, **kwargs).view(subtype)
        if hasattr(data, 'units') and hasattr(data, 'sim') and units is None and sim is None:
            units = data.units
            sim = data.sim

        if hasattr(data, 'family'):
            new.family = data.family

        if isinstance(units, str):
            units = _units.Unit(units)

        new._units = units

        # Always associate a SimArray with the top-level snapshot.
        # Otherwise we run into problems with how the reference should
        # behave: we don't want to lose the link to the simulation by
        # storing a weakref to a SubSnap that might be deconstructed,
        # but we also wouldn't want to store a strong ref to a SubSnap
        # since that would keep the entire simulation alive even if
        # deleted.
        #
        # So, set the sim attribute to the top-level snapshot and use
        # the normal weak-reference system.

        if sim is not None:
            new.sim = sim.ancestor
            # will generate a weakref automatically

        new._name = None

        return new

    def __array_finalize__(self, obj):
        if obj is None:
            return
        elif obj is not self and hasattr(obj, 'units'):
            self._units = obj.units
            self._sim = obj._sim
            self._name = obj._name
            if hasattr(obj, 'family'):
                self.family = obj.family
        else:
            self._units = None
            self._sim = lambda: None
            self._name = None

    def __array_wrap__(self, array, context=None):
        if context is None:
            n_array = array.view(SimArray)
            return n_array

        try:
            ufunc = context[0]
            output_units = SimArray._ufunc_registry[ufunc](*context[1])
            n_array = array.view(SimArray)
            n_array.units = output_units
            n_array.sim = self.sim
            n_array._name = self._name
            return n_array
        except (KeyError, units.UnitsException):
            return array

    @staticmethod
    def ufunc_rule(for_ufunc):
        def x(fn):
            SimArray._ufunc_registry[for_ufunc] = fn
            return fn

        return x

    @property
    def units(self):
        if hasattr(self.base, 'units'):
            return self.base.units
        else:
            if self._units is None:
                return _units.no_unit
            else:
                return self._units

    @units.setter
    def units(self, u):
        if not isinstance(u, units.UnitBase) and u is not None:
            u = units.Unit(u)

        if hasattr(self.base, 'units'):
            self.base.units = u
        else:
            if hasattr(u, "_no_unit"):
                self._units = None
            else:
                self._units = u

    @property
    def name(self):
        if hasattr(self.base, 'name'):
            return self.base.name
        return self._name

    @property
    def sim(self):
        if hasattr(self.base, 'sim'):
            if self.family and self.base.sim:
                return self.base.sim[self.family]
            else:
                return self.base.sim
        return self._sim()

    @sim.setter
    def sim(self, s):
        if hasattr(self.base, 'sim'):
            self.base.sim = s
        else:
            if s is not None:
                self._sim = weakref.ref(s)
            else:
                self._sim = lambda: None

    @property
    def family(self):
        try:
            return self._family
        except AttributeError:
            return None

    @family.setter
    def family(self, fam):
        self._family = fam

    def __mul__(self, rhs):
        if isinstance(rhs, _units.UnitBase):
            x = self.copy()
            x.units = x.units * rhs
            return x
        else:
            return np.ndarray.__mul__(self, rhs)

    def __div__(self, rhs):
        if isinstance(rhs, _units.UnitBase):
            x = self.copy()
            x.units = x.units / rhs
            return x
        else:
            return np.ndarray.__div__(self, rhs)

    def __truediv__(self, rhs):
        if isinstance(rhs, _units.UnitBase):
            x = self.copy()
            x.units = x.units / rhs
            return x
        else:
            return np.ndarray.__truediv__(self, rhs)

    def __imul__(self, rhs):
        if isinstance(rhs, _units.UnitBase):
            self.units *= rhs
        else:
            np.ndarray.__imul__(self, rhs)
            try:
                self.units *= rhs.units
            except AttributeError:
                pass
        return self

    def __idiv__(self, rhs):
        if isinstance(rhs, _units.UnitBase):
            self.units /= rhs
        else:
            np.ndarray.__idiv__(self, rhs)
            try:
                self.units /= rhs.units
            except AttributeError:
                pass
        return self

    def __itruediv__(self, rhs):
        if isinstance(rhs, _units.UnitBase):
            self.units /= rhs
        else:
            np.ndarray.__itruediv__(self, rhs)
            try:
                self.units /= rhs.units
            except AttributeError:
                pass
        return self

    def conversion_context(self):
        if self.sim is not None:
            return self.sim.conversion_context()
        else:
            return {}

    def _generic_add(self, x, add_op=np.add):
        if hasattr(x, 'units') and not hasattr(self.units, "_no_unit") and not hasattr(x.units, "_no_unit"):
            # Check unit compatibility

            try:
                context = x.conversion_context()
            except AttributeError:
                context = {}

            # Our own contextual information overrides x's
            context.update(self.conversion_context())

            try:
                cr = x.units.ratio(self.units,
                                   **context)
            except units.UnitsException:
                raise ValueError("Incompatible physical dimensions %r and %r, context %r" % (
                    str(self.units), str(x.units), str(self.conversion_context())))

            if cr == 1.0:
                r = add_op(self, x)

            else:
                b = np.multiply(x, cr)
                if hasattr(b, 'units'):
                    b.units = None

                if not np.can_cast(b.dtype,self.dtype):
                    b = np.asarray(b, dtype=x.dtype)


                r = add_op(self, b)

            return r

        elif units.is_unit(x):
            x = x.in_units(self.units, **self.conversion_context())
            r = add_op(self, x)
            return r
        else:
            r = add_op(self, x)
            return r

    def __add__(self, x):
        if isinstance(x, _units.UnitBase):
            return x + self
        else:
            return self._generic_add(x)

    def __sub__(self, x):
        if isinstance(x, _units.UnitBase):
            return (-x + self).in_units(self.units)
        else:
            return self._generic_add(x, np.subtract)

    def __iadd__(self, x):
        self._generic_add(x, np.ndarray.__iadd__)
        return self

    def __isub__(self, x):
        self._generic_add(x, np.ndarray.__isub__)
        return self

    def __pow__(self, x):
        numerical_x = x

        if isinstance(x, tuple):
            x = fractions.Fraction(x[0], x[1])
            numerical_x = float(x)
        elif isinstance(x, fractions.Fraction):
            numerical_x = float(x)

        # The following magic circumvents our normal unit-assignment
        # code which couldn't cope with the numerical version of x
        # in the case of fractions. All this is necessary to make the
        # magic tuple->fraction conversions work seamlessly.
        r = np.asarray(np.power(self.view(np.ndarray), numerical_x)).view(SimArray)

        # Recent numpy versions can take 1-element arrays and return
        # scalars, in which case we now have a floating point number :(
        if type(r) is not SimArray:
            return r

        if self.units is not None and (
                isinstance(x, fractions.Fraction) or
                isinstance(x, int)):
            r.sim = self.sim
            r.units = self.units ** x
        else:
            r.units = None
            r.sim = self.sim

        return r

    def __repr__(self):
        x = np.ndarray.__repr__(self)
        if not hasattr(self.units, "_no_unit"):
            return x[:-1] + ", '" + str(self.units) + "')"
        else:
            return x

    def __setitem__(self, item, to):
        if hasattr(to, "in_units") and not hasattr(self.units, "_no_unit") and not hasattr(to.units, "_no_unit"):
            np.ndarray.__setitem__(self, item, to.in_units(self.units))
        else:
            np.ndarray.__setitem__(self, item, to)

    def __setslice__(self, a, b, to):
        self.__setitem__(slice(a, b), to)

    def abs(self, *args, **kwargs):
        x = np.abs(self, *args, **kwargs)
        if hasattr(x, 'units') and self.units is not None:
            x.units = self.units
        if hasattr(x, 'sim') and self.sim is not None:
            x.sim = self.sim
        return x

[docs]    def cumsum(self, axis=None, dtype=None, out=None):
        x = np.ndarray.cumsum(self, axis, dtype, out)
        if hasattr(x, 'units') and self.units is not None:
            x.units = self.units
        if hasattr(x, 'sim') and self.sim is not None:
            x.sim = self.sim
        return x

[docs]    def prod(self, axis=None, dtype=None, out=None):
        x = np.ndarray.prod(self, axis, dtype, out)
        if hasattr(x, 'units') and axis is not None and self.units is not None:
            x.units = self.units ** self.shape[axis]
        if hasattr(x, 'units') and axis is None and self.units is not None:
            x.units = self.units
        if hasattr(x, 'sim') and self.sim is not None:
            x.sim = self.sim
        return x

[docs]    def sum(self, *args, **kwargs):
        x = np.ndarray.sum(self, *args, **kwargs)
        if hasattr(x, 'units') and self.units is not None:
            x.units = self.units
        if hasattr(x, 'sim') and self.sim is not None:
            x.sim = self.sim
        return x

[docs]    def mean(self, *args, **kwargs):
        x = np.ndarray.mean(self, *args, **kwargs)
        if hasattr(x, 'units') and self.units is not None:
            x.units = self.units
        if hasattr(x, 'sim') and self.sim is not None:
            x.sim = self.sim
        return x

    def mean_by_mass(self, *args, **kwargs):
        return self.sim.mean_by_mass(self.name)

[docs]    def max(self, *args, **kwargs):
        x = np.ndarray.max(self, *args, **kwargs)
        if hasattr(x, 'units') and self.units is not None:
            x.units = self.units
        if hasattr(x, 'sim') and self.sim is not None:
            x.sim = self.sim
        return x

[docs]    def min(self, *args, **kwargs):
        x = np.ndarray.min(self, *args, **kwargs)
        if hasattr(x, 'units') and self.units is not None:
            x.units = self.units
        if hasattr(x, 'sim') and self.sim is not None:
            x.sim = self.sim
        return x

[docs]    def ptp(self, *args, **kwargs):
        x = np.ndarray.ptp(self, *args, **kwargs)
        if hasattr(x, 'units') and self.units is not None:
            x.units = self.units
        if hasattr(x, 'sim') and self.sim is not None:
            x.sim = self.sim
        return x

[docs]    def std(self, *args, **kwargs):
        x = np.ndarray.std(self, *args, **kwargs)
        if hasattr(x, 'units') and self.units is not None:
            x.units = self.units
        if hasattr(x, 'sim') and self.sim is not None:
            x.sim = self.sim
        return x

[docs]    def var(self, *args, **kwargs):
        x = np.ndarray.var(self, *args, **kwargs)
        if hasattr(x, 'units') and self.units is not None:
            x.units = self.units ** 2
        if hasattr(x, 'sim') and self.sim is not None:
            x.sim = self.sim
        return x

[docs]    def set_units_like(self, new_unit):
        """Set the units for this array by performing dimensional analysis
        on the supplied unit and referring to the units of the original
        file"""

        if self.sim is not None:
            self.units = self.sim.infer_original_units(new_unit)
        else:
            raise RuntimeError("No link to SimSnap")

[docs]    def set_default_units(self, quiet=False):
        """Set the units for this array by performing dimensional analysis
        on the default dimensions for the array."""

        if self.sim is not None:
            try:
                self.units = self.sim._default_units_for(self.name)
            except (KeyError, units.UnitsException):
                if not quiet:
                    raise
        else:
            raise RuntimeError("No link to SimSnap")

[docs]    def in_original_units(self):
        """Retun a copy of this array expressed in the units
        specified in the parameter file."""

        return self.in_units(self.sim.infer_original_units(self.units))

[docs]    def in_units(self, new_unit, **context_overrides):
        """Return a copy of this array expressed relative to an alternative
        unit."""

        context = self.conversion_context()
        context.update(context_overrides)

        if self.units is not None:
            r = self * self.units.ratio(new_unit,
                                        **context)
            r.units = new_unit
            return r
        else:
            raise ValueError("Units of array unknown")

[docs]    def convert_units(self, new_unit):
        """Convert units of this array in-place. Note that if
        this is a sub-view, the entire base array will be converted."""

        if self.base is not None and hasattr(self.base, 'units'):
            self.base.convert_units(new_unit)
        else:
            self *= self.units.ratio(new_unit,
                                     **(self.conversion_context()))
            self.units = new_unit

[docs]    def write(self, **kwargs):
        """
        Write this array to disk according to the standard method
        associated with its base file. This is equivalent to calling

        >>> sim.gas.write_array('array')

        in the case of writing out the array 'array' for the gas
        particle family.  See the description of
        :func:`pynbody.snapshot.SimSnap.write_array` for options.
        """

        if self.sim and self.name:
            self.sim.write_array(self.name, fam=self.family, **kwargs)
        else:
            raise RuntimeError("No link to SimSnap")

    def __del__(self):
        """Clean up disk if this was made from a named
        shared array"""

        if getattr(self, '_shared_del', False):
            _shared_array_unlink(self)

# Set up the correct comparison functions


def _unit_aware_comparison(ar, other, comparison_op=None):
    # guaranteed to be called with ar a SimArray instance
    if units.is_unit_like(other):
        if units.has_units(ar):
            # either other is a unit, or an array with a unit If
            # it's an array with a unit that matches our own, we
            # want to fall straight through to the comparison
            # operation. If it's an array with a unit that doesn't
            # match ours, OR it's a plain unit, we want to
            # convert first.
            if units.is_unit(other) or other.units != ar.units:
                other = other.in_units(ar.units)
        else:
            raise units.UnitsException("One side of a comparison has units and the other side does not")

    return comparison_op(ar, other)

for f in np.ndarray.__lt__, np.ndarray.__le__, np.ndarray.__eq__, \
        np.ndarray.__ne__, np.ndarray.__gt__, np.ndarray.__ge__:

    # N.B. cannot use functools.partial because it doesn't implement the descriptor
    # protocol
[docs]    @functools.wraps(f, assigned=("__name__", "__doc__"))
    def wrapper_function(self, other, comparison_op=f):
        return _unit_aware_comparison(self, other, comparison_op=comparison_op)

    setattr(SimArray, f.__name__, wrapper_function)


# Now add dirty bit setters to all the operations which are known
# to modify the numpy array

def _dirty_fn(w):
    def q(a, *y, **kw):
        if a.sim is not None and a.name is not None:
            a.sim._dirty(a.name)

        if kw != {}:
            return w(a, *y, **kw)
        else:
            return w(a, *y)

    q.__name__ = w.__name__
    return q

_dirty_fns = ['__setitem__', '__setslice__',
              '__irshift__',
              '__imod__',
              '__iand__',
              '__ifloordiv__',
              '__ilshift__',
              '__imul__',
              '__ior__',
              '__ixor__',
              '__isub__',
              '__invert__',
              '__iadd__',
              '__itruediv__',
              '__idiv__',
              '__ipow__']

for x in _dirty_fns:
    setattr(SimArray, x, _dirty_fn(getattr(SimArray, x)))

_u = SimArray.ufunc_rule


def _get_units_or_none(*a):
    if len(a) == 1:
        if hasattr(a[0], "units"):
            return a[0].units
        else:
            return None
    else:
        r = []
        for x in a:
            if hasattr(x, "units"):
                r.append(x.units)
            else:
                r.append(None)

        return r

#
# Now we have the rules for unit outputs after numpy built-in ufuncs
#
# Note if these raise UnitsException, a standard numpy array is returned
# from the ufunc to indicate the units can't be calculated. That means
# ufuncs can do 'non-physical' things, but then return ndarrays instead
# of SimArrays.


@_u(np.sqrt)
def _sqrt_units(a):
    if a.units is not None:
        return a.units ** (1, 2)
    else:
        return None


@_u(np.multiply)
def _mul_units(a, b):
    a_units, b_units = _get_units_or_none(a, b)
    if a_units is not None and b_units is not None:
        return a_units * b_units
    elif a_units is not None:
        return a_units
    else:
        return b_units


@_u(np.divide)
@_u(np.true_divide)
def _div_units(a, b):
    a_units, b_units = _get_units_or_none(a, b)
    if a_units is not None and b_units is not None:
        return a_units / b_units
    elif a_units is not None:
        return a_units
    else:
        return 1 / b_units


@_u(np.add)
@_u(np.subtract)
def _consistent_units(a, b):
    a_units, b_units = _get_units_or_none(a, b)
    if a_units is not None and b_units is not None:
        if a_units == b_units:
            return a_units
        else:
            raise units.UnitsException("Incompatible units")

    elif a_units is not None:
        return a_units
    else:
        return b_units


@_u(np.power)
def _pow_units(a, b):
    a_units = _get_units_or_none(a)
    if a_units is not None:
        if not isinstance(b, int) and not isinstance(b, units.Fraction):
            raise units.UnitsException("Can't track units")
        return a_units ** b
    else:
        return None


@_u(np.arctan)
@_u(np.arctan2)
@_u(np.arcsin)
@_u(np.arccos)
@_u(np.arcsinh)
@_u(np.arccosh)
@_u(np.arctanh)
@_u(np.sin)
@_u(np.tan)
@_u(np.cos)
@_u(np.sinh)
@_u(np.tanh)
@_u(np.cosh)
def _trig_units(*a):
    return 1


@_u(np.greater)
@_u(np.greater_equal)
@_u(np.less)
@_u(np.less_equal)
@_u(np.equal)
@_u(np.not_equal)
def _comparison_units(*a):
    return None


class IndexedSimArray(object):

    @property
    def derived(self):
        return self.base.derived

    @property
    def ancestor(self):
        return self.base.ancestor

    def __init__(self, array, ptr):
        self.base = array
        self._ptr = ptr

    def __array__(self, dtype=None):
        return np.asanyarray(self.base[self._ptr], dtype=dtype)

    def _reexpress_index(self, index):
        if isinstance(index, tuple) or (isinstance(index, list) and len(index) > 0 and hasattr(index[0], '__len__')):
            return [self._ptr[index[0]]] + list(index[1:])
        else:
            return self._ptr[index]

    def __getitem__(self, item):
        return self.base[self._reexpress_index(item)]

    def __setitem__(self, item, to):
        self.base[self._reexpress_index(item)] = to

    def __getslice__(self, a, b):
        return self.__getitem__(slice(a, b))

    def __setslice__(self, a, b, to):
        self.__setitem__(slice(a, b), to)

    def __repr__(self):
        return self.__array__().__repr__()  # Could be optimized

    def __str__(self):
        return self.__array__().__str__()  # Could be optimized

    def __len__(self):
        return len(self._ptr)

    def __reduce__(self):
        return SimArray(self).__reduce__()

    @property
    def shape(self):
        x = [len(self._ptr)]
        x += self.base.shape[1:]
        return tuple(x)

    @property
    def ndim(self):
        return self.base.ndim

    @property
    def units(self):
        return self.base.units

    @units.setter
    def units(self, u):
        self.base.units = u

    @property
    def sim(self):
        if self.base.sim is not None:
            return self.base.sim[self._ptr]
        else:
            return None

    @sim.setter
    def sim(self, s):
        self.base.sim = s

    @property
    def dtype(self):
        return self.base.dtype

    def conversion_context(self):
        return self.base.conversion_context()

    def set_units_like(self, new_unit):
        self.base.set_units_like(new_unit)

    def in_units(self, new_unit, **context_overrides):
        return IndexedSimArray(self.base.in_units(new_unit, **context_overrides), self._ptr)

    def convert_units(self, new_unit):
        self.base.convert_units(new_unit)

    def write(self, **kwargs):
        self.base.write(**kwargs)

    def prod(self):
        return np.array(self).prod()


# The IndexedSimArray class is now supplemented by wrapping all the
# standard numpy methods with a generated function which extracts an
# array realization of the subview before calling the underlying
# method.

def _wrap_fn(w):
    def q(s, *y,  **kw):
        # AP: I Don't understand why the following condition should be necessary,
        # but it seems required on McMaster setup (Py 2.6.5, NP 1.4.1)
        if kw != {}:
            return w(SimArray(s), *y, **kw)
        else:
            return w(SimArray(s), *y)

    q.__name__ = w.__name__
    return q

# functions we definitely want to wrap, even though there's an existing
# implementation
_override = "__eq__", "__ne__", "__gt__", "__ge__", "__lt__", "__le__"

for x in set(np.ndarray.__dict__).union(SimArray.__dict__):
    w = getattr(SimArray, x)
    if 'array' not in x and ((not hasattr(IndexedSimArray, x)) or x in _override) and hasattr(w, '__call__'):
        setattr(IndexedSimArray, x, _wrap_fn(w))


############################################################
# SUPPORT FOR SHARING ARRAYS BETWEEN PROCESSES
############################################################

try:
    import ctypes
    import multiprocessing
    import multiprocessing.sharedctypes
    import tempfile
    import functools
    import os
    import time
    import random
    import mmap
    import posix_ipc
    _all_shared_arrays = []
except ImportError:
    posix_ipc = None


def _array_factory(dims, dtype, zeros, shared):
    """Create an array of dimensions *dims* with the given numpy *dtype*.
    If *zeros* is True, the returned array is guaranteed zeroed. If *shared*
    is True, the returned array uses shared memory so can be efficiently
    shared across processes."""
    global _all_shared_arrays

    if not hasattr(dims, '__len__'):
        dims = (dims,)

    if shared and posix_ipc:
        random.seed(os.getpid() * time.time())
        fname = "pynbody-" + \
            ("".join([random.choice('abcdefghijklmnopqrstuvwxyz')
                      for i in range(10)]))
        _all_shared_arrays.append(fname)
        # memmaps of zero length seem not to be permitted, so have to
        # make zero length arrays a special case
        zero_size = False
        if dims[0] == 0:
            zero_size = True
            dims = (1,) + dims[1:]
        if hasattr(dims, '__len__'):
            size = reduce(np.multiply, dims)
        else:
            size = dims

        mem = posix_ipc.SharedMemory(fname, posix_ipc.O_CREX, size=int(np.dtype(dtype).itemsize*size))
        # write zeros into the file pointer before memmaping, to get a graceful exception if the
        # promised memory isn't available (otherwise will trigger a bus error)
        try:
            zeros=(b"\0")*1024*1024
            remaining = int(np.dtype(dtype).itemsize*size)
            while remaining > 0 :
                os.write(mem.fd, zeros[:remaining])
                remaining-=len(zeros)

        except OSError as exc :
            if not ((exc.errno == 45 or exc.errno == 6) and os.uname()[0] == "Darwin"):
                _shared_array_unlink(fname)
                raise MemoryError("Unable to create shared memory region")

        # fd, fname = tempfile.mkstemp()
        # ret_ar = np.memmap(os.fdopen(mem.fd), dtype=dtype, shape=dims).view(SimArray)
        mapfile = mmap.mmap(mem.fd, mem.size)
        ret_ar = np.frombuffer(mapfile, dtype=dtype, count=size).reshape(
            dims).view(SimArray)
        ret_ar._shared_fname = fname
        ret_ar._shared_del = True
        if zero_size:
            ret_ar = ret_ar[1:]
        mem.close_fd()

    else:
        if zeros:
            ret_ar = np.zeros(dims, dtype=dtype).view(SimArray)
        else:
            ret_ar = np.empty(dims, dtype=dtype).view(SimArray)
    return ret_ar


if posix_ipc:

    class _deconstructed_shared_array(tuple):
        pass

[docs]    class RemoteKeyboardInterrupt(Exception):
        pass

    def _shared_array_deconstruct(ar, transfer_ownership=False):
        """Deconstruct an array backed onto shared memory into something that can be
        passed between processes efficiently. If *transfer_ownership* is True,
        also transfers responsibility for deleting the underlying memory (if this
        process has it) to the reconstructing process."""

        assert isinstance(ar, SimArray)
        ar_base = ar
        while isinstance(ar_base.base, SimArray):
            ar_base = ar_base.base

        assert hasattr(ar_base,'_shared_fname'), "Cannot prepare an array for shared use unless it was created in shared memory"

        ownership_out = transfer_ownership and ar_base._shared_del
        if transfer_ownership:
            ar_base._shared_del = False

        offset = ar.__array_interface__['data'][0] - \
                  ar_base.__array_interface__['data'][0]

        return _deconstructed_shared_array((ar.dtype, ar.shape, ar_base._shared_fname, ownership_out,
                                            offset, ar.strides))

    def _shared_array_reconstruct(X):
        dtype, dims, fname, ownership, offset, strides = X
        mem = posix_ipc.SharedMemory(fname)
        mapfile = mmap.mmap(mem.fd, mem.size)
        size = reduce(np.multiply, dims)
        # new_ar =  np.memmap(mem.fd, dtype=dtype, shape=dims, mode='r+').view(SimArray)
        new_ar = np.frombuffer(
            mapfile, dtype=dtype, count=size, offset=offset).reshape(dims).view(SimArray)
        new_ar.strides = strides
        mem.close_fd()
        new_ar._shared_fname = fname
        new_ar._shared_del = ownership
        return new_ar

    def _shared_array_unlink(X) :
        if isinstance(X,str):
            name = X
        else :
            name = X._shared_fname

        try:
            posix_ipc.unlink_shared_memory(name)
        except (posix_ipc.ExistentialError, OSError) :
            pass

    def _recursive_shared_array_deconstruct(input, transfer_ownership=False) :
        """Works through items in input, deconstructing any shared memory arrays
        into transferrable references"""
        output = []
        for item in input:
            if isinstance(item, SimArray):
                item = _shared_array_deconstruct(item, transfer_ownership)
            elif isinstance(item, list) or isinstance(item, tuple):
                item = _recursive_shared_array_deconstruct(item, transfer_ownership)
            output.append(item)
        return output

    def _recursive_shared_array_reconstruct(input):
        """Works through items in input, reconstructing any shared memory arrays
        from transferrable references"""
        output = []
        for item in input:
            if isinstance(item, _deconstructed_shared_array):
                item = _shared_array_reconstruct(item)
            elif isinstance(item, list) or isinstance(item, tuple):
                item = _recursive_shared_array_reconstruct(item)
            output.append(item)
        return output

[docs]    def shared_array_remote(fn):
        """A decorator for functions returning a new function that is
        suitable for use remotely. Inputs to and outputs from the function
        can be transferred efficiently if they are backed onto shared
        memory. Ownership of any shared memory returned by the function
        is transferred."""

        @functools.wraps(fn)
        def new_fn(args, **kwargs):
            try:
                import signal
                assert hasattr(
                    args, '__len__'), "Function must be called from remote_map to use shared arrays"
                assert args[
                    0] == '__pynbody_remote_array__', "Function must be called from remote_map to use shared arrays"
                args = _recursive_shared_array_reconstruct(args)
                signal.signal(signal.SIGINT, signal.SIG_DFL)
                output = fn(*args[1:], **kwargs)
                signal.signal(signal.SIGINT, signal.SIG_IGN)
                return _recursive_shared_array_deconstruct([output], True)[0]
            except KeyboardInterrupt:
                signal.signal(signal.SIGINT, signal.SIG_IGN)
                raise RemoteKeyboardInterrupt()
        new_fn.__pynbody_remote_array__ = True

        return new_fn

[docs]    def remote_map(pool, fn, *iterables):
        """A replacement for python's in-built map function, sending out tasks
        to the pool and performing the magic required to transport shared memory arrays
        correctly. The function *fn* must be wrapped with the *shared_array_remote*
        decorator to interface correctly with this magic."""

        assert getattr(fn, '__pynbody_remote_array__',
                       False), "Function must be wrapped with shared_array_remote to use shared arrays"
        iterables_deconstructed = _recursive_shared_array_deconstruct(
            iterables)
        try:
            results = pool.map(fn, list(zip(
                ['__pynbody_remote_array__'] * len(iterables_deconstructed[0]), *iterables_deconstructed)))
        except RemoteKeyboardInterrupt:
            raise KeyboardInterrupt
        return _recursive_shared_array_reconstruct(results)

[docs]    @atexit.register
    def exit_cleanup():
        """Clean up any shared memory that has not yet been freed. In
        theory this should not be required, but it is here as a safety
        net."""

        global _all_shared_arrays

        for fname in _all_shared_arrays:
            try:
                posix_ipc.unlink_shared_memory(fname)
            except (posix_ipc.ExistentialError, OSError):
                pass
Source code for pynbody.array

Navigation

Related Topics