Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions pytools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,11 @@

.. autofunction:: resolve_name

Hashing
-------

.. autofunction:: unordered_hash

Type Variables Used
-------------------

Expand Down Expand Up @@ -2616,6 +2621,51 @@ def resolve_name(name):
# }}}


# {{{ unordered_hash

def unordered_hash(hash_instance, iterable, hash_constructor=None):
    """Update *hash_instance* with a value that depends on the entries of
    *iterable*, but not on the order in which they are encountered.

    Each entry is hashed on its own, using a fresh hash object obtained
    from the parameter-less callable *hash_constructor* (for example
    ``hashlib.sha256`` from :mod:`hashlib`). The per-entry digests are
    combined with XOR, and the combined value, serialized to
    ``hash_instance.digest_size`` bytes, is fed to *hash_instance*.

    :arg hash_instance: the hash object to update. It must provide
        ``update`` and ``digest_size`` and, if *hash_constructor* is not
        given, ``name``.
    :arg iterable: the entries to hash. Every entry *i* must permit
        ``hash_constructor().update(i)`` to succeed.
    :arg hash_constructor: a callable taking no arguments and returning a
        new hash object. If *None*, ``hashlib.new(hash_instance.name)`` is
        used to create the per-entry hashes.
    :returns: the updated *hash_instance*.

    .. warning::

        The construction used in this function is likely not cryptographically
        secure. Do not use this function in a security-relevant context.

    .. versionadded:: 2021.2
    """

    if hash_constructor is None:
        import hashlib

        algorithm_name = hash_instance.name

        def hash_constructor():
            return hashlib.new(algorithm_name)

    # The native byte order is used for speed. This does not make the result
    # platform-dependent: XOR acts on each bit independently, and the final
    # to_bytes() call below uses the same byte order as from_bytes(), so the
    # bytes handed to *hash_instance* are the byte-wise XOR of the digests.
    byteorder = sys.byteorder

    combined = 0
    for entry in iterable:
        entry_hash = hash_constructor()
        entry_hash.update(entry)
        combined ^= int.from_bytes(entry_hash.digest(), byteorder)

    hash_instance.update(
        combined.to_bytes(hash_instance.digest_size, byteorder))
    return hash_instance

# }}}


def _test():
    """Run :func:`doctest.testmod` with its default arguments."""
    import doctest
    doctest.testmod()
Expand Down
61 changes: 51 additions & 10 deletions pytools/persistent_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,37 @@ def error_clean_up(self):
# {{{ key generation

class KeyBuilder:
"""A (stateless) object that computes hashes of objects fed to it. Subclassing
this class permits customizing the computation of hash keys.

.. automethod:: __call__
.. automethod:: rec
.. staticmethod:: new_hash()

Return a new hash instance following the protocol of the ones
from :mod:`hashlib`. This will permit switching to different
hash algorithms in the future. Subclasses are expected to use
this to create new hashes. Not doing so is deprecated and
may stop working as early as 2022.

.. versionadded:: 2021.2
"""

# this exists so that we can (conceivably) switch algorithms at some point
# down the road
new_hash = hashlib.sha256

def rec(self, key_hash, key):
"""
:arg key_hash: the hash object to be updated with the hash of *key*.
:arg key: the (immutable) Python object to be hashed.
:returns: the updated *key_hash*

.. versionchanged:: 2021.2

Now returns the updated *key_hash*.
"""

digest = None

try:
Expand All @@ -187,7 +217,7 @@ def rec(self, key_hash, key):
except AttributeError:
pass
else:
inner_key_hash = hashlib.sha256()
inner_key_hash = self.new_hash()
method(inner_key_hash, self)
digest = inner_key_hash.digest()

Expand All @@ -205,7 +235,7 @@ def rec(self, key_hash, key):
method = self.update_for_specific_dtype

if method is not None:
inner_key_hash = hashlib.sha256()
inner_key_hash = self.new_hash()
method(inner_key_hash, key)
digest = inner_key_hash.digest()

Expand All @@ -222,24 +252,32 @@ def rec(self, key_hash, key):
pass

key_hash.update(digest)
return key_hash

def __call__(self, key):
key_hash = hashlib.sha256()
key_hash = self.new_hash()
self.rec(key_hash, key)
return key_hash.hexdigest()

# {{{ updaters

@staticmethod
def update_for_int(key_hash, key):
key_hash.update(str(key).encode("utf8"))
sz = 8
while True:
try:
key_hash.update(key.to_bytes(sz, byteorder="little", signed=True))
return
except OverflowError:
sz *= 2

update_for_long = update_for_int
update_for_bool = update_for_int
@staticmethod
def update_for_bool(key_hash, key):
key_hash.update(str(key).encode("utf8"))

@staticmethod
def update_for_float(key_hash, key):
key_hash.update(repr(key).encode("utf8"))
key_hash.update(key.hex().encode("utf8"))

@staticmethod
def update_for_str(key_hash, key):
Expand All @@ -254,8 +292,11 @@ def update_for_tuple(self, key_hash, key):
self.rec(key_hash, obj_i)

def update_for_frozenset(self, key_hash, key):
for set_key in sorted(key):
self.rec(key_hash, set_key)
from pytools import unordered_hash

unordered_hash(
key_hash,
(self.rec(self.new_hash(), key_i).digest() for key_i in key))

@staticmethod
def update_for_NoneType(key_hash, key): # noqa
Expand Down Expand Up @@ -426,7 +467,7 @@ def __init__(self, identifier, key_builder=None, container_dir=None):
import appdirs
container_dir = join(
appdirs.user_cache_dir("pytools", "pytools"),
"pdict-v3-{}-py{}".format(
"pdict-v4-{}-py{}".format(
identifier,
".".join(str(i) for i in sys.version_info)))

Expand Down
2 changes: 1 addition & 1 deletion pytools/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
VERSION = (2021, 1, 2)
VERSION = (2021, 2)
VERSION_STATUS = ""
VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS
3 changes: 2 additions & 1 deletion test/test_persistent_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def rand_str(n=20):
for i in range(n))

keys = [
(randrange(2000), rand_str(), None, SomeTag(rand_str()))
(randrange(2000)-1000, rand_str(), None, SomeTag(rand_str()),
frozenset({"abc", 123}))
for i in range(20)]
values = [randrange(2000) for i in range(20)]

Expand Down
45 changes: 45 additions & 0 deletions test/test_pytools.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,26 @@
__copyright__ = "Copyright (C) 2009-2021 Andreas Kloeckner"

__license__ = """
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""


import sys
import pytest

Expand Down Expand Up @@ -412,6 +435,28 @@ class BestInClassRibbon(FairRibbon, UniqueTag):
t4.without_tags(red_ribbon)


def test_unordered_hash():
    """Check that :func:`pytools.unordered_hash` ignores entry order, is
    repeatable, and reacts to removed or replaced entries.
    """
    import random
    import hashlib

    from pytools import unordered_hash

    def digest_of(entries):
        # Always start from a fresh SHA-256 instance so digests are comparable.
        return unordered_hash(hashlib.sha256(), entries).digest()

    # FIXME: Use randbytes once >=3.9 is OK
    shuffled = [
        bytes([random.randrange(256) for _ in range(20)])
        for _ in range(200)]
    original = shuffled[:]
    random.shuffle(shuffled)

    # Same entries, different order: identical digest.
    assert digest_of(original) == digest_of(shuffled)
    # Same entries, same order: identical digest.
    assert digest_of(original) == digest_of(original)
    # Dropping an entry changes the digest.
    assert digest_of(original) != digest_of(original[:-1])

    # Replacing an entry changes the digest.
    shuffled[0] = b"aksdjfla;sdfjafd"
    assert digest_of(original) != digest_of(shuffled)


if __name__ == "__main__":
if len(sys.argv) > 1:
exec(sys.argv[1])
Expand Down