Skip to content

Commit 4f1c6e0

Browse files
committed
Rewrite hash calculation code in rust
1 parent 7a4eb8a commit 4f1c6e0

File tree

2 files changed

+205
-142
lines changed

2 files changed

+205
-142
lines changed

claripy/ast/base.py

Lines changed: 12 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import builtins
12
import itertools
23
import logging
34
import math
@@ -17,17 +18,6 @@
1718
if TYPE_CHECKING:
1819
from claripy.annotation import Annotation
1920

20-
try:
21-
import _pickle as pickle
22-
except ImportError:
23-
import pickle
24-
25-
try:
26-
# Python's built-in MD5 is about 2x faster than hashlib.md5 on short bytestrings
27-
import _md5 as md5
28-
except ImportError:
29-
import hashlib as md5
30-
3121
l = logging.getLogger("claripy.ast")
3222

3323
WORKER = bool(os.environ.get("WORKER", False))
@@ -69,7 +59,7 @@ def _make_name(name: str, size: int, explicit_name: bool = False, prefix: str =
6959
return name
7060

7161

72-
def _d(h, cls, state):
62+
def _unpickle(h, cls, state):
7363
"""
7464
This function is the deserializer for ASTs.
7565
It exists to work around the fact that pickle will (normally) call __new__() with no arguments during
@@ -132,9 +122,6 @@ def __new__(cls, op, args, add_variables=None, hash=None, **kwargs): # pylint:d
132122
:param annotations: A frozenset of annotations applied onto this AST.
133123
"""
134124

135-
# if any(isinstance(a, BackendObject) for a in args):
136-
# raise Exception('asdf')
137-
138125
a_args = args if type(args) is tuple else tuple(args)
139126

140127
# initialize the following properties: symbolic, variables and errored
@@ -252,17 +239,17 @@ def __new__(cls, op, args, add_variables=None, hash=None, **kwargs): # pylint:d
252239
elif op in {"BVS", "BVV", "BoolS", "BoolV", "FPS", "FPV"} and not annotations:
253240
if op == "FPV" and a_args[0] == 0.0 and math.copysign(1, a_args[0]) < 0:
254241
# Python does not distinguish between +0.0 and -0.0 so we add sign to tuple to distinguish
255-
h = (op, kwargs.get("length", None), ("-", *a_args))
242+
h = builtins.hash((op, kwargs.get("length", None), ("-", *a_args)))
256243
elif op == "FPV" and math.isnan(a_args[0]):
257244
# cannot compare nans
258-
h = (op, kwargs.get("length", None), ("nan",) + a_args[1:])
245+
h = builtins.hash((op, kwargs.get("length", None), ("nan",) + a_args[1:]))
259246
else:
260-
h = (op, kwargs.get("length", None), a_args)
247+
h = builtins.hash((op, kwargs.get("length", None), a_args))
261248

262249
cache = cls._leaf_cache
263250
else:
264251
h = Base._calc_hash(op, a_args, kwargs) if hash is None else hash
265-
self = cache.get(h, None)
252+
self = cache.get(h & 0x7FFF_FFFF_FFFF_FFFF, None)
266253
if self is None:
267254
self = super().__new__(
268255
cls,
@@ -282,8 +269,8 @@ def __new__(cls, op, args, add_variables=None, hash=None, **kwargs): # pylint:d
282269
relocatable_annotations=relocatable_annotations,
283270
**kwargs,
284271
)
285-
self._hash = h
286-
cache[h] = self
272+
self._hash = h & 0x7FFF_FFFF_FFFF_FFFF
273+
cache[self._hash] = self
287274
# else:
288275
# if self.args != a_args or self.op != op or self.variables != kwargs['variables']:
289276
# raise Exception("CRAP -- hash collision")
@@ -296,7 +283,7 @@ def __init_with_annotations__(
296283
):
297284
cache = cls._hash_cache
298285
h = Base._calc_hash(op, a_args, kwargs)
299-
self = cache.get(h, None)
286+
self = cache.get(h & 0x7FFF_FFFF_FFFF_FFFF, None)
300287
if self is not None:
301288
return self
302289

@@ -318,15 +305,15 @@ def __init_with_annotations__(
318305
**kwargs,
319306
)
320307

321-
self._hash = h
322-
cache[h] = self
308+
self._hash = h & 0x7FFF_FFFF_FFFF_FFFF
309+
cache[self._hash] = self
323310

324311
return self
325312

326313
def __reduce__(self):
327314
# HASHCONS: these attributes key the cache
328315
# BEFORE CHANGING THIS, SEE ALL OTHER INSTANCES OF "HASHCONS" IN THIS FILE
329-
return _d, (
316+
return _unpickle, (
330317
self._hash,
331318
self.__class__,
332319
(self.op, self.args, self.length, self.variables, self.symbolic, self.annotations),
@@ -335,113 +322,6 @@ def __reduce__(self):
335322
def __init__(self, *args, **kwargs):
336323
pass
337324

338-
@staticmethod
def _calc_hash(op, args, keywords):
    """
    Compute the hash-consing key of an AST from its operation, arguments and keywords.

    :param op:          The operation.
    :param args:        The arguments to the operation.
    :param keywords:    A dict that must contain 'variables' and 'symbolic'; 'length' and
                        'annotations' are read when present.
    :returns:           The first field that ``md5_unpacker`` extracts from the MD5 digest of the
                        serialized AST (the original code describes this as 64 bits).

    MD5 is used instead of the builtin ``hash`` to avoid collisions
    (``hash(-1) == hash(-2)``, for example).
    """
    # Plain ints and floats are hashed by value; everything else contributes its precomputed
    # ``_hash`` when it has one, or its builtin hash otherwise. The builtin hash is computed
    # unconditionally because it is passed as the getattr default.
    reduced_args = []
    for arg in args:
        if type(arg) in (int, float):
            reduced_args.append(arg)
        else:
            reduced_args.append(getattr(arg, "_hash", hash(arg)))
    args_tup = tuple(reduced_args)

    # HASHCONS: these attributes key the cache
    # BEFORE CHANGING THIS, SEE ALL OTHER INSTANCES OF "HASHCONS" IN THIS FILE

    payload = Base._ast_serialize(op, args_tup, keywords)
    if payload is None:
        # The compact serializer gave up on this AST: fall back to pickle.dumps
        fallback_key = (
            op,
            args_tup,
            str(keywords.get("length", None)),
            hash(keywords["variables"]),
            keywords["symbolic"],
            hash(keywords.get("annotations", None)),
        )
        payload = pickle.dumps(fallback_key, -1)

    # MD5 is cryptographically broken, but speed matters more than cryptographic
    # integrity for a cache key.
    digest = md5.md5(payload).digest()
    unpacked = md5_unpacker.unpack(digest)
    return unpacked[0]  # 64 bits
373-
374-
@staticmethod
375-
def _arg_serialize(arg) -> bytes | None:
376-
if arg is None:
377-
return b"\x0f"
378-
elif arg is True:
379-
return b"\x1f"
380-
elif arg is False:
381-
return b"\x2e"
382-
elif isinstance(arg, int):
383-
if arg < 0:
384-
if arg >= -0x7FFF:
385-
return b"-" + struct.pack("<h", arg)
386-
elif arg >= -0x7FFF_FFFF:
387-
return b"-" + struct.pack("<i", arg)
388-
elif arg >= -0x7FFF_FFFF_FFFF_FFFF:
389-
return b"-" + struct.pack("<q", arg)
390-
return None
391-
else:
392-
if arg <= 0xFFFF:
393-
return struct.pack("<H", arg)
394-
elif arg <= 0xFFFF_FFFF:
395-
return struct.pack("<I", arg)
396-
elif arg <= 0xFFFF_FFFF_FFFF_FFFF:
397-
return struct.pack("<Q", arg)
398-
return None
399-
elif isinstance(arg, str):
400-
return arg.encode()
401-
elif isinstance(arg, float):
402-
return struct.pack("f", arg)
403-
elif isinstance(arg, tuple):
404-
arr = []
405-
for elem in arg:
406-
b = Base._arg_serialize(elem)
407-
if b is None:
408-
return None
409-
arr.append(b)
410-
return b"".join(arr)
411-
412-
return None
413-
414-
@staticmethod
def _ast_serialize(op: str, args_tup, keywords) -> bytes | None:
    """
    Build the bytestring that gets hashed to identify an AST.

    :param op:          The operator.
    :param args_tup:    A tuple of arguments.
    :param keywords:    A dict of keywords. 'variables' and 'symbolic' are required;
                        'length' and 'annotations' are used when present.
    :return:            The serialized bytestring, or None if the arguments or the length
                        cannot be serialized by ``_arg_serialize``.
    """

    args_bytes = Base._arg_serialize(args_tup)
    if args_bytes is None:
        return None

    # A missing length gets a fixed marker; a present one must itself be serializable.
    if "length" not in keywords:
        length_bytes = b"none"
    else:
        length_bytes = Base._arg_serialize(keywords["length"])
        if length_bytes is None:
            return None

    # Python hashes may be negative, so they are masked to an unsigned 64-bit value
    # before being packed.
    u64_mask = 0xFFFF_FFFF_FFFF_FFFF
    variables_bytes = struct.pack("<Q", hash(keywords["variables"]) & u64_mask)

    if keywords["symbolic"]:
        symbolic_bytes = b"\x01"
    else:
        symbolic_bytes = b"\x00"

    # Absent annotations are encoded with a one-byte marker instead of a packed hash.
    if "annotations" not in keywords:
        annotations_bytes = b"\xf9"
    else:
        annotations_bytes = struct.pack("<Q", hash(keywords["annotations"]) & u64_mask)

    return b"".join(
        (
            op.encode(),
            args_bytes,
            length_bytes,
            variables_bytes,
            symbolic_bytes,
            annotations_bytes,
        )
    )
444-
445325
# pylint:disable=attribute-defined-outside-init
446326
def __a_init__(
447327
self,
@@ -523,12 +403,6 @@ def _encoded_name(self):
523403
# Collapsing and simplification
524404
#
525405

526-
# def _models_for(self, backend):
527-
# for a in self.args:
528-
# backend.convert_expr(a)
529-
# else:
530-
# yield backend.convert(a)
531-
532406
def make_like(self: T, op: str, args: Iterable, **kwargs) -> T:
533407
# Try to simplify the expression again
534408
simplified = simplifications.simpleton.simplify(op, args) if kwargs.pop("simplify", False) is True else None

0 commit comments

Comments
 (0)