Skip to content

Commit ce12905

Browse files
committed
Fix memoisation of lazy parser & bump version
Reported by @Rafiot: the lazy parser is not memoised. This has a limited effect on the basic / pure Python parser, as its initialisation is trivial, but it *significantly* impacts the re2 and regex parsers, as they need to process regexes into a filter tree. The memoisation was mistakenly removed in #230: while refactoring initialisation I removed the setting of the `parser` global.
- add a test to ensure the parser is correctly memoised, not re-instantiated every time
- reinstate setting the global
- add a mutex on `__getattr__`; it should only be used on first access, and it avoids two threads creating an expensive parser at the same time (which is a waste of CPU)
Fixes #253
1 parent 5f5f338 commit ce12905

File tree

3 files changed

+39
-9
lines changed

3 files changed

+39
-9
lines changed

Diff for: pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
55
[project]
66
name = "ua-parser"
77
description = "Python port of Browserscope's user agent parser"
8-
version = "1.0.0"
8+
version = "1.0.1"
99
readme = "README.rst"
1010
requires-python = ">=3.9"
1111
dependencies = ["ua-parser-builtins"]

Diff for: src/ua_parser/__init__.py

+21-8
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@
4141
]
4242

4343
import importlib.util
44-
from typing import Callable, Optional
44+
import threading
45+
from typing import Callable, Optional, cast
4546

4647
from .basic import Resolver as BasicResolver
4748
from .caching import CachingResolver, S3Fifo as Cache
@@ -78,7 +79,7 @@
7879
)
7980

8081

81-
VERSION = (1, 0, 0)
82+
VERSION = (1, 0, 1)
8283

8384

8485
class Parser:
@@ -135,15 +136,27 @@ def parse_device(self: Resolver, ua: str) -> Optional[Device]:
135136
initialisation, rather than pay for it at first call.
136137
"""
137138

139+
_lazy_globals_lock = threading.Lock()
140+
138141

139142
def __getattr__(name: str) -> Parser:
140143
global parser
141-
if name == "parser":
142-
if RegexResolver or Re2Resolver or IS_GRAAL:
143-
matchers = load_lazy_builtins()
144-
else:
145-
matchers = load_builtins()
146-
return Parser.from_matchers(matchers)
144+
with _lazy_globals_lock:
145+
if name == "parser":
146+
# if two threads access `ua_parser.parser` before it's
147+
# initialised, the second one will wait until the first
148+
# one's finished by which time the parser global should be
149+
# set and can be returned with no extra work
150+
if p := globals().get("parser"):
151+
return cast(Parser, p)
152+
153+
if RegexResolver or Re2Resolver or IS_GRAAL:
154+
matchers = load_lazy_builtins()
155+
else:
156+
matchers = load_builtins()
157+
parser = Parser.from_matchers(matchers)
158+
return parser
159+
147160
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
148161

149162

Diff for: tests/test_convenience_parser.py

+17
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,23 @@
1+
import ua_parser
12
from ua_parser import Domain, Parser, PartialResult, Result
23

34

5+
def test_parser_memoized() -> None:
6+
"""The global parser should be lazily instantiated but memoized"""
7+
# ensure there is no global parser
8+
vars(ua_parser).pop("parser", None)
9+
10+
p1 = ua_parser.parser
11+
p2 = ua_parser.parser
12+
13+
assert p1 is p2
14+
15+
# force the creation of a clean parser
16+
del ua_parser.parser
17+
p3 = ua_parser.parser
18+
assert p3 is not p1
19+
20+
421
def resolver(s: str, d: Domain) -> PartialResult:
522
return PartialResult(d, None, None, None, s)
623

0 commit comments

Comments
 (0)