I have an old piece of Python code that parses a rigorously formatted text file (storing strings for localisation). Because the structure is hierarchical and some elements may or may not be present, the programme uses nested defaultdicts to represent it. Something like:
# Two-level mapping: a missing key yields a new inner defaultdict, and a missing lang yields "".
terms = defaultdict(lambda: defaultdict(str)) # dict<key, dict<lang, translation>>
These dictionaries are untyped (their members can be of any type), they are nested, and I need to add another level to the hierarchy, so I decided to add typing to this programme:
from typing import Dict, ItemsView, List, NewType, Set, Tuple
# NewType aliases: distinct types to a static checker, plain str/int at runtime.
Key = NewType('Key', str)  # identifier of a term (outermost dictionary key)
Lang = NewType('Lang', str)  # language a translation belongs to
Translation = NewType('Translation', str)  # translated text
PLIndex = NewType('PLIndex', int)  # NOTE(review): presumably a plural-form index -- confirm
However, I can't for the life of me figure out how to rewrite the terms = line above to make those nested defaultdicts typed.
What I ended up doing is basically wrapping dict in my own classes, which doesn't look too good:
class Forms:
    """Typed wrapper around a ``dict`` mapping ``PLIndex`` to ``Translation``."""

    def __init__(self) -> None:
        # Backing store for the mapping.
        self.dct: Dict[PLIndex, Translation] = {}

    def __getitem__(self, item: PLIndex) -> Translation:
        """Return the translation stored under *item*.

        Raises:
            KeyError: if nothing has been stored under *item*.
        """
        return self.dct[item]

    def __setitem__(self, key: PLIndex, value: Translation) -> None:
        """Store *value* under *key*, replacing any previous entry."""
        self.dct[key] = value
class Translations:
    """Typed wrapper around a ``dict`` mapping ``Lang`` to ``Forms``.

    Reading a language that has no entry yet creates an empty ``Forms`` for
    it, stores it and returns it.
    """

    def __init__(self) -> None:
        # Backing store for the mapping.
        self.dct: Dict[Lang, Forms] = {}

    def __getitem__(self, item: Lang) -> Forms:
        """Return the forms for *item*, creating and storing them if absent."""
        try:
            return self.dct[item]
        except KeyError:
            # First access: insert a fresh container so that chained writes
            # such as ``translations[lang][index] = ...`` need no setup.
            created = self.dct[item] = Forms()
            return created

    def __setitem__(self, key: Lang, value: Forms) -> None:
        """Store *value* under *key*, replacing any previous entry."""
        self.dct[key] = value

    def items(self) -> ItemsView[Lang, Forms]:
        """Return the backing dict's view of its ``(lang, forms)`` pairs."""
        return self.dct.items()
class Terms:
    """Typed wrapper around a ``dict`` mapping ``Key`` to ``Translations``.

    Reading a key that has no entry yet creates an empty ``Translations`` for
    it, stores it and returns it.
    """

    def __init__(self) -> None:
        # Backing store for the mapping.
        self.dct: Dict[Key, Translations] = {}

    def __getitem__(self, item: Key) -> Translations:
        """Return the translations for *item*, creating and storing them if absent."""
        try:
            return self.dct[item]
        except KeyError:
            # First access: insert a fresh container so that chained lookups
            # such as ``terms[key][lang]`` need no setup.
            created = self.dct[item] = Translations()
            return created

    def __setitem__(self, key: Key, value: Translations) -> None:
        """Store *value* under *key*, replacing any previous entry."""
        self.dct[key] = value

    def __len__(self) -> int:
        """Return the number of keys currently stored."""
        return len(self.dct)

    def items(self) -> ItemsView[Key, Translations]:
        """Return the backing dict's view of its ``(key, translations)`` pairs."""
        return self.dct.items()
...
# Wrapper-class replacement for the nested defaultdicts.
terms = Terms()
Is there a way I could declare my Forms, Translations and other types as just NewTypes for dict/defaultdict and be able to rewrite the terms = in a way that would enforce correct types for the nested dictionaries? Or could I extend dict/defaultdict (instead of wrapping them) and be able to enforce correct types? Or is there a better way altogether?
Wrapping dicts seems to me pointless code (since it does not add any new functionality but you still have to maintain it) and I would avoid it if possible.
Now, the following is working for me:
from collections import defaultdict
from typing import Tuple, Dict, DefaultDict, Set, List, NewType
# NewType aliases: distinct types to a static checker, plain str/int at runtime.
Key = NewType('Key', str)
Lang = NewType('Lang', str)
Translation = NewType('Translation', str)
PLIndex = NewType('PLIndex', int)
# One alias per nesting level, innermost first.
FormsDict = DefaultDict[PLIndex, Translation]  # PLIndex -> Translation
TranslationsDict = DefaultDict[Lang, FormsDict]  # Lang -> FormsDict
TermsDict = DefaultDict[Key, TranslationsDict]  # Key -> TranslationsDict
# Three nested defaultdicts: a missing key at each level creates the next
# level down on first access, and a missing PLIndex yields an empty Translation.
terms: TermsDict = defaultdict( # TermsDict
lambda: defaultdict( # TranslationsDict
lambda: defaultdict( # FormsDict
lambda: Translation("") # Default value "" (as Translation)
)
)
)
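I have tested this with mypy --strict and it passes validation. If you want to populate it from plain dict literals and still pass validation, it seems that you will need cast. Note that cast does not convert anything at runtime, so the inner value remains a plain dict, as the output below shows: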
from typing import cast
# NOTE: cast() only informs the type checker and returns its argument
# unchanged. The inner value stored here therefore stays a plain dict, so
# looking up a missing PLIndex on it raises KeyError instead of returning
# the default Translation.
terms[Key("key1")].update(
cast(TranslationsDict, {
Lang("en_GB.UTF-8"): cast(FormsDict, {
PLIndex(100): Translation("key1")
})
})
)
print(terms)
Output:
defaultdict(<function <lambda> at 0x107d31cb0>, {
'key1': defaultdict(<function <lambda>.<locals>.<lambda> at 0x107d31d40>, {
'en_GB.UTF-8': {100: 'key1'}})})
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With