class LintCatalog(CommandMixin):
    """Command that checks translated PO catalogs for placeholder mismatches.

    Each translated message is parsed with :class:`string.Formatter`. The set
    of ``str.format``-style replacement fields found in the original text is
    compared with the set found in the translation, and every difference is
    printed to standard output as ``path:lineno: description``.
    """

    description = 'check message catalogs for common problems'
    user_options = [
        ('input-paths=', None,
         'files or directories that should be checked. Separate multiple '
         'files or directories with commas(,)'),  # TODO: Support repetition of this argument
    ]
    as_args = 'input-paths'

    @dataclass(frozen=True)
    class MessagePair:
        """An original string paired with the translation it is checked against."""

        # Source text that the translation is compared with.
        original: str
        # Translated text.
        translated: str
        # Index N of the ``msgstr[N]`` form that is compared against
        # ``msgid_plural``; ``None`` when the pair is compared against ``msgid``.
        plural_number: int | None = None

    def initialize_options(self):
        """Reset all options to their "not specified" state."""
        self.input_paths: list[str] | None = None

    def finalize_options(self):
        """Normalise and validate the options.

        :raises OptionError: if no input file or directory was given
        """
        # The option help promises comma-separated values; when the option
        # arrives as one string it must be split here, otherwise ``run`` would
        # iterate over its individual characters.
        if isinstance(self.input_paths, str):
            self.input_paths = [
                part.strip() for part in self.input_paths.split(',') if part.strip()
            ]
        if not self.input_paths:
            raise OptionError("no input files or directories specified")

    def run(self):
        """Lint every configured path; directories are searched recursively."""
        for input_path in self.input_paths:
            path = Path(input_path)
            if path.is_dir():
                self._lint_directory(path)
            else:
                self._lint_file(path)

    def _lint_directory(self, directory: Path) -> None:
        """Lint every ``*.po`` file found below ``directory``.

        :param directory: the directory to search recursively
        """
        # Sort the matches so the report order is reproducible between runs.
        for path in sorted(Path(directory).rglob('*.po')):
            if path.is_file():
                self._lint_file(path)

    def _lint_file(self, path: Path) -> None:
        """Print one report per placeholder mismatch found in a PO file.

        :param path: the PO file to read
        """
        with open(path, 'rb') as f:
            catalog = read_po(f)

        for msg in catalog:
            # Entries without a msgid (e.g. the catalog header) have nothing
            # to compare.
            if not msg.id:
                continue

            for msg_pair in self._iter_msg_pairs(msg, num_plurals=catalog.num_plurals):
                # An empty translation is "not translated yet", not a
                # placeholder error. This also covers individual plural forms
                # of a partially translated message.
                if not msg_pair.translated:
                    continue

                orig_placeholders = self._extract_placeholders(msg_pair.original)
                trans_placeholders = self._extract_placeholders(msg_pair.translated)
                if orig_placeholders != trans_placeholders:
                    formatted = self._format_message(
                        orig_placeholders,
                        trans_placeholders,
                        msg_pair.plural_number,
                    )
                    print(f'{path}:{msg.lineno}: {formatted}')

    def _format_message(
        self,
        original_placeholders: set[str],
        translated_placeholders: set[str],
        plural_number: int | None,
    ) -> str:
        """Describe how two placeholder sets differ.

        :param original_placeholders: placeholders found in the original text
        :param translated_placeholders: placeholders found in the translation
        :param plural_number: index of the plural form being reported, or
                              ``None`` when reporting against ``msgid``
        :return: a header line followed by one tab-indented line per
                 direction in which the sets differ
        """
        def _sort_and_format(placeholders: set[str]) -> str:
            return ', '.join(sorted(placeholders))

        msgid = 'msgid' if plural_number is None else 'msgid_plural'
        msgstr = 'msgstr' if plural_number is None else f'msgstr[{plural_number}]'

        # Collect the lines and join them once, so that no blank line appears
        # when only the second kind of difference is present.
        lines = [f'placeholders in {msgid} differ from placeholders in {msgstr}:']
        if only_in_msgid := original_placeholders - translated_placeholders:
            formatted = _sort_and_format(only_in_msgid)
            lines.append(f'\tplaceholders in {msgid} but missing in {msgstr}: {formatted}')
        if only_in_msgstr := translated_placeholders - original_placeholders:
            formatted = _sort_and_format(only_in_msgstr)
            lines.append(f'\tplaceholders in {msgstr} but missing in {msgid}: {formatted}')
        return '\n'.join(lines)

    def _iter_msg_pairs(
        self,
        msg: Message,
        *,
        num_plurals: int,
    ) -> Generator["LintCatalog.MessagePair", None, None]:
        """Iterate over all (original, translated) message pairs in a given message.

        For singular messages, this produces a single pair (original, translated).
        For plural messages, this produces a pair for each plural form. For example,
        for a language with 4 plural forms, this will generate:

            (orig_singular, trans_singular),
            (orig_plural, trans_plural_1),
            (orig_plural, trans_plural_2),
            (orig_plural, trans_plural_3)

        For languages with nplurals=1, this generates a single pair:

            (orig_plural, trans_plural)

        :param msg: the message to split into pairs
        :param num_plurals: number of plural forms of the catalog's language
        """
        if not msg.pluralizable:
            yield self.MessagePair(msg.id, msg.string)
        elif num_plurals == 1:
            # Pluralized messages with nplurals=1 should be compared against the 'msgid_plural'.
            yield self.MessagePair(msg.id[1], msg.string[0], plural_number=0)
        else:
            # Pluralized messages with nplurals>1 should compare 'msgstr[0]' against the singular and
            # any other 'msgstr[X]' against 'msgid_plural'.
            yield self.MessagePair(msg.id[0], msg.string[0])
            for i, string in enumerate(msg.string[1:], start=1):
                yield self.MessagePair(msg.id[1], string, plural_number=i)

    def _extract_placeholders(self, string: str) -> set[str]:
        """Return the set of ``str.format`` replacement fields in ``string``.

        Each field is rendered back to its ``{name!conversion:spec}`` form.

        :param string: the text to scan
        :return: the distinct placeholders; an empty set if ``string`` cannot
                 be parsed as a format string
        """
        try:
            parsed = list(Formatter().parse(string))
        except ValueError:
            # The text is not a valid format string (e.g. a lone brace), so it
            # is treated as having no placeholders.
            return set()
        # ``Formatter.parse`` yields (literal_text, field_name, format_spec,
        # conversion); field_name is None for chunks that are pure literal text.
        return {
            self._unparse_placeholder(field_name, conversion, format_spec)
            for _, field_name, format_spec, conversion in parsed
            if field_name is not None
        }

    def _unparse_placeholder(
        self,
        field_name: str,
        conversion: str | None = None,
        format_spec: str | None = None,
    ) -> str:
        """Rebuild the textual form of a replacement field.

        :param field_name: the field name (may be empty for ``{}``)
        :param conversion: the conversion character after ``!``, if any
        :param format_spec: the format specification after ``:``, if any
        :return: the placeholder wrapped in braces, e.g. ``{foo!r:.2f}``
        """
        conversion_part = f'!{conversion}' if conversion else ''
        spec_part = f':{format_spec}' if format_spec else ''
        return f'{{{field_name}{conversion_part}{spec_part}}}'
MessagePair = frontend.LintCatalog.MessagePair

class TestLintCatalog:
    """Tests for the ``lint`` command implemented by ``frontend.LintCatalog``."""

    def test_no_directory_or_input_file_specified(self):
        """``finalize_options`` raises ``OptionError`` when no input path is set."""
        cmd = frontend.LintCatalog()
        with pytest.raises(OptionError):
            cmd.finalize_options()

    @pytest.mark.parametrize(['string', 'expected'], [
        ('', set()),
        ('{', set()),
        ('}', set()),
        ('{}', {'{}'}),
        ('{} {', set()),
        ('{{}}', set()),
        ('{foo}', {'{foo}'}),
        ('{foo} {bar}', {'{foo}', '{bar}'}),
        ('{foo:.2f}', {'{foo:.2f}'}),
        ('{foo!r:.2f=}', {'{foo!r:.2f=}'}),
    ])
    def test__extract_placeholders(self, string, expected):
        """Each sample string yields exactly the listed set of placeholders."""
        cmd = frontend.LintCatalog()
        assert cmd._extract_placeholders(string) == expected

    @pytest.mark.parametrize(['num_plurals', 'message', 'expected'], [
        (3, ('foo', 'bar'), [MessagePair('foo', 'bar')]),
        (3, (['foo', 'foos'], ['bar', 'bars 1', 'bars 2']), [
            MessagePair('foo', 'bar'),
            MessagePair('foos', 'bars 1', plural_number=1),
            MessagePair('foos', 'bars 2', plural_number=2),
        ]),
        (1, (['foo', 'foos'], ['bars']), [MessagePair('foos', 'bars', plural_number=0)]),
    ])
    def test__iter_msg_pairs(self, num_plurals, message, expected):
        """Singular and plural messages are split into the expected pairs."""
        cmd = frontend.LintCatalog()
        msg = Message(id=message[0], string=message[1])
        msg_pairs = list(cmd._iter_msg_pairs(msg, num_plurals=num_plurals))
        assert msg_pairs == expected

    def test_lint_singular(self, tmp_path, capsys):
        """Two singular messages with mismatched placeholders are both reported on stdout."""
        cmd = frontend.LintCatalog()
        po_file = tmp_path / 'messages.po'
        cmd.input_paths = [po_file]
        po_file.write_text(r"""
msgid "{foo}"
msgstr "{bar} {baz}"

msgid "{foo} {bar}"
msgstr "{bar} {baz}"
""")

        cmd.run()
        captured = capsys.readouterr()
        assert captured.err == ''
        assert captured.out == (f"{po_file}:2: placeholders in msgid differ from placeholders in msgstr:\n"
                                "\tplaceholders in msgid but missing in msgstr: {foo}\n"
                                "\tplaceholders in msgstr but missing in msgid: {bar}, {baz}\n"
                                f"{po_file}:5: placeholders in msgid differ from placeholders in msgstr:\n"
                                "\tplaceholders in msgid but missing in msgstr: {foo}\n"
                                "\tplaceholders in msgstr but missing in msgid: {baz}\n")

    def test_lint_many_plurals(self, tmp_path, capsys):
        """A catalog declaring ``Language: cs_CZ`` with three msgstr forms gets one report per form.

        The expected output compares the first form against ``msgid`` and the
        remaining forms against ``msgid_plural``.
        """
        cmd = frontend.LintCatalog()
        po_file = tmp_path / 'lint.po'
        cmd.input_paths = [po_file]
        po_file.write_text(r"""
msgid ""
msgstr ""
"Language: cs_CZ\n"

msgid "You have {count} new message."
msgid_plural "You have {count} new messages."
msgstr[0] "You have {foo} new message."
msgstr[1] "You have {bar} new messages."
msgstr[2] "You have {baz} new messages."
""")

        cmd.run()
        captured = capsys.readouterr()
        assert captured.err == ''
        assert captured.out == (f"{po_file}:6: placeholders in msgid differ from placeholders in msgstr:\n"
                                "\tplaceholders in msgid but missing in msgstr: {count}\n"
                                "\tplaceholders in msgstr but missing in msgid: {foo}\n"
                                f"{po_file}:6: placeholders in msgid_plural differ from placeholders in msgstr[1]:\n"
                                "\tplaceholders in msgid_plural but missing in msgstr[1]: {count}\n"
                                "\tplaceholders in msgstr[1] but missing in msgid_plural: {bar}\n"
                                f"{po_file}:6: placeholders in msgid_plural differ from placeholders in msgstr[2]:\n"
                                "\tplaceholders in msgid_plural but missing in msgstr[2]: {count}\n"
                                "\tplaceholders in msgstr[2] but missing in msgid_plural: {baz}\n")

    def test_lint_one_plural(self, tmp_path, capsys):
        """A catalog declaring ``Language: zh_TW`` with a single msgstr form gets one report.

        The expected output compares ``msgstr[0]`` against ``msgid_plural``.
        """
        cmd = frontend.LintCatalog()
        po_file = tmp_path / 'lint.po'
        cmd.input_paths = [po_file]
        po_file.write_text(r"""
msgid ""
msgstr ""
"Language: zh_TW\n"

msgid "You have {count} new message."
msgid_plural "You have {count} new messages."
msgstr[0] "You have {foo} new messages."
""")

        cmd.run()
        captured = capsys.readouterr()
        assert captured.err == ''
        assert captured.out == (f"{po_file}:6: placeholders in msgid_plural differ from placeholders in msgstr[0]:\n"
                                "\tplaceholders in msgid_plural but missing in msgstr[0]: {count}\n"
                                "\tplaceholders in msgstr[0] but missing in msgid_plural: {foo}\n")