diff --git a/lib/expo/message.ex b/lib/expo/message.ex index fd56ecc..a4a6b7c 100644 --- a/lib/expo/message.ex +++ b/lib/expo/message.ex @@ -24,9 +24,7 @@ defmodule Expo.Message do See `key/1`. """ - @opaque key :: - {msgctxt :: String.t(), - msgid :: String.t() | {msgid :: String.t(), msgid_plural :: String.t()}} + @opaque key :: Singular.key() | Plural.key() @doc """ Returns a "key" that can be used to identify a message. @@ -147,4 +145,46 @@ defmodule Expo.Message do when mod in [Singular, Plural] do mod.source_line_number(message, block, default) end + + @doc """ + Merges two messages. + + ## Examples + + iex> a = %Expo.Message.Singular{msgid: ["test"], flags: ["one"]} + ...> b = %Expo.Message.Singular{msgid: ["test"], flags: ["two"]} + ...> Expo.Message.merge(a, b) + %Expo.Message.Singular{msgid: ["test"], flags: ["one", "two"]} + + iex> a = %Expo.Message.Singular{msgid: ["test"]} + ...> b = %Expo.Message.Plural{msgid: ["test"], msgid_plural: ["tests"]} + ...> Expo.Message.merge(a, b) + %Expo.Message.Plural{msgid: ["test"], msgid_plural: ["tests"]} + + """ + @doc since: "0.5.0" + @spec merge(Singular.t(), Singular.t()) :: Singular.t() + @spec merge(t(), Plural.t()) :: Plural.t() + @spec merge(Plural.t(), t()) :: Plural.t() + def merge(%mod{} = message_1, %mod{} = message_2), do: mod.merge(message_1, message_2) + + def merge(%Singular{} = message_1, %Plural{} = message_2), + do: Plural.merge(singular_to_plural(message_1), message_2) + + def merge(%Plural{} = message_1, %Singular{} = message_2), + do: Plural.merge(message_1, singular_to_plural(message_2)) + + defp singular_to_plural(%Singular{msgstr: msgstr} = singular) do + msgstr = if IO.iodata_length(msgstr) > 0, do: %{0 => msgstr}, else: %{} + + struct!( + Plural, + singular + |> Map.from_struct() + |> Map.merge(%{ + msgstr: msgstr, + msgid_plural: [] + }) + ) + end end diff --git a/lib/expo/message/plural.ex b/lib/expo/message/plural.ex index f5d647c..d6f16e7 100644 --- a/lib/expo/message/plural.ex +++ 
b/lib/expo/message/plural.ex @@ -21,6 +21,8 @@ defmodule Expo.Message.Plural do @opaque meta :: %{optional(:source_line) => %{block() => non_neg_integer()}} + @opaque key :: {msgctxt :: String.t(), msgid :: String.t()} + @type t :: %__MODULE__{ msgid: Message.msgid(), msgid_plural: [Message.msgid()], @@ -83,14 +85,13 @@ defmodule Expo.Message.Plural do ## Examples iex> Plural.key(%Plural{msgid: ["cat"], msgid_plural: ["cats"]}) - {"", {"cat", "cats"}} + {"", "cat"} """ @doc since: "0.5.0" - @spec key(t()) :: {String.t(), {String.t(), String.t()}} - def key(%__MODULE__{msgctxt: msgctxt, msgid: msgid, msgid_plural: msgid_plural} = _message) do - {IO.iodata_to_binary(msgctxt || []), - {IO.iodata_to_binary(msgid), IO.iodata_to_binary(msgid_plural)}} + @spec key(t()) :: key() + def key(%__MODULE__{msgctxt: msgctxt, msgid: msgid} = _message) do + {IO.iodata_to_binary(msgctxt || []), IO.iodata_to_binary(msgid)} end @doc """ @@ -170,4 +171,43 @@ defmodule Expo.Message.Plural do (is_tuple(block) and elem(block, 0) == :msgstr and is_integer(elem(block, 1))) do meta[:source_line][block] || default end + + @doc """ + Merges two plural messages. 
+ + ## Examples + + iex> a = %Expo.Message.Plural{msgid: ["test"], msgid_plural: ["one"], flags: ["one"], msgstr: %{0 => "une"}} + ...> b = %Expo.Message.Plural{msgid: ["test"], msgid_plural: ["two"], flags: ["two"], msgstr: %{2 => "deux"}} + ...> Expo.Message.Plural.merge(a, b) + %Expo.Message.Plural{msgid: ["test"], msgid_plural: ["two"], flags: ["one", "two"], msgstr: %{0 => "une", 2 => "deux"}} + + """ + @doc since: "0.5.0" + @spec merge(t(), t()) :: t() + def merge(message_1, message_2) do + Map.merge(message_1, message_2, fn + key, value_1, value_2 when key in [:msgid, :msgid_plural] -> + if IO.iodata_length(value_2) > 0, do: value_2, else: value_1 + + :msgctxt, _msgctxt_a, msgctxt_b -> + msgctxt_b + + key, value_1, value_2 + when key in [:comments, :extracted_comments, :flags, :previous_messages, :references] -> + Enum.concat(value_1, value_2) + + :msgstr, msgstr_a, msgstr_b -> + merge_msgstr(msgstr_a, msgstr_b) + + _key, _value_1, value_2 -> + value_2 + end) + end + + defp merge_msgstr(msgstrs_1, msgstrs_2) do + Map.merge(msgstrs_1, msgstrs_2, fn _key, msgstr_1, msgstr_2 -> + if IO.iodata_length(msgstr_2) > 0, do: msgstr_2, else: msgstr_1 + end) + end end diff --git a/lib/expo/message/singular.ex b/lib/expo/message/singular.ex index ecf12ee..24b8c3a 100644 --- a/lib/expo/message/singular.ex +++ b/lib/expo/message/singular.ex @@ -20,6 +20,8 @@ defmodule Expo.Message.Singular do @opaque meta :: %{optional(:source_line) => %{block() => non_neg_integer()}} + @opaque key :: {msgctxt :: String.t(), msgid :: String.t()} + @type t :: %__MODULE__{ msgid: Message.msgid(), msgstr: Message.msgstr(), @@ -85,7 +87,7 @@ defmodule Expo.Message.Singular do {"context", "foo"} """ - @spec key(t()) :: {String.t(), String.t()} + @spec key(t()) :: key() def key(%__MODULE__{msgctxt: msgctxt, msgid: msgid} = _message) do {IO.iodata_to_binary(msgctxt || []), IO.iodata_to_binary(msgid)} end @@ -160,4 +162,34 @@ defmodule Expo.Message.Singular do when block in [:msgid, :msgstr, 
:msgctxt] do meta[:source_line][block] || default end + + @doc """ + Merges two singular messages. + + ## Examples + + iex> a = %Expo.Message.Singular{msgid: ["test"], flags: ["one"]} + ...> b = %Expo.Message.Singular{msgid: ["test"], flags: ["two"]} + ...> Expo.Message.Singular.merge(a, b) + %Expo.Message.Singular{msgid: ["test"], flags: ["one", "two"]} + + """ + @doc since: "0.5.0" + @spec merge(t(), t()) :: t() + def merge(message_1, message_2) do + Map.merge(message_1, message_2, fn + key, value_1, value_2 when key in [:msgid, :msgstr] -> + if IO.iodata_length(value_2) > 0, do: value_2, else: value_1 + + :msgctxt, _msgctxt_a, msgctxt_b -> + msgctxt_b + + key, value_1, value_2 + when key in [:comments, :extracted_comments, :flags, :previous_messages, :references] -> + Enum.concat(value_1, value_2) + + _key, _value_1, value_2 -> + value_2 + end) + end end diff --git a/lib/expo/po.ex b/lib/expo/po.ex index 766a9a1..cd37896 100644 --- a/lib/expo/po.ex +++ b/lib/expo/po.ex @@ -105,6 +105,7 @@ defmodule Expo.PO do ...> msgstr "" ...> \""") ** (Expo.PO.DuplicateMessagesError) 4: found duplicate on line 4 for msgid: 'test' + Run mix expo.msguniq with the input file to merge the duplicates """ @spec parse_string!(String.t(), [parse_option()]) :: Messages.t() diff --git a/lib/expo/po/duplicate_translations_error.ex b/lib/expo/po/duplicate_translations_error.ex index 851ea84..9d79839 100644 --- a/lib/expo/po/duplicate_translations_error.ex +++ b/lib/expo/po/duplicate_translations_error.ex @@ -3,19 +3,38 @@ defmodule Expo.PO.DuplicateMessagesError do An error raised when duplicate messages are detected. 
""" + alias Expo.Message + alias Expo.Messages + @type t :: %__MODULE__{ file: Path.t() | nil, - duplicates: [{message :: String.t(), line :: pos_integer, original_line: pos_integer}] + duplicates: [ + {message :: Message.t(), error_message :: String.t(), line :: pos_integer, + original_line: pos_integer} + ], + catalogue: Messages.t() } - defexception [:file, :duplicates] + defexception [:file, :duplicates, :catalogue] @impl Exception def message(%__MODULE__{file: file, duplicates: duplicates}) do - prefix = if file, do: "#{Path.relative_to_cwd(file)}:", else: "" + file = if file, do: Path.relative_to_cwd(file) + + prefix = if file, do: [file, ":"], else: [] + + fix_description = + if file, + do: ["Run mix expo.msguniq ", file, " to merge the duplicates"], + else: "Run mix expo.msguniq with the input file to merge the duplicates" - Enum.map_join(duplicates, "\n", fn {message, new_line, _old_line} -> - "#{prefix}#{new_line}: #{message}" - end) + IO.iodata_to_binary([ + # Put a newline between entries so each duplicate is reported on its own line. + Enum.map_intersperse(duplicates, "\n", fn {_message, error_message, new_line, _old_line} -> + [prefix, Integer.to_string(new_line), ": ", error_message] + end), + "\n", + fix_description + ]) end end diff --git a/lib/expo/po/parser.ex b/lib/expo/po/parser.ex index d0fbc68..ece931e 100644 --- a/lib/expo/po/parser.ex +++ b/lib/expo/po/parser.ex @@ -13,15 +13,9 @@ defmodule Expo.PO.Parser do content = prune_bom(content, Keyword.get(opts, :file, "nofile")) with {:ok, tokens} <- tokenize(content), - {:ok, top_comments, headers, messages} <- parse_tokens(tokens) do - po = %Messages{ - headers: headers, - messages: messages, - top_comments: top_comments, - file: Keyword.get(opts, :file) - } - - {:ok, po} + {:ok, po} <- parse_tokens(tokens), + {:ok, po} <- check_for_duplicates(po) do + {:ok, %Messages{po | file: Keyword.get(opts, :file)}} else {:error, %mod{} = error} when mod in [SyntaxError, DuplicateMessagesError] -> {:error, %{error | file: opts[:file]}} @@ -36,8 +30,15 @@ defmodule Expo.PO.Parser do end defp 
parse_tokens(tokens) when is_list(tokens) do - case :expo_po_parser.parse(tokens) do - {:ok, po_entries} -> parse_yecc_result(po_entries) + with {:ok, po_entries} <- :expo_po_parser.parse(tokens), + {:ok, top_comments, headers, messages} <- parse_yecc_result(po_entries) do + {:ok, + %Messages{ + headers: headers, + messages: messages, + top_comments: top_comments + }} + else {:error, _reason} = error -> parse_error(error) end end @@ -49,10 +50,8 @@ defmodule Expo.PO.Parser do defp parse_yecc_result({:messages, messages}) do unpacked_messages = Enum.map(messages, &unpack_comments/1) - with :ok <- check_for_duplicates(messages) do - {headers, top_comments, messages} = Util.extract_meta_headers(unpacked_messages) - {:ok, top_comments, headers, messages} - end + {headers, top_comments, messages} = Util.extract_meta_headers(unpacked_messages) + {:ok, top_comments, headers, messages} end defp unpack_comments(message) do @@ -120,42 +119,61 @@ defmodule Expo.PO.Parser do end) end - defp check_for_duplicates(messages, existing \\ %{}, duplicates \\ []) + defp check_for_duplicates(messages, existing \\ %{}, duplicates \\ [], keep \\ []) - defp check_for_duplicates([message | messages], existing, duplicates) do + defp check_for_duplicates( + %Messages{messages: [message | messages]} = po, + existing, + duplicates, + keep + ) do key = Message.key(message) source_line = Message.source_line_number(message, :msgid) - duplicates = + {duplicates, keep} = case Map.fetch(existing, key) do {:ok, old_line} -> - [ - build_duplicated_error(message, old_line, source_line) - | duplicates - ] + {[ + build_duplicated_error(message, old_line, source_line) + | duplicates + ], keep} :error -> - duplicates + {duplicates, [message | keep]} end - check_for_duplicates(messages, Map.put_new(existing, key, source_line), duplicates) + check_for_duplicates( + %Messages{po | messages: messages}, + Map.put_new(existing, key, source_line), + duplicates, + keep + ) end - defp check_for_duplicates([], 
_existing, []), do: :ok + defp check_for_duplicates(%Messages{messages: []} = po, _existing, [], keep), + do: {:ok, %Messages{po | messages: Enum.reverse(keep)}} - defp check_for_duplicates([], _existing, duplicates), - do: {:error, %DuplicateMessagesError{duplicates: Enum.reverse(duplicates)}} + defp check_for_duplicates(%Messages{messages: []} = po, _existing, duplicates, keep), + do: + {:error, + %DuplicateMessagesError{ + duplicates: Enum.reverse(duplicates), + catalogue: %Messages{po | messages: Enum.reverse(keep)} + }} - defp build_duplicated_error(%Message.Singular{} = t, old_line, new_line) do - id = IO.iodata_to_binary(t.msgid) - {"found duplicate on line #{new_line} for msgid: '#{id}'", new_line, old_line} + defp build_duplicated_error(%Message.Singular{} = message, old_line, new_line) do + id = IO.iodata_to_binary(message.msgid) + {message, "found duplicate on line #{new_line} for msgid: '#{id}'", new_line, old_line} end - defp build_duplicated_error(%Message.Plural{} = t, old_line, new_line) do - id = IO.iodata_to_binary(t.msgid) - idp = IO.iodata_to_binary(t.msgid_plural) - msg = "found duplicate on line #{new_line} for msgid: '#{id}' and msgid_plural: '#{idp}'" - {msg, new_line, old_line} + defp build_duplicated_error(%Message.Plural{} = message, old_line, new_line) do + id = IO.iodata_to_binary(message.msgid) + idp = IO.iodata_to_binary(message.msgid_plural) + + error_message = + "found duplicate on line #{new_line} for msgid: '#{id}' and msgid_plural: '#{idp}'" + + {message, error_message, new_line, old_line} end defp strip_leading(subject, to_strip) do diff --git a/lib/mix/tasks/expo.msguniq.ex b/lib/mix/tasks/expo.msguniq.ex new file mode 100644 index 0000000..e907444 --- /dev/null +++ b/lib/mix/tasks/expo.msguniq.ex @@ -0,0 +1,102 @@ +defmodule Mix.Tasks.Expo.Msguniq do + @shortdoc "Unify duplicate translations in message catalog" + + @moduledoc """ + Unifies duplicate translations in the given PO file. 
+ + By default, this task outputs the file on standard output. If you want to + *overwrite* the given PO file, pass in the `--output-file` flag. + + ## Usage + + mix expo.msguniq PO_FILE [--output-file=OUTPUT_FILE] + + ## Options + + * `--output-file` (`-o`) - Default: `-` - File to store the output at. `-` for + STDOUT. + + """ + @moduledoc since: "0.5.0" + + use Mix.Task + + alias Expo.Message + alias Expo.Messages + alias Expo.PO + alias Expo.PO.DuplicateMessagesError + + @switches [ + output_file: :string + ] + @aliases [ + o: :output_file + ] + @default_options [output_file: "-"] + + @impl Mix.Task + def run(args) do + Application.ensure_all_started(:expo) + {opts, argv} = OptionParser.parse!(args, switches: @switches, aliases: @aliases) + + opts = Keyword.merge(@default_options, opts) + + output = + case opts[:output_file] do + "-" -> IO.stream(:stdio, :line) + file -> File.stream!(file) + end + + file = + case argv do + [] -> + Mix.raise(""" + mix expo.msguniq failed due to missing po file path argument + """) + + [_file_one, _file_two | _other_files] -> + Mix.raise(""" + mix expo.msguniq failed due to multiple po file path arguments + Only one is currently supported + """) + + [file] -> + file + end + + case PO.parse_file(file) do + {:ok, _messages} -> + :ok + + {:error, %DuplicateMessagesError{duplicates: duplicates, catalogue: catalogue}} -> + po = + duplicates + |> Enum.reduce(catalogue, &merge_duplicate/2) + |> PO.compose() + + _output = Enum.into(po, output) + + IO.puts(:stderr, IO.ANSI.format("Merged #{length(duplicates)} translations")) + + {:error, error} -> + raise error + end + end + + defp merge_duplicate( + {duplicate, _error_message, _line, _original_line}, + %Messages{messages: messages} = po + ) do + %Messages{ + po + | messages: + Enum.map(messages, fn message -> + if Message.key(message) == Message.key(duplicate) do + Message.merge(message, duplicate) + else + message + end + end) + } + end +end diff --git a/test/expo/message_test.exs 
b/test/expo/message_test.exs index 5c6188c..28ca41d 100644 --- a/test/expo/message_test.exs +++ b/test/expo/message_test.exs @@ -13,13 +13,13 @@ defmodule Expo.MessageTest do end test "plural" do - assert {"", {"foo", "foos"}} = + assert {"", "foo"} = Message.key(%Message.Plural{ msgid: ["foo"], msgid_plural: ["foos"] }) - assert {"ctxt", {"foo", "foos"}} = + assert {"ctxt", "foo"} = Message.key(%Message.Plural{ msgctxt: "ctxt", msgid: ["foo"], diff --git a/test/expo/po_test.exs b/test/expo/po_test.exs index 05f8963..1474fb0 100644 --- a/test/expo/po_test.exs +++ b/test/expo/po_test.exs @@ -478,11 +478,11 @@ defmodule Expo.POTest do messages: [ %Message.Singular{msgid: ["a", "b"], msgstr: ["a", "b"]}, %Expo.Message.Plural{ - msgid: ["a", "b"], - msgid_plural: ["a", "bs"], + msgid: ["c", "d"], + msgid_plural: ["c", "ds"], msgstr: %{ - 0 => ["a", "b"], - 1 => ["a", "bs"] + 0 => ["c", "d"], + 1 => ["c", "ds"] } } ] @@ -493,14 +493,14 @@ defmodule Expo.POTest do msgstr "a" "b" - msgid "a" - "b" - msgid_plural "a" - "bs" - msgstr[0] "a" - "b" - msgstr[1] "a" - "bs" + msgid "c" + "d" + msgid_plural "c" + "ds" + msgstr[0] "c" + "d" + msgstr[1] "c" + "ds" """) end @@ -509,7 +509,7 @@ defmodule Expo.POTest do %Messages{ messages: [ %Message.Singular{ - msgid: ["hel", "l", "o"], + msgid: ["h", "i"], msgstr: ["ciao"], comments: [" comment"], obsolete: true @@ -525,8 +525,8 @@ defmodule Expo.POTest do }} = PO.parse_string(""" # comment - #~ msgid "hel" "l" - #~ "o" + #~ msgid "h" + #~ "i" #~ msgstr "ciao" # comment @@ -655,9 +655,12 @@ defmodule Expo.POTest do assert {:error, %DuplicateMessagesError{ duplicates: [ - {"found duplicate on line 4 for msgid: 'foo'", 4, 1}, - {"found duplicate on line 7 for msgid: 'foo'", 7, 1} - ] + {%Message.Singular{msgstr: ["baz"]}, + "found duplicate on line 4 for msgid: 'foo'", 4, 1}, + {%Message.Singular{msgstr: ["bong"]}, + "found duplicate on line 7 for msgid: 'foo'", 7, 1} + ], + catalogue: %Messages{messages: [%Message.Singular{msgstr: 
["bar"]}]} }} = PO.parse_string(""" msgid "foo" @@ -673,7 +676,7 @@ defmodule Expo.POTest do # Works if the msgid is split differently as well assert {:error, %DuplicateMessagesError{ - duplicates: [{"found duplicate on line 4 for msgid: 'foo'", 4, 1}] + duplicates: [{_message, "found duplicate on line 4 for msgid: 'foo'", 4, 1}] }} = PO.parse_string(""" msgid "foo" "" @@ -682,14 +685,33 @@ defmodule Expo.POTest do msgid "" "foo" msgstr "baz" """) + + # Detects duplicates for plural messages as well + assert {:error, + %DuplicateMessagesError{ + duplicates: [ + {_message, + "found duplicate on line 4 for msgid: 'foo' and msgid_plural: 'foos'", 4, 1} + ] + }} = + PO.parse_string(""" + msgid "foo" + msgstr "bar" + + msgid "foo" + msgid_plural "foos" + msgstr[0] "baz" + """) end test "duplicated plural messages cause an error" do assert {:error, %DuplicateMessagesError{ duplicates: [ - {"found duplicate on line 5 for msgid: 'foo' and msgid_plural: 'foos'", 5, 1} - ] + {%Message.Plural{msgstr: %{0 => ["baz"]}}, + "found duplicate on line 5 for msgid: 'foo' and msgid_plural: 'foos'", 5, 1} + ], + catalogue: %Messages{messages: [%Message.Plural{msgstr: %{0 => ["bar"]}}]} }} = PO.parse_string(""" msgid "foo" @@ -899,7 +921,8 @@ defmodule Expo.POTest do test "file with duplicate messages" do fixture_path = "test/fixtures/po/duplicate_messages.po" - msg = "file:4: found duplicate on line 4 for msgid: 'test'" + msg = + "file:4: found duplicate on line 4 for msgid: 'test'\nRun mix expo.msguniq file to merge the duplicates" assert_raise DuplicateMessagesError, msg, fn -> PO.parse_string!(File.read!(fixture_path), file: "file") @@ -1037,7 +1060,9 @@ defmodule Expo.POTest do test "file with duplicate messages" do fixture_path = "test/fixtures/po/duplicate_messages.po" - message = "#{fixture_path}:4: found duplicate on line 4 for msgid: 'test'" + + message = + "#{fixture_path}:4: found duplicate on line 4 for msgid: 'test'\nRun mix expo.msguniq #{fixture_path} to merge the 
duplicates" assert_raise DuplicateMessagesError, message, fn -> PO.parse_file!(fixture_path) diff --git a/test/mix/tasks/expo.msguniq_test.exs b/test/mix/tasks/expo.msguniq_test.exs new file mode 100644 index 0000000..f19da84 --- /dev/null +++ b/test/mix/tasks/expo.msguniq_test.exs @@ -0,0 +1,74 @@ +defmodule Mix.Tasks.Expo.MsguniqTest do + use ExUnit.Case, async: true + + import ExUnit.CaptureIO + + alias Expo.Message + alias Expo.Messages + alias Expo.PO + alias Expo.PO.SyntaxError + alias Mix.Tasks.Expo.Msguniq + + setup do + temp_file = Path.join(System.tmp_dir!(), make_ref() |> :erlang.phash2() |> to_string()) + + on_exit(fn -> File.rm(temp_file) end) + + {:ok, temp_file: temp_file} + end + + test "leaves file without duplicates as is", %{temp_file: temp_file} do + po_path = "test/fixtures/po/valid.po" + + File.cp!(po_path, temp_file) + + assert capture_io(fn -> + Msguniq.run([temp_file]) + end) == "" + end + + test "merges duplicates into output file", %{temp_file: temp_file} do + po_path = "test/fixtures/po/duplicate_messages.po" + + assert capture_io(:stderr, fn -> + Msguniq.run([po_path, "--output-file", temp_file]) + end) =~ "Merged 1 translation" + + assert {:ok, %Messages{messages: [%Message.Singular{msgid: ["test"]}]}} = + PO.parse_file(temp_file) + end + + test "merges duplicates into stdout" do + po_path = "test/fixtures/po/duplicate_messages.po" + + output = + capture_io(fn -> + assert capture_io(:stderr, fn -> + Msguniq.run([po_path]) + end) =~ "Merged 1 translation" + end) + + assert {:ok, %Messages{messages: [%Message.Singular{msgid: ["test"]}]}} = + PO.parse_string(output) + end + + test "crashes with syntax error", %{temp_file: temp_file} do + File.write!(temp_file, "invalid") + + assert_raise SyntaxError, fn -> + Msguniq.run([temp_file]) + end + end + + test "errors with missing file" do + assert_raise Mix.Error, + "mix expo.msguniq failed due to missing po file path argument\n", + fn -> Msguniq.run([]) end + end + + test "errors with multiple 
files" do + assert_raise Mix.Error, + "mix expo.msguniq failed due to multiple po file path arguments\nOnly one is currently supported\n", + fn -> Msguniq.run(["file_one", "file_two"]) end + end +end