diff --git a/pkgs/development/python-modules/markitdown/default.nix b/pkgs/development/python-modules/markitdown/default.nix
new file mode 100644
index 00000000000000..7a61ddd5484527
--- /dev/null
+++ b/pkgs/development/python-modules/markitdown/default.nix
@@ -0,0 +1,78 @@
+{
+  lib,
+  buildPythonPackage,
+  fetchFromGitHub,
+  hatchling,
+  beautifulsoup4,
+  mammoth,
+  markdownify,
+  numpy,
+  openpyxl,
+  pandas,
+  pathvalidate,
+  pdfminer-six,
+  puremagic,
+  pydub,
+  python-pptx,
+  requests,
+  speechrecognition,
+  youtube-transcript-api,
+  pytestCheckHook,
+}:
+
+# Packages the `markitdown` Python module, built from a pinned commit of
+# microsoft/markitdown with the hatchling build backend.
+buildPythonPackage {
+  pname = "markitdown";
+  # `src` pins a commit rather than a release tag, so the version follows the
+  # nixpkgs `<base>-unstable-YYYY-MM-DD` scheme. The `0` base assumes no
+  # upstream release precedes this commit; TODO confirm.
+  version = "0-unstable-2024-12-15";
+  pyproject = true;
+
+  src = fetchFromGitHub {
+    owner = "microsoft";
+    repo = "markitdown";
+    rev = "81e3f24acd0049a59cd2dcb2d01d0a98cc57c734";
+    hash = "sha256-ejj7ARvLDzB1WRNkc1zUtzYAujZVysB0MkY/PQKv/nQ=";
+  };
+
+  build-system = [
+    hatchling
+  ];
+
+  dependencies = [
+    beautifulsoup4
+    mammoth
+    markdownify
+    numpy
+    openpyxl
+    pandas
+    pathvalidate
+    pdfminer-six
+    puremagic
+    pydub
+    python-pptx
+    requests
+    speechrecognition
+    youtube-transcript-api
+  ];
+
+  pythonImportsCheck = [
+    "markitdown"
+  ];
+
+  nativeCheckInputs = [ pytestCheckHook ];
+
+  disabledTests = [
+    # Require network access
+    "test_markitdown_remote"
+  ];
+
+  meta = {
+    description = "Python tool for converting files and office documents to Markdown";
+    homepage = "https://github.com/microsoft/markitdown";
+    license = lib.licenses.mit;
+    maintainers = with lib.maintainers; [ drupol ];
+  };
+}
diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix
index 2cde190b84c89e..8a36ec46fd2cdf 100644
--- a/pkgs/top-level/python-packages.nix
+++ b/pkgs/top-level/python-packages.nix
@@ -7855,6 +7855,8 @@ self: super: with self; {
 
   markdownify = callPackage ../development/python-modules/markdownify { };
 
+  markitdown = callPackage ../development/python-modules/markitdown { };
+
   marko = callPackage ../development/python-modules/marko { };
 
   markupsafe = callPackage ../development/python-modules/markupsafe { };