diff --git a/pkgs/development/python-modules/markitdown/default.nix b/pkgs/development/python-modules/markitdown/default.nix new file mode 100644 index 0000000000000..a52844307a577 --- /dev/null +++ b/pkgs/development/python-modules/markitdown/default.nix @@ -0,0 +1,76 @@ +{ + lib, + buildPythonPackage, + fetchFromGitHub, + hatchling, + beautifulsoup4, + ffmpeg-headless, + mammoth, + markdownify, + numpy, + openai, + openpyxl, + pandas, + pathvalidate, + pdfminer-six, + puremagic, + pydub, + python-pptx, + requests, + speechrecognition, + youtube-transcript-api, + pytestCheckHook, + gitUpdater, +}: + +buildPythonPackage { + pname = "markitdown"; + version = "unstable-2024-12-18"; + pyproject = true; + + src = fetchFromGitHub { + owner = "microsoft"; + repo = "markitdown"; + rev = "3ce21a47abed0e4db162de1088d661887ae076ff"; + hash = "sha256-5YafFL8OHNcGgB/qH6CmX0rTith1ZSRNIa+ktl4Ffvg="; + }; + + build-system = [ hatchling ]; + + dependencies = [ + beautifulsoup4 + ffmpeg-headless + mammoth + markdownify + numpy + openai + openpyxl + pandas + pathvalidate + pdfminer-six + puremagic + pydub + python-pptx + requests + speechrecognition + youtube-transcript-api + ]; + + pythonImportsCheck = [ "markitdown" ]; + + nativeCheckInputs = [ pytestCheckHook ]; + + disabledTests = [ + # Require network access + "test_markitdown_remote" + ]; + + passthru.updateScripts = gitUpdater { }; + + meta = { + description = "Python tool for converting files and office documents to Markdown"; + homepage = "https://github.com/microsoft/markitdown"; + license = lib.licenses.mit; + maintainers = with lib.maintainers; [ drupol ]; + }; +} diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix index 4a807b9781e5c..fb8e06097ad75 100644 --- a/pkgs/top-level/python-packages.nix +++ b/pkgs/top-level/python-packages.nix @@ -7869,6 +7869,8 @@ self: super: with self; { markdownify = callPackage ../development/python-modules/markdownify { }; + markitdown = callPackage ../development/python-modules/markitdown { }; + marko = callPackage ../development/python-modules/marko { }; markupsafe = callPackage ../development/python-modules/markupsafe { };