diff --git a/browser/ai_chat/ai_chat_ui_browsertest.cc b/browser/ai_chat/ai_chat_ui_browsertest.cc index ccaa588a28cd..c9621d212ff6 100644 --- a/browser/ai_chat/ai_chat_ui_browsertest.cc +++ b/browser/ai_chat/ai_chat_ui_browsertest.cc @@ -42,7 +42,6 @@ #include "net/test/embedded_test_server/http_response.h" #include "printing/buildflags/buildflags.h" #include "services/network/public/cpp/network_switches.h" -#include "services/screen_ai/buildflags/buildflags.h" #include "ui/compositor/compositor_switches.h" #include "url/gurl.h" @@ -50,18 +49,6 @@ #include "chrome/browser/printing/test_print_preview_observer.h" #endif -#if BUILDFLAG(ENABLE_SCREEN_AI_BROWSERTESTS) && !BUILDFLAG(USE_FAKE_SCREEN_AI) -#define PDF_OCR_INTEGRATION_TEST_ENABLED -#endif - -#if defined(PDF_OCR_INTEGRATION_TEST_ENABLED) -#include "chrome/browser/screen_ai/screen_ai_install_state.h" -#include "components/strings/grit/components_strings.h" -#include "services/screen_ai/public/cpp/utilities.h" -#include "ui/accessibility/accessibility_features.h" -#include "ui/accessibility/ax_features.mojom-features.h" -#endif // defined(PDF_OCR_INTEGRATION_TEST_ENABLED) - namespace { constexpr char kEmbeddedTestServerDirectory[] = "leo"; @@ -310,9 +297,10 @@ IN_PROC_BROWSER_TEST_F(AIChatUIBrowserTest, ExtractionPrintDialog) { #endif // BUILDFLAG(IS_WIN) && defined(ADDRESS_SANITIZER) && // defined(ARCH_CPU_64_BITS) IN_PROC_BROWSER_TEST_F(AIChatUIBrowserTest, MAYBE_PrintPreviewFallback) { - // Falls back when there is no regular DOM content - // pdf test will be in UpstreamPDFIntegratoinTest since we enable upstream pdf - // ocr for all pdf files + NavigateURL(https_server_.GetURL("a.com", "/text_in_image.pdf"), false); + FetchPageContent( + FROM_HERE, "This is the way.\n\nI have spoken.\nWherever I Go, He Goes."); + NavigateURL(https_server_.GetURL("a.com", "/canvas.html"), false); FetchPageContent(FROM_HERE, "this is the way"); @@ -395,110 +383,3 @@ IN_PROC_BROWSER_TEST_F(AIChatUIBrowserTest, {{"test query", "test summary"}, {"test query 2", "test summary 2"}})); } - -#if defined(PDF_OCR_INTEGRATION_TEST_ENABLED) -// Test ai chat integration with upstream kPdfOcr -class UpstreamPDFIntegratoinTest : public AIChatUIBrowserTest { - public: - UpstreamPDFIntegratoinTest() - : embedded_test_server_(net::EmbeddedTestServer::TYPE_HTTPS) { - feature_list_.InitWithFeatures( - {::features::kPdfOcr, ::features::kScreenAITestMode, - ax::mojom::features::kScreenAIOCREnabled}, - {}); - } - - void SetUpOnMainThread() override { - AIChatUIBrowserTest::SetUpOnMainThread(); - - content::SetupCrossSiteRedirector(&embedded_test_server_); - - base::FilePath test_data_dir; - test_data_dir = base::PathService::CheckedGet(chrome::DIR_TEST_DATA); - test_data_dir = - test_data_dir.AppendASCII("pdf").AppendASCII("accessibility"); - embedded_test_server_.ServeFilesFromDirectory(test_data_dir); - ASSERT_TRUE(embedded_test_server_.Start()); - - screen_ai::ScreenAIInstallState::GetInstance()->SetComponentFolder( - screen_ai::GetComponentBinaryPathForTests().DirName()); - } - - void SetUpCommandLine(base::CommandLine* command_line) override { - AIChatUIBrowserTest::SetUpCommandLine(command_line); - command_line->RemoveSwitch(network::switches::kHostResolverRules); - } - - void FetchPageContentAndWaitForOCR( - const base::Location& location, - std::string_view expected_text, - int ocr_status_message_id = IDS_PDF_OCR_COMPLETED) { - SCOPED_TRACE(testing::Message() << location.ToString()); - base::RunLoop run_loop; - chat_tab_helper_->GetPageContent( - base::BindLambdaForTesting( - [&run_loop, expected_text](std::string text, bool is_video, - std::string invalidation_token) { - EXPECT_FALSE(is_video); - EXPECT_EQ(text, expected_text); - run_loop.Quit(); - }), - ""); - auto inner_web_contents = GetActiveWebContents()->GetInnerWebContents(); - ASSERT_TRUE(inner_web_contents.size() == 1); - WaitForAccessibilityTreeToContainNodeWithName( - inner_web_contents[0], l10n_util::GetStringUTF8(ocr_status_message_id)); - run_loop.Run(); - } - - protected: - net::test_server::EmbeddedTestServer embedded_test_server_; - base::test::ScopedFeatureList feature_list_; -}; - -IN_PROC_BROWSER_TEST_F(UpstreamPDFIntegratoinTest, PDFOcr) { - // Single paragraph - NavigateURL( - embedded_test_server_.GetURL("a.com", "/hello-world-in-image.pdf")); - FetchPageContentAndWaitForOCR(FROM_HERE, "Hello, world!"); - - // Multiple paragraphs - NavigateURL(embedded_test_server_.GetURL( - "a.com", "/inaccessible-text-in-three-page.pdf")); - FetchPageContentAndWaitForOCR(FROM_HERE, - "Hello, world!\n" - "Paragraph 1 on Page 2\n" - "Paragraph 2 on Page 2\n" - "Paragraph 1 on Page 3\n" - "Paragraph 2 on Page 3"); -} - -#if BUILDFLAG(ENABLE_TEXT_RECOGNITION) && BUILDFLAG(ENABLE_PRINT_PREVIEW) -IN_PROC_BROWSER_TEST_F(UpstreamPDFIntegratoinTest, - PDFOcrFailed_PrintPreviewFallback) { - // Fallback to print preview extraction when upstream pdf ocr has empty - // results. - NavigateURL(https_server_.GetURL("b.com", "/text_in_image.pdf"), false); - FetchPageContentAndWaitForOCR( - FROM_HERE, "This is the way.\n\nI have spoken.\nWherever I Go, He Goes.", - IDS_PDF_OCR_NO_RESULT); -} -#endif // BUILDFLAG(ENABLE_TEXT_RECOGNITION) && BUILDFLAG(ENABLE_PRINT_PREVIEW) - -IN_PROC_BROWSER_TEST_F(UpstreamPDFIntegratoinTest, PDFOcrWithBlankPage) { - // Single paragraph - NavigateURL( - https_server_.GetURL("a.com", "/hello-world-in-image-has-blank.pdf")); - FetchPageContentAndWaitForOCR(FROM_HERE, "Hello, world!"); - - // Multiple paragraphs - NavigateURL(https_server_.GetURL( - "a.com", "/inaccessible-text-in-three-page-has-blank.pdf")); - FetchPageContentAndWaitForOCR(FROM_HERE, - "Hello, world!\n\n" - "Paragraph 1 on Page 2\n" - "Paragraph 2 on Page 2\n\n" - "Paragraph 1 on Page 3\n" - "Paragraph 2 on Page 3"); -} -#endif // defined(PDF_OCR_INTEGRATION_TEST_ENABLED) diff --git a/chromium_src/chrome/browser/screen_ai/screen_ai_install_state.cc b/chromium_src/chrome/browser/screen_ai/screen_ai_install_state.cc new file mode 100644 index 000000000000..5d1490e2d4dd --- /dev/null +++ b/chromium_src/chrome/browser/screen_ai/screen_ai_install_state.cc @@ -0,0 +1,16 @@ +/* Copyright (c) 2024 The Brave Authors. All rights reserved. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/. */ + +#include "chrome/browser/screen_ai/screen_ai_install_state.h" + +#define ShouldInstall ShouldInstall_ChromiumImpl +#include "src/chrome/browser/screen_ai/screen_ai_install_state.cc" +#undef ShouldInstall + +namespace screen_ai { +bool ScreenAIInstallState::ShouldInstall(PrefService* local_state) { + return false; +} +} // namespace screen_ai diff --git a/chromium_src/chrome/browser/screen_ai/screen_ai_install_state.h b/chromium_src/chrome/browser/screen_ai/screen_ai_install_state.h new file mode 100644 index 000000000000..22d61067b929 --- /dev/null +++ b/chromium_src/chrome/browser/screen_ai/screen_ai_install_state.h @@ -0,0 +1,17 @@ +/* Copyright (c) 2024 The Brave Authors. All rights reserved. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at https://mozilla.org/MPL/2.0/. */ + +#ifndef BRAVE_CHROMIUM_SRC_CHROME_BROWSER_SCREEN_AI_SCREEN_AI_INSTALL_STATE_H_ +#define BRAVE_CHROMIUM_SRC_CHROME_BROWSER_SCREEN_AI_SCREEN_AI_INSTALL_STATE_H_ + +#define ShouldInstall \ + ShouldInstall_ChromiumImpl(PrefService* local_state); \ + static bool ShouldInstall + +#include "src/chrome/browser/screen_ai/screen_ai_install_state.h" // IWYU pragma: export + +#undef ShouldInstall + +#endif // BRAVE_CHROMIUM_SRC_CHROME_BROWSER_SCREEN_AI_SCREEN_AI_INSTALL_STATE_H_ diff --git a/chromium_src/pdf/pdfium/DEPS b/chromium_src/pdf/pdfium/DEPS deleted file mode 100644 index e59caad0df40..000000000000 --- a/chromium_src/pdf/pdfium/DEPS +++ /dev/null @@ -1,3 +0,0 @@ -include_rules = [ - "+third_party/pdfium/public", -] diff --git a/chromium_src/pdf/pdfium/pdfium_page.cc b/chromium_src/pdf/pdfium/pdfium_page.cc deleted file mode 100644 index c7d5f5ed0bc2..000000000000 --- a/chromium_src/pdf/pdfium/pdfium_page.cc +++ /dev/null @@ -1,17 +0,0 @@ -/* Copyright (c) 2024 The Brave Authors. All rights reserved. - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this file, - * You can obtain one at https://mozilla.org/MPL/2.0/. */ - -#include "third_party/pdfium/public/fpdf_catalog.h" - -#define FPDFCatalog_IsTagged \ - false; \ - if (!page_object_count) { \ - images_.push_back(Image()); \ - return; \ - } \ - is_tagged = FPDFCatalog_IsTagged - -#include "src/pdf/pdfium/pdfium_page.cc" -#undef FPDFCatalog_IsTagged diff --git a/components/ai_chat/content/browser/ai_chat_tab_helper.cc b/components/ai_chat/content/browser/ai_chat_tab_helper.cc index bcead876887e..e0720927357b 100644 --- a/components/ai_chat/content/browser/ai_chat_tab_helper.cc +++ b/components/ai_chat/content/browser/ai_chat_tab_helper.cc @@ -52,9 +52,7 @@ void AIChatTabHelper::PDFA11yInfoLoadObserver::AccessibilityEventReceived( for (const auto& node : update.nodes) { const auto& node_name = node.GetStringAttribute(ax::mojom::StringAttribute::kName); - if (node_name == l10n_util::GetStringUTF8(IDS_PDF_LOADED_TO_A11Y_TREE) || - node_name == l10n_util::GetStringUTF8(IDS_PDF_OCR_COMPLETED) || - node_name == l10n_util::GetStringUTF8(IDS_PDF_OCR_NO_RESULT)) { + if (node_name == l10n_util::GetStringUTF8(IDS_PDF_LOADED_TO_A11Y_TREE)) { // features::kUseMoveNotCopyInMergeTreeUpdate updates a11y tree after // `AccessibilityEventReceived` so we cannot assume changes are // reflected upon receiving updates. @@ -251,20 +249,6 @@ void AIChatTabHelper::GetPageContent(GetPageContentCallback callback, bool is_pdf = IsPdf(web_contents()); if (is_pdf && !is_pdf_a11y_info_loaded_) { SetPendingGetContentCallback(std::move(callback)); - // PdfAccessibilityTree::AccessibilityModeChanged handles kPDFOcr changes - // with |always_load_or_reload_accessibility| is true - if (inner_web_contents_) { - auto current_mode = inner_web_contents_->GetAccessibilityMode(); - if (!current_mode.has_mode(ui::AXMode::kPDFOcr)) { - current_mode |= ui::AXMode::kPDFOcr; - scoped_accessibility_mode_ = - content::BrowserAccessibilityState::GetInstance() - ->CreateScopedModeForWebContents(inner_web_contents_, - current_mode); - } - pdf_load_observer_ = - std::make_unique(inner_web_contents_, this); - } // Manually check when pdf extraction requested so we don't always rely on // a11y events to prevent stale callback. It can happens during background // pdf tab loading or bug in upstream kPdfOCR that an empty page in pdf will diff --git a/components/ai_chat/content/browser/ai_chat_tab_helper.h b/components/ai_chat/content/browser/ai_chat_tab_helper.h index 78b4a2bd3968..3817840e6a83 100644 --- a/components/ai_chat/content/browser/ai_chat_tab_helper.h +++ b/components/ai_chat/content/browser/ai_chat_tab_helper.h @@ -29,7 +29,7 @@ class ScopedAccessibilityMode; } class AIChatUIBrowserTest; -class UpstreamPDFIntegratoinTest; + namespace ai_chat { class AIChatMetrics; @@ -110,7 +110,6 @@ class AIChatTabHelper : public content::WebContentsObserver, friend class content::WebContentsUserData; friend class ::AIChatUIBrowserTest; friend class AIChatTabHelperUnitTest; - friend class ::UpstreamPDFIntegratoinTest; // To observe PDF InnerWebContents for "Finished loading PDF" event which // means PDF content has been loaded to an accessibility tree. diff --git a/components/ai_chat/content/browser/pdf_utils.cc b/components/ai_chat/content/browser/pdf_utils.cc index b2439d5f16a3..94c28018416e 100644 --- a/components/ai_chat/content/browser/pdf_utils.cc +++ b/components/ai_chat/content/browser/pdf_utils.cc @@ -35,64 +35,6 @@ ui::AXNode* FindPdfRoot(const ui::AXNode* start_node) { return nullptr; } -#if BUILDFLAG(ENABLE_PDF) -ui::AXNode* FindOcrResultBeginBanner(const ui::AXNode* start_node) { - if (!start_node) { - return nullptr; - } - for (const auto& node : start_node->GetAllChildren()) { - if (node->GetStringAttribute(ax::mojom::StringAttribute::kName) == - l10n_util::GetStringUTF8(IDS_PDF_OCR_RESULT_BEGIN) && - node->GetParent()->GetRole() == ax::mojom::Role::kBanner) { - return node->GetParent(); - } - ui::AXNode* result = FindOcrResultBeginBanner(node); - if (result) { - return result; - } - } - return nullptr; -} - -bool IsOcrResultEndContentInfo(const ui::AXNode* node) { - if (!node) { - return false; - } - if (node->GetRole() == ax::mojom::Role::kContentInfo && - node->GetChildCount() == 1u && - node->GetFirstChild()->GetStringAttribute( - ax::mojom::StringAttribute::kName) == - l10n_util::GetStringUTF8(IDS_PDF_OCR_RESULT_END)) { - return true; - } - return false; -} -#endif - -std::optional GetOcrResult(const ui::AXNode* start_node) { -#if BUILDFLAG(ENABLE_PDF) - if (!start_node) { - return std::nullopt; - } - const auto* ocr_result_begin_banner = FindOcrResultBeginBanner(start_node); - if (!ocr_result_begin_banner) { - return std::nullopt; - } - std::string result; - ui::AXNode* node = ocr_result_begin_banner->GetNextSibling(); - while (node && !IsOcrResultEndContentInfo(node)) { - base::StrAppend( - &result, - {node->GetTextContentUTF8(), - IsOcrResultEndContentInfo(node->GetNextSibling()) ? "" : "\n"}); - node = node->GetNextSibling(); - } - return result; -#else - return std::nullopt; -#endif -} - } // namespace bool IsPdf(content::WebContents* web_contents) { @@ -132,9 +74,7 @@ bool IsPdfLoaded(const ui::AXNode* pdf_root) { const auto& name = pdf_root->GetChildAtIndex(0)->GetChildAtIndex(0)->GetStringAttribute( ax::mojom::StringAttribute::kName); - if (name == l10n_util::GetStringUTF8(IDS_PDF_OCR_IN_PROGRESS) || - name == l10n_util::GetStringUTF8(IDS_PDF_LOADING_TO_A11Y_TREE) || - name == l10n_util::GetStringUTF8(IDS_PDF_OCR_FEATURE_ALERT)) { + if (name == l10n_util::GetStringUTF8(IDS_PDF_LOADING_TO_A11Y_TREE)) { return false; } #endif @@ -153,10 +93,8 @@ std::string ExtractPdfContent(const ui::AXNode* pdf_root) { for (auto it = children.cbegin() + 1; it != children.cend(); ++it) { const ui::AXNode* node = *it; if (node->GetRole() == ax::mojom::Role::kRegion) { - auto ocr_result = GetOcrResult(node); - base::StrAppend(&pdf_content, - {ocr_result ? *ocr_result : node->GetTextContentUTF8(), - it == children.cend() - 1 ? "" : "\n"}); + base::StrAppend(&pdf_content, {node->GetTextContentUTF8(), + it == children.cend() - 1 ? "" : "\n"}); } } return pdf_content; diff --git a/test/data/leo/hello-world-in-image-has-blank.pdf b/test/data/leo/hello-world-in-image-has-blank.pdf deleted file mode 100644 index 53d1ddbf7c5c..000000000000 Binary files a/test/data/leo/hello-world-in-image-has-blank.pdf and /dev/null differ diff --git a/test/data/leo/inaccessible-text-in-three-page-has-blank.pdf b/test/data/leo/inaccessible-text-in-three-page-has-blank.pdf deleted file mode 100644 index 471d9f259638..000000000000 Binary files a/test/data/leo/inaccessible-text-in-three-page-has-blank.pdf and /dev/null differ