From c5d9c578d0a8168cbad60e5752d0dbc06c1454ce Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Thu, 14 Nov 2024 11:27:51 +0000 Subject: [PATCH 01/22] Upgrade AWS and Mongo Nuget packages --- Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj b/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj index 8dc01b7..64e5217 100644 --- a/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj +++ b/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj @@ -14,14 +14,14 @@ - - + + - + From eef793acdbeab70128e3df080886a216a5e16310 Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Thu, 14 Nov 2024 12:04:30 +0000 Subject: [PATCH 02/22] Upgraded all projects from .Net 6 to .Net 8. Also upgraded MSTest related Nuget packages. --- .../NationalArchives.Taxonomy.Batch.UnitTests.csproj | 8 ++++---- .../NationalArchives.Taxonomy.Batch.Update.Elastic.csproj | 2 +- .../NationalArchives.Taxonomy.Batch.csproj | 4 ++-- .../Properties/PublishProfiles/FolderProfile.pubxml | 2 +- .../NationalArchives.Taxonomy.Common.UnitTests.csproj | 6 +++--- TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj | 4 ++-- .../NationalArchives.Taxonomy.CLI.UnitTests.csproj | 8 ++++---- tna.taxonomy.api/tna.taxonomy.api.csproj | 4 ++-- 8 files changed, 19 insertions(+), 19 deletions(-) diff --git a/NationalArchives.Taxonomy.Batch.UnitTests/NationalArchives.Taxonomy.Batch.UnitTests.csproj b/NationalArchives.Taxonomy.Batch.UnitTests/NationalArchives.Taxonomy.Batch.UnitTests.csproj index 96c5ab6..9e78b48 100644 --- a/NationalArchives.Taxonomy.Batch.UnitTests/NationalArchives.Taxonomy.Batch.UnitTests.csproj +++ b/NationalArchives.Taxonomy.Batch.UnitTests/NationalArchives.Taxonomy.Batch.UnitTests.csproj @@ -1,14 +1,14 @@  - net6.0 + net8.0 false - - - + + + diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.Elastic.csproj b/NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.Elastic.csproj index 1066b92..ea38947 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.Elastic.csproj +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.Elastic.csproj @@ -2,7 +2,7 @@ Exe - net6.0 + net8.0 true diff --git a/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj b/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj index 788eb17..1e8b785 100644 --- a/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj +++ b/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj @@ -2,8 +2,8 @@ Exe - net6.0 - win10-x64 + net8.0 + win-x64 diff --git a/NationalArchives.Taxonomy.Batch/Properties/PublishProfiles/FolderProfile.pubxml b/NationalArchives.Taxonomy.Batch/Properties/PublishProfiles/FolderProfile.pubxml index aa76ae4..5600817 100644 --- a/NationalArchives.Taxonomy.Batch/Properties/PublishProfiles/FolderProfile.pubxml +++ b/NationalArchives.Taxonomy.Batch/Properties/PublishProfiles/FolderProfile.pubxml @@ -9,7 +9,7 @@ https://go.microsoft.com/fwlink/?LinkID=208121. 
C:\Projects\taxonomy-elastic\NationalArchives.Taxonomy.Batch\bin\Release\net5.0\publish\win-x64\fw-dep FileSystem net5.0 - win10-x64 + win-x64 false False False diff --git a/NationalArchives.Taxonomy.Common.UnitTests/NationalArchives.Taxonomy.Common.UnitTests.csproj b/NationalArchives.Taxonomy.Common.UnitTests/NationalArchives.Taxonomy.Common.UnitTests.csproj index 7b0e1fc..22be38d 100644 --- a/NationalArchives.Taxonomy.Common.UnitTests/NationalArchives.Taxonomy.Common.UnitTests.csproj +++ b/NationalArchives.Taxonomy.Common.UnitTests/NationalArchives.Taxonomy.Common.UnitTests.csproj @@ -8,9 +8,9 @@ - - - + + + diff --git a/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj b/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj index addc3d3..52a161c 100644 --- a/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj +++ b/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj @@ -2,10 +2,10 @@ Exe - net6.0 + net8.0 NationalArchives.Taxonomy.CLI NationalArchives.Taxonomy.CLI - win10-x64;win7-x86 + win-x64;win-x86 diff --git a/TaxonomyCliUnitTests/NationalArchives.Taxonomy.CLI.UnitTests.csproj b/TaxonomyCliUnitTests/NationalArchives.Taxonomy.CLI.UnitTests.csproj index 305615a..617b89e 100644 --- a/TaxonomyCliUnitTests/NationalArchives.Taxonomy.CLI.UnitTests.csproj +++ b/TaxonomyCliUnitTests/NationalArchives.Taxonomy.CLI.UnitTests.csproj @@ -1,14 +1,14 @@  - net6.0 + net8.0 false - - - + + + diff --git a/tna.taxonomy.api/tna.taxonomy.api.csproj b/tna.taxonomy.api/tna.taxonomy.api.csproj index 7514f2d..2e5e2b8 100644 --- a/tna.taxonomy.api/tna.taxonomy.api.csproj +++ b/tna.taxonomy.api/tna.taxonomy.api.csproj @@ -1,7 +1,7 @@ - + - net6.0 + net8.0 enable enable From 39bde28ec51349ec339d19dc4f31b320a182560d Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Thu, 14 Nov 2024 13:29:02 +0000 Subject: [PATCH 03/22] Upgraded AutoMapper package versions and removed the AutoMapper DI package, as DI is now incorporated in AutoMapper itself. 
--- .../NationalArchives.Taxonomy.Batch.csproj | 2 +- .../NationalArchives.Taxonomy.Common.UnitTests.csproj | 2 +- Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj | 4 ++-- TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj | 2 +- tna.taxonomy.api/tna.taxonomy.api.csproj | 3 +-- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj b/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj index 1e8b785..566af2d 100644 --- a/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj +++ b/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj @@ -11,7 +11,7 @@ - + diff --git a/NationalArchives.Taxonomy.Common.UnitTests/NationalArchives.Taxonomy.Common.UnitTests.csproj b/NationalArchives.Taxonomy.Common.UnitTests/NationalArchives.Taxonomy.Common.UnitTests.csproj index 22be38d..9292c40 100644 --- a/NationalArchives.Taxonomy.Common.UnitTests/NationalArchives.Taxonomy.Common.UnitTests.csproj +++ b/NationalArchives.Taxonomy.Common.UnitTests/NationalArchives.Taxonomy.Common.UnitTests.csproj @@ -1,7 +1,7 @@  - net6.0 + net8.0 false diff --git a/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj b/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj index 64e5217..7c2f790 100644 --- a/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj +++ b/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj @@ -1,7 +1,7 @@  - netstandard2.0 + netstandard2.1 NationalArchives.Taxonomy.Common NationalArchives.Taxonomy.Common @@ -13,7 +13,7 @@ - + diff --git a/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj b/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj index 52a161c..dd2dd61 100644 --- a/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj +++ b/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj @@ -9,7 +9,7 @@ - + diff --git a/tna.taxonomy.api/tna.taxonomy.api.csproj b/tna.taxonomy.api/tna.taxonomy.api.csproj index 2e5e2b8..e5282b4 100644 --- a/tna.taxonomy.api/tna.taxonomy.api.csproj +++ b/tna.taxonomy.api/tna.taxonomy.api.csproj @@ -7,8 +7,7 @@ - - + From 5cb8200eabcb5f4d8e0ca4977dfa45238ee8976c Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Thu, 14 Nov 2024 17:11:05 +0000 Subject: [PATCH 04/22] Various further refactoring related to move from Elastic to Open Search --- .../NationalArchives.ActiveMQ.csproj | 4 +- .../DailyUpdateTest.cs | 2 +- ...s.Taxonomy.Batch.Update.OpenSearch.csproj} | 0 ...ateParams.cs => OpenSearchUpdateParams.cs} | 2 +- .../Program.cs | 45 ++++---- .../Properties/Resources.Designer.cs | 14 +-- .../Properties/Resources.resx | 6 +- ...e.cs => UpdateOpenSearchWindowsService.cs} | 35 +++--- .../appsettings.json | 6 +- .../Producers/FullReindexCategoriser.cs | 6 +- .../Producers/FullReindexIaidProducer.cs | 30 +++--- .../Queues/FullReIndexIaidPcQueue.cs | 2 +- NationalArchives.Taxonomy.Batch/Program.cs | 38 +++---- .../Service/FullReindexService.cs | 30 ++---- .../appsettings.json | 8 +- .../ElasticCategoriserRepositoryTest.cs | 16 +-- .../QueryBasedCategoriserServiceTest.cs | 2 +- .../BusinessObjects/ISearchResult.cs | 2 +- .../BusinessObjects/InformationAssetView.cs | 2 +- .../MessageProcessingEventArgs.cs | 12 +-- ...rams.cs => OpenSearchAssetBrowseParams.cs} | 2 +- .../CategoryFromOpenSearch.cs} | 10 +- .../CategoryWithOpenSearchQuery.cs} | 6 +- .../{Elastic => OpenSearch}/ISearchCommon.cs | 0 .../{Elastic => OpenSearch}/IStringID.cs | 2 +- .../OpenSearchRecordAssetView.cs} | 9 +- .../{Elastic => OpenSearch}/Query.cs | 2 +- 
.../Repository/Common/CategorySource.cs | 2 +- .../Elastic/AbstractElasticRespository.cs | 19 ---- .../Elastic/AbstractOpenSearchRespository.cs | 19 ++++ .../Connection/ConnectionSettingsProvider.cs | 24 ++--- ...onnectElastic.cs => IConnectOpenSearch.cs} | 19 ++-- ....cs => IOpenSearchConnectionParameters.cs} | 4 +- ...cConnection.cs => OpenSearchConnection.cs} | 73 +++++++------ ...s.cs => OpenSearchConnectionParameters.cs} | 16 +-- ...hParameters.cs => OpenSearchParameters.cs} | 4 +- .../Elastic/ElasticSearchParamsBuilder.cs | 52 --------- .../Repository/Elastic/IIAViewRepository.cs | 4 +- ...s => IOpenSearchIAViewUpdateRepository.cs} | 4 +- ...ory.cs => OpenSearchCategoryRepository.cs} | 24 ++--- ...nstants.cs => OpenSearchFieldConstants.cs} | 4 +- ...itory.cs => OpenSearchIAViewRepository.cs} | 58 +++++----- ...cs => OpenSearchIAViewUpdateRepository.cs} | 37 ++++--- .../Elastic/OpenSearchParamsBuilder.cs | 52 +++++++++ .../Lucene/InMemoryCategoriserRepository.cs | 22 ++-- .../Repository/Lucene/LuceneHelperTools.cs | 10 +- Taxonomy.Common/Helpers/IAListFactory.cs | 6 +- Taxonomy.Common/Mappers/MappingProfile.cs | 10 +- .../NationalArchives.Taxonomy.Common.csproj | 8 +- .../Impl/InformationAssetViewService.cs | 6 +- .../Impl/QueryBasedCategoriserService.cs | 2 +- ...cService.cs => UpdateOpenSearchService.cs} | 47 ++++---- .../Interface/IInformationAssetViewService.cs | 4 +- ...Service.cs => IUpdateOpenSearchService.cs} | 4 +- Taxonomy.Common/TaxonomyErrorType.cs | 8 +- TaxonomyCLI/Program.cs | 32 +++--- TaxonomyCLI/appsettings.json | 6 +- ds-discovery-opensearch-taxonomy.sln | 23 ++-- .../Controllers/TaxonomyController.cs | 2 +- tna.taxonomy.api/Program.cs | 32 +++--- .../Properties/Resources.Designer.cs | 63 +++++++++++ tna.taxonomy.api/Properties/Resources.resx | 101 ++++++++++++++++++ tna.taxonomy.api/appsettings.json | 14 +-- tna.taxonomy.api/tna.taxonomy.api.csproj | 17 ++- 64 files changed, 645 insertions(+), 480 deletions(-) rename NationalArchives.Taxonomy.Batch.Update.Elastic/{NationalArchives.Taxonomy.Batch.Update.Elastic.csproj => NationalArchives.Taxonomy.Batch.Update.OpenSearch.csproj} (100%) rename NationalArchives.Taxonomy.Batch.Update.Elastic/{ElasticUpdateParams.cs => OpenSearchUpdateParams.cs} (82%) rename NationalArchives.Taxonomy.Batch.Update.Elastic/Service/{UpdateElasticWindowsService.cs => UpdateOpenSearchWindowsService.cs} (56%) rename Taxonomy.Common/BusinessObjects/{ElasticAssetBrowseParams.cs => OpenSearchAssetBrowseParams.cs} (90%) rename Taxonomy.Common/DataObjects/{Elastic/CategoryFromElastic.cs => OpenSearch/CategoryFromOpenSearch.cs} (53%) rename Taxonomy.Common/DataObjects/{Elastic/CategoryWithElasticQuery.cs => OpenSearch/CategoryWithOpenSearchQuery.cs} (63%) rename Taxonomy.Common/DataObjects/{Elastic => OpenSearch}/ISearchCommon.cs (100%) rename Taxonomy.Common/DataObjects/{Elastic => OpenSearch}/IStringID.cs (69%) rename Taxonomy.Common/DataObjects/{Elastic/ElasticRecordAssetView.cs => OpenSearch/OpenSearchRecordAssetView.cs} (92%) rename Taxonomy.Common/DataObjects/{Elastic => OpenSearch}/Query.cs (90%) delete mode 100644 Taxonomy.Common/Domain/Repository/Elastic/AbstractElasticRespository.cs create mode 100644 Taxonomy.Common/Domain/Repository/Elastic/AbstractOpenSearchRespository.cs rename Taxonomy.Common/Domain/Repository/Elastic/Connection/{IConnectElastic.cs => IConnectOpenSearch.cs} (50%) rename Taxonomy.Common/Domain/Repository/Elastic/Connection/{IElasticConnectionParameters.cs => IOpenSearchConnectionParameters.cs} (61%) rename 
Taxonomy.Common/Domain/Repository/Elastic/Connection/{ElasticConnection.cs => OpenSearchConnection.cs} (83%) rename Taxonomy.Common/Domain/Repository/Elastic/Connection/{ElasticConnectionParameters.cs => OpenSearchConnectionParameters.cs} (64%) rename Taxonomy.Common/Domain/Repository/Elastic/Connection/{ElasticSearchParameters.cs => OpenSearchParameters.cs} (97%) delete mode 100644 Taxonomy.Common/Domain/Repository/Elastic/ElasticSearchParamsBuilder.cs rename Taxonomy.Common/Domain/Repository/Elastic/{IElasticIAViewUpdateRepository.cs => IOpenSearchIAViewUpdateRepository.cs} (73%) rename Taxonomy.Common/Domain/Repository/Elastic/{ElasticCategoryRepository.cs => OpenSearchCategoryRepository.cs} (74%) rename Taxonomy.Common/Domain/Repository/Elastic/{ElasticFieldConstants.cs => OpenSearchFieldConstants.cs} (88%) rename Taxonomy.Common/Domain/Repository/Elastic/{ElasticIAViewRepository.cs => OpenSearchIAViewRepository.cs} (72%) rename Taxonomy.Common/Domain/Repository/Elastic/{ElasticIAViewUpdateRepository.cs => OpenSearchIAViewUpdateRepository.cs} (60%) create mode 100644 Taxonomy.Common/Domain/Repository/Elastic/OpenSearchParamsBuilder.cs rename Taxonomy.Common/Service/Impl/{UpdateElasticService.cs => UpdateOpenSearchService.cs} (79%) rename Taxonomy.Common/Service/Interface/{IUpdateElasticService.cs => IUpdateOpenSearchService.cs} (54%) create mode 100644 tna.taxonomy.api/Properties/Resources.Designer.cs create mode 100644 tna.taxonomy.api/Properties/Resources.resx diff --git a/NationalArchives.ActiveMQ/NationalArchives.ActiveMQ.csproj b/NationalArchives.ActiveMQ/NationalArchives.ActiveMQ.csproj index 1a376d6..93e2f2a 100644 --- a/NationalArchives.ActiveMQ/NationalArchives.ActiveMQ.csproj +++ b/NationalArchives.ActiveMQ/NationalArchives.ActiveMQ.csproj @@ -18,8 +18,8 @@ v1.4: Compilation changes from x64 to Any CPU for compatibility with Elastic Search v1.5: Updated Nuget Packages v1.6 Reverted Apache.NMS and Apache.NMS.ActiveMQ from v2.0 to 1.8 due to assembly load/resolution/implementation issues. 
https://tna.visualstudio.com/Discovery/_versionControl?path=$/Discovery/Projects/ActiveMQ - ActiveMQ queue elastic taxonomy - "Copyright © The National Archives 2021" + ActiveMQ queue Open Search taxonomy + "Copyright © The National Archives 2024" true 1.6.0.0 diff --git a/NationalArchives.Taxonomy.Batch.UnitTests/DailyUpdateTest.cs b/NationalArchives.Taxonomy.Batch.UnitTests/DailyUpdateTest.cs index f5bd32d..b5282b0 100644 --- a/NationalArchives.Taxonomy.Batch.UnitTests/DailyUpdateTest.cs +++ b/NationalArchives.Taxonomy.Batch.UnitTests/DailyUpdateTest.cs @@ -21,7 +21,7 @@ public class UnitTest1 ISourceIaidInputQueueConsumer _iaidConsumer = Substitute.For(); - IUpdateElasticService _updateElasticService = Substitute.For(); + IUpdateOpenSearchService _updateOpenSearchService = Substitute.For(); //[TestMethod] diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.Elastic.csproj b/NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.OpenSearch.csproj similarity index 100% rename from NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.Elastic.csproj rename to NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.OpenSearch.csproj diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/ElasticUpdateParams.cs b/NationalArchives.Taxonomy.Batch.Update.Elastic/OpenSearchUpdateParams.cs similarity index 82% rename from NationalArchives.Taxonomy.Batch.Update.Elastic/ElasticUpdateParams.cs rename to NationalArchives.Taxonomy.Batch.Update.Elastic/OpenSearchUpdateParams.cs index eb4848d..a4a6f20 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/ElasticUpdateParams.cs +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/OpenSearchUpdateParams.cs @@ -4,7 +4,7 @@ namespace NationalArchives.Taxonomy.Batch { - internal sealed class ElasticUpdateParams + internal sealed class OpenSearchUpdateParams { public uint BulkUpdateBatchSize { get; set; } diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs b/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs index b97928e..70ac3f5 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs @@ -1,25 +1,22 @@ -using AutoMapper; -using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Hosting; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging.EventLog; -using NationalArchives.Taxonomy.Batch.Update.Elastic.Service; +using NationalArchives.Taxonomy.Batch.Update.OpenSearch.Service; using NationalArchives.Taxonomy.Common.Domain.Queue; -using NationalArchives.Taxonomy.Common.Domain.Repository.Common; -using NationalArchives.Taxonomy.Common.Domain.Repository.Elastic; -using NationalArchives.Taxonomy.Common.Domain.Repository.Mongo; +using NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch; using NationalArchives.Taxonomy.Common.Service.Impl; using NationalArchives.Taxonomy.Common.Service.Interface; using NLog.Extensions.Logging; using System; using System.Text; -namespace NationalArchives.Taxonomy.Batch.Update.Elastic +namespace NationalArchives.Taxonomy.Batch.Update.OpenSearch { class Program { - private const string EVENT_SOURCE = "Taxonomy Elastic Search Update"; + private const string EVENT_SOURCE = "Taxonomy Open Search Update"; public static void Main(string[] args) { @@ -33,14 +30,14 
@@ public static void Main(string[] args) using (var loggerFactory = LoggerFactory.Create(builder => builder.AddConsole().AddDebug().AddEventSourceLogger().AddEventLog(eventLogSettings))) { serviceLogger = loggerFactory.CreateLogger(); - serviceLogger.LogInformation("Starting the taxonomy elastic update service."); + serviceLogger.LogInformation("Starting the taxonomy Open Search update service."); } CreateHostBuilder(args).Build().Run(); } catch (Exception e) { - StringBuilder sb = new StringBuilder("An error occurred whilst initialising or running the taxonomy elastic search update:"); + StringBuilder sb = new StringBuilder("An error occurred whilst initialising or running the taxonomy Open Search update:"); sb.Append("\n"); sb.Append("Error: " + e.Message); sb.Append("\n"); @@ -75,12 +72,12 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic { IConfiguration config = context.Configuration; - var elasticUpdateParams = config.GetSection(nameof(ElasticUpdateParams)).Get(); + var openSearchUpdateParams = config.GetSection(nameof(OpenSearchUpdateParams)).Get(); var stagingQueueParams = config.GetSection(nameof(UpdateStagingQueueParams)).Get(); - var updateElasticConnParams = config.GetSection(nameof(UpdateElasticConnectionParameters)).Get(); + var updateOpenSearchConnParams = config.GetSection(nameof(UpdateOpenSearchConnectionParameters)).Get(); - services.AddSingleton(typeof(ILogger), typeof(Logger)); - services.AddSingleton(typeof(ILogger), typeof(Logger)); + services.AddSingleton(typeof(ILogger), typeof(Logger)); + services.AddSingleton(typeof(ILogger), typeof(Logger)); //Staging queue for updates. Needs to be a singleton or we get multiple consumers! @@ -89,24 +86,24 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic return new ActiveMqUpdateReceiver(stagingQueueParams); }); - services.AddTransient((ctx) => + services.AddTransient((ctx) => { - return new ElasticIAViewUpdateRepository(updateElasticConnParams); + return new OpenSearchIAViewUpdateRepository(updateOpenSearchConnParams); }); - services.AddSingleton((ctx) => + services.AddSingleton((ctx) => { - uint bulkUpdateBatchSize = elasticUpdateParams.BulkUpdateBatchSize; - uint queueFetchWaitTime = elasticUpdateParams.QueueFetchSleepTime; - Console.WriteLine($"Using a batch size of {bulkUpdateBatchSize} and a queue fetch interval of {queueFetchWaitTime} sceonds for Elastic bulk updates."); + uint bulkUpdateBatchSize = openSearchUpdateParams.BulkUpdateBatchSize; + uint queueFetchWaitTime = openSearchUpdateParams.QueueFetchSleepTime; + Console.WriteLine($"Using a batch size of {bulkUpdateBatchSize} and a queue fetch interval of {queueFetchWaitTime} sceonds for Open Search bulk updates."); IUpdateStagingQueueReceiver interimQueue = ctx.GetRequiredService(); - IElasticIAViewUpdateRepository updateRepo = ctx.GetRequiredService(); - ILogger logger = ctx.GetRequiredService>(); - return new UpdateElasticService(interimQueue, updateRepo, logger, bulkUpdateBatchSize, queueFetchWaitTime); + IOpenSearchIAViewUpdateRepository updateRepo = ctx.GetRequiredService(); + ILogger logger = ctx.GetRequiredService>(); + return new UpdateOpenSearchService(interimQueue, updateRepo, logger, bulkUpdateBatchSize, queueFetchWaitTime); }); - services.AddHostedService(); + services.AddHostedService(); ServiceProvider provider = services.BuildServiceProvider(); } diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/Properties/Resources.Designer.cs 
b/NationalArchives.Taxonomy.Batch.Update.Elastic/Properties/Resources.Designer.cs index 2f1fb3b..fadf336 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/Properties/Resources.Designer.cs +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/Properties/Resources.Designer.cs @@ -8,7 +8,7 @@ // //------------------------------------------------------------------------------ -namespace NationalArchives.Taxonomy.Batch.Update.Elastic.Properties { +namespace NationalArchives.Taxonomy.Batch.Update.OpenSearch.Properties { using System; @@ -19,7 +19,7 @@ namespace NationalArchives.Taxonomy.Batch.Update.Elastic.Properties { // class via a tool like ResGen or Visual Studio. // To add or remove a member, edit your .ResX file then rerun ResGen // with the /str option, or rebuild your VS project. - [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "16.0.0.0")] + [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "17.0.0.0")] [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] internal class Resources { @@ -39,7 +39,7 @@ internal Resources() { internal static global::System.Resources.ResourceManager ResourceManager { get { if (object.ReferenceEquals(resourceMan, null)) { - global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("NationalArchives.Taxonomy.Batch.Update.Elastic.Properties.Resources", typeof(Resources).Assembly); + global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("NationalArchives.Taxonomy.Batch.Update.OpenSearch.Properties.Resources", typeof(Resources).Assembly); resourceMan = temp; } return resourceMan; @@ -61,16 +61,16 @@ internal Resources() { } /// - /// Looks up a localized string similar to Flushing remaining categorisation updates from the queue to the Elastic Search database.. + /// Looks up a localized string similar to Flushing remaining categorisation updates from the queue to the Open Search database.. /// - internal static string FlushRemaingUpdatesToElasticMsg { + internal static string FlushRemaingUpdatesToOpenSearchMsg { get { - return ResourceManager.GetString("FlushRemaingUpdatesToElasticMsg", resourceCulture); + return ResourceManager.GetString("FlushRemaingUpdatesToOpenSearchMsg", resourceCulture); } } /// - /// Looks up a localized string similar to The Elastic update worker reports that no more categorisation results are available on the queue. The service will now stop.. + /// Looks up a localized string similar to The Open Search update worker reports that no more categorisation results are available on the queue. The service will now stop.. /// internal static string NoMoreCategorisationResultsOnQueueMsg { get { diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/Properties/Resources.resx b/NationalArchives.Taxonomy.Batch.Update.Elastic/Properties/Resources.resx index c37b4fd..bfddc89 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/Properties/Resources.resx +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/Properties/Resources.resx @@ -117,10 +117,10 @@ System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 - - Flushing remaining categorisation updates from the queue to the Elastic Search database. + + Flushing remaining categorisation updates from the queue to the Open Search database. 
- The Elastic update worker reports that no more categorisation results are available on the queue. The service will now stop. + The Open Search update worker reports that no more categorisation results are available on the queue. The service will now stop. \ No newline at end of file diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/Service/UpdateElasticWindowsService.cs b/NationalArchives.Taxonomy.Batch.Update.Elastic/Service/UpdateOpenSearchWindowsService.cs similarity index 56% rename from NationalArchives.Taxonomy.Batch.Update.Elastic/Service/UpdateElasticWindowsService.cs rename to NationalArchives.Taxonomy.Batch.Update.Elastic/Service/UpdateOpenSearchWindowsService.cs index 7127ebb..d8a2375 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/Service/UpdateElasticWindowsService.cs +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/Service/UpdateOpenSearchWindowsService.cs @@ -6,17 +6,17 @@ using System.Threading; using System.Threading.Tasks; -namespace NationalArchives.Taxonomy.Batch.Update.Elastic.Service +namespace NationalArchives.Taxonomy.Batch.Update.OpenSearch.Service { - internal class UpdateElasticWindowsService : BackgroundService + internal class UpdateOpenSearchWindowsService : BackgroundService { private readonly IHostApplicationLifetime _hostApplicationLifetime; - private readonly IUpdateElasticService _updateElasticService; - private readonly ILogger _logger; + private readonly IUpdateOpenSearchService _updateOpenSearchService; + private readonly ILogger _logger; - public UpdateElasticWindowsService(IUpdateElasticService updateElasticService, ILogger logger, IHostApplicationLifetime hostApplicationLifetime) + public UpdateOpenSearchWindowsService(IUpdateOpenSearchService updateOpenSearchService, ILogger logger, IHostApplicationLifetime hostApplicationLifetime) { - _updateElasticService = updateElasticService; + _updateOpenSearchService = updateOpenSearchService; _logger = logger; _hostApplicationLifetime = hostApplicationLifetime; } @@ -26,9 +26,9 @@ public override Task StopAsync(CancellationToken cancellationToken) try { - _logger.LogInformation(Properties.Resources.FlushRemaingUpdatesToElasticMsg); - _updateElasticService.Flush(); - _logger.LogInformation("Stopping the Elastic Update Windows Service."); + _logger.LogInformation(Properties.Resources.FlushRemaingUpdatesToOpenSearchMsg); + _updateOpenSearchService.Flush(); + _logger.LogInformation("Stopping the Open Search Update Windows Service."); base.StopAsync(cancellationToken); @@ -47,7 +47,7 @@ protected override Task ExecuteAsync(CancellationToken stoppingToken) _hostApplicationLifetime.ApplicationStopping.Register(OnStopping); _hostApplicationLifetime.ApplicationStopped.Register(OnStopped); - Task updateTask = Task.Run(() => _updateElasticService.Init()); + Task updateTask = Task.Run(() => _updateOpenSearchService.Init()); TaskAwaiter awaiter = updateTask.GetAwaiter(); awaiter.OnCompleted(() => OutputCompletion(updateTask)); @@ -59,34 +59,33 @@ private void OutputCompletion(Task task) { if (task.IsCanceled) { - _logger.LogInformation("Elastic search update service is stopping due to cancellation."); + _logger.LogInformation("Open Search update service is stopping due to cancellation."); } else if (task.IsFaulted) { - _logger.LogError("The Elastic search update service is stopping due to an exception."); - + _logger.LogError("The Open Search update service is stopping due to an exception."); } else { - _logger.LogInformation("Processing of Elastic search updates completed."); - 
_logger.LogInformation("The Elastic search updates service is stopping."); + _logger.LogInformation("Processing of Open Search updates completed."); + _logger.LogInformation("The Open Search updates service is stopping."); } _hostApplicationLifetime.StopApplication(); } private void OnStarted() { - _logger.LogInformation("Taxonomy Elastic Search Updates Service has started."); + _logger.LogInformation("Taxonomy Open Search Updates Service has started."); } private void OnStopping() { - _logger.LogInformation("Taxonomy Elastic Search Updates Service is stopping."); + _logger.LogInformation("Taxonomy Open Search Updates Service is stopping."); } private void OnStopped() { - _logger.LogInformation("Taxonomy Elastic Search Updates Service is stopped."); + _logger.LogInformation("Taxonomy Open Search Updates Service is stopped."); } } } diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json b/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json index 4c8aab8..7b6b90e 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json @@ -17,16 +17,16 @@ "IncludeScopes": true } }, - "ElasticUpdateParams": { + "OpenSearchUpdateParams": { "BulkUpdateBatchSize": "1000", "QueueFetchSleepTime": "500" }, - "UpdateElasticConnectionParameters": { + "UpdateOpenSearchConnectionParameters": { "Scheme": "https", "Host": "vpc-", "Port": "443", "IndexDatabase": "discovery_records", - "ElasticAwsParams": { + "OpenSearchAwsParams": { "UseAwsConnection": "true", "Region": "eu-west-2", "RoleArn": "arn:aws:iam::968803923593:role/elastic_update_role", diff --git a/NationalArchives.Taxonomy.Batch/FullReindex/Producers/FullReindexCategoriser.cs b/NationalArchives.Taxonomy.Batch/FullReindex/Producers/FullReindexCategoriser.cs index 16a4b6c..a449b01 100644 --- a/NationalArchives.Taxonomy.Batch/FullReindex/Producers/FullReindexCategoriser.cs +++ b/NationalArchives.Taxonomy.Batch/FullReindex/Producers/FullReindexCategoriser.cs @@ -166,7 +166,7 @@ private void ProcessMultiBatch(uint batchSize, CancellationToken token) if (_taxonomyExceptionThreshold > 0 && _taxonomyExceptionCount >= _taxonomyExceptionThreshold) { - throw new TaxonomyException(TaxonomyErrorType.ELASTIC_INVALID_RESPONSE, $"Processing cannot continue as the configured taxonomy exception count of {_taxonomyExceptionThreshold} has been reached.", ex); + throw new TaxonomyException(TaxonomyErrorType.OPEN_SEARCH_INVALID_RESPONSE, $"Processing cannot continue as the configured taxonomy exception count of {_taxonomyExceptionThreshold} has been reached.", ex); } else { @@ -181,7 +181,7 @@ private void ProcessMultiBatch(uint batchSize, CancellationToken token) if (_taxonomyExceptionThreshold > 0 && _taxonomyExceptionCount >= _taxonomyExceptionThreshold) { - throw new TaxonomyException(TaxonomyErrorType.ELASTIC_INVALID_RESPONSE, $"Processing cannot continue as the configured taxonomy exception count of {_taxonomyExceptionThreshold} has been reached.", ex); + throw new TaxonomyException(TaxonomyErrorType.OPEN_SEARCH_INVALID_RESPONSE, $"Processing cannot continue as the configured taxonomy exception count of {_taxonomyExceptionThreshold} has been reached.", ex); } else { @@ -331,7 +331,7 @@ private void ProcessSingleBatch(CancellationToken token) if(_taxonomyExceptionThreshold > 0 && _taxonomyExceptionCount >= _taxonomyExceptionThreshold) { - throw new TaxonomyException(TaxonomyErrorType.ELASTIC_INVALID_RESPONSE, $"Processing cannot continue as the configured 
taxonomy exception count of {_taxonomyExceptionThreshold} has been reached.", ex); + throw new TaxonomyException(TaxonomyErrorType.OPEN_SEARCH_INVALID_RESPONSE, $"Processing cannot continue as the configured taxonomy exception count of {_taxonomyExceptionThreshold} has been reached.", ex); } } catch (Exception ex) diff --git a/NationalArchives.Taxonomy.Batch/FullReindex/Producers/FullReindexIaidProducer.cs b/NationalArchives.Taxonomy.Batch/FullReindex/Producers/FullReindexIaidProducer.cs index 5f53e2d..d183867 100644 --- a/NationalArchives.Taxonomy.Batch/FullReindex/Producers/FullReindexIaidProducer.cs +++ b/NationalArchives.Taxonomy.Batch/FullReindex/Producers/FullReindexIaidProducer.cs @@ -21,19 +21,19 @@ internal class FullReindexIaidProducer : IIAIDProducer private FullReIndexIaidPcQueue _pcQueue; private readonly IInformationAssetViewService _iaViewService; private readonly ILogger _logger; - private ElasticAssetBrowseParams _elasticAssetBrowseParams; + private OpenSearchAssetBrowseParams _openSearchAssetBrowseParams; internal EventHandler ProcessingCompleted; internal EventHandler FatalException; private int _totalCount; - public FullReindexIaidProducer(FullReIndexIaidPcQueue pcQueue, IInformationAssetViewService iaViewService, ElasticAssetBrowseParams elasticAssetFetchParams,ILogger logger) + public FullReindexIaidProducer(FullReIndexIaidPcQueue pcQueue, IInformationAssetViewService iaViewService, OpenSearchAssetBrowseParams openSearchAssetFetchParams,ILogger logger) { _pcQueue = pcQueue; _iaViewService = iaViewService; _logger = logger; - _elasticAssetBrowseParams = elasticAssetFetchParams; + _openSearchAssetBrowseParams = openSearchAssetFetchParams; } public async Task InitAsync(CancellationToken token) @@ -49,18 +49,18 @@ public async Task InitAsync(CancellationToken token) int totalScrollResults = 0; - InformationAssetScrollList informationAssetsList = _iaViewService.BrowseAllDocReferences(browseParams: _elasticAssetBrowseParams, scrollId: null); + InformationAssetScrollList informationAssetsList = _iaViewService.BrowseAllDocReferences(browseParams: _openSearchAssetBrowseParams, scrollId: null); string scrollId = informationAssetsList.ScrollId; if (String.IsNullOrEmpty(scrollId)) { - throw new TaxonomyException(TaxonomyErrorType.ELASTIC_SCROLL_EXCEPTION, "Error scrolling IAIDS in Elastic. Could not retrieve Elastic Scroll ID"); + throw new TaxonomyException(TaxonomyErrorType.OPEN_SEARCH_SCROLL_EXCEPTION, "Error scrolling IAIDS in Open Search. Could not retrieve Open Search Scroll ID"); } if (informationAssetsList.ScrollResults.Count == 0) { - throw new TaxonomyException(TaxonomyErrorType.ELASTIC_SCROLL_EXCEPTION, "No results received on initial scroll request."); + throw new TaxonomyException(TaxonomyErrorType.OPEN_SEARCH_SCROLL_EXCEPTION, "No results received on initial scroll request."); } else { @@ -71,7 +71,7 @@ public async Task InitAsync(CancellationToken token) int scrollCount = 1; _logger.LogInformation($"Scroll iteration {scrollCount}. Results this iteration: {informationAssetsList.ScrollResults.Count}. 
Total results so far: {totalScrollResults}"); - if (_elasticAssetBrowseParams.LogFetchedAssetIds) + if (_openSearchAssetBrowseParams.LogFetchedAssetIds) { LogScrollResults(informationAssetsList, scrollCount); } @@ -83,7 +83,7 @@ public async Task InitAsync(CancellationToken token) { scrollCount++; - informationAssetsList = _iaViewService.BrowseAllDocReferences(browseParams: _elasticAssetBrowseParams, scrollId: scrollId); + informationAssetsList = _iaViewService.BrowseAllDocReferences(browseParams: _openSearchAssetBrowseParams, scrollId: scrollId); if (informationAssetsList.ScrollResults?.Count > 0) { @@ -91,7 +91,7 @@ public async Task InitAsync(CancellationToken token) if (String.IsNullOrEmpty(informationAssetsList.ScrollId)) { - throw new TaxonomyException(TaxonomyErrorType.ELASTIC_SCROLL_EXCEPTION, "error during scrolling IAIDs from Elastic - could not retrieve scroll ID."); + throw new TaxonomyException(TaxonomyErrorType.OPEN_SEARCH_SCROLL_EXCEPTION, "error during scrolling IAIDs from Open Search - could not retrieve scroll ID."); } else { @@ -100,7 +100,7 @@ public async Task InitAsync(CancellationToken token) } _logger.LogInformation($"Scroll iteration {scrollCount}. Results this iteration: {informationAssetsList.ScrollResults.Count}. Total results so far: {totalScrollResults}"); - if (_elasticAssetBrowseParams.LogFetchedAssetIds) + if (_openSearchAssetBrowseParams.LogFetchedAssetIds) { LogScrollResults(informationAssetsList, scrollCount); } @@ -108,14 +108,14 @@ public async Task InitAsync(CancellationToken token) } else { - StringBuilder sb = new StringBuilder($"No asset identifier results received from Elastic Search scroll cursor on scroll count {scrollCount}. Total results so far {totalScrollResults}. "); + StringBuilder sb = new StringBuilder($"No asset identifier results received from Open Search scroll cursor on scroll count {scrollCount}. Total results so far {totalScrollResults}. "); if (token.IsCancellationRequested) { - sb.Append("Cancellation of Elastic Search scroll was requested."); + sb.Append("Cancellation of Open Search scroll was requested."); } else { - sb.Append("Cancellation of Elastic Search scroll was not requested."); + sb.Append("Cancellation of Open Search scroll was not requested."); } _logger.LogInformation(sb.ToString()); } @@ -142,12 +142,12 @@ public async Task InitAsync(CancellationToken token) if (!token.IsCancellationRequested) { - string completionMessage = "Fetch of IAIDs from Elastic search to processing queue completed."; + string completionMessage = "Fetch of IAIDs from Open Search to processing queue completed."; _logger.LogInformation(completionMessage); } else { - string cancelMessage = "Fetch of source information assets from Elastic search for categorisation was cancelled by the caller."; + string cancelMessage = "Fetch of source information assets from Open Search for categorisation was cancelled by the caller."; throw new OperationCanceledException(cancelMessage, token); } } diff --git a/NationalArchives.Taxonomy.Batch/FullReindex/Queues/FullReIndexIaidPcQueue.cs b/NationalArchives.Taxonomy.Batch/FullReindex/Queues/FullReIndexIaidPcQueue.cs index f9c1c66..03c6bc1 100644 --- a/NationalArchives.Taxonomy.Batch/FullReindex/Queues/FullReIndexIaidPcQueue.cs +++ b/NationalArchives.Taxonomy.Batch/FullReindex/Queues/FullReIndexIaidPcQueue.cs @@ -4,7 +4,7 @@ namespace NationalArchives.Taxonomy.Batch.FullReindex.Queues { /// - /// Can be used if required to store IAIDs from a producer, e.g. 
an Elastic scroll cursor, if we need to fetch them this way + /// Can be used if required to store IAIDs from a producer, e.g. an Open Search scroll cursor, if we need to fetch them this way /// instead of using an existing queue. /// /// diff --git a/NationalArchives.Taxonomy.Batch/Program.cs b/NationalArchives.Taxonomy.Batch/Program.cs index 5e2bb94..ca7cc26 100644 --- a/NationalArchives.Taxonomy.Batch/Program.cs +++ b/NationalArchives.Taxonomy.Batch/Program.cs @@ -11,10 +11,10 @@ using NationalArchives.Taxonomy.Batch.FullReindex.Queues; using NationalArchives.Taxonomy.Batch.Service; using NationalArchives.Taxonomy.Common.BusinessObjects; -using NationalArchives.Taxonomy.Common.DataObjects.Elastic; +using NationalArchives.Taxonomy.Common.DataObjects.OpenSearch; using NationalArchives.Taxonomy.Common.Domain.Queue; using NationalArchives.Taxonomy.Common.Domain.Repository.Common; -using NationalArchives.Taxonomy.Common.Domain.Repository.Elastic; +using NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch; using NationalArchives.Taxonomy.Common.Domain.Repository.Lucene; using NationalArchives.Taxonomy.Common.Domain.Repository.Mongo; using NationalArchives.Taxonomy.Common.Service; @@ -121,7 +121,7 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic } - DiscoverySearchElasticConnectionParameters discoverySearchElasticConnParams = config.GetSection("DiscoveryElasticParams").Get(); + DiscoveryOpenSearchConnectionParameters discoveryOpenSearchConnParams = config.GetSection("DiscoveryOpenSearchParams").Get(); services.AddSingleton(categorisationParams); @@ -135,10 +135,10 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic services.AddSingleton(updateStagingQueueParams); // IAIDs connection info - services.AddTransient>((ctx) => + services.AddTransient>((ctx) => { - IConnectElastic recordAssetsElasticConnection = new ElasticConnection(discoverySearchElasticConnParams); - return recordAssetsElasticConnection; + IConnectOpenSearch recordAssetsOpenSearchConnection = new OpenSearchConnection(discoveryOpenSearchConnParams); + return recordAssetsOpenSearchConnection; }); @@ -147,33 +147,33 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic services.AddTransient((ctx) => { IMapper mapper = ctx.GetRequiredService(); - IConnectElastic elasticConnectionInfo = ctx.GetRequiredService>(); + IConnectOpenSearch openSearchConnectionInfo = ctx.GetRequiredService>(); LuceneHelperTools luceneHelperTools = ctx.GetRequiredService(); - ElasticIAViewRepository iaRepo = new ElasticIAViewRepository(elasticConnectionInfo, luceneHelperTools, mapper); + OpenSearchIAViewRepository iaRepo = new OpenSearchIAViewRepository(openSearchConnectionInfo, luceneHelperTools, mapper); return iaRepo; }); CategorySource categorySource = (CategorySource)Enum.Parse(typeof(CategorySource), config.GetValue("CategorySource")); - // Get the categories form either Mongo or Elastic + // Get the categories form either Mongo or open Search switch(categorySource) { - case CategorySource.Elastic: + case CategorySource.OpenSearch: // Categories connection info - services.AddTransient>((ctx) => + services.AddTransient>((ctx) => { - CategoryDataElasticConnectionParameters categoryDataElasticConnParams = config.GetSection("CategoryElasticParams").Get(); - IConnectElastic categoriesElasticConnection = new ElasticConnection(categoryDataElasticConnParams); - return categoriesElasticConnection; + CategoryDataOpenSearchConnectionParameters 
categoryDataOpenSearchConnParams = config.GetSection("CategoryOpenSearchParams").Get(); + IConnectOpenSearch categoriesOpenSearchConnection = new OpenSearchConnection(categoryDataOpenSearchConnParams); + return categoriesOpenSearchConnection; }); // category list repo using category connection info. - services.AddTransient((ctx) => + services.AddTransient((ctx) => { IMapper mapper = ctx.GetRequiredService(); - IConnectElastic elasticConnectionInfo = ctx.GetRequiredService>(); - ElasticCategoryRepository categoryRepo = new ElasticCategoryRepository(elasticConnectionInfo, mapper); + IConnectOpenSearch openSearchConnectionInfo = ctx.GetRequiredService>(); + OpenSearchCategoryRepository categoryRepo = new OpenSearchCategoryRepository(openSearchConnectionInfo, mapper); return categoryRepo; }); @@ -277,7 +277,7 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic return new FullReIndexIaidPcQueue(qparams.MaxSize); }); // => FullReindexService - var eElasticAssetBrowseParams = config.GetSection("ElasticAssetFetchParams").Get(); + var openSearchAssetBrowseParams = config.GetSection("OpenSearchAssetFetchParams").Get(); services.AddSingleton((ctx) => { @@ -285,7 +285,7 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic var logger = ctx.GetRequiredService>(); var reindexQueue = ctx.GetRequiredService>(); - return new FullReindexIaidProducer(reindexQueue, iaViewService, eElasticAssetBrowseParams, logger); + return new FullReindexIaidProducer(reindexQueue, iaViewService, openSearchAssetBrowseParams, logger); }); services.AddHostedService(); diff --git a/NationalArchives.Taxonomy.Batch/Service/FullReindexService.cs b/NationalArchives.Taxonomy.Batch/Service/FullReindexService.cs index d269f15..3132a85 100644 --- a/NationalArchives.Taxonomy.Batch/Service/FullReindexService.cs +++ b/NationalArchives.Taxonomy.Batch/Service/FullReindexService.cs @@ -21,7 +21,7 @@ internal class FullReindexService : BackgroundService private readonly ICategoriserService _categoriserService; private readonly ILogger _logger; private readonly IHostApplicationLifetime _hostApplicationLifetime; - private readonly FullReindexIaidProducer _iaidsFromElasticProducer; + private readonly FullReindexIaidProducer _iaidsFromOpenSearchProducer; private readonly FullReIndexIaidPcQueue _reindexIaidQueue; private readonly IUpdateStagingQueueSender _updateStagingQueueSender; @@ -52,7 +52,7 @@ public FullReindexService(FullReindexIaidProducer iaidProducer, _categoriserStartDelay = catParams.CategoriserStartDelay; _logIndividualCategorisationResults = catParams.LogEachCategorisationResult; - _iaidsFromElasticProducer = iaidProducer; + _iaidsFromOpenSearchProducer = iaidProducer; _reindexIaidQueue = reindexIaidQueue; @@ -67,8 +67,6 @@ public FullReindexService(FullReindexIaidProducer iaidProducer, _hostApplicationLifetime = hostApplicationLifetime; } - - public override Task StopAsync(CancellationToken cancellationToken) { _updateStagingQueueSender?.Dispose(); @@ -83,9 +81,9 @@ public override void Dispose() base.Dispose(); } - private void FullReindexElasticFetch_ProcessingCompleted(object sender, MessageProcessingEventArgs e) + private void FullReindexOpenSearchFetch_ProcessingCompleted(object sender, MessageProcessingEventArgs e) { - _logger.LogInformation("Elastic search fetch completed."); + _logger.LogInformation("Open Search fetch completed."); } public void IncrementCategorisationCount(int newResults) @@ -119,14 +117,14 @@ protected override Task 
ExecuteAsync(CancellationToken stoppingToken) Action updateQueueProgress = (i, j) => _logger.LogInformation($"{i} assets processed and taxonomy results send to the external update queue. There are currently {j} taxonomy results in the internal update queue."); - Task iaidProducerTask = _iaidsFromElasticProducer.InitAsync(stoppingToken); + Task iaidProducerTask = _iaidsFromOpenSearchProducer.InitAsync(stoppingToken); tasks.Add(iaidProducerTask); TaskAwaiter iaidFetchawaiter = iaidProducerTask.GetAwaiter(); iaidFetchawaiter.OnCompleted(() => { if (iaidProducerTask.Exception != null) { - string msg = "Error retrieving Information assets from Elastic Search. Please check the logs for errors."; + string msg = "Error retrieving Information assets from Open Search. Please check the logs for errors."; _StopMessage = msg; _logger.LogError(msg); _logger.LogError(iaidProducerTask.Exception.Message); @@ -140,11 +138,11 @@ protected override Task ExecuteAsync(CancellationToken stoppingToken) } else if (iaidProducerTask.IsCanceled) { - _logger.LogInformation("Fetch of information asset IDs from Elastic search was cancelled. Please check the logs for errors."); + _logger.LogInformation("Fetch of information asset IDs from Open Search was cancelled. Please check the logs for errors."); } else { - _logger.LogInformation($"Completed fetch of asset identifiers from Elastic Search. {_iaidsFromElasticProducer.TotalIdentifiersFetched} IAIDs were fetched, current queue size is {_iaidsFromElasticProducer.CurrentQueueSize}"); + _logger.LogInformation($"Completed fetch of asset identifiers from Open Search. {_iaidsFromOpenSearchProducer.TotalIdentifiersFetched} IAIDs were fetched, current queue size is {_iaidsFromOpenSearchProducer.CurrentQueueSize}"); } } @@ -166,10 +164,8 @@ void LogInnerExceptions(IEnumerable innerExceptions) } - // wait a while to allow some iaids to be fetched from Elastic (set this via appsettings as desired). - // NOw we can start the staging queue, followed - - + // wait a while to allow some iaids to be fetched from Open Search (set this via appsettings as desired). 
+ // Now we can start the staging queue, followed if (_categoriserStartDelay >= 0) { @@ -220,8 +216,6 @@ void LogInnerExceptions(IEnumerable innerExceptions) } ); - - // Start the full reindex categorisation _categorisationStartTime = DateTime.Now; @@ -277,12 +271,11 @@ void LogInnerExceptions(IEnumerable innerExceptions) return Task.CompletedTask; } - catch (Exception e) { _logger.LogCritical(e.Message); _logger.LogCritical("Fatal exception occured during processing, please check the logs for details."); - _logger.LogCritical("Cancelling document feed from elastic search following exception"); + _logger.LogCritical("Cancelling document feed from Open Search following exception"); StopApplication(); return Task.FromException(e); } @@ -312,7 +305,6 @@ private void StopApplication() } } - private void OnStarted() { _logger.LogInformation("Taxonomy Generator Full Reindex Service has started."); diff --git a/NationalArchives.Taxonomy.Batch/appsettings.json b/NationalArchives.Taxonomy.Batch/appsettings.json index 38bf3f4..ab073f5 100644 --- a/NationalArchives.Taxonomy.Batch/appsettings.json +++ b/NationalArchives.Taxonomy.Batch/appsettings.json @@ -17,12 +17,12 @@ "IncludeScopes": true } }, - "DiscoveryElasticParams": { + "DiscoveryOpenSearchParams": { "Scheme": "https", "Host": "vpc-", "Port": "443", "IndexDatabase": "discovery_records", - "ElasticAwsParams": { + "OpenSearchAwsParams": { "UseAwsConnection": "true", "Region": "eu-west-2", "RoleArn": "arn:aws:iam::xxx:role/elastic_update_role", @@ -31,7 +31,7 @@ //we've to move away from using this and use the profile to access AWS' } }, - "ElasticAssetFetchParams": { + "OpenSearchAssetFetchParams": { "PageSize": "1000", "ScrollTimeout": "10000", "HeldByCode": "TNA", @@ -75,7 +75,7 @@ "CategoryMongoParams": { "ConnectionString": "mongodb+srv://XX.mongodb.net", "DatabaseName": "taxonomy", - "CollectionName": "elastic-categories" + "CollectionName": "categories" }, "CategorySource": "Mongo", "OperationMode": "Daily_Update" // "Daily_Update" or "Full_Reindex" diff --git a/NationalArchives.Taxonomy.Common.UnitTests/ElasticCategoriserRepositoryTest.cs b/NationalArchives.Taxonomy.Common.UnitTests/ElasticCategoriserRepositoryTest.cs index f32f9f0..4d18396 100644 --- a/NationalArchives.Taxonomy.Common.UnitTests/ElasticCategoriserRepositoryTest.cs +++ b/NationalArchives.Taxonomy.Common.UnitTests/ElasticCategoriserRepositoryTest.cs @@ -48,11 +48,11 @@ public void FindRelevantCategoriesForDocument_TextGen() var luceneHelperTools = new LuceneHelperTools(defaultField, analyzer, _queryFields); - var elasticCategoryRepository = new InMemoryCategoriserRepository(iaViewIndexAnalyser :analyzer, luceneHelperTools: luceneHelperTools, logger: null); + var openSearchCategoryRepository = new InMemoryCategoriserRepository(iaViewIndexAnalyser :analyzer, luceneHelperTools: luceneHelperTools, logger: null); InformationAssetView iaView = GetInformationAssetView(); - IList categorisationResults = elasticCategoryRepository.FindRelevantCategoriesForDocument(iaView, PopulateCategories()); + IList categorisationResults = openSearchCategoryRepository.FindRelevantCategoriesForDocument(iaView, PopulateCategories()); Assert.IsNotNull(categorisationResults); Trace.WriteLine(categorisationResults.Count + " caetgories found to match."); @@ -69,11 +69,11 @@ public void FindRelevantCategoriesForDocument_TextNoCasNoPunc() var luceneHelperTools = new LuceneHelperTools(defaultField, analyzer, _queryFields); - var elasticCategoryRepository = new InMemoryCategoriserRepository( analyzer, 
luceneHelperTools: luceneHelperTools, logger: null); + var openSearchCategoryRepository = new InMemoryCategoriserRepository( analyzer, luceneHelperTools: luceneHelperTools, logger: null); InformationAssetView iaView = GetInformationAssetView(); - IList categorisationResults = elasticCategoryRepository.FindRelevantCategoriesForDocument(iaView, PopulateCategories()); + IList categorisationResults = openSearchCategoryRepository.FindRelevantCategoriesForDocument(iaView, PopulateCategories()); Assert.IsNotNull(categorisationResults); Trace.WriteLine(categorisationResults.Count + " categories found to match."); @@ -93,11 +93,11 @@ public void FindRelevantCategoriesForDocument_TextCasNoPunc() var luceneHelperTools = new LuceneHelperTools(defaultField, analyzer, _queryFields); - var elasticCategoryRepository = new InMemoryCategoriserRepository(iaViewIndexAnalyser: analyzer, luceneHelperTools: luceneHelperTools, logger: null); + var openSearchCategoryRepository = new InMemoryCategoriserRepository(iaViewIndexAnalyser: analyzer, luceneHelperTools: luceneHelperTools, logger: null); InformationAssetView iaView = GetInformationAssetView(); - IList categorisationResults = elasticCategoryRepository.FindRelevantCategoriesForDocument(iaView, _listOfCategories); + IList categorisationResults = openSearchCategoryRepository.FindRelevantCategoriesForDocument(iaView, _listOfCategories); Assert.IsNotNull(categorisationResults); Assert.IsFalse(categorisationResults.Count == 0); @@ -116,11 +116,11 @@ public void FindRelevantCategoriesForDocument_TextCasPunc() var luceneHelperTools = new LuceneHelperTools(defaultField, analyzer, _queryFields); - var elasticCategoryRepository = new InMemoryCategoriserRepository(iaViewIndexAnalyser: analyzer, luceneHelperTools: luceneHelperTools, logger: null); + var openSearchCategoryRepository = new InMemoryCategoriserRepository(iaViewIndexAnalyser: analyzer, luceneHelperTools: luceneHelperTools, logger: null); InformationAssetView iaView = GetInformationAssetView(); - IList categorisationResults = elasticCategoryRepository.FindRelevantCategoriesForDocument(iaView, _listOfCategories); + IList categorisationResults = openSearchCategoryRepository.FindRelevantCategoriesForDocument(iaView, _listOfCategories); Assert.IsNotNull(categorisationResults); Assert.IsFalse(categorisationResults.Count == 0); diff --git a/NationalArchives.Taxonomy.Common.UnitTests/QueryBasedCategoriserServiceTest.cs b/NationalArchives.Taxonomy.Common.UnitTests/QueryBasedCategoriserServiceTest.cs index f772b3c..45427f3 100644 --- a/NationalArchives.Taxonomy.Common.UnitTests/QueryBasedCategoriserServiceTest.cs +++ b/NationalArchives.Taxonomy.Common.UnitTests/QueryBasedCategoriserServiceTest.cs @@ -2,7 +2,7 @@ using NationalArchives.Taxonomy.Common.BusinessObjects; using NationalArchives.Taxonomy.Common.Domain; using NationalArchives.Taxonomy.Common.Domain.Repository.Common; -using NationalArchives.Taxonomy.Common.Domain.Repository.Elastic; +using NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch; using NationalArchives.Taxonomy.Common.Service; using NSubstitute; using System.Collections.Generic; diff --git a/Taxonomy.Common/BusinessObjects/ISearchResult.cs b/Taxonomy.Common/BusinessObjects/ISearchResult.cs index 0062aae..947ad3e 100644 --- a/Taxonomy.Common/BusinessObjects/ISearchResult.cs +++ b/Taxonomy.Common/BusinessObjects/ISearchResult.cs @@ -1,4 +1,4 @@ -namespace NationalArchives.Taxonomy.Common.DataObjects.Elastic +namespace NationalArchives.Taxonomy.Common.DataObjects.OpenSearch { public 
interface ISearchResult { diff --git a/Taxonomy.Common/BusinessObjects/InformationAssetView.cs b/Taxonomy.Common/BusinessObjects/InformationAssetView.cs index 01d3a07..af5e36b 100644 --- a/Taxonomy.Common/BusinessObjects/InformationAssetView.cs +++ b/Taxonomy.Common/BusinessObjects/InformationAssetView.cs @@ -1,4 +1,4 @@ -using NationalArchives.Taxonomy.Common.DataObjects.Elastic; +using NationalArchives.Taxonomy.Common.DataObjects.OpenSearch; namespace NationalArchives.Taxonomy.Common.Domain { diff --git a/Taxonomy.Common/BusinessObjects/MessageProcessingEventArgs.cs b/Taxonomy.Common/BusinessObjects/MessageProcessingEventArgs.cs index 727b429..31654d4 100644 --- a/Taxonomy.Common/BusinessObjects/MessageProcessingEventArgs.cs +++ b/Taxonomy.Common/BusinessObjects/MessageProcessingEventArgs.cs @@ -2,26 +2,26 @@ using System.Collections.Generic; using System.Text; -namespace NationalArchives.Taxonomy.Common.DataObjects.Elastic +namespace NationalArchives.Taxonomy.Common.DataObjects.OpenSearch { - public sealed class ElasticUpdateEventArgs : EventArgs + public sealed class OpenSearchUpdateEventArgs : EventArgs { - public ElasticUpdateEventArgs() + public OpenSearchUpdateEventArgs() { } - public ElasticUpdateEventArgs(string msg, ElasticUpdateEventType eventType ) + public OpenSearchUpdateEventArgs(string msg, OpenSearchUpdateEventType eventType ) { Message = msg; EventType = eventType; } public string Message { get;} - public ElasticUpdateEventType EventType { get;} + public OpenSearchUpdateEventType EventType { get;} } - public enum ElasticUpdateEventType + public enum OpenSearchUpdateEventType { FATAL_EXCEPTION, PROCESSING_COMPLETE diff --git a/Taxonomy.Common/BusinessObjects/ElasticAssetBrowseParams.cs b/Taxonomy.Common/BusinessObjects/OpenSearchAssetBrowseParams.cs similarity index 90% rename from Taxonomy.Common/BusinessObjects/ElasticAssetBrowseParams.cs rename to Taxonomy.Common/BusinessObjects/OpenSearchAssetBrowseParams.cs index 4d0a101..574bea1 100644 --- a/Taxonomy.Common/BusinessObjects/ElasticAssetBrowseParams.cs +++ b/Taxonomy.Common/BusinessObjects/OpenSearchAssetBrowseParams.cs @@ -5,7 +5,7 @@ namespace NationalArchives.Taxonomy.Common.BusinessObjects { - public class ElasticAssetBrowseParams + public class OpenSearchAssetBrowseParams { public int PageSize { get; set; } public int ScrollTimeout { get; set; } diff --git a/Taxonomy.Common/DataObjects/Elastic/CategoryFromElastic.cs b/Taxonomy.Common/DataObjects/OpenSearch/CategoryFromOpenSearch.cs similarity index 53% rename from Taxonomy.Common/DataObjects/Elastic/CategoryFromElastic.cs rename to Taxonomy.Common/DataObjects/OpenSearch/CategoryFromOpenSearch.cs index d68cd5d..f2b61b4 100644 --- a/Taxonomy.Common/DataObjects/Elastic/CategoryFromElastic.cs +++ b/Taxonomy.Common/DataObjects/OpenSearch/CategoryFromOpenSearch.cs @@ -1,12 +1,6 @@ -using System; -using System.Collections.Generic; -using System.Text; -using Nest; -using Elasticsearch.Net; - -namespace NationalArchives.Taxonomy.Common.DataObjects.Elastic +namespace NationalArchives.Taxonomy.Common.DataObjects.OpenSearch { - public class CategoryFromElastic + public class CategoryFromOpenSearch { public string ID { get; set; } // i.e. 
Ciaid public string query_text { get; set; } diff --git a/Taxonomy.Common/DataObjects/Elastic/CategoryWithElasticQuery.cs b/Taxonomy.Common/DataObjects/OpenSearch/CategoryWithOpenSearchQuery.cs similarity index 63% rename from Taxonomy.Common/DataObjects/Elastic/CategoryWithElasticQuery.cs rename to Taxonomy.Common/DataObjects/OpenSearch/CategoryWithOpenSearchQuery.cs index 5b60e58..5880893 100644 --- a/Taxonomy.Common/DataObjects/Elastic/CategoryWithElasticQuery.cs +++ b/Taxonomy.Common/DataObjects/OpenSearch/CategoryWithOpenSearchQuery.cs @@ -1,13 +1,13 @@ using NationalArchives.Taxonomy.Common.BusinessObjects; -namespace NationalArchives.Taxonomy.Common.Domain.Repository.Elastic +namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch { - public class CategoryWithElasticQuery : Category + public class CategoryWithOpenSearchQuery : Category { private Query _parsedQuery; //TODO Replace Query with appropriate Elastic Search class... - public CategoryWithElasticQuery(Category category, Query parsedQuery) : base() + public CategoryWithOpenSearchQuery(Category category, Query parsedQuery) : base() { this._parsedQuery = parsedQuery; } diff --git a/Taxonomy.Common/DataObjects/Elastic/ISearchCommon.cs b/Taxonomy.Common/DataObjects/OpenSearch/ISearchCommon.cs similarity index 100% rename from Taxonomy.Common/DataObjects/Elastic/ISearchCommon.cs rename to Taxonomy.Common/DataObjects/OpenSearch/ISearchCommon.cs diff --git a/Taxonomy.Common/DataObjects/Elastic/IStringID.cs b/Taxonomy.Common/DataObjects/OpenSearch/IStringID.cs similarity index 69% rename from Taxonomy.Common/DataObjects/Elastic/IStringID.cs rename to Taxonomy.Common/DataObjects/OpenSearch/IStringID.cs index 02b5ecb..28f3d51 100644 --- a/Taxonomy.Common/DataObjects/Elastic/IStringID.cs +++ b/Taxonomy.Common/DataObjects/OpenSearch/IStringID.cs @@ -2,7 +2,7 @@ using System.Collections.Generic; using System.Text; -namespace NationalArchives.Taxonomy.Common.DataObjects.Elastic +namespace NationalArchives.Taxonomy.Common.DataObjects.OpenSearch { public interface IStringID { diff --git a/Taxonomy.Common/DataObjects/Elastic/ElasticRecordAssetView.cs b/Taxonomy.Common/DataObjects/OpenSearch/OpenSearchRecordAssetView.cs similarity index 92% rename from Taxonomy.Common/DataObjects/Elastic/ElasticRecordAssetView.cs rename to Taxonomy.Common/DataObjects/OpenSearch/OpenSearchRecordAssetView.cs index c8a97c9..577f1c6 100644 --- a/Taxonomy.Common/DataObjects/Elastic/ElasticRecordAssetView.cs +++ b/Taxonomy.Common/DataObjects/OpenSearch/OpenSearchRecordAssetView.cs @@ -1,12 +1,13 @@ -using Nest; +//using Nest; +using OpenSearch.Client; using System; using System.Collections.Generic; using System.Text; -namespace NationalArchives.Taxonomy.Common.DataObjects.Elastic +namespace NationalArchives.Taxonomy.Common.DataObjects.OpenSearch { - [ElasticsearchType(Name = "recordassetview")] - public class ElasticRecordAssetView + //[OpenSearchType(Name = "recordassetview")] + public class OpenSearchRecordAssetView { public string ID { get; set; } = string.Empty; public double? 
Score { get; set; } diff --git a/Taxonomy.Common/DataObjects/Elastic/Query.cs b/Taxonomy.Common/DataObjects/OpenSearch/Query.cs similarity index 90% rename from Taxonomy.Common/DataObjects/Elastic/Query.cs rename to Taxonomy.Common/DataObjects/OpenSearch/Query.cs index 6823de3..0c93330 100644 --- a/Taxonomy.Common/DataObjects/Elastic/Query.cs +++ b/Taxonomy.Common/DataObjects/OpenSearch/Query.cs @@ -1,4 +1,4 @@ -namespace NationalArchives.Taxonomy.Common.Domain.Repository.Elastic +namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch { public class Query { diff --git a/Taxonomy.Common/Domain/Repository/Common/CategorySource.cs b/Taxonomy.Common/Domain/Repository/Common/CategorySource.cs index 8a9c6ab..2ec0f8a 100644 --- a/Taxonomy.Common/Domain/Repository/Common/CategorySource.cs +++ b/Taxonomy.Common/Domain/Repository/Common/CategorySource.cs @@ -3,6 +3,6 @@ public enum CategorySource { Mongo, - Elastic + OpenSearch } } \ No newline at end of file diff --git a/Taxonomy.Common/Domain/Repository/Elastic/AbstractElasticRespository.cs b/Taxonomy.Common/Domain/Repository/Elastic/AbstractElasticRespository.cs deleted file mode 100644 index df27ce0..0000000 --- a/Taxonomy.Common/Domain/Repository/Elastic/AbstractElasticRespository.cs +++ /dev/null @@ -1,19 +0,0 @@ -using AutoMapper; -using Nest; -using System; -using System.Collections.Generic; -using System.Text; - -namespace NationalArchives.Taxonomy.Common.Domain.Repository.Elastic -{ - public class AbstractElasticRespository where T : class - { - protected readonly IConnectElastic _elasticConnection; - protected readonly IMapper _mapper; - public AbstractElasticRespository(IConnectElastic elasticConnection, IMapper mapper) - { - _elasticConnection = elasticConnection; - _mapper = mapper; - } - } -} diff --git a/Taxonomy.Common/Domain/Repository/Elastic/AbstractOpenSearchRespository.cs b/Taxonomy.Common/Domain/Repository/Elastic/AbstractOpenSearchRespository.cs new file mode 100644 index 0000000..d717a9e --- /dev/null +++ b/Taxonomy.Common/Domain/Repository/Elastic/AbstractOpenSearchRespository.cs @@ -0,0 +1,19 @@ +using AutoMapper; +using Nest; +using System; +using System.Collections.Generic; +using System.Text; + +namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch +{ + public class AbstractOpenSearchRespository where T : class + { + protected readonly IConnectOpenSearch _openSearchConnection; + protected readonly IMapper _mapper; + public AbstractOpenSearchRespository(IConnectOpenSearch openSearchConnection, IMapper mapper) + { + _openSearchConnection = openSearchConnection; + _mapper = mapper; + } + } +} diff --git a/Taxonomy.Common/Domain/Repository/Elastic/Connection/ConnectionSettingsProvider.cs b/Taxonomy.Common/Domain/Repository/Elastic/Connection/ConnectionSettingsProvider.cs index b4564f4..ef1bad4 100644 --- a/Taxonomy.Common/Domain/Repository/Elastic/Connection/ConnectionSettingsProvider.cs +++ b/Taxonomy.Common/Domain/Repository/Elastic/Connection/ConnectionSettingsProvider.cs @@ -1,15 +1,15 @@ using Amazon; using Amazon.Runtime; -using Elasticsearch.Net; -using Elasticsearch.Net.Aws; -using Nest; -using Nest.JsonNetSerializer; using Newtonsoft.Json; +using OpenSearch.Client; +using OpenSearch.Client.JsonNetSerializer; +using OpenSearch.Net; +using OpenSearch.Net.Auth.AwsSigV4; using System; using System.Configuration; using System.Linq; -namespace NationalArchives.Taxonomy.Common.Domain.Repository.Elastic +namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch { internal 
static class ConnectionSettingsProvider { @@ -20,15 +20,15 @@ internal static class ConnectionSettingsProvider private static ConnectionSettings _connectionSettings; - public static ConnectionSettings GetConnectionSettings(ElasticConnectionParameters cParams) + public static ConnectionSettings GetConnectionSettings(OpenSearchConnectionParameters cParams) { - if (cParams.ElasticAwsParams?.UseAwsConnection ?? false) + if (cParams.OpenSearchAwsParams?.UseAwsConnection ?? false) { - string awsAccessKey = cParams.ElasticAwsParams.AccessKey; - string awsSecretKey = cParams.ElasticAwsParams.SecretKey; - string awsRoleArn = cParams.ElasticAwsParams.RoleArn; - string strRegion = cParams.ElasticAwsParams.Region; + string awsAccessKey = cParams.OpenSearchAwsParams.AccessKey; + string awsSecretKey = cParams.OpenSearchAwsParams.SecretKey; + string awsRoleArn = cParams.OpenSearchAwsParams.RoleArn; + string strRegion = cParams.OpenSearchAwsParams.Region; if (new[] { awsAccessKey, awsSecretKey, awsRoleArn, strRegion }.Any(s => String.IsNullOrWhiteSpace(s))) { @@ -44,7 +44,7 @@ public static ConnectionSettings GetConnectionSettings(ElasticConnectionParamete throw new ConfigurationErrorsException(AWS_UNKNOWN_REGION); } - using (IConnection httpConnection = new AwsHttpConnection(aWSAssumeRoleCredentials, awsRegion)) + using (IConnection httpConnection = new AwsSigV4HttpConnection(aWSAssumeRoleCredentials, awsRegion)) { using (IConnectionPool pool = new SingleNodeConnectionPool(cParams.Uri)) { diff --git a/Taxonomy.Common/Domain/Repository/Elastic/Connection/IConnectElastic.cs b/Taxonomy.Common/Domain/Repository/Elastic/Connection/IConnectOpenSearch.cs similarity index 50% rename from Taxonomy.Common/Domain/Repository/Elastic/Connection/IConnectElastic.cs rename to Taxonomy.Common/Domain/Repository/Elastic/Connection/IConnectOpenSearch.cs index 740bb8b..41e2bc6 100644 --- a/Taxonomy.Common/Domain/Repository/Elastic/Connection/IConnectElastic.cs +++ b/Taxonomy.Common/Domain/Repository/Elastic/Connection/IConnectOpenSearch.cs @@ -1,31 +1,32 @@ using NationalArchives.Taxonomy.Common.BusinessObjects; -using Nest; +using OpenSearch.Client; +//using Nest; using System.Collections.Generic; using System.Threading.Tasks; -namespace NationalArchives.Taxonomy.Common.Domain.Repository.Elastic +namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch { - public interface IConnectElastic where T : class + public interface IConnectOpenSearch where T : class { - ISearchResponse Search(ElasticSearchParameters searchCommand); + ISearchResponse Search(OpenSearchParameters searchCommand); - Task> SearchAsync(ElasticSearchParameters searchCommand); + Task> SearchAsync(OpenSearchParameters searchCommand); IGetResponse Get(string id); Task> GetAsync(string id); - Task MultiGetAsync(string[] ids); + Task MultiGetAsync(string[] ids); - IIndexResponse IndexDocument(T documentToIndex, bool useInmemoryIndex); + IndexResponse IndexDocument(T documentToIndex, bool useInmemoryIndex); IList CategoryMultiSearch(QueryBase baseOrIdsQuery, IList sourceCategories, bool useInMemoryIndex, bool includeScores, int maxConcurrentQueries); - long Count(ElasticSearchParameters countCommand); + long Count(OpenSearchParameters countCommand); Task> ScrollAsync(int scrollTimeout, string scrollId); - Task ClearScroll(string scrollId); + Task ClearScroll(string scrollId); void DeleteDocumentFromIndex(string documentId, bool useInMemoryIndex); } diff --git a/Taxonomy.Common/Domain/Repository/Elastic/Connection/IElasticConnectionParameters.cs 
b/Taxonomy.Common/Domain/Repository/Elastic/Connection/IOpenSearchConnectionParameters.cs similarity index 61% rename from Taxonomy.Common/Domain/Repository/Elastic/Connection/IElasticConnectionParameters.cs rename to Taxonomy.Common/Domain/Repository/Elastic/Connection/IOpenSearchConnectionParameters.cs index e04e4e7..36de9b0 100644 --- a/Taxonomy.Common/Domain/Repository/Elastic/Connection/IElasticConnectionParameters.cs +++ b/Taxonomy.Common/Domain/Repository/Elastic/Connection/IOpenSearchConnectionParameters.cs @@ -1,8 +1,8 @@ using System; -namespace NationalArchives.Taxonomy.Common.Domain.Repository.Elastic +namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch { - public interface IElasticConnectionParameters + public interface IOpenSearchConnectionParameters { string Host { get; set; } string IndexDatabase { get; set; } diff --git a/Taxonomy.Common/Domain/Repository/Elastic/Connection/ElasticConnection.cs b/Taxonomy.Common/Domain/Repository/Elastic/Connection/OpenSearchConnection.cs similarity index 83% rename from Taxonomy.Common/Domain/Repository/Elastic/Connection/ElasticConnection.cs rename to Taxonomy.Common/Domain/Repository/Elastic/Connection/OpenSearchConnection.cs index 80b3171..6e95c28 100644 --- a/Taxonomy.Common/Domain/Repository/Elastic/Connection/ElasticConnection.cs +++ b/Taxonomy.Common/Domain/Repository/Elastic/Connection/OpenSearchConnection.cs @@ -1,6 +1,5 @@ -using Elasticsearch.Net; -using NationalArchives.Taxonomy.Common.BusinessObjects; -using Nest; +using NationalArchives.Taxonomy.Common.BusinessObjects; +using OpenSearch.Client; using System; using System.Collections.Generic; using System.Linq; @@ -8,40 +7,40 @@ using System.Threading; using System.Threading.Tasks; -namespace NationalArchives.Taxonomy.Common.Domain.Repository.Elastic +namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch { - public class ElasticConnection : IConnectElastic where T : class + public class OpenSearchConnection : IConnectOpenSearch where T : class { - private IElasticClient _elasticClient; - private IElasticClient _elasticClientInMemory; //TODO : Possibly use a separate connection + private IOpenSearchClient _openSearchClient; + private IOpenSearchClient _openSearchClientInMemory; //TODO : Possibly use a separate connection private ISearchRequest _searchRequest; - private ElasticConnectionParameters _parameters; + private OpenSearchConnectionParameters _parameters; private string _inMemoryIndexName; private const string HELD_BY_CODE = "HELD_BY_CODE"; private const string RESPSITORY = "RESPSITORY"; - public ElasticConnection(ElasticConnectionParameters elasticConnectionParameters) + public OpenSearchConnection(OpenSearchConnectionParameters openSearchConnectionParameters) { - _parameters = elasticConnectionParameters; + _parameters = openSearchConnectionParameters; //ServicePointManager.ServerCertificateValidationCallback += (sender, cert, chain, errors) => true; using (ConnectionSettings connectionSettings = ConnectionSettingsProvider.GetConnectionSettings(_parameters)) { //connectionSettings.DisableAutomaticProxyDetection(true); - _elasticClient = new ElasticClient(connectionSettings); + _openSearchClient = new OpenSearchClient(connectionSettings); } } - public long Count(ElasticSearchParameters searchParams) + public long Count(OpenSearchParameters searchParams) { var countRequest = new CountRequest(_parameters.IndexDatabase) { Query = SetupSearchRequest(searchParams) }; - var countResponse = _elasticClient.Count(countRequest); + var 
countResponse = _openSearchClient.Count(countRequest); if (!countResponse.IsValid) { @@ -55,7 +54,7 @@ public long Count(ElasticSearchParameters searchParams) } - public ISearchResponse Search(ElasticSearchParameters searchParams) + public ISearchResponse Search(OpenSearchParameters searchParams) { _searchRequest = new SearchRequest(_parameters.IndexDatabase) { @@ -64,14 +63,14 @@ public ISearchResponse Search(ElasticSearchParameters searchParams) Sort = SetSortOrder(searchParams.Sort) }; - var searchResponse = _elasticClient.Search(_searchRequest); + var searchResponse = _openSearchClient.Search(_searchRequest); RaiseExceptionIfResponseIsInvalid(searchResponse); return searchResponse; } - public async Task> SearchAsync(ElasticSearchParameters searchParams) + public async Task> SearchAsync(OpenSearchParameters searchParams) { _searchRequest = new SearchRequest(_parameters.IndexDatabase) { @@ -91,7 +90,7 @@ public async Task> SearchAsync(ElasticSearchParameters search try { - var searchResponse = await _elasticClient.SearchAsync(_searchRequest); + var searchResponse = await _openSearchClient.SearchAsync(_searchRequest); RaiseExceptionIfResponseIsInvalid(searchResponse); return searchResponse; } @@ -103,28 +102,28 @@ public async Task> SearchAsync(ElasticSearchParameters search public IGetResponse Get(string id) { - IGetResponse getResponse = _elasticClient.Get(id); + IGetResponse getResponse = _openSearchClient.Get(id); RaiseExceptionIfResponseIsInvalid(getResponse); return getResponse; } public async Task> GetAsync(string id) { - IGetResponse getResponse = await _elasticClient.GetAsync(id); + IGetResponse getResponse = await _openSearchClient.GetAsync(id); RaiseExceptionIfResponseIsInvalid(getResponse); return getResponse; } - public async Task MultiGetAsync(string[] ids) + public async Task MultiGetAsync(string[] ids) { - IMultiGetResponse multiGetResponse = await _elasticClient.MultiGetAsync(m => m.GetMany(ids)); + MultiGetResponse multiGetResponse = await _openSearchClient.MultiGetAsync(m => m.GetMany(ids)); RaiseExceptionIfResponseIsInvalid(multiGetResponse); return multiGetResponse; } - public IIndexResponse IndexDocument(T documentToIndex, bool useInmemoryIndex) + public IndexResponse IndexDocument(T documentToIndex, bool useInmemoryIndex) { - IIndexResponse indexResponse = null; + IndexResponse indexResponse = null; if (useInmemoryIndex) { @@ -140,20 +139,20 @@ public IIndexResponse IndexDocument(T documentToIndex, bool useInmemoryIndex) ICreateIndexRequest inMemoryIndexRequest = new CreateIndexRequest(inMemoryIndexName); inMemoryIndexRequest.Settings = indexSettingsInMemory; - var inMemoryIndex = _elasticClient.CreateIndex(inMemoryIndexRequest); + var inMemoryIndex = _openSearchClient.Indices.Create(inMemoryIndexRequest); _inMemoryIndexName = inMemoryIndex.ApiCall.Uri.PathAndQuery.Substring(1); } - indexResponse = _elasticClientInMemory.IndexDocument(documentToIndex); + indexResponse = _openSearchClientInMemory.IndexDocument(documentToIndex); //Required since the document is not immediately visible by default. // TODO May be possible in one call - see: // https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-refresh.html // But not sure if NEST client supports this? 
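On the TODO above: NEST exposes a per-request refresh option on index calls, and assuming the OpenSearch.Client fork keeps that part of the API, the indexing call and the explicit Indices.Refresh below could be collapsed into a single round trip. A minimal sketch using the fields of this class:

    // Sketch only: assumes OpenSearch.Client still offers the Refresh option on index requests.
    // Refresh.True forces the affected shard to refresh, so the document is searchable as soon
    // as this call returns and the separate Indices.Refresh call is no longer needed.
    IndexResponse indexResponse = _openSearchClientInMemory.Index(documentToIndex, i => i
        .Index(_inMemoryIndexName)
        .Refresh(OpenSearch.Net.Refresh.True));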
- _elasticClientInMemory.Refresh(inMemoryIndexName); + _openSearchClientInMemory.Indices.Refresh(inMemoryIndexName); } else { - indexResponse = _elasticClient.IndexDocument(documentToIndex); + indexResponse = _openSearchClient.IndexDocument(documentToIndex); } return indexResponse; @@ -165,12 +164,12 @@ public IList CategoryMultiSearch(QueryBase baseOrIdsQuery, // as this may be a legitimate outcome. Possibly add additional get query just to get the coument and throw exception // if no result. string indexToUse = useInMemoryIndex ? _inMemoryIndexName : _parameters.IndexDatabase; - IElasticClient targetElasticClient = useInMemoryIndex ? _elasticClientInMemory : _elasticClient; + IOpenSearchClient targetOpenSearchClient = useInMemoryIndex ? _openSearchClientInMemory : _openSearchClient; MultiSearchRequest multiSearchRequest = BuildMultiSearchRequest(baseOrIdsQuery: baseOrIdsQuery, sourceCategories: sourceCategories, indexName: indexToUse, includeScores: includeScores, maxConcurrent: maxConcurrentQueries); - IMultiSearchResponse topLevelResponse = targetElasticClient.MultiSearch(multiSearchRequest); // TODO : Async? + MultiSearchResponse topLevelResponse = targetOpenSearchClient.MultiSearch(multiSearchRequest); // TODO : Async? if(!topLevelResponse.IsValid) { @@ -207,19 +206,19 @@ public IList CategoryMultiSearch(QueryBase baseOrIdsQuery, public void DeleteDocumentFromIndex(string documentId, bool useInMemoryIndex) { string targetIndex = useInMemoryIndex ? _inMemoryIndexName : _parameters.IndexDatabase; - DeleteRequest request = new DeleteRequest(targetIndex, typeof(T).Name.ToLowerInvariant(), documentId); + DeleteRequest request = new DeleteRequest(targetIndex, documentId); new Thread(() => { // TODO: In high performance scenarios we don't want the caller // to wait for confirmation. But we do need to know about errors. // Log delete error if it occurs and signal the caller. - IDeleteResponse response = _elasticClient.Delete(request); + DeleteResponse response = _openSearchClient.Delete(request); //RaiseExceptionIfResponseIsInvalid(response); } ).Start(); } - private QueryContainer SetupSearchRequest(ElasticSearchParameters esSearchParams) + private QueryContainer SetupSearchRequest(OpenSearchParameters esSearchParams) { var booleanQuery = new BoolQuery(); var mustContainer = new List(); @@ -301,7 +300,7 @@ private IList SetSortOrder(IDictionary sortOpti foreach (var item in sortOptions) { - sort.Add(new SortField + sort.Add(new FieldSort { Field = item.Key, Order = item.Value == ResultsSortOrder.Ascending ? SortOrder.Ascending : SortOrder.Descending @@ -367,7 +366,7 @@ private MultiSearchRequest BuildMultiSearchRequest(QueryBase baseOrIdsQuery, ILi var queryContainer = new QueryContainer[] { baseOrIdsQuery, categoryQuery }; - var searchRequest = new SearchRequest(indexName, Types.All) + var searchRequest = new SearchRequest(indexName) { From = 0, Size = 1, // 1 presumably ? 
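The DeleteDocumentFromIndex change above keeps the original fire-and-forget Thread and still discards the response, which is exactly what its TODO flags. A Task-based version stays non-blocking while surfacing failures; a sketch, assuming an ILogger field were added to the connection class (none exists in it as written):

    // Sketch: non-blocking delete that still observes and logs failures.
    // _logger is an assumed ILogger field, not part of the class in this patch.
    DeleteRequest request = new DeleteRequest(targetIndex, documentId);
    _ = Task.Run(async () =>
    {
        DeleteResponse response = await _openSearchClient.DeleteAsync(request);
        if (!response.IsValid)
        {
            _logger.LogError(response.OriginalException,
                "Failed to delete document {DocumentId} from index {Index}", documentId, targetIndex);
        }
    });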
@@ -390,14 +389,14 @@ private MultiSearchRequest BuildMultiSearchRequest(QueryBase baseOrIdsQuery, ILi public async Task> ScrollAsync(int scrollTimeout, string scrollId) { - ISearchResponse loopingResponse = await _elasticClient.ScrollAsync(scrollTimeout, scrollId); + ISearchResponse loopingResponse = await _openSearchClient.ScrollAsync(scrollTimeout, scrollId); return loopingResponse; } - public async Task ClearScroll(string scrollId) + public async Task ClearScroll(string scrollId) { - IClearScrollResponse response = await _elasticClient.ClearScrollAsync(new ClearScrollRequest(scrollId)); + ClearScrollResponse response = await _openSearchClient.ClearScrollAsync(new ClearScrollRequest(scrollId)); return response; } } diff --git a/Taxonomy.Common/Domain/Repository/Elastic/Connection/ElasticConnectionParameters.cs b/Taxonomy.Common/Domain/Repository/Elastic/Connection/OpenSearchConnectionParameters.cs similarity index 64% rename from Taxonomy.Common/Domain/Repository/Elastic/Connection/ElasticConnectionParameters.cs rename to Taxonomy.Common/Domain/Repository/Elastic/Connection/OpenSearchConnectionParameters.cs index 913dcd6..e35ba1d 100644 --- a/Taxonomy.Common/Domain/Repository/Elastic/Connection/ElasticConnectionParameters.cs +++ b/Taxonomy.Common/Domain/Repository/Elastic/Connection/OpenSearchConnectionParameters.cs @@ -1,8 +1,8 @@ using System; -namespace NationalArchives.Taxonomy.Common.Domain.Repository.Elastic +namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch { - public abstract class ElasticConnectionParameters : IElasticConnectionParameters + public abstract class OpenSearchConnectionParameters : IOpenSearchConnectionParameters { public string Scheme { get; set; } public string Host { get; set; } @@ -28,31 +28,31 @@ public Uri Uri public int RequestTimeout { get; set; } - public ElasticAwsParams ElasticAwsParams { get; set; } + public OpenSearchAwsParams OpenSearchAwsParams { get; set; } } - public sealed class DiscoverySearchElasticConnectionParameters : ElasticConnectionParameters + public sealed class DiscoveryOpenSearchConnectionParameters : OpenSearchConnectionParameters { } - public sealed class CategoryDataElasticConnectionParameters : ElasticConnectionParameters + public sealed class CategoryDataOpenSearchConnectionParameters : OpenSearchConnectionParameters { } - public sealed class CategoriserElasticConnectionParameters : ElasticConnectionParameters + public sealed class CategoriserOpenSearchConnectionParameters : OpenSearchConnectionParameters { } - public sealed class UpdateElasticConnectionParameters : ElasticConnectionParameters + public sealed class UpdateOpenSearchConnectionParameters : OpenSearchConnectionParameters { } - public sealed class ElasticAwsParams + public sealed class OpenSearchAwsParams { public bool UseAwsConnection { get; set; } public string AccessKey { get; set; } diff --git a/Taxonomy.Common/Domain/Repository/Elastic/Connection/ElasticSearchParameters.cs b/Taxonomy.Common/Domain/Repository/Elastic/Connection/OpenSearchParameters.cs similarity index 97% rename from Taxonomy.Common/Domain/Repository/Elastic/Connection/ElasticSearchParameters.cs rename to Taxonomy.Common/Domain/Repository/Elastic/Connection/OpenSearchParameters.cs index a26388f..89b098c 100644 --- a/Taxonomy.Common/Domain/Repository/Elastic/Connection/ElasticSearchParameters.cs +++ b/Taxonomy.Common/Domain/Repository/Elastic/Connection/OpenSearchParameters.cs @@ -2,9 +2,9 @@ using System; using System.Collections.Generic; -namespace 
NationalArchives.Taxonomy.Common.Domain.Repository.Elastic +namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch { - public class ElasticSearchParameters + public class OpenSearchParameters { private HeldByCode _helByCode; diff --git a/Taxonomy.Common/Domain/Repository/Elastic/ElasticSearchParamsBuilder.cs b/Taxonomy.Common/Domain/Repository/Elastic/ElasticSearchParamsBuilder.cs deleted file mode 100644 index b0416e3..0000000 --- a/Taxonomy.Common/Domain/Repository/Elastic/ElasticSearchParamsBuilder.cs +++ /dev/null @@ -1,52 +0,0 @@ -using NationalArchives.Taxonomy.Common.BusinessObjects; -using NationalArchives.Taxonomy.Common.Domain.Repository.Common; -using System.Collections.Generic; - -namespace NationalArchives.Taxonomy.Common.Domain.Repository.Elastic -{ - internal class ElasticSearchParamsBuilder - { - public ElasticSearchParameters GetElasticSearchParametersForScroll(ElasticAssetBrowseParams elasticAssetBrowseParams) - { - ElasticSearchParameters searchParams = new ElasticSearchParameters() - { - Query = string.Empty, // Sends a match all query - PageSize = elasticAssetBrowseParams.PageSize, - Scroll = elasticAssetBrowseParams.ScrollTimeout, - IncludeSource = false, // Don't include the actual doc i.e. Elastic src as we only need the ID which we can get from the hit info. - HeldByCode = elasticAssetBrowseParams.HeldByCode - }; - - if (elasticAssetBrowseParams.HeldByCode != HeldByCode.ALL) - { - searchParams.FilterQueries.Add(new KeyValuePair>(ElasticFieldConstants.ES_HELD_BY_CODE, new[] { elasticAssetBrowseParams.HeldByCode.ToString() })); - } - - return searchParams; - } - - public ElasticSearchParameters GetElasticSearchParameters(int pagingOffset, int pageSize) - { - ElasticSearchParameters searchParams = new ElasticSearchParameters() { PagingOffset = pagingOffset, PageSize = pageSize }; - return searchParams; - } - - public ElasticSearchParameters GetElasticSearchParameters(string query, HeldByCode heldByCode, int pagingOffset, int pageSize) - { - ElasticSearchParameters searchParams = new ElasticSearchParameters() - { - Query = query, - PagingOffset = pagingOffset, - PageSize = pageSize, - HeldByCode = heldByCode - }; - - if (searchParams.HeldByCode != HeldByCode.ALL) - { - searchParams.FilterQueries.Add(new KeyValuePair>(ElasticFieldConstants.ES_HELD_BY_CODE, new[] { searchParams.HeldByCode.ToString() })); - } - - return searchParams; - } - } -} diff --git a/Taxonomy.Common/Domain/Repository/Elastic/IIAViewRepository.cs b/Taxonomy.Common/Domain/Repository/Elastic/IIAViewRepository.cs index 70c9276..2b30fd3 100644 --- a/Taxonomy.Common/Domain/Repository/Elastic/IIAViewRepository.cs +++ b/Taxonomy.Common/Domain/Repository/Elastic/IIAViewRepository.cs @@ -3,7 +3,7 @@ using System.Collections.Generic; using System.Threading.Tasks; -namespace NationalArchives.Taxonomy.Common.Domain.Repository.Elastic +namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch { public interface IIAViewRepository { @@ -12,6 +12,6 @@ public interface IIAViewRepository Task SearchDocByDocReference(string docReference); Task> SearchDocByMultipleDocReferences(string[] docReference); - InformationAssetScrollList BrowseAllDocReferences(ElasticAssetBrowseParams browseParams, string scrollCursor); + InformationAssetScrollList BrowseAllDocReferences(OpenSearchAssetBrowseParams browseParams, string scrollCursor); } } diff --git a/Taxonomy.Common/Domain/Repository/Elastic/IElasticIAViewUpdateRepository.cs 
b/Taxonomy.Common/Domain/Repository/Elastic/IOpenSearchIAViewUpdateRepository.cs similarity index 73% rename from Taxonomy.Common/Domain/Repository/Elastic/IElasticIAViewUpdateRepository.cs rename to Taxonomy.Common/Domain/Repository/Elastic/IOpenSearchIAViewUpdateRepository.cs index 1811b7a..dba5e80 100644 --- a/Taxonomy.Common/Domain/Repository/Elastic/IElasticIAViewUpdateRepository.cs +++ b/Taxonomy.Common/Domain/Repository/Elastic/IOpenSearchIAViewUpdateRepository.cs @@ -3,9 +3,9 @@ using System.Collections.Generic; using System.Text; -namespace NationalArchives.Taxonomy.Common.Domain.Repository.Elastic +namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch { - public interface IElasticIAViewUpdateRepository + public interface IOpenSearchIAViewUpdateRepository { IaidWithCategories GetByDocReference(string docReference); diff --git a/Taxonomy.Common/Domain/Repository/Elastic/ElasticCategoryRepository.cs b/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchCategoryRepository.cs similarity index 74% rename from Taxonomy.Common/Domain/Repository/Elastic/ElasticCategoryRepository.cs rename to Taxonomy.Common/Domain/Repository/Elastic/OpenSearchCategoryRepository.cs index 3cbb9c8..3591e37 100644 --- a/Taxonomy.Common/Domain/Repository/Elastic/ElasticCategoryRepository.cs +++ b/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchCategoryRepository.cs @@ -1,26 +1,22 @@ using AutoMapper; using NationalArchives.Taxonomy.Common.BusinessObjects; -using NationalArchives.Taxonomy.Common.DataObjects.Elastic; +using NationalArchives.Taxonomy.Common.DataObjects.OpenSearch; using NationalArchives.Taxonomy.Common.Domain.Repository.Common; -using Nest; +using OpenSearch.Client; using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using System.Threading.Tasks; -namespace NationalArchives.Taxonomy.Common.Domain.Repository.Elastic +namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch { - - - public class ElasticCategoryRepository : AbstractElasticRespository, ICategoryRepository + public class OpenSearchCategoryRepository : AbstractOpenSearchRespository, ICategoryRepository { private const int MAX_CATEGORIES = 250; //TODO: Probably get the count dynamically and cache. - private static IList _categories; - - public ElasticCategoryRepository(IConnectElastic elasticConnection, IMapper mapper) : base(elasticConnection, mapper) + public OpenSearchCategoryRepository(IConnectOpenSearch openSearchConnection, IMapper mapper) : base(openSearchConnection, mapper) { } public long Count() @@ -42,17 +38,17 @@ public async Task> FindAll() try { - var elasticParamsBuilder = new ElasticSearchParamsBuilder(); + var openSearchParamsBuilder = new OpenSearchParamsBuilder(); - var elasticParams = elasticParamsBuilder.GetElasticSearchParameters(pagingOffset: 0, pageSize: MAX_CATEGORIES); + var openSearchParams = openSearchParamsBuilder.GetOpenSearchParameters(pagingOffset: 0, pageSize: MAX_CATEGORIES); - ISearchResponse elasticCategories = await _elasticConnection.SearchAsync(elasticParams); + ISearchResponse openSearchCategories = await _openSearchConnection.SearchAsync(openSearchParams); var categories = new List(); - foreach (var item in elasticCategories.Hits) + foreach (var item in openSearchCategories.Hits) { - CategoryFromElastic searchResult = item.Source; + CategoryFromOpenSearch searchResult = item.Source; var result = _mapper.Map(searchResult); result.Score = item.Score.HasValue ? 
(double)item.Score : 0; diff --git a/Taxonomy.Common/Domain/Repository/Elastic/ElasticFieldConstants.cs b/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchFieldConstants.cs similarity index 88% rename from Taxonomy.Common/Domain/Repository/Elastic/ElasticFieldConstants.cs rename to Taxonomy.Common/Domain/Repository/Elastic/OpenSearchFieldConstants.cs index b895bbb..7eb1efb 100644 --- a/Taxonomy.Common/Domain/Repository/Elastic/ElasticFieldConstants.cs +++ b/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchFieldConstants.cs @@ -2,9 +2,9 @@ using System.Collections.Generic; using System.Text; -namespace NationalArchives.Taxonomy.Common.Domain.Repository.Elastic +namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch { - internal static class ElasticFieldConstants + internal static class OpenSearchFieldConstants { public const string DESCRIPTION = "DESCRIPTION"; public const string CATALOGUE_REFERENCE = "CATALOGUE_REFERENCE"; diff --git a/Taxonomy.Common/Domain/Repository/Elastic/ElasticIAViewRepository.cs b/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchIAViewRepository.cs similarity index 72% rename from Taxonomy.Common/Domain/Repository/Elastic/ElasticIAViewRepository.cs rename to Taxonomy.Common/Domain/Repository/Elastic/OpenSearchIAViewRepository.cs index 1ae5112..4ccc94b 100644 --- a/Taxonomy.Common/Domain/Repository/Elastic/ElasticIAViewRepository.cs +++ b/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchIAViewRepository.cs @@ -1,23 +1,25 @@ using AutoMapper; using NationalArchives.Taxonomy.Common.BusinessObjects; -using NationalArchives.Taxonomy.Common.DataObjects.Elastic; +using NationalArchives.Taxonomy.Common.DataObjects.OpenSearch; using NationalArchives.Taxonomy.Common.Domain.Repository.Common; using NationalArchives.Taxonomy.Common.Domain.Repository.Lucene; using NationalArchives.Taxonomy.Common.Helpers; -using Nest; +using OpenSearch.Client; + +//using Nest; using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using System.Threading.Tasks; -namespace NationalArchives.Taxonomy.Common.Domain.Repository.Elastic +namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch { - public class ElasticIAViewRepository : AbstractElasticRespository, IIAViewRepository + public class OpenSearchIAViewRepository : AbstractOpenSearchRespository, IIAViewRepository { private LuceneHelperTools _luceneHelperTools; - public ElasticIAViewRepository(IConnectElastic elasticConnection, LuceneHelperTools luceneHelperTools, IMapper mapper) : base(elasticConnection, mapper) + public OpenSearchIAViewRepository(IConnectOpenSearch openSearchConnection, LuceneHelperTools luceneHelperTools, IMapper mapper) : base(openSearchConnection, mapper) { _luceneHelperTools = luceneHelperTools; } @@ -31,13 +33,13 @@ public async Task SearchDocByDocReference(string docRefere // So passing ElasticSearchResultAssetView will bring back a 404 even though though its a base type //IGetResponse searchResponse = await elasticClient.GetAsync(docReference); - IGetResponse response = await _elasticConnection.GetAsync(docReference); + IGetResponse response = await _openSearchConnection.GetAsync(docReference); if(!response.IsValid) { - throw new TaxonomyException(TaxonomyErrorType.ELASTIC_INVALID_RESPONSE, $"Error retrieving document id {docReference} from Elasic Search", response.OriginalException); + throw new TaxonomyException(TaxonomyErrorType.OPEN_SEARCH_INVALID_RESPONSE, $"Error retrieving document id {docReference} from Open Search", 
response.OriginalException); } - ElasticRecordAssetView searchResult = response.Source; + OpenSearchRecordAssetView searchResult = response.Source; //Alternative using ISearchResponse instead of IGetResponse. NOt sure which is preferred // - Get is detailed at https://www.elastic.co/guide/en/elasticsearch/client/net-api/1.x/get.html @@ -64,18 +66,18 @@ public async Task> SearchDocByMultipleDocReferences( try { - IMultiGetResponse response = await _elasticConnection.MultiGetAsync(docReferences); + MultiGetResponse response = await _openSearchConnection.MultiGetAsync(docReferences); if (!response.IsValid) { - throw new TaxonomyException(TaxonomyErrorType.ELASTIC_INVALID_RESPONSE, $"Error retrieving mutiple document request from Elasic Search. The IAIDs submitted were: {String.Join(";", docReferences)}", response.OriginalException); + throw new TaxonomyException(TaxonomyErrorType.OPEN_SEARCH_INVALID_RESPONSE, $"Error retrieving multiple document request from Open Search. The IAIDs submitted were: {String.Join(";", docReferences)}", response.OriginalException); } - var results = response.GetMany(docReferences); + var results = response.GetMany(docReferences); foreach (var result in results) { - ElasticRecordAssetView searchResult = result.Source; + OpenSearchRecordAssetView searchResult = result.Source; var infoAsset = _mapper.Map(searchResult); informationAssets.Add(infoAsset); } @@ -104,8 +106,8 @@ public IList FindRelevantCategoriesForDocument(Information try { - ElasticRecordAssetView esAsset = _mapper.Map(iaView); - IIndexResponse response = _elasticConnection.IndexDocument(esAsset, useInmemoryIndex: true); + OpenSearchRecordAssetView esAsset = _mapper.Map(iaView); + IndexResponse response = _openSearchConnection.IndexDocument(esAsset, useInmemoryIndex: true); //Race condition in ES? //System.Threading.Thread.Sleep(1000); @@ -128,10 +130,10 @@ public IList FindRelevantCategoriesForDocument(Information //return matchedCategoriesInMemory; // TODO: Async ? - IList matchedCategories = _elasticConnection.CategoryMultiSearch(base_term_query, sourceCategories, true, includeScores, 50); + IList matchedCategories = _openSearchConnection.CategoryMultiSearch(base_term_query, sourceCategories, true, includeScores, 50); //TODO: Possibly mange within the connection itself.
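On the TODO above about managing this inside the connection: a hypothetical helper on the connection class could own the whole index / multi-search / delete cycle so the repository never has to remember the clean-up. The method name and the element types below are assumptions for illustration, not part of the patch:

    // Hypothetical helper on the OpenSearch connection: index into the in-memory index,
    // run the category multi-search, and always remove the temporary document afterwards.
    public IList<CategorisationResult> CategoriseWithTemporaryDocument(
        T document, string documentId, QueryBase baseOrIdsQuery,
        IList<Category> sourceCategories, bool includeScores, int maxConcurrentQueries)
    {
        IndexDocument(document, useInmemoryIndex: true);
        try
        {
            return CategoryMultiSearch(baseOrIdsQuery, sourceCategories, true, includeScores, maxConcurrentQueries);
        }
        finally
        {
            DeleteDocumentFromIndex(documentId, useInMemoryIndex: true);
        }
    }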
- _elasticConnection.DeleteDocumentFromIndex(iaView.DocReference, true); + _openSearchConnection.DeleteDocumentFromIndex(iaView.DocReference, true); return matchedCategories; @@ -147,8 +149,8 @@ public async Task> PerformSearch(St { try { - var elasticSearchParamsBuilder = new ElasticSearchParamsBuilder(); - ElasticSearchParameters searchParams = elasticSearchParamsBuilder.GetElasticSearchParameters(query: query, heldByCode: heldByCode, pageSize: limit, pagingOffset: offset); + var openSearchParamsBuilder = new OpenSearchParamsBuilder(); + OpenSearchParameters searchParams = openSearchParamsBuilder.GetOpenSearchParameters(query: query, heldByCode: heldByCode, pageSize: limit, pagingOffset: offset); var fieldList = new List(); @@ -164,11 +166,11 @@ public async Task> PerformSearch(St searchParams.SearchFields = fieldList; - ISearchResponse searchResponse = await _elasticConnection.SearchAsync(searchParams); + ISearchResponse searchResponse = await _openSearchConnection.SearchAsync(searchParams); Debug.Print(searchResponse.GetType().Name); - var paginatedListFactory = new IAListFactory(searchResponse, _mapper); + var paginatedListFactory = new IAListFactory(searchResponse, _mapper); var paginatedList = paginatedListFactory.CreatePaginatedList(limit: limit, offset: offset, minScore: minScore); return paginatedList; @@ -180,36 +182,36 @@ public async Task> PerformSearch(St } } - public InformationAssetScrollList BrowseAllDocReferences(ElasticAssetBrowseParams browseParams, string scrollId = null) + public InformationAssetScrollList BrowseAllDocReferences(OpenSearchAssetBrowseParams browseParams, string scrollId = null) { try { if (String.IsNullOrWhiteSpace(scrollId)) { - var elasticParamsBuilder = new ElasticSearchParamsBuilder(); - ElasticSearchParameters searchParams = elasticParamsBuilder.GetElasticSearchParametersForScroll(browseParams); + var openSearchParamsBuilder = new OpenSearchParamsBuilder(); + OpenSearchParameters searchParams = openSearchParamsBuilder.GetSearchParametersForScroll(browseParams); // Get the first set of results which includes the sccroll ID to use in future requests. 
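For context on how BrowseAllDocReferences is driven: the first call passes no scroll id, the response carries the id to feed into each subsequent call, and the repository clears the scroll context once a page comes back empty. A caller-side sketch; the repository variable and the property names on InformationAssetScrollList are illustrative assumptions:

    // Illustrative caller loop; 'iaViewRepository' is any IIAViewRepository implementation.
    // ScrollId and Iaids are assumed property names on InformationAssetScrollList.
    var browseParams = new OpenSearchAssetBrowseParams { PageSize = 1000, ScrollTimeout = 60, HeldByCode = HeldByCode.ALL };
    string scrollCursor = null;
    while (true)
    {
        InformationAssetScrollList page = iaViewRepository.BrowseAllDocReferences(browseParams, scrollCursor);
        if (page == null || page.Iaids == null || page.Iaids.Count == 0)
        {
            break; // scroll exhausted; the repository has already cleared the scroll context
        }
        scrollCursor = page.ScrollId;
        // hand page.Iaids (the document references) off for categorisation here
    }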
- Task> assetFetch = _elasticConnection.SearchAsync(searchParams); + Task> assetFetch = _openSearchConnection.SearchAsync(searchParams); var awaiter = assetFetch.GetAwaiter(); var searchResponse = awaiter.GetResult(); if(assetFetch.IsFaulted) { - throw new TaxonomyException(TaxonomyErrorType.ELASTIC_SCROLL_EXCEPTION, "Unable to fetch list of asset IDs on initial scroll request.", assetFetch.Exception.Flatten()); + throw new TaxonomyException(TaxonomyErrorType.OPEN_SEARCH_SCROLL_EXCEPTION, "Unable to fetch list of asset IDs on initial scroll request.", assetFetch.Exception.Flatten()); } if (String.IsNullOrEmpty(searchResponse.ScrollId)) { - throw new TaxonomyException(TaxonomyErrorType.ELASTIC_SCROLL_EXCEPTION, "Unable to retrieve scroll ID for paging information assets."); + throw new TaxonomyException(TaxonomyErrorType.OPEN_SEARCH_SCROLL_EXCEPTION, "Unable to retrieve scroll ID for paging information assets."); } return new InformationAssetScrollList(searchResponse.ScrollId, searchResponse.Hits.Select(h => h.Id).ToList()); } else // existing scroll request { - var scrollResponse = _elasticConnection.ScrollAsync(browseParams.PageSize, scrollId); + var scrollResponse = _openSearchConnection.ScrollAsync(browseParams.PageSize, scrollId); if (scrollResponse.Result.Hits.Any()) { return new InformationAssetScrollList (scrollId, scrollResponse.Result.Hits.Select(h => h.Id).ToList()); @@ -217,7 +219,7 @@ public InformationAssetScrollList BrowseAllDocReferences(ElasticAssetBrowseParam else { //TODO: Async? - var response = _elasticConnection.ClearScroll(scrollId).Result; + var response = _openSearchConnection.ClearScroll(scrollId).Result; if(!response.IsValid) { //throw new TaxonomyException(TaxonomyErrorType.ELASTIC_SCROLL_EXCEPTION, "Error clearing Information Asset Scroll", response.OriginalException); diff --git a/Taxonomy.Common/Domain/Repository/Elastic/ElasticIAViewUpdateRepository.cs b/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchIAViewUpdateRepository.cs similarity index 60% rename from Taxonomy.Common/Domain/Repository/Elastic/ElasticIAViewUpdateRepository.cs rename to Taxonomy.Common/Domain/Repository/Elastic/OpenSearchIAViewUpdateRepository.cs index d8b7d3b..249134a 100644 --- a/Taxonomy.Common/Domain/Repository/Elastic/ElasticIAViewUpdateRepository.cs +++ b/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchIAViewUpdateRepository.cs @@ -1,26 +1,25 @@ using NationalArchives.Taxonomy.Common.BusinessObjects; -using NationalArchives.Taxonomy.Common.DataObjects.Elastic; -using Nest; +using NationalArchives.Taxonomy.Common.DataObjects.OpenSearch; +using OpenSearch.Client; using System; using System.Collections.Generic; using System.Linq; using System.Text; -namespace NationalArchives.Taxonomy.Common.Domain.Repository.Elastic +namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch { - public class ElasticIAViewUpdateRepository : IElasticIAViewUpdateRepository + public class OpenSearchIAViewUpdateRepository : IOpenSearchIAViewUpdateRepository { - //private ElasticConnectionParameters _parameters; - private ElasticClient _elasticClient; + private OpenSearchClient _openSearchClient; //TODO: Not using the IConnectElastic interface here, it just seems to get in the way, look at refactoring generally. // But see where we get to on using Lucene.net and the InfoAseet input source. 
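For reference, the connection these repositories now receive swaps NEST's AwsHttpConnection for the OpenSearch client's SigV4 handler (see the ConnectionSettingsProvider hunk earlier in this patch). A condensed sketch of the AWS path; the endpoint, credentials and region below are placeholders rather than values from the patch:

    // Condensed sketch of the AWS-signed connection built in ConnectionSettingsProvider.
    // 'assumeRoleCredentials' is a placeholder for the assumed-role AWSCredentials resolved elsewhere.
    var pool = new SingleNodeConnectionPool(new Uri("https://my-opensearch-domain.example:443"));
    var awsConnection = new AwsSigV4HttpConnection(assumeRoleCredentials, RegionEndpoint.EUWest2);
    var settings = new ConnectionSettings(pool, awsConnection);
    var client = new OpenSearchClient(settings);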
- public ElasticIAViewUpdateRepository(ElasticConnectionParameters elasticConnectionParameters) + public OpenSearchIAViewUpdateRepository(OpenSearchConnectionParameters openSearchConnectionParameters) { - using (ConnectionSettings connectionSettings = ConnectionSettingsProvider.GetConnectionSettings(elasticConnectionParameters)) + using (ConnectionSettings connectionSettings = ConnectionSettingsProvider.GetConnectionSettings(openSearchConnectionParameters)) { connectionSettings.DefaultFieldNameInferrer(p => p); - _elasticClient = new ElasticClient(connectionSettings); + _openSearchClient = new OpenSearchClient(connectionSettings); }; } @@ -37,12 +36,12 @@ public void Save(IaidWithCategories iaidWithCategories) } var update = new { TAXONOMY_ID = iaidWithCategories.CategoryIds }; - var response = _elasticClient.Update(iaidWithCategories.Iaid, u => u.Doc(update).DocAsUpsert()); + var response = _openSearchClient.Update(iaidWithCategories.Iaid, u => u.Doc(update).DocAsUpsert()); if(!response.IsValid) { - string errorInfo = GetElasticErrorINfo(response); + string errorInfo = GetOpenSearchErrorInfo(response); - throw new TaxonomyException(TaxonomyErrorType.ELASTIC_UPDATE_ERROR, errorInfo); + throw new TaxonomyException(TaxonomyErrorType.OPEN_SEARCH_UPDATE_ERROR, errorInfo); } } @@ -50,7 +49,7 @@ public void SaveAll(IEnumerable iaidsWithCategories) { if(iaidsWithCategories == null) { - throw new TaxonomyException("No IAID list with categories supplied to the elastic search update service."); + throw new TaxonomyException("No IAID list with categories supplied to the Open search update service."); } var descriptor = new BulkDescriptor(); @@ -58,21 +57,21 @@ public void SaveAll(IEnumerable iaidsWithCategories) foreach (var iaidWithCategories in iaidsWithCategories) { var doc = new { TAXONOMY_ID = iaidWithCategories.CategoryIds }; - descriptor.Update(u => u.Doc(doc).DocAsUpsert(true).Id(iaidWithCategories.Iaid)); + descriptor.Update(u => u.Doc(doc).DocAsUpsert(true).Id(iaidWithCategories.Iaid)); } //TODO: Async? 
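On the TODO above: blocking on .Result ties up a thread, can deadlock under a synchronization context, and wraps failures in AggregateException. If IOpenSearchIAViewUpdateRepository were widened to a Task-returning method (a change this patch does not make), the bulk submission would simply be awaited; a sketch:

    // Hypothetical async variant; SaveAll and its interface would need to become Task-returning.
    private async Task SubmitBulkAsync(BulkDescriptor descriptor)
    {
        BulkResponse response = await _openSearchClient.BulkAsync(descriptor);
        if (!response.IsValid)
        {
            throw new TaxonomyException(TaxonomyErrorType.OPEN_SEARCH_BULK_UPDATE_ERROR, GetOpenSearchErrorInfo(response));
        }
    }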
- var response = _elasticClient.BulkAsync(descriptor).Result; + var response = _openSearchClient.BulkAsync(descriptor).Result; if (!response.IsValid) { - string errorInfo = GetElasticErrorINfo(response); - throw new TaxonomyException(TaxonomyErrorType.ELASTIC_BULK_UPDATE_ERROR, errorInfo); + string errorInfo = GetOpenSearchErrorInfo(response); + throw new TaxonomyException(TaxonomyErrorType.OPEN_SEARCH_BULK_UPDATE_ERROR, errorInfo); } } - private String GetElasticErrorINfo(IResponse response) + private String GetOpenSearchErrorInfo(IResponse response) { - StringBuilder sb = new StringBuilder("Invalid update response from Elastic Search"); + StringBuilder sb = new StringBuilder("Invalid update response from Open Search"); sb.Append(Environment.NewLine); if (response.OriginalException != null) diff --git a/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchParamsBuilder.cs b/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchParamsBuilder.cs new file mode 100644 index 0000000..194af1f --- /dev/null +++ b/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchParamsBuilder.cs @@ -0,0 +1,52 @@ +using NationalArchives.Taxonomy.Common.BusinessObjects; +using NationalArchives.Taxonomy.Common.Domain.Repository.Common; +using System.Collections.Generic; + +namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch +{ + internal class OpenSearchParamsBuilder + { + public OpenSearchParameters GetSearchParametersForScroll(OpenSearchAssetBrowseParams openSearchAssetBrowseParams) + { + OpenSearchParameters searchParams = new OpenSearchParameters() + { + Query = string.Empty, // Sends a match all query + PageSize = openSearchAssetBrowseParams.PageSize, + Scroll = openSearchAssetBrowseParams.ScrollTimeout, + IncludeSource = false, // Don't include the actual doc i.e. src as we only need the ID which we can get from the hit info. 
+ HeldByCode = openSearchAssetBrowseParams.HeldByCode + }; + + if (openSearchAssetBrowseParams.HeldByCode != HeldByCode.ALL) + { + searchParams.FilterQueries.Add(new KeyValuePair>(OpenSearchFieldConstants.ES_HELD_BY_CODE, new[] { openSearchAssetBrowseParams.HeldByCode.ToString() })); + } + + return searchParams; + } + + public OpenSearchParameters GetOpenSearchParameters(int pagingOffset, int pageSize) + { + OpenSearchParameters searchParams = new OpenSearchParameters() { PagingOffset = pagingOffset, PageSize = pageSize }; + return searchParams; + } + + public OpenSearchParameters GetOpenSearchParameters(string query, HeldByCode heldByCode, int pagingOffset, int pageSize) + { + OpenSearchParameters searchParams = new OpenSearchParameters() + { + Query = query, + PagingOffset = pagingOffset, + PageSize = pageSize, + HeldByCode = heldByCode + }; + + if (searchParams.HeldByCode != HeldByCode.ALL) + { + searchParams.FilterQueries.Add(new KeyValuePair>(OpenSearchFieldConstants.ES_HELD_BY_CODE, new[] { searchParams.HeldByCode.ToString() })); + } + + return searchParams; + } + } +} diff --git a/Taxonomy.Common/Domain/Repository/Lucene/InMemoryCategoriserRepository.cs b/Taxonomy.Common/Domain/Repository/Lucene/InMemoryCategoriserRepository.cs index 85020d5..701b6e8 100644 --- a/Taxonomy.Common/Domain/Repository/Lucene/InMemoryCategoriserRepository.cs +++ b/Taxonomy.Common/Domain/Repository/Lucene/InMemoryCategoriserRepository.cs @@ -14,7 +14,7 @@ using System.Linq; using lnu = Lucene.Net.Util; using Microsoft.Extensions.Logging; -using NationalArchives.Taxonomy.Common.Domain.Repository.Elastic; +using NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch; namespace NationalArchives.Taxonomy.Common.Domain.Repository.Lucene { @@ -217,16 +217,16 @@ private Document GetLuceneDocumentFromIaView(InformationAssetView iaView, bool s switch(_iaViewIndexAnalyser) { case IAViewTextNoCasNoPuncAnalyser tncnp: - listOfFields.AddRange(GetCopyIAViewFieldsToTaxonomyField(iaView, ElasticFieldConstants.TEXT_NO_CAS_NO_PUNC)); + listOfFields.AddRange(GetCopyIAViewFieldsToTaxonomyField(iaView, OpenSearchFieldConstants.TEXT_NO_CAS_NO_PUNC)); break; case IAViewTextCasNoPuncAnalyser tcnp: - listOfFields.AddRange(GetCopyIAViewFieldsToTaxonomyField(iaView, ElasticFieldConstants.TEXT_CAS_NO_PUNC)); + listOfFields.AddRange(GetCopyIAViewFieldsToTaxonomyField(iaView, OpenSearchFieldConstants.TEXT_CAS_NO_PUNC)); break; case IAViewTextCasPuncAnalyser tcp: - listOfFields.AddRange(GetCopyIAViewFieldsToTaxonomyField(iaView, ElasticFieldConstants.TEXT_CAS_PUNC)); + listOfFields.AddRange(GetCopyIAViewFieldsToTaxonomyField(iaView, OpenSearchFieldConstants.TEXT_CAS_PUNC)); break; case IAViewTextGenAnalyser tg: - listOfFields.AddRange(GetCopyIAViewFieldsToTaxonomyField(iaView, ElasticFieldConstants.TEXT_GEN)); + listOfFields.AddRange(GetCopyIAViewFieldsToTaxonomyField(iaView, OpenSearchFieldConstants.TEXT_GEN)); break; default: listOfFields.AddRange(GetListOfUnmodifiedFieldsFromIAView(iaView)); @@ -262,18 +262,18 @@ private List GetCopyIAViewFieldsToTaxonomyField(InformationAssetView iaVi { listOfFields.Add(new TextField(targetCommonIndexField, iaView.Title, Field.Store.NO)); } - if (!String.IsNullOrWhiteSpace(iaView.ContextDescription) && queryFields.Contains(ElasticFieldConstants.CONTEXT, StringComparer.OrdinalIgnoreCase)) + if (!String.IsNullOrWhiteSpace(iaView.ContextDescription) && queryFields.Contains(OpenSearchFieldConstants.CONTEXT, StringComparer.OrdinalIgnoreCase)) { listOfFields.Add(new 
TextField(targetCommonIndexField, iaView.ContextDescription, Field.Store.NO)); } - if (iaView.CorpBodys != null && iaView.CorpBodys.Length > 0 && queryFields.Contains(ElasticFieldConstants.CORPORATE_BODY, StringComparer.OrdinalIgnoreCase)) + if (iaView.CorpBodys != null && iaView.CorpBodys.Length > 0 && queryFields.Contains(OpenSearchFieldConstants.CORPORATE_BODY, StringComparer.OrdinalIgnoreCase)) { foreach (string corpBody in iaView.CorpBodys) { listOfFields.Add(new TextField(targetCommonIndexField, corpBody, Field.Store.NO)); } } - if (iaView.Subjects != null && iaView.Subjects.Length > 0 && queryFields.Contains(ElasticFieldConstants.SUBJECT, StringComparer.OrdinalIgnoreCase)) + if (iaView.Subjects != null && iaView.Subjects.Length > 0 && queryFields.Contains(OpenSearchFieldConstants.SUBJECT, StringComparer.OrdinalIgnoreCase)) { foreach (string subject in iaView.Subjects) { @@ -281,21 +281,21 @@ private List GetCopyIAViewFieldsToTaxonomyField(InformationAssetView iaVi } } - if (iaView.Person_FullName != null && iaView.Person_FullName.Length > 0 && queryFields.Contains(ElasticFieldConstants.PERSON, StringComparer.OrdinalIgnoreCase)) + if (iaView.Person_FullName != null && iaView.Person_FullName.Length > 0 && queryFields.Contains(OpenSearchFieldConstants.PERSON, StringComparer.OrdinalIgnoreCase)) { foreach (string person in iaView.Person_FullName) { listOfFields.Add(new TextField(targetCommonIndexField, person, Field.Store.NO)); } } - if (iaView.Place_Name != null && iaView.Place_Name.Length > 0 && queryFields.Contains(ElasticFieldConstants.PLACE_NAME, StringComparer.OrdinalIgnoreCase)) + if (iaView.Place_Name != null && iaView.Place_Name.Length > 0 && queryFields.Contains(OpenSearchFieldConstants.PLACE_NAME, StringComparer.OrdinalIgnoreCase)) { foreach (string place in iaView.Place_Name) { listOfFields.Add(new TextField(targetCommonIndexField, place, Field.Store.NO)); } } - if (!String.IsNullOrWhiteSpace(iaView.CatDocRef) && queryFields.Contains(ElasticFieldConstants.CATALOGUE_REFERENCE, StringComparer.OrdinalIgnoreCase)) + if (!String.IsNullOrWhiteSpace(iaView.CatDocRef) && queryFields.Contains(OpenSearchFieldConstants.CATALOGUE_REFERENCE, StringComparer.OrdinalIgnoreCase)) { listOfFields.Add(new TextField(targetCommonIndexField, iaView.CatDocRef, Field.Store.NO)); } diff --git a/Taxonomy.Common/Domain/Repository/Lucene/LuceneHelperTools.cs b/Taxonomy.Common/Domain/Repository/Lucene/LuceneHelperTools.cs index 9ba424c..05736f1 100644 --- a/Taxonomy.Common/Domain/Repository/Lucene/LuceneHelperTools.cs +++ b/Taxonomy.Common/Domain/Repository/Lucene/LuceneHelperTools.cs @@ -9,7 +9,7 @@ using Lucene.Net.Util; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; -using el = NationalArchives.Taxonomy.Common.Domain.Repository.Elastic; +using el = NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch; using System; using System.Collections.Generic; using System.Collections.ObjectModel; @@ -225,7 +225,7 @@ public static void ConfigureLuceneServices(CategoriserLuceneParams categoriserLu switch (defaultTaxonomyField) { - case el.ElasticFieldConstants.TEXT_NO_CAS_NO_PUNC: + case el.OpenSearchFieldConstants.TEXT_NO_CAS_NO_PUNC: // IAViewTextNoCasNoPuncAnalyser services.AddTransient((ctx) => { @@ -233,7 +233,7 @@ public static void ConfigureLuceneServices(CategoriserLuceneParams categoriserLu return new IAViewTextNoCasNoPuncAnalyser(synonymFilterFactory, wordDelimiterFilterFactory, AnalyzerType.INDEX, logger); }); break; - case 
el.ElasticFieldConstants.TEXT_CAS_NO_PUNC: + case el.OpenSearchFieldConstants.TEXT_CAS_NO_PUNC: // IAViewTextCasNoPuncAnalyser services.AddTransient((ctx) => { @@ -241,7 +241,7 @@ public static void ConfigureLuceneServices(CategoriserLuceneParams categoriserLu return new IAViewTextCasNoPuncAnalyser(synonymFilterFactory, wordDelimiterFilterFactory, AnalyzerType.INDEX, logger); }); break; - case el.ElasticFieldConstants.TEXT_CAS_PUNC: + case el.OpenSearchFieldConstants.TEXT_CAS_PUNC: // IAViewTextCasPuncAnalyser services.AddTransient((ctx) => { @@ -249,7 +249,7 @@ public static void ConfigureLuceneServices(CategoriserLuceneParams categoriserLu return new IAViewTextCasPuncAnalyser(stopFilterFactory, synonymFilterFactory, AnalyzerType.INDEX, logger); }); break; - case el.ElasticFieldConstants.TEXT_GEN: + case el.OpenSearchFieldConstants.TEXT_GEN: services.AddTransient((ctx) => { ILogger logger = ctx.GetRequiredService>(); diff --git a/Taxonomy.Common/Helpers/IAListFactory.cs b/Taxonomy.Common/Helpers/IAListFactory.cs index 5a19a5b..9d33158 100644 --- a/Taxonomy.Common/Helpers/IAListFactory.cs +++ b/Taxonomy.Common/Helpers/IAListFactory.cs @@ -1,7 +1,9 @@ using AutoMapper; -using NationalArchives.Taxonomy.Common.DataObjects.Elastic; +using NationalArchives.Taxonomy.Common.DataObjects.OpenSearch; using NationalArchives.Taxonomy.Common.Domain; -using Nest; +using OpenSearch.Client; + +//using Nest; using System.Collections.Generic; namespace NationalArchives.Taxonomy.Common.Helpers diff --git a/Taxonomy.Common/Mappers/MappingProfile.cs b/Taxonomy.Common/Mappers/MappingProfile.cs index a2c509e..dcb3a8e 100644 --- a/Taxonomy.Common/Mappers/MappingProfile.cs +++ b/Taxonomy.Common/Mappers/MappingProfile.cs @@ -1,6 +1,6 @@ using AutoMapper; using NationalArchives.Taxonomy.Common.BusinessObjects; -using NationalArchives.Taxonomy.Common.DataObjects.Elastic; +using NationalArchives.Taxonomy.Common.DataObjects.OpenSearch; using NationalArchives.Taxonomy.Common.DataObjects.Mongo; using NationalArchives.Taxonomy.Common.Domain; @@ -10,7 +10,7 @@ class MappingProfile : Profile { public MappingProfile() { - CreateMap() + CreateMap() .ForMember(dest => dest.CoveringDates, opt => opt.MapFrom(source => source.COVERING_DATES)) .ForMember(dest => dest.Description, opt => opt.MapFrom(source => source.DESCRIPTION)) .ForMember(dest => dest.Title, opt => opt.MapFrom(source => source.TITLE)) @@ -25,12 +25,12 @@ public MappingProfile() .ForMember(dest => dest.Subjects, opt => opt.MapFrom(source => source.SUBJECT)) .ForMember(dest => dest.DocReference, opt => opt.MapFrom(source => source.ID)) .ForMember(dest => dest.ContextDescription, opt => opt.MapFrom(source => source.CONTEXT)).IncludeAllDerived() - .Include().ReverseMap(); + .Include().ReverseMap(); - CreateMap() + CreateMap() .ForMember(dest => dest.Score, opt => opt.MapFrom(source => source.Score)).ReverseMap(); - CreateMap() + CreateMap() .ForMember(dest => dest.Id, opt => opt.MapFrom(source => source.ID)) .ForMember(dest => dest.Title, opt => opt.MapFrom(source => source.title)) .ForMember(dest => dest.Query, opt => opt.MapFrom(source => source.query_text)) diff --git a/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj b/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj index 7c2f790..1536ed1 100644 --- a/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj +++ b/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj @@ -16,20 +16,20 @@ - - - - + + + + diff --git a/Taxonomy.Common/Service/Impl/InformationAssetViewService.cs 
b/Taxonomy.Common/Service/Impl/InformationAssetViewService.cs index 5a8377d..66d6edb 100644 --- a/Taxonomy.Common/Service/Impl/InformationAssetViewService.cs +++ b/Taxonomy.Common/Service/Impl/InformationAssetViewService.cs @@ -1,7 +1,7 @@ using NationalArchives.Taxonomy.Common.BusinessObjects; using NationalArchives.Taxonomy.Common.Domain; using NationalArchives.Taxonomy.Common.Domain.Repository.Common; -using NationalArchives.Taxonomy.Common.Domain.Repository.Elastic; +using NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch; using System; using System.Diagnostics; using System.Threading.Tasks; @@ -20,7 +20,7 @@ public InformationAssetViewService(IIAViewRepository iAViewRepository, bool useD _useDfaultTaxonomyField = useDefaultTaxonomyFieldForApiSearch; } - public InformationAssetScrollList BrowseAllDocReferences(ElasticAssetBrowseParams browseParams, string scrollId) + public InformationAssetScrollList BrowseAllDocReferences(OpenSearchAssetBrowseParams browseParams, string scrollId) { try { @@ -29,7 +29,7 @@ public InformationAssetScrollList BrowseAllDocReferences(ElasticAssetBrowseParam } catch (Exception e) { - throw new TaxonomyException(TaxonomyErrorType.ELASTIC_INVALID_RESPONSE, "Error retrieving information asset IDs from Elastic Search", e); + throw new TaxonomyException(TaxonomyErrorType.OPEN_SEARCH_INVALID_RESPONSE, "Error retrieving information asset IDs from Elastic Search", e); } } diff --git a/Taxonomy.Common/Service/Impl/QueryBasedCategoriserService.cs b/Taxonomy.Common/Service/Impl/QueryBasedCategoriserService.cs index d7a5c0a..2a2da56 100644 --- a/Taxonomy.Common/Service/Impl/QueryBasedCategoriserService.cs +++ b/Taxonomy.Common/Service/Impl/QueryBasedCategoriserService.cs @@ -2,7 +2,7 @@ using NationalArchives.Taxonomy.Common.Domain; using NationalArchives.Taxonomy.Common.Domain.Queue; using NationalArchives.Taxonomy.Common.Domain.Repository.Common; -using NationalArchives.Taxonomy.Common.Domain.Repository.Elastic; +using NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch; using NationalArchives.Taxonomy.Common.Domain.Repository.Mongo; using System; using System.Collections.Generic; diff --git a/Taxonomy.Common/Service/Impl/UpdateElasticService.cs b/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs similarity index 79% rename from Taxonomy.Common/Service/Impl/UpdateElasticService.cs rename to Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs index d757fdc..73d553d 100644 --- a/Taxonomy.Common/Service/Impl/UpdateElasticService.cs +++ b/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs @@ -1,7 +1,7 @@ using Microsoft.Extensions.Logging; using NationalArchives.Taxonomy.Common.BusinessObjects; using NationalArchives.Taxonomy.Common.Domain.Queue; -using NationalArchives.Taxonomy.Common.Domain.Repository.Elastic; +using NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch; using NationalArchives.Taxonomy.Common.Helpers; using NationalArchives.Taxonomy.Common.Service.Interface; using System; @@ -12,10 +12,10 @@ namespace NationalArchives.Taxonomy.Common.Service.Impl { - public class UpdateElasticService : IUpdateElasticService + public class UpdateOpenSearchService : IUpdateOpenSearchService { private readonly IUpdateStagingQueueReceiver _interimUpdateQueue; - private readonly IElasticIAViewUpdateRepository _targetElasticRepository; + private readonly IOpenSearchIAViewUpdateRepository _targetOpenSearchRepository; private readonly Queue internalQueue = new Queue(); private readonly uint _batchSize; private readonly int 
_queueFetchWaitTime; @@ -30,17 +30,17 @@ public class UpdateElasticService : IUpdateElasticService public bool IsProcessingComplete { get => _isProcessingComplete; set => _isProcessingComplete = value; } - private DateTime _lastElasticUpdate = DateTime.Now; + private DateTime _lastOpenSearchUpdate = DateTime.Now; - public UpdateElasticService(IUpdateStagingQueueReceiver updateQueue, IElasticIAViewUpdateRepository targetElasticRepository, ILogger logger, uint batchSize = 1, uint queueFetchWaitTime = 1000) + public UpdateOpenSearchService(IUpdateStagingQueueReceiver updateQueue, IOpenSearchIAViewUpdateRepository targetOpenSearchRepository, ILogger logger, uint batchSize = 1, uint queueFetchWaitTime = 1000) { - if (updateQueue == null || targetElasticRepository == null) + if (updateQueue == null || targetOpenSearchRepository == null) { - throw new TaxonomyException("Input queue and target elastic repository are required."); + throw new TaxonomyException("Input queue and target Open Search repository are required."); } _interimUpdateQueue = updateQueue; - _targetElasticRepository = targetElasticRepository; + _targetOpenSearchRepository = targetOpenSearchRepository; _batchSize = batchSize; _queueFetchWaitTime = Convert.ToInt32(queueFetchWaitTime); _logger = logger; @@ -121,12 +121,12 @@ private void StartProcessing() Thread.Sleep(_queueFetchWaitTime); - TimeSpan timeSinceLastUpdate = DateTime.Now - _lastElasticUpdate; + TimeSpan timeSinceLastUpdate = DateTime.Now - _lastOpenSearchUpdate; if (internalQueue.Count >= _batchSize || ((internalQueue.Count > 0) && timeSinceLastUpdate >= TimeSpan.FromMinutes(5))) { - _lastElasticUpdate = DateTime.Now; - SubmitUpdatesToElasticDatabase(); + _lastOpenSearchUpdate = DateTime.Now; + SubmitUpdatesToOpenSearchDatabase(); } else @@ -138,7 +138,8 @@ private void StartProcessing() if (minutesSinceLastUpdate % 5 == 0 && minutesSinceLastUpdate > minutesSinceLastNoUpdatesLogMessage) { minutesSinceLastNoUpdatesLogMessage = minutesSinceLastUpdate; - _logger.LogInformation($"No Taxonomy updates have been received by the Elastic update service in the last {minutesSinceLastUpdate} minutes. Resetting the update counter."); + _logger.LogInformation($"No Taxonomy updates have been received by the Open Search" + + $" update service in the last {minutesSinceLastUpdate} minutes. Resetting the update counter."); } } } @@ -147,11 +148,11 @@ private void StartProcessing() if (nullCounter >= NULL_COUNTER_THRESHOLD) { IsProcessingComplete = true; - SubmitUpdatesToElasticDatabase(); - _logger.LogInformation("No more categorisation results found on update queue. Elastic Update service will now finish processing."); + SubmitUpdatesToOpenSearchDatabase(); + _logger.LogInformation("No more categorisation results found on update queue. 
Open Search Update service will now finish processing."); } - void SubmitUpdatesToElasticDatabase() + void SubmitUpdatesToOpenSearchDatabase() { if (_batchSize == 1 || internalQueue.Count == 1) { @@ -182,18 +183,18 @@ private void BulkUpdateCategoriesOnIAViews(IList listOfIAVie try { - _logger.LogInformation($"Submitting bulk update of {listOfIAViewUpdatesToProcess.Count} items to Elastic Search: "); - _targetElasticRepository.SaveAll(listOfIAViewUpdatesToProcess); + _logger.LogInformation($"Submitting bulk update of {listOfIAViewUpdatesToProcess.Count} items to Open Search: "); + _targetOpenSearchRepository.SaveAll(listOfIAViewUpdatesToProcess); foreach (var item in listOfIAViewUpdatesToProcess) { - _logger.LogInformation($"Updated Elastic Search entry: {item.ToString()}".PadLeft(5)); + _logger.LogInformation($"Updated Open Search entry: {item.ToString()}".PadLeft(5)); } int totalForThisBulkUpdateOperation = listOfIAViewUpdatesToProcess.Count; - _logger.LogInformation($"Completed bulk update in Elastic Search for {totalForThisBulkUpdateOperation} items: "); + _logger.LogInformation($"Completed bulk update in Open Search for {totalForThisBulkUpdateOperation} items: "); _totalInfoAssetsUPdated += totalForThisBulkUpdateOperation; - _logger.LogInformation($" Category data for {_totalInfoAssetsUPdated} assets has now been added or updated in Elastic Search."); + _logger.LogInformation($" Category data for {_totalInfoAssetsUPdated} assets has now been added or updated in Open Search."); } catch (Exception e) { @@ -205,9 +206,9 @@ private void UpdateCategoriesOnIAView(IaidWithCategories item) { try { - _logger.LogInformation("Submitting single Asset update to Elastic Search: " + item.ToString()); - _targetElasticRepository.Save(item); - _logger.LogInformation($"Completed single Asset in Elastic Search: {item.ToString()}." ); + _logger.LogInformation("Submitting single Asset update to Open Search: " + item.ToString()); + _targetOpenSearchRepository.Save(item); + _logger.LogInformation($"Completed single Asset in Open Search: {item.ToString()}." 
); _totalInfoAssetsUPdated++; } catch (Exception) diff --git a/Taxonomy.Common/Service/Interface/IInformationAssetViewService.cs b/Taxonomy.Common/Service/Interface/IInformationAssetViewService.cs index af89248..be313f5 100644 --- a/Taxonomy.Common/Service/Interface/IInformationAssetViewService.cs +++ b/Taxonomy.Common/Service/Interface/IInformationAssetViewService.cs @@ -1,5 +1,5 @@ using NationalArchives.Taxonomy.Common.Domain; -using NationalArchives.Taxonomy.Common.DataObjects.Elastic; +using NationalArchives.Taxonomy.Common.DataObjects.OpenSearch; using System; using System.Collections.Generic; using System.Text; @@ -13,6 +13,6 @@ public interface IInformationAssetViewService { Task> PerformSearch(String query, Double score, int limit, int offset, string strHeldBy = "TNA "); - InformationAssetScrollList BrowseAllDocReferences(ElasticAssetBrowseParams browseParams, string scrollId = null); + InformationAssetScrollList BrowseAllDocReferences(OpenSearchAssetBrowseParams browseParams, string scrollId = null); } } diff --git a/Taxonomy.Common/Service/Interface/IUpdateElasticService.cs b/Taxonomy.Common/Service/Interface/IUpdateOpenSearchService.cs similarity index 54% rename from Taxonomy.Common/Service/Interface/IUpdateElasticService.cs rename to Taxonomy.Common/Service/Interface/IUpdateOpenSearchService.cs index ab030ae..37a6161 100644 --- a/Taxonomy.Common/Service/Interface/IUpdateElasticService.cs +++ b/Taxonomy.Common/Service/Interface/IUpdateOpenSearchService.cs @@ -1,9 +1,9 @@ -using NationalArchives.Taxonomy.Common.DataObjects.Elastic; +using NationalArchives.Taxonomy.Common.DataObjects.OpenSearch; using System; namespace NationalArchives.Taxonomy.Common.Service.Interface { - public interface IUpdateElasticService + public interface IUpdateOpenSearchService { void Init(); diff --git a/Taxonomy.Common/TaxonomyErrorType.cs b/Taxonomy.Common/TaxonomyErrorType.cs index b1c2583..a8b0d04 100644 --- a/Taxonomy.Common/TaxonomyErrorType.cs +++ b/Taxonomy.Common/TaxonomyErrorType.cs @@ -45,13 +45,13 @@ public enum TaxonomyErrorType */ DOC_NOT_FOUND, - ELASTIC_SCROLL_EXCEPTION, + OPEN_SEARCH_SCROLL_EXCEPTION, - ELASTIC_INVALID_RESPONSE, + OPEN_SEARCH_INVALID_RESPONSE, - ELASTIC_UPDATE_ERROR, + OPEN_SEARCH_UPDATE_ERROR, - ELASTIC_BULK_UPDATE_ERROR, + OPEN_SEARCH_BULK_UPDATE_ERROR, FULL_REINDEX_WORKER_EXCEPTION, diff --git a/TaxonomyCLI/Program.cs b/TaxonomyCLI/Program.cs index 653b96c..b05cb88 100644 --- a/TaxonomyCLI/Program.cs +++ b/TaxonomyCLI/Program.cs @@ -10,10 +10,10 @@ using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; using NationalArchives.Taxonomy.Common.BusinessObjects; -using NationalArchives.Taxonomy.Common.DataObjects.Elastic; +using NationalArchives.Taxonomy.Common.DataObjects.OpenSearch; using NationalArchives.Taxonomy.Common.Domain.Queue; using NationalArchives.Taxonomy.Common.Domain.Repository.Common; -using NationalArchives.Taxonomy.Common.Domain.Repository.Elastic; +using NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch; using NationalArchives.Taxonomy.Common.Domain.Repository.Lucene; using NationalArchives.Taxonomy.Common.Domain.Repository.Mongo; using NationalArchives.Taxonomy.Common.Service; @@ -89,23 +89,23 @@ private static ServiceProvider ConfigureServices(IConfigurationRoot config, stri services.AddSingleton(); services.AddSingleton(typeof(ILogger), typeof(Logger)); - services.AddSingleton(config.GetSection("DiscoveryElasticParams").Get()); - services.AddSingleton(config.GetSection("CategoryElasticParams").Get()); + 
services.AddSingleton(config.GetSection("DiscoveryOpenSearchParams").Get()); + services.AddSingleton(config.GetSection("CategoryOpenSearchParams").Get()); services.AddSingleton(typeof(ILogger), typeof(Logger)); - services.AddTransient>((ctx) => + services.AddTransient>((ctx) => { - ElasticConnectionParameters cparams = ctx.GetRequiredService(); - IConnectElastic recordAssetsElasticConnection = new ElasticConnection(cparams); + OpenSearchConnectionParameters cparams = ctx.GetRequiredService(); + IConnectOpenSearch recordAssetsElasticConnection = new OpenSearchConnection(cparams); return recordAssetsElasticConnection; }); services.AddTransient((ctx) => { IMapper mapper = ctx.GetRequiredService(); - IConnectElastic elasticConnectionInfo = ctx.GetRequiredService>(); + IConnectOpenSearch elasticConnectionInfo = ctx.GetRequiredService>(); LuceneHelperTools luceneHelperTools = ctx.GetRequiredService(); - ElasticIAViewRepository iaRepo = new ElasticIAViewRepository(elasticConnectionInfo, luceneHelperTools, mapper); + OpenSearchIAViewRepository iaRepo = new OpenSearchIAViewRepository(elasticConnectionInfo, luceneHelperTools, mapper); return iaRepo; }); @@ -113,22 +113,22 @@ private static ServiceProvider ConfigureServices(IConfigurationRoot config, stri // Get the categories form either Mongo or Elastic switch (categorySource) { - case CategorySource.Elastic: + case CategorySource.OpenSearch: // Categories connection info - services.AddTransient>((ctx) => + services.AddTransient>((ctx) => { - CategoryDataElasticConnectionParameters categoryDataElasticConnParams = config.GetSection("CategoryElasticParams").Get(); - IConnectElastic categoriesElasticConnection = new ElasticConnection(categoryDataElasticConnParams); + CategoryDataOpenSearchConnectionParameters categoryDataElasticConnParams = config.GetSection("CategoryOpenSearchParams").Get(); + IConnectOpenSearch categoriesElasticConnection = new OpenSearchConnection(categoryDataElasticConnParams); return categoriesElasticConnection; }); // category list repo using category connection info. 
- services.AddTransient((ctx) => + services.AddTransient((ctx) => { IMapper mapper = ctx.GetRequiredService(); - IConnectElastic elasticConnectionInfo = ctx.GetRequiredService>(); - ElasticCategoryRepository categoryRepo = new ElasticCategoryRepository(elasticConnectionInfo, mapper); + IConnectOpenSearch elasticConnectionInfo = ctx.GetRequiredService>(); + OpenSearchCategoryRepository categoryRepo = new OpenSearchCategoryRepository(elasticConnectionInfo, mapper); return categoryRepo; }); diff --git a/TaxonomyCLI/appsettings.json b/TaxonomyCLI/appsettings.json index d187e97..6f00294 100644 --- a/TaxonomyCLI/appsettings.json +++ b/TaxonomyCLI/appsettings.json @@ -7,12 +7,12 @@ "**/obj", "**/platforms" ], - "DiscoveryElasticParams": { + "DiscoveryOpenSearchParams": { "Scheme": "https", "Host": "vpc-", "Port": "443", "IndexDatabase": "discovery_records", - "ElasticAwsParams": { + "OpenSearchAwsParams": { "UseAwsConnection": "true", "Region": "eu-west-2", "RoleArn": "FROM_Env_Var", @@ -21,7 +21,7 @@ } }, - "CategoryElasticParams": { + "CategoryOpenSearchParams": { "Scheme": "https", "Host": "vpc-", "Port": "443", diff --git a/ds-discovery-opensearch-taxonomy.sln b/ds-discovery-opensearch-taxonomy.sln index 5eac869..5141046 100644 --- a/ds-discovery-opensearch-taxonomy.sln +++ b/ds-discovery-opensearch-taxonomy.sln @@ -1,33 +1,33 @@  Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 17 -VisualStudioVersion = 17.12.35506.116 d17.12 +VisualStudioVersion = 17.12.35506.116 MinimumVisualStudioVersion = 10.0.40219.1 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "taxonomy-api", "taxonomy-api", "{AA038D2A-36A9-49F1-BD23-41AE94FE9DE5}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "tna.taxonomy.api", "tna.taxonomy.api\tna.taxonomy.api.csproj", "{BB4ABC97-1ECA-4E7E-9574-786C714BDEC1}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "tna.taxonomy.api", "tna.taxonomy.api\tna.taxonomy.api.csproj", "{BB4ABC97-1ECA-4E7E-9574-786C714BDEC1}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "taxonomy-batch", "taxonomy-batch", "{EE923A5B-4A01-4434-8983-0B1F25CB0201}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NationalArchives.Taxonomy.Batch", "NationalArchives.Taxonomy.Batch\NationalArchives.Taxonomy.Batch.csproj", "{59616E74-9E39-473F-95A1-B0BF5E9E1B8F}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NationalArchives.Taxonomy.Batch", "NationalArchives.Taxonomy.Batch\NationalArchives.Taxonomy.Batch.csproj", "{59616E74-9E39-473F-95A1-B0BF5E9E1B8F}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NationalArchives.Taxonomy.Batch.UnitTests", "NationalArchives.Taxonomy.Batch.UnitTests\NationalArchives.Taxonomy.Batch.UnitTests.csproj", "{35468E65-1198-4384-A54C-39D0FF77CC28}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NationalArchives.Taxonomy.Batch.UnitTests", "NationalArchives.Taxonomy.Batch.UnitTests\NationalArchives.Taxonomy.Batch.UnitTests.csproj", "{35468E65-1198-4384-A54C-39D0FF77CC28}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NationalArchives.Taxonomy.Batch.Update.Elastic", "NationalArchives.Taxonomy.Batch.Update.Elastic\NationalArchives.Taxonomy.Batch.Update.Elastic.csproj", "{F52AE188-6355-49A3-A54C-6242FE35E6E8}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NationalArchives.Taxonomy.Batch.Update.OpenSearch", "NationalArchives.Taxonomy.Batch.Update.Elastic\NationalArchives.Taxonomy.Batch.Update.OpenSearch.csproj", 
"{F52AE188-6355-49A3-A54C-6242FE35E6E8}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "taxonomy-cli", "taxonomy-cli", "{30BB5F98-02D8-435D-9514-666E56332608}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NationalArchives.Taxonomy.CLI", "TaxonomyCLI\NationalArchives.Taxonomy.CLI.csproj", "{E80CAE10-9A8F-4775-AF84-EE54B370C93F}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NationalArchives.Taxonomy.CLI", "TaxonomyCLI\NationalArchives.Taxonomy.CLI.csproj", "{E80CAE10-9A8F-4775-AF84-EE54B370C93F}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NationalArchives.Taxonomy.CLI.UnitTests", "TaxonomyCliUnitTests\NationalArchives.Taxonomy.CLI.UnitTests.csproj", "{C9CC5122-186B-4D60-B9CE-5A6522911446}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NationalArchives.Taxonomy.CLI.UnitTests", "TaxonomyCliUnitTests\NationalArchives.Taxonomy.CLI.UnitTests.csproj", "{C9CC5122-186B-4D60-B9CE-5A6522911446}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "taxonomy-common", "taxonomy-common", "{706FCBE1-11AB-4271-AE14-A7B58CCB4507}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NationalArchives.Taxonomy.Common", "Taxonomy.Common\NationalArchives.Taxonomy.Common.csproj", "{64383CD7-55BC-4F6C-9C31-5B60725F9B08}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NationalArchives.Taxonomy.Common", "Taxonomy.Common\NationalArchives.Taxonomy.Common.csproj", "{64383CD7-55BC-4F6C-9C31-5B60725F9B08}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NationalArchives.Taxonomy.Common.UnitTests", "NationalArchives.Taxonomy.Common.UnitTests\NationalArchives.Taxonomy.Common.UnitTests.csproj", "{3A5974EC-387D-4390-A634-D4C965096854}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NationalArchives.Taxonomy.Common.UnitTests", "NationalArchives.Taxonomy.Common.UnitTests\NationalArchives.Taxonomy.Common.UnitTests.csproj", "{3A5974EC-387D-4390-A634-D4C965096854}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NationalArchives.ActiveMQ", "NationalArchives.ActiveMQ\NationalArchives.ActiveMQ.csproj", "{55F32EEA-1946-4B6B-99EF-50ACDB62FEB4}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NationalArchives.ActiveMQ", "NationalArchives.ActiveMQ\NationalArchives.ActiveMQ.csproj", "{55F32EEA-1946-4B6B-99EF-50ACDB62FEB4}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -86,4 +86,7 @@ Global {3A5974EC-387D-4390-A634-D4C965096854} = {706FCBE1-11AB-4271-AE14-A7B58CCB4507} {55F32EEA-1946-4B6B-99EF-50ACDB62FEB4} = {706FCBE1-11AB-4271-AE14-A7B58CCB4507} EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {E0CFD241-AF85-4C51-ABE1-B03ACB6C3EA0} + EndGlobalSection EndGlobal diff --git a/tna.taxonomy.api/Controllers/TaxonomyController.cs b/tna.taxonomy.api/Controllers/TaxonomyController.cs index 237c2b1..0c8e0d6 100644 --- a/tna.taxonomy.api/Controllers/TaxonomyController.cs +++ b/tna.taxonomy.api/Controllers/TaxonomyController.cs @@ -86,7 +86,7 @@ public ActionResult TestBrowseAll(InformationAssetSc { try { - var elasticBrowseparams = new ElasticAssetBrowseParams() { ScrollTimeout = scrollRequest.Timeout, PageSize = scrollRequest.PageSize }; + var elasticBrowseparams = new OpenSearchAssetBrowseParams() { ScrollTimeout = scrollRequest.Timeout, PageSize = scrollRequest.PageSize }; var results = _iaViewService.BrowseAllDocReferences(elasticBrowseparams, scrollId: scrollRequest.ScrollId); return Ok(results); } diff --git 
a/tna.taxonomy.api/Program.cs b/tna.taxonomy.api/Program.cs index 6542f89..a6ceff1 100644 --- a/tna.taxonomy.api/Program.cs +++ b/tna.taxonomy.api/Program.cs @@ -2,9 +2,9 @@ using Lucene.Net.Analysis; using Microsoft.OpenApi.Models; using NationalArchives.Taxonomy.Common.BusinessObjects; -using NationalArchives.Taxonomy.Common.DataObjects.Elastic; +using NationalArchives.Taxonomy.Common.DataObjects.OpenSearch; using NationalArchives.Taxonomy.Common.Domain.Repository.Common; -using NationalArchives.Taxonomy.Common.Domain.Repository.Elastic; +using NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch; using NationalArchives.Taxonomy.Common.Domain.Repository.Lucene; using NationalArchives.Taxonomy.Common.Domain.Repository.Mongo; using NationalArchives.Taxonomy.Common.Service; @@ -16,14 +16,14 @@ // Add services to the container. builder.Services.AddAutoMapper(mc => mc.AddMaps(new[] { "NationalArchives.Taxonomy.Common" })); -builder.Services.AddSingleton(config.GetSection("DiscoveryElasticParams").Get()); -builder.Services.AddSingleton(config.GetSection("CategoryElasticParams").Get()); +builder.Services.AddSingleton(config.GetSection("DiscoveryOpenSearchParams").Get()); +builder.Services.AddSingleton(config.GetSection("CategoryOpenSearchParams").Get()); builder.Services.AddSingleton(typeof(ILogger), typeof(Logger)); -builder.Services.AddScoped>((ctx) => +builder.Services.AddScoped>((ctx) => { - ElasticConnectionParameters cparams = ctx.GetRequiredService(); - IConnectElastic recordAssetsElasticConnection = new ElasticConnection(cparams); + OpenSearchConnectionParameters cparams = ctx.GetRequiredService(); + IConnectOpenSearch recordAssetsElasticConnection = new OpenSearchConnection(cparams); return recordAssetsElasticConnection; }); @@ -31,22 +31,22 @@ // Get the categories form either Mongo or Elastic switch (categorySource) { - case CategorySource.Elastic: + case CategorySource.OpenSearch: // Categories connection info - builder.Services.AddTransient>((ctx) => + builder.Services.AddTransient>((ctx) => { - CategoryDataElasticConnectionParameters categoryDataElasticConnParams = config.GetSection("CategoryElasticParams").Get(); - IConnectElastic categoriesElasticConnection = new ElasticConnection(categoryDataElasticConnParams); + CategoryDataOpenSearchConnectionParameters categoryDataElasticConnParams = config.GetSection("CategoryOpenSearchParams").Get(); + IConnectOpenSearch categoriesElasticConnection = new OpenSearchConnection(categoryDataElasticConnParams); return categoriesElasticConnection; }); // category list repo using category connection info. 
- builder.Services.AddTransient((ctx) => + builder.Services.AddTransient((ctx) => { IMapper mapper = ctx.GetRequiredService(); - IConnectElastic elasticConnectionInfo = ctx.GetRequiredService>(); - ElasticCategoryRepository categoryRepo = new ElasticCategoryRepository(elasticConnectionInfo, mapper); + IConnectOpenSearch elasticConnectionInfo = ctx.GetRequiredService>(); + OpenSearchCategoryRepository categoryRepo = new OpenSearchCategoryRepository(elasticConnectionInfo, mapper); return categoryRepo; }); @@ -74,9 +74,9 @@ builder.Services.AddTransient((ctx) => { IMapper mapper = ctx.GetRequiredService(); - IConnectElastic elasticConnectionInfo = ctx.GetRequiredService>(); + IConnectOpenSearch elasticConnectionInfo = ctx.GetRequiredService>(); LuceneHelperTools luceneHelperTools = ctx.GetRequiredService(); - ElasticIAViewRepository iaRepo = new ElasticIAViewRepository(elasticConnectionInfo, luceneHelperTools, mapper); + OpenSearchIAViewRepository iaRepo = new OpenSearchIAViewRepository(elasticConnectionInfo, luceneHelperTools, mapper); return iaRepo; }); diff --git a/tna.taxonomy.api/Properties/Resources.Designer.cs b/tna.taxonomy.api/Properties/Resources.Designer.cs new file mode 100644 index 0000000..00e2cf5 --- /dev/null +++ b/tna.taxonomy.api/Properties/Resources.Designer.cs @@ -0,0 +1,63 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Runtime Version:4.0.30319.42000 +// +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +//------------------------------------------------------------------------------ + +namespace tna.taxonomy.api.Properties { + using System; + + + /// + /// A strongly-typed resource class, for looking up localized strings, etc. + /// + // This class was auto-generated by the StronglyTypedResourceBuilder + // class via a tool like ResGen or Visual Studio. + // To add or remove a member, edit your .ResX file then rerun ResGen + // with the /str option, or rebuild your VS project. + [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "17.0.0.0")] + [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] + [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] + internal class Resources { + + private static global::System.Resources.ResourceManager resourceMan; + + private static global::System.Globalization.CultureInfo resourceCulture; + + [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")] + internal Resources() { + } + + /// + /// Returns the cached ResourceManager instance used by this class. + /// + [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] + internal static global::System.Resources.ResourceManager ResourceManager { + get { + if (object.ReferenceEquals(resourceMan, null)) { + global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("tna.taxonomy.api.Properties.Resources", typeof(Resources).Assembly); + resourceMan = temp; + } + return resourceMan; + } + } + + /// + /// Overrides the current thread's CurrentUICulture property for all + /// resource lookups using this strongly typed resource class. 
+ /// + [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] + internal static global::System.Globalization.CultureInfo Culture { + get { + return resourceCulture; + } + set { + resourceCulture = value; + } + } + } +} diff --git a/tna.taxonomy.api/Properties/Resources.resx b/tna.taxonomy.api/Properties/Resources.resx new file mode 100644 index 0000000..4fdb1b6 --- /dev/null +++ b/tna.taxonomy.api/Properties/Resources.resx @@ -0,0 +1,101 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text/microsoft-resx + + + 1.3 + + + System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.3500.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.3500.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + \ No newline at end of file diff --git a/tna.taxonomy.api/appsettings.json b/tna.taxonomy.api/appsettings.json index 018e792..d9f8091 100644 --- a/tna.taxonomy.api/appsettings.json +++ b/tna.taxonomy.api/appsettings.json @@ -6,13 +6,13 @@ "LogFilePath": "Logs\\log-{Date}.txt" }, "AllowedHosts": "*", - "DiscoveryElasticParams-lobapp2": { - "Scheme": "http", - "Host": "xx.web.local", - "Port": "9200", - "IndexDatabase": "discovery_records_dev" + "DiscoveryOpenSearchParams": { + "Scheme": "https", + "Host": "", + "Port": "80", + "IndexDatabase": "discovery_records" }, - "CategoryElasticParams": { + "CategoryOpenSearchParams": { "Scheme": "http", "Host": "xx.web.local", "Port": "9200", @@ -21,7 +21,7 @@ "CategoryMongoParams": { "ConnectionString": "mongodb+srv://", "DatabaseName": "taxonomy", - "CollectionName": "elastic-categories" + "CollectionName": "categories" }, "CategorySource": "Mongo", "CategoriserLuceneParams": { diff --git a/tna.taxonomy.api/tna.taxonomy.api.csproj b/tna.taxonomy.api/tna.taxonomy.api.csproj index e5282b4..452ce20 100644 --- a/tna.taxonomy.api/tna.taxonomy.api.csproj +++ b/tna.taxonomy.api/tna.taxonomy.api.csproj @@ -1,4 +1,4 @@ - + net8.0 @@ -16,4 +16,19 @@ + + + True + True + Resources.resx + + + + + + ResXFileCodeGenerator + Resources.Designer.cs + + + From 9230e328b9ded6565891bbdd816a0f48954e903b Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Fri, 15 Nov 2024 13:04:31 +0000 Subject: [PATCH 05/22] Various updates including Nuget packages and configuring projects to use User Secrets for local development. 
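For local development the services now read overrides from the .NET User Secrets store instead of values committed in appsettings.json. A minimal sketch of how that wiring typically looks, assuming the Microsoft.Extensions.Configuration.UserSecrets package is referenced and a <UserSecretsId> is present in the .csproj (the secret keys shown are only examples of the configuration paths used in these files, not a definitive list):

    using System.IO;
    using Microsoft.Extensions.Configuration;   // plus Microsoft.Extensions.Configuration.UserSecrets

    // Sketch only: builds configuration with per-developer secrets layered on top.
    IConfigurationRoot config = new ConfigurationBuilder()
        .SetBasePath(Directory.GetCurrentDirectory())
        .AddJsonFile("appsettings.json", optional: false, reloadOnChange: true)
        .AddEnvironmentVariables("TAXONOMY_")
        .AddUserSecrets<Program>()               // local overrides, stored outside the repository
        .Build();

    // Values are then supplied per machine from the project directory, e.g. (keys illustrative):
    //   dotnet user-secrets set "DiscoveryOpenSearchParams:Host" "<opensearch host>"
    //   dotnet user-secrets set "CategoryMongoParams:ConnectionString" "mongodb+srv://..."

Because the secrets live in the user profile rather than the working tree, the connection details no longer need to be present in the checked-in appsettings files.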
--- .gitignore | 2 ++ .../NationalArchives.Taxonomy.Batch.csproj | 11 ++++--- NationalArchives.Taxonomy.Batch/Program.cs | 8 ++--- .../appsettings.json | 4 +-- .../NationalArchives.Taxonomy.CLI.csproj | 2 ++ TaxonomyCLI/Program.cs | 3 +- TaxonomyCLI/appsettings.json | 30 +++++++++---------- tna.taxonomy.api/Program.cs | 3 +- .../Properties/launchSettings.json | 7 ++--- tna.taxonomy.api/tna.taxonomy.api.csproj | 3 +- 10 files changed, 39 insertions(+), 34 deletions(-) diff --git a/.gitignore b/.gitignore index 0fddaa2..14037a2 100644 --- a/.gitignore +++ b/.gitignore @@ -268,3 +268,5 @@ aws-logger-errors.txt /Dockerfile_web /Dockerfile_api /run +/NationalArchives.Taxonomy.Batch.Update.Elastic/Properties/launchSettings.json +/tna.taxonomy.api/Properties/launchSettings.json diff --git a/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj b/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj index 566af2d..a9886ac 100644 --- a/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj +++ b/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj @@ -4,6 +4,7 @@ Exe net8.0 win-x64 + 78174f14-bf9a-47ee-8ef8-e2ec7284aa7a @@ -12,10 +13,12 @@ - - - - + + + + + + diff --git a/NationalArchives.Taxonomy.Batch/Program.cs b/NationalArchives.Taxonomy.Batch/Program.cs index ca7cc26..5773f71 100644 --- a/NationalArchives.Taxonomy.Batch/Program.cs +++ b/NationalArchives.Taxonomy.Batch/Program.cs @@ -53,7 +53,8 @@ public static void Main(string[] args) serviceLogger.LogInformation("Starting the taxonomy generator."); } - CreateHostBuilder(args).Build().Run(); + var builder = CreateHostBuilder(args); + builder.Build().Run(); } catch (Exception e) { @@ -86,6 +87,7 @@ public static IHostBuilder CreateHostBuilder(string[] args) => }).ConfigureAppConfiguration((hostingContext, config) => { config.AddEnvironmentVariables("TAXONOMY_"); + config.AddUserSecrets(); }).UseWindowsService(); private static void ConfigureServicesForHost(HostBuilderContext context, IServiceCollection services) @@ -104,7 +106,6 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic CategorisationParams categorisationParams = config.GetSection(categorisationParamsConfigSource).Get(); - services.AddAutoMapper(mc => mc.AddMaps(new[] { "NationalArchives.Taxonomy.Common" })); services.AddSingleton(); @@ -120,16 +121,13 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic services.AddSingleton(typeof(ILogger), typeof(Logger)); } - DiscoveryOpenSearchConnectionParameters discoveryOpenSearchConnParams = config.GetSection("DiscoveryOpenSearchParams").Get(); - services.AddSingleton(categorisationParams); // Need to add as a service as FullReindexService and DailyUpdate service are instantiated via AddHostedService where we can't pass parameters directly. CategoriserLuceneParams categoriserLuceneParams = config.GetSection("CategoriserLuceneParams").Get(); - //params for update staging queue. UpdateStagingQueueParams updateStagingQueueParams = config.GetSection("UpdateStagingQueueParams").Get(); services.AddSingleton(updateStagingQueueParams); diff --git a/NationalArchives.Taxonomy.Batch/appsettings.json b/NationalArchives.Taxonomy.Batch/appsettings.json index ab073f5..e2ecafa 100644 --- a/NationalArchives.Taxonomy.Batch/appsettings.json +++ b/NationalArchives.Taxonomy.Batch/appsettings.json @@ -28,7 +28,7 @@ "RoleArn": "arn:aws:iam::xxx:role/elastic_update_role", "AccessKey": "??", "SecretKey": "??" 
- //we've to move away from using this and use the profile to access AWS' + //we've to move away from using this and use the profile to access AWS' } }, "OpenSearchAssetFetchParams": { @@ -78,5 +78,5 @@ "CollectionName": "categories" }, "CategorySource": "Mongo", - "OperationMode": "Daily_Update" // "Daily_Update" or "Full_Reindex" + "OperationMode": "Full_Reindex" // "Daily_Update" or "Full_Reindex" } \ No newline at end of file diff --git a/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj b/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj index dd2dd61..b0af319 100644 --- a/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj +++ b/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj @@ -6,6 +6,7 @@ NationalArchives.Taxonomy.CLI NationalArchives.Taxonomy.CLI win-x64;win-x86 + a3e63ff7-6874-4ea5-9823-e3b48647959e @@ -16,6 +17,7 @@ + diff --git a/TaxonomyCLI/Program.cs b/TaxonomyCLI/Program.cs index b05cb88..cdc1783 100644 --- a/TaxonomyCLI/Program.cs +++ b/TaxonomyCLI/Program.cs @@ -40,7 +40,8 @@ static int Main(string[] args) .SetBasePath(Directory.GetCurrentDirectory()) .AddJsonFile("appsettings.json", optional: false, reloadOnChange: true) .AddJsonFile($"appsettings.{environmentName}.json", optional: true, reloadOnChange: true) - .AddEnvironmentVariables("TAXONOMY_"); + .AddEnvironmentVariables("TAXONOMY_") + .AddUserSecrets(); var config = builder.Build(); IServiceProvider provider = ConfigureServices(config, args); diff --git a/TaxonomyCLI/appsettings.json b/TaxonomyCLI/appsettings.json index 6f00294..ba9d9fc 100644 --- a/TaxonomyCLI/appsettings.json +++ b/TaxonomyCLI/appsettings.json @@ -8,35 +8,35 @@ "**/platforms" ], "DiscoveryOpenSearchParams": { - "Scheme": "https", - "Host": "vpc-", - "Port": "443", - "IndexDatabase": "discovery_records", + "Scheme": "??", + "Host": "??", + "Port": "??", + "IndexDatabase": "??", "OpenSearchAwsParams": { - "UseAwsConnection": "true", - "Region": "eu-west-2", - "RoleArn": "FROM_Env_Var", - "AccessKey": "FROM_Env_Var", - "SecretKey": "FROM_Env_Var" + "UseAwsConnection": "??", + "Region": "??", + "RoleArn": "??", + "AccessKey": "??", + "SecretKey": "??" } }, "CategoryOpenSearchParams": { "Scheme": "https", - "Host": "vpc-", + "Host": "??", "Port": "443", "IndexDatabase": "categories_elastic" }, "CategoryMongoParams": { - "ConnectionString": "mongodb://", - "DatabaseName": "taxonomy", - "CollectionName": "categories_elastic2_new_fieldnames" + "ConnectionString": "mongodb+srv://", + "DatabaseName": "??", + "CollectionName": "??" }, "CategorySource": "Mongo", "UpdateStagingQueueParams": { - "Uri": "activemq:tcp://localhost:61616", - "QueueName": "taxonomy.results1", + "Uri": "??", + "QueueName": "??", "PostUpdates": "false" }, "CategoriserLuceneParams": { diff --git a/tna.taxonomy.api/Program.cs b/tna.taxonomy.api/Program.cs index a6ceff1..5a7048e 100644 --- a/tna.taxonomy.api/Program.cs +++ b/tna.taxonomy.api/Program.cs @@ -12,6 +12,7 @@ var builder = WebApplication.CreateBuilder(args); var config = builder.Configuration; +config.AddEnvironmentVariables("TAXONOMY_"); // Add services to the container. 
builder.Services.AddAutoMapper(mc => mc.AddMaps(new[] { "NationalArchives.Taxonomy.Common" })); @@ -27,7 +28,7 @@ return recordAssetsElasticConnection; }); -CategorySource categorySource = (CategorySource)Enum.Parse(typeof(CategorySource), config.GetValue("CategorySource")); + CategorySource categorySource = (CategorySource)Enum.Parse(typeof(CategorySource), config.GetValue("CategorySource")); // Get the categories form either Mongo or Elastic switch (categorySource) { diff --git a/tna.taxonomy.api/Properties/launchSettings.json b/tna.taxonomy.api/Properties/launchSettings.json index ad90c74..996c325 100644 --- a/tna.taxonomy.api/Properties/launchSettings.json +++ b/tna.taxonomy.api/Properties/launchSettings.json @@ -14,17 +14,14 @@ "dotnetRunMessages": true, "launchBrowser": true, "launchUrl": "swagger", - "applicationUrl": "http://localhost:5114", - "environmentVariables": { - "ASPNETCORE_ENVIRONMENT": "Production" - } + "applicationUrl": "http://localhost:5114" }, "IIS Express": { "commandName": "IISExpress", "launchBrowser": true, "launchUrl": "swagger", "environmentVariables": { - "ASPNETCORE_ENVIRONMENT": "Production" + "ASPNETCORE_ENVIRONMENT": "Development" } } } diff --git a/tna.taxonomy.api/tna.taxonomy.api.csproj b/tna.taxonomy.api/tna.taxonomy.api.csproj index 452ce20..a5cd3c1 100644 --- a/tna.taxonomy.api/tna.taxonomy.api.csproj +++ b/tna.taxonomy.api/tna.taxonomy.api.csproj @@ -1,9 +1,10 @@ - + net8.0 enable enable + 37148cc4-54a2-4455-8ad6-677881b1d563 From a35d58029f10cc3c869f93ffdb6641db63c01b4b Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Fri, 15 Nov 2024 15:51:42 +0000 Subject: [PATCH 06/22] Further updaes to various Nuget packages --- ...alArchives.Taxonomy.Batch.UnitTests.csproj | 4 ++- ...es.Taxonomy.Batch.Update.OpenSearch.csproj | 16 ++++++------ .../NationalArchives.Taxonomy.Batch.csproj | 25 ++++++++++--------- ...lArchives.Taxonomy.Common.UnitTests.csproj | 4 ++- .../Elastic/AbstractOpenSearchRespository.cs | 4 --- .../NationalArchives.Taxonomy.Common.csproj | 20 +++++++++------ .../NationalArchives.Taxonomy.CLI.csproj | 17 +++++++------ ...onalArchives.Taxonomy.CLI.UnitTests.csproj | 4 ++- tna.taxonomy.api/Program.cs | 1 + 9 files changed, 53 insertions(+), 42 deletions(-) diff --git a/NationalArchives.Taxonomy.Batch.UnitTests/NationalArchives.Taxonomy.Batch.UnitTests.csproj b/NationalArchives.Taxonomy.Batch.UnitTests/NationalArchives.Taxonomy.Batch.UnitTests.csproj index 9e78b48..3696b12 100644 --- a/NationalArchives.Taxonomy.Batch.UnitTests/NationalArchives.Taxonomy.Batch.UnitTests.csproj +++ b/NationalArchives.Taxonomy.Batch.UnitTests/NationalArchives.Taxonomy.Batch.UnitTests.csproj @@ -9,7 +9,9 @@ - + + + diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.OpenSearch.csproj b/NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.OpenSearch.csproj index ea38947..d52cd5b 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.OpenSearch.csproj +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.OpenSearch.csproj @@ -7,14 +7,14 @@ - - - - - - - - + + + + + + + + diff --git a/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj b/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj index a9886ac..8f8721b 100644 --- a/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj +++ 
b/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj @@ -19,18 +19,19 @@ - - - - - - - - - - - - + + + + + + + + + + + + + diff --git a/NationalArchives.Taxonomy.Common.UnitTests/NationalArchives.Taxonomy.Common.UnitTests.csproj b/NationalArchives.Taxonomy.Common.UnitTests/NationalArchives.Taxonomy.Common.UnitTests.csproj index 9292c40..4073c1a 100644 --- a/NationalArchives.Taxonomy.Common.UnitTests/NationalArchives.Taxonomy.Common.UnitTests.csproj +++ b/NationalArchives.Taxonomy.Common.UnitTests/NationalArchives.Taxonomy.Common.UnitTests.csproj @@ -11,7 +11,9 @@ - + + + diff --git a/Taxonomy.Common/Domain/Repository/Elastic/AbstractOpenSearchRespository.cs b/Taxonomy.Common/Domain/Repository/Elastic/AbstractOpenSearchRespository.cs index d717a9e..8a2e752 100644 --- a/Taxonomy.Common/Domain/Repository/Elastic/AbstractOpenSearchRespository.cs +++ b/Taxonomy.Common/Domain/Repository/Elastic/AbstractOpenSearchRespository.cs @@ -1,8 +1,4 @@ using AutoMapper; -using Nest; -using System; -using System.Collections.Generic; -using System.Text; namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch { diff --git a/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj b/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj index 1536ed1..8a973a6 100644 --- a/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj +++ b/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj @@ -14,13 +14,13 @@ - - - - + + + + + + - - @@ -30,7 +30,13 @@ - + + + + + + + diff --git a/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj b/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj index b0af319..132fee5 100644 --- a/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj +++ b/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj @@ -11,14 +11,15 @@ - - - - - - - - + + + + + + + + + diff --git a/TaxonomyCliUnitTests/NationalArchives.Taxonomy.CLI.UnitTests.csproj b/TaxonomyCliUnitTests/NationalArchives.Taxonomy.CLI.UnitTests.csproj index 617b89e..00f3d8f 100644 --- a/TaxonomyCliUnitTests/NationalArchives.Taxonomy.CLI.UnitTests.csproj +++ b/TaxonomyCliUnitTests/NationalArchives.Taxonomy.CLI.UnitTests.csproj @@ -9,7 +9,9 @@ - + + + diff --git a/tna.taxonomy.api/Program.cs b/tna.taxonomy.api/Program.cs index 5a7048e..a2f74c5 100644 --- a/tna.taxonomy.api/Program.cs +++ b/tna.taxonomy.api/Program.cs @@ -13,6 +13,7 @@ var config = builder.Configuration; config.AddEnvironmentVariables("TAXONOMY_"); +config.AddUserSecrets(); // Add services to the container. builder.Services.AddAutoMapper(mc => mc.AddMaps(new[] { "NationalArchives.Taxonomy.Common" })); From 8c0d01f0dc7e87008295e126e93040e53c9707b1 Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Mon, 18 Nov 2024 11:03:37 +0000 Subject: [PATCH 07/22] Fixed MongoCategoryRepository so that retrieval of categories from MongoDB only occurs once and other threads then use the caches version. 
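In outline, the repository is now registered as a singleton and guards the first MongoDB read with a SemaphoreSlim, so concurrent callers wait for a single load and then reuse the cached list. A condensed sketch of the pattern, with the load helper name being illustrative rather than the project's actual method:

    // Requires System.Collections.Generic, System.Threading and System.Threading.Tasks.
    private static IList<Category> _categories;                          // shared, populated once
    private readonly SemaphoreSlim _semaphore = new SemaphoreSlim(1, 1);

    public async Task<IList<Category>> FindAll()
    {
        await _semaphore.WaitAsync();            // only one caller queries MongoDB
        try
        {
            if (_categories != null)
            {
                return _categories;              // later callers use the cached version
            }

            IList<Category> categories = await LoadCategoriesFromMongoAsync(); // illustrative name
            _categories = categories;
            return categories;
        }
        finally
        {
            _semaphore.Release();
        }
    }

Registering the repository with AddSingleton rather than AddTransient is what allows the semaphore and the cached list to be shared across all consumers.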
--- NationalArchives.Taxonomy.Batch/Program.cs | 2 +- .../Domain/Queue/ActiveMqUpdateSender.cs | 1 - .../Mongo/MongoCategoryRepository.cs | 73 +++++++++++-------- 3 files changed, 45 insertions(+), 31 deletions(-) diff --git a/NationalArchives.Taxonomy.Batch/Program.cs b/NationalArchives.Taxonomy.Batch/Program.cs index 5773f71..61933a9 100644 --- a/NationalArchives.Taxonomy.Batch/Program.cs +++ b/NationalArchives.Taxonomy.Batch/Program.cs @@ -179,7 +179,7 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic case CategorySource.Mongo: //Mongo categories - services.AddTransient((ctx) => + services.AddSingleton((ctx) => { IMapper mapper = ctx.GetRequiredService(); MongoConnectionParams categoryDataMongoConnParams = config.GetSection("CategoryMongoParams").Get(); diff --git a/Taxonomy.Common/Domain/Queue/ActiveMqUpdateSender.cs b/Taxonomy.Common/Domain/Queue/ActiveMqUpdateSender.cs index 2d03808..037e744 100644 --- a/Taxonomy.Common/Domain/Queue/ActiveMqUpdateSender.cs +++ b/Taxonomy.Common/Domain/Queue/ActiveMqUpdateSender.cs @@ -179,7 +179,6 @@ private void Consume1() while (!IsComplete() && !_token.IsCancellationRequested) { - if (_sendErrors.Count >= _maxSendErrors) { if (!_tcs.Task.IsFaulted) //Only one worker should set this as calling repeatedly causes an exception diff --git a/Taxonomy.Common/Domain/Repository/Mongo/MongoCategoryRepository.cs b/Taxonomy.Common/Domain/Repository/Mongo/MongoCategoryRepository.cs index 5159a93..0a5fc06 100644 --- a/Taxonomy.Common/Domain/Repository/Mongo/MongoCategoryRepository.cs +++ b/Taxonomy.Common/Domain/Repository/Mongo/MongoCategoryRepository.cs @@ -7,16 +7,18 @@ using System.Collections.Generic; using System.Threading.Tasks; using System.Linq; +using System.Threading; namespace NationalArchives.Taxonomy.Common.Domain.Repository.Mongo { - public sealed class MongoCategoryRepository : ICategoryRepository + public sealed class MongoCategoryRepository : ICategoryRepository, IDisposable { private static IList _categories; private readonly IMapper _mapper; private IMongoCollection m_MongoCollection = null; + private readonly SemaphoreSlim _semaphore = new SemaphoreSlim(1, 1); public MongoCategoryRepository(MongoConnectionParams mongoConnectionParams, IMapper mapper) { @@ -49,52 +51,60 @@ public long Count() public async Task> FindAll() { - if (_categories != null) - { - return _categories; - } - + await _semaphore.WaitAsync(); try { - FilterDefinition filter = FilterDefinition.Empty; - var categories = new List(); - - using (IAsyncCursor mongoCategoriesCursor = await m_MongoCollection.FindAsync(filter)) + if (_categories != null) + { + return _categories; + } + try { - while (await mongoCategoriesCursor.MoveNextAsync()) + FilterDefinition filter = FilterDefinition.Empty; + var categories = new List(); + + using (IAsyncCursor mongoCategoriesCursor = await m_MongoCollection.FindAsync(filter)) { - IEnumerable batch = mongoCategoriesCursor.Current; - foreach (CategoryFromMongo mongoCategory in batch) + while (await mongoCategoriesCursor.MoveNextAsync()) { - Category category = _mapper.Map(mongoCategory); - - foreach (string s in new string []{category.Id, category.Query, category.Title }) + IEnumerable batch = mongoCategoriesCursor.Current; + foreach (CategoryFromMongo mongoCategory in batch) { - if (String.IsNullOrWhiteSpace(s)) + Category category = _mapper.Map(mongoCategory); + + foreach (string s in new string[] { category.Id, category.Query, category.Title }) { - throw new ApplicationException($"Error retreiving category 
data from Mongo Collection {m_MongoCollection.CollectionName()}, database {m_MongoCollection.DatabaseName()}, server {m_MongoCollection.Server()}. Current Category: {category}."); + if (String.IsNullOrWhiteSpace(s)) + { + throw new ApplicationException($"Error retreiving category data from Mongo Collection {m_MongoCollection.CollectionName()}, database {m_MongoCollection.DatabaseName()}, server {m_MongoCollection.Server()}. Current Category: {category}."); + } } + categories.Add(category); } - categories.Add(category); } } - } - if (categories.Count > 0) - { - _categories = categories; + if (categories.Count > 0) + { + _categories = categories; + } + else + { + throw new ApplicationException($"Could not retrieve category information from Mongo collection {m_MongoCollection.CollectionName()}, database {m_MongoCollection.DatabaseName()}, server {m_MongoCollection.Server()}."); + } + + return categories; } - else + catch (Exception ex) { - throw new ApplicationException($"Could not retrieve category information from Mongo collection {m_MongoCollection.CollectionName()}, database {m_MongoCollection.DatabaseName()}, server {m_MongoCollection.Server()}."); + throw; } - - return categories; } - catch (Exception ex) + finally { - throw; + _semaphore.Release(); } + } public Category FindByCiaid(string ciaid) @@ -119,5 +129,10 @@ public void Save(Category category) { throw new NotImplementedException(); } + + public void Dispose() + { + _semaphore?.Dispose(); + } } } From c4faf9e3ce505660cb33d177ca6a95883761d0e1 Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Tue, 19 Nov 2024 12:30:40 +0000 Subject: [PATCH 08/22] 1. Replaced BinaryFormatter with BinaryReader amd BinaryWriter for serialising and derseralising categorisation results from ActiveMQ. 2. Added unit test for the new serialisation/derialisation. 3. 
Added User Secrets to taxonomy Update project --- ...es.Taxonomy.Batch.Update.OpenSearch.csproj | 1 + .../Program.cs | 1 + .../appsettings.json | 4 +- .../SerialisationTests.cs | 36 +++++ .../Queue/ActiveMqDirectUpdateSender.cs | 8 +- .../Domain/Queue/ActiveMqUpdateReceiver.cs | 3 +- .../Domain/Queue/ActiveMqUpdateSender.cs | 6 +- .../Helpers/IaidWithCategoriesSerialiser.cs | 124 +++++++++++++----- .../Service/Impl/UpdateOpenSearchService.cs | 1 - 9 files changed, 139 insertions(+), 45 deletions(-) create mode 100644 NationalArchives.Taxonomy.Common.UnitTests/SerialisationTests.cs diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.OpenSearch.csproj b/NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.OpenSearch.csproj index d52cd5b..611e5c5 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.OpenSearch.csproj +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.OpenSearch.csproj @@ -4,6 +4,7 @@ Exe net8.0 true + 0acc472f-079b-40ce-8c52-b94d0f5becba diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs b/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs index 70ac3f5..a41f297 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs @@ -65,6 +65,7 @@ public static IHostBuilder CreateHostBuilder(string[] args) => }).ConfigureAppConfiguration((hostingContext, config) => { config.AddEnvironmentVariables("TAXONOMY_"); + config.AddUserSecrets(); }).UseWindowsService(); diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json b/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json index 7b6b90e..a12fa26 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json @@ -35,8 +35,8 @@ } }, "UpdateStagingQueueParams": { - "Uri": "ssl://b-XS:61617", - "QueueName": "taxonomy.results2", + "Uri": "*", + "QueueName": "*", "MaxSize": "30000000", "UserName": "", "Password": "" diff --git a/NationalArchives.Taxonomy.Common.UnitTests/SerialisationTests.cs b/NationalArchives.Taxonomy.Common.UnitTests/SerialisationTests.cs new file mode 100644 index 0000000..3745b21 --- /dev/null +++ b/NationalArchives.Taxonomy.Common.UnitTests/SerialisationTests.cs @@ -0,0 +1,36 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using NationalArchives.Taxonomy.Common.BusinessObjects; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using NationalArchives.Taxonomy.Common.Helpers; + +namespace NationalArchives.Taxonomy.Common.UnitTests +{ + [TestClass] + public class SerialisationTests + { + [TestMethod] + public void IaidWithCategories_Serialisation() + { + var iaidWithCategories1 = new IaidWithCategories("C12345", new List() {"C10161", "C10272", "C10383", "C10494", "C10505" }); + var iaidWithCategories2 = new IaidWithCategories("C54321", new List() { "C76757" }); + var iaidWithCategories3 = new IaidWithCategories("C67890", new List() {}); + var iaidWithCategories4 = new IaidWithCategories("C90818", new List() { "C40303" }); + + var categorisationResults = new List() { iaidWithCategories1, iaidWithCategories2, iaidWithCategories3, iaidWithCategories4 }; + + byte[] serialisedResults = categorisationResults.ToByteArray(); + List deserialisedResults = 
IaidWithCategoriesSerialiser.IdxMessageToListOfIaidsWithCategories(serialisedResults); + + Assert.AreEqual(categorisationResults.Count, deserialisedResults.Count); + + foreach (IaidWithCategories categorisation in categorisationResults) + { + Assert.IsTrue(deserialisedResults.Contains(categorisation)); + } + } + } +} diff --git a/Taxonomy.Common/Domain/Queue/ActiveMqDirectUpdateSender.cs b/Taxonomy.Common/Domain/Queue/ActiveMqDirectUpdateSender.cs index 52157c7..394b90f 100644 --- a/Taxonomy.Common/Domain/Queue/ActiveMqDirectUpdateSender.cs +++ b/Taxonomy.Common/Domain/Queue/ActiveMqDirectUpdateSender.cs @@ -77,15 +77,11 @@ public bool Enqueue(IaidWithCategories item, CancellationToken token) } try { - //string itemString = JsonConvert.SerializeObject(item); - //var textMessage = m_Producer.CreateTextMessage(itemString); - // m_Producer.Send(textMessage); - - byte[] serialisedResult = item.ToByteArray(); + var itemAsList = new List() { item }; + byte[] serialisedResult = itemAsList.ToByteArray(); var bytesMessage = m_Producer.CreateBytesMessage(serialisedResult); m_Producer.Send(bytesMessage); - return true; } catch (Exception e) diff --git a/Taxonomy.Common/Domain/Queue/ActiveMqUpdateReceiver.cs b/Taxonomy.Common/Domain/Queue/ActiveMqUpdateReceiver.cs index db35011..63c8bf2 100644 --- a/Taxonomy.Common/Domain/Queue/ActiveMqUpdateReceiver.cs +++ b/Taxonomy.Common/Domain/Queue/ActiveMqUpdateReceiver.cs @@ -113,7 +113,8 @@ public List DeQueueNextListOfIaidsWithCategories() if (nextBytesMessage != null) { - List nextBatchFromInterimQueue = IaidWithCategoriesSerialiser.IdxMessageToListOfIaidsWithCategories(nextBytesMessage); + byte[] bytes = nextBytesMessage.Content; + List nextBatchFromInterimQueue = IaidWithCategoriesSerialiser.IdxMessageToListOfIaidsWithCategories(bytes); return nextBatchFromInterimQueue; } else diff --git a/Taxonomy.Common/Domain/Queue/ActiveMqUpdateSender.cs b/Taxonomy.Common/Domain/Queue/ActiveMqUpdateSender.cs index 037e744..f4ac3a4 100644 --- a/Taxonomy.Common/Domain/Queue/ActiveMqUpdateSender.cs +++ b/Taxonomy.Common/Domain/Queue/ActiveMqUpdateSender.cs @@ -14,14 +14,12 @@ namespace NationalArchives.Taxonomy.Common.Domain.Queue { public class ActiveMqUpdateSender : IUpdateStagingQueueSender, IDisposable { - private readonly ConnectionFactory _activeMqConnectionFactory; private readonly IConnection _activeMqConnection; private readonly ISession _activeMqSession; private readonly IDestination _activeMqdestination; private readonly IMessageProducer _activeMqProducer; - private BlockingCollection _blockingCollection = new BlockingCollection(); private CancellationToken _token = default; @@ -176,7 +174,6 @@ public IReadOnlyCollection QueueUpdateErrors private void Consume1() { - while (!IsComplete() && !_token.IsCancellationRequested) { if (_sendErrors.Count >= _maxSendErrors) @@ -191,7 +188,6 @@ private void Consume1() var currentBatch = new List(_batchSize); - for (int i = 0; i < _batchSize && (!IsComplete() && !_token.IsCancellationRequested); i++) { IaidWithCategories nextResult; @@ -202,12 +198,12 @@ private void Consume1() { currentBatch.Add(nextResult); } - } if (currentBatch.Count > 0) { byte[] serialisedResults = currentBatch.ToByteArray(); + try { var bytesMessage = _activeMqProducer.CreateBytesMessage(serialisedResults); diff --git a/Taxonomy.Common/Helpers/IaidWithCategoriesSerialiser.cs b/Taxonomy.Common/Helpers/IaidWithCategoriesSerialiser.cs index 5400eec..594a535 100644 --- a/Taxonomy.Common/Helpers/IaidWithCategoriesSerialiser.cs +++ 
b/Taxonomy.Common/Helpers/IaidWithCategoriesSerialiser.cs @@ -1,5 +1,6 @@ using Apache.NMS; using NationalArchives.Taxonomy.Common.BusinessObjects; +using Newtonsoft.Json.Linq; using System; using System.Collections.Generic; using System.IO; @@ -10,53 +11,116 @@ namespace NationalArchives.Taxonomy.Common.Helpers { internal static class IaidWithCategoriesSerialiser { + private const string CATEGORISATION_RESULTS_START = "Start of Categorisation Results."; + private const string CATEGORISATION_RESULT_START = "Categorisation Result:"; + private const string CATEGORISATION_RESULT_END = "Result End."; + private const string CATEGORISATION_RESULTS_END = "End of Categorisation Results."; + + private const string UNEXPECTED_READER_OUTPUT = "Unexpected ouput when deserialising Categorisation Results from queue."; public static byte[] ToByteArray(this List categorisationResult) { - BinaryFormatter bf = new BinaryFormatter(); - using (var ms = new MemoryStream()) + using (var stream = new MemoryStream()) { - bf.Serialize(ms, categorisationResult); - return ms.ToArray(); - } + using (var writer = new BinaryWriter(stream, Encoding.UTF8, false)) + { + writer.Write(CATEGORISATION_RESULTS_START); + foreach (IaidWithCategories item in categorisationResult) + { + writer.Write(CATEGORISATION_RESULT_START); + writer.Write(item.Iaid); + foreach (string s in item.CategoryIds) + { + writer.Write(s); + } + writer.Write(CATEGORISATION_RESULT_END); + } + writer.Write(CATEGORISATION_RESULTS_END); + } + return stream.ToArray(); + } } - + + [Obsolete] public static byte[] ToByteArray(this IaidWithCategories categorisationResult) { - BinaryFormatter bf = new BinaryFormatter(); - using (var ms = new MemoryStream()) + using (var stream = new MemoryStream()) { - bf.Serialize(ms, categorisationResult); - return ms.ToArray(); + using (var writer = new BinaryWriter(stream, Encoding.UTF8, false)) + { + writer.Write(categorisationResult.Iaid); + foreach(string s in categorisationResult.CategoryIds) + { + writer.Write(s); + } + } + return stream.ToArray(); } } - - internal static List IdxMessageToListOfIaidsWithCategories(IBytesMessage msg) + internal static List IdxMessageToListOfIaidsWithCategories(byte[] bytes) { - List returnList = null; - - using (var memStream = new MemoryStream()) - { - var binForm = new BinaryFormatter(); - memStream.Write(msg.Content, 0, msg.Content.Length); - memStream.Seek(0, SeekOrigin.Begin); - var obj = binForm.Deserialize(memStream); + var deserialisedResults = new List(); - switch(obj) + using (var stream = new MemoryStream(bytes)) + { + using (var reader = new BinaryReader(stream, Encoding.UTF8, false)) { - case List lc: - returnList = lc; - break; - case IaidWithCategories singleResult: - returnList = new List() { singleResult }; - break; - default: - throw new TaxonomyException("Unable to deserialise categorisation result(s) from queue message."); + string resultsStart = reader.ReadString(); + { + if(resultsStart != CATEGORISATION_RESULTS_START) + { + throw new TaxonomyException(UNEXPECTED_READER_OUTPUT); + } + } + + while(true) + { + string next = reader.ReadString(); + if (next == CATEGORISATION_RESULTS_END || String.IsNullOrEmpty(next)) + { + break; + } + else + { + if (next == CATEGORISATION_RESULT_START) + { + var nextCatResult = GetResult(reader); + deserialisedResults.Add(nextCatResult); + } + else + { + throw new TaxonomyException(UNEXPECTED_READER_OUTPUT); + } + } + } } - return returnList; + } + + return deserialisedResults; } + + private static IaidWithCategories 
GetResult(BinaryReader reader) + { + string iaid = reader.ReadString(); + List categories = new List(); + + string next; + + while (true) + { + next = reader.ReadString(); + + if (next == CATEGORISATION_RESULT_END || String.IsNullOrEmpty(next)) + { + break; + } + categories.Add(next); + }; + + return new IaidWithCategories(iaid, categories); + } } } diff --git a/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs b/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs index 73d553d..01631bd 100644 --- a/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs +++ b/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs @@ -171,7 +171,6 @@ void SubmitUpdatesToOpenSearchDatabase() throw; } } - private void BulkUpdateCategoriesOnIAViews(IList listOfIAViewUpdatesToProcess) { From ed3447a7dcd5d8e49591d60ce06f4dafaac62ce0 Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Wed, 20 Nov 2024 16:13:18 +0000 Subject: [PATCH 09/22] 1. Correct nlog config spelling.2. Added null check on Timer dispose --- NationalArchives.Taxonomy.Batch/nlog.config | 2 +- Taxonomy.Common/Domain/Queue/ActiveMqUpdateSender.cs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/NationalArchives.Taxonomy.Batch/nlog.config b/NationalArchives.Taxonomy.Batch/nlog.config index 4a1fc4d..12d631e 100644 --- a/NationalArchives.Taxonomy.Batch/nlog.config +++ b/NationalArchives.Taxonomy.Batch/nlog.config @@ -17,6 +17,6 @@ - + \ No newline at end of file diff --git a/Taxonomy.Common/Domain/Queue/ActiveMqUpdateSender.cs b/Taxonomy.Common/Domain/Queue/ActiveMqUpdateSender.cs index f4ac3a4..c1275b9 100644 --- a/Taxonomy.Common/Domain/Queue/ActiveMqUpdateSender.cs +++ b/Taxonomy.Common/Domain/Queue/ActiveMqUpdateSender.cs @@ -91,7 +91,7 @@ public Task Init(CancellationToken token, Action updateQueueProg _updateQueueProgress = updateQueueProgress; _tcs = new TaskCompletionSource(); - Timer notifcationTimer = new Timer(PrintUpdate, null, 60000, 60000) ; + Timer notificationTimer = new Timer(PrintUpdate, null, 60000, 60000) ; var tasks = new List(); @@ -112,7 +112,7 @@ public Task Init(CancellationToken token, Action updateQueueProg } finally { - notifcationTimer.Dispose(); + notificationTimer?.Dispose(); } _initialised = true; From c7c3df7ca1e0d1952e79355fab63e87782f162fd Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Fri, 22 Nov 2024 12:38:05 +0000 Subject: [PATCH 10/22] ActiveMQ connectivity now via Nuget package in place of project within solution. 
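In practical terms the in-solution NationalArchives.ActiveMQ project is removed from the .sln and each consuming project takes the equivalent library as a NuGet dependency instead. The equivalent commands look roughly like the following; the package id and version are assumptions, as the actual PackageReference details are not visible in the stripped project files:

    dotnet sln ds-discovery-opensearch-taxonomy.sln remove NationalArchives.ActiveMQ/NationalArchives.ActiveMQ.csproj
    dotnet add NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj package NationalArchives.ActiveMQ
    dotnet add TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj package NationalArchives.ActiveMQ

The Batch.Update.OpenSearch project picks up the same package reference, so all ActiveMQ connectivity now comes from the published package rather than a project reference.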
--- ...ationalArchives.Taxonomy.Batch.Update.OpenSearch.csproj | 1 + .../NationalArchives.Taxonomy.Batch.csproj | 2 +- TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj | 1 + ds-discovery-opensearch-taxonomy.sln | 7 ------- 4 files changed, 3 insertions(+), 8 deletions(-) diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.OpenSearch.csproj b/NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.OpenSearch.csproj index 611e5c5..cf4c6eb 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.OpenSearch.csproj +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/NationalArchives.Taxonomy.Batch.Update.OpenSearch.csproj @@ -15,6 +15,7 @@ + diff --git a/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj b/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj index 8f8721b..95a3eb4 100644 --- a/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj +++ b/NationalArchives.Taxonomy.Batch/NationalArchives.Taxonomy.Batch.csproj @@ -29,13 +29,13 @@ + - diff --git a/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj b/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj index 132fee5..37a5ef7 100644 --- a/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj +++ b/TaxonomyCLI/NationalArchives.Taxonomy.CLI.csproj @@ -20,6 +20,7 @@ + diff --git a/ds-discovery-opensearch-taxonomy.sln b/ds-discovery-opensearch-taxonomy.sln index 5141046..d1328ea 100644 --- a/ds-discovery-opensearch-taxonomy.sln +++ b/ds-discovery-opensearch-taxonomy.sln @@ -27,8 +27,6 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NationalArchives.Taxonomy.C EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NationalArchives.Taxonomy.Common.UnitTests", "NationalArchives.Taxonomy.Common.UnitTests\NationalArchives.Taxonomy.Common.UnitTests.csproj", "{3A5974EC-387D-4390-A634-D4C965096854}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NationalArchives.ActiveMQ", "NationalArchives.ActiveMQ\NationalArchives.ActiveMQ.csproj", "{55F32EEA-1946-4B6B-99EF-50ACDB62FEB4}" -EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -67,10 +65,6 @@ Global {3A5974EC-387D-4390-A634-D4C965096854}.Debug|Any CPU.Build.0 = Debug|Any CPU {3A5974EC-387D-4390-A634-D4C965096854}.Release|Any CPU.ActiveCfg = Release|Any CPU {3A5974EC-387D-4390-A634-D4C965096854}.Release|Any CPU.Build.0 = Release|Any CPU - {55F32EEA-1946-4B6B-99EF-50ACDB62FEB4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {55F32EEA-1946-4B6B-99EF-50ACDB62FEB4}.Debug|Any CPU.Build.0 = Debug|Any CPU - {55F32EEA-1946-4B6B-99EF-50ACDB62FEB4}.Release|Any CPU.ActiveCfg = Release|Any CPU - {55F32EEA-1946-4B6B-99EF-50ACDB62FEB4}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -84,7 +78,6 @@ Global {C9CC5122-186B-4D60-B9CE-5A6522911446} = {30BB5F98-02D8-435D-9514-666E56332608} {64383CD7-55BC-4F6C-9C31-5B60725F9B08} = {706FCBE1-11AB-4271-AE14-A7B58CCB4507} {3A5974EC-387D-4390-A634-D4C965096854} = {706FCBE1-11AB-4271-AE14-A7B58CCB4507} - {55F32EEA-1946-4B6B-99EF-50ACDB62FEB4} = {706FCBE1-11AB-4271-AE14-A7B58CCB4507} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {E0CFD241-AF85-4C51-ABE1-B03ACB6C3EA0} From fe7c205c415a7e04a3625dcb68b9955113815c92 Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Thu, 5 Dec 2024 08:46:29 +0000 Subject: 
[PATCH 11/22] Some changes to appsettings including pre-populating some values which probably don't need to be secrets. --- .../appsettings.json | 2 +- TaxonomyCLI/appsettings.json | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json b/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json index a12fa26..29dbcd8 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json @@ -29,7 +29,7 @@ "OpenSearchAwsParams": { "UseAwsConnection": "true", "Region": "eu-west-2", - "RoleArn": "arn:aws:iam::968803923593:role/elastic_update_role", + "RoleArn": "??", "AccessKey": "??", "SecretKey": "??" } diff --git a/TaxonomyCLI/appsettings.json b/TaxonomyCLI/appsettings.json index ba9d9fc..1d123a3 100644 --- a/TaxonomyCLI/appsettings.json +++ b/TaxonomyCLI/appsettings.json @@ -8,10 +8,10 @@ "**/platforms" ], "DiscoveryOpenSearchParams": { - "Scheme": "??", + "Scheme": "https", "Host": "??", - "Port": "??", - "IndexDatabase": "??", + "Port": "443", + "IndexDatabase": "discovery_records", "OpenSearchAwsParams": { "UseAwsConnection": "??", "Region": "??", @@ -29,8 +29,8 @@ }, "CategoryMongoParams": { "ConnectionString": "mongodb+srv://", - "DatabaseName": "??", - "CollectionName": "??" + "DatabaseName": "taxonomy", + "CollectionName": "categories" }, "CategorySource": "Mongo", From b6f963a96cc42448343ae9fd8ba3265493844f23 Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Fri, 6 Dec 2024 15:51:09 +0000 Subject: [PATCH 12/22] Initial work on migrating staging queue to Amazon SQS --- .../Queue/AmazonSqsDirectUpdateSender.cs | 114 ++++++++ .../Domain/Queue/AmazonSqsUpdateReceiver.cs | 126 +++++++++ .../Domain/Queue/AmazonSqsUpdateSender.cs | 265 ++++++++++++++++++ .../NationalArchives.Taxonomy.Common.csproj | 5 +- 4 files changed, 508 insertions(+), 2 deletions(-) create mode 100644 Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs create mode 100644 Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs create mode 100644 Taxonomy.Common/Domain/Queue/AmazonSqsUpdateSender.cs diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs new file mode 100644 index 0000000..a56f451 --- /dev/null +++ b/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs @@ -0,0 +1,114 @@ +using Apache.NMS; +using Apache.NMS.ActiveMQ; +using NationalArchives.Taxonomy.Common.BusinessObjects; +using NationalArchives.Taxonomy.Common.Helpers; +using System; +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; + +namespace NationalArchives.Taxonomy.Common.Domain.Queue +{ + public class AmazonSqsDirectUpdateSender : IUpdateStagingQueueSender, IDisposable + { + private readonly ConnectionFactory m_ConnectionFactory; + private readonly IConnection m_Connection; + private readonly ISession m_Session; + private readonly IDestination m_destination; + private readonly IMessageProducer m_Producer; + + private bool _addingCompleted; + + /// + /// Implementation of IUpdateStagingQueueSender where updates are sent + /// directly to an ActiveMQ instance. 
+ /// + /// + public AmazonSqsDirectUpdateSender(UpdateStagingQueueParams qParams) + { + if(!qParams.PostUpdates) + { + return; + } + + m_ConnectionFactory = new ConnectionFactory(qParams.Uri); + + if (!String.IsNullOrWhiteSpace(qParams.UserName) && !String.IsNullOrWhiteSpace(qParams.Password)) + { + m_Connection = m_ConnectionFactory.CreateConnection(qParams.UserName, qParams.Password); + } + else + { + m_Connection = m_ConnectionFactory.CreateConnection(); + } + + m_Connection.Start(); + m_Session = m_Connection.CreateSession(AcknowledgementMode.AutoAcknowledge); + m_destination = m_Session.GetQueue(qParams.QueueName); + m_Producer = m_Session.CreateProducer(m_destination); + } + + + public Task Init(CancellationToken token, Action updateQueueProgress) + { + throw new NotImplementedException(); + } + + /// + /// + /// + /// + /// Returns true for compatibility with other possible queue implemnenations on the same interface + public bool Enqueue(IaidWithCategories item, CancellationToken token) + { + if(m_Producer == null) + { + return false; + } + + if(token.IsCancellationRequested) + { + return false; + } + + if (item == null) + { + throw new TaxonomyException("No item supplied for interim queue update request!"); + } + try + { + var itemAsList = new List() { item }; + byte[] serialisedResult = itemAsList.ToByteArray(); + var bytesMessage = m_Producer.CreateBytesMessage(serialisedResult); + m_Producer.Send(bytesMessage); + + return true; + } + catch (Exception e) + { + throw; + } + } + + public bool IsAddingCompleted + { + get => _addingCompleted; + } + + public IReadOnlyCollection QueueUpdateErrors => throw new NotImplementedException(); + + public void Dispose() + { + m_Producer?.Dispose(); + m_Session?.Dispose(); + m_Connection?.Dispose(); + } + + + public void CompleteAdding() + { + _addingCompleted = true; + } + + } +} diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs new file mode 100644 index 0000000..c8e382a --- /dev/null +++ b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs @@ -0,0 +1,126 @@ +using Apache.NMS; +using Apache.NMS.ActiveMQ; +using NationalArchives.Taxonomy.Common.BusinessObjects; +using NationalArchives.Taxonomy.Common.Helpers; +using Newtonsoft.Json; +using System; +using System.Collections.Generic; + +namespace NationalArchives.Taxonomy.Common.Domain.Queue +{ + public class AmazonSqsUpdateReceiver : IUpdateStagingQueueReceiver, IDisposable + { + private const int FETCH_RETRY_COUNT = 5; + + private readonly ConnectionFactory m_ConnectionFactory; + private readonly IConnection m_Connection; + private readonly ISession m_Session; + private readonly IDestination m_destination; + private readonly IMessageConsumer m_Consumer; + + public AmazonSqsUpdateReceiver(UpdateStagingQueueParams qParams) + { + + if(qParams == null || String.IsNullOrEmpty(qParams.QueueName) || String.IsNullOrEmpty(qParams.Uri)) + { + throw new TaxonomyException(TaxonomyErrorType.JMS_EXCEPTION, "Invalid or missing queue parameters for Active MQ"); + } + + try + { + m_ConnectionFactory = new ConnectionFactory(qParams.Uri); + if (!String.IsNullOrWhiteSpace(qParams.UserName) && !String.IsNullOrWhiteSpace(qParams.Password)) + { + m_Connection = m_ConnectionFactory.CreateConnection(qParams.UserName, qParams.Password); + } + else + { + m_Connection = m_ConnectionFactory.CreateConnection(); + } + m_Connection.Start(); + m_Session = m_Connection.CreateSession(AcknowledgementMode.AutoAcknowledge); + m_destination = 
m_Session.GetQueue(qParams.QueueName); + m_Consumer = m_Session.CreateConsumer(m_destination); + + } + catch (Exception e) + { + Dispose(); + throw new TaxonomyException(TaxonomyErrorType.JMS_EXCEPTION, $"Error establishing a connection to ActiveMQ {qParams.QueueName}, at {qParams.Uri}", e); + } + } + + public IList DequeueIaidsWithCategories(int numberToFetch) + { + throw new NotImplementedException(); + } + + public IaidWithCategories DeQueueNextIaidWithCategories() + { + + IMessage nextItem; + int attempts = 0; + + do + { + nextItem = m_Consumer.ReceiveNoWait(); + attempts++; + } while (nextItem == null && attempts <= FETCH_RETRY_COUNT); + + ITextMessage msg = nextItem as ITextMessage; + + if(msg != null) + { + IaidWithCategories iaidWithCategories = JsonConvert.DeserializeObject(msg.Text); + return iaidWithCategories; + } + else + { + return null; + } + } + + + + + public void Dispose() + { + m_Consumer?.Dispose(); + m_Session?.Dispose(); + m_Connection?.Dispose(); + } + + public List DeQueueNextListOfIaidsWithCategories() + { + IMessage nextItem; + IBytesMessage nextBytesMessage = null; + int attempts = 0; + + do + { + nextItem = m_Consumer.ReceiveNoWait(); + if (nextItem != null) + { + nextBytesMessage = nextItem as IBytesMessage; + + } + else + { + attempts++; + } + } while (nextItem == null && attempts <= FETCH_RETRY_COUNT); + + + if (nextBytesMessage != null) + { + byte[] bytes = nextBytesMessage.Content; + List nextBatchFromInterimQueue = IaidWithCategoriesSerialiser.IdxMessageToListOfIaidsWithCategories(bytes); + return nextBatchFromInterimQueue; + } + else + { + return null; + } + } + } +} diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateSender.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateSender.cs new file mode 100644 index 0000000..515c2be --- /dev/null +++ b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateSender.cs @@ -0,0 +1,265 @@ +using Amazon.SQS.Model; +using Amazon.SQS; +using Apache.NMS; +using Apache.NMS.ActiveMQ; +using Microsoft.Extensions.Logging; +using NationalArchives.Taxonomy.Common.BusinessObjects; +using NationalArchives.Taxonomy.Common.Helpers; +using Newtonsoft.Json; +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Collections.ObjectModel; +using System.Threading; +using System.Threading.Tasks; + +namespace NationalArchives.Taxonomy.Common.Domain.Queue +{ + public class AmazonSqsUpdateSender : IUpdateStagingQueueSender, IDisposable + { + private readonly ConnectionFactory _activeMqConnectionFactory; + private readonly IConnection _activeMqConnection; + private readonly ISession _activeMqSession; + private readonly IDestination _activeMqdestination; + private readonly IMessageProducer _activeMqProducer; + + private BlockingCollection _blockingCollection = new BlockingCollection(); + private CancellationToken _token = default; + + private readonly int _workerCount; + private readonly int _batchSize; + + private readonly int _maxSendErrors; + private List _sendErrors = new List(); + + private TaskCompletionSource _tcs; + + private volatile int _resultsSent; + + Action _updateQueueProgress; + + private ILogger _logger; + private bool _verboseLoggingEnabled; + + private ThreadLocal _workerResultCount = new ThreadLocal(); + private ThreadLocal _workerMessageCount = new ThreadLocal(); + + private bool _initialised; + + public AmazonSqsUpdateSender(UpdateStagingQueueParams qParams, ILogger logger) + { + if (qParams == null || String.IsNullOrEmpty(qParams.QueueName) || String.IsNullOrEmpty(qParams.Uri)) + 
{ + throw new TaxonomyException(TaxonomyErrorType.JMS_EXCEPTION, "Invalid or missing queue parameters for Active MQ"); + } + + try + { + _activeMqConnectionFactory = new ConnectionFactory(qParams.Uri); + if (!String.IsNullOrWhiteSpace(qParams.UserName) && !String.IsNullOrWhiteSpace(qParams.Password)) + { + _activeMqConnection = _activeMqConnectionFactory.CreateConnection(qParams.UserName, qParams.Password); + } + else + { + _activeMqConnection = _activeMqConnectionFactory.CreateConnection(); + } + _activeMqConnection.Start(); + _activeMqSession = _activeMqConnection.CreateSession(AcknowledgementMode.AutoAcknowledge); + _activeMqdestination = _activeMqSession.GetQueue(qParams.QueueName); + _activeMqProducer = _activeMqSession.CreateProducer(_activeMqdestination); + + _workerCount = Math.Max(qParams.WorkerCount, 1); + _maxSendErrors = qParams.MaxErrors; + _batchSize = Math.Max(qParams.BatchSize, 1); + + _logger = logger; + _verboseLoggingEnabled = qParams.EnableVerboseLogging; + } + catch (Exception e) + { + Dispose(); + throw new TaxonomyException(TaxonomyErrorType.JMS_EXCEPTION, $"Error establishing a connection to ActiveMQ {qParams.QueueName}, at {qParams.Uri}", e); + } + } + + public Task Init(CancellationToken token, Action updateQueueProgress) + { + if(_initialised) + { + return null; + } + + _token = token; + _updateQueueProgress = updateQueueProgress; + _tcs = new TaskCompletionSource(); + + Timer notificationTimer = new Timer(PrintUpdate, null, 60000, 60000) ; + + var tasks = new List(); + + try + { + for (int i = 0; i < _workerCount; i++) + { + Task task = Task.Factory.StartNew(Consume1); + tasks.Add(task); + } + + Task.WaitAll(tasks.ToArray()); + _tcs.SetResult(_sendErrors.Count == 0 ? true : false); + } + catch (Exception ex) + { + _tcs.TrySetException(ex); + } + finally + { + notificationTimer?.Dispose(); + } + + _initialised = true; + return _tcs.Task; + } + + private void PrintUpdate(object data) + { + if (_resultsSent > 0) + { + _updateQueueProgress(_resultsSent, _blockingCollection.Count); + } + } + + public void CompleteAdding() + { + try + { + _blockingCollection.CompleteAdding(); + } + catch (ObjectDisposedException) + { + } + } + + public bool IsAddingCompleted + { + get => _blockingCollection.IsAddingCompleted; + } + + public bool Enqueue(IaidWithCategories item, CancellationToken token = default(CancellationToken)) + { + + + if (item == null || token.IsCancellationRequested) + { + return false; + } + + try + { + _blockingCollection.Add(item); + return true; + } + catch (Exception ex) + { + _sendErrors.Add($"Error adding item to internal queue: {item.ToString()}, {ex.Message}"); + return false; + } + } + + public IReadOnlyCollection QueueUpdateErrors + { + get => new ReadOnlyCollection(_sendErrors); + } + + + + private async Task Consume1() + { + + while (!IsComplete() && !_token.IsCancellationRequested) + { + if (_sendErrors.Count >= _maxSendErrors) + { + if (!_tcs.Task.IsFaulted) //Only one worker should set this as calling repeatedly causes an exception + { + _tcs.TrySetException(new TaxonomyException(TaxonomyErrorType.JMS_EXCEPTION, "The Active MQ update error count has been exceeded.")); + } + CompleteAdding(); + break; + } + + var currentBatch = new List(_batchSize); + + for (int i = 0; i < _batchSize && (!IsComplete() && !_token.IsCancellationRequested); i++) + { + IaidWithCategories nextResult; + + bool gotResult = _blockingCollection.TryTake(out nextResult); + + if(gotResult) + { + currentBatch.Add(nextResult); + } + } + + if (currentBatch.Count > 0) + { + var 
client = new AmazonSQSClient(); + var request = new SendMessageRequest() + { + //MessageBody = JsonSerializer.Serialize(currentBatch), + MessageBody = JsonConvert.SerializeObject(currentBatch), + QueueUrl = "https://sqs.ap-southeast-2.amazonaws.com/189107071895/youtube-demo" + }; + + var result = await client.SendMessageAsync(request); + } + } + + if(_token.IsCancellationRequested) + { + _logger.LogInformation($"Queue update worker [{Thread.CurrentThread.ManagedThreadId}] terminating following a cancellation request."); + _tcs.TrySetCanceled(); + CompleteAdding(); + } + else + { + _logger.LogInformation($"Queue update worker with thread ID [{Thread.CurrentThread.ManagedThreadId}] is finishing as there are no more results on the internal queue. This worker forwarded {_workerMessageCount.Value} messages containing {_workerResultCount.Value} results."); + } + } + + public void Dispose() + { + try + { + if (_blockingCollection.IsAddingCompleted) + { + CompleteAdding(); + } + } + catch (ObjectDisposedException) + { + } + + _blockingCollection.Dispose(); + _activeMqProducer?.Dispose(); + _activeMqSession?.Dispose(); + _activeMqConnection?.Dispose(); + + } + + private bool IsComplete() + { + try + { + bool isComplete = _blockingCollection.IsCompleted; + return isComplete; + } + catch (Exception) + { + return true; + } + } + } +} diff --git a/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj b/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj index 8a973a6..f5d1db1 100644 --- a/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj +++ b/Taxonomy.Common/NationalArchives.Taxonomy.Common.csproj @@ -14,8 +14,9 @@ - - + + + From c453d0c21ea56fc81fde2534e7c5484a7fcd6523 Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Mon, 9 Dec 2024 15:50:56 +0000 Subject: [PATCH 13/22] Taxonomy Generator now using Amazon SQS, replacing Active/Amazon MQ, for both Full Reindex and Daily Updates. --- NationalArchives.Taxonomy.Batch/Program.cs | 22 +++-- .../appsettings.json | 15 ++- .../Queue/AmazonSqsDirectUpdateSender.cs | 93 ++++++++++++++----- .../Queue/AmazonSqsStagingQueueParams.cs | 23 +++++ .../Domain/Queue/AmazonSqsUpdateSender.cs | 93 +++++++++++++------ Taxonomy.Common/TaxonomyErrorType.cs | 3 + 6 files changed, 191 insertions(+), 58 deletions(-) create mode 100644 Taxonomy.Common/Domain/Queue/AmazonSqsStagingQueueParams.cs diff --git a/NationalArchives.Taxonomy.Batch/Program.cs b/NationalArchives.Taxonomy.Batch/Program.cs index 61933a9..dd689d4 100644 --- a/NationalArchives.Taxonomy.Batch/Program.cs +++ b/NationalArchives.Taxonomy.Batch/Program.cs @@ -118,7 +118,7 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic services.AddSingleton(typeof(ILogger), typeof(Logger)); if (_operationMode == OperationMode.Full_Reindex) { - services.AddSingleton(typeof(ILogger), typeof(Logger)); + services.AddSingleton(typeof(ILogger), typeof(Logger)); } DiscoveryOpenSearchConnectionParameters discoveryOpenSearchConnParams = config.GetSection("DiscoveryOpenSearchParams").Get(); @@ -129,8 +129,11 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic CategoriserLuceneParams categoriserLuceneParams = config.GetSection("CategoriserLuceneParams").Get(); //params for update staging queue. 
- UpdateStagingQueueParams updateStagingQueueParams = config.GetSection("UpdateStagingQueueParams").Get(); - services.AddSingleton(updateStagingQueueParams); + //UpdateStagingQueueParams updateStagingQueueParams = config.GetSection("UpdateStagingQueueParams").Get(); + //services.AddSingleton(updateStagingQueueParams); + + AmazonSqsStagingQueueParams awsSqsParams = config.GetSection("AmazonSqsParams").Get(); + services.AddSingleton(awsSqsParams); // IAIDs connection info services.AddTransient>((ctx) => @@ -218,16 +221,21 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic services.AddSingleton((ctx) => { var logger = ctx.GetRequiredService>(); - UpdateStagingQueueParams qParams = ctx.GetRequiredService(); - return new ActiveMqUpdateSender(qParams, logger); + //UpdateStagingQueueParams qParams = ctx.GetRequiredService(); + //return new ActiveMqUpdateSender(qParams, logger); + AmazonSqsStagingQueueParams qParams = ctx.GetRequiredService(); + return new AmazonSqsUpdateSender(qParams, logger); }); } else { services.AddSingleton((ctx) => { - UpdateStagingQueueParams qParams = ctx.GetRequiredService(); - return new ActiveMqDirectUpdateSender(qParams); + //UpdateStagingQueueParams qParams = ctx.GetRequiredService(); + //return new ActiveMqDirectUpdateSender(qParams); + AmazonSqsStagingQueueParams qParams = ctx.GetRequiredService(); + var logger = ctx.GetRequiredService>(); + return new AmazonSqsDirectUpdateSender(qParams, logger); }); } diff --git a/NationalArchives.Taxonomy.Batch/appsettings.json b/NationalArchives.Taxonomy.Batch/appsettings.json index e2ecafa..116c2b2 100644 --- a/NationalArchives.Taxonomy.Batch/appsettings.json +++ b/NationalArchives.Taxonomy.Batch/appsettings.json @@ -31,6 +31,19 @@ //we've to move away from using this and use the profile to access AWS' } }, + "AmazonSqsParams": { + "QueueUrl": "*", + "UseIntegratedSecurity": "false", + "Region": "eu-west-2", + "RoleArn": "*", + "AccessKey": "*", + "SecretKey": "*", + "WorkerCount": "1", + "MaxErrors": "5", + "BatchSize": "1000", + "EnableVerboseLogging": "true", + "PostUpdates": "true" + }, "OpenSearchAssetFetchParams": { "PageSize": "1000", "ScrollTimeout": "10000", @@ -78,5 +91,5 @@ "CollectionName": "categories" }, "CategorySource": "Mongo", - "OperationMode": "Full_Reindex" // "Daily_Update" or "Full_Reindex" + "OperationMode": "Daily_Update" // "Daily_Update" or "Full_Reindex" } \ No newline at end of file diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs index a56f451..1fb1804 100644 --- a/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs +++ b/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs @@ -1,7 +1,13 @@ -using Apache.NMS; +using Amazon; +using Amazon.Runtime; +using Amazon.SQS; +using Amazon.SQS.Model; +using Apache.NMS; using Apache.NMS.ActiveMQ; +using Microsoft.Extensions.Logging; using NationalArchives.Taxonomy.Common.BusinessObjects; using NationalArchives.Taxonomy.Common.Helpers; +using Newtonsoft.Json; using System; using System.Collections.Generic; using System.Threading; @@ -19,33 +25,41 @@ public class AmazonSqsDirectUpdateSender : IUpdateStagingQueueSender, IDisposabl private bool _addingCompleted; + private readonly AmazonSqsStagingQueueParams _qParams; + private readonly ILogger _logger; + + private const string ROLE_SESSION_NAME = "Taxonomy_SQS_Update_FULL_DAILY_UPDATE"; + /// /// Implementation of IUpdateStagingQueueSender where updates are sent /// directly to an 
ActiveMQ instance. /// /// - public AmazonSqsDirectUpdateSender(UpdateStagingQueueParams qParams) + public AmazonSqsDirectUpdateSender(AmazonSqsStagingQueueParams qParams, ILogger logger) { if(!qParams.PostUpdates) { return; } - m_ConnectionFactory = new ConnectionFactory(qParams.Uri); + _qParams = qParams; + _logger = logger; - if (!String.IsNullOrWhiteSpace(qParams.UserName) && !String.IsNullOrWhiteSpace(qParams.Password)) - { - m_Connection = m_ConnectionFactory.CreateConnection(qParams.UserName, qParams.Password); - } - else - { - m_Connection = m_ConnectionFactory.CreateConnection(); - } + //m_ConnectionFactory = new ConnectionFactory(qParams.Uri); + + //if (!String.IsNullOrWhiteSpace(qParams.UserName) && !String.IsNullOrWhiteSpace(qParams.Password)) + //{ + // m_Connection = m_ConnectionFactory.CreateConnection(qParams.UserName, qParams.Password); + //} + //else + //{ + // m_Connection = m_ConnectionFactory.CreateConnection(); + //} - m_Connection.Start(); - m_Session = m_Connection.CreateSession(AcknowledgementMode.AutoAcknowledge); - m_destination = m_Session.GetQueue(qParams.QueueName); - m_Producer = m_Session.CreateProducer(m_destination); + //m_Connection.Start(); + //m_Session = m_Connection.CreateSession(AcknowledgementMode.AutoAcknowledge); + //m_destination = m_Session.GetQueue(qParams.QueueName); + //m_Producer = m_Session.CreateProducer(m_destination); } @@ -61,10 +75,10 @@ public Task Init(CancellationToken token, Action updateQueueProg /// Returns true for compatibility with other possible queue implemnenations on the same interface public bool Enqueue(IaidWithCategories item, CancellationToken token) { - if(m_Producer == null) - { - return false; - } + //if(m_Producer == null) + //{ + // return false; + //} if(token.IsCancellationRequested) { @@ -78,9 +92,44 @@ public bool Enqueue(IaidWithCategories item, CancellationToken token) try { var itemAsList = new List() { item }; - byte[] serialisedResult = itemAsList.ToByteArray(); - var bytesMessage = m_Producer.CreateBytesMessage(serialisedResult); - m_Producer.Send(bytesMessage); + //byte[] serialisedResult = itemAsList.ToByteArray(); + //var bytesMessage = m_Producer.CreateBytesMessage(serialisedResult); + //m_Producer.Send(bytesMessage); + + AmazonSQSClient client; + RegionEndpoint region = RegionEndpoint.GetBySystemName(_qParams.Region); + + if (!_qParams.UseIntegratedSecurity) + { + AWSCredentials credentials = null; + + if (!String.IsNullOrEmpty(_qParams.SessionToken)) + { + credentials = new SessionAWSCredentials(awsAccessKeyId: _qParams.AccessKey, awsSecretAccessKey: _qParams.SecretKey, _qParams.SessionToken); + } + else + { + credentials = new BasicAWSCredentials(accessKey: _qParams.AccessKey, secretKey: _qParams.SecretKey); + } + + + AWSCredentials aWSAssumeRoleCredentials = new AssumeRoleAWSCredentials(credentials, _qParams.RoleArn, ROLE_SESSION_NAME); + + client = new AmazonSQSClient(aWSAssumeRoleCredentials, region); + } + else + { + client = new AmazonSQSClient(region); + } + + var request = new SendMessageRequest() + { + MessageBody = JsonConvert.SerializeObject(itemAsList), + QueueUrl = _qParams.QueueUrl, + }; + + var awaiter = client.SendMessageAsync(request).GetAwaiter(); + var result = awaiter.GetResult(); return true; } diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsStagingQueueParams.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsStagingQueueParams.cs new file mode 100644 index 0000000..3d895eb --- /dev/null +++ b/Taxonomy.Common/Domain/Queue/AmazonSqsStagingQueueParams.cs @@ -0,0 +1,23 @@ +using 
System; +using System.Collections.Generic; +using System.Text; + +namespace NationalArchives.Taxonomy.Common.Domain.Queue +{ + public class AmazonSqsStagingQueueParams + { + public string QueueUrl { get; set; } + public bool UseIntegratedSecurity { get; set; } + public string Region {get; set;} + public string RoleArn { get; set; } + public string AccessKey { get; set; } + public string SecretKey { get; set; } + public string SessionToken { get; set; } + public int MaxSize { get; set; } + public int WorkerCount { get; set; } = 1; + public int MaxErrors { get; set; } = 1; + public int BatchSize { get; set; } + public bool EnableVerboseLogging { get; set; } + public bool PostUpdates { get; set; } + } +} diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateSender.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateSender.cs index 515c2be..662a11c 100644 --- a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateSender.cs +++ b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateSender.cs @@ -12,11 +12,15 @@ using System.Collections.ObjectModel; using System.Threading; using System.Threading.Tasks; +using Amazon.Runtime; +using Amazon; namespace NationalArchives.Taxonomy.Common.Domain.Queue { public class AmazonSqsUpdateSender : IUpdateStagingQueueSender, IDisposable { + private const string ROLE_SESSION_NAME = "Taxonomy_SQS_Update_FULL_REINDEX"; + private readonly ConnectionFactory _activeMqConnectionFactory; private readonly IConnection _activeMqConnection; private readonly ISession _activeMqSession; @@ -38,37 +42,38 @@ public class AmazonSqsUpdateSender : IUpdateStagingQueueSender, IDisposable Action _updateQueueProgress; - private ILogger _logger; + private readonly ILogger _logger; private bool _verboseLoggingEnabled; private ThreadLocal _workerResultCount = new ThreadLocal(); private ThreadLocal _workerMessageCount = new ThreadLocal(); + private readonly AmazonSqsStagingQueueParams _qParams; + private bool _initialised; - public AmazonSqsUpdateSender(UpdateStagingQueueParams qParams, ILogger logger) + public AmazonSqsUpdateSender(AmazonSqsStagingQueueParams qParams, ILogger logger) { - if (qParams == null || String.IsNullOrEmpty(qParams.QueueName) || String.IsNullOrEmpty(qParams.Uri)) - { - throw new TaxonomyException(TaxonomyErrorType.JMS_EXCEPTION, "Invalid or missing queue parameters for Active MQ"); - } - try { - _activeMqConnectionFactory = new ConnectionFactory(qParams.Uri); - if (!String.IsNullOrWhiteSpace(qParams.UserName) && !String.IsNullOrWhiteSpace(qParams.Password)) - { - _activeMqConnection = _activeMqConnectionFactory.CreateConnection(qParams.UserName, qParams.Password); - } - else + if (qParams == null || String.IsNullOrEmpty(qParams.QueueUrl)) { - _activeMqConnection = _activeMqConnectionFactory.CreateConnection(); + throw new TaxonomyException(TaxonomyErrorType.SQS_EXCEPTION, "Invalid or missing queue parameters for Amazon SQS"); } - _activeMqConnection.Start(); - _activeMqSession = _activeMqConnection.CreateSession(AcknowledgementMode.AutoAcknowledge); - _activeMqdestination = _activeMqSession.GetQueue(qParams.QueueName); - _activeMqProducer = _activeMqSession.CreateProducer(_activeMqdestination); - + // _activeMqConnectionFactory = new ConnectionFactory(qParams.Uri); + // if (!String.IsNullOrWhiteSpace(qParams.UserName) && !String.IsNullOrWhiteSpace(qParams.Password)) + // { + // _activeMqConnection = _activeMqConnectionFactory.CreateConnection(qParams.UserName, qParams.Password); + // } + // else + // { + // _activeMqConnection = _activeMqConnectionFactory.CreateConnection(); + // } 
+ // _activeMqConnection.Start(); + // _activeMqSession = _activeMqConnection.CreateSession(AcknowledgementMode.AutoAcknowledge); + // _activeMqdestination = _activeMqSession.GetQueue(qParams.QueueName); + // _activeMqProducer = _activeMqSession.CreateProducer(_activeMqdestination); + _qParams = qParams; _workerCount = Math.Max(qParams.WorkerCount, 1); _maxSendErrors = qParams.MaxErrors; _batchSize = Math.Max(qParams.BatchSize, 1); @@ -79,7 +84,7 @@ public AmazonSqsUpdateSender(UpdateStagingQueueParams qParams, ILogger 0) { - var client = new AmazonSQSClient(); - var request = new SendMessageRequest() + try { - //MessageBody = JsonSerializer.Serialize(currentBatch), - MessageBody = JsonConvert.SerializeObject(currentBatch), - QueueUrl = "https://sqs.ap-southeast-2.amazonaws.com/189107071895/youtube-demo" - }; - - var result = await client.SendMessageAsync(request); + AmazonSQSClient client; + RegionEndpoint region = RegionEndpoint.GetBySystemName(_qParams.Region); + + if (!_qParams.UseIntegratedSecurity) + { + AWSCredentials credentials = null; + + if (!String.IsNullOrEmpty(_qParams.SessionToken)) + { + credentials = new SessionAWSCredentials(awsAccessKeyId: _qParams.AccessKey, awsSecretAccessKey: _qParams.SecretKey, _qParams.SessionToken); + } + else + { + credentials = new BasicAWSCredentials(accessKey: _qParams.AccessKey, secretKey: _qParams.SecretKey); + } + + + AWSCredentials aWSAssumeRoleCredentials = new AssumeRoleAWSCredentials(credentials, _qParams.RoleArn, ROLE_SESSION_NAME); + + client = new AmazonSQSClient(aWSAssumeRoleCredentials, region); + } + else + { + client = new AmazonSQSClient(region); + } + + var request = new SendMessageRequest() + { + MessageBody = JsonConvert.SerializeObject(currentBatch), + QueueUrl = _qParams.QueueUrl, + }; + + var result = await client.SendMessageAsync(request); + Console.WriteLine(result); + } + catch (Exception ex) + { + throw; + } } } diff --git a/Taxonomy.Common/TaxonomyErrorType.cs b/Taxonomy.Common/TaxonomyErrorType.cs index a8b0d04..f668d63 100644 --- a/Taxonomy.Common/TaxonomyErrorType.cs +++ b/Taxonomy.Common/TaxonomyErrorType.cs @@ -40,6 +40,9 @@ public enum TaxonomyErrorType */ JMS_EXCEPTION, + // Amazon SQS + SQS_EXCEPTION, + /** * Document was not found in lucene Index */ From 39b4ad002daa405314ea48828c33199474f5fb8b Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Mon, 9 Dec 2024 16:18:48 +0000 Subject: [PATCH 14/22] Taxonomy CLI now using SQS instead of ActiveMQ/Amazon to post updates --- .../Domain/Queue/AmazonSqsDirectUpdateSender.cs | 3 --- TaxonomyCLI/Program.cs | 14 +++++++++++--- TaxonomyCLI/appsettings.json | 14 +++++++++++++- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs index 1fb1804..a1144ce 100644 --- a/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs +++ b/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs @@ -92,9 +92,6 @@ public bool Enqueue(IaidWithCategories item, CancellationToken token) try { var itemAsList = new List() { item }; - //byte[] serialisedResult = itemAsList.ToByteArray(); - //var bytesMessage = m_Producer.CreateBytesMessage(serialisedResult); - //m_Producer.Send(bytesMessage); AmazonSQSClient client; RegionEndpoint region = RegionEndpoint.GetBySystemName(_qParams.Region); diff --git a/TaxonomyCLI/Program.cs b/TaxonomyCLI/Program.cs index cdc1783..efaaa3d 100644 --- a/TaxonomyCLI/Program.cs +++ b/TaxonomyCLI/Program.cs @@ -155,12 +155,20 @@ 
private static ServiceProvider ConfigureServices(IConfigurationRoot config, stri if (hasLiveUpdates) { //params for update staging queue. - services.AddSingleton(config.GetSection("UpdateStagingQueueParams").Get()); + //services.AddSingleton(config.GetSection("UpdateStagingQueueParams").Get()); + AmazonSqsStagingQueueParams awsSqsParams = config.GetSection("AmazonSqsParams").Get(); + services.AddSingleton(awsSqsParams); + + services.AddSingleton(typeof(ILogger), typeof(Logger)); services.AddSingleton((ctx) => { - UpdateStagingQueueParams qParams = ctx.GetRequiredService(); - return new ActiveMqDirectUpdateSender(qParams); + //UpdateStagingQueueParams qParams = ctx.GetRequiredService(); + //return new ActiveMqDirectUpdateSender(qParams); + + AmazonSqsStagingQueueParams qParams = ctx.GetRequiredService(); + var logger = ctx.GetRequiredService>(); + return new AmazonSqsDirectUpdateSender(qParams, logger); }); } diff --git a/TaxonomyCLI/appsettings.json b/TaxonomyCLI/appsettings.json index 1d123a3..6de105e 100644 --- a/TaxonomyCLI/appsettings.json +++ b/TaxonomyCLI/appsettings.json @@ -20,7 +20,19 @@ "SecretKey": "??" } }, - + "AmazonSqsParams": { + "QueueUrl": "*", + "UseIntegratedSecurity": "false", + "Region": "eu-west-2", + "RoleArn": "*", + "AccessKey": "*", + "SecretKey": "*", + "WorkerCount": "1", + "MaxErrors": "5", + "BatchSize": "1000", + "EnableVerboseLogging": "true", + "PostUpdates": "true" + }, "CategoryOpenSearchParams": { "Scheme": "https", "Host": "??", From 6b703710fc0db31d288b5e4eb8330800c665f80f Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Mon, 9 Dec 2024 16:44:11 +0000 Subject: [PATCH 15/22] Progress on Taxonomy database update migration to SQS --- .../appsettings.json | 13 ++++ .../Domain/Queue/AmazonSqsUpdateReceiver.cs | 59 ++++++++++++++----- 2 files changed, 58 insertions(+), 14 deletions(-) diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json b/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json index 29dbcd8..94f59cd 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json @@ -34,6 +34,19 @@ "SecretKey": "??" 
} }, + "AmazonSqsParams": { + "QueueUrl": "*", + "UseIntegratedSecurity": "false", + "Region": "eu-west-2", + "RoleArn": "*", + "AccessKey": "*", + "SecretKey": "*", + "WorkerCount": "1", + "MaxErrors": "5", + "BatchSize": "1000", + "EnableVerboseLogging": "true", + "PostUpdates": "true" + }, "UpdateStagingQueueParams": { "Uri": "*", "QueueName": "*", diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs index c8e382a..df2e39b 100644 --- a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs +++ b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs @@ -1,16 +1,21 @@ -using Apache.NMS; +using Amazon.Runtime; +using Amazon.SQS.Model; +using Amazon.SQS; +using Apache.NMS; using Apache.NMS.ActiveMQ; using NationalArchives.Taxonomy.Common.BusinessObjects; using NationalArchives.Taxonomy.Common.Helpers; using Newtonsoft.Json; using System; using System.Collections.Generic; +using Amazon; namespace NationalArchives.Taxonomy.Common.Domain.Queue { public class AmazonSqsUpdateReceiver : IUpdateStagingQueueReceiver, IDisposable { private const int FETCH_RETRY_COUNT = 5; + private const string ROLE_SESSION_NAME = "Taxonomy_SQS_Update"; private readonly ConnectionFactory m_ConnectionFactory; private readonly IConnection m_Connection; @@ -18,35 +23,61 @@ public class AmazonSqsUpdateReceiver : IUpdateStagingQueueReceiver, IDisposable private readonly IDestination m_destination; private readonly IMessageConsumer m_Consumer; - public AmazonSqsUpdateReceiver(UpdateStagingQueueParams qParams) + private AmazonSQSClient _client; + + public AmazonSqsUpdateReceiver(AmazonSqsStagingQueueParams qParams) { - if(qParams == null || String.IsNullOrEmpty(qParams.QueueName) || String.IsNullOrEmpty(qParams.Uri)) + if(qParams == null || String.IsNullOrEmpty(qParams.QueueUrl)) { - throw new TaxonomyException(TaxonomyErrorType.JMS_EXCEPTION, "Invalid or missing queue parameters for Active MQ"); + throw new TaxonomyException(TaxonomyErrorType.SQS_EXCEPTION, "Invalid or missing queue parameters for Amazon SQS"); } try { - m_ConnectionFactory = new ConnectionFactory(qParams.Uri); - if (!String.IsNullOrWhiteSpace(qParams.UserName) && !String.IsNullOrWhiteSpace(qParams.Password)) + //m_ConnectionFactory = new ConnectionFactory(qParams.Uri); + //if (!String.IsNullOrWhiteSpace(qParams.UserName) && !String.IsNullOrWhiteSpace(qParams.Password)) + //{ + // m_Connection = m_ConnectionFactory.CreateConnection(qParams.UserName, qParams.Password); + //} + //else + //{ + // m_Connection = m_ConnectionFactory.CreateConnection(); + //} + //m_Connection.Start(); + //m_Session = m_Connection.CreateSession(AcknowledgementMode.AutoAcknowledge); + //m_destination = m_Session.GetQueue(qParams.QueueName); + //m_Consumer = m_Session.CreateConsumer(m_destination); + + RegionEndpoint region = RegionEndpoint.GetBySystemName(qParams.Region); + + if (!qParams.UseIntegratedSecurity) { - m_Connection = m_ConnectionFactory.CreateConnection(qParams.UserName, qParams.Password); + AWSCredentials credentials = null; + + if (!String.IsNullOrEmpty(qParams.SessionToken)) + { + credentials = new SessionAWSCredentials(awsAccessKeyId: qParams.AccessKey, awsSecretAccessKey: qParams.SecretKey, qParams.SessionToken); + } + else + { + credentials = new BasicAWSCredentials(accessKey: qParams.AccessKey, secretKey: qParams.SecretKey); + } + + AWSCredentials aWSAssumeRoleCredentials = new AssumeRoleAWSCredentials(credentials, qParams.RoleArn, ROLE_SESSION_NAME); + + _client = new 
AmazonSQSClient(aWSAssumeRoleCredentials, region); } else - { - m_Connection = m_ConnectionFactory.CreateConnection(); + { + _client = new AmazonSQSClient(region); } - m_Connection.Start(); - m_Session = m_Connection.CreateSession(AcknowledgementMode.AutoAcknowledge); - m_destination = m_Session.GetQueue(qParams.QueueName); - m_Consumer = m_Session.CreateConsumer(m_destination); } catch (Exception e) { Dispose(); - throw new TaxonomyException(TaxonomyErrorType.JMS_EXCEPTION, $"Error establishing a connection to ActiveMQ {qParams.QueueName}, at {qParams.Uri}", e); + throw new TaxonomyException(TaxonomyErrorType.SQS_EXCEPTION, $"Error establishing a connection to Amazon SQS {qParams.QueueUrl}.", e); } } From f342cbf240d3ac0a150b646509b290825552f7d4 Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Mon, 9 Dec 2024 17:57:49 +0000 Subject: [PATCH 16/22] First working implementation of OpenSearch updating with SQS --- .../Program.cs | 7 +- .../Domain/Queue/AmazonSqsUpdateReceiver.cs | 76 +++++++++++++------ 2 files changed, 58 insertions(+), 25 deletions(-) diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs b/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs index a41f297..9e3efdc 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs @@ -74,7 +74,10 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic IConfiguration config = context.Configuration; var openSearchUpdateParams = config.GetSection(nameof(OpenSearchUpdateParams)).Get(); - var stagingQueueParams = config.GetSection(nameof(UpdateStagingQueueParams)).Get(); + + //var stagingQueueParams = config.GetSection(nameof(UpdateStagingQueueParams)).Get(); + var stagingQueueParams = config.GetSection("AmazonSqsParams").Get(); + var updateOpenSearchConnParams = config.GetSection(nameof(UpdateOpenSearchConnectionParameters)).Get(); services.AddSingleton(typeof(ILogger), typeof(Logger)); @@ -84,7 +87,7 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic //Staging queue for updates. Needs to be a singleton or we get multiple consumers! 
services.AddSingleton((ctx) => { - return new ActiveMqUpdateReceiver(stagingQueueParams); + return new AmazonSqsUpdateReceiver(stagingQueueParams); }); services.AddTransient((ctx) => diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs index df2e39b..4bdbc38 100644 --- a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs +++ b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs @@ -9,6 +9,7 @@ using System; using System.Collections.Generic; using Amazon; +using Amazon.Runtime.Internal.Endpoints.StandardLibrary; namespace NationalArchives.Taxonomy.Common.Domain.Queue { @@ -23,6 +24,8 @@ public class AmazonSqsUpdateReceiver : IUpdateStagingQueueReceiver, IDisposable private readonly IDestination m_destination; private readonly IMessageConsumer m_Consumer; + private readonly AmazonSqsStagingQueueParams _qParams; + private AmazonSQSClient _client; public AmazonSqsUpdateReceiver(AmazonSqsStagingQueueParams qParams) @@ -33,6 +36,8 @@ public AmazonSqsUpdateReceiver(AmazonSqsStagingQueueParams qParams) throw new TaxonomyException(TaxonomyErrorType.SQS_EXCEPTION, "Invalid or missing queue parameters for Amazon SQS"); } + _qParams = qParams; + try { //m_ConnectionFactory = new ConnectionFactory(qParams.Uri); @@ -123,34 +128,59 @@ public void Dispose() public List DeQueueNextListOfIaidsWithCategories() { - IMessage nextItem; - IBytesMessage nextBytesMessage = null; - int attempts = 0; - - do - { - nextItem = m_Consumer.ReceiveNoWait(); - if (nextItem != null) - { - nextBytesMessage = nextItem as IBytesMessage; + //IMessage nextItem; + //IBytesMessage nextBytesMessage = null; + //int attempts = 0; + + //do + //{ + // nextItem = m_Consumer.ReceiveNoWait(); + // if (nextItem != null) + // { + // nextBytesMessage = nextItem as IBytesMessage; - } - else - { - attempts++; - } - } while (nextItem == null && attempts <= FETCH_RETRY_COUNT); - - - if (nextBytesMessage != null) + // } + // else + // { + // attempts++; + // } + //} while (nextItem == null && attempts <= FETCH_RETRY_COUNT); + + + //if (nextBytesMessage != null) + //{ + // byte[] bytes = nextBytesMessage.Content; + // List nextBatchFromInterimQueue = IaidWithCategoriesSerialiser.IdxMessageToListOfIaidsWithCategories(bytes); + // return nextBatchFromInterimQueue; + //} + //else + //{ + // return null; + //} + + //TODO - May be more performant to return multiple results i.e. set MaxNumberOfMessages = 10 and return List or enumeration + + // TODO + // Receive the message + // Process the message + // Delete the message + var requestParams = new ReceiveMessageRequest { - byte[] bytes = nextBytesMessage.Content; - List nextBatchFromInterimQueue = IaidWithCategoriesSerialiser.IdxMessageToListOfIaidsWithCategories(bytes); - return nextBatchFromInterimQueue; + QueueUrl = _qParams.QueueUrl, + MaxNumberOfMessages = 1, + WaitTimeSeconds = TimeSpan.FromSeconds(10).Seconds, + }; + + //TODO - May be more performant to return multiple results i.e. 
+ ReceiveMessageResponse message = _client.ReceiveMessageAsync(requestParams).Result; + if (message.Messages.Count == 1) + { + List result = JsonConvert.DeserializeObject>(message.Messages[0].Body); + return result; } else { - return null; + throw new TaxonomyException("Unexpected message count"); } } } From 86f29ac50f54747ae93a60313900a037a784296f Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Tue, 10 Dec 2024 13:55:10 +0000 Subject: [PATCH 17/22] Various updates and fixes to SQS based staging --- .../Program.cs | 1 - .../Service/UpdateOpenSearchWindowsService.cs | 9 +- .../ActiveMq/CategoriseDocActiveMqConsumer.cs | 9 +- NationalArchives.Taxonomy.Batch/Program.cs | 8 +- .../Service/FullReindexService.cs | 13 +-- .../appsettings.json | 1 + .../Domain/Queue/ActiveMqUpdateReceiver.cs | 5 ++ .../Queue/AmazonSqsDirectUpdateSender.cs | 27 +----- .../Domain/Queue/AmazonSqsUpdateReceiver.cs | 83 +++++++------------ .../Domain/Queue/AmazonSqsUpdateSender.cs | 47 ++++++----- .../Queue/IUpdateStagingQueueReceiver.cs | 2 + .../Service/Impl/UpdateOpenSearchService.cs | 37 ++++++--- .../Interface/IUpdateOpenSearchService.cs | 5 +- 13 files changed, 106 insertions(+), 141 deletions(-) diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs b/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs index 9e3efdc..447a578 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs @@ -83,7 +83,6 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic services.AddSingleton(typeof(ILogger), typeof(Logger)); services.AddSingleton(typeof(ILogger), typeof(Logger)); - //Staging queue for updates. Needs to be a singleton or we get multiple consumers! 
services.AddSingleton((ctx) => { diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/Service/UpdateOpenSearchWindowsService.cs b/NationalArchives.Taxonomy.Batch.Update.Elastic/Service/UpdateOpenSearchWindowsService.cs index d8a2375..ca6cbd7 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/Service/UpdateOpenSearchWindowsService.cs +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/Service/UpdateOpenSearchWindowsService.cs @@ -47,11 +47,12 @@ protected override Task ExecuteAsync(CancellationToken stoppingToken) _hostApplicationLifetime.ApplicationStopping.Register(OnStopping); _hostApplicationLifetime.ApplicationStopped.Register(OnStopped); - Task updateTask = Task.Run(() => _updateOpenSearchService.Init()); + Task updateTask = _updateOpenSearchService.Init(); TaskAwaiter awaiter = updateTask.GetAwaiter(); awaiter.OnCompleted(() => OutputCompletion(updateTask)); + updateTask.Wait(); return Task.CompletedTask; } @@ -63,7 +64,11 @@ private void OutputCompletion(Task task) } else if (task.IsFaulted) { - _logger.LogError("The Open Search update service is stopping due to an exception."); + _logger.LogError(task.Exception, "The Open Search update service is stopping due to an exception."); + foreach(Exception ex in task.Exception.Flatten().InnerExceptions ) + { + _logger.LogError(ex, "Inner Exception"); + } } else { diff --git a/NationalArchives.Taxonomy.Batch/DailyUpdate.MesssageQueue/ActiveMq/CategoriseDocActiveMqConsumer.cs b/NationalArchives.Taxonomy.Batch/DailyUpdate.MesssageQueue/ActiveMq/CategoriseDocActiveMqConsumer.cs index 810f6b0..1c414a2 100644 --- a/NationalArchives.Taxonomy.Batch/DailyUpdate.MesssageQueue/ActiveMq/CategoriseDocActiveMqConsumer.cs +++ b/NationalArchives.Taxonomy.Batch/DailyUpdate.MesssageQueue/ActiveMq/CategoriseDocActiveMqConsumer.cs @@ -12,13 +12,6 @@ namespace NationalArchives.Taxonomy.Batch.DailyUpdate.MessageQueue internal sealed class CategoriseDocActiveMqConsumer : ActiveMqConsumerBase { private readonly ICategoriserService _categoriserService; - //TODO: Event to notify service when processing from queue complete. 
- - //public CategoriseDocActiveMqConsumer(ICategoriserService categoriserService, - // MessageQueueParams inputMsgQueueParams, ILogger logger) : base(inputMsgQueueParams.BrokerUri, inputMsgQueueParams.UpdateQueueName, logger) - //{ - // _categoriserService = categoriserService; - //} public CategoriseDocActiveMqConsumer(ICategoriserService categoriserService, MessageQueueParams inputMsgQueueParams, ILogger logger) : base(inputMsgQueueParams, inputMsgQueueParams.UpdateQueueName, logger) @@ -90,7 +83,6 @@ protected override void HandleTextMessage(string messageId, string message) } catch (Exception e) { - Debug.Print(e.Message); _logger.LogCritical($"Fatal Error: {e.Message}" ); Exception ie = e.InnerException; @@ -100,6 +92,7 @@ protected override void HandleTextMessage(string messageId, string message) ie = ie.InnerException; } while (ie != null); + _tcs.SetException(e); throw; } } diff --git a/NationalArchives.Taxonomy.Batch/Program.cs b/NationalArchives.Taxonomy.Batch/Program.cs index dd689d4..1099b21 100644 --- a/NationalArchives.Taxonomy.Batch/Program.cs +++ b/NationalArchives.Taxonomy.Batch/Program.cs @@ -116,11 +116,7 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic services.AddSingleton(typeof(ILogger), typeof(Logger)); services.AddSingleton(typeof(ILogger), typeof(Logger)); services.AddSingleton(typeof(ILogger), typeof(Logger)); - if (_operationMode == OperationMode.Full_Reindex) - { - services.AddSingleton(typeof(ILogger), typeof(Logger)); - } - + DiscoveryOpenSearchConnectionParameters discoveryOpenSearchConnParams = config.GetSection("DiscoveryOpenSearchParams").Get(); services.AddSingleton(categorisationParams); @@ -279,7 +275,7 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic { services.AddSingleton>((ctx) => { - UpdateStagingQueueParams qparams = ctx.GetRequiredService(); + var qparams = ctx.GetRequiredService(); return new FullReIndexIaidPcQueue(qparams.MaxSize); }); // => FullReindexService diff --git a/NationalArchives.Taxonomy.Batch/Service/FullReindexService.cs b/NationalArchives.Taxonomy.Batch/Service/FullReindexService.cs index 3132a85..2358a78 100644 --- a/NationalArchives.Taxonomy.Batch/Service/FullReindexService.cs +++ b/NationalArchives.Taxonomy.Batch/Service/FullReindexService.cs @@ -129,11 +129,6 @@ protected override Task ExecuteAsync(CancellationToken stoppingToken) _logger.LogError(msg); _logger.LogError(iaidProducerTask.Exception.Message); LogInnerExceptions(iaidProducerTask.Exception.InnerExceptions); - //foreach (Exception inner in iaidProducerTask.Exception.InnerExceptions) - //{ - // //_logger.LogError($"Message: { iaidProducerTask.Exception.Message}, stack trace: { iaidProducerTask.Exception.StackTrace}"); - // LogInnerExceptions() - //} StopApplication(); } else if (iaidProducerTask.IsCanceled) @@ -181,12 +176,8 @@ void LogInnerExceptions(IEnumerable innerExceptions) { string msg = "Error updating the results queue. 
Please check the logs for errors"; _StopMessage = msg; - _logger.LogError(msg); - LogInnerExceptions(resultsQueueUpdateTask.Exception.InnerExceptions); - //foreach (Exception inner in resultsQueueUpdateTask.Exception.InnerExceptions) - //{ - // _logger.LogError($"Message: { resultsQueueUpdateTask.Exception.Message}, stack trace: { resultsQueueUpdateTask.Exception.StackTrace}"); - //} + _logger.LogError(resultsQueueUpdateTask.Exception, msg); + LogInnerExceptions(resultsQueueUpdateTask.Exception.Flatten().InnerExceptions); StopApplication(); } diff --git a/NationalArchives.Taxonomy.Batch/appsettings.json b/NationalArchives.Taxonomy.Batch/appsettings.json index 116c2b2..580c48d 100644 --- a/NationalArchives.Taxonomy.Batch/appsettings.json +++ b/NationalArchives.Taxonomy.Batch/appsettings.json @@ -39,6 +39,7 @@ "AccessKey": "*", "SecretKey": "*", "WorkerCount": "1", + "MaxSize": "36000000", "MaxErrors": "5", "BatchSize": "1000", "EnableVerboseLogging": "true", diff --git a/Taxonomy.Common/Domain/Queue/ActiveMqUpdateReceiver.cs b/Taxonomy.Common/Domain/Queue/ActiveMqUpdateReceiver.cs index 63c8bf2..cee47a3 100644 --- a/Taxonomy.Common/Domain/Queue/ActiveMqUpdateReceiver.cs +++ b/Taxonomy.Common/Domain/Queue/ActiveMqUpdateReceiver.cs @@ -122,5 +122,10 @@ public List DeQueueNextListOfIaidsWithCategories() return null; } } + + public IAsyncEnumerable> IterateResults() + { + throw new NotImplementedException(); + } } } diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs index a1144ce..cd12d9d 100644 --- a/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs +++ b/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs @@ -28,7 +28,7 @@ public class AmazonSqsDirectUpdateSender : IUpdateStagingQueueSender, IDisposabl private readonly AmazonSqsStagingQueueParams _qParams; private readonly ILogger _logger; - private const string ROLE_SESSION_NAME = "Taxonomy_SQS_Update_FULL_DAILY_UPDATE"; + private const string ROLE_SESSION_NAME = "Taxonomy_SQS_Update"; /// /// Implementation of IUpdateStagingQueueSender where updates are sent @@ -44,22 +44,6 @@ public AmazonSqsDirectUpdateSender(AmazonSqsStagingQueueParams qParams, ILogger< _qParams = qParams; _logger = logger; - - //m_ConnectionFactory = new ConnectionFactory(qParams.Uri); - - //if (!String.IsNullOrWhiteSpace(qParams.UserName) && !String.IsNullOrWhiteSpace(qParams.Password)) - //{ - // m_Connection = m_ConnectionFactory.CreateConnection(qParams.UserName, qParams.Password); - //} - //else - //{ - // m_Connection = m_ConnectionFactory.CreateConnection(); - //} - - //m_Connection.Start(); - //m_Session = m_Connection.CreateSession(AcknowledgementMode.AutoAcknowledge); - //m_destination = m_Session.GetQueue(qParams.QueueName); - //m_Producer = m_Session.CreateProducer(m_destination); } @@ -72,14 +56,9 @@ public Task Init(CancellationToken token, Action updateQueueProg /// /// /// - /// Returns true for compatibility with other possible queue implemnenations on the same interface + /// Returns true for compatibility with other possible queue implemenations on the same interface public bool Enqueue(IaidWithCategories item, CancellationToken token) { - //if(m_Producer == null) - //{ - // return false; - //} - if(token.IsCancellationRequested) { return false; @@ -109,7 +88,6 @@ public bool Enqueue(IaidWithCategories item, CancellationToken token) credentials = new BasicAWSCredentials(accessKey: _qParams.AccessKey, secretKey: _qParams.SecretKey); } - AWSCredentials 
aWSAssumeRoleCredentials = new AssumeRoleAWSCredentials(credentials, _qParams.RoleArn, ROLE_SESSION_NAME); client = new AmazonSQSClient(aWSAssumeRoleCredentials, region); @@ -155,6 +133,5 @@ public void CompleteAdding() { _addingCompleted = true; } - } } diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs index 4bdbc38..b2147ac 100644 --- a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs +++ b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs @@ -40,20 +40,6 @@ public AmazonSqsUpdateReceiver(AmazonSqsStagingQueueParams qParams) try { - //m_ConnectionFactory = new ConnectionFactory(qParams.Uri); - //if (!String.IsNullOrWhiteSpace(qParams.UserName) && !String.IsNullOrWhiteSpace(qParams.Password)) - //{ - // m_Connection = m_ConnectionFactory.CreateConnection(qParams.UserName, qParams.Password); - //} - //else - //{ - // m_Connection = m_ConnectionFactory.CreateConnection(); - //} - //m_Connection.Start(); - //m_Session = m_Connection.CreateSession(AcknowledgementMode.AutoAcknowledge); - //m_destination = m_Session.GetQueue(qParams.QueueName); - //m_Consumer = m_Session.CreateConsumer(m_destination); - RegionEndpoint region = RegionEndpoint.GetBySystemName(qParams.Region); if (!qParams.UseIntegratedSecurity) @@ -116,9 +102,6 @@ public IaidWithCategories DeQueueNextIaidWithCategories() } } - - - public void Dispose() { m_Consumer?.Dispose(); @@ -128,42 +111,6 @@ public void Dispose() public List DeQueueNextListOfIaidsWithCategories() { - //IMessage nextItem; - //IBytesMessage nextBytesMessage = null; - //int attempts = 0; - - //do - //{ - // nextItem = m_Consumer.ReceiveNoWait(); - // if (nextItem != null) - // { - // nextBytesMessage = nextItem as IBytesMessage; - - // } - // else - // { - // attempts++; - // } - //} while (nextItem == null && attempts <= FETCH_RETRY_COUNT); - - - //if (nextBytesMessage != null) - //{ - // byte[] bytes = nextBytesMessage.Content; - // List nextBatchFromInterimQueue = IaidWithCategoriesSerialiser.IdxMessageToListOfIaidsWithCategories(bytes); - // return nextBatchFromInterimQueue; - //} - //else - //{ - // return null; - //} - - //TODO - May be more performant to return multiple results i.e. set MaxNumberOfMessages = 10 and return List or enumeration - - // TODO - // Receive the message - // Process the message - // Delete the message var requestParams = new ReceiveMessageRequest { QueueUrl = _qParams.QueueUrl, @@ -171,7 +118,6 @@ public List DeQueueNextListOfIaidsWithCategories() WaitTimeSeconds = TimeSpan.FromSeconds(10).Seconds, }; - //TODO - May be more performant to return multiple results i.e. 
ReceiveMessageResponse message = _client.ReceiveMessageAsync(requestParams).Result; if (message.Messages.Count == 1) { @@ -183,5 +129,34 @@ public List DeQueueNextListOfIaidsWithCategories() throw new TaxonomyException("Unexpected message count"); } } + + public async IAsyncEnumerable> IterateResults() + { + var requestParams = new ReceiveMessageRequest + { + QueueUrl = _qParams.QueueUrl, + MaxNumberOfMessages = 10, + WaitTimeSeconds = TimeSpan.FromSeconds(10).Seconds, + }; + + ReceiveMessageResponse message = null; + + try + { + message = await _client.ReceiveMessageAsync(requestParams); + } + catch (Exception ex) + { + throw; + } + + foreach (Message msg in message.Messages) + { + List result = JsonConvert.DeserializeObject>(msg.Body); + await _client.DeleteMessageAsync(_qParams.QueueUrl, msg.ReceiptHandle); + yield return result; + } + Console.WriteLine("Done"); + } } } diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateSender.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateSender.cs index 662a11c..c4de15b 100644 --- a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateSender.cs +++ b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateSender.cs @@ -60,19 +60,7 @@ public AmazonSqsUpdateSender(AmazonSqsStagingQueueParams qParams, ILogger Init(CancellationToken token, Action updateQueueProgress) + public async Task Init(CancellationToken token, Action updateQueueProgress) { if(_initialised) { - return null; + return false; } _token = token; @@ -111,12 +99,23 @@ public Task Init(CancellationToken token, Action updateQueueProg tasks.Add(task); } + var firstToComplete = await Task.WhenAny(tasks); + await firstToComplete; + if (_tcs.Task.IsFaulted) + { + throw _tcs.Task.Exception; + } + Task.WaitAll(tasks.ToArray()); _tcs.SetResult(_sendErrors.Count == 0 ? true : false); } catch (Exception ex) { - _tcs.TrySetException(ex); + if (!_tcs.Task.IsFaulted) + { + _tcs.SetException(ex); + } + return await _tcs.Task; } finally { @@ -124,7 +123,7 @@ public Task Init(CancellationToken token, Action updateQueueProg } _initialised = true; - return _tcs.Task; + return await _tcs.Task; } private void PrintUpdate(object data) @@ -244,11 +243,21 @@ private async Task Consume1() QueueUrl = _qParams.QueueUrl, }; - var result = await client.SendMessageAsync(request); - Console.WriteLine(result); + //try + //{ + + SendMessageResponse result = client.SendMessageAsync(request).Result; + //} + //catch (Exception) + //{ + + // throw; + //} + //Console.WriteLine(result); } catch (Exception ex) { + _tcs.SetException(ex); throw; } } diff --git a/Taxonomy.Common/Domain/Queue/IUpdateStagingQueueReceiver.cs b/Taxonomy.Common/Domain/Queue/IUpdateStagingQueueReceiver.cs index 781b6b6..8a1ec10 100644 --- a/Taxonomy.Common/Domain/Queue/IUpdateStagingQueueReceiver.cs +++ b/Taxonomy.Common/Domain/Queue/IUpdateStagingQueueReceiver.cs @@ -11,5 +11,7 @@ public interface IUpdateStagingQueueReceiver IaidWithCategories DeQueueNextIaidWithCategories(); List DeQueueNextListOfIaidsWithCategories(); + + IAsyncEnumerable> IterateResults(); } } diff --git a/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs b/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs index 01631bd..52c9d5e 100644 --- a/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs +++ b/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs @@ -9,6 +9,7 @@ using System.Linq; using System.Text; using System.Threading; +using System.Threading.Tasks; namespace NationalArchives.Taxonomy.Common.Service.Impl { @@ -46,11 +47,11 @@ public 
UpdateOpenSearchService(IUpdateStagingQueueReceiver updateQueue, IOpenSea _logger = logger; } - public void Init() + public async Task Init() { try { - StartProcessing(); + await StartProcessing(); } catch (Exception e) { @@ -61,6 +62,7 @@ public void Init() _logger.LogError(sb.ToString()); throw; } + finally { Console.WriteLine("Done!"); } } public void Flush() @@ -94,7 +96,7 @@ public void Flush() } } - private void StartProcessing() + private async Task StartProcessing() { int nullCounter = 0; int minutesSinceLastNoUpdatesLogMessage = 0; @@ -103,22 +105,30 @@ private void StartProcessing() { while (!IsProcessingComplete) { - List nextBatchFromInterimUpdateQueue = _interimUpdateQueue.DeQueueNextListOfIaidsWithCategories(); - if (nextBatchFromInterimUpdateQueue != null) + //List nextBatchFromInterimUpdateQueue = _interimUpdateQueue.DeQueueNextListOfIaidsWithCategories(); + var enumerator = _interimUpdateQueue.IterateResults().GetAsyncEnumerator(); + + while (await enumerator.MoveNextAsync()) { - foreach (var categorisationResultItem in nextBatchFromInterimUpdateQueue) + List nextBatchOfResults = enumerator.Current; + + if (nextBatchOfResults.Count > 0) { - if (categorisationResultItem != null) + foreach (IaidWithCategories categorisationResult in nextBatchOfResults) { - internalQueue.Enqueue(categorisationResultItem); + if (categorisationResult != null) + { + internalQueue.Enqueue(categorisationResult); + } + else + { + nullCounter++; + } } - } - } - else - { - nullCounter++; + } } + Thread.Sleep(_queueFetchWaitTime); TimeSpan timeSinceLastUpdate = DateTime.Now - _lastOpenSearchUpdate; @@ -215,5 +225,6 @@ private void UpdateCategoriesOnIAView(IaidWithCategories item) throw; } } + } } diff --git a/Taxonomy.Common/Service/Interface/IUpdateOpenSearchService.cs b/Taxonomy.Common/Service/Interface/IUpdateOpenSearchService.cs index 37a6161..4f217b5 100644 --- a/Taxonomy.Common/Service/Interface/IUpdateOpenSearchService.cs +++ b/Taxonomy.Common/Service/Interface/IUpdateOpenSearchService.cs @@ -1,12 +1,13 @@ using NationalArchives.Taxonomy.Common.DataObjects.OpenSearch; using System; +using System.Threading.Tasks; namespace NationalArchives.Taxonomy.Common.Service.Interface { public interface IUpdateOpenSearchService { - void Init(); + Task Init(); - void Flush(); + void Flush(); } } From b43367fd207196aefa02f62049027ef9fa50fc83 Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Tue, 10 Dec 2024 14:37:46 +0000 Subject: [PATCH 18/22] Opensearch update now async throughout --- .../appsettings.json | 2 +- .../Domain/Queue/AmazonSqsUpdateReceiver.cs | 1 - .../IOpenSearchIAViewUpdateRepository.cs | 5 +++-- .../OpenSearchIAViewUpdateRepository.cs | 9 +++++---- .../Service/Impl/UpdateOpenSearchService.cs | 19 +++++++++---------- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/NationalArchives.Taxonomy.Batch/appsettings.json b/NationalArchives.Taxonomy.Batch/appsettings.json index 580c48d..557f874 100644 --- a/NationalArchives.Taxonomy.Batch/appsettings.json +++ b/NationalArchives.Taxonomy.Batch/appsettings.json @@ -92,5 +92,5 @@ "CollectionName": "categories" }, "CategorySource": "Mongo", - "OperationMode": "Daily_Update" // "Daily_Update" or "Full_Reindex" + "OperationMode": "Full_Reindex" // "Daily_Update" or "Full_Reindex" } \ No newline at end of file diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs index b2147ac..1d625e9 100644 --- a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs +++ 
b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs @@ -156,7 +156,6 @@ public async IAsyncEnumerable> IterateResults() await _client.DeleteMessageAsync(_qParams.QueueUrl, msg.ReceiptHandle); yield return result; } - Console.WriteLine("Done"); } } } diff --git a/Taxonomy.Common/Domain/Repository/Elastic/IOpenSearchIAViewUpdateRepository.cs b/Taxonomy.Common/Domain/Repository/Elastic/IOpenSearchIAViewUpdateRepository.cs index dba5e80..f079e3c 100644 --- a/Taxonomy.Common/Domain/Repository/Elastic/IOpenSearchIAViewUpdateRepository.cs +++ b/Taxonomy.Common/Domain/Repository/Elastic/IOpenSearchIAViewUpdateRepository.cs @@ -2,6 +2,7 @@ using System; using System.Collections.Generic; using System.Text; +using System.Threading.Tasks; namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch { @@ -9,8 +10,8 @@ public interface IOpenSearchIAViewUpdateRepository { IaidWithCategories GetByDocReference(string docReference); - void Save(IaidWithCategories iaidWithCategories); + Task Save(IaidWithCategories iaidWithCategories); - void SaveAll(IEnumerable iaidsWithCategories); + Task SaveAll(IEnumerable iaidsWithCategories); } } diff --git a/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchIAViewUpdateRepository.cs b/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchIAViewUpdateRepository.cs index 249134a..81adbc8 100644 --- a/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchIAViewUpdateRepository.cs +++ b/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchIAViewUpdateRepository.cs @@ -5,6 +5,7 @@ using System.Collections.Generic; using System.Linq; using System.Text; +using System.Threading.Tasks; namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch { @@ -28,7 +29,7 @@ public IaidWithCategories GetByDocReference(string docReference) throw new NotImplementedException(); } - public void Save(IaidWithCategories iaidWithCategories) + public async Task Save(IaidWithCategories iaidWithCategories) { if (iaidWithCategories == null) { @@ -36,7 +37,7 @@ public void Save(IaidWithCategories iaidWithCategories) } var update = new { TAXONOMY_ID = iaidWithCategories.CategoryIds }; - var response = _openSearchClient.Update(iaidWithCategories.Iaid, u => u.Doc(update).DocAsUpsert()); + var response = await _openSearchClient.UpdateAsync(iaidWithCategories.Iaid, u => u.Doc(update).DocAsUpsert()); if(!response.IsValid) { string errorInfo = GetOpenSearchErrorInfo(response); @@ -45,7 +46,7 @@ public void Save(IaidWithCategories iaidWithCategories) } } - public void SaveAll(IEnumerable iaidsWithCategories) + public async Task SaveAll(IEnumerable iaidsWithCategories) { if(iaidsWithCategories == null) { @@ -61,7 +62,7 @@ public void SaveAll(IEnumerable iaidsWithCategories) } //TODO: Async? 
- var response = _openSearchClient.BulkAsync(descriptor).Result; + var response = await _openSearchClient.BulkAsync(descriptor); if (!response.IsValid) { string errorInfo = GetOpenSearchErrorInfo(response); diff --git a/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs b/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs index 52c9d5e..750eded 100644 --- a/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs +++ b/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs @@ -62,7 +62,6 @@ public async Task Init() _logger.LogError(sb.ToString()); throw; } - finally { Console.WriteLine("Done!"); } } public void Flush() @@ -158,11 +157,11 @@ private async Task StartProcessing() if (nullCounter >= NULL_COUNTER_THRESHOLD) { IsProcessingComplete = true; - SubmitUpdatesToOpenSearchDatabase(); + await SubmitUpdatesToOpenSearchDatabase(); _logger.LogInformation("No more categorisation results found on update queue. Open Search Update service will now finish processing."); } - void SubmitUpdatesToOpenSearchDatabase() + async Task SubmitUpdatesToOpenSearchDatabase() { if (_batchSize == 1 || internalQueue.Count == 1) { @@ -171,7 +170,7 @@ void SubmitUpdatesToOpenSearchDatabase() else { var items = internalQueue.DequeueChunk(_batchSize).ToList(); - BulkUpdateCategoriesOnIAViews(items); + await BulkUpdateCategoriesOnIAViews(items); } } } @@ -182,7 +181,7 @@ void SubmitUpdatesToOpenSearchDatabase() } } - private void BulkUpdateCategoriesOnIAViews(IList listOfIAViewUpdatesToProcess) + private async Task BulkUpdateCategoriesOnIAViews(IList listOfIAViewUpdatesToProcess) { if(listOfIAViewUpdatesToProcess.Count == 0) @@ -193,7 +192,7 @@ private void BulkUpdateCategoriesOnIAViews(IList listOfIAVie try { _logger.LogInformation($"Submitting bulk update of {listOfIAViewUpdatesToProcess.Count} items to Open Search: "); - _targetOpenSearchRepository.SaveAll(listOfIAViewUpdatesToProcess); + await _targetOpenSearchRepository.SaveAll(listOfIAViewUpdatesToProcess); foreach (var item in listOfIAViewUpdatesToProcess) { @@ -205,22 +204,22 @@ private void BulkUpdateCategoriesOnIAViews(IList listOfIAVie _totalInfoAssetsUPdated += totalForThisBulkUpdateOperation; _logger.LogInformation($" Category data for {_totalInfoAssetsUPdated} assets has now been added or updated in Open Search."); } - catch (Exception e) + catch (Exception ex) { throw; } } - private void UpdateCategoriesOnIAView(IaidWithCategories item) + private async Task UpdateCategoriesOnIAView(IaidWithCategories item) { try { _logger.LogInformation("Submitting single Asset update to Open Search: " + item.ToString()); - _targetOpenSearchRepository.Save(item); + await _targetOpenSearchRepository.Save(item); _logger.LogInformation($"Completed single Asset in Open Search: {item.ToString()}." ); _totalInfoAssetsUPdated++; } - catch (Exception) + catch (Exception ex) { throw; } From cc359a789836caef49c5f071e5a46c3db962fbc9 Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Tue, 10 Dec 2024 15:02:05 +0000 Subject: [PATCH 19/22] Removed redundant code. 
--- .../Queue/ActiveMqDirectUpdateSender.cs | 114 -------- .../Domain/Queue/ActiveMqUpdateReceiver.cs | 131 --------- .../Domain/Queue/ActiveMqUpdateSender.cs | 271 ------------------ .../Domain/Queue/AmazonSqsUpdateReceiver.cs | 65 +---- .../Queue/IUpdateStagingQueueReceiver.cs | 7 - 5 files changed, 7 insertions(+), 581 deletions(-) delete mode 100644 Taxonomy.Common/Domain/Queue/ActiveMqDirectUpdateSender.cs delete mode 100644 Taxonomy.Common/Domain/Queue/ActiveMqUpdateReceiver.cs delete mode 100644 Taxonomy.Common/Domain/Queue/ActiveMqUpdateSender.cs diff --git a/Taxonomy.Common/Domain/Queue/ActiveMqDirectUpdateSender.cs b/Taxonomy.Common/Domain/Queue/ActiveMqDirectUpdateSender.cs deleted file mode 100644 index 394b90f..0000000 --- a/Taxonomy.Common/Domain/Queue/ActiveMqDirectUpdateSender.cs +++ /dev/null @@ -1,114 +0,0 @@ -using Apache.NMS; -using Apache.NMS.ActiveMQ; -using NationalArchives.Taxonomy.Common.BusinessObjects; -using NationalArchives.Taxonomy.Common.Helpers; -using System; -using System.Collections.Generic; -using System.Threading; -using System.Threading.Tasks; - -namespace NationalArchives.Taxonomy.Common.Domain.Queue -{ - public class ActiveMqDirectUpdateSender : IUpdateStagingQueueSender, IDisposable - { - private readonly ConnectionFactory m_ConnectionFactory; - private readonly IConnection m_Connection; - private readonly ISession m_Session; - private readonly IDestination m_destination; - private readonly IMessageProducer m_Producer; - - private bool _addingCompleted; - - /// - /// Implementation of IUpdateStagingQueueSender where updates are sent - /// directly to an ActiveMQ instance. - /// - /// - public ActiveMqDirectUpdateSender(UpdateStagingQueueParams qParams) - { - if(!qParams.PostUpdates) - { - return; - } - - m_ConnectionFactory = new ConnectionFactory(qParams.Uri); - - if (!String.IsNullOrWhiteSpace(qParams.UserName) && !String.IsNullOrWhiteSpace(qParams.Password)) - { - m_Connection = m_ConnectionFactory.CreateConnection(qParams.UserName, qParams.Password); - } - else - { - m_Connection = m_ConnectionFactory.CreateConnection(); - } - - m_Connection.Start(); - m_Session = m_Connection.CreateSession(AcknowledgementMode.AutoAcknowledge); - m_destination = m_Session.GetQueue(qParams.QueueName); - m_Producer = m_Session.CreateProducer(m_destination); - } - - - public Task Init(CancellationToken token, Action updateQueueProgress) - { - throw new NotImplementedException(); - } - - /// - /// - /// - /// - /// Returns true for compatibility with other possible queue implemnenations on the same interface - public bool Enqueue(IaidWithCategories item, CancellationToken token) - { - if(m_Producer == null) - { - return false; - } - - if(token.IsCancellationRequested) - { - return false; - } - - if (item == null) - { - throw new TaxonomyException("No item supplied for interim queue update request!"); - } - try - { - var itemAsList = new List() { item }; - byte[] serialisedResult = itemAsList.ToByteArray(); - var bytesMessage = m_Producer.CreateBytesMessage(serialisedResult); - m_Producer.Send(bytesMessage); - - return true; - } - catch (Exception e) - { - throw; - } - } - - public bool IsAddingCompleted - { - get => _addingCompleted; - } - - public IReadOnlyCollection QueueUpdateErrors => throw new NotImplementedException(); - - public void Dispose() - { - m_Producer?.Dispose(); - m_Session?.Dispose(); - m_Connection?.Dispose(); - } - - - public void CompleteAdding() - { - _addingCompleted = true; - } - - } -} diff --git 
a/Taxonomy.Common/Domain/Queue/ActiveMqUpdateReceiver.cs b/Taxonomy.Common/Domain/Queue/ActiveMqUpdateReceiver.cs deleted file mode 100644 index cee47a3..0000000 --- a/Taxonomy.Common/Domain/Queue/ActiveMqUpdateReceiver.cs +++ /dev/null @@ -1,131 +0,0 @@ -using Apache.NMS; -using Apache.NMS.ActiveMQ; -using NationalArchives.Taxonomy.Common.BusinessObjects; -using NationalArchives.Taxonomy.Common.Helpers; -using Newtonsoft.Json; -using System; -using System.Collections.Generic; - -namespace NationalArchives.Taxonomy.Common.Domain.Queue -{ - public class ActiveMqUpdateReceiver : IUpdateStagingQueueReceiver, IDisposable - { - private const int FETCH_RETRY_COUNT = 5; - - private readonly ConnectionFactory m_ConnectionFactory; - private readonly IConnection m_Connection; - private readonly ISession m_Session; - private readonly IDestination m_destination; - private readonly IMessageConsumer m_Consumer; - - public ActiveMqUpdateReceiver(UpdateStagingQueueParams qParams) - { - - if(qParams == null || String.IsNullOrEmpty(qParams.QueueName) || String.IsNullOrEmpty(qParams.Uri)) - { - throw new TaxonomyException(TaxonomyErrorType.JMS_EXCEPTION, "Invalid or missing queue parameters for Active MQ"); - } - - try - { - m_ConnectionFactory = new ConnectionFactory(qParams.Uri); - if (!String.IsNullOrWhiteSpace(qParams.UserName) && !String.IsNullOrWhiteSpace(qParams.Password)) - { - m_Connection = m_ConnectionFactory.CreateConnection(qParams.UserName, qParams.Password); - } - else - { - m_Connection = m_ConnectionFactory.CreateConnection(); - } - m_Connection.Start(); - m_Session = m_Connection.CreateSession(AcknowledgementMode.AutoAcknowledge); - m_destination = m_Session.GetQueue(qParams.QueueName); - m_Consumer = m_Session.CreateConsumer(m_destination); - - } - catch (Exception e) - { - Dispose(); - throw new TaxonomyException(TaxonomyErrorType.JMS_EXCEPTION, $"Error establishing a connection to ActiveMQ {qParams.QueueName}, at {qParams.Uri}", e); - } - } - - public IList DequeueIaidsWithCategories(int numberToFetch) - { - throw new NotImplementedException(); - } - - public IaidWithCategories DeQueueNextIaidWithCategories() - { - - IMessage nextItem; - int attempts = 0; - - do - { - nextItem = m_Consumer.ReceiveNoWait(); - attempts++; - } while (nextItem == null && attempts <= FETCH_RETRY_COUNT); - - ITextMessage msg = nextItem as ITextMessage; - - if(msg != null) - { - IaidWithCategories iaidWithCategories = JsonConvert.DeserializeObject(msg.Text); - return iaidWithCategories; - } - else - { - return null; - } - } - - - - - public void Dispose() - { - m_Consumer?.Dispose(); - m_Session?.Dispose(); - m_Connection?.Dispose(); - } - - public List DeQueueNextListOfIaidsWithCategories() - { - IMessage nextItem; - IBytesMessage nextBytesMessage = null; - int attempts = 0; - - do - { - nextItem = m_Consumer.ReceiveNoWait(); - if (nextItem != null) - { - nextBytesMessage = nextItem as IBytesMessage; - - } - else - { - attempts++; - } - } while (nextItem == null && attempts <= FETCH_RETRY_COUNT); - - - if (nextBytesMessage != null) - { - byte[] bytes = nextBytesMessage.Content; - List nextBatchFromInterimQueue = IaidWithCategoriesSerialiser.IdxMessageToListOfIaidsWithCategories(bytes); - return nextBatchFromInterimQueue; - } - else - { - return null; - } - } - - public IAsyncEnumerable> IterateResults() - { - throw new NotImplementedException(); - } - } -} diff --git a/Taxonomy.Common/Domain/Queue/ActiveMqUpdateSender.cs b/Taxonomy.Common/Domain/Queue/ActiveMqUpdateSender.cs deleted file mode 100644 index 
c1275b9..0000000 --- a/Taxonomy.Common/Domain/Queue/ActiveMqUpdateSender.cs +++ /dev/null @@ -1,271 +0,0 @@ -using Apache.NMS; -using Apache.NMS.ActiveMQ; -using Microsoft.Extensions.Logging; -using NationalArchives.Taxonomy.Common.BusinessObjects; -using NationalArchives.Taxonomy.Common.Helpers; -using System; -using System.Collections.Concurrent; -using System.Collections.Generic; -using System.Collections.ObjectModel; -using System.Threading; -using System.Threading.Tasks; - -namespace NationalArchives.Taxonomy.Common.Domain.Queue -{ - public class ActiveMqUpdateSender : IUpdateStagingQueueSender, IDisposable - { - private readonly ConnectionFactory _activeMqConnectionFactory; - private readonly IConnection _activeMqConnection; - private readonly ISession _activeMqSession; - private readonly IDestination _activeMqdestination; - private readonly IMessageProducer _activeMqProducer; - - private BlockingCollection _blockingCollection = new BlockingCollection(); - private CancellationToken _token = default; - - private readonly int _workerCount; - private readonly int _batchSize; - - private readonly int _maxSendErrors; - private List _sendErrors = new List(); - - private TaskCompletionSource _tcs; - - private volatile int _resultsSent; - - Action _updateQueueProgress; - - private ILogger _logger; - private bool _verboseLoggingEnabled; - - private ThreadLocal _workerResultCount = new ThreadLocal(); - private ThreadLocal _workerMessageCount = new ThreadLocal(); - - private bool _initialised; - - public ActiveMqUpdateSender(UpdateStagingQueueParams qParams, ILogger logger) - { - if (qParams == null || String.IsNullOrEmpty(qParams.QueueName) || String.IsNullOrEmpty(qParams.Uri)) - { - throw new TaxonomyException(TaxonomyErrorType.JMS_EXCEPTION, "Invalid or missing queue parameters for Active MQ"); - } - - try - { - _activeMqConnectionFactory = new ConnectionFactory(qParams.Uri); - if (!String.IsNullOrWhiteSpace(qParams.UserName) && !String.IsNullOrWhiteSpace(qParams.Password)) - { - _activeMqConnection = _activeMqConnectionFactory.CreateConnection(qParams.UserName, qParams.Password); - } - else - { - _activeMqConnection = _activeMqConnectionFactory.CreateConnection(); - } - _activeMqConnection.Start(); - _activeMqSession = _activeMqConnection.CreateSession(AcknowledgementMode.AutoAcknowledge); - _activeMqdestination = _activeMqSession.GetQueue(qParams.QueueName); - _activeMqProducer = _activeMqSession.CreateProducer(_activeMqdestination); - - _workerCount = Math.Max(qParams.WorkerCount, 1); - _maxSendErrors = qParams.MaxErrors; - _batchSize = Math.Max(qParams.BatchSize, 1); - - _logger = logger; - _verboseLoggingEnabled = qParams.EnableVerboseLogging; - } - catch (Exception e) - { - Dispose(); - throw new TaxonomyException(TaxonomyErrorType.JMS_EXCEPTION, $"Error establishing a connection to ActiveMQ {qParams.QueueName}, at {qParams.Uri}", e); - } - } - - public Task Init(CancellationToken token, Action updateQueueProgress) - { - if(_initialised) - { - return null; - } - - _token = token; - _updateQueueProgress = updateQueueProgress; - _tcs = new TaskCompletionSource(); - - Timer notificationTimer = new Timer(PrintUpdate, null, 60000, 60000) ; - - var tasks = new List(); - - try - { - for (int i = 0; i < _workerCount; i++) - { - Task task = Task.Factory.StartNew(Consume1); - tasks.Add(task); - } - - Task.WaitAll(tasks.ToArray()); - _tcs.SetResult(_sendErrors.Count == 0 ? 
true : false); - } - catch (Exception ex) - { - _tcs.TrySetException(ex); - } - finally - { - notificationTimer?.Dispose(); - } - - _initialised = true; - return _tcs.Task; - } - - private void PrintUpdate(object data) - { - if (_resultsSent > 0) - { - _updateQueueProgress(_resultsSent, _blockingCollection.Count); - } - } - - public void CompleteAdding() - { - try - { - _blockingCollection.CompleteAdding(); - } - catch (ObjectDisposedException) - { - } - } - - public bool IsAddingCompleted - { - get => _blockingCollection.IsAddingCompleted; - } - - public bool Enqueue(IaidWithCategories item, CancellationToken token = default(CancellationToken)) - { - - - if (item == null || token.IsCancellationRequested) - { - return false; - } - - try - { - _blockingCollection.Add(item); - return true; - } - catch (Exception ex) - { - _sendErrors.Add($"Error adding item to internal queue: {item.ToString()}, {ex.Message}"); - return false; - } - } - - public IReadOnlyCollection QueueUpdateErrors - { - get => new ReadOnlyCollection(_sendErrors); - } - - - - private void Consume1() - { - - while (!IsComplete() && !_token.IsCancellationRequested) - { - if (_sendErrors.Count >= _maxSendErrors) - { - if (!_tcs.Task.IsFaulted) //Only one worker should set this as calling repeatedly causes an exception - { - _tcs.TrySetException(new TaxonomyException(TaxonomyErrorType.JMS_EXCEPTION, "The Active MQ update error count has been exceeded.")); - } - CompleteAdding(); - break; - } - - var currentBatch = new List(_batchSize); - - for (int i = 0; i < _batchSize && (!IsComplete() && !_token.IsCancellationRequested); i++) - { - IaidWithCategories nextResult; - - bool gotResult = _blockingCollection.TryTake(out nextResult); - - if(gotResult) - { - currentBatch.Add(nextResult); - } - } - - if (currentBatch.Count > 0) - { - byte[] serialisedResults = currentBatch.ToByteArray(); - - try - { - var bytesMessage = _activeMqProducer.CreateBytesMessage(serialisedResults); - _activeMqProducer.Send(bytesMessage); - _workerMessageCount.Value++; - _workerResultCount.Value += currentBatch.Count; - _resultsSent += currentBatch.Count; - if (_verboseLoggingEnabled) - { - _logger.LogInformation($"Forwarded a message with {currentBatch.Count} categoriation results to the external ActiveMQ update queue. This worker [thread ID {Thread.CurrentThread.ManagedThreadId}] has now forwarded {_workerMessageCount.Value} messages containing {_workerResultCount.Value} results."); - } - } - catch (Exception ex) - { - _sendErrors.Add($"Error updating the queue for {String.Join(";", currentBatch)}. Details: {ex.Message}"); - } - } - } - - if(_token.IsCancellationRequested) - { - _logger.LogInformation($"Queue update worker [{Thread.CurrentThread.ManagedThreadId}] terminating following a cancellation request."); - _tcs.TrySetCanceled(); - CompleteAdding(); - } - else - { - _logger.LogInformation($"Queue update worker with thread ID [{Thread.CurrentThread.ManagedThreadId}] is finishing as there are no more results on the internal queue. 
This worker forwarded {_workerMessageCount.Value} messages containing {_workerResultCount.Value} results."); - } - } - - public void Dispose() - { - try - { - if (_blockingCollection.IsAddingCompleted) - { - CompleteAdding(); - } - } - catch (ObjectDisposedException) - { - } - - _blockingCollection.Dispose(); - _activeMqProducer?.Dispose(); - _activeMqSession?.Dispose(); - _activeMqConnection?.Dispose(); - - } - - private bool IsComplete() - { - try - { - bool isComplete = _blockingCollection.IsCompleted; - return isComplete; - } - catch (Exception) - { - return true; - } - } - } -} diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs index 1d625e9..a6cec4d 100644 --- a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs +++ b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs @@ -72,64 +72,6 @@ public AmazonSqsUpdateReceiver(AmazonSqsStagingQueueParams qParams) } } - public IList DequeueIaidsWithCategories(int numberToFetch) - { - throw new NotImplementedException(); - } - - public IaidWithCategories DeQueueNextIaidWithCategories() - { - - IMessage nextItem; - int attempts = 0; - - do - { - nextItem = m_Consumer.ReceiveNoWait(); - attempts++; - } while (nextItem == null && attempts <= FETCH_RETRY_COUNT); - - ITextMessage msg = nextItem as ITextMessage; - - if(msg != null) - { - IaidWithCategories iaidWithCategories = JsonConvert.DeserializeObject(msg.Text); - return iaidWithCategories; - } - else - { - return null; - } - } - - public void Dispose() - { - m_Consumer?.Dispose(); - m_Session?.Dispose(); - m_Connection?.Dispose(); - } - - public List DeQueueNextListOfIaidsWithCategories() - { - var requestParams = new ReceiveMessageRequest - { - QueueUrl = _qParams.QueueUrl, - MaxNumberOfMessages = 1, - WaitTimeSeconds = TimeSpan.FromSeconds(10).Seconds, - }; - - ReceiveMessageResponse message = _client.ReceiveMessageAsync(requestParams).Result; - if (message.Messages.Count == 1) - { - List result = JsonConvert.DeserializeObject>(message.Messages[0].Body); - return result; - } - else - { - throw new TaxonomyException("Unexpected message count"); - } - } - public async IAsyncEnumerable> IterateResults() { var requestParams = new ReceiveMessageRequest @@ -157,5 +99,12 @@ public async IAsyncEnumerable> IterateResults() yield return result; } } + + public void Dispose() + { + m_Consumer?.Dispose(); + m_Session?.Dispose(); + m_Connection?.Dispose(); + } } } diff --git a/Taxonomy.Common/Domain/Queue/IUpdateStagingQueueReceiver.cs b/Taxonomy.Common/Domain/Queue/IUpdateStagingQueueReceiver.cs index 8a1ec10..3772a8f 100644 --- a/Taxonomy.Common/Domain/Queue/IUpdateStagingQueueReceiver.cs +++ b/Taxonomy.Common/Domain/Queue/IUpdateStagingQueueReceiver.cs @@ -5,13 +5,6 @@ namespace NationalArchives.Taxonomy.Common.Domain.Queue { public interface IUpdateStagingQueueReceiver { - - IList DequeueIaidsWithCategories(int numberToFetch); - - IaidWithCategories DeQueueNextIaidWithCategories(); - - List DeQueueNextListOfIaidsWithCategories(); - IAsyncEnumerable> IterateResults(); } } From 2a48c15c31fa675ae484d85482b91de710abdd80 Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Wed, 11 Dec 2024 17:59:53 +0000 Subject: [PATCH 20/22] Fixes and improvements to Taxonomy update using SQS. In particular, I had to replace IAsyncEnumerable against SQS with simply fetching batches at a time, as the enumerator stopped returning results shortly into the process.
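
As a condensed illustration of the pattern this commit describes (an explicit batched receive-and-delete against SQS instead of an IAsyncEnumerable), a minimal sketch might look like the code below. The helper name FetchBatch and the fixed 20-second wait are assumptions for illustration only; the production implementation is AmazonSqsUpdateReceiver.GetNextBatchOfResults in the diff that follows, which additionally adds a short-polling fallback, request timeouts via cancellation tokens, and logging.

// Minimal sketch only: batched receive-and-delete against SQS.
// FetchBatch is an illustrative name, not the method added by this patch.
using Amazon.SQS;
using Amazon.SQS.Model;
using NationalArchives.Taxonomy.Common.BusinessObjects;
using Newtonsoft.Json;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;

internal static class SqsBatchFetchSketch
{
    // Fetches up to 10 SQS messages in one call and deletes them only after
    // their bodies have been deserialised, returning the combined results.
    public static async Task<List<IaidWithCategories>> FetchBatch(IAmazonSQS client, string queueUrl)
    {
        ReceiveMessageResponse response = await client.ReceiveMessageAsync(new ReceiveMessageRequest
        {
            QueueUrl = queueUrl,
            MaxNumberOfMessages = 10, // the SQS maximum per request
            WaitTimeSeconds = 20      // long polling; the real code falls back to short polling (0) when this brings back nothing
        });

        var results = new List<IaidWithCategories>();

        if (response.Messages.Count == 0)
        {
            return results; // nothing waiting on the queue this time round
        }

        foreach (Message msg in response.Messages)
        {
            results.AddRange(JsonConvert.DeserializeObject<List<IaidWithCategories>>(msg.Body));
        }

        // Remove the whole batch from the queue in a single request once everything has been read.
        await client.DeleteMessageBatchAsync(new DeleteMessageBatchRequest
        {
            QueueUrl = queueUrl,
            Entries = response.Messages
                .Select(m => new DeleteMessageBatchRequestEntry(m.MessageId, m.ReceiptHandle))
                .ToList()
        });

        return results;
    }
}

Deleting messages only after their bodies have been deserialised means that an unprocessed batch simply becomes visible on the queue again once its visibility timeout expires.
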
--- .../OpenSearchUpdateParams.cs | 6 +- .../Program.cs | 8 +- .../Service/UpdateOpenSearchWindowsService.cs | 4 +- .../appsettings.json | 3 +- .../Domain/Queue/AmazonSqsUpdateReceiver.cs | 97 +++++++-- .../Queue/IUpdateStagingQueueReceiver.cs | 6 +- .../OpenSearchIAViewUpdateRepository.cs | 5 +- Taxonomy.Common/Helpers/QueueExtensions.cs | 12 +- .../Service/Impl/UpdateOpenSearchService.cs | 196 +++++++++++------- .../Interface/IUpdateOpenSearchService.cs | 2 +- 10 files changed, 226 insertions(+), 113 deletions(-) diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/OpenSearchUpdateParams.cs b/NationalArchives.Taxonomy.Batch.Update.Elastic/OpenSearchUpdateParams.cs index a4a6f20..3f0e8e1 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/OpenSearchUpdateParams.cs +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/OpenSearchUpdateParams.cs @@ -6,8 +6,10 @@ namespace NationalArchives.Taxonomy.Batch { internal sealed class OpenSearchUpdateParams { - public uint BulkUpdateBatchSize { get; set; } + public int BulkUpdateBatchSize { get; set; } - public uint QueueFetchSleepTime { get; set; } + public int QueueFetchSleepTime { get; set; } + + public int SearchDatabaseUpdateInterval { get; set; } } } diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs b/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs index 447a578..7b5c1ca 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs @@ -96,14 +96,16 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic services.AddSingleton((ctx) => { - uint bulkUpdateBatchSize = openSearchUpdateParams.BulkUpdateBatchSize; - uint queueFetchWaitTime = openSearchUpdateParams.QueueFetchSleepTime; + int bulkUpdateBatchSize = openSearchUpdateParams.BulkUpdateBatchSize; + int queueFetchWaitTime = openSearchUpdateParams.QueueFetchSleepTime; + int searchDatabaseUpdateInterval = openSearchUpdateParams.SearchDatabaseUpdateInterval; + Console.WriteLine($"Using a batch size of {bulkUpdateBatchSize} and a queue fetch interval of {queueFetchWaitTime} sceonds for Open Search bulk updates."); IUpdateStagingQueueReceiver interimQueue = ctx.GetRequiredService(); IOpenSearchIAViewUpdateRepository updateRepo = ctx.GetRequiredService(); ILogger logger = ctx.GetRequiredService>(); - return new UpdateOpenSearchService(interimQueue, updateRepo, logger, bulkUpdateBatchSize, queueFetchWaitTime); + return new UpdateOpenSearchService(interimQueue, updateRepo, logger, bulkUpdateBatchSize, queueFetchWaitTime, searchDatabaseUpdateInterval); }); services.AddHostedService(); diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/Service/UpdateOpenSearchWindowsService.cs b/NationalArchives.Taxonomy.Batch.Update.Elastic/Service/UpdateOpenSearchWindowsService.cs index ca6cbd7..e9417f4 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/Service/UpdateOpenSearchWindowsService.cs +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/Service/UpdateOpenSearchWindowsService.cs @@ -27,7 +27,7 @@ public override Task StopAsync(CancellationToken cancellationToken) try { _logger.LogInformation(Properties.Resources.FlushRemaingUpdatesToOpenSearchMsg); - _updateOpenSearchService.Flush(); + _updateOpenSearchService.Flush().Wait(TimeSpan.FromMinutes(2)); _logger.LogInformation("Stopping the Open Search Update Windows Service."); base.StopAsync(cancellationToken); @@ -36,7 +36,7 @@ public override Task StopAsync(CancellationToken cancellationToken) } 
catch (Exception e) { - Console.WriteLine(e.Message); + _logger.LogError(e, "Exception occurre whilst attempting to stop the Taxonomy Update Service"); return Task.FromException(e); } } diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json b/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json index 94f59cd..ae66b25 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json @@ -19,7 +19,8 @@ }, "OpenSearchUpdateParams": { "BulkUpdateBatchSize": "1000", - "QueueFetchSleepTime": "500" + "QueueFetchSleepTime": "2000", + "SearchDatabaseUpdateInterval" : "2000" }, "UpdateOpenSearchConnectionParameters": { "Scheme": "https", diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs index a6cec4d..8411469 100644 --- a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs +++ b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs @@ -10,6 +10,11 @@ using System.Collections.Generic; using Amazon; using Amazon.Runtime.Internal.Endpoints.StandardLibrary; +using System.Threading.Tasks; +using System.Threading; +using Amazon.Runtime.Internal.Util; +using Microsoft.Extensions.Logging; +using System.Linq; namespace NationalArchives.Taxonomy.Common.Domain.Queue { @@ -18,12 +23,6 @@ public class AmazonSqsUpdateReceiver : IUpdateStagingQueueReceiver, IDisposable private const int FETCH_RETRY_COUNT = 5; private const string ROLE_SESSION_NAME = "Taxonomy_SQS_Update"; - private readonly ConnectionFactory m_ConnectionFactory; - private readonly IConnection m_Connection; - private readonly ISession m_Session; - private readonly IDestination m_destination; - private readonly IMessageConsumer m_Consumer; - private readonly AmazonSqsStagingQueueParams _qParams; private AmazonSQSClient _client; @@ -72,39 +71,97 @@ public AmazonSqsUpdateReceiver(AmazonSqsStagingQueueParams qParams) } } - public async IAsyncEnumerable> IterateResults() + public async Task> GetNextBatchOfResults(Microsoft.Extensions.Logging.ILogger logger, int sqsRequestTimeoutSeconds) { - var requestParams = new ReceiveMessageRequest + List results = new List(); + List msgHandlesForDelete = new List(); + + CancellationTokenSource fetchCancelSource = new CancellationTokenSource(TimeSpan.FromSeconds(sqsRequestTimeoutSeconds)); + + + // Try long polling first. But sometimes this times out and brings back no results, even with the max 20 seconds wait time. + // Therefore we have Short polling as a fallback. This generally brings back fewer results, sometimes as few as 1 or 5 in testing. + // See https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/sqs-short-and-long-polling.html#:~:text=The%20maximum%20long%20polling%20wait,t%20included%20in%20a%20response). 
+ var longPollingRequestParams = new ReceiveMessageRequest + { + QueueUrl = _qParams.QueueUrl, + MaxNumberOfMessages = 10, + WaitTimeSeconds = TimeSpan.FromSeconds(Math.Min(sqsRequestTimeoutSeconds, 20)).Seconds // 20 sconds the max for ReceiveMessageRequest but may want to use more for Cancel Token + }; + + var shortPollingRequestParams = new ReceiveMessageRequest { QueueUrl = _qParams.QueueUrl, MaxNumberOfMessages = 10, - WaitTimeSeconds = TimeSpan.FromSeconds(10).Seconds, + WaitTimeSeconds = 0 }; ReceiveMessageResponse message = null; try { - message = await _client.ReceiveMessageAsync(requestParams); + message = await _client.ReceiveMessageAsync(longPollingRequestParams, fetchCancelSource.Token); + + if (message == null) + { + logger.LogWarning($"Request to SQS queue {_qParams.QueueUrl} using Long Polling failed to retrieve any taxonomy classifcations. Attempting Short Polling request"); + message = await _client.ReceiveMessageAsync(shortPollingRequestParams, fetchCancelSource.Token); + } + else + { + logger.LogInformation($"Long polling request to SQS queue brought back {message.Messages.Count} messages containing {message.Messages.SelectMany(m => m.MessageId).Count()} taxonomy results."); + } + + if (message != null && message.Messages.Count > 0) + { + foreach (Message msg in message?.Messages) + { + List result = JsonConvert.DeserializeObject>(msg.Body); + results.AddRange(result); + msgHandlesForDelete.Add(new DeleteMessageBatchRequestEntry() { Id = msg.MessageId, ReceiptHandle = msg.ReceiptHandle }); + } + } + else + { + logger.LogWarning($"Request to SQS queue {_qParams.QueueUrl} failed to retrieve any taxonomy classifcations."); + } + + + if (msgHandlesForDelete.Count > 0) + { + var deleteRequest = new DeleteMessageBatchRequest() + { + QueueUrl = _qParams.QueueUrl, + Entries = msgHandlesForDelete + }; + + try + { + CancellationTokenSource deleteCancelSource = new CancellationTokenSource(TimeSpan.FromSeconds(sqsRequestTimeoutSeconds)); + await _client.DeleteMessageBatchAsync(deleteRequest, deleteCancelSource.Token); + } + catch (TaskCanceledException tcex) + { + logger.LogWarning($"Request for taxonomy categorisation results from SQS queue {_qParams.QueueUrl} succeeded. 
However the subsequent delete request for the message timed out after waiting {sqsRequestTimeoutSeconds} seconds"); + } + } + + return results; + } + catch(TaskCanceledException tcex) + { + logger.LogError($"Request for taxonomy categorisation results from SQS queue {_qParams.QueueUrl} timed out after waiting {sqsRequestTimeoutSeconds} seconds"); + return results; } catch (Exception ex) { throw; } - - foreach (Message msg in message.Messages) - { - List result = JsonConvert.DeserializeObject>(msg.Body); - await _client.DeleteMessageAsync(_qParams.QueueUrl, msg.ReceiptHandle); - yield return result; - } } public void Dispose() { - m_Consumer?.Dispose(); - m_Session?.Dispose(); - m_Connection?.Dispose(); + _client?.Dispose(); } } } diff --git a/Taxonomy.Common/Domain/Queue/IUpdateStagingQueueReceiver.cs b/Taxonomy.Common/Domain/Queue/IUpdateStagingQueueReceiver.cs index 3772a8f..3b3f8ec 100644 --- a/Taxonomy.Common/Domain/Queue/IUpdateStagingQueueReceiver.cs +++ b/Taxonomy.Common/Domain/Queue/IUpdateStagingQueueReceiver.cs @@ -1,10 +1,12 @@ -using NationalArchives.Taxonomy.Common.BusinessObjects; +using Microsoft.Extensions.Logging; +using NationalArchives.Taxonomy.Common.BusinessObjects; using System.Collections.Generic; +using System.Threading.Tasks; namespace NationalArchives.Taxonomy.Common.Domain.Queue { public interface IUpdateStagingQueueReceiver { - IAsyncEnumerable> IterateResults(); + Task> GetNextBatchOfResults(ILogger logger, int sqsRequestTimeoutSeconds); } } diff --git a/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchIAViewUpdateRepository.cs b/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchIAViewUpdateRepository.cs index 81adbc8..397e9ae 100644 --- a/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchIAViewUpdateRepository.cs +++ b/Taxonomy.Common/Domain/Repository/Elastic/OpenSearchIAViewUpdateRepository.cs @@ -11,7 +11,7 @@ namespace NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch { public class OpenSearchIAViewUpdateRepository : IOpenSearchIAViewUpdateRepository { - private OpenSearchClient _openSearchClient; + private readonly OpenSearchClient _openSearchClient; //TODO: Not using the IConnectElastic interface here, it just seems to get in the way, look at refactoring generally. // But see where we get to on using Lucene.net and the InfoAseet input source. @@ -61,8 +61,7 @@ public async Task SaveAll(IEnumerable iaidsWithCategories) descriptor.Update(u => u.Doc(doc).DocAsUpsert(true).Id(iaidWithCategories.Iaid)); } - //TODO: Async? 
- var response = await _openSearchClient.BulkAsync(descriptor); + BulkResponse response = await _openSearchClient.BulkAsync(descriptor); if (!response.IsValid) { string errorInfo = GetOpenSearchErrorInfo(response); diff --git a/Taxonomy.Common/Helpers/QueueExtensions.cs b/Taxonomy.Common/Helpers/QueueExtensions.cs index f981065..3b90663 100644 --- a/Taxonomy.Common/Helpers/QueueExtensions.cs +++ b/Taxonomy.Common/Helpers/QueueExtensions.cs @@ -1,14 +1,20 @@ -using System.Collections.Generic; +using System.Collections.Concurrent; +using System.Collections.Generic; namespace NationalArchives.Taxonomy.Common.Helpers { internal static class QueueExtensions { - public static IEnumerable DequeueChunk(this Queue queue, uint chunkSize) + public static IEnumerable DequeueChunk(this ConcurrentQueue queue, int chunkSize) { for (int i = 0; i < chunkSize && queue.Count > 0; i++) { - yield return queue.Dequeue(); + T nextItem; + bool itemFound = queue.TryDequeue(out nextItem); + if (nextItem != null) + { + yield return nextItem; + } } } } diff --git a/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs b/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs index 750eded..382cadd 100644 --- a/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs +++ b/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs @@ -5,6 +5,7 @@ using NationalArchives.Taxonomy.Common.Helpers; using NationalArchives.Taxonomy.Common.Service.Interface; using System; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Linq; using System.Text; @@ -13,27 +14,31 @@ namespace NationalArchives.Taxonomy.Common.Service.Impl { - public class UpdateOpenSearchService : IUpdateOpenSearchService + public class UpdateOpenSearchService : IUpdateOpenSearchService, IDisposable { private readonly IUpdateStagingQueueReceiver _interimUpdateQueue; private readonly IOpenSearchIAViewUpdateRepository _targetOpenSearchRepository; - private readonly Queue internalQueue = new Queue(); - private readonly uint _batchSize; - private readonly int _queueFetchWaitTime; - + private readonly ConcurrentQueue _internalQueue = new ConcurrentQueue(); + private readonly int _batchSize; + private readonly int _queueFetchWaitTimeMS; + private readonly int _searchDatabaseUpdateIntervalMS; private readonly ILogger _logger; - private const int NULL_COUNTER_THRESHOLD = 259200; // 259200 seconds == 3 days. + //private const int NULL_COUNTER_THRESHOLD = 259200; // 259200 seconds == 3 days. 
+ private const int NULL_COUNTER_THRESHOLD = 72; // Keep running for 3 days with 1 check per hour + private const int MAX_SEARCH_DB_UPDATE_ERRORS = 5; bool _isProcessingComplete = false; - private volatile int _totalInfoAssetsUPdated; + private volatile int _totalInfoAssetsUpdated; - public bool IsProcessingComplete { get => _isProcessingComplete; set => _isProcessingComplete = value; } + private CancellationTokenSource _cancelSource = new CancellationTokenSource(); + private int _searchDatabaseUpdateErrors = 0; - private DateTime _lastOpenSearchUpdate = DateTime.Now; + public bool IsProcessingComplete { get => _isProcessingComplete; set => _isProcessingComplete = value; } - public UpdateOpenSearchService(IUpdateStagingQueueReceiver updateQueue, IOpenSearchIAViewUpdateRepository targetOpenSearchRepository, ILogger logger, uint batchSize = 1, uint queueFetchWaitTime = 1000) + public UpdateOpenSearchService(IUpdateStagingQueueReceiver updateQueue, IOpenSearchIAViewUpdateRepository targetOpenSearchRepository, ILogger logger, + int batchSize = 1, int queueFetchWaitTimeMS = 1000, int searchDatabaseUpdateIntervalMS = 10000) { if (updateQueue == null || targetOpenSearchRepository == null) { @@ -43,7 +48,8 @@ public UpdateOpenSearchService(IUpdateStagingQueueReceiver updateQueue, IOpenSea _interimUpdateQueue = updateQueue; _targetOpenSearchRepository = targetOpenSearchRepository; _batchSize = batchSize; - _queueFetchWaitTime = Convert.ToInt32(queueFetchWaitTime); + _queueFetchWaitTimeMS = queueFetchWaitTimeMS; + _searchDatabaseUpdateIntervalMS = searchDatabaseUpdateIntervalMS; _logger = logger; } @@ -51,20 +57,16 @@ public async Task Init() { try { - await StartProcessing(); + await StartProcessing(_cancelSource.Token); } - catch (Exception e) + catch (Exception ex) { - StringBuilder sb = new StringBuilder("Exception Occurred: " + e.Message); - sb.Append("\n"); - sb.Append("Stack Trace: \n"); - sb.Append(e.StackTrace); - _logger.LogError(sb.ToString()); + _logger.LogError(ex, "Exception whilst starting or running the Taxonomy Search database update process."); throw; } } - public void Flush() + public async Task Flush() { List remainingInternalQueueItems = null; @@ -72,21 +74,16 @@ public void Flush() { do { - remainingInternalQueueItems = internalQueue.DequeueChunk(_batchSize).ToList(); + remainingInternalQueueItems = _internalQueue.DequeueChunk(_batchSize).ToList(); if (remainingInternalQueueItems.Count > 0) { - BulkUpdateCategoriesOnIAViews(remainingInternalQueueItems); + await BulkUpdateCategoriesOnIAViews(remainingInternalQueueItems); } } while (remainingInternalQueueItems.Count() > 0); } - catch (Exception e) + catch (Exception ex) { - StringBuilder sb = new StringBuilder("Exception Occurred whilst flushing remaining updates: " + e.Message); - sb.Append("\n"); - sb.Append("Stack Trace: \n"); - sb.Append(e.StackTrace); - - _logger.LogError(sb.ToString()); + _logger.LogError(ex, "Exception Occurred whilst flushing remaining updates from the internal queue."); throw; } finally @@ -95,89 +92,132 @@ public void Flush() } } - private async Task StartProcessing() + private async Task StartProcessing(CancellationToken token) { int nullCounter = 0; - int minutesSinceLastNoUpdatesLogMessage = 0; try { - while (!IsProcessingComplete) + + Task searchDatabaseUpdateTask = Task.Run(() => PeriodicSearchDatabaseUpdateAsync(TimeSpan.FromMilliseconds(_searchDatabaseUpdateIntervalMS), token)); + + while (!IsProcessingComplete && !_cancelSource.IsCancellationRequested) { - //List nextBatchFromInterimUpdateQueue 
= _interimUpdateQueue.DeQueueNextListOfIaidsWithCategories(); - var enumerator = _interimUpdateQueue.IterateResults().GetAsyncEnumerator(); - while (await enumerator.MoveNextAsync()) + List nextBatchOfResults = await _interimUpdateQueue.GetNextBatchOfResults(_logger, sqsRequestTimeoutSeconds: 30); + + if (nextBatchOfResults != null) { - List nextBatchOfResults = enumerator.Current; - if (nextBatchOfResults.Count > 0) + if (nextBatchOfResults?.Count > 0) { - foreach (IaidWithCategories categorisationResult in nextBatchOfResults) + foreach (IaidWithCategories categorisationResult in nextBatchOfResults) { if (categorisationResult != null) { - internalQueue.Enqueue(categorisationResult); - } - else - { - nullCounter++; + _internalQueue.Enqueue(categorisationResult); } } + await Task.Delay(_queueFetchWaitTimeMS); } - } + else + { + nullCounter++; + + // If we didn;t get anything back, Wait an hour before trying again... + await Task.Delay(TimeSpan.FromHours(1)); + // this allows us to keep running for 3 days with no updates before shutting down the service, assuming a one hour wait between each check. + if (nullCounter >= NULL_COUNTER_THRESHOLD) + { + IsProcessingComplete = true; + await RetrieveAndSubmitUpdatesToOpenSearchDatabase(); + _cancelSource.Cancel(); + _logger.LogInformation("No more categorisation results found on update queue. Open Search Update service will now finish processing."); + } + } + } + } + + await Task.Delay(_queueFetchWaitTimeMS); - Thread.Sleep(_queueFetchWaitTime); + } + catch (Exception e) + { + throw; + } + finally + { + _cancelSource?.Cancel(); + } + } - TimeSpan timeSinceLastUpdate = DateTime.Now - _lastOpenSearchUpdate; + private async Task PeriodicSearchDatabaseUpdateAsync(TimeSpan interval, CancellationToken cancellationToken) + { + DateTime _lastOpenSearchUpdateTime = DateTime.Now; + int minutesSinceLastNoUpdatesLogMessage = 0; + TimeSpan timeSinceLastOpenSearchUpdateCheck = interval; - if (internalQueue.Count >= _batchSize || ((internalQueue.Count > 0) && timeSinceLastUpdate >= TimeSpan.FromMinutes(5))) + try + { + while (true && !cancellationToken.IsCancellationRequested) + { + if (_internalQueue.Count >= _batchSize || ((_internalQueue.Count > 0) && timeSinceLastOpenSearchUpdateCheck >= interval)) { - _lastOpenSearchUpdate = DateTime.Now; - SubmitUpdatesToOpenSearchDatabase(); + Task delayTask = Task.Delay(interval, cancellationToken); + await RetrieveAndSubmitUpdatesToOpenSearchDatabase(); + + await delayTask; } else { - if (timeSinceLastUpdate >= TimeSpan.FromMinutes(5)) + if (timeSinceLastOpenSearchUpdateCheck >= TimeSpan.FromMinutes(5)) { - _totalInfoAssetsUPdated = 0; - int minutesSinceLastUpdate = Convert.ToInt32(Math.Round(timeSinceLastUpdate.TotalMinutes)); + + int minutesSinceLastUpdate = Convert.ToInt32(Math.Round(timeSinceLastOpenSearchUpdateCheck.TotalMinutes)); if (minutesSinceLastUpdate % 5 == 0 && minutesSinceLastUpdate > minutesSinceLastNoUpdatesLogMessage) { minutesSinceLastNoUpdatesLogMessage = minutesSinceLastUpdate; _logger.LogInformation($"No Taxonomy updates have been received by the Open Search" + $" update service in the last {minutesSinceLastUpdate} minutes. Resetting the update counter."); - } + _totalInfoAssetsUpdated = 0; + } } } - // this allows us to keep running for 3 days with no updates before shutting down the service, given the one second wait between each check. 
- if (nullCounter >= NULL_COUNTER_THRESHOLD) - { - IsProcessingComplete = true; - await SubmitUpdatesToOpenSearchDatabase(); - _logger.LogInformation("No more categorisation results found on update queue. Open Search Update service will now finish processing."); - } - async Task SubmitUpdatesToOpenSearchDatabase() - { - if (_batchSize == 1 || internalQueue.Count == 1) - { - UpdateCategoriesOnIAView(internalQueue.Dequeue()); - } - else - { - var items = internalQueue.DequeueChunk(_batchSize).ToList(); - await BulkUpdateCategoriesOnIAViews(items); - } - } + timeSinceLastOpenSearchUpdateCheck = DateTime.Now - _lastOpenSearchUpdateTime; + } + } + catch (Exception ex) + { + _logger.LogError(ex, "error updating OpenSearch database Taxonomy records."); + _searchDatabaseUpdateErrors++ ; - } - catch (Exception e) + if (_searchDatabaseUpdateErrors >= MAX_SEARCH_DB_UPDATE_ERRORS) + { + _logger.LogCritical($"Taxonomy search database update has exceeded the configured error threshold of {MAX_SEARCH_DB_UPDATE_ERRORS}. Operation aborted."); + _cancelSource.Cancel(); } + } - throw; + } + + private async Task RetrieveAndSubmitUpdatesToOpenSearchDatabase() + { + if (_batchSize == 1 || _internalQueue.Count == 1) { + IaidWithCategories nextItem; + bool itemRetrived = _internalQueue.TryDequeue(out nextItem); + if (itemRetrived) + { + await UpdateCategoriesOnIAView(nextItem); + } + } + else + { + var items = _internalQueue.DequeueChunk(_batchSize).ToList(); + await BulkUpdateCategoriesOnIAViews(items); } } @@ -201,8 +241,8 @@ private async Task BulkUpdateCategoriesOnIAViews(IList listO int totalForThisBulkUpdateOperation = listOfIAViewUpdatesToProcess.Count; _logger.LogInformation($"Completed bulk update in Open Search for {totalForThisBulkUpdateOperation} items: "); - _totalInfoAssetsUPdated += totalForThisBulkUpdateOperation; - _logger.LogInformation($" Category data for {_totalInfoAssetsUPdated} assets has now been added or updated in Open Search."); + _totalInfoAssetsUpdated += totalForThisBulkUpdateOperation; + _logger.LogInformation($" Category data for {_totalInfoAssetsUpdated} assets has now been added or updated in Open Search."); } catch (Exception ex) { @@ -217,7 +257,7 @@ private async Task UpdateCategoriesOnIAView(IaidWithCategories item) _logger.LogInformation("Submitting single Asset update to Open Search: " + item.ToString()); await _targetOpenSearchRepository.Save(item); _logger.LogInformation($"Completed single Asset in Open Search: {item.ToString()}." ); - _totalInfoAssetsUPdated++; + _totalInfoAssetsUpdated++; } catch (Exception ex) { @@ -225,5 +265,9 @@ private async Task UpdateCategoriesOnIAView(IaidWithCategories item) } } + public void Dispose() + { + _cancelSource?.Dispose(); + } } } diff --git a/Taxonomy.Common/Service/Interface/IUpdateOpenSearchService.cs b/Taxonomy.Common/Service/Interface/IUpdateOpenSearchService.cs index 4f217b5..0a849fb 100644 --- a/Taxonomy.Common/Service/Interface/IUpdateOpenSearchService.cs +++ b/Taxonomy.Common/Service/Interface/IUpdateOpenSearchService.cs @@ -8,6 +8,6 @@ public interface IUpdateOpenSearchService { Task Init(); - void Flush(); + Task Flush(); } } From 05d1a0c91a9cc3ed7b5df1b20eeaf2aa275d9822 Mon Sep 17 00:00:00 2001 From: Brian N O'Reilly Date: Thu, 12 Dec 2024 09:13:31 +0000 Subject: [PATCH 21/22] Updated NLog settings for taxonomy Generator and Update, to put errors and warnings into separate log files. Removed and sorted using statements in various places.
--- .../appsettings.json | 2 +- .../nlog.config | 16 ++++++++++++++-- .../Producers/FullReindexCategoriser.cs | 1 - NationalArchives.Taxonomy.Batch/Program.cs | 2 +- NationalArchives.Taxonomy.Batch/nlog.config | 19 ++++++++++++++++--- .../Queue/AmazonSqsDirectUpdateSender.cs | 1 - .../Queue/AmazonSqsStagingQueueParams.cs | 6 +----- .../Domain/Queue/AmazonSqsUpdateReceiver.cs | 17 ++++++----------- .../Domain/Queue/AmazonSqsUpdateSender.cs | 7 +++---- .../Domain/Queue/UpdateStagingQueueParams.cs | 6 +----- .../Impl/QueryBasedCategoriserService.cs | 2 +- .../Service/Impl/UpdateOpenSearchService.cs | 2 +- 12 files changed, 45 insertions(+), 36 deletions(-) diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json b/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json index ae66b25..22d3008 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json @@ -20,7 +20,7 @@ "OpenSearchUpdateParams": { "BulkUpdateBatchSize": "1000", "QueueFetchSleepTime": "2000", - "SearchDatabaseUpdateInterval" : "2000" + "SearchDatabaseUpdateInterval" : "1000" }, "UpdateOpenSearchConnectionParameters": { "Scheme": "https", diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/nlog.config b/NationalArchives.Taxonomy.Batch.Update.Elastic/nlog.config index 46c098a..a59b28b 100644 --- a/NationalArchives.Taxonomy.Batch.Update.Elastic/nlog.config +++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/nlog.config @@ -8,15 +8,27 @@ - + + - + + + \ No newline at end of file diff --git a/NationalArchives.Taxonomy.Batch/FullReindex/Producers/FullReindexCategoriser.cs b/NationalArchives.Taxonomy.Batch/FullReindex/Producers/FullReindexCategoriser.cs index a449b01..85e3d2e 100644 --- a/NationalArchives.Taxonomy.Batch/FullReindex/Producers/FullReindexCategoriser.cs +++ b/NationalArchives.Taxonomy.Batch/FullReindex/Producers/FullReindexCategoriser.cs @@ -7,7 +7,6 @@ using NationalArchives.Taxonomy.Common.Service; using System; using System.Collections.Generic; -using System.Diagnostics; using System.Linq; using System.Threading; using System.Threading.Tasks; diff --git a/NationalArchives.Taxonomy.Batch/Program.cs b/NationalArchives.Taxonomy.Batch/Program.cs index 1099b21..8381a41 100644 --- a/NationalArchives.Taxonomy.Batch/Program.cs +++ b/NationalArchives.Taxonomy.Batch/Program.cs @@ -14,9 +14,9 @@ using NationalArchives.Taxonomy.Common.DataObjects.OpenSearch; using NationalArchives.Taxonomy.Common.Domain.Queue; using NationalArchives.Taxonomy.Common.Domain.Repository.Common; -using NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch; using NationalArchives.Taxonomy.Common.Domain.Repository.Lucene; using NationalArchives.Taxonomy.Common.Domain.Repository.Mongo; +using NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch; using NationalArchives.Taxonomy.Common.Service; using NLog.Extensions.Logging; using System; diff --git a/NationalArchives.Taxonomy.Batch/nlog.config b/NationalArchives.Taxonomy.Batch/nlog.config index 12d631e..4b8442a 100644 --- a/NationalArchives.Taxonomy.Batch/nlog.config +++ b/NationalArchives.Taxonomy.Batch/nlog.config @@ -8,15 +8,28 @@ - + + - + + + \ No newline at end of file diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs index cd12d9d..5c1e5ac 100644 --- a/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs +++ b/Taxonomy.Common/Domain/Queue/AmazonSqsDirectUpdateSender.cs 
@@ -6,7 +6,6 @@ using Apache.NMS.ActiveMQ;
 using Microsoft.Extensions.Logging;
 using NationalArchives.Taxonomy.Common.BusinessObjects;
-using NationalArchives.Taxonomy.Common.Helpers;
 using Newtonsoft.Json;
 using System;
 using System.Collections.Generic;
diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsStagingQueueParams.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsStagingQueueParams.cs
index 3d895eb..fdff5e7 100644
--- a/Taxonomy.Common/Domain/Queue/AmazonSqsStagingQueueParams.cs
+++ b/Taxonomy.Common/Domain/Queue/AmazonSqsStagingQueueParams.cs
@@ -1,8 +1,4 @@
-using System;
-using System.Collections.Generic;
-using System.Text;
-
-namespace NationalArchives.Taxonomy.Common.Domain.Queue
+namespace NationalArchives.Taxonomy.Common.Domain.Queue
 {
     public class AmazonSqsStagingQueueParams
     {
diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs
index 8411469..c267a0f 100644
--- a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs
+++ b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateReceiver.cs
@@ -1,20 +1,15 @@
-using Amazon.Runtime;
-using Amazon.SQS.Model;
+using Amazon;
+using Amazon.Runtime;
 using Amazon.SQS;
-using Apache.NMS;
-using Apache.NMS.ActiveMQ;
+using Amazon.SQS.Model;
+using Microsoft.Extensions.Logging;
 using NationalArchives.Taxonomy.Common.BusinessObjects;
-using NationalArchives.Taxonomy.Common.Helpers;
 using Newtonsoft.Json;
 using System;
 using System.Collections.Generic;
-using Amazon;
-using Amazon.Runtime.Internal.Endpoints.StandardLibrary;
-using System.Threading.Tasks;
-using System.Threading;
-using Amazon.Runtime.Internal.Util;
-using Microsoft.Extensions.Logging;
 using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
 
 namespace NationalArchives.Taxonomy.Common.Domain.Queue
 {
diff --git a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateSender.cs b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateSender.cs
index c4de15b..9e768e9 100644
--- a/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateSender.cs
+++ b/Taxonomy.Common/Domain/Queue/AmazonSqsUpdateSender.cs
@@ -1,10 +1,11 @@
-using Amazon.SQS.Model;
+using Amazon;
+using Amazon.Runtime;
 using Amazon.SQS;
+using Amazon.SQS.Model;
 using Apache.NMS;
 using Apache.NMS.ActiveMQ;
 using Microsoft.Extensions.Logging;
 using NationalArchives.Taxonomy.Common.BusinessObjects;
-using NationalArchives.Taxonomy.Common.Helpers;
 using Newtonsoft.Json;
 using System;
 using System.Collections.Concurrent;
@@ -12,8 +13,6 @@ using System.Collections.ObjectModel;
 using System.Threading;
 using System.Threading.Tasks;
-using Amazon.Runtime;
-using Amazon;
 
 namespace NationalArchives.Taxonomy.Common.Domain.Queue
 {
diff --git a/Taxonomy.Common/Domain/Queue/UpdateStagingQueueParams.cs b/Taxonomy.Common/Domain/Queue/UpdateStagingQueueParams.cs
index cdb5648..a58c60e 100644
--- a/Taxonomy.Common/Domain/Queue/UpdateStagingQueueParams.cs
+++ b/Taxonomy.Common/Domain/Queue/UpdateStagingQueueParams.cs
@@ -1,8 +1,4 @@
-using System;
-using System.Collections.Generic;
-using System.Text;
-
-namespace NationalArchives.Taxonomy.Common.Domain.Queue
+namespace NationalArchives.Taxonomy.Common.Domain.Queue
 {
     public class UpdateStagingQueueParams
     {
diff --git a/Taxonomy.Common/Service/Impl/QueryBasedCategoriserService.cs b/Taxonomy.Common/Service/Impl/QueryBasedCategoriserService.cs
index 2a2da56..e393cee 100644
--- a/Taxonomy.Common/Service/Impl/QueryBasedCategoriserService.cs
+++ b/Taxonomy.Common/Service/Impl/QueryBasedCategoriserService.cs
@@ -2,8 +2,8 @@ using NationalArchives.Taxonomy.Common.Domain;
 using NationalArchives.Taxonomy.Common.Domain.Queue;
 using NationalArchives.Taxonomy.Common.Domain.Repository.Common;
-using NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch;
 using NationalArchives.Taxonomy.Common.Domain.Repository.Mongo;
+using NationalArchives.Taxonomy.Common.Domain.Repository.OpenSearch;
 using System;
 using System.Collections.Generic;
 using System.Linq;
diff --git a/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs b/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs
index 382cadd..905ada2 100644
--- a/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs
+++ b/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs
@@ -8,7 +8,6 @@ using System.Collections.Concurrent;
 using System.Collections.Generic;
 using System.Linq;
-using System.Text;
 using System.Threading;
 using System.Threading.Tasks;
 
@@ -243,6 +242,7 @@ private async Task BulkUpdateCategoriesOnIAViews(IList listO
             _logger.LogInformation($"Completed bulk update in Open Search for {totalForThisBulkUpdateOperation} items: ");
             _totalInfoAssetsUpdated += totalForThisBulkUpdateOperation;
             _logger.LogInformation($" Category data for {_totalInfoAssetsUpdated} assets has now been added or updated in Open Search.");
+            _logger.LogInformation($" There are currently {_internalQueue.Count} results on the internal queue that have been retrieved from Amazon SQS and are awaiting submission to the database.");
         }
         catch (Exception ex)
         {

From ba3474cda02b7a5bbd55932addebffc63245c002 Mon Sep 17 00:00:00 2001
From: Brian N O'Reilly
Date: Thu, 12 Dec 2024 14:07:19 +0000
Subject: [PATCH 22/22] Added maximum internal queue size for Taxonomy update. If this threshold is exceeded, then further fetches from SQS are paused to allow for database updates to reduce the queue size to half of the configured maximum before resuming.

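In outline, the guard added to UpdateOpenSearchService works as sketched below. This is a simplified, self-contained illustration of the pause-and-drain rule described above rather than the patch code itself: the five-minute wait and the half-of-maximum resume threshold mirror the diff, while the class, method, and parameter names here are invented for the example.

    using System;
    using System.Collections.Concurrent;
    using System.Threading.Tasks;

    internal static class QueueBackPressureSketch
    {
        // Pauses further SQS fetches until the in-memory queue has drained to half its cap.
        public static async Task WaitForQueueToDrainAsync<T>(ConcurrentQueue<T> internalQueue, int maxInternalQueueSize)
        {
            int resumeThreshold = maxInternalQueueSize / 2;

            while (internalQueue.Count > resumeThreshold)
            {
                int sizeBeforeWait = internalQueue.Count;

                // Give the search-database update loop time to consume queued items.
                await Task.Delay(TimeSpan.FromMinutes(5));

                if (internalQueue.Count >= sizeBeforeWait)
                {
                    // Nothing drained while fetches were paused, so the update loop is probably stuck.
                    throw new Exception("Internal queue is not draining while SQS fetches are paused.");
                }
            }
        }
    }

With the default MaxInternalQueueSize of 1000000 in appsettings.json, fetching from SQS therefore resumes once the internal queue falls back below 500000 items.
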
---
 .../OpenSearchUpdateParams.cs                 |  2 ++
 .../Program.cs                                |  3 +-
 .../appsettings.json                          |  3 +-
 .../Service/Impl/UpdateOpenSearchService.cs   | 32 +++++++++++++++++--
 4 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/OpenSearchUpdateParams.cs b/NationalArchives.Taxonomy.Batch.Update.Elastic/OpenSearchUpdateParams.cs
index 3f0e8e1..23165a9 100644
--- a/NationalArchives.Taxonomy.Batch.Update.Elastic/OpenSearchUpdateParams.cs
+++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/OpenSearchUpdateParams.cs
@@ -11,5 +11,7 @@ internal sealed class OpenSearchUpdateParams
         public int QueueFetchSleepTime { get; set; }
 
         public int SearchDatabaseUpdateInterval { get; set; }
+
+        public int MaxInternalQueueSize { get; set; }
     }
 }
diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs b/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs
index 7b5c1ca..f8c86fe 100644
--- a/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs
+++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/Program.cs
@@ -99,13 +99,14 @@ private static void ConfigureServicesForHost(HostBuilderContext context, IServic
                 int bulkUpdateBatchSize = openSearchUpdateParams.BulkUpdateBatchSize;
                 int queueFetchWaitTime = openSearchUpdateParams.QueueFetchSleepTime;
                 int searchDatabaseUpdateInterval = openSearchUpdateParams.SearchDatabaseUpdateInterval;
+                int maxInternalQueueSize = openSearchUpdateParams.MaxInternalQueueSize;
 
                 Console.WriteLine($"Using a batch size of {bulkUpdateBatchSize} and a queue fetch interval of {queueFetchWaitTime} sceonds for Open Search bulk updates.");
 
                 IUpdateStagingQueueReceiver interimQueue = ctx.GetRequiredService();
                 IOpenSearchIAViewUpdateRepository updateRepo = ctx.GetRequiredService();
                 ILogger logger = ctx.GetRequiredService>();
 
-                return new UpdateOpenSearchService(interimQueue, updateRepo, logger, bulkUpdateBatchSize, queueFetchWaitTime, searchDatabaseUpdateInterval);
+                return new UpdateOpenSearchService(interimQueue, updateRepo, logger, bulkUpdateBatchSize, queueFetchWaitTime, searchDatabaseUpdateInterval, maxInternalQueueSize);
             });
 
             services.AddHostedService();
diff --git a/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json b/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json
index 22d3008..c2d9bec 100644
--- a/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json
+++ b/NationalArchives.Taxonomy.Batch.Update.Elastic/appsettings.json
@@ -20,7 +20,8 @@
   "OpenSearchUpdateParams": {
     "BulkUpdateBatchSize": "1000",
     "QueueFetchSleepTime": "2000",
-    "SearchDatabaseUpdateInterval" : "1000"
+    "SearchDatabaseUpdateInterval": "1000",
+    "MaxInternalQueueSize" : "1000000"
   },
   "UpdateOpenSearchConnectionParameters": {
     "Scheme": "https",
diff --git a/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs b/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs
index 905ada2..ae12959 100644
--- a/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs
+++ b/Taxonomy.Common/Service/Impl/UpdateOpenSearchService.cs
@@ -21,6 +21,7 @@ public class UpdateOpenSearchService : IUpdateOpenSearchService, IDisposable
         private readonly int _batchSize;
         private readonly int _queueFetchWaitTimeMS;
        private readonly int _searchDatabaseUpdateIntervalMS;
+        private readonly int _maxInternalQueueSize;
         private readonly ILogger _logger;
 
         //private const int NULL_COUNTER_THRESHOLD = 259200; // 259200 seconds == 3 days.
@@ -37,7 +38,7 @@ public class UpdateOpenSearchService : IUpdateOpenSearchService, IDisposable
         public bool IsProcessingComplete { get => _isProcessingComplete; set => _isProcessingComplete = value; }
 
         public UpdateOpenSearchService(IUpdateStagingQueueReceiver updateQueue, IOpenSearchIAViewUpdateRepository targetOpenSearchRepository, ILogger logger,
-            int batchSize = 1, int queueFetchWaitTimeMS = 1000, int searchDatabaseUpdateIntervalMS = 10000)
+            int batchSize, int queueFetchWaitTimeMS, int searchDatabaseUpdateIntervalMS, int maxInternalQueueSize)
         {
             if (updateQueue == null || targetOpenSearchRepository == null)
             {
@@ -49,6 +50,7 @@ public UpdateOpenSearchService(IUpdateStagingQueueReceiver updateQueue, IOpenSea
             _batchSize = batchSize;
             _queueFetchWaitTimeMS = queueFetchWaitTimeMS;
             _searchDatabaseUpdateIntervalMS = searchDatabaseUpdateIntervalMS;
+            _maxInternalQueueSize = maxInternalQueueSize;
             _logger = logger;
         }
 
@@ -102,8 +104,34 @@ private async Task StartProcessing(CancellationToken token)
                 while (!IsProcessingComplete && !_cancelSource.IsCancellationRequested)
                 {
+                    List nextBatchOfResults = null;
 
-                    List nextBatchOfResults = await _interimUpdateQueue.GetNextBatchOfResults(_logger, sqsRequestTimeoutSeconds: 30);
+                    if (_internalQueue.Count < _maxInternalQueueSize)
+                    {
+                        nextBatchOfResults = await _interimUpdateQueue.GetNextBatchOfResults(_logger, sqsRequestTimeoutSeconds: 30);
+                    }
+                    else
+                    {
+                        // wait for some more database updates to reduce the internal queue size
+
+                        do
+                        {
+                            int latestInternalQueueSize = _internalQueue.Count;
+                            _logger.LogInformation($"Pausing fetch from SQS as the internal queue size is currently {latestInternalQueueSize}. The configured maximum is {_maxInternalQueueSize}.");
+                            _logger.LogInformation($"Internal queue needs to reach target size of {_maxInternalQueueSize / 2} before resuming fetches from SQS. Will check again in 5 minutes for search database updates to reduce the queue");
+
+                            await Task.Delay(TimeSpan.FromMinutes(5));
+
+                            if (_internalQueue.Count >= latestInternalQueueSize)
+                            {
+                                throw new Exception("Internal queue reached or exceeded the maximum size. It has not reduced despite pausing fetches from SQS to allow for database updates. This indicates a possible issue with the database update process");
+                            }
+                        } while (_internalQueue.Count > (_maxInternalQueueSize / 2));
+
+                        _logger.LogInformation($"Internal queue size is now {_internalQueue.Count}. Resuming fetch of taxonomy results from SQS.");
+                        nextBatchOfResults = await _interimUpdateQueue.GetNextBatchOfResults(_logger, sqsRequestTimeoutSeconds: 30);
+
+                    }
 
                     if (nextBatchOfResults != null)
                     {