Skip to content

Commit

Permalink
REL-847161 Update documentation considering changes for DataGrid inte…
Browse files Browse the repository at this point in the history
…gration (#13)

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* sdk bumped

* direct import settings updated

* fix

* direct import settings updated

* rest samples updated

* small improvements

* Update README.md

* Update README.md

* Update README.md
  • Loading branch information
joannapolrolniczakrelativity authored Aug 9, 2023
1 parent 9dade83 commit 83e0dd9
Show file tree
Hide file tree
Showing 13 changed files with 130 additions and 111 deletions.
53 changes: 50 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
**[.NET 7 Console Application - How-to](#net-7-code-samples---how-to)**<br>
**[.NET Framework & Kepler Console Application - How-to](#keplerclient-code-samples---how-to)**<br>
**[Powershell scripts - How-to](#powershell-script-samples---how-to)**<br>
### **[Performance Best Practices](#performance-best-practices)**
**[Import Job Settings](#import-job-settings)**<br>



Expand Down Expand Up @@ -213,9 +215,9 @@ which may lead to errors during import process.
.WithoutImages()
.WithFieldsMapped(x => x
.WithField(controlNumberColumnIndex, "Control Number")
.WithExtractedTextField(extractedTextPathColumnIndex, e => e
.WithExtractedTextInSeparateFiles(f => f
.WithEncoding("UTF-8"))))
.WithExtractedTextInSeparateFiles(f => f
.WithEncoding("UTF-16")
.WithFileSizeDefinedInColumn(fileSizeColumnIndex))))
.WithFolders(f => f
.WithRootFolderID(rootFolderId, r => r
.WithFolderPathDefinedInColumn(folderPathColumnIndex)));
Expand Down Expand Up @@ -1129,4 +1131,49 @@ List of samples:

- Invoke run-sample-import.ps1

<br><br>

---
# Performance Best Practices

## Import Job Settings

### Encoding
For improved performance when importing fileshare data stored on ADLS, we highly recommend providing extracted text and other long text files already encoded in UTF-16. Files in this encoding do not have to be converted during the import, which leads to significant time savings in both the document and image workflows.

For the document workflow, set **FieldMapping.Encoding** to UTF-16. Similarly, for the image workflow, set **ImageSettings.ExtractedTextEncoding** to UTF-16. With these settings in place, the conversion overhead is eliminated and the files are copied directly in their Unicode (UTF-16) encoding, resulting in faster processing times.

ImportDocumentSettings importDocuments = ImportDocumentSettingsBuilder.Create()
.WithAppendMode()
.WithNatives(x => x
.WithFilePathDefinedInColumn(filePathColumnIndex)
.WithFileNameDefinedInColumn(fileNameColumnIndex))
.WithoutImages()
.WithFieldsMapped(x => x
.WithField(controlNumberColumnIndex, "Control Number")
.WithExtractedTextField(extractedTextPathColumnIndex, e => e
.WithExtractedTextInSeparateFiles(f => f
.WithEncoding("UTF-16")
.WithFileSizeDefinedInColumn(fileSizeColumnIndex))))
.WithoutFolders();


ImportDocumentSettings importImages = ImportDocumentSettingsBuilder.Create()
.WithAppendMode()
.WithoutNatives()
.WithImages(i => i
.WithAutoNumberImages()
.WithoutProduction()
.WithExtractedText(e => e.WithEncoding("UTF-16"))
.WithFileTypeAutoDetection())
.WithoutFieldsMapped()
.WithoutFolders();


### FileSizeColumnIndex
Another setting that can improve performance is **FieldMapping.FileSizeColumnIndex**. When it is configured, the file sizes are read directly from the load file, so no additional file size calculation is needed, which saves processing time.

**Note:** The FileSizeColumnIndex setting takes effect only if FieldMapping.ContainsFilePath is set to true and FieldMapping.Encoding is set to UTF-16. This property applies only to long text fields stored in Data Grid, including Extracted Text.



Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Relativity.Import.Models.SDK" Version="1.0.8" />
<PackageReference Include="Relativity.Import.Models.SDK" Version="1.0.22" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public async Task Sample05_ImportDocumentsWithExtractedText()
const int controlNumberColumnIndex = 0;
const int extractedTextPathColumnIndex = 12;
const int fileNameColumnIndex = 13;
const int fileSizeColumnIndex = 14;
const int filePathColumnIndex = 22;

// Path to the load file used in data source settings.
Expand All @@ -63,7 +64,8 @@ public async Task Sample05_ImportDocumentsWithExtractedText()
.WithField(controlNumberColumnIndex, "Control Number")
.WithExtractedTextField(extractedTextPathColumnIndex, e => e
.WithExtractedTextInSeparateFiles(f => f
.WithEncoding("UTF-8"))))
.WithEncoding("UTF-8")
.WithFileSizeDefinedInColumn(fileSizeColumnIndex))))
.WithoutFolders();

// Create payload for request.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ public async Task Sample07_DirectImportSettingsForDocuments()
const int extractedTextFilePathColumnIndex = 12;
const int emailToColumnIndex = 11;
const int fileNameColumnIndex = 13;
const int fileSizeColumnIndex = 14;
const int filePathColumnIndex = 22;

// Create payload for request.
Expand Down Expand Up @@ -99,22 +100,16 @@ public async Task Sample07_DirectImportSettingsForDocuments()
ContainsID = false,
ColumnIndex = extractedTextFilePathColumnIndex,
ContainsFilePath = true,
Encoding = "UTF-8",
FileSizeColumnIndex = fileSizeColumnIndex
},
},
},
Folder = new FolderSettings
{
FolderPathColumnIndex = null,
RootFolderID = rootFolderId,
},
Other = new OtherSettings
{
ExtractedText = new ExtractedTextSettings
{
Encoding = null,
ValidateEncoding = true,
},
},
}
};

// Create payload for request.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,14 @@ public async Task Sample11_DirectImportSettingsForImages()
PageNumbering = PageNumbering.AutoNumberImages,
ProductionID = null,
LoadExtractedText = true,
ExtractedTextEncoding = "UTF-8"
},
Fields = null,
Folder = new FolderSettings
{
FolderPathColumnIndex = null,
RootFolderID = rootFolderId,
},
Other = new OtherSettings
{
ExtractedText = new ExtractedTextSettings
{
ValidateEncoding = true,
},
},
}
};

// Create payload for request.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Relativity.Import.SDK" Version="1.0.8" />
<PackageReference Include="Relativity.Import.SDK" Version="1.0.22" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ public async Task Sample05_ImportDocumentsWithExtractedText()
const int controlNumberColumnIndex = 0;
const int extractedTextPathColumnIndex = 12;
const int fileNameColumnIndex = 13;
const int fileSizeColumnIndex = 14;
const int filePathColumnIndex = 22;

// Path to the load files used in data source settings.
Expand All @@ -53,7 +54,8 @@ public async Task Sample05_ImportDocumentsWithExtractedText()
.WithField(controlNumberColumnIndex, "Control Number")
.WithExtractedTextField(extractedTextPathColumnIndex, e => e
.WithExtractedTextInSeparateFiles(f => f
.WithEncoding("UTF-8"))))
.WithEncoding("UTF-8")
.WithFileSizeDefinedInColumn(fileSizeColumnIndex))))
.WithoutFolders();

// Configuration settings for data source. Builder is used to create settings.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ public async Task Sample07_DirectImportSettingsForDocuments()
const int extractedTextFilePathColumnIndex = 12;
const int emailToColumnIndex = 11;
const int fileNameColumnIndex = 13;
const int fileSizeColumnIndex = 14;
const int filePathColumnIndex = 22;

// Configuration settings for document import. Example of set without using ImportDocumentSettingsBuilder.
Expand Down Expand Up @@ -87,22 +88,16 @@ public async Task Sample07_DirectImportSettingsForDocuments()
ContainsID = false,
ColumnIndex = extractedTextFilePathColumnIndex,
ContainsFilePath = true,
Encoding = "UTF-8",
FileSizeColumnIndex = fileSizeColumnIndex
},
},
},
Folder = new FolderSettings
{
FolderPathColumnIndex = null,
RootFolderID = rootFolderId,
},
Other = new OtherSettings
{
ExtractedText = new ExtractedTextSettings
{
Encoding = null,
ValidateEncoding = true,
},
},
}
};

// Example of data source configuration created without using DataSourceSettingsBuilder.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,20 +48,14 @@ public async Task Sample11_DirectImportSettingsForImages()
PageNumbering = PageNumbering.AutoNumberImages,
ProductionID = null,
LoadExtractedText = true,
ExtractedTextEncoding = "UTF-8"
},
Fields = null,
Folder = new FolderSettings
{
FolderPathColumnIndex = null,
RootFolderID = rootFolderId,
},
Other = new OtherSettings
{
ExtractedText = new ExtractedTextSettings
{
ValidateEncoding = true,
},
},
}
};

// Configuration settings for data source created without DataSourceSettingsBuilder.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ public async Task Sample16_ReadImportDocumentSettings()
const int controlNumberColumnIndex = 0;
const int emailToColumnIndex = 11;
const int fileNameColumnIndex = 13;
const int fileSizeColumnIndex = 14;
const int filePathColumnIndex = 22;

// Configuration of document import. Builder is used to create the settings.
Expand All @@ -47,7 +48,8 @@ public async Task Sample16_ReadImportDocumentSettings()
.WithField(emailToColumnIndex, "Email To")
.WithExtractedTextField(10,
e => e.WithExtractedTextInSeparateFiles(
a => a.WithEncoding("UTF-8"))))
a => a.WithEncoding("UTF-8")
.WithFileSizeDefinedInColumn(fileSizeColumnIndex))))
.WithoutFolders();

using (Relativity.Import.V1.Services.IDocumentConfigurationController documentConfiguration =
Expand Down Expand Up @@ -77,8 +79,9 @@ public async Task Sample16_ReadImportDocumentSettings()
{
// Reading of example fields.
Console.WriteLine($"Native.FileNameColumnIndex: {documentSettings.Value.Native.FileNameColumnIndex}");
Console.WriteLine($"ExtractedText.Encoding: {documentSettings.Value.Other?.ExtractedText?.Encoding}");
Console.WriteLine($"FieldMappings[0].ColumnIndex: {documentSettings.Value.Fields.FieldMappings[0].ColumnIndex}");
Console.WriteLine($"FieldMappings[2].Encoding: {documentSettings.Value.Fields.FieldMappings[2].Encoding}");
Console.WriteLine($"FieldMappings[2].FileSizeColumnIndex: {documentSettings.Value.Fields.FieldMappings[2].FileSizeColumnIndex}");
}
}
}
Expand All @@ -87,6 +90,7 @@ public async Task Sample16_ReadImportDocumentSettings()

/* Expected console result:
Native.FileNameColumnIndex: 13
ExtractedText.Encoding: UTF-8
FieldMappings[0].ColumnIndex: 0
FieldMappings[2].Encoding: UTF-8
FieldMappings[2].FileSizeColumnIndex: 14
*/
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Context "Sample05 Import documents with extracted text" {

$body = @{
applicationName = "Import-service-sample-app"
correlationID = "Sample-job-0005"
correlationID = "Sample-job-0005"
} | ConvertTo-Json -Depth 10

$response = $global:WebRequest.callPost($uri, $body)
Expand Down Expand Up @@ -47,16 +47,12 @@ Context "Sample05 Import documents with extracted text" {
"ColumnIndex": 12,
"Field": "Extracted Text",
"ContainsID": false,
"ContainsFilePath": true
"ContainsFilePath": true,
"Encoding": "UTF-8",
"FileSizeColumnIndex": 14
}
]
},
"Other":{
"ExtractedText":{
"Encoding": "UTF-8",
"ValidateEncoding": false
}
},
"Folder":null
}
}'
Expand All @@ -69,18 +65,18 @@ Context "Sample05 Import documents with extracted text" {
$uri = $global:Endpoints.importSourceAddUri($importId, $sourceId)
$dataSourceConfigurationBody = @{
dataSourceSettings = @{
path = $loadFilePath
path = $loadFilePath
firstLineContainsColumnNames = $true
startLine = 0
columnDelimiter = "|"
quoteDelimiter = "^"
newLineDelimiter = "#"
nestedValueDelimiter = "&"
multiValueDelimiter = "$"
endOfLine = 0
encoding = $null
cultureInfo = "en-us"
type = 2
startLine = 0
columnDelimiter = "|"
quoteDelimiter = "^"
newLineDelimiter = "#"
nestedValueDelimiter = "&"
multiValueDelimiter = "$"
endOfLine = 0
encoding = $null
cultureInfo = "en-us"
type = 2
}
} | ConvertTo-Json -Depth 10

Expand Down Expand Up @@ -114,8 +110,7 @@ Context "Sample05 Import documents with extracted text" {

[int]$sleepTime = 5

while($isJobFinished -ne $true)
{
while ($isJobFinished -ne $true) {
Start-Sleep -Seconds $sleepTime
$jobDetailsResponse = $global:WebRequest.callGet($uri)
$isJobFinished = $jobDetailsResponse."Value"."IsFinished"
Expand Down
Loading

0 comments on commit 83e0dd9

Please sign in to comment.