forked from Wikidata/Wikidata-Toolkit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLocalDumpFileExample.java
95 lines (83 loc) · 3.19 KB
/
LocalDumpFileExample.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
package org.wikidata.wdtk.examples;
/*
* #%L
* Wikidata Toolkit Examples
* %%
* Copyright (C) 2014 - 2015 Wikidata Toolkit Developers
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import java.io.IOException;
import org.wikidata.wdtk.dumpfiles.DumpContentType;
import org.wikidata.wdtk.dumpfiles.DumpProcessingController;
import org.wikidata.wdtk.dumpfiles.EntityTimerProcessor;
import org.wikidata.wdtk.dumpfiles.MwLocalDumpFile;
/**
* This class illustrates how to process local dumpfiles. It uses
* {@link EntityTimerProcessor} to process a dump.
*
* @author Markus Damm
*
*/
public class LocalDumpFileExample {

    /**
     * Path to the dump that should be processed
     */
    private static final String DUMP_FILE = "./src/resources/sample-dump-20150815.json.gz";

    /**
     * Runs the example: processes the dump at {@link #DUMP_FILE} twice, first
     * passing only the file location and then passing the content type, date
     * stamp and project name explicitly. A single {@link EntityTimerProcessor}
     * is registered for both runs and closed afterwards.
     *
     * @param args
     *            command-line arguments (not used)
     * @throws IOException
     *             if one of the invoked toolkit calls reports an I/O failure
     */
    public static void main(String[] args) throws IOException {
        ExampleHelpers.configureLogging();
        LocalDumpFileExample.printDocumentation();

        // Note that the project name "wikidatawiki" is only for online access;
        // not relevant here.
        DumpProcessingController dumpProcessingController = new DumpProcessingController(
                "wikidatawiki");

        // NOTE(review): the argument 0 presumably means "no timeout" -- confirm
        // against the EntityTimerProcessor documentation.
        EntityTimerProcessor entityTimerProcessor = new EntityTimerProcessor(0);
        dumpProcessingController.registerEntityDocumentProcessor(
                entityTimerProcessor, null, true);

        // Close the timer even when processing fails, so that it is never
        // left open after an exception.
        try {
            // Select local file (meta-data will be guessed):
            System.out.println();
            System.out
                    .println("Processing a local dump file giving only its location");
            System.out
                    .println("(meta-data like the date is guessed from the file name):");
            MwLocalDumpFile mwDumpFile = new MwLocalDumpFile(DUMP_FILE);
            dumpProcessingController.processDump(mwDumpFile);

            // Select local file and set meta-data:
            System.out.println();
            System.out
                    .println("Processing a local dump file with all meta-data set:");
            mwDumpFile = new MwLocalDumpFile(DUMP_FILE, DumpContentType.JSON,
                    "20150815", "wikidatawiki");
            dumpProcessingController.processDump(mwDumpFile);
        } finally {
            entityTimerProcessor.close();
        }
    }

    /**
     * Prints some basic documentation about this program.
     */
    public static void printDocumentation() {
        System.out
                .println("********************************************************************");
        System.out.println("*** Wikidata Toolkit: LocalDumpFileExample");
        System.out.println("*** ");
        System.out
                .println("*** This program illustrates how to process local dumps.");
        // Names the class actually used (was misspelled "EntityTimerProcesses").
        System.out
                .println("*** It uses an EntityTimerProcessor which counts processed items");
        System.out.println("*** and elapsed time.");
        System.out.println("*** ");
        System.out.println("*** See source code for further details.");
        System.out
                .println("********************************************************************");
    }
}