diff --git a/.gitignore b/.gitignore index 7a88658e..19d93d91 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,4 @@ -/WebCollector/target/ -/WebCollector/.idea -/WebCollectorExample/ -/WebCollectorExample/target/ -/Lazy/target/ -/Lazy/.idea -/WebCollector-Hadoop/target/ -/JSRule/target/ \ No newline at end of file +WebCollectorExample +target +.idea +*.iml diff --git a/WebCollector/.idea/.name b/WebCollector/.idea/.name deleted file mode 100644 index 41e5d474..00000000 --- a/WebCollector/.idea/.name +++ /dev/null @@ -1 +0,0 @@ -WebCollector \ No newline at end of file diff --git a/WebCollector/.idea/compiler.xml b/WebCollector/.idea/compiler.xml deleted file mode 100644 index a61dad69..00000000 --- a/WebCollector/.idea/compiler.xml +++ /dev/null @@ -1,32 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="CompilerConfiguration"> - <resourceExtensions /> - <wildcardResourcePatterns> - <entry name="!?*.java" /> - <entry name="!?*.form" /> - <entry name="!?*.class" /> - <entry name="!?*.groovy" /> - <entry name="!?*.scala" /> - <entry name="!?*.flex" /> - <entry name="!?*.kt" /> - <entry name="!?*.clj" /> - <entry name="!?*.aj" /> - </wildcardResourcePatterns> - <annotationProcessing> - <profile default="true" name="Default" enabled="false"> - <processorPath useClasspath="true" /> - </profile> - <profile default="false" name="Maven default annotation processors profile" enabled="true"> - <sourceOutputDir name="target/generated-sources/annotations" /> - <sourceTestOutputDir name="target/generated-test-sources/test-annotations" /> - <outputRelativeToContentRoot value="true" /> - <processorPath useClasspath="true" /> - <module name="WebCollector" /> - </profile> - </annotationProcessing> - <bytecodeTargetLevel> - <module name="WebCollector" target="1.6" /> - </bytecodeTargetLevel> - </component> -</project> \ No newline at end of file diff --git a/WebCollector/.idea/copyright/profiles_settings.xml 
b/WebCollector/.idea/copyright/profiles_settings.xml deleted file mode 100644 index c7d1c5a8..00000000 --- a/WebCollector/.idea/copyright/profiles_settings.xml +++ /dev/null @@ -1,3 +0,0 @@ -<component name="CopyrightManager"> - <settings default="" /> -</component> \ No newline at end of file diff --git a/WebCollector/.idea/encodings.xml b/WebCollector/.idea/encodings.xml deleted file mode 100644 index fe7d837c..00000000 --- a/WebCollector/.idea/encodings.xml +++ /dev/null @@ -1,7 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="Encoding"> - <file url="file://$PROJECT_DIR$" charset="UTF-8" /> - <file url="PROJECT" charset="UTF-8" /> - </component> -</project> \ No newline at end of file diff --git a/WebCollector/.idea/misc.xml b/WebCollector/.idea/misc.xml deleted file mode 100644 index b31cbbe6..00000000 --- a/WebCollector/.idea/misc.xml +++ /dev/null @@ -1,64 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="ClientPropertiesManager"> - <properties class="javax.swing.AbstractButton"> - <property name="hideActionText" class="java.lang.Boolean" /> - </properties> - <properties class="javax.swing.JComponent"> - <property name="html.disable" class="java.lang.Boolean" /> - </properties> - <properties class="javax.swing.JEditorPane"> - <property name="JEditorPane.w3cLengthUnits" class="java.lang.Boolean" /> - <property name="JEditorPane.honorDisplayProperties" class="java.lang.Boolean" /> - <property name="charset" class="java.lang.String" /> - </properties> - <properties class="javax.swing.JList"> - <property name="List.isFileList" class="java.lang.Boolean" /> - </properties> - <properties class="javax.swing.JPasswordField"> - <property name="JPasswordField.cutCopyAllowed" class="java.lang.Boolean" /> - </properties> - <properties class="javax.swing.JSlider"> - <property name="Slider.paintThumbArrowShape" class="java.lang.Boolean" /> - <property name="JSlider.isFilled" 
class="java.lang.Boolean" /> - </properties> - <properties class="javax.swing.JTable"> - <property name="Table.isFileList" class="java.lang.Boolean" /> - <property name="JTable.autoStartsEdit" class="java.lang.Boolean" /> - <property name="terminateEditOnFocusLost" class="java.lang.Boolean" /> - </properties> - <properties class="javax.swing.JToolBar"> - <property name="JToolBar.isRollover" class="java.lang.Boolean" /> - </properties> - <properties class="javax.swing.JTree"> - <property name="JTree.lineStyle" class="java.lang.String" /> - </properties> - <properties class="javax.swing.text.JTextComponent"> - <property name="caretAspectRatio" class="java.lang.Double" /> - <property name="caretWidth" class="java.lang.Integer" /> - </properties> - </component> - <component name="EntryPointsManager"> - <entry_points version="2.0" /> - </component> - <component name="MavenProjectsManager"> - <option name="originalFiles"> - <list> - <option value="$PROJECT_DIR$/pom.xml" /> - </list> - </option> - </component> - <component name="ProjectLevelVcsManager" settingsEditedManually="false"> - <OptionsSetting value="true" id="Add" /> - <OptionsSetting value="true" id="Remove" /> - <OptionsSetting value="true" id="Checkout" /> - <OptionsSetting value="true" id="Update" /> - <OptionsSetting value="true" id="Status" /> - <OptionsSetting value="true" id="Edit" /> - <ConfirmationsSetting value="0" id="Add" /> - <ConfirmationsSetting value="0" id="Remove" /> - </component> - <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="true" assert-keyword="true" jdk-15="true" project-jdk-name="1.8" project-jdk-type="JavaSDK"> - <output url="file://$PROJECT_DIR$/classes" /> - </component> -</project> \ No newline at end of file diff --git a/WebCollector/.idea/workspace.xml b/WebCollector/.idea/workspace.xml deleted file mode 100644 index 118957a8..00000000 --- a/WebCollector/.idea/workspace.xml +++ /dev/null @@ -1,1286 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> 
-<project version="4"> - <component name="ChangeListManager"> - <list default="true" id="2d187928-03e4-460a-a87e-ae6ec47223d2" name="Default" comment="" /> - <ignored path="WebCollector.iws" /> - <ignored path=".idea/workspace.xml" /> - <ignored path="$PROJECT_DIR$/target/" /> - <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" /> - <option name="TRACKING_ENABLED" value="true" /> - <option name="SHOW_DIALOG" value="false" /> - <option name="HIGHLIGHT_CONFLICTS" value="true" /> - <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" /> - <option name="LAST_RESOLUTION" value="IGNORE" /> - </component> - <component name="ChangesViewManager" flattened_view="true" show_ignored="false" /> - <component name="CreatePatchCommitExecutor"> - <option name="PATCH_PATH" value="" /> - </component> - <component name="ExecutionTargetManager" SELECTED_TARGET="default_target" /> - <component name="FavoritesManager"> - <favorites_list name="WebCollector" /> - </component> - <component name="FileEditorManager"> - <leaf> - <file leaf-file-name="Fetcher.java" pinned="false" current-in-tab="false"> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/fetcher/Fetcher.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="228" column="0" selection-start-line="228" selection-start-column="0" selection-end-line="228" selection-end-column="0" /> - <folding /> - </state> - </provider> - </entry> - </file> - <file leaf-file-name="pom.xml" pinned="false" current-in-tab="false"> - <entry file="file://$PROJECT_DIR$/pom.xml"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="8" column="17" selection-start-line="8" selection-start-column="17" selection-end-line="8" selection-end-column="17" /> - <folding /> - </state> - </provider> - </entry> - </file> - <file leaf-file-name="DBManager.java" pinned="false" 
current-in-tab="true"> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/crawldb/DBManager.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.5033113"> - <caret line="38" column="0" selection-start-line="38" selection-start-column="0" selection-end-line="38" selection-end-column="0" /> - <folding /> - </state> - </provider> - </entry> - </file> - <file leaf-file-name="BerkeleyDBManager.java" pinned="false" current-in-tab="false"> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/berkeley/BerkeleyDBManager.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="52" column="45" selection-start-line="52" selection-start-column="45" selection-end-line="52" selection-end-column="45" /> - <folding /> - </state> - </provider> - </entry> - </file> - <file leaf-file-name="Crawler.java" pinned="false" current-in-tab="false"> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/crawler/Crawler.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="71" column="23" selection-start-line="71" selection-start-column="23" selection-end-line="71" selection-end-column="23" /> - <folding> - <element signature="e#5327#5328#0" expanded="true" /> - <element signature="e#5365#5366#0" expanded="true" /> - </folding> - </state> - </provider> - </entry> - </file> - <file leaf-file-name="RamDB.java" pinned="false" current-in-tab="false"> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/ram/RamDB.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="26" column="13" selection-start-line="26" selection-start-column="13" selection-end-line="26" selection-end-column="13" /> - <folding> - <element signature="imports" 
expanded="true" /> - </folding> - </state> - </provider> - </entry> - </file> - <file leaf-file-name="BerkeleyGenerator.java" pinned="false" current-in-tab="false"> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/berkeley/BerkeleyGenerator.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="99" column="69" selection-start-line="99" selection-start-column="69" selection-end-line="99" selection-end-column="69" /> - <folding /> - </state> - </provider> - </entry> - </file> - <file leaf-file-name="BerkeleyDBUtils.java" pinned="false" current-in-tab="false"> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/berkeley/BerkeleyDBUtils.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="61" column="43" selection-start-line="61" selection-start-column="43" selection-end-line="61" selection-end-column="43" /> - <folding /> - </state> - </provider> - </entry> - </file> - <file leaf-file-name="CrawlDatumFormater.java" pinned="false" current-in-tab="false"> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/util/CrawlDatumFormater.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="76" column="65" selection-start-line="76" selection-start-column="65" selection-end-line="76" selection-end-column="65" /> - <folding /> - </state> - </provider> - </entry> - </file> - </leaf> - </component> - <component name="FileTemplateManagerImpl"> - <option name="RECENT_TEMPLATES"> - <list> - <option value="Class" /> - </list> - </option> - </component> - <component name="GradleLocalSettings"> - <option name="externalProjectsViewState"> - <projects_view /> - </option> - </component> - <component name="IdeDocumentHistory"> - <option name="CHANGED_PATHS"> - <list> - <option 
value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/fetcher/Visitor.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/fetcher/AutoParseExecutor.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/crawler/BasicCrawler.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/fetcher/CommonExecutor.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/crawler/Common.java" /> - <option value="$PROJECT_DIR$/src/main/java/RubyExecutor.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/crawler/TestCrawler.java" /> - <option value="$PROJECT_DIR$/src/main/java/RubyCrawler.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/crawldb/Generator.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/util/Config.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/mongo/MongoGenerator.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/berkeley/BerkeleyGenerator.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/ram/RamGenerator.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/crawler/AutoParseCrawler.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/fetcher/Fetcher.java" /> - <option value="$PROJECT_DIR$/src/main/java/CsdnCrawler.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/berkeley/BerkeleyDBManager.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/ram/RamDBManager.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/mongo/MongoDBManager.java" /> - <option 
value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/mongo/MongoCrawler.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/berkeley/BreadthCrawler.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/crawler/Crawler.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/example/DemoSeleniumCrawler.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/example/DemoSelenium.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/fetcher/Executor.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/example/DemoBingCrawler.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/model/CrawlDatum.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/model/Page.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/example/DemoDepthCrawler.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/example/DemoPostCrawler.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/example/Tutorial2Crawler.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/model/CrawlDatums.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/ram/RamCrawler.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/TestCrawler.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/util/CrawlDatumFormater.java" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/ram/RamDB.java" /> - <option value="$PROJECT_DIR$/pom.xml" /> - <option value="$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/crawldb/DBManager.java" /> - </list> - </option> - </component> - <component name="MavenImportPreferences"> - <option 
name="generalSettings"> - <MavenGeneralSettings> - <option name="mavenHome" value="D:/apache/apache-maven-3.3.3" /> - </MavenGeneralSettings> - </option> - </component> - <component name="MavenProjectNavigator"> - <treeState> - <PATH> - <PATH_ELEMENT> - <option name="myItemId" value="" /> - <option name="myItemType" value="org.jetbrains.idea.maven.navigator.MavenProjectsStructure$RootNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="org.jetbrains.idea.maven.navigator.MavenProjectsStructure$ProjectNode" /> - </PATH_ELEMENT> - </PATH> - <PATH> - <PATH_ELEMENT> - <option name="myItemId" value="" /> - <option name="myItemType" value="org.jetbrains.idea.maven.navigator.MavenProjectsStructure$RootNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="org.jetbrains.idea.maven.navigator.MavenProjectsStructure$ProjectNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="Lifecycle" /> - <option name="myItemType" value="org.jetbrains.idea.maven.navigator.MavenProjectsStructure$LifecycleNode" /> - </PATH_ELEMENT> - </PATH> - </treeState> - </component> - <component name="NamedScopeManager"> - <order /> - </component> - <component name="ProjectFrameBounds"> - <option name="x" value="-8" /> - <option name="y" value="-8" /> - <option name="width" value="1456" /> - <option name="height" value="916" /> - </component> - <component name="ProjectLevelVcsManager" settingsEditedManually="false"> - <OptionsSetting value="true" id="Add" /> - <OptionsSetting value="true" id="Remove" /> - <OptionsSetting value="true" id="Checkout" /> - <OptionsSetting value="true" id="Update" /> - <OptionsSetting value="true" id="Status" /> - <OptionsSetting value="true" id="Edit" /> - <ConfirmationsSetting value="0" id="Add" /> - <ConfirmationsSetting value="0" id="Remove" /> - </component> - <component name="ProjectView"> - 
<navigator currentView="ProjectPane" proportions="" version="1"> - <flattenPackages /> - <showMembers /> - <showModules /> - <showLibraryContents /> - <hideEmptyPackages /> - <abbreviatePackageNames /> - <autoscrollToSource /> - <autoscrollFromSource /> - <sortByType /> - </navigator> - <panes> - <pane id="Scratches" /> - <pane id="ProjectPane"> - <subPane> - <PATH> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" /> - </PATH_ELEMENT> - </PATH> - <PATH> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - </PATH> - <PATH> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="src" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - </PATH> - <PATH> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="src" /> - <option name="myItemType" 
value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="main" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - </PATH> - <PATH> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="src" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="main" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="java" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="dmic" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="webcollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - </PATH> - <PATH> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="src" /> - <option name="myItemType" 
value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="main" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="java" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="dmic" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="webcollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="plugin" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="ram" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - </PATH> - <PATH> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="src" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="main" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="java" /> - <option name="myItemType" 
value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="dmic" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="webcollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="plugin" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="mongo" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - </PATH> - <PATH> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="src" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="main" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="java" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="dmic" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="webcollector" /> - <option name="myItemType" 
value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="plugin" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="berkeley" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - </PATH> - <PATH> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="src" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="main" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="java" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="dmic" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="webcollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="crawler" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - </PATH> - <PATH> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" 
value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="src" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="main" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="java" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="dmic" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="webcollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="crawldb" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - </PATH> - <PATH> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="WebCollector" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="src" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="main" /> - <option name="myItemType" 
value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="java" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="dmic" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - </PATH> - </subPane> - </pane> - <pane id="Scope" /> - <pane id="PackagesPane" /> - </panes> - </component> - <component name="PropertiesComponent"> - <property name="settings.editor.selected.configurable" value="MavenSettings" /> - <property name="settings.editor.splitter.proportion" value="0.2" /> - <property name="last_opened_file_path" value="$PROJECT_DIR$" /> - <property name="recentsLimit" value="5" /> - <property name="dynamic.classpath" value="false" /> - <property name="FullScreen" value="false" /> - </component> - <component name="RunManager" selected="Application.TestCrawler"> - <configuration default="false" name="CsdnCrawler" type="Application" factoryName="Application" temporary="true" nameIsGenerated="true"> - <extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" /> - <option name="MAIN_CLASS_NAME" value="CsdnCrawler" /> - <option name="VM_PARAMETERS" /> - <option name="PROGRAM_PARAMETERS" /> - <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" /> - <option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" /> - <option name="ALTERNATIVE_JRE_PATH" /> - <option name="ENABLE_SWING_INSPECTOR" value="false" /> - <option name="ENV_VARIABLES" /> - <option name="PASS_PARENT_ENVS" value="true" /> - <module name="WebCollector" /> - <envs /> - <method /> - </configuration> - <configuration default="false" name="TutorialCrawler" type="Application" factoryName="Application" temporary="true" nameIsGenerated="true"> - <extension name="coverage" enabled="false" merge="false" sample_coverage="true" 
runner="idea"> - <pattern> - <option name="PATTERN" value="cn.edu.hfut.dmic.webcollector.example.*" /> - <option name="ENABLED" value="true" /> - </pattern> - </extension> - <option name="MAIN_CLASS_NAME" value="cn.edu.hfut.dmic.webcollector.example.TutorialCrawler" /> - <option name="VM_PARAMETERS" /> - <option name="PROGRAM_PARAMETERS" /> - <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" /> - <option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" /> - <option name="ALTERNATIVE_JRE_PATH" /> - <option name="ENABLE_SWING_INSPECTOR" value="false" /> - <option name="ENV_VARIABLES" /> - <option name="PASS_PARENT_ENVS" value="true" /> - <module name="WebCollector" /> - <envs /> - <method /> - </configuration> - <configuration default="false" name="DemoSelenium" type="Application" factoryName="Application" temporary="true" nameIsGenerated="true"> - <extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea"> - <pattern> - <option name="PATTERN" value="cn.edu.hfut.dmic.webcollector.example.*" /> - <option name="ENABLED" value="true" /> - </pattern> - </extension> - <option name="MAIN_CLASS_NAME" value="cn.edu.hfut.dmic.webcollector.example.DemoSelenium" /> - <option name="VM_PARAMETERS" /> - <option name="PROGRAM_PARAMETERS" /> - <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" /> - <option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" /> - <option name="ALTERNATIVE_JRE_PATH" /> - <option name="ENABLE_SWING_INSPECTOR" value="false" /> - <option name="ENV_VARIABLES" /> - <option name="PASS_PARENT_ENVS" value="true" /> - <module name="WebCollector" /> - <envs /> - <method /> - </configuration> - <configuration default="false" name="DemoBingCrawler" type="Application" factoryName="Application" temporary="true" nameIsGenerated="true"> - <extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea"> - <pattern> - <option name="PATTERN" value="cn.edu.hfut.dmic.webcollector.example.*" /> - 
<option name="ENABLED" value="true" /> - </pattern> - </extension> - <option name="MAIN_CLASS_NAME" value="cn.edu.hfut.dmic.webcollector.example.DemoBingCrawler" /> - <option name="VM_PARAMETERS" /> - <option name="PROGRAM_PARAMETERS" /> - <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" /> - <option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" /> - <option name="ALTERNATIVE_JRE_PATH" /> - <option name="ENABLE_SWING_INSPECTOR" value="false" /> - <option name="ENV_VARIABLES" /> - <option name="PASS_PARENT_ENVS" value="true" /> - <module name="WebCollector" /> - <envs /> - <method /> - </configuration> - <configuration default="false" name="TestCrawler" type="Application" factoryName="Application" temporary="true" nameIsGenerated="true"> - <extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea"> - <pattern> - <option name="PATTERN" value="cn.edu.hfut.dmic.*" /> - <option name="ENABLED" value="true" /> - </pattern> - </extension> - <option name="MAIN_CLASS_NAME" value="cn.edu.hfut.dmic.TestCrawler" /> - <option name="VM_PARAMETERS" /> - <option name="PROGRAM_PARAMETERS" /> - <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" /> - <option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" /> - <option name="ALTERNATIVE_JRE_PATH" /> - <option name="ENABLE_SWING_INSPECTOR" value="false" /> - <option name="ENV_VARIABLES" /> - <option name="PASS_PARENT_ENVS" value="true" /> - <module name="WebCollector" /> - <envs /> - <method /> - </configuration> - <configuration default="true" type="#org.jetbrains.idea.devkit.run.PluginConfigurationType" factoryName="Plugin"> - <module name="" /> - <option name="VM_PARAMETERS" value="-Xmx512m -Xms256m -XX:MaxPermSize=250m -ea" /> - <option name="PROGRAM_PARAMETERS" /> - <method /> - </configuration> - <configuration default="true" type="AndroidRunConfigurationType" factoryName="Android Application"> - <module name="" /> - <option name="ACTIVITY_CLASS" value="" /> - <option name="MODE" 
value="default_activity" /> - <option name="DEPLOY" value="true" /> - <option name="ARTIFACT_NAME" value="" /> - <option name="TARGET_SELECTION_MODE" value="EMULATOR" /> - <option name="USE_LAST_SELECTED_DEVICE" value="false" /> - <option name="PREFERRED_AVD" value="" /> - <option name="USE_COMMAND_LINE" value="true" /> - <option name="COMMAND_LINE" value="" /> - <option name="WIPE_USER_DATA" value="false" /> - <option name="DISABLE_BOOT_ANIMATION" value="false" /> - <option name="NETWORK_SPEED" value="full" /> - <option name="NETWORK_LATENCY" value="none" /> - <option name="CLEAR_LOGCAT" value="false" /> - <option name="SHOW_LOGCAT_AUTOMATICALLY" value="true" /> - <option name="FILTER_LOGCAT_AUTOMATICALLY" value="true" /> - <method /> - </configuration> - <configuration default="true" type="AndroidTestRunConfigurationType" factoryName="Android Tests"> - <module name="" /> - <option name="TESTING_TYPE" value="0" /> - <option name="INSTRUMENTATION_RUNNER_CLASS" value="" /> - <option name="METHOD_NAME" value="" /> - <option name="CLASS_NAME" value="" /> - <option name="PACKAGE_NAME" value="" /> - <option name="TARGET_SELECTION_MODE" value="EMULATOR" /> - <option name="USE_LAST_SELECTED_DEVICE" value="false" /> - <option name="PREFERRED_AVD" value="" /> - <option name="USE_COMMAND_LINE" value="true" /> - <option name="COMMAND_LINE" value="" /> - <option name="WIPE_USER_DATA" value="false" /> - <option name="DISABLE_BOOT_ANIMATION" value="false" /> - <option name="NETWORK_SPEED" value="full" /> - <option name="NETWORK_LATENCY" value="none" /> - <option name="CLEAR_LOGCAT" value="false" /> - <option name="SHOW_LOGCAT_AUTOMATICALLY" value="true" /> - <option name="FILTER_LOGCAT_AUTOMATICALLY" value="true" /> - <method /> - </configuration> - <configuration default="true" type="Applet" factoryName="Applet"> - <option name="HTML_USED" value="false" /> - <option name="WIDTH" value="400" /> - <option name="HEIGHT" value="300" /> - <option name="POLICY_FILE" 
value="$APPLICATION_HOME_DIR$/bin/appletviewer.policy" /> - <module /> - <method /> - </configuration> - <configuration default="true" type="Application" factoryName="Application"> - <extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" /> - <option name="MAIN_CLASS_NAME" /> - <option name="VM_PARAMETERS" /> - <option name="PROGRAM_PARAMETERS" /> - <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" /> - <option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" /> - <option name="ALTERNATIVE_JRE_PATH" /> - <option name="ENABLE_SWING_INSPECTOR" value="false" /> - <option name="ENV_VARIABLES" /> - <option name="PASS_PARENT_ENVS" value="true" /> - <module name="" /> - <envs /> - <method /> - </configuration> - <configuration default="true" type="GradleRunConfiguration" factoryName="Gradle"> - <ExternalSystemSettings> - <option name="executionName" /> - <option name="externalProjectPath" /> - <option name="externalSystemIdString" value="GRADLE" /> - <option name="scriptParameters" /> - <option name="taskDescriptions"> - <list /> - </option> - <option name="taskNames"> - <list /> - </option> - <option name="vmOptions" /> - </ExternalSystemSettings> - <method /> - </configuration> - <configuration default="true" type="JUnit" factoryName="JUnit"> - <extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" /> - <module name="" /> - <option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" /> - <option name="ALTERNATIVE_JRE_PATH" /> - <option name="PACKAGE_NAME" /> - <option name="MAIN_CLASS_NAME" /> - <option name="METHOD_NAME" /> - <option name="TEST_OBJECT" value="class" /> - <option name="VM_PARAMETERS" value="-ea" /> - <option name="PARAMETERS" /> - <option name="WORKING_DIRECTORY" value="$MODULE_DIR$" /> - <option name="ENV_VARIABLES" /> - <option name="PASS_PARENT_ENVS" value="true" /> - <option name="TEST_SEARCH_SCOPE"> - <value defaultName="singleModule" /> - </option> - <envs /> - 
<patterns /> - <method /> - </configuration> - <configuration default="true" type="JarApplication" factoryName="JAR Application"> - <extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" /> - <envs /> - <method /> - </configuration> - <configuration default="true" type="MavenRunConfiguration" factoryName="Maven"> - <MavenSettings> - <option name="myGeneralSettings" /> - <option name="myRunnerSettings" /> - <option name="myRunnerParameters"> - <MavenRunnerParameters> - <option name="profiles"> - <set /> - </option> - <option name="goals"> - <list /> - </option> - <option name="profilesMap"> - <map /> - </option> - <option name="resolveToWorkspace" value="false" /> - <option name="workingDirPath" value="" /> - </MavenRunnerParameters> - </option> - </MavenSettings> - <method /> - </configuration> - <configuration default="true" type="Remote" factoryName="Remote"> - <option name="USE_SOCKET_TRANSPORT" value="true" /> - <option name="SERVER_MODE" value="false" /> - <option name="SHMEM_ADDRESS" value="javadebug" /> - <option name="HOST" value="localhost" /> - <option name="PORT" value="5005" /> - <method /> - </configuration> - <configuration default="true" type="TestNG" factoryName="TestNG"> - <extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" /> - <module name="" /> - <option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" /> - <option name="ALTERNATIVE_JRE_PATH" /> - <option name="SUITE_NAME" /> - <option name="PACKAGE_NAME" /> - <option name="MAIN_CLASS_NAME" /> - <option name="METHOD_NAME" /> - <option name="GROUP_NAME" /> - <option name="TEST_OBJECT" value="CLASS" /> - <option name="VM_PARAMETERS" value="-ea" /> - <option name="PARAMETERS" /> - <option name="WORKING_DIRECTORY" value="$MODULE_DIR$" /> - <option name="OUTPUT_DIRECTORY" /> - <option name="ANNOTATION_TYPE" /> - <option name="ENV_VARIABLES" /> - <option name="PASS_PARENT_ENVS" value="true" /> - <option 
name="TEST_SEARCH_SCOPE"> - <value defaultName="singleModule" /> - </option> - <option name="USE_DEFAULT_REPORTERS" value="false" /> - <option name="PROPERTIES_FILE" /> - <envs /> - <properties /> - <listeners /> - <method /> - </configuration> - <list size="5"> - <item index="0" class="java.lang.String" itemvalue="Application.CsdnCrawler" /> - <item index="1" class="java.lang.String" itemvalue="Application.TutorialCrawler" /> - <item index="2" class="java.lang.String" itemvalue="Application.DemoSelenium" /> - <item index="3" class="java.lang.String" itemvalue="Application.DemoBingCrawler" /> - <item index="4" class="java.lang.String" itemvalue="Application.TestCrawler" /> - </list> - <recent_temporary> - <list size="5"> - <item index="0" class="java.lang.String" itemvalue="Application.TestCrawler" /> - <item index="1" class="java.lang.String" itemvalue="Application.DemoBingCrawler" /> - <item index="2" class="java.lang.String" itemvalue="Application.DemoSelenium" /> - <item index="3" class="java.lang.String" itemvalue="Application.CsdnCrawler" /> - <item index="4" class="java.lang.String" itemvalue="Application.TutorialCrawler" /> - </list> - </recent_temporary> - </component> - <component name="SbtLocalSettings"> - <option name="externalProjectsViewState"> - <projects_view /> - </option> - </component> - <component name="ShelveChangesManager" show_recycled="false" /> - <component name="SvnConfiguration"> - <configuration /> - </component> - <component name="TaskManager"> - <task active="true" id="Default" summary="Default task"> - <changelist id="2d187928-03e4-460a-a87e-ae6ec47223d2" name="Default" comment="" /> - <created>1450677009297</created> - <option name="number" value="Default" /> - <updated>1450677009297</updated> - </task> - <servers /> - </component> - <component name="ToolWindowManager"> - <frame x="-8" y="-8" width="1456" height="916" extended-state="6" /> - <editor active="true" /> - <layout> - <window_info id="Palette" active="false" anchor="right" 
auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" /> - <window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" /> - <window_info id="Palette	" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" /> - <window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.32988358" sideWeight="0.5" order="7" side_tool="true" content_ui="tabs" /> - <window_info id="Application Servers" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" /> - <window_info id="Maven Projects" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" weight="0.15329513" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" /> - <window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" /> - <window_info id="Mongo Explorer" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" /> - <window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" /> - <window_info id="Designer" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" 
visible="false" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" /> - <window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.24641834" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" /> - <window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" /> - <window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" /> - <window_info id="UI Designer" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" /> - <window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="2" side_tool="true" content_ui="tabs" /> - <window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" /> - <window_info id="Messages" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.3280543" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" /> - <window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" /> - <window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" /> - 
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" /> - <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" weight="0.2183258" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" /> - <window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" /> - <window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.32961586" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" /> - <window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" /> - </layout> - </component> - <component name="Vcs.Log.UiProperties"> - <option name="RECENTLY_FILTERED_USER_GROUPS"> - <collection /> - </option> - <option name="RECENTLY_FILTERED_BRANCH_GROUPS"> - <collection /> - </option> - </component> - <component name="VcsContentAnnotationSettings"> - <option name="myLimit" value="2678400000" /> - </component> - <component name="XDebuggerManager"> - <breakpoint-manager /> - <watches-manager /> - </component> - <component name="antWorkspaceConfiguration"> - <option name="IS_AUTOSCROLL_TO_SOURCE" value="false" /> - <option name="FILTER_TARGETS" value="false" /> - </component> - <component name="editorHistoryManager"> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/berkeley/BerkeleyDBUtils.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="61" column="43" selection-start-line="61" 
selection-start-column="43" selection-end-line="61" selection-end-column="43" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/util/CrawlDatumFormater.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="76" column="65" selection-start-line="76" selection-start-column="65" selection-end-line="76" selection-end-column="65" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/fetcher/Fetcher.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="279" column="29" selection-start-line="279" selection-start-column="24" selection-end-line="279" selection-end-column="29" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/pom.xml"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/ram/RamDB.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="26" column="13" selection-start-line="26" selection-start-column="13" selection-end-line="26" selection-end-column="13" /> - <folding> - <element signature="imports" expanded="true" /> - </folding> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/berkeley/BerkeleyGenerator.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="99" column="69" selection-start-line="99" 
selection-start-column="69" selection-end-line="99" selection-end-column="69" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/berkeley/BerkeleyDBUtils.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="61" column="43" selection-start-line="61" selection-start-column="43" selection-end-line="61" selection-end-column="43" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/util/CrawlDatumFormater.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="76" column="65" selection-start-line="76" selection-start-column="65" selection-end-line="76" selection-end-column="65" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/fetcher/Fetcher.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="279" column="29" selection-start-line="279" selection-start-column="24" selection-end-line="279" selection-end-column="29" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/pom.xml"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/berkeley/BerkeleyGenerator.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="99" column="69" selection-start-line="99" selection-start-column="69" selection-end-line="99" 
selection-end-column="69" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/berkeley/BerkeleyDBUtils.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="61" column="43" selection-start-line="61" selection-start-column="43" selection-end-line="61" selection-end-column="43" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/util/CrawlDatumFormater.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="76" column="65" selection-start-line="76" selection-start-column="65" selection-end-line="76" selection-end-column="65" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/crawldb/Generator.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/crawldb/Generator.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/crawldb/Generator.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" /> - </state> - </provider> - 
</entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/crawldb/Generator.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/crawldb/Generator.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/net/HttpRequest.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="-69.933334"> - <caret line="146" column="53" selection-start-line="146" selection-start-column="41" selection-end-line="146" selection-end-column="53" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/util/Config.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="34" column="43" selection-start-line="34" selection-start-column="43" selection-end-line="34" selection-end-column="43" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/ram/RamGenerator.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="81" column="37" selection-start-line="81" selection-start-column="37" selection-end-line="81" selection-end-column="37" /> - </state> - </provider> - </entry> - <entry 
file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/model/Links.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="100" column="0" selection-start-line="100" selection-start-column="0" selection-end-line="100" selection-end-column="0" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/crawler/AutoParseCrawler.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="35" column="2" selection-start-line="35" selection-start-column="2" selection-end-line="35" selection-end-column="2" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/crawldb/Generator.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.53571427"> - <caret line="34" column="56" selection-start-line="34" selection-start-column="56" selection-end-line="34" selection-end-column="56" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/mongo/MongoDBManager.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.9285714"> - <caret line="200" column="24" selection-start-line="200" selection-start-column="24" selection-end-line="200" selection-end-column="24" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/ram/RamDBManager.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="58" column="24" selection-start-line="58" selection-start-column="24" selection-end-line="58" selection-end-column="24" /> - </state> - </provider> - </entry> - <entry 
file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/mongo/MongoGenerator.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.035714287"> - <caret line="56" column="3" selection-start-line="56" selection-start-column="3" selection-end-line="56" selection-end-column="61" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/mongo/MongoCrawler.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="36" column="59" selection-start-line="36" selection-start-column="59" selection-end-line="36" selection-end-column="59" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/berkeley/BreadthCrawler.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="35" column="56" selection-start-line="35" selection-start-column="56" selection-end-line="35" selection-end-column="56" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/fetcher/Visitor.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="16" selection-end-column="3" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/fetcher/Executor.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="16" column="3" selection-start-line="16" selection-start-column="3" selection-end-line="16" selection-end-column="3" /> - </state> - </provider> - </entry> - <entry 
file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/model/Page.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="-14.2"> - <caret line="301" column="24" selection-start-line="301" selection-start-column="24" selection-end-line="301" selection-end-column="24" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/example/DemoDepthCrawler.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.65413535"> - <caret line="61" column="25" selection-start-line="61" selection-start-column="25" selection-end-line="61" selection-end-column="25" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/model/CrawlDatum.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="-11.166667"> - <caret line="146" column="25" selection-start-line="146" selection-start-column="25" selection-end-line="146" selection-end-column="25" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/example/Tutorial2Crawler.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.15767635"> - <caret line="20" column="48" selection-start-line="20" selection-start-column="48" selection-end-line="20" selection-end-column="48" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/example/TutorialCrawler.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.4453125"> - <caret line="69" column="34" selection-start-line="69" selection-start-column="34" selection-end-line="69" selection-end-column="34" /> - </state> - </provider> - </entry> - <entry 
file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/example/DemoBingCrawler.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.7261411"> - <caret line="111" column="5" selection-start-line="111" selection-start-column="5" selection-end-line="111" selection-end-column="5" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/CODE_COVERAGE.md"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/example/DemoPostCrawler.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="16" selection-end-column="3" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/example/DemoSelenium.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="36" column="13" selection-start-line="36" selection-start-column="13" selection-end-line="36" selection-end-column="13" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/model/CrawlDatums.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="81" column="29" selection-start-line="81" selection-start-column="29" selection-end-line="81" selection-end-column="29" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/ram/RamCrawler.java"> - <provider selected="true" 
editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.703125"> - <caret line="46" column="0" selection-start-line="46" selection-start-column="0" selection-end-line="46" selection-end-column="0" /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/fetcher/Fetcher.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="228" column="0" selection-start-line="228" selection-start-column="0" selection-end-line="228" selection-end-column="0" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/berkeley/BerkeleyGenerator.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="99" column="69" selection-start-line="99" selection-start-column="69" selection-end-line="99" selection-end-column="69" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/berkeley/BerkeleyDBUtils.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="61" column="43" selection-start-line="61" selection-start-column="43" selection-end-line="61" selection-end-column="43" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/util/CrawlDatumFormater.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="76" column="65" selection-start-line="76" selection-start-column="65" selection-end-line="76" selection-end-column="65" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/ram/RamDB.java"> - <provider selected="true" 
editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="26" column="13" selection-start-line="26" selection-start-column="13" selection-end-line="26" selection-end-column="13" /> - <folding> - <element signature="imports" expanded="true" /> - </folding> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/pom.xml"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="8" column="17" selection-start-line="8" selection-start-column="17" selection-end-line="8" selection-end-column="17" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/plugin/berkeley/BerkeleyDBManager.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="52" column="45" selection-start-line="52" selection-start-column="45" selection-end-line="52" selection-end-column="45" /> - <folding /> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/crawler/Crawler.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.0"> - <caret line="71" column="23" selection-start-line="71" selection-start-column="23" selection-end-line="71" selection-end-column="23" /> - <folding> - <element signature="e#5327#5328#0" expanded="true" /> - <element signature="e#5365#5366#0" expanded="true" /> - </folding> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/src/main/java/cn/edu/hfut/dmic/webcollector/crawldb/DBManager.java"> - <provider selected="true" editor-type-id="text-editor"> - <state vertical-scroll-proportion="0.5033113"> - <caret line="38" column="0" selection-start-line="38" selection-start-column="0" selection-end-line="38" selection-end-column="0" /> - <folding /> - </state> - </provider> - </entry> - </component> 
-</project> \ No newline at end of file diff --git a/WebCollector/WebCollector.iml b/WebCollector/WebCollector.iml deleted file mode 100644 index 806026a9..00000000 --- a/WebCollector/WebCollector.iml +++ /dev/null @@ -1,63 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4"> - <component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_6" inherit-compiler-output="false"> - <output url="file://$MODULE_DIR$/target/classes" /> - <output-test url="file://$MODULE_DIR$/target/test-classes" /> - <content url="file://$MODULE_DIR$"> - <sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" /> - <sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" /> - <sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" /> - <excludeFolder url="file://$MODULE_DIR$/target" /> - </content> - <orderEntry type="inheritedJdk" /> - <orderEntry type="sourceFolder" forTests="false" /> - <orderEntry type="library" name="Maven: org.jsoup:jsoup:1.8.3" level="project" /> - <orderEntry type="library" name="Maven: com.googlecode.juniversalchardet:juniversalchardet:1.0.3" level="project" /> - <orderEntry type="library" name="Maven: log4j:log4j:1.2.17" level="project" /> - <orderEntry type="library" name="Maven: junit:junit:4.11" level="project" /> - <orderEntry type="library" name="Maven: org.hamcrest:hamcrest-core:1.3" level="project" /> - <orderEntry type="library" name="Maven: org.json:json:20140107" level="project" /> - <orderEntry type="library" name="Maven: com.sleepycat:je:5.0.73" level="project" /> - <orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.9" level="project" /> - <orderEntry type="library" name="Maven: org.slf4j:slf4j-log4j12:1.7.9" level="project" /> - <orderEntry type="library" name="Maven: org.mongodb:mongo-java-driver:3.2.2" level="project" /> - <orderEntry type="library" scope="PROVIDED" 
name="Maven: org.seleniumhq.selenium:selenium-java:2.44.0" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: org.seleniumhq.selenium:selenium-chrome-driver:2.44.0" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: org.seleniumhq.selenium:selenium-remote-driver:2.44.0" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: cglib:cglib-nodep:2.1_3" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: com.google.code.gson:gson:2.3" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: org.seleniumhq.selenium:selenium-api:2.44.0" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: com.google.guava:guava:18.0" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: org.seleniumhq.selenium:selenium-htmlunit-driver:2.44.0" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: net.sourceforge.htmlunit:htmlunit:2.15" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: xalan:xalan:2.7.1" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: xalan:serializer:2.7.1" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: commons-collections:commons-collections:3.2.1" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.commons:commons-lang3:3.3.2" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.httpcomponents:httpmime:4.3.3" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: commons-codec:commons-codec:1.9" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: net.sourceforge.htmlunit:htmlunit-core-js:2.15" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: xerces:xercesImpl:2.11.0" level="project" /> - <orderEntry type="library" scope="PROVIDED" 
name="Maven: xml-apis:xml-apis:1.4.01" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: net.sourceforge.nekohtml:nekohtml:1.9.21" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: net.sourceforge.cssparser:cssparser:0.9.14" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: org.w3c.css:sac:1.3" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: commons-logging:commons-logging:1.1.3" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: org.eclipse.jetty:jetty-websocket:8.1.15.v20140411" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: org.eclipse.jetty:jetty-util:8.1.15.v20140411" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: org.eclipse.jetty:jetty-io:8.1.15.v20140411" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: org.eclipse.jetty:jetty-http:8.1.15.v20140411" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.httpcomponents:httpclient:4.3.4" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.httpcomponents:httpcore:4.3.2" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: org.seleniumhq.selenium:selenium-firefox-driver:2.44.0" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: commons-io:commons-io:2.4" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.commons:commons-exec:1.1" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: org.seleniumhq.selenium:selenium-ie-driver:2.44.0" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: net.java.dev.jna:jna:3.4.0" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: net.java.dev.jna:platform:3.4.0" level="project" /> - <orderEntry type="library" 
scope="PROVIDED" name="Maven: org.seleniumhq.selenium:selenium-safari-driver:2.44.0" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: org.seleniumhq.selenium:selenium-support:2.44.0" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: org.webbitserver:webbit:0.4.14" level="project" /> - <orderEntry type="library" scope="PROVIDED" name="Maven: io.netty:netty:3.5.2.Final" level="project" /> - </component> -</module> \ No newline at end of file diff --git a/WebCollector/src/main/java/cn/edu/hfut/dmic/contentextractor/ContentExtractor.java b/WebCollector/src/main/java/cn/edu/hfut/dmic/contentextractor/ContentExtractor.java index e697decc..c1b5995a 100644 --- a/WebCollector/src/main/java/cn/edu/hfut/dmic/contentextractor/ContentExtractor.java +++ b/WebCollector/src/main/java/cn/edu/hfut/dmic/contentextractor/ContentExtractor.java @@ -17,13 +17,6 @@ */ package cn.edu.hfut.dmic.contentextractor; -import cn.edu.hfut.dmic.webcollector.net.HttpRequest; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -34,6 +27,15 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import cn.edu.hfut.dmic.webcollector.net.HttpRequest; + /** * ContentExtractor could extract content,title,time from news webpage * @@ -42,6 +44,8 @@ public class ContentExtractor { public static final Logger LOG = LoggerFactory.getLogger(ContentExtractor.class); + private static final Pattern TIME_PATTERN = 
Pattern.compile("[^0-9]+([1-2][0-9]{3})[^0-9]{1,5}?([0-1]?[0-9])[^0-9]{1,5}?([0-3][0-9])[^0-9]{1,5}([0-2][0-9]):([0-5][0-9]):([0-5][0-9])"); /* six groups (year, month, day, hour, minute, second): getTime() guards on groupCount() >= 6 and reads group(6) */ + private static final Pattern DATE_PATTERN = Pattern.compile("[^0-9]+([1-2][0-9]{3})[^0-9]{1,5}?([0-1]?[0-9])[^0-9]{1,5}?([0-3][0-9])"); protected Document doc; @@ -197,8 +201,6 @@ public News getNews() throws Exception { } protected String getTime(Element contentElement) throws Exception { - String regex = "([1-2][0-9]{3})[^0-9]{1,5}?([0-1]?[0-9])[^0-9]{1,5}?([0-9]{1,2})[^0-9]{1,5}?([0-2]?[1-9])[^0-9]{1,5}?([0-9]{1,2})[^0-9]{1,5}?([0-9]{1,2})"; - Pattern pattern = Pattern.compile(regex); Element current = contentElement; for (int i = 0; i < 2; i++) { if (current != null && current != doc.body()) { @@ -213,8 +215,8 @@ protected String getTime(Element contentElement) throws Exception { break; } String currentHtml = current.outerHtml(); - Matcher matcher = pattern.matcher(currentHtml); - if (matcher.find()) { + Matcher matcher = TIME_PATTERN.matcher(currentHtml); + if (matcher.find() && matcher.groupCount() >= 6) { return matcher.group(1) + "-" + matcher.group(2) + "-" + matcher.group(3) + " " + matcher.group(4) + ":" + matcher.group(5) + ":" + matcher.group(6); } if (current != doc.body()) { @@ -231,8 +233,6 @@ protected String getTime(Element contentElement) throws Exception { } protected String getDate(Element contentElement) throws Exception { - String regex = "([1-2][0-9]{3})[^0-9]{1,5}?([0-1]?[0-9])[^0-9]{1,5}?([0-9]{1,2})"; - Pattern pattern = Pattern.compile(regex); Element current = contentElement; for (int i = 0; i < 2; i++) { if (current != null && current != doc.body()) { @@ -247,8 +247,8 @@ protected String getDate(Element contentElement) throws Exception { break; } String currentHtml = current.outerHtml(); - Matcher matcher = pattern.matcher(currentHtml); - if (matcher.find()) { + Matcher matcher = DATE_PATTERN.matcher(currentHtml); + if (matcher.find() && matcher.groupCount() >= 3) { return matcher.group(1) + "-" + 
matcher.group(2) + "-" + matcher.group(3); } if (current != doc.body()) { @@ -507,14 +507,14 @@ public static News getNewsByUrl(String url) throws Exception { } public static void main(String[] args) throws Exception { - - News news = ContentExtractor.getNewsByUrl("http://www.huxiu.com/article/121959/1.html"); + String url = "https://www.huxiu.com/article/167883.html"; + News news = ContentExtractor.getNewsByUrl(url); System.out.println(news.getUrl()); System.out.println(news.getTitle()); System.out.println(news.getTime()); System.out.println(news.getContent()); //System.out.println(news.getContentElement()); - + System.out.println(ContentExtractor.getNewsByUrl("http://www.huxiu.com/article/121959/1.html").getTime()); //System.out.println(news); }