From 5265191b38545059ffe498a2e3c32af80ebd5efb Mon Sep 17 00:00:00 2001 From: Konstantin_Rogozhinski Date: Tue, 26 Mar 2019 12:07:15 +0300 Subject: [PATCH 1/2] the first attempt --- .../java/ru/hh/school/homework/DirThread.java | 62 +++++++++++++++++ .../java/ru/hh/school/homework/Launcher.java | 66 ++----------------- .../ru/hh/school/homework/SearchThread.java | 46 +++++++++++++ .../ru/hh/school/homework/StaticMethods.java | 56 ++++++++++++++++ 4 files changed, 171 insertions(+), 59 deletions(-) create mode 100644 parallelism/src/main/java/ru/hh/school/homework/DirThread.java create mode 100644 parallelism/src/main/java/ru/hh/school/homework/SearchThread.java create mode 100644 parallelism/src/main/java/ru/hh/school/homework/StaticMethods.java diff --git a/parallelism/src/main/java/ru/hh/school/homework/DirThread.java b/parallelism/src/main/java/ru/hh/school/homework/DirThread.java new file mode 100644 index 0000000..d22afb7 --- /dev/null +++ b/parallelism/src/main/java/ru/hh/school/homework/DirThread.java @@ -0,0 +1,62 @@ +package ru.hh.school.homework; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; + +import static java.util.Collections.reverseOrder; +import static java.util.Map.Entry.comparingByValue; +import static java.util.stream.Collectors.toMap; + +public class DirThread implements Runnable { + + private Thread thread; + private Path dirPath; + + DirThread (Path newDirPath) { + dirPath = newDirPath; + thread = new Thread(this, dirPath.toString()); + thread.start(); + } + + @Override + public void run() { + javaFileIter(dirPath); + } + + private void javaFileIter(Path dirPath) { + try { + Map wordMap = new HashMap<>(); + for (Path filePath : Files.newDirectoryStream(dirPath)) { + if (Files.isDirectory(filePath)) { + new DirThread (filePath); + } + if (filePath.toString().endsWith(".java")) { + wordMap = StaticMethods.mapCombiner(wordMap, StaticMethods.naiveCount(filePath)); + } + } + + wordMap = wordMap.entrySet() + .stream() + .sorted(comparingByValue(reverseOrder())) + .limit(10) + .collect(toMap(Map.Entry::getKey, Map.Entry::getValue)); + + String outputString = outputStringAssemble(wordMap); + System.out.print(outputString); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + private String outputStringAssemble (Map wordMap) { + String outputString = ""; + for (String word : wordMap.keySet()) { + new SearchThread(word, dirPath); + } + return outputString; + } +} diff --git a/parallelism/src/main/java/ru/hh/school/homework/Launcher.java b/parallelism/src/main/java/ru/hh/school/homework/Launcher.java index 0d13733..6aa5d6d 100644 --- a/parallelism/src/main/java/ru/hh/school/homework/Launcher.java +++ b/parallelism/src/main/java/ru/hh/school/homework/Launcher.java @@ -3,8 +3,11 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; import java.util.Map; import java.util.stream.Stream; +import java.util.stream.Collectors; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -17,65 +20,10 @@ public class Launcher { - public static void main(String[] args) throws IOException { - // Написать код, который, как можно более параллельно: - // - по заданному пути найдет все "*.java" файлы - // - для каждого файла вычислит 10 самых популярных слов (см. #naiveCount()) - // - соберет top 10 для каждой папки в которой есть хотя-бы один java файл - // - для каждого слова сходит в гугл и вернет количество результатов по нему (см. #naiveSearch()) - // - распечатает в консоль результаты в виде: - // <папка1> - <слово #1> - <кол-во результатов в гугле> - // <папка1> - <слово #2> - <кол-во результатов в гугле> - // ... - // <папка1> - <слово #10> - <кол-во результатов в гугле> - // <папка2> - <слово #1> - <кол-во результатов в гугле> - // <папка2> - <слово #2> - <кол-во результатов в гугле> - // ... - // <папка2> - <слово #10> - <кол-во результатов в гугле> - // ... - // - // Порядок результатов в консоли не обязательный. - // При желании naiveSearch и naiveCount можно оптимизировать. + public static void main(String[] args) throws IOException { + Path path = Path.of("C:\\1_Konstantin\\1_hh\\concurrency\\hh-school-1\\parallelism\\src\\main\\java\\ru\\hh\\school\\async"); - // test our naive methods: - testCount(); - testSearch(); - } + new DirThread(path); - private static void testCount() { - Path path = Path.of("d:\\projects\\hh-school\\parallelism\\src\\main\\java\\ru\\hh\\school\\parallelism\\Runner.java"); - System.out.println(naiveCount(path)); - } - - private static Map naiveCount(Path path) { - try { - return Files.lines(path) - .flatMap(line -> Stream.of(line.split("[^a-zA-Z0-9]"))) - .filter(word -> word.length() > 3) - .collect(groupingBy(identity(), counting())) - .entrySet() - .stream() - .sorted(comparingByValue(reverseOrder())) - .limit(10) - .collect(toMap(Map.Entry::getKey, Map.Entry::getValue)); - } - catch (IOException e) { - throw new RuntimeException(e); } - } - - private static void testSearch() throws IOException { - System.out.println(naiveSearch("public")); - } - - private static long naiveSearch(String query) throws IOException { - Document document = Jsoup // - .connect("https://www.google.com/search?q=" + query) // - .userAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.110 Safari/537.36 Viv/2.3.1440.48") // - .get(); - - Element divResultStats = document.select("div#resultStats").first(); - return Long.valueOf(divResultStats.text().replaceAll("[^0-9]", "")); - } - -} +} \ No newline at end of file diff --git a/parallelism/src/main/java/ru/hh/school/homework/SearchThread.java b/parallelism/src/main/java/ru/hh/school/homework/SearchThread.java new file mode 100644 index 0000000..64bbca9 --- /dev/null +++ b/parallelism/src/main/java/ru/hh/school/homework/SearchThread.java @@ -0,0 +1,46 @@ +package ru.hh.school.homework; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import java.io.IOException; +import java.nio.file.Path; + +public class SearchThread implements Runnable { + + private String word; + private Path dirPath; + private Thread thread; + + SearchThread(String newWord, Path newDirPath) { + word = newWord; + dirPath = newDirPath; + thread = new Thread(this, dirPath.toString().concat(word)); + thread.start(); + } + + @Override + public void run() { + try{ + String newLine = String.format("%s - %s - %d \n", dirPath.toString(), word, naiveSearch(word)); + System.out.print(newLine); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static long naiveSearch(String query) throws IOException { + try{ + Document document = Jsoup // + .connect("https://www.google.com/search?q=" + query) // + .userAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.110 Safari/537.36 Viv/2.3.1440.48") // + .get(); + Element divResultStats = document.select("div#resultStats").first(); + return Long.valueOf(divResultStats.text().replaceAll("[^0-9]", "")); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + +} diff --git a/parallelism/src/main/java/ru/hh/school/homework/StaticMethods.java b/parallelism/src/main/java/ru/hh/school/homework/StaticMethods.java new file mode 100644 index 0000000..1409343 --- /dev/null +++ b/parallelism/src/main/java/ru/hh/school/homework/StaticMethods.java @@ -0,0 +1,56 @@ +package ru.hh.school.homework; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static java.util.Collections.reverseOrder; +import static java.util.Map.Entry.comparingByValue; +import static java.util.function.Function.identity; +import static java.util.stream.Collectors.counting; +import static java.util.stream.Collectors.groupingBy; +import static java.util.stream.Collectors.toMap; + +public class StaticMethods { + + + protected static Map mapCombiner ( + Map map1, Map map2) { + return Stream.concat(map1.entrySet().stream(), map2.entrySet().stream()) + .collect(Collectors.groupingBy(Map.Entry::getKey, + Collectors.summingLong(Map.Entry::getValue))); + } + + protected static Map naiveCount(Path path) { + try { + return Files.lines(path) + .flatMap(line -> Stream.of(line.split("[^a-zA-Z0-9]"))) + .filter(word -> word.length() > 3) + .collect(groupingBy(identity(), counting())) + .entrySet() + .stream() + .sorted(comparingByValue(reverseOrder())) + .limit(10) + .collect(toMap(Map.Entry::getKey, Map.Entry::getValue)); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + protected static long naiveSearch(String query) throws IOException { + Document document = Jsoup // + .connect("https://www.google.com/search?q=" + query) // + .userAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.110 Safari/537.36 Viv/2.3.1440.48") // + .get(); + Element divResultStats = document.select("div#resultStats").first(); + return Long.valueOf(divResultStats.text().replaceAll("[^0-9]", "")); + } +} From 18ba191c36b2ce02bbfafe326bbc3caf805c06fb Mon Sep 17 00:00:00 2001 From: Konstantin_Rogozhinski Date: Thu, 28 Mar 2019 10:56:51 +0300 Subject: [PATCH 2/2] thread pools added --- .../java/ru/hh/school/homework/DirAction.java | 86 +++++++++++++++++++ .../java/ru/hh/school/homework/DirThread.java | 62 ------------- .../java/ru/hh/school/homework/Launcher.java | 35 ++++---- .../ru/hh/school/homework/SearchCall.java | 42 +++++++++ .../ru/hh/school/homework/SearchThread.java | 46 ---------- .../ru/hh/school/homework/StaticMethods.java | 13 ++- 6 files changed, 154 insertions(+), 130 deletions(-) create mode 100644 parallelism/src/main/java/ru/hh/school/homework/DirAction.java delete mode 100644 parallelism/src/main/java/ru/hh/school/homework/DirThread.java create mode 100644 parallelism/src/main/java/ru/hh/school/homework/SearchCall.java delete mode 100644 parallelism/src/main/java/ru/hh/school/homework/SearchThread.java diff --git a/parallelism/src/main/java/ru/hh/school/homework/DirAction.java b/parallelism/src/main/java/ru/hh/school/homework/DirAction.java new file mode 100644 index 0000000..ad68d64 --- /dev/null +++ b/parallelism/src/main/java/ru/hh/school/homework/DirAction.java @@ -0,0 +1,86 @@ +package ru.hh.school.homework; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.RecursiveAction; +import java.util.concurrent.Future; + +import org.slf4j.Logger; +import static org.slf4j.LoggerFactory.getLogger; + +public class DirAction extends RecursiveAction { + + /* + Класс, описывающий итерирование по содержимому файла. Если натыкаемся на директорию, + рекурсивно вызывается новый экземпляр DirAction + */ + + private static final Logger LOGGER = getLogger(Launcher.class); + + private Path dirPath; + + // мапа для "топовых" слов "припрятана" в private-аттрибут для потокобезопасности + private Map wordMap = new HashMap<>(); + + DirAction(Path newDirPath) { + dirPath = newDirPath; + } + + @Override + public void compute() { + javaFileIter(dirPath); + } + + private void javaFileIter(Path dirPath) { + try { + for (Path filePath : Files.newDirectoryStream(dirPath)) { + if (Files.isDirectory(filePath)) { + DirAction nestedDirAction = new DirAction(filePath); + nestedDirAction.fork(); + } + if (!Files.isDirectory(filePath) && filePath.toString().endsWith(".java")) { + wordMap = StaticMethods.mapCombiner(wordMap, + StaticMethods.naiveCount(filePath)); + } + } + + // из мапы "топовых" слов, собранных по отдельным файлам, собирает top-10 + // для данной директории dirPath + wordMap = StaticMethods.mapTop(wordMap); + + this.outputStringAssemble(); + } + catch (IOException e) { + throw new RuntimeException(e); + } finally { + // даже несмотря на этот join какой-то тред не закрывается + // даже после проведения всех требуемых вычислений :( + this.join(); + } + } + + private void outputStringAssemble () { + SearchCall searchCall = new SearchCall(dirPath); + try { + String outputString = ""; + ArrayList> futureList = new ArrayList<>(); + for (String word : wordMap.keySet()) { + Future future = searchCall.getNewString(word); + futureList.add(future); + } + for (Future future : futureList) { + outputString = outputString.concat(future.get()); + } + LOGGER.debug(outputString); + } catch (InterruptedException | ExecutionException e){ + throw new RuntimeException(); + } finally { + searchCall.shutdownExecutor(); + } + } +} diff --git a/parallelism/src/main/java/ru/hh/school/homework/DirThread.java b/parallelism/src/main/java/ru/hh/school/homework/DirThread.java deleted file mode 100644 index d22afb7..0000000 --- a/parallelism/src/main/java/ru/hh/school/homework/DirThread.java +++ /dev/null @@ -1,62 +0,0 @@ -package ru.hh.school.homework; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.HashMap; -import java.util.Map; - -import static java.util.Collections.reverseOrder; -import static java.util.Map.Entry.comparingByValue; -import static java.util.stream.Collectors.toMap; - -public class DirThread implements Runnable { - - private Thread thread; - private Path dirPath; - - DirThread (Path newDirPath) { - dirPath = newDirPath; - thread = new Thread(this, dirPath.toString()); - thread.start(); - } - - @Override - public void run() { - javaFileIter(dirPath); - } - - private void javaFileIter(Path dirPath) { - try { - Map wordMap = new HashMap<>(); - for (Path filePath : Files.newDirectoryStream(dirPath)) { - if (Files.isDirectory(filePath)) { - new DirThread (filePath); - } - if (filePath.toString().endsWith(".java")) { - wordMap = StaticMethods.mapCombiner(wordMap, StaticMethods.naiveCount(filePath)); - } - } - - wordMap = wordMap.entrySet() - .stream() - .sorted(comparingByValue(reverseOrder())) - .limit(10) - .collect(toMap(Map.Entry::getKey, Map.Entry::getValue)); - - String outputString = outputStringAssemble(wordMap); - System.out.print(outputString); - } - catch (IOException e) { - throw new RuntimeException(e); - } - } - - private String outputStringAssemble (Map wordMap) { - String outputString = ""; - for (String word : wordMap.keySet()) { - new SearchThread(word, dirPath); - } - return outputString; - } -} diff --git a/parallelism/src/main/java/ru/hh/school/homework/Launcher.java b/parallelism/src/main/java/ru/hh/school/homework/Launcher.java index 6aa5d6d..167da45 100644 --- a/parallelism/src/main/java/ru/hh/school/homework/Launcher.java +++ b/parallelism/src/main/java/ru/hh/school/homework/Launcher.java @@ -1,29 +1,26 @@ package ru.hh.school.homework; -import java.io.IOException; -import java.nio.file.Files; import java.nio.file.Path; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; -import java.util.stream.Stream; -import java.util.stream.Collectors; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import static java.util.Collections.reverseOrder; -import static java.util.Map.Entry.comparingByValue; -import static java.util.function.Function.identity; -import static java.util.stream.Collectors.counting; -import static java.util.stream.Collectors.groupingBy; -import static java.util.stream.Collectors.toMap; +import java.util.concurrent.ForkJoinPool; public class Launcher { - public static void main(String[] args) throws IOException { - Path path = Path.of("C:\\1_Konstantin\\1_hh\\concurrency\\hh-school-1\\parallelism\\src\\main\\java\\ru\\hh\\school\\async"); + public static void main(String[] args){ - new DirThread(path); + Path path = Path.of("C:\\1_Konstantin\\1_hh\\concurrency\\hh-school-1\\" + + "parallelism\\src\\main\\java\\ru\\hh\\school"); + // рекурсивный характер задачи вынудил меня прибегнуть к ForkJoinPool + ForkJoinPool pool = + new ForkJoinPool(Runtime.getRuntime().availableProcessors()); + + DirAction startDirAction = new DirAction(path); + + pool.invoke(startDirAction); + + // даже несмотря на это программа не завершается + // и после проведения всех требуемых вычислений: + // видимо, где-то не сджойнился тред + pool.shutdownNow(); } } \ No newline at end of file diff --git a/parallelism/src/main/java/ru/hh/school/homework/SearchCall.java b/parallelism/src/main/java/ru/hh/school/homework/SearchCall.java new file mode 100644 index 0000000..c84ca1d --- /dev/null +++ b/parallelism/src/main/java/ru/hh/school/homework/SearchCall.java @@ -0,0 +1,42 @@ +package ru.hh.school.homework; + +import java.nio.file.Path; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +public class SearchCall { + + /* + * + Класс, реализующий асинхронную отсылку запросов на гугл. + + Не придумал, как засунуть эти задачи в общий ForkJoinPool, + поэтому для каждой директории будет создаваться свой класс SearchCall, + в котором будет скромный тредпул на пять потоков. + + Таким образом, наибольшее количество потоков, которое может получиться: + 20 = 5 (SearchCall) * 4 (на главном ForkJoinPool) + * + */ + + private Path dirPath; + private ExecutorService executor = Executors.newFixedThreadPool(5); + + SearchCall(Path newDirPath) { + dirPath = newDirPath; + } + + public Future getNewString (String word) { + return executor.submit(() -> { + return String.format("%s - %s - %d \n", + dirPath.toString(), word, StaticMethods.naiveSearch(word)); + }); + } + + // Конечно, после выполнения запросов весь этот класс будет убит garbege collector'ом, + // но все же я решил предусмотреть остановку Executor-сервиса + public void shutdownExecutor(){ + executor.shutdownNow(); + } +} diff --git a/parallelism/src/main/java/ru/hh/school/homework/SearchThread.java b/parallelism/src/main/java/ru/hh/school/homework/SearchThread.java deleted file mode 100644 index 64bbca9..0000000 --- a/parallelism/src/main/java/ru/hh/school/homework/SearchThread.java +++ /dev/null @@ -1,46 +0,0 @@ -package ru.hh.school.homework; - -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; - -import java.io.IOException; -import java.nio.file.Path; - -public class SearchThread implements Runnable { - - private String word; - private Path dirPath; - private Thread thread; - - SearchThread(String newWord, Path newDirPath) { - word = newWord; - dirPath = newDirPath; - thread = new Thread(this, dirPath.toString().concat(word)); - thread.start(); - } - - @Override - public void run() { - try{ - String newLine = String.format("%s - %s - %d \n", dirPath.toString(), word, naiveSearch(word)); - System.out.print(newLine); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - private static long naiveSearch(String query) throws IOException { - try{ - Document document = Jsoup // - .connect("https://www.google.com/search?q=" + query) // - .userAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.110 Safari/537.36 Viv/2.3.1440.48") // - .get(); - Element divResultStats = document.select("div#resultStats").first(); - return Long.valueOf(divResultStats.text().replaceAll("[^0-9]", "")); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - -} diff --git a/parallelism/src/main/java/ru/hh/school/homework/StaticMethods.java b/parallelism/src/main/java/ru/hh/school/homework/StaticMethods.java index 1409343..053931a 100644 --- a/parallelism/src/main/java/ru/hh/school/homework/StaticMethods.java +++ b/parallelism/src/main/java/ru/hh/school/homework/StaticMethods.java @@ -14,13 +14,19 @@ import static java.util.Collections.reverseOrder; import static java.util.Map.Entry.comparingByValue; import static java.util.function.Function.identity; -import static java.util.stream.Collectors.counting; -import static java.util.stream.Collectors.groupingBy; -import static java.util.stream.Collectors.toMap; +import static java.util.stream.Collectors.*; public class StaticMethods { + protected static Map mapTop(Map wordMap) { + return wordMap.entrySet() + .stream() + .sorted(comparingByValue(reverseOrder())) + .limit(10) + .collect(toMap(Map.Entry::getKey, Map.Entry::getValue)); + } + // сливает две мапы в одну, суммируя численные значения с одинаковым ключом protected static Map mapCombiner ( Map map1, Map map2) { return Stream.concat(map1.entrySet().stream(), map2.entrySet().stream()) @@ -28,6 +34,7 @@ protected static Map mapCombiner ( Collectors.summingLong(Map.Entry::getValue))); } + protected static Map naiveCount(Path path) { try { return Files.lines(path)